mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
Merge branch 'master' into graphics
This commit is contained in:
commit
e380ded5e1
542 changed files with 124552 additions and 18682 deletions
3
.gitmodules
vendored
3
.gitmodules
vendored
|
@ -1,3 +1,6 @@
|
|||
[submodule "hw/rtl/fp_cores/fpnew"]
|
||||
path = hw/rtl/fp_cores/fpnew
|
||||
url = https://github.com/pulp-platform/fpnew.git
|
||||
[submodule "sim/common/softfloat"]
|
||||
path = sim/common/softfloat
|
||||
url = https://github.com/ucb-bar/berkeley-softfloat-3.git
|
||||
|
|
17
.travis.yml
17
.travis.yml
|
@ -30,22 +30,25 @@ jobs:
|
|||
include:
|
||||
- stage: test
|
||||
name: coverage
|
||||
script: cp -r $PWD ../build1 && cd ../build1 && ./ci/regression.sh -coverage
|
||||
script: cp -r $PWD ../build1 && cd ../build1 && ./ci/travis_run.py ./ci/regression.sh -coverage
|
||||
- stage: test
|
||||
name: cluster
|
||||
script: cp -r $PWD ../build2 && cd ../build2 && ./ci/regression.sh -cluster
|
||||
script: cp -r $PWD ../build2 && cd ../build2 && ./ci/travis_run.py ./ci/regression.sh -cluster
|
||||
- stage: test
|
||||
name: debug
|
||||
script: cp -r $PWD ../build3 && cd ../build3 && ./ci/regression.sh -debug
|
||||
script: cp -r $PWD ../build3 && cd ../build3 && ./ci/travis_run.py ./ci/regression.sh -debug
|
||||
- stage: test
|
||||
name: config
|
||||
script: cp -r $PWD ../build4 && cd ../build4 && ./ci/regression.sh -config
|
||||
script: cp -r $PWD ../build4 && cd ../build4 && ./ci/travis_run.py ./ci/regression.sh -config
|
||||
- stage: test
|
||||
name: stress
|
||||
script: cp -r $PWD ../build5 && cd ../build5 && ./ci/regression.sh -stress
|
||||
name: stress0
|
||||
script: cp -r $PWD ../build5 && cd ../build5 && ./ci/travis_run.py ./ci/regression.sh -stress0
|
||||
- stage: test
|
||||
name: stress1
|
||||
script: cp -r $PWD ../build6 && cd ../build6 && ./ci/travis_run.py ./ci/regression.sh -stress1
|
||||
- stage: test
|
||||
name: compiler
|
||||
script: cp -r $PWD ../build6 && cd ../build6 && ./ci/test_compiler.sh
|
||||
script: cp -r $PWD ../build7 && cd ../build7 && ./ci/travis_run.py ./ci/test_compiler.sh
|
||||
|
||||
after_success:
|
||||
# Gather code coverage
|
||||
|
|
5
Makefile
5
Makefile
|
@ -1,14 +1,13 @@
|
|||
|
||||
all:
|
||||
$(MAKE) -C hw
|
||||
$(MAKE) -C sim
|
||||
$(MAKE) -C driver
|
||||
$(MAKE) -C runtime
|
||||
$(MAKE) -C simX
|
||||
$(MAKE) -C tests
|
||||
|
||||
clean:
|
||||
$(MAKE) -C hw clean
|
||||
$(MAKE) -C sim clean
|
||||
$(MAKE) -C driver clean
|
||||
$(MAKE) -C simX clean
|
||||
$(MAKE) -C runtime clean
|
||||
$(MAKE) -C tests clean
|
10
README.md
10
README.md
|
@ -8,7 +8,11 @@ Vortex is a full-system RISCV-based GPGPU processor.
|
|||
## Specifications
|
||||
|
||||
- Support RISC-V RV32IMF ISA
|
||||
- Scalability: 1 to 32 cores with optional L2 and L3 caches
|
||||
- Performance:
|
||||
- 1024 total threads running at 250 MHz
|
||||
- 128 Gflops of compute bandwidth
|
||||
- 16 GB/s of memory bandwidth
|
||||
- Scalability: up to 64 cores with optional L2 and L3 caches
|
||||
- Software: OpenCL 1.2 Support
|
||||
- Supported FPGAs:
|
||||
- Intel Arria 10
|
||||
|
@ -20,11 +24,11 @@ Vortex is a full-system RISCV-based GPGPU processor.
|
|||
|
||||
- `hw`: Hardware sources.
|
||||
|
||||
- `driver`: Host driver software.
|
||||
- `driver`: Host drivers repository.
|
||||
|
||||
- `runtime`: Kernel Runtime software.
|
||||
|
||||
- `simX`: Cycle-approximate simulator.
|
||||
- `sim`: Simulators repository.
|
||||
|
||||
- `tests`: Tests repository.
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@ L3=0
|
|||
DEBUG=0
|
||||
SCOPE=0
|
||||
HAS_ARGS=0
|
||||
DEBUG_LEVEL=1
|
||||
|
||||
for i in "$@"
|
||||
do
|
||||
|
@ -88,28 +89,19 @@ done
|
|||
case $DRIVER in
|
||||
rtlsim)
|
||||
DRIVER_PATH=$VORTEX_HOME/driver/rtlsim
|
||||
DRIVER_EXTRA=
|
||||
CLEAN_TOKEN=clean
|
||||
;;
|
||||
vlsim)
|
||||
DRIVER_PATH=$VORTEX_HOME/driver/opae
|
||||
DRIVER_EXTRA=vlsim
|
||||
CLEAN_TOKEN=clean-vlsim
|
||||
DRIVER_PATH=$VORTEX_HOME/driver/vlsim
|
||||
;;
|
||||
asesim)
|
||||
DRIVER_PATH=$VORTEX_HOME/driver/opae
|
||||
DRIVER_EXTRA=asesim
|
||||
CLEAN_TOKEN=clean-asesim
|
||||
DRIVER_PATH=$VORTEX_HOME/driver/asesim
|
||||
;;
|
||||
fpga)
|
||||
DRIVER_PATH=$VORTEX_HOME/driver/opae
|
||||
DRIVER_EXTRA=fpga
|
||||
CLEAN_TOKEN=clean-fpga
|
||||
DRIVER_PATH=$VORTEX_HOME/driver/fpga
|
||||
;;
|
||||
simx)
|
||||
DRIVER_PATH=$VORTEX_HOME/driver/simx
|
||||
DRIVER_EXTRA=
|
||||
CLEAN_TOKEN=clean
|
||||
DEBUG_LEVEL=3
|
||||
;;
|
||||
*)
|
||||
echo "invalid driver: $DRIVER"
|
||||
|
@ -132,7 +124,7 @@ CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_TH
|
|||
|
||||
echo "CONFIGS=$CONFIGS"
|
||||
|
||||
make -C $DRIVER_PATH $CLEAN_TOKEN
|
||||
make -C $DRIVER_PATH clean
|
||||
|
||||
status=0
|
||||
|
||||
|
@ -140,9 +132,9 @@ if [ $DEBUG -eq 1 ]
|
|||
then
|
||||
if [ $SCOPE -eq 1 ]
|
||||
then
|
||||
DEBUG=1 SCOPE=1 CONFIGS="$CONFIGS" make -s -C $DRIVER_PATH $DRIVER_EXTRA
|
||||
DEBUG=$DEBUG_LEVEL SCOPE=1 CONFIGS="$CONFIGS" make -s -C $DRIVER_PATH
|
||||
else
|
||||
DEBUG=1 CONFIGS="$CONFIGS" make -s -C $DRIVER_PATH $DRIVER_EXTRA
|
||||
DEBUG=$DEBUG_LEVEL CONFIGS="$CONFIGS" make -s -C $DRIVER_PATH
|
||||
fi
|
||||
|
||||
if [ $HAS_ARGS -eq 1 ]
|
||||
|
@ -161,9 +153,9 @@ then
|
|||
else
|
||||
if [ $SCOPE -eq 1 ]
|
||||
then
|
||||
SCOPE=1 CONFIGS="$CONFIGS" make -s -C $DRIVER_PATH $DRIVER_EXTRA
|
||||
SCOPE=1 CONFIGS="$CONFIGS" make -s -C $DRIVER_PATH
|
||||
else
|
||||
CONFIGS="$CONFIGS" make -s -C $DRIVER_PATH $DRIVER_EXTRA
|
||||
CONFIGS="$CONFIGS" make -s -C $DRIVER_PATH
|
||||
fi
|
||||
|
||||
if [ $HAS_ARGS -eq 1 ]
|
||||
|
|
|
@ -3,9 +3,13 @@
|
|||
# exit when any command fails
|
||||
set -e
|
||||
|
||||
OS_DIR=ubuntu/bionic
|
||||
SRCDIR=/opt
|
||||
DESTDIR=.
|
||||
OS_DIR=${OS_DIR:-'ubuntu/bionic'}
|
||||
SRCDIR=${SRCDIR:-'/opt'}
|
||||
DESTDIR=${DESTDIR:-'.'}
|
||||
|
||||
echo "OS_DIR=${OS_DIR}"
|
||||
echo "SRCDIR=${SRCDIR}"
|
||||
echo "DESTDIR=${DESTDIR}"
|
||||
|
||||
riscv()
|
||||
{
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
# exit when any command fails
|
||||
set -e
|
||||
|
||||
# build sources
|
||||
# ensure build
|
||||
make -s
|
||||
|
||||
coverage()
|
||||
|
@ -27,17 +27,17 @@ cluster()
|
|||
echo "begin clustering tests..."
|
||||
|
||||
# warp/threads configurations
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=2 --threads=8 --app=demo
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=8 --threads=2 --app=demo
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=2 --threads=8 --app=demo
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=8 --threads=2 --app=demo
|
||||
|
||||
# cores clustering
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=1 --clusters=1 --app=demo --args="-n1"
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=1 --clusters=1 --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=demo --args="-n1"
|
||||
|
||||
# L2/L3
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=demo --args="-n1"
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l3cache --app=demo --args="-n1"
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l3cache --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr --args="-n1"
|
||||
|
||||
echo "clustering tests done!"
|
||||
}
|
||||
|
@ -46,9 +46,9 @@ debug()
|
|||
{
|
||||
echo "begin debugging tests..."
|
||||
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --perf --app=demo --args="-n1"
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --debug --app=demo --args="-n1"
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --scope --app=basic --args="-t0 -n1"
|
||||
./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --perf --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --debug --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=vlsim --cores=1 --scope --app=basic --args="-t0 -n1"
|
||||
|
||||
echo "debugging tests done!"
|
||||
}
|
||||
|
@ -75,13 +75,21 @@ FPU_CORE=FPU_DEFAULT ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood
|
|||
# using FPNEW FPU core
|
||||
FPU_CORE=FPU_FPNEW ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood
|
||||
|
||||
# using AXI bus
|
||||
AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
|
||||
|
||||
# adjust l1 block size to match l2
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=16 -DL1_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr --args="-n1"
|
||||
|
||||
# test cache banking
|
||||
CONFIGS="-DDNUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
|
||||
CONFIGS="-DDNUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
|
||||
CONFIGS="-DDNUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_addr
|
||||
CONFIGS="-DDNUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_addr
|
||||
|
||||
# test cache multi-porting
|
||||
CONFIGS="-DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
|
||||
CONFIGS="-DDNUM_PORTS=4" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
|
||||
CONFIGS="-DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_addr
|
||||
CONFIGS="-DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo --debug --args="-n1"
|
||||
CONFIGS="-DL2_NUM_PORTS=2 -DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr
|
||||
CONFIGS="-DL2_NUM_PORTS=4 -DDNUM_PORTS=4" ./ci/blackbox.sh --driver=rtlsim --cores=4 --l2cache --app=io_addr
|
||||
|
||||
# test 128-bit MEM block
|
||||
CONFIGS=-DMEM_BLOCK_SIZE=16 ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
|
||||
|
@ -95,51 +103,70 @@ CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=
|
|||
# test 128-bit DRAM block
|
||||
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=128 -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=28 -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
|
||||
|
||||
# test verilator reset values
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=0" ./ci/blackbox.sh --driver=vlsim --cores=4 --app=sgemm
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=vlsim --cores=4 --app=sgemm
|
||||
|
||||
# test long memory latency
|
||||
CONFIGS="-DMEM_LATENCY=100 -DMEM_RQ_SIZE=4 -DMEM_STALLS_MODULO=4" ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
|
||||
|
||||
echo "configuration tests done!"
|
||||
}
|
||||
|
||||
stress()
|
||||
stress0()
|
||||
{
|
||||
echo "begin stress tests..."
|
||||
echo "begin stress0 tests..."
|
||||
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --clusters=2 --l3cache --app=sgemm --args="-n256"
|
||||
# test verilator reset values
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=0" ./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --l3cache --app=sgemm
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --l3cache --app=sgemm
|
||||
FPU_CORE=FPU_DEFAULT CONFIGS="-DVERILATOR_RESET_VALUE=0" ./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood
|
||||
FPU_CORE=FPU_DEFAULT CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=0" ./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=0" ./ci/blackbox.sh --driver=vlsim --cores=4 --app=printf
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=vlsim --cores=4 --app=printf
|
||||
|
||||
echo "stress tests done!"
|
||||
echo "stress0 tests done!"
|
||||
}
|
||||
|
||||
stress1()
|
||||
{
|
||||
echo "begin stress1 tests..."
|
||||
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --clusters=2 --l3cache --app=sgemm --args="-n256"
|
||||
|
||||
echo "stress1 tests done!"
|
||||
}
|
||||
|
||||
usage()
|
||||
{
|
||||
echo "usage: regression [-coverage] [-cluster] [-debug] [-config] [-stress] [-all] [-h|--help]"
|
||||
echo "usage: regression [-coverage] [-cluster] [-debug] [-config] [-stress[#n]] [-all] [-h|--help]"
|
||||
}
|
||||
|
||||
while [ "$1" != "" ]; do
|
||||
case $1 in
|
||||
-coverage ) coverage
|
||||
;;
|
||||
;;
|
||||
-cluster ) cluster
|
||||
;;
|
||||
;;
|
||||
-debug ) debug
|
||||
;;
|
||||
;;
|
||||
-config ) config
|
||||
;;
|
||||
-stress ) stress
|
||||
-stress0 ) stress0
|
||||
;;
|
||||
-stress1 ) stress1
|
||||
;;
|
||||
-stress ) stress0
|
||||
stress1
|
||||
;;
|
||||
-all ) coverage
|
||||
cluster
|
||||
debug
|
||||
config
|
||||
stress
|
||||
;;
|
||||
stress0
|
||||
stress1
|
||||
;;
|
||||
-h | --help ) usage
|
||||
exit
|
||||
;;
|
||||
;;
|
||||
* ) usage
|
||||
exit 1
|
||||
esac
|
||||
|
|
|
@ -7,25 +7,30 @@ The directory/file layout of the Vortex codebase is as followed:
|
|||
- `cache`: cache subsystem code
|
||||
- `fp_cores`: floating point unit code
|
||||
- `interfaces`: interfaces for inter-module communication
|
||||
- `libs`: general-purpose modules (i.e., encoder, arbiter, ...)
|
||||
- `libs`: general-purpose RTL modules
|
||||
- `syn`: synthesis directory
|
||||
- `opae`: OPAE synthesis scripts
|
||||
- `quartus`: Quartus synthesis scripts
|
||||
- `synopsys`: Synopsys synthesis scripts
|
||||
- `modelsim`: Modelsim synthesis scripts
|
||||
- `yosys`: Yosys synthesis scripts
|
||||
- `simulate`: baseline RTL simulator (used by RTLSIM)
|
||||
- `unit_tests`: unit tests for some hardware components
|
||||
- `driver`: Host driver software
|
||||
- `driver`: host drivers repository
|
||||
- `include`: Vortex driver public headers
|
||||
- `opae`: software driver that uses Intel OPAE
|
||||
- `vlsim`: software driver that simulates Full RTL (include AFU)
|
||||
- `rtlsim`: software driver that simulates processor RTL
|
||||
- `stub`: Vortex stub driver library
|
||||
- `fpga`: software driver that uses Intel OPAE FPGA
|
||||
- `asesim`: software driver that uses Intel ASE simulator
|
||||
- `vlsim`: software driver that uses vlsim simulator
|
||||
- `rtlsim`: software driver that uses rtlsim simulator
|
||||
- `simx`: software driver that uses simX simulator
|
||||
- `runtime`: Kernel runtime software
|
||||
- `runtime`: kernel runtime software
|
||||
- `include`: Vortex runtime public headers
|
||||
- `linker`: linker file for compiling kernels
|
||||
- `src`: runtime implementation
|
||||
- `simX`: cycle approximate simulator for vortex
|
||||
- `sim`:
|
||||
- `vlsim`: AFU RTL simulator
|
||||
- `rtlsim`: processor RTL simulator
|
||||
- `simX`: cycle approximate simulator for vortex
|
||||
- `tests`: tests repository.
|
||||
- `runtime`: runtime tests
|
||||
- `regression`: regression tests
|
||||
|
|
|
@ -1,10 +1,16 @@
|
|||
all: stub rtlsim simx opae
|
||||
all: stub rtlsim simx vlsim
|
||||
|
||||
stub:
|
||||
$(MAKE) -C stub
|
||||
|
||||
opae:
|
||||
$(MAKE) -C opae
|
||||
fpga:
|
||||
$(MAKE) -C fpga
|
||||
|
||||
asesim:
|
||||
$(MAKE) -C asesim
|
||||
|
||||
vlsim:
|
||||
$(MAKE) -C vlsim
|
||||
|
||||
rtlsim:
|
||||
$(MAKE) -C rtlsim
|
||||
|
@ -14,8 +20,10 @@ simx:
|
|||
|
||||
clean:
|
||||
$(MAKE) clean -C stub
|
||||
$(MAKE) clean -C opae
|
||||
$(MAKE) clean -C fpga
|
||||
$(MAKE) clean -C asesim
|
||||
$(MAKE) clean -C vlsim
|
||||
$(MAKE) clean -C rtlsim
|
||||
$(MAKE) clean -C simx
|
||||
|
||||
.PHONY: all stub opae rtlsim simx clean
|
||||
.PHONY: all stub fpga asesim vlsim rtlsim simx clean
|
74
driver/asesim/Makefile
Normal file
74
driver/asesim/Makefile
Normal file
|
@ -0,0 +1,74 @@
|
|||
OPAE_HOME ?= /tools/opae/1.4.0
|
||||
|
||||
RTL_DIR=../../hw/rtl
|
||||
|
||||
SCRIPT_DIR=../../hw/scripts
|
||||
|
||||
OPAE_SYN_DIR=../../hw/syn/opae
|
||||
|
||||
CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors
|
||||
#CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -I. -I../include -I../../hw -I$(OPAE_HOME)/include -I$(OPAE_SYN_DIR)
|
||||
|
||||
LDFLAGS += -L$(OPAE_HOME)/lib -luuid -lopae-c-ase
|
||||
|
||||
# stack execution protection
|
||||
LDFLAGS +=-z noexecstack
|
||||
|
||||
# data relocation and projection
|
||||
LDFLAGS +=-z relro -z now
|
||||
|
||||
# stack buffer overrun detection
|
||||
CXXFLAGS +=-fstack-protector
|
||||
|
||||
# Position independent code
|
||||
CXXFLAGS += -fPIC
|
||||
|
||||
# Add external configuration
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
|
||||
# Dump perf stats
|
||||
CXXFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
LDFLAGS += -shared
|
||||
|
||||
PROJECT = libvortex.so
|
||||
|
||||
SRCS = ../common/opae.cpp ../common/vx_utils.cpp
|
||||
|
||||
# Enable scope analyzer
|
||||
ifdef SCOPE
|
||||
CXXFLAGS += -DSCOPE
|
||||
SRCS += ../common/vx_scope.cpp
|
||||
SCOPE_H = scope-defs.h
|
||||
endif
|
||||
|
||||
# Enable perf counters
|
||||
ifdef PERF
|
||||
CXXFLAGS += -DPERF_ENABLE
|
||||
endif
|
||||
|
||||
all: $(PROJECT)
|
||||
|
||||
$(OPAE_SYN_DIR)/vortex_afu.h:
|
||||
$(MAKE) -C $(OPAE_SYN_DIR) vortex_afu.h
|
||||
|
||||
scope-defs.h: $(SCRIPT_DIR)/scope.json
|
||||
$(SCRIPT_DIR)/scope.py $(CONFIGS) -cc scope-defs.h -vl $(RTL_DIR)/scope-defs.vh $(SCRIPT_DIR)/scope.json
|
||||
|
||||
# generate scope data
|
||||
scope: scope-defs.h
|
||||
|
||||
$(PROJECT): $(SRCS) $(OPAE_SYN_DIR)/vortex_afu.h $(SCOPE_H)
|
||||
$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) -o $(PROJECT)
|
||||
|
||||
.depend: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) -MM $(SRCS) > .depend;
|
||||
|
||||
clean:
|
||||
rm -rf $(PROJECT) *.o .depend scope-defs.h
|
||||
|
||||
ifneq ($(MAKECMDGOALS),clean)
|
||||
-include .depend
|
||||
endif
|
|
@ -8,12 +8,13 @@
|
|||
#include <cmath>
|
||||
#include <sstream>
|
||||
#include <unordered_map>
|
||||
#include <list>
|
||||
|
||||
#if defined(USE_FPGA) || defined(USE_ASE)
|
||||
#include <opae/fpga.h>
|
||||
#include <uuid/uuid.h>
|
||||
#elif defined(USE_VLSIM)
|
||||
#include "vlsim/fpga.h"
|
||||
#include <fpga.h>
|
||||
#endif
|
||||
|
||||
#include <vortex.h>
|
||||
|
@ -78,6 +79,34 @@ inline bool is_aligned(size_t addr, size_t alignment) {
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifdef DUMP_PERF_STATS
|
||||
class AutoPerfDump {
|
||||
private:
|
||||
std::list<vx_device_h> devices_;
|
||||
|
||||
public:
|
||||
AutoPerfDump() {}
|
||||
|
||||
~AutoPerfDump() {
|
||||
for (auto device : devices_) {
|
||||
vx_dump_perf(device, stdout);
|
||||
}
|
||||
}
|
||||
|
||||
void add_device(vx_device_h device) {
|
||||
devices_.push_back(device);
|
||||
}
|
||||
|
||||
void remove_device(vx_device_h device) {
|
||||
devices_.remove(device);
|
||||
}
|
||||
};
|
||||
|
||||
AutoPerfDump gAutoPerfDump;
|
||||
#endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
@ -223,6 +252,10 @@ extern int vx_dev_open(vx_device_h* hdevice) {
|
|||
|
||||
*hdevice = device;
|
||||
|
||||
#ifdef DUMP_PERF_STATS
|
||||
gAutoPerfDump.add_device(*hdevice);
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -237,7 +270,8 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
|||
#endif
|
||||
|
||||
#ifdef DUMP_PERF_STATS
|
||||
vx_dump_perf(device, stdout);
|
||||
gAutoPerfDump.remove_device(hdevice);
|
||||
vx_dump_perf(hdevice, stdout);
|
||||
#endif
|
||||
|
||||
fpgaClose(device->fpga);
|
|
@ -1,3 +1,4 @@
|
|||
#include "vx_scope.h"
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <thread>
|
||||
|
@ -7,17 +8,9 @@
|
|||
#include <chrono>
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
|
||||
#ifdef USE_VLSIM
|
||||
#include "vlsim/fpga.h"
|
||||
#else
|
||||
#include <opae/fpga.h>
|
||||
#endif
|
||||
|
||||
#include <VX_config.h>
|
||||
#include "vx_scope.h"
|
||||
#include "vortex_afu.h"
|
||||
#include "scope-defs.h"
|
||||
#include <vortex_afu.h>
|
||||
#include <scope-defs.h>
|
||||
|
||||
#define FRAME_FLUSH_SIZE 100
|
||||
|
|
@ -1,5 +1,13 @@
|
|||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#ifdef USE_VLSIM
|
||||
#include <fpga.h>
|
||||
#else
|
||||
#include <opae/fpga.h>
|
||||
#endif
|
||||
|
||||
#if defined(USE_FPGA)
|
||||
#define HANG_TIMEOUT 60
|
||||
#else
|
76
driver/fpga/Makefile
Normal file
76
driver/fpga/Makefile
Normal file
|
@ -0,0 +1,76 @@
|
|||
OPAE_HOME ?= /tools/opae/1.4.0
|
||||
|
||||
RTL_DIR=../../hw/rtl
|
||||
|
||||
SCRIPT_DIR=../../hw/scripts
|
||||
|
||||
OPAE_SYN_DIR=../../hw/syn/opae
|
||||
|
||||
CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors
|
||||
#CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -I. -I../include -I../../hw -I$(OPAE_HOME)/include -I$(OPAE_SYN_DIR)
|
||||
|
||||
LDFLAGS += -L$(OPAE_HOME)/lib -luuid -lopae-c
|
||||
|
||||
#SCOPE=1
|
||||
|
||||
# stack execution protection
|
||||
LDFLAGS +=-z noexecstack
|
||||
|
||||
# data relocation and projection
|
||||
LDFLAGS +=-z relro -z now
|
||||
|
||||
# stack buffer overrun detection
|
||||
CXXFLAGS +=-fstack-protector
|
||||
|
||||
# Position independent code
|
||||
CXXFLAGS += -fPIC
|
||||
|
||||
# Add external configuration
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
|
||||
# Dump perf stats
|
||||
CXXFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
LDFLAGS += -shared
|
||||
|
||||
PROJECT = libvortex.so
|
||||
|
||||
SRCS = ../common/opae.cpp ../common/vx_utils.cpp
|
||||
|
||||
# Enable scope analyzer
|
||||
ifdef SCOPE
|
||||
CXXFLAGS += -DSCOPE
|
||||
SRCS += ../common/vx_scope.cpp
|
||||
SCOPE_H = scope-defs.h
|
||||
endif
|
||||
|
||||
# Enable perf counters
|
||||
ifdef PERF
|
||||
CXXFLAGS += -DPERF_ENABLE
|
||||
endif
|
||||
|
||||
all: $(PROJECT)
|
||||
|
||||
$(OPAE_SYN_DIR)/vortex_afu.h:
|
||||
$(MAKE) -C $(OPAE_SYN_DIR) vortex_afu.h
|
||||
|
||||
scope-defs.h: $(SCRIPT_DIR)/scope.json
|
||||
$(SCRIPT_DIR)/scope.py $(CONFIGS) -cc scope-defs.h -vl $(RTL_DIR)/scope-defs.vh $(SCRIPT_DIR)/scope.json
|
||||
|
||||
# generate scope data
|
||||
scope: scope-defs.h
|
||||
|
||||
$(PROJECT): $(SRCS) $(OPAE_SYN_DIR)/vortex_afu.h $(SCOPE_H)
|
||||
$(CXX) $(CXXFLAGS) -DUSE_FPGA $^ $(LDFLAGS) -o $(PROJECT)
|
||||
|
||||
.depend: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) -MM $(SRCS) > .depend;
|
||||
|
||||
clean:
|
||||
rm -rf $(PROJECT) *.o .depend scope-defs.h
|
||||
|
||||
ifneq ($(MAKECMDGOALS),clean)
|
||||
-include .depend
|
||||
endif
|
|
@ -1,116 +0,0 @@
|
|||
OPAE_HOME ?= /tools/opae/1.4.0
|
||||
|
||||
CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors
|
||||
#CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -I../include -I$(OPAE_HOME)/include -I../../hw
|
||||
|
||||
LDFLAGS += -L$(OPAE_HOME)/lib
|
||||
|
||||
#SCOPE=1
|
||||
|
||||
# stack execution protection
|
||||
LDFLAGS +=-z noexecstack
|
||||
|
||||
# data relocation and projection
|
||||
LDFLAGS +=-z relro -z now
|
||||
|
||||
# stack buffer overrun detection
|
||||
CXXFLAGS +=-fstack-protector
|
||||
|
||||
# Position independent code
|
||||
CXXFLAGS += -fPIC
|
||||
|
||||
# Add external configuration
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
|
||||
# Dump perf stats
|
||||
CXXFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
LDFLAGS += -shared
|
||||
|
||||
FPGA_LIBS += -luuid -lopae-c
|
||||
|
||||
ASE_LIBS += -luuid -lopae-c-ase
|
||||
|
||||
VLSIM_LIBS += -lopae-c-vlsim
|
||||
|
||||
ASE_DIR = ase
|
||||
|
||||
VLSIM_DIR = vlsim
|
||||
|
||||
RTL_DIR=../../hw/rtl
|
||||
|
||||
SCRIPT_DIR=../../hw/scripts
|
||||
|
||||
PROJECT = libvortex.so
|
||||
|
||||
PROJECT_ASE = $(ASE_DIR)/libvortex.so
|
||||
|
||||
PROJECT_VLSIM = $(VLSIM_DIR)/libvortex.so
|
||||
|
||||
AFU_JSON_INFO = vortex_afu.h
|
||||
|
||||
SRCS = vortex.cpp ../common/vx_utils.cpp
|
||||
|
||||
# Enable scope analyzer
|
||||
ifdef SCOPE
|
||||
CXXFLAGS += -DSCOPE
|
||||
SRCS += vx_scope.cpp
|
||||
SCOPE_ENABLE = SCOPE=1
|
||||
SCOPE_H = scope-defs.h
|
||||
endif
|
||||
|
||||
# Enable perf counters
|
||||
ifdef PERF
|
||||
CXXFLAGS += -DPERF_ENABLE
|
||||
PERF_ENABLE = PERF=1
|
||||
endif
|
||||
|
||||
all: vlsim
|
||||
|
||||
# AFU info from JSON file, including AFU UUID
|
||||
json: ../../hw/opae/vortex_afu.json
|
||||
afu_json_mgr json-info --afu-json=$^ --c-hdr=$@
|
||||
|
||||
scope-defs.h: $(SCRIPT_DIR)/scope.json
|
||||
$(SCRIPT_DIR)/scope.py $(RTL_INCLUDE) $(CONFIGS) -cc scope-defs.h -vl $(RTL_DIR)/scope-defs.vh $(SCRIPT_DIR)/scope.json
|
||||
|
||||
# generate scope data
|
||||
scope: scope-defs.h
|
||||
|
||||
vlsim-hw: $(SCOPE_H)
|
||||
$(SCOPE_ENABLE) $(PERF_ENABLE) $(MAKE) -C vlsim
|
||||
|
||||
fpga: $(SRCS) $(SCOPE_H)
|
||||
$(CXX) $(CXXFLAGS) -DUSE_FPGA $^ $(LDFLAGS) $(FPGA_LIBS) -o $(PROJECT)
|
||||
|
||||
asesim: $(SRCS) $(ASE_DIR) $(SCOPE_H)
|
||||
$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $(PROJECT_ASE)
|
||||
|
||||
vlsim: $(SRCS) vlsim-hw
|
||||
$(CXX) $(CXXFLAGS) -DUSE_VLSIM $(SRCS) $(LDFLAGS) -L./vlsim $(VLSIM_LIBS) -o $(PROJECT_VLSIM)
|
||||
|
||||
vortex.o: vortex.cpp
|
||||
$(CXX) $(CXXFLAGS) -c vortex.cpp -o $@
|
||||
|
||||
$(ASE_DIR):
|
||||
mkdir -p ase
|
||||
|
||||
.depend: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) -MM $(SRCS) > .depend;
|
||||
|
||||
clean-fpga:
|
||||
rm -rf $(PROJECT) *.o .depend
|
||||
|
||||
clean-asesim:
|
||||
rm -rf $(PROJECT_ASE) *.o .depend
|
||||
|
||||
clean-vlsim:
|
||||
$(MAKE) -C vlsim clean
|
||||
|
||||
clean: clean-fpga clean-asesim clean-vlsim
|
||||
|
||||
ifneq ($(MAKECMDGOALS),clean)
|
||||
-include .depend
|
||||
endif
|
|
@ -1,100 +0,0 @@
|
|||
CFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
|
||||
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
|
||||
|
||||
CFLAGS += -DUSE_VLSIM -fPIC -Wno-maybe-uninitialized
|
||||
CFLAGS += -I../../../../hw
|
||||
|
||||
# control RTL debug print states
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSHR
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_MEM
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_TEX
|
||||
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
|
||||
|
||||
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
||||
|
||||
CFLAGS += $(CONFIGS)
|
||||
CFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
LDFLAGS += -shared -pthread
|
||||
# LDFLAGS += -dynamiclib -pthread
|
||||
|
||||
TOP = vortex_afu_shim
|
||||
|
||||
RTL_DIR=../../../hw/rtl
|
||||
DPI_DIR=../../../hw/dpi
|
||||
|
||||
SRCS = fpga.cpp opae_sim.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
||||
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(DPI_DIR) -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
|
||||
TEX_INCLUDE = -I$(RTL_DIR)/tex_unit
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE) $(TEX_INCLUDE)
|
||||
RTL_INCLUDE += -I$(RTL_DIR)/afu -I$(RTL_DIR)/afu/ccip
|
||||
|
||||
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic
|
||||
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
|
||||
VL_FLAGS += --x-initial unique --x-assign unique
|
||||
VL_FLAGS += verilator.vlt
|
||||
VL_FLAGS += $(CONFIGS)
|
||||
|
||||
# Enable Verilator multithreaded simulation
|
||||
#THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
|
||||
#VL_FLAGS += --threads $(THREADS)
|
||||
|
||||
# Debugigng
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += -DVCD_OUTPUT --trace --trace-structs $(DBG_FLAGS)
|
||||
CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
|
||||
else
|
||||
VL_FLAGS += -DNDEBUG
|
||||
CFLAGS += -DNDEBUG
|
||||
endif
|
||||
|
||||
# Enable scope analyzer
|
||||
ifdef SCOPE
|
||||
VL_FLAGS += -DSCOPE
|
||||
CFLAGS += -DSCOPE
|
||||
endif
|
||||
|
||||
# Enable perf counters
|
||||
ifdef PERF
|
||||
VL_FLAGS += -DPERF_ENABLE
|
||||
CFLAGS += -DPERF_ENABLE
|
||||
endif
|
||||
|
||||
# use our OPAE shim
|
||||
VL_FLAGS += -DNOPAE
|
||||
CFLAGS += -DNOPAE
|
||||
|
||||
# ALU backend
|
||||
VL_FLAGS += -DIMUL_DPI
|
||||
VL_FLAGS += -DIDIV_DPI
|
||||
|
||||
# FPU backend
|
||||
FPU_CORE ?= FPU_DPI
|
||||
VL_FLAGS += -D$(FPU_CORE)
|
||||
|
||||
PROJECT = libopae-c-vlsim.so
|
||||
|
||||
all: $(PROJECT)
|
||||
|
||||
vortex_afu.h : $(RTL_DIR)/afu/vortex_afu.vh
|
||||
../../../hw/scripts/gen_config.py -i $(RTL_DIR)/afu/vortex_afu.vh -o vortex_afu.h
|
||||
|
||||
$(PROJECT): $(SRCS) vortex_afu.h
|
||||
verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
|
||||
make -j -C obj_dir -f V$(TOP).mk
|
||||
|
||||
clean:
|
||||
rm -rf $(PROJECT) obj_dir ../scope-defs.h $(RTL_DIR)/scope-defs.vh vortex_afu.h
|
|
@ -1,64 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
class RAM {
|
||||
private:
|
||||
|
||||
mutable uint8_t *mem_[(1 << 12)];
|
||||
|
||||
uint8_t *get(uint32_t address) const {
|
||||
uint32_t block_addr = address >> 20;
|
||||
uint32_t block_offset = address & 0x000FFFFF;
|
||||
if (mem_[block_addr] == NULL) {
|
||||
mem_[block_addr] = new uint8_t[(1 << 20)];
|
||||
}
|
||||
return mem_[block_addr] + block_offset;
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
RAM() {
|
||||
for (uint32_t i = 0; i < (1 << 12); i++) {
|
||||
mem_[i] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
~RAM() {
|
||||
this->clear();
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
return (1ull << 32);
|
||||
}
|
||||
|
||||
void clear() {
|
||||
for (uint32_t i = 0; i < (1 << 12); i++) {
|
||||
if (mem_[i]) {
|
||||
delete [] mem_[i];
|
||||
mem_[i] = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void read(uint32_t address, uint32_t length, uint8_t *data) const {
|
||||
for (unsigned i = 0; i < length; i++) {
|
||||
data[i] = *this->get(address + i);
|
||||
}
|
||||
}
|
||||
|
||||
void write(uint32_t address, uint32_t length, const uint8_t *data) {
|
||||
for (unsigned i = 0; i < length; i++) {
|
||||
*this->get(address + i) = data[i];
|
||||
}
|
||||
}
|
||||
|
||||
uint8_t& operator[](uint32_t address) {
|
||||
return *get(address);
|
||||
}
|
||||
|
||||
const uint8_t& operator[](uint32_t address) const {
|
||||
return *get(address);
|
||||
}
|
||||
};
|
|
@ -1,24 +0,0 @@
|
|||
//
|
||||
// Generated by afu_json_mgr from ../../hw/opae/vortex_afu.json
|
||||
//
|
||||
|
||||
#ifndef __AFU_JSON_INFO__
|
||||
#define __AFU_JSON_INFO__
|
||||
|
||||
#define AFU_ACCEL_NAME "vortex_afu"
|
||||
#define AFU_ACCEL_UUID "35F9452B-25C2-434C-93D5-6F8C60DB361C"
|
||||
#define AFU_IMAGE_CMD_MEM_READ 1
|
||||
#define AFU_IMAGE_CMD_MEM_WRITE 2
|
||||
#define AFU_IMAGE_CMD_RUN 3
|
||||
#define AFU_IMAGE_MMIO_CMD_TYPE 10
|
||||
#define AFU_IMAGE_MMIO_DATA_SIZE 16
|
||||
#define AFU_IMAGE_MMIO_IO_ADDR 12
|
||||
#define AFU_IMAGE_MMIO_MEM_ADDR 14
|
||||
#define AFU_IMAGE_MMIO_SCOPE_READ 20
|
||||
#define AFU_IMAGE_MMIO_SCOPE_WRITE 22
|
||||
#define AFU_IMAGE_MMIO_DEV_CAPS 24
|
||||
#define AFU_IMAGE_MMIO_STATUS 18
|
||||
#define AFU_IMAGE_POWER 0
|
||||
#define AFU_TOP_IFC "ccip_std_afu_avalon_mm"
|
||||
|
||||
#endif // __AFU_JSON_INFO__
|
|
@ -1,87 +1,38 @@
|
|||
CFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
|
||||
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
|
||||
RTLSIM_DIR = ../../sim/rtlsim
|
||||
|
||||
CFLAGS += -DUSE_RTLSIM -fPIC -Wno-maybe-uninitialized
|
||||
CFLAGS += -I../../include -I../../../hw/simulate -I../../../hw
|
||||
CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors
|
||||
#CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
||||
# control RTL debug print states
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSHR
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_MEM
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_TEX
|
||||
CXXFLAGS += -I../include -I../../hw -I$(RTLSIM_DIR) -I$(RTLSIM_DIR)/../common
|
||||
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
|
||||
LDFLAGS += $(RTLSIM_DIR)/librtlsim.a
|
||||
|
||||
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
||||
# Position independent code
|
||||
CXXFLAGS += -fPIC
|
||||
|
||||
CFLAGS += $(CONFIGS)
|
||||
CFLAGS += -DDUMP_PERF_STATS
|
||||
# Add external configuration
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
|
||||
# Dump perf stats
|
||||
CXXFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
LDFLAGS += -shared -pthread
|
||||
#LDFLAGS += -dynamiclib -pthread
|
||||
|
||||
TOP = Vortex
|
||||
|
||||
RTL_DIR = ../../hw/rtl
|
||||
DPI_DIR = ../../hw/dpi
|
||||
|
||||
SRCS = vortex.cpp ../common/vx_utils.cpp ../../hw/simulate/simulator.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
||||
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(DPI_DIR) -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
|
||||
TEX_INCLUDE = -I$(RTL_DIR)/tex_unit
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE) $(TEX_INCLUDE)
|
||||
|
||||
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic
|
||||
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
|
||||
VL_FLAGS += --x-initial unique --x-assign unique
|
||||
VL_FLAGS += verilator.vlt
|
||||
VL_FLAGS += $(CONFIGS)
|
||||
|
||||
# Enable Verilator multithreaded simulation
|
||||
#THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
|
||||
#VL_FLAGS += --threads $(THREADS)
|
||||
|
||||
# Debugigng
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += -DVCD_OUTPUT --trace --trace-structs $(DBG_FLAGS)
|
||||
CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
|
||||
else
|
||||
VL_FLAGS += -DNDEBUG
|
||||
CFLAGS += -DNDEBUG
|
||||
endif
|
||||
SRCS = vortex.cpp ../common/vx_utils.cpp
|
||||
|
||||
# Enable perf counters
|
||||
ifdef PERF
|
||||
VL_FLAGS += -DPERF_ENABLE
|
||||
CFLAGS += -DPERF_ENABLE
|
||||
CXXFLAGS += -DPERF_ENABLE
|
||||
endif
|
||||
|
||||
# ALU backend
|
||||
VL_FLAGS += -DIMUL_DPI
|
||||
VL_FLAGS += -DIDIV_DPI
|
||||
|
||||
# FPU backend
|
||||
FPU_CORE ?= FPU_DPI
|
||||
VL_FLAGS += -D$(FPU_CORE)
|
||||
|
||||
PROJECT = libvortex.so
|
||||
# PROJECT = libvortex.dylib
|
||||
|
||||
all: $(PROJECT)
|
||||
|
||||
$(PROJECT): $(SRCS)
|
||||
verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
|
||||
make -j -C obj_dir -f V$(TOP).mk
|
||||
$(MAKE) -C $(RTLSIM_DIR) static
|
||||
$(CXX) $(CXXFLAGS) $(SRCS) $(LDFLAGS) -o $(PROJECT)
|
||||
|
||||
clean:
|
||||
rm -rf $(PROJECT) obj_dir
|
||||
$(MAKE) -C $(RTLSIM_DIR) clean-static
|
||||
rm -rf $(PROJECT) *.o .depend
|
|
@ -1,64 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
class RAM {
|
||||
private:
|
||||
|
||||
mutable uint8_t *mem_[(1 << 12)];
|
||||
|
||||
uint8_t *get(uint32_t address) const {
|
||||
uint32_t block_addr = address >> 20;
|
||||
uint32_t block_offset = address & 0x000FFFFF;
|
||||
if (mem_[block_addr] == NULL) {
|
||||
mem_[block_addr] = new uint8_t[(1 << 20)];
|
||||
}
|
||||
return mem_[block_addr] + block_offset;
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
RAM() {
|
||||
for (uint32_t i = 0; i < (1 << 12); i++) {
|
||||
mem_[i] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
~RAM() {
|
||||
this->clear();
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
return (1ull << 32);
|
||||
}
|
||||
|
||||
void clear() {
|
||||
for (uint32_t i = 0; i < (1 << 12); i++) {
|
||||
if (mem_[i]) {
|
||||
delete [] mem_[i];
|
||||
mem_[i] = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void read(uint32_t address, uint32_t length, uint8_t *data) const {
|
||||
for (unsigned i = 0; i < length; i++) {
|
||||
data[i] = *this->get(address + i);
|
||||
}
|
||||
}
|
||||
|
||||
void write(uint32_t address, uint32_t length, const uint8_t *data) {
|
||||
for (unsigned i = 0; i < length; i++) {
|
||||
*this->get(address + i) = data[i];
|
||||
}
|
||||
}
|
||||
|
||||
uint8_t& operator[](uint32_t address) {
|
||||
return *get(address);
|
||||
}
|
||||
|
||||
const uint8_t& operator[](uint32_t address) const {
|
||||
return *get(address);
|
||||
}
|
||||
};
|
|
@ -8,20 +8,15 @@
|
|||
|
||||
#include <vortex.h>
|
||||
#include <VX_config.h>
|
||||
#include <ram.h>
|
||||
#include <mem.h>
|
||||
#include <util.h>
|
||||
#include <simulator.h>
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
inline size_t align_size(size_t size, size_t alignment) {
|
||||
assert(0 == (alignment & (alignment - 1)));
|
||||
return (size + alignment - 1) & ~(alignment - 1);
|
||||
}
|
||||
using namespace vortex;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class vx_device;
|
||||
|
||||
class vx_buffer {
|
||||
public:
|
||||
vx_buffer(size_t size, vx_device* device)
|
||||
|
@ -59,7 +54,7 @@ private:
|
|||
|
||||
class vx_device {
|
||||
public:
|
||||
vx_device() {
|
||||
vx_device() : ram_((1<<12), (1<<20)) {
|
||||
mem_allocation_ = ALLOC_BASE_ADDR;
|
||||
}
|
||||
|
||||
|
@ -84,16 +79,16 @@ public:
|
|||
if (dest_addr + asize > ram_.size())
|
||||
return -1;
|
||||
|
||||
/*printf("VXDRV: upload %d bytes from 0x%lx to 0x%lx", size, (uint8_t*)src + src_offset, dest_addr);
|
||||
if (size <= 1024) {
|
||||
printf(": ");
|
||||
for (int i = asize-1; i >= 0; --i) {
|
||||
printf("%x", *((uint8_t*)src + src_offset + i));
|
||||
/*printf("VXDRV: upload %ld bytes from 0x%lx:", size, uintptr_t((uint8_t*)src + src_offset));
|
||||
for (int i = 0; i < (asize / CACHE_BLOCK_SIZE); ++i) {
|
||||
printf("\n0x%08lx=", dest_addr + i * CACHE_BLOCK_SIZE);
|
||||
for (int j = 0; j < CACHE_BLOCK_SIZE; ++j) {
|
||||
printf("%02x", *((uint8_t*)src + src_offset + i * CACHE_BLOCK_SIZE + CACHE_BLOCK_SIZE - 1 - j));
|
||||
}
|
||||
}
|
||||
printf("\n");*/
|
||||
|
||||
ram_.write(dest_addr, asize, (const uint8_t*)src + src_offset);
|
||||
ram_.write((const uint8_t*)src + src_offset, dest_addr, asize);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -102,13 +97,13 @@ public:
|
|||
if (src_addr + asize > ram_.size())
|
||||
return -1;
|
||||
|
||||
ram_.read(src_addr, asize, (uint8_t*)dest + dest_offset);
|
||||
ram_.read((uint8_t*)dest + dest_offset, src_addr, asize);
|
||||
|
||||
/*printf("VXDRV: download %d bytes from 0x%lx to 0x%lx", size, src_addr, (uint8_t*)dest + dest_offset);
|
||||
if (size <= 1024) {
|
||||
printf(": ");
|
||||
for (int i = asize-1; i >= 0; --i) {
|
||||
printf("%x", *((uint8_t*)dest + dest_offset + i));
|
||||
/*printf("VXDRV: download %ld bytes to 0x%lx:", size, uintptr_t((uint8_t*)dest + dest_offset));
|
||||
for (int i = 0; i < (asize / CACHE_BLOCK_SIZE); ++i) {
|
||||
printf("\n0x%08lx=", src_addr + i * CACHE_BLOCK_SIZE);
|
||||
for (int j = 0; j < CACHE_BLOCK_SIZE; ++j) {
|
||||
printf("%02x", *((uint8_t*)dest + dest_offset + i * CACHE_BLOCK_SIZE + CACHE_BLOCK_SIZE - 1 - j));
|
||||
}
|
||||
}
|
||||
printf("\n");*/
|
||||
|
@ -154,6 +149,34 @@ private:
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifdef DUMP_PERF_STATS
|
||||
class AutoPerfDump {
|
||||
private:
|
||||
std::list<vx_device_h> devices_;
|
||||
|
||||
public:
|
||||
AutoPerfDump() {}
|
||||
|
||||
~AutoPerfDump() {
|
||||
for (auto device : devices_) {
|
||||
vx_dump_perf(device, stdout);
|
||||
}
|
||||
}
|
||||
|
||||
void add_device(vx_device_h device) {
|
||||
devices_.push_back(device);
|
||||
}
|
||||
|
||||
void remove_device(vx_device_h device) {
|
||||
devices_.remove(device);
|
||||
}
|
||||
};
|
||||
|
||||
AutoPerfDump gAutoPerfDump;
|
||||
#endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
@ -198,6 +221,10 @@ extern int vx_dev_open(vx_device_h* hdevice) {
|
|||
|
||||
*hdevice = new vx_device();
|
||||
|
||||
#ifdef DUMP_PERF_STATS
|
||||
gAutoPerfDump.add_device(*hdevice);
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -208,7 +235,8 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
|||
vx_device *device = ((vx_device*)hdevice);
|
||||
|
||||
#ifdef DUMP_PERF_STATS
|
||||
vx_dump_perf(device, stdout);
|
||||
gAutoPerfDump.remove_device(hdevice);
|
||||
vx_dump_perf(hdevice, stdout);
|
||||
#endif
|
||||
|
||||
delete device;
|
||||
|
|
|
@ -1,37 +1,29 @@
|
|||
PROJECT = libvortex.so
|
||||
#PROJECT = libvortex.dylib
|
||||
|
||||
SIMX_DIR = ../../simX
|
||||
SIMX_DIR = ../../sim/simX
|
||||
|
||||
CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
|
||||
#CXXFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -DUSE_SIMX -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I../include -I../../hw -I$(SIMX_DIR)
|
||||
|
||||
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
||||
|
||||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I../include -I../../hw -I$(SIMX_DIR) -I$(SIMX_DIR)/../common
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
CXXFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
LDFLAGS += -shared -pthread
|
||||
#LDFLAGS += -dynamiclib -pthread
|
||||
LDFLAGS += $(SIMX_DIR)/libsimX.a
|
||||
|
||||
SRCS = vortex.cpp ../common/vx_utils.cpp
|
||||
SRCS += $(SIMX_DIR)/util.cpp $(SIMX_DIR)/args.cpp $(SIMX_DIR)/mem.cpp $(SIMX_DIR)/pipeline.cpp $(SIMX_DIR)/warp.cpp $(SIMX_DIR)/core.cpp $(SIMX_DIR)/decode.cpp $(SIMX_DIR)/execute.cpp
|
||||
|
||||
# Debugigng
|
||||
ifndef DEBUG
|
||||
CXXFLAGS += -DNDEBUG
|
||||
endif
|
||||
PROJECT = libvortex.so
|
||||
|
||||
all: $(PROJECT)
|
||||
|
||||
$(PROJECT): $(SRCS)
|
||||
$(MAKE) -C $(SIMX_DIR) static
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
|
||||
|
||||
.depend: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
|
||||
|
||||
clean:
|
||||
$(MAKE) -C $(SIMX_DIR) clean-static
|
||||
rm -rf $(PROJECT) *.o .depend
|
|
@ -10,15 +10,11 @@
|
|||
#include <vortex.h>
|
||||
#include <core.h>
|
||||
#include <VX_config.h>
|
||||
#include <util.h>
|
||||
|
||||
#define PAGE_SIZE 4096
|
||||
#define PAGE_SIZE 4096
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
inline size_t align_size(size_t size, size_t alignment) {
|
||||
assert(0 == (alignment & (alignment - 1)));
|
||||
return (size + alignment - 1) & ~(alignment - 1);
|
||||
}
|
||||
using namespace vortex;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
@ -74,7 +70,7 @@ public:
|
|||
mem_allocation_ = ALLOC_BASE_ADDR;
|
||||
mmu_.attach(ram_, 0, 0xffffffff);
|
||||
for (int i = 0; i < arch_.num_cores(); ++i) {
|
||||
cores_[i] = std::make_shared<vortex::Core>(arch_, decoder_, mmu_, i);
|
||||
cores_[i] = std::make_shared<Core>(arch_, decoder_, mmu_, i);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -101,7 +97,7 @@ public:
|
|||
if (dest_addr + asize > ram_.size())
|
||||
return -1;
|
||||
|
||||
ram_.write(dest_addr, (const uint8_t*)src + src_offset, asize);
|
||||
ram_.write((const uint8_t*)src + src_offset, dest_addr, asize);
|
||||
|
||||
/*printf("VXDRV: upload %d bytes to 0x%x\n", size, dest_addr);
|
||||
for (int i = 0; i < size; i += 4) {
|
||||
|
@ -116,7 +112,7 @@ public:
|
|||
if (src_addr + asize > ram_.size())
|
||||
return -1;
|
||||
|
||||
ram_.read(src_addr, (uint8_t*)dest + dest_offset, asize);
|
||||
ram_.read((uint8_t*)dest + dest_offset, src_addr, asize);
|
||||
|
||||
/*printf("VXDRV: download %d bytes from 0x%x\n", size, src_addr);
|
||||
for (int i = 0; i < size; i += 4) {
|
||||
|
@ -209,25 +205,57 @@ private:
|
|||
device->thread_proc();
|
||||
}
|
||||
|
||||
vortex::ArchDef arch_;
|
||||
vortex::Decoder decoder_;
|
||||
vortex::MemoryUnit mmu_;
|
||||
std::vector<std::shared_ptr<vortex::Core>> cores_;
|
||||
ArchDef arch_;
|
||||
Decoder decoder_;
|
||||
MemoryUnit mmu_;
|
||||
std::vector<std::shared_ptr<Core>> cores_;
|
||||
bool is_done_;
|
||||
bool is_running_;
|
||||
size_t mem_allocation_;
|
||||
std::thread thread_;
|
||||
vortex::RAM ram_;
|
||||
RAM ram_;
|
||||
std::mutex mutex_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifdef DUMP_PERF_STATS
|
||||
class AutoPerfDump {
|
||||
private:
|
||||
std::list<vx_device_h> devices_;
|
||||
|
||||
public:
|
||||
AutoPerfDump() {}
|
||||
|
||||
~AutoPerfDump() {
|
||||
for (auto device : devices_) {
|
||||
vx_dump_perf(device, stdout);
|
||||
}
|
||||
}
|
||||
|
||||
void add_device(vx_device_h device) {
|
||||
devices_.push_back(device);
|
||||
}
|
||||
|
||||
void remove_device(vx_device_h device) {
|
||||
devices_.remove(device);
|
||||
}
|
||||
};
|
||||
|
||||
AutoPerfDump gAutoPerfDump;
|
||||
#endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
extern int vx_dev_open(vx_device_h* hdevice) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
*hdevice = new vx_device();
|
||||
*hdevice = new vx_device();
|
||||
|
||||
#ifdef DUMP_PERF_STATS
|
||||
gAutoPerfDump.add_device(*hdevice);
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -239,7 +267,8 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
|||
vx_device *device = ((vx_device*)hdevice);
|
||||
|
||||
#ifdef DUMP_PERF_STATS
|
||||
vx_dump_perf(device, stdout);
|
||||
gAutoPerfDump.remove_device(hdevice);
|
||||
vx_dump_perf(hdevice, stdout);
|
||||
#endif
|
||||
|
||||
delete device;
|
||||
|
|
62
driver/vlsim/Makefile
Normal file
62
driver/vlsim/Makefile
Normal file
|
@ -0,0 +1,62 @@
|
|||
VLSIM_DIR = ../../sim/vlsim
|
||||
|
||||
RTL_DIR=../../hw/rtl
|
||||
|
||||
SCRIPT_DIR=../../hw/scripts
|
||||
|
||||
CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors
|
||||
#CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -I. -I../include -I../../hw -I$(VLSIM_DIR)
|
||||
|
||||
LDFLAGS += $(VLSIM_DIR)/libopae-c-vlsim.a
|
||||
|
||||
# Position independent code
|
||||
CXXFLAGS += -fPIC
|
||||
|
||||
# Add external configuration
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
|
||||
# Dump perf stats
|
||||
CXXFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
LDFLAGS += -shared -pthread
|
||||
|
||||
SRCS = ../common/opae.cpp ../common/vx_utils.cpp
|
||||
|
||||
# Enable scope analyzer
|
||||
ifdef SCOPE
|
||||
CXXFLAGS += -DSCOPE
|
||||
SRCS += ../common/vx_scope.cpp
|
||||
SCOPE_H = scope-defs.h
|
||||
endif
|
||||
|
||||
# Enable perf counters
|
||||
ifdef PERF
|
||||
CXXFLAGS += -DPERF_ENABLE
|
||||
endif
|
||||
|
||||
PROJECT = libvortex.so
|
||||
|
||||
all: $(PROJECT)
|
||||
|
||||
scope-defs.h: $(SCRIPT_DIR)/scope.json
|
||||
$(SCRIPT_DIR)/scope.py $(CONFIGS) -cc scope-defs.h -vl $(RTL_DIR)/scope-defs.vh $(SCRIPT_DIR)/scope.json
|
||||
|
||||
# generate scope data
|
||||
scope: scope-defs.h
|
||||
|
||||
$(PROJECT): $(SRCS) $(SCOPE_H)
|
||||
$(SCOPE_ENABLE) $(PERF_ENABLE) $(MAKE) -C $(VLSIM_DIR) static
|
||||
$(CXX) $(CXXFLAGS) -DUSE_VLSIM $(SRCS) $(LDFLAGS) -o $(PROJECT)
|
||||
|
||||
.depend: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) -MM $(SRCS) > .depend;
|
||||
|
||||
clean:
|
||||
$(MAKE) -C $(VLSIM_DIR) clean-static
|
||||
rm -rf $(PROJECT) *.o .depend scope-defs.h
|
||||
|
||||
ifneq ($(MAKECMDGOALS),clean)
|
||||
-include .depend
|
||||
endif
|
15
hw/Makefile
15
hw/Makefile
|
@ -1,9 +1,12 @@
|
|||
.PHONY: build_config
|
||||
RTL_DIR=./rtl
|
||||
SCRIPT_DIR=./scripts
|
||||
|
||||
build_config: ./rtl/VX_config.vh
|
||||
./scripts/gen_config.py -i ./rtl/VX_config.vh -o ./VX_config.h
|
||||
$(MAKE) -C simulate
|
||||
all: VX_config.h
|
||||
|
||||
VX_config.h: $(RTL_DIR)/VX_config.vh
|
||||
$(SCRIPT_DIR)/gen_config.py -i $(RTL_DIR)/VX_config.vh -o VX_config.h
|
||||
|
||||
clean:
|
||||
rm -f ./VX_config.h
|
||||
$(MAKE) -C simulate clean
|
||||
rm -f VX_config.h
|
||||
|
||||
.PHONY: VX_config.h
|
2
hw/configs/.gitignore
vendored
2
hw/configs/.gitignore
vendored
|
@ -1,2 +0,0 @@
|
|||
*.v
|
||||
*.sh
|
|
@ -4,293 +4,168 @@
|
|||
#include <vector>
|
||||
#include <mutex>
|
||||
#include <iostream>
|
||||
#include <rvfloats.h>
|
||||
#include "svdpi.h"
|
||||
#include "verilated_vpi.h"
|
||||
#include "VX_config.h"
|
||||
|
||||
extern "C" {
|
||||
void dpi_fadd(int a, int b, int frm, int* result, int* fflags);
|
||||
void dpi_fsub(int a, int b, int frm, int* result, int* fflags);
|
||||
void dpi_fmul(int a, int b, int frm, int* result, int* fflags);
|
||||
void dpi_fmadd(int a, int b, int c, int frm, int* result, int* fflags);
|
||||
void dpi_fmsub(int a, int b, int c, int frm, int* result, int* fflags);
|
||||
void dpi_fnmadd(int a, int b, int c, int frm, int* result, int* fflags);
|
||||
void dpi_fnmsub(int a, int b, int c, int frm, int* result, int* fflags);
|
||||
void dpi_fadd(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
|
||||
void dpi_fsub(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
|
||||
void dpi_fmul(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
|
||||
void dpi_fmadd(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
|
||||
void dpi_fmsub(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
|
||||
void dpi_fnmadd(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
|
||||
void dpi_fnmsub(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
|
||||
|
||||
void dpi_fdiv(int a, int b, int frm, int* result, int* fflags);
|
||||
void dpi_fsqrt(int a, int frm, int* result, int* fflags);
|
||||
void dpi_fdiv(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
|
||||
void dpi_fsqrt(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
|
||||
|
||||
void dpi_ftoi(int a, int frm, int* result, int* fflags);
|
||||
void dpi_ftou(int a, int frm, int* result, int* fflags);
|
||||
void dpi_itof(int a, int frm, int* result, int* fflags);
|
||||
void dpi_utof(int a, int frm, int* result, int* fflags);
|
||||
void dpi_ftoi(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
|
||||
void dpi_ftou(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
|
||||
void dpi_itof(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
|
||||
void dpi_utof(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
|
||||
|
||||
void dpi_fclss(int a, int* result);
|
||||
void dpi_fsgnj(int a, int b, int* result);
|
||||
void dpi_fsgnjn(int a, int b, int* result);
|
||||
void dpi_fsgnjx(int a, int b, int* result);
|
||||
void dpi_fclss(bool enable, int a, int* result);
|
||||
void dpi_fsgnj(bool enable, int a, int b, int* result);
|
||||
void dpi_fsgnjn(bool enable, int a, int b, int* result);
|
||||
void dpi_fsgnjx(bool enable, int a, int b, int* result);
|
||||
|
||||
void dpi_flt(int a, int b, int* result, int* fflags);
|
||||
void dpi_fle(int a, int b, int* result, int* fflags);
|
||||
void dpi_feq(int a, int b, int* result, int* fflags);
|
||||
void dpi_fmin(int a, int b, int* result, int* fflags);
|
||||
void dpi_fmax(int a, int b, int* result, int* fflags);
|
||||
void dpi_flt(bool enable, int a, int b, int* result, svBitVecVal* fflags);
|
||||
void dpi_fle(bool enable, int a, int b, int* result, svBitVecVal* fflags);
|
||||
void dpi_feq(bool enable, int a, int b, int* result, svBitVecVal* fflags);
|
||||
void dpi_fmin(bool enable, int a, int b, int* result, svBitVecVal* fflags);
|
||||
void dpi_fmax(bool enable, int a, int b, int* result, svBitVecVal* fflags);
|
||||
}
|
||||
|
||||
union Float_t {
|
||||
float f;
|
||||
int i;
|
||||
struct {
|
||||
uint32_t man : 23;
|
||||
uint32_t exp : 8;
|
||||
uint32_t sign : 1;
|
||||
} parts;
|
||||
};
|
||||
|
||||
void dpi_fadd(int a, int b, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fb, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fr.f = fa.f + fb.f;
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
void dpi_fadd(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fadd(a, b, (*frm & 0x7), fflags);
|
||||
}
|
||||
|
||||
void dpi_fsub(int a, int b, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fb, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fr.f = fa.f - fb.f;
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
void dpi_fsub(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fsub(a, b, (*frm & 0x7), fflags);
|
||||
}
|
||||
|
||||
void dpi_fmul(int a, int b, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fb, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fr.f = fa.f * fb.f;
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
void dpi_fmul(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fmul(a, b, (*frm & 0x7), fflags);
|
||||
}
|
||||
|
||||
void dpi_fmadd(int a, int b, int c, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fb, fc, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fc.i = c;
|
||||
fr.f = fa.f * fb.f + fc.f;
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
void dpi_fmadd(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fmadd(a, b, c, (*frm & 0x7), fflags);
|
||||
}
|
||||
|
||||
void dpi_fmsub(int a, int b, int c, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fb, fc, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fc.i = c;
|
||||
fr.f = fa.f * fb.f - fc.f;
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
void dpi_fmsub(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fmsub(a, b, c, (*frm & 0x7), fflags);
|
||||
}
|
||||
|
||||
void dpi_fnmadd(int a, int b, int c, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fb, fc, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fc.i = c;
|
||||
fr.f = -(fa.f * fb.f + fc.f);
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
void dpi_fnmadd(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fnmadd(a, b, c, (*frm & 0x7), fflags);
|
||||
}
|
||||
|
||||
void dpi_fnmsub(int a, int b, int c, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fb, fc, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fc.i = c;
|
||||
fr.f = -(fa.f * fb.f - fc.f);
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
void dpi_fnmsub(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fnmsub(a, b, c, (*frm & 0x7), fflags);
|
||||
}
|
||||
|
||||
void dpi_fdiv(int a, int b, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fb, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fr.f = fa.f / fb.f;
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
void dpi_fdiv(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fdiv(a, b, (*frm & 0x7), fflags);
|
||||
}
|
||||
|
||||
void dpi_fsqrt(int a, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fr;
|
||||
|
||||
fa.i = a;
|
||||
fr.f = sqrtf(fa.f);
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
void dpi_fsqrt(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fsqrt(a, (*frm & 0x7), fflags);
|
||||
}
|
||||
|
||||
void dpi_ftoi(int a, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fr;
|
||||
|
||||
fa.i = a;
|
||||
fr.i = int(fa.f);
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
void dpi_ftoi(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_ftoi(a, (*frm & 0x7), fflags);
|
||||
}
|
||||
|
||||
void dpi_ftou(int a, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fr;
|
||||
|
||||
fa.i = a;
|
||||
fr.i = unsigned(fa.f);
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
void dpi_ftou(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_ftou(a, (*frm & 0x7), fflags);
|
||||
}
|
||||
|
||||
void dpi_itof(int a, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fr;
|
||||
|
||||
fr.f = (float)a;
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
void dpi_itof(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_itof(a, (*frm & 0x7), fflags);
|
||||
}
|
||||
|
||||
void dpi_utof(int a, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fr;
|
||||
|
||||
unsigned ua = a;
|
||||
fr.f = (float)ua;
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
void dpi_utof(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_utof(a, (*frm & 0x7), fflags);
|
||||
}
|
||||
|
||||
void dpi_flt(int a, int b, int* result, int* fflags) {
|
||||
Float_t fa, fb, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fr.f = fa.f < fb.f;
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
void dpi_flt(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_flt(a, b, fflags);
|
||||
}
|
||||
|
||||
void dpi_fle(int a, int b, int* result, int* fflags) {
|
||||
Float_t fa, fb, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fr.f = fa.f <= fb.f;
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
void dpi_fle(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fle(a, b, fflags);
|
||||
}
|
||||
|
||||
void dpi_feq(int a, int b, int* result, int* fflags) {
|
||||
Float_t fa, fb, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fr.f = fa.f == fb.f;
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
void dpi_feq(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_feq(a, b, fflags);
|
||||
}
|
||||
|
||||
void dpi_fmin(int a, int b, int* result, int* fflags) {
|
||||
Float_t fa, fb, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fr.f = std::min<float>(fa.f, fb.f);
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
void dpi_fmin(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fmin(a, b, fflags);
|
||||
}
|
||||
|
||||
void dpi_fmax(int a, int b, int* result, int* fflags) {
|
||||
Float_t fa, fb, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fr.f = std::max<float>(fa.f, fb.f);
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
void dpi_fmax(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fmax(a, b, fflags);
|
||||
}
|
||||
|
||||
void dpi_fclss(int a, int* result) {
|
||||
|
||||
int r = 0; // clear all bits
|
||||
|
||||
bool fsign = (a >> 31);
|
||||
uint32_t expo = (a >> 23) & 0xFF;
|
||||
uint32_t fraction = a & 0x7FFFFF;
|
||||
|
||||
if ((expo == 0) && (fraction == 0)) {
|
||||
r = fsign ? (1 << 3) : (1 << 4); // +/- 0
|
||||
} else if ((expo == 0) && (fraction != 0)) {
|
||||
r = fsign ? (1 << 2) : (1 << 5); // +/- subnormal
|
||||
} else if ((expo == 0xFF) && (fraction == 0)) {
|
||||
r = fsign ? (1<<0) : (1<<7); // +/- infinity
|
||||
} else if ((expo == 0xFF ) && (fraction != 0)) {
|
||||
if (!fsign && (fraction == 0x00400000)) {
|
||||
r = (1 << 9); // quiet NaN
|
||||
} else {
|
||||
r = (1 << 8); // signaling NaN
|
||||
}
|
||||
} else {
|
||||
r = fsign ? (1 << 1) : (1 << 6); // +/- normal
|
||||
}
|
||||
|
||||
*result = r;
|
||||
void dpi_fclss(bool enable, int a, int* result) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fclss(a);
|
||||
}
|
||||
|
||||
void dpi_fsgnj(int a, int b, int* result) {
|
||||
|
||||
int sign = b & 0x80000000;
|
||||
int r = sign | (a & 0x7FFFFFFF);
|
||||
|
||||
*result = r;
|
||||
void dpi_fsgnj(bool enable, int a, int b, int* result) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fsgnj(a, b);
|
||||
}
|
||||
|
||||
void dpi_fsgnjn(int a, int b, int* result) {
|
||||
|
||||
int sign = ~b & 0x80000000;
|
||||
int r = sign | (a & 0x7FFFFFFF);
|
||||
|
||||
*result = r;
|
||||
void dpi_fsgnjn(bool enable, int a, int b, int* result) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fsgnjn(a, b);
|
||||
}
|
||||
|
||||
void dpi_fsgnjx(int a, int b, int* result) {
|
||||
|
||||
int sign1 = a & 0x80000000;
|
||||
int sign2 = b & 0x80000000;
|
||||
int r = (sign1 ^ sign2) | (a & 0x7FFFFFFF);
|
||||
|
||||
*result = r;
|
||||
void dpi_fsgnjx(bool enable, int a, int b, int* result) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fsgnjx(a, b);
|
||||
}
|
|
@ -1,31 +1,31 @@
|
|||
`ifndef FLOAT_DPI
|
||||
`define FLOAT_DPI
|
||||
|
||||
import "DPI-C" context function void dpi_fadd(input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_fsub(input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_fmul(input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_fmadd(input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_fmsub(input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_fnmadd(input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_fnmsub(input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fadd(input logic enable, input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fsub(input logic enable, input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fmul(input logic enable, input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fmadd(input logic enable, input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fmsub(input logic enable, input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fnmadd(input logic enable, input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fnmsub(input logic enable, input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
|
||||
import "DPI-C" context function void dpi_fdiv(input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_fsqrt(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fdiv(input logic enable, input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fsqrt(input logic enable, input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
|
||||
import "DPI-C" context function void dpi_ftoi(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_ftou(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_itof(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_utof(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_ftoi(input logic enable, input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_ftou(input logic enable, input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_itof(input logic enable, input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_utof(input logic enable, input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
|
||||
import "DPI-C" context function void dpi_fclss(input int a, output int result);
|
||||
import "DPI-C" context function void dpi_fsgnj(input int a, input int b, output int result);
|
||||
import "DPI-C" context function void dpi_fsgnjn(input int a, input int b, output int result);
|
||||
import "DPI-C" context function void dpi_fsgnjx(input int a, input int b, output int result);
|
||||
import "DPI-C" function void dpi_fclss(input logic enable, input int a, output int result);
|
||||
import "DPI-C" function void dpi_fsgnj(input logic enable, input int a, input int b, output int result);
|
||||
import "DPI-C" function void dpi_fsgnjn(input logic enable, input int a, input int b, output int result);
|
||||
import "DPI-C" function void dpi_fsgnjx(input logic enable, input int a, input int b, output int result);
|
||||
|
||||
import "DPI-C" context function void dpi_flt(input int a, input int b, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_fle(input int a, input int b, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_feq(input int a, input int b, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_fmin(input int a, input int b, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_fmax(input int a, input int b, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_flt(input logic enable, input int a, input int b, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fle(input logic enable, input int a, input int b, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_feq(input logic enable, input int a, input int b, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fmin(input logic enable, input int a, input int b, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fmax(input logic enable, input int a, input int b, output int result, output bit[4:0] fflags);
|
||||
|
||||
`endif
|
|
@ -9,13 +9,20 @@
|
|||
#include "VX_config.h"
|
||||
|
||||
extern "C" {
|
||||
void dpi_imul(int a, int b, bool is_signed_a, bool is_signed_b, int* resultl, int* resulth);
|
||||
void dpi_idiv(int a, int b, bool is_signed, int* quotient, int* remainder);
|
||||
void dpi_imul(bool enable, int a, int b, bool is_signed_a, bool is_signed_b, int* resultl, int* resulth);
|
||||
void dpi_idiv(bool enable, int a, int b, bool is_signed, int* quotient, int* remainder);
|
||||
|
||||
int dpi_register();
|
||||
void dpi_assert(int inst, bool cond, int delay);
|
||||
|
||||
void dpi_trace(const char* format, ...);
|
||||
void dpi_trace_start();
|
||||
void dpi_trace_stop();
|
||||
}
|
||||
|
||||
bool sim_trace_enabled();
|
||||
void sim_trace_enable(bool enable);
|
||||
|
||||
class ShiftRegister {
|
||||
public:
|
||||
ShiftRegister() : init_(false), depth_(0) {}
|
||||
|
@ -86,15 +93,18 @@ void dpi_assert(int inst, bool cond, int delay) {
|
|||
}
|
||||
}
|
||||
|
||||
void dpi_imul(int a, int b, bool is_signed_a, bool is_signed_b, int* resultl, int* resulth) {
|
||||
uint64_t first = a;
|
||||
uint64_t second = b;
|
||||
void dpi_imul(bool enable, int a, int b, bool is_signed_a, bool is_signed_b, int* resultl, int* resulth) {
|
||||
if (!enable)
|
||||
return;
|
||||
|
||||
if (is_signed_a && (a & 0x80000000)) {
|
||||
uint64_t first = *(uint32_t*)&a;
|
||||
uint64_t second = *(uint32_t*)&b;
|
||||
|
||||
if (is_signed_a && (first & 0x80000000)) {
|
||||
first |= 0xFFFFFFFF00000000;
|
||||
}
|
||||
|
||||
if (is_signed_b && (b & 0x80000000)) {
|
||||
if (is_signed_b && (second & 0x80000000)) {
|
||||
second |= 0xFFFFFFFF00000000;
|
||||
}
|
||||
|
||||
|
@ -109,9 +119,12 @@ void dpi_imul(int a, int b, bool is_signed_a, bool is_signed_b, int* resultl, in
|
|||
*resulth = (result >> 32) & 0xFFFFFFFF;
|
||||
}
|
||||
|
||||
void dpi_idiv(int a, int b, bool is_signed, int* quotient, int* remainder) {
|
||||
uint32_t dividen = a;
|
||||
uint32_t divisor = b;
|
||||
void dpi_idiv(bool enable, int a, int b, bool is_signed, int* quotient, int* remainder) {
|
||||
if (!enable)
|
||||
return;
|
||||
|
||||
uint32_t dividen = *(uint32_t*)&a;
|
||||
uint32_t divisor = *(uint32_t*)&b;
|
||||
|
||||
if (is_signed) {
|
||||
if (b == 0) {
|
||||
|
@ -133,4 +146,21 @@ void dpi_idiv(int a, int b, bool is_signed, int* quotient, int* remainder) {
|
|||
*remainder = dividen % divisor;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_trace(const char* format, ...) {
|
||||
if (!sim_trace_enabled())
|
||||
return;
|
||||
va_list va;
|
||||
va_start(va, format);
|
||||
vprintf(format, va);
|
||||
va_end(va);
|
||||
}
|
||||
|
||||
void dpi_trace_start() {
|
||||
sim_trace_enable(true);
|
||||
}
|
||||
|
||||
void dpi_trace_stop() {
|
||||
sim_trace_enable(false);
|
||||
}
|
|
@ -1,10 +1,14 @@
|
|||
`ifndef UTIL_DPI
|
||||
`define UTIL_DPI
|
||||
|
||||
import "DPI-C" context function void dpi_imul(input int a, input int b, input logic is_signed_a, input logic is_signed_b, output int resultl, output int resulth);
|
||||
import "DPI-C" context function void dpi_idiv(input int a, input int b, input logic is_signed, output int quotient, output int remainder);
|
||||
import "DPI-C" function void dpi_imul(input logic enable, input int a, input int b, input logic is_signed_a, input logic is_signed_b, output int resultl, output int resulth);
|
||||
import "DPI-C" function void dpi_idiv(input logic enable, input int a, input int b, input logic is_signed, output int quotient, output int remainder);
|
||||
|
||||
import "DPI-C" context function int dpi_register();
|
||||
import "DPI-C" context function void dpi_assert(int inst, input logic cond, input int delay);
|
||||
import "DPI-C" function int dpi_register();
|
||||
import "DPI-C" function void dpi_assert(int inst, input logic cond, input int delay);
|
||||
|
||||
import "DPI-C" function void dpi_trace(input string format /*verilator sformat*/);
|
||||
import "DPI-C" function void dpi_trace_start();
|
||||
import "DPI-C" function void dpi_trace_stop();
|
||||
|
||||
`endif
|
|
@ -3,15 +3,15 @@
|
|||
module VX_alu_unit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Inputs
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_alu_req_if.slave alu_req_if,
|
||||
|
||||
// Outputs
|
||||
VX_branch_ctl_if branch_ctl_if,
|
||||
VX_commit_if alu_commit_if
|
||||
VX_branch_ctl_if.master branch_ctl_if,
|
||||
VX_commit_if.master alu_commit_if
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
@ -22,15 +22,15 @@ module VX_alu_unit #(
|
|||
wire [`NUM_THREADS-1:0][31:0] shr_result;
|
||||
reg [`NUM_THREADS-1:0][31:0] msc_result;
|
||||
|
||||
wire stall_in, stall_out;
|
||||
wire ready_in;
|
||||
|
||||
`UNUSED_VAR (alu_req_if.op_mod)
|
||||
wire is_br_op = `ALU_IS_BR(alu_req_if.op_mod);
|
||||
wire [`ALU_BITS-1:0] alu_op = `ALU_OP(alu_req_if.op_type);
|
||||
wire [`BR_BITS-1:0] br_op = `BR_OP(alu_req_if.op_type);
|
||||
wire alu_signed = `ALU_SIGNED(alu_op);
|
||||
wire [1:0] alu_op_class = `ALU_OP_CLASS(alu_op);
|
||||
wire is_sub = (alu_op == `ALU_SUB);
|
||||
wire is_br_op = `INST_ALU_IS_BR(alu_req_if.op_mod);
|
||||
wire [`INST_ALU_BITS-1:0] alu_op = `INST_ALU_BITS'(alu_req_if.op_type);
|
||||
wire [`INST_BR_BITS-1:0] br_op = `INST_BR_BITS'(alu_req_if.op_type);
|
||||
wire alu_signed = `INST_ALU_SIGNED(alu_op);
|
||||
wire [1:0] alu_op_class = `INST_ALU_OP_CLASS(alu_op);
|
||||
wire is_sub = (alu_op == `INST_ALU_SUB);
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in1 = alu_req_if.rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in2 = alu_req_if.rs2_data;
|
||||
|
@ -57,10 +57,10 @@ module VX_alu_unit #(
|
|||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
always @(*) begin
|
||||
case (alu_op)
|
||||
`ALU_AND: msc_result[i] = alu_in1[i] & alu_in2_imm[i];
|
||||
`ALU_OR: msc_result[i] = alu_in1[i] | alu_in2_imm[i];
|
||||
`ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
|
||||
//`ALU_SLL,
|
||||
`INST_ALU_AND: msc_result[i] = alu_in1[i] & alu_in2_imm[i];
|
||||
`INST_ALU_OR: msc_result[i] = alu_in1[i] | alu_in2_imm[i];
|
||||
`INST_ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
|
||||
//`INST_ALU_SLL,
|
||||
default: msc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0];
|
||||
endcase
|
||||
end
|
||||
|
@ -81,7 +81,7 @@ module VX_alu_unit #(
|
|||
|
||||
// branch
|
||||
|
||||
wire is_jal = is_br_op && (br_op == `BR_JAL || br_op == `BR_JALR);
|
||||
wire is_jal = is_br_op && (br_op == `INST_BR_JAL || br_op == `INST_BR_JALR);
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_jal_result = is_jal ? {`NUM_THREADS{alu_req_if.next_PC}} : alu_result;
|
||||
|
||||
wire [31:0] br_dest = add_result[alu_req_if.tid];
|
||||
|
@ -90,24 +90,51 @@ module VX_alu_unit #(
|
|||
wire is_less = cmp_result[32];
|
||||
wire is_equal = ~(| cmp_result[31:0]);
|
||||
|
||||
wire br_neg = `BR_NEG(br_op);
|
||||
wire br_less = `BR_LESS(br_op);
|
||||
wire br_static = `BR_STATIC(br_op);
|
||||
wire br_taken = ((br_less ? is_less : is_equal) ^ br_neg) | br_static;
|
||||
|
||||
// output
|
||||
|
||||
wire result_valid;
|
||||
wire [`NW_BITS-1:0] result_wid;
|
||||
wire [`NUM_THREADS-1:0] result_tmask;
|
||||
wire [31:0] result_PC;
|
||||
wire [`NR_BITS-1:0] result_rd;
|
||||
wire result_wb;
|
||||
wire [`NUM_THREADS-1:0][31:0] result_data;
|
||||
wire result_is_br;
|
||||
wire alu_valid_in;
|
||||
wire alu_ready_in;
|
||||
wire alu_valid_out;
|
||||
wire alu_ready_out;
|
||||
wire [`NW_BITS-1:0] alu_wid;
|
||||
wire [`NUM_THREADS-1:0] alu_tmask;
|
||||
wire [31:0] alu_PC;
|
||||
wire [`NR_BITS-1:0] alu_rd;
|
||||
wire alu_wb;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_data;
|
||||
|
||||
wire [`INST_BR_BITS-1:0] br_op_r;
|
||||
wire [31:0] br_dest_r;
|
||||
wire is_less_r;
|
||||
wire is_equal_r;
|
||||
wire is_br_op_r;
|
||||
|
||||
assign alu_ready_in = alu_ready_out || ~alu_valid_out;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `INST_BR_BITS + 1 + 1 + 32),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (alu_ready_in),
|
||||
.data_in ({alu_valid_in, alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.rd, alu_req_if.wb, alu_jal_result, is_br_op, br_op, is_less, is_equal, br_dest}),
|
||||
.data_out ({alu_valid_out, alu_wid, alu_tmask, alu_PC, alu_rd, alu_wb, alu_data, is_br_op_r, br_op_r, is_less_r, is_equal_r, br_dest_r})
|
||||
);
|
||||
|
||||
`UNUSED_VAR (br_op_r)
|
||||
wire br_neg = `INST_BR_NEG(br_op_r);
|
||||
wire br_less = `INST_BR_LESS(br_op_r);
|
||||
wire br_static = `INST_BR_STATIC(br_op_r);
|
||||
|
||||
assign branch_ctl_if.valid = alu_valid_out && alu_ready_out && is_br_op_r;
|
||||
assign branch_ctl_if.taken = ((br_less ? is_less_r : is_equal_r) ^ br_neg) | br_static;
|
||||
assign branch_ctl_if.wid = alu_wid;
|
||||
assign branch_ctl_if.dest = br_dest_r;
|
||||
|
||||
`ifdef EXT_M_ENABLE
|
||||
|
||||
wire mul_valid_in;
|
||||
wire mul_ready_in;
|
||||
wire mul_valid_out;
|
||||
wire mul_ready_out;
|
||||
|
@ -118,14 +145,14 @@ module VX_alu_unit #(
|
|||
wire mul_wb;
|
||||
wire [`NUM_THREADS-1:0][31:0] mul_data;
|
||||
|
||||
wire is_mul_op = `ALU_IS_MUL(alu_req_if.op_mod);
|
||||
|
||||
wire [`INST_MUL_BITS-1:0] mul_op = `INST_MUL_BITS'(alu_req_if.op_type);
|
||||
|
||||
VX_muldiv muldiv (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Inputs
|
||||
.alu_op (`MUL_OP(alu_req_if.op_type)),
|
||||
.alu_op (mul_op),
|
||||
.wid_in (alu_req_if.wid),
|
||||
.tmask_in (alu_req_if.tmask),
|
||||
.PC_in (alu_req_if.PC),
|
||||
|
@ -143,69 +170,58 @@ module VX_alu_unit #(
|
|||
.data_out (mul_data),
|
||||
|
||||
// handshake
|
||||
.valid_in (alu_req_if.valid && is_mul_op),
|
||||
.valid_in (mul_valid_in),
|
||||
.ready_in (mul_ready_in),
|
||||
.valid_out (mul_valid_out),
|
||||
.ready_out (mul_ready_out)
|
||||
);
|
||||
|
||||
assign stall_in = (is_mul_op && ~mul_ready_in)
|
||||
|| (~is_mul_op && (mul_valid_out || stall_out));
|
||||
|
||||
assign mul_ready_out = ~stall_out;
|
||||
wire is_mul_op = `INST_ALU_IS_MUL(alu_req_if.op_mod);
|
||||
|
||||
assign result_valid = mul_valid_out || (alu_req_if.valid && ~is_mul_op);
|
||||
assign result_wid = mul_valid_out ? mul_wid : alu_req_if.wid;
|
||||
assign result_tmask = mul_valid_out ? mul_tmask : alu_req_if.tmask;
|
||||
assign result_PC = mul_valid_out ? mul_PC : alu_req_if.PC;
|
||||
assign result_rd = mul_valid_out ? mul_rd : alu_req_if.rd;
|
||||
assign result_wb = mul_valid_out ? mul_wb : alu_req_if.wb;
|
||||
assign result_data = mul_valid_out ? mul_data : alu_jal_result;
|
||||
assign result_is_br = ~mul_valid_out && is_br_op;
|
||||
assign ready_in = is_mul_op ? mul_ready_in : alu_ready_in;
|
||||
|
||||
assign alu_valid_in = alu_req_if.valid && ~is_mul_op;
|
||||
assign mul_valid_in = alu_req_if.valid && is_mul_op;
|
||||
|
||||
assign alu_commit_if.valid = alu_valid_out || mul_valid_out;
|
||||
assign alu_commit_if.wid = alu_valid_out ? alu_wid : mul_wid;
|
||||
assign alu_commit_if.tmask = alu_valid_out ? alu_tmask : mul_tmask;
|
||||
assign alu_commit_if.PC = alu_valid_out ? alu_PC : mul_PC;
|
||||
assign alu_commit_if.rd = alu_valid_out ? alu_rd : mul_rd;
|
||||
assign alu_commit_if.wb = alu_valid_out ? alu_wb : mul_wb;
|
||||
assign alu_commit_if.data = alu_valid_out ? alu_data : mul_data;
|
||||
|
||||
assign alu_ready_out = alu_commit_if.ready;
|
||||
assign mul_ready_out = alu_commit_if.ready & ~alu_valid_out; // ALU takes priority
|
||||
|
||||
`else
|
||||
|
||||
assign stall_in = stall_out;
|
||||
assign ready_in = alu_ready_in;
|
||||
|
||||
assign result_valid = alu_req_if.valid;
|
||||
assign result_wid = alu_req_if.wid;
|
||||
assign result_tmask = alu_req_if.tmask;
|
||||
assign result_PC = alu_req_if.PC;
|
||||
assign result_rd = alu_req_if.rd;
|
||||
assign result_wb = alu_req_if.wb;
|
||||
assign result_data = alu_jal_result;
|
||||
assign result_is_br = is_br_op;
|
||||
assign alu_valid_in = alu_req_if.valid;
|
||||
|
||||
assign alu_commit_if.valid = alu_valid_out;
|
||||
assign alu_commit_if.wid = alu_wid;
|
||||
assign alu_commit_if.tmask = alu_tmask;
|
||||
assign alu_commit_if.PC = alu_PC;
|
||||
assign alu_commit_if.rd = alu_rd;
|
||||
assign alu_commit_if.wb = alu_wb;
|
||||
assign alu_commit_if.data = alu_data;
|
||||
|
||||
assign alu_ready_out = alu_commit_if.ready;
|
||||
|
||||
`endif
|
||||
|
||||
wire is_br_op_r;
|
||||
|
||||
assign stall_out = ~alu_commit_if.ready && alu_commit_if.valid;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + 1 + 32),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!stall_out),
|
||||
.data_in ({result_valid, result_wid, result_tmask, result_PC, result_rd, result_wb, result_data, result_is_br, br_taken, br_dest}),
|
||||
.data_out ({alu_commit_if.valid, alu_commit_if.wid, alu_commit_if.tmask, alu_commit_if.PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data, is_br_op_r, branch_ctl_if.taken, branch_ctl_if.dest})
|
||||
);
|
||||
|
||||
assign alu_commit_if.eop = 1'b1;
|
||||
|
||||
assign branch_ctl_if.valid = alu_commit_if.valid && alu_commit_if.ready && is_br_op_r;
|
||||
assign branch_ctl_if.wid = alu_commit_if.wid;
|
||||
|
||||
// can accept new request?
|
||||
assign alu_req_if.ready = ~stall_in;
|
||||
assign alu_req_if.ready = ready_in;
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (branch_ctl_if.valid) begin
|
||||
$display("%t: core%0d-branch: wid=%0d, PC=%0h, taken=%b, dest=%0h", $time, CORE_ID,
|
||||
branch_ctl_if.wid, alu_commit_if.PC, branch_ctl_if.taken, branch_ctl_if.dest);
|
||||
dpi_trace("%d: core%0d-branch: wid=%0d, PC=%0h, taken=%b, dest=%0h\n",
|
||||
$time, CORE_ID, branch_ctl_if.wid, alu_commit_if.PC, branch_ctl_if.taken, branch_ctl_if.dest);
|
||||
end
|
||||
end
|
||||
`endif
|
|
@ -12,16 +12,16 @@ module VX_cluster #(
|
|||
// Memory request
|
||||
output wire mem_req_valid,
|
||||
output wire mem_req_rw,
|
||||
output wire [`L2MEM_BYTEEN_WIDTH-1:0] mem_req_byteen,
|
||||
output wire [`L2MEM_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire [`L2MEM_DATA_WIDTH-1:0] mem_req_data,
|
||||
output wire [`L2MEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
output wire [`L2_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen,
|
||||
output wire [`L2_MEM_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire [`L2_MEM_DATA_WIDTH-1:0] mem_req_data,
|
||||
output wire [`L2_MEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
input wire mem_req_ready,
|
||||
|
||||
// Memory response
|
||||
input wire mem_rsp_valid,
|
||||
input wire [`L2MEM_DATA_WIDTH-1:0] mem_rsp_data,
|
||||
input wire [`L2MEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
input wire [`L2_MEM_DATA_WIDTH-1:0] mem_rsp_data,
|
||||
input wire [`L2_MEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
output wire mem_rsp_ready,
|
||||
|
||||
// Status
|
||||
|
@ -31,15 +31,15 @@ module VX_cluster #(
|
|||
|
||||
wire [`NUM_CORES-1:0] per_core_mem_req_valid;
|
||||
wire [`NUM_CORES-1:0] per_core_mem_req_rw;
|
||||
wire [`NUM_CORES-1:0][`DMEM_BYTEEN_WIDTH-1:0] per_core_mem_req_byteen;
|
||||
wire [`NUM_CORES-1:0][`DMEM_ADDR_WIDTH-1:0] per_core_mem_req_addr;
|
||||
wire [`NUM_CORES-1:0][`DMEM_DATA_WIDTH-1:0] per_core_mem_req_data;
|
||||
wire [`NUM_CORES-1:0][`XMEM_TAG_WIDTH-1:0] per_core_mem_req_tag;
|
||||
wire [`NUM_CORES-1:0][`DCACHE_MEM_BYTEEN_WIDTH-1:0] per_core_mem_req_byteen;
|
||||
wire [`NUM_CORES-1:0][`DCACHE_MEM_ADDR_WIDTH-1:0] per_core_mem_req_addr;
|
||||
wire [`NUM_CORES-1:0][`DCACHE_MEM_DATA_WIDTH-1:0] per_core_mem_req_data;
|
||||
wire [`NUM_CORES-1:0][`L1_MEM_TAG_WIDTH-1:0] per_core_mem_req_tag;
|
||||
wire [`NUM_CORES-1:0] per_core_mem_req_ready;
|
||||
|
||||
wire [`NUM_CORES-1:0] per_core_mem_rsp_valid;
|
||||
wire [`NUM_CORES-1:0][`DMEM_DATA_WIDTH-1:0] per_core_mem_rsp_data;
|
||||
wire [`NUM_CORES-1:0][`XMEM_TAG_WIDTH-1:0] per_core_mem_rsp_tag;
|
||||
wire [`NUM_CORES-1:0][`DCACHE_MEM_DATA_WIDTH-1:0] per_core_mem_rsp_data;
|
||||
wire [`NUM_CORES-1:0][`L1_MEM_TAG_WIDTH-1:0] per_core_mem_rsp_tag;
|
||||
wire [`NUM_CORES-1:0] per_core_mem_rsp_ready;
|
||||
|
||||
wire [`NUM_CORES-1:0] per_core_busy;
|
||||
|
@ -69,7 +69,7 @@ module VX_cluster #(
|
|||
.mem_rsp_tag (per_core_mem_rsp_tag [i]),
|
||||
.mem_rsp_ready (per_core_mem_rsp_ready[i]),
|
||||
|
||||
.busy (per_core_busy [i])
|
||||
.busy (per_core_busy [i])
|
||||
);
|
||||
end
|
||||
|
||||
|
@ -83,21 +83,22 @@ module VX_cluster #(
|
|||
`RESET_RELAY (l2_reset);
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_ID (`L2CACHE_ID),
|
||||
.CACHE_SIZE (`L2CACHE_SIZE),
|
||||
.CACHE_LINE_SIZE (`L2CACHE_LINE_SIZE),
|
||||
.NUM_BANKS (`L2NUM_BANKS),
|
||||
.WORD_SIZE (`L2WORD_SIZE),
|
||||
.NUM_REQS (`L2NUM_REQS),
|
||||
.CREQ_SIZE (`L2CREQ_SIZE),
|
||||
.CRSQ_SIZE (`L2CRSQ_SIZE),
|
||||
.MSHR_SIZE (`L2MSHR_SIZE),
|
||||
.MRSQ_SIZE (`L2MRSQ_SIZE),
|
||||
.MREQ_SIZE (`L2MREQ_SIZE),
|
||||
.CACHE_ID (`L2_CACHE_ID),
|
||||
.CACHE_SIZE (`L2_CACHE_SIZE),
|
||||
.CACHE_LINE_SIZE (`L2_CACHE_LINE_SIZE),
|
||||
.NUM_BANKS (`L2_NUM_BANKS),
|
||||
.NUM_PORTS (`L2_NUM_PORTS),
|
||||
.WORD_SIZE (`L2_WORD_SIZE),
|
||||
.NUM_REQS (`L2_NUM_REQS),
|
||||
.CREQ_SIZE (`L2_CREQ_SIZE),
|
||||
.CRSQ_SIZE (`L2_CRSQ_SIZE),
|
||||
.MSHR_SIZE (`L2_MSHR_SIZE),
|
||||
.MRSQ_SIZE (`L2_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`L2_MREQ_SIZE),
|
||||
.WRITE_ENABLE (1),
|
||||
.CORE_TAG_WIDTH (`XMEM_TAG_WIDTH),
|
||||
.CORE_TAG_WIDTH (`L1_MEM_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (0),
|
||||
.MEM_TAG_WIDTH (`L2MEM_TAG_WIDTH),
|
||||
.MEM_TAG_WIDTH (`L2_MEM_TAG_WIDTH),
|
||||
.NC_ENABLE (1)
|
||||
) l2cache (
|
||||
`SCOPE_BIND_VX_cluster_l2cache
|
||||
|
@ -143,16 +144,20 @@ module VX_cluster #(
|
|||
|
||||
end else begin
|
||||
|
||||
`RESET_RELAY (mem_arb_reset);
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_REQS (`NUM_CORES),
|
||||
.DATA_WIDTH (`L2MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`L2MEM_ADDR_WIDTH),
|
||||
.TAG_IN_WIDTH (`XMEM_TAG_WIDTH),
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (1)
|
||||
.NUM_REQS (`NUM_CORES),
|
||||
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`DCACHE_MEM_ADDR_WIDTH),
|
||||
.TAG_IN_WIDTH (`L1_MEM_TAG_WIDTH),
|
||||
.TYPE ("R"),
|
||||
.TAG_SEL_IDX (1), // Skip 0 for NC flag
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (1)
|
||||
) mem_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (mem_arb_reset),
|
||||
|
||||
// Core request
|
||||
.req_valid_in (per_core_mem_req_valid),
|
130
hw/rtl/VX_commit.sv
Normal file
130
hw/rtl/VX_commit.sv
Normal file
|
@ -0,0 +1,130 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_commit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_commit_if.slave alu_commit_if,
|
||||
VX_commit_if.slave ld_commit_if,
|
||||
VX_commit_if.slave st_commit_if,
|
||||
VX_commit_if.slave csr_commit_if,
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_commit_if.slave fpu_commit_if,
|
||||
`endif
|
||||
VX_commit_if.slave gpu_commit_if,
|
||||
|
||||
// outputs
|
||||
VX_writeback_if.master writeback_if,
|
||||
VX_cmt_to_csr_if.master cmt_to_csr_if
|
||||
);
|
||||
// CSRs update
|
||||
|
||||
wire alu_commit_fire = alu_commit_if.valid && alu_commit_if.ready;
|
||||
wire ld_commit_fire = ld_commit_if.valid && ld_commit_if.ready;
|
||||
wire st_commit_fire = st_commit_if.valid && st_commit_if.ready;
|
||||
wire csr_commit_fire = csr_commit_if.valid && csr_commit_if.ready;
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire fpu_commit_fire = fpu_commit_if.valid && fpu_commit_if.ready;
|
||||
`endif
|
||||
wire gpu_commit_fire = gpu_commit_if.valid && gpu_commit_if.ready;
|
||||
|
||||
wire commit_fire = alu_commit_fire
|
||||
|| ld_commit_fire
|
||||
|| st_commit_fire
|
||||
|| csr_commit_fire
|
||||
`ifdef EXT_F_ENABLE
|
||||
|| fpu_commit_fire
|
||||
`endif
|
||||
|| gpu_commit_fire;
|
||||
|
||||
wire [`NUM_THREADS-1:0] commit_tmask;
|
||||
assign commit_tmask = alu_commit_fire ? alu_commit_if.tmask:
|
||||
ld_commit_fire ? ld_commit_if.tmask:
|
||||
st_commit_fire ? st_commit_if.tmask:
|
||||
csr_commit_fire ? csr_commit_if.tmask:
|
||||
`ifdef EXT_F_ENABLE
|
||||
fpu_commit_fire ? fpu_commit_if.tmask:
|
||||
`endif
|
||||
/*gpu_commit_fire ?*/ gpu_commit_if.tmask;
|
||||
|
||||
wire [$clog2(`NUM_THREADS+1)-1:0] commit_cnt;
|
||||
`POP_COUNT(commit_cnt, commit_tmask);
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + $clog2(`NUM_THREADS+1)),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in ({commit_fire, commit_cnt}),
|
||||
.data_out ({cmt_to_csr_if.valid, cmt_to_csr_if.commit_size})
|
||||
);
|
||||
|
||||
// Writeback
|
||||
|
||||
VX_writeback #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) writeback (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.alu_commit_if (alu_commit_if),
|
||||
.ld_commit_if (ld_commit_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_commit_if (fpu_commit_if),
|
||||
`endif
|
||||
.writeback_if (writeback_if)
|
||||
);
|
||||
|
||||
// store and gpu commits don't writeback
|
||||
assign st_commit_if.ready = 1'b1;
|
||||
assign gpu_commit_if.ready = 1'b1;
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (alu_commit_if.valid && alu_commit_if.ready) begin
|
||||
dpi_trace("%d: core%0d-commit: wid=%0d, PC=%0h, ex=ALU, tmask=%b, wb=%0d, rd=%0d, data=", $time, CORE_ID, alu_commit_if.wid, alu_commit_if.PC, alu_commit_if.tmask, alu_commit_if.wb, alu_commit_if.rd);
|
||||
`TRACE_ARRAY1D(alu_commit_if.data, `NUM_THREADS);
|
||||
dpi_trace("\n");
|
||||
end
|
||||
if (ld_commit_if.valid && ld_commit_if.ready) begin
|
||||
dpi_trace("%d: core%0d-commit: wid=%0d, PC=%0h, ex=LSU, tmask=%b, wb=%0d, rd=%0d, data=", $time, CORE_ID, ld_commit_if.wid, ld_commit_if.PC, ld_commit_if.tmask, ld_commit_if.wb, ld_commit_if.rd);
|
||||
`TRACE_ARRAY1D(ld_commit_if.data, `NUM_THREADS);
|
||||
dpi_trace("\n");
|
||||
end
|
||||
if (st_commit_if.valid && st_commit_if.ready) begin
|
||||
dpi_trace("%d: core%0d-commit: wid=%0d, PC=%0h, ex=LSU, tmask=%b, wb=%0d, rd=%0d\n", $time, CORE_ID, st_commit_if.wid, st_commit_if.PC, st_commit_if.tmask, st_commit_if.wb, st_commit_if.rd);
|
||||
end
|
||||
if (csr_commit_if.valid && csr_commit_if.ready) begin
|
||||
dpi_trace("%d: core%0d-commit: wid=%0d, PC=%0h, ex=CSR, tmask=%b, wb=%0d, rd=%0d, data=", $time, CORE_ID, csr_commit_if.wid, csr_commit_if.PC, csr_commit_if.tmask, csr_commit_if.wb, csr_commit_if.rd);
|
||||
`TRACE_ARRAY1D(csr_commit_if.data, `NUM_THREADS);
|
||||
dpi_trace("\n");
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (fpu_commit_if.valid && fpu_commit_if.ready) begin
|
||||
dpi_trace("%d: core%0d-commit: wid=%0d, PC=%0h, ex=FPU, tmask=%b, wb=%0d, rd=%0d, data=", $time, CORE_ID, fpu_commit_if.wid, fpu_commit_if.PC, fpu_commit_if.tmask, fpu_commit_if.wb, fpu_commit_if.rd);
|
||||
`TRACE_ARRAY1D(fpu_commit_if.data, `NUM_THREADS);
|
||||
dpi_trace("\n");
|
||||
end
|
||||
`endif
|
||||
if (gpu_commit_if.valid && gpu_commit_if.ready) begin
|
||||
dpi_trace("%d: core%0d-commit: wid=%0d, PC=%0h, ex=GPU, tmask=%b, wb=%0d, rd=%0d, data=", $time, CORE_ID, gpu_commit_if.wid, gpu_commit_if.PC, gpu_commit_if.tmask, gpu_commit_if.wb, gpu_commit_if.rd);
|
||||
`TRACE_ARRAY1D(gpu_commit_if.data, `NUM_THREADS);
|
||||
dpi_trace("\n");
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,127 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_commit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_commit_if alu_commit_if,
|
||||
VX_commit_if ld_commit_if,
|
||||
VX_commit_if st_commit_if,
|
||||
VX_commit_if csr_commit_if,
|
||||
VX_commit_if fpu_commit_if,
|
||||
VX_commit_if gpu_commit_if,
|
||||
|
||||
// outputs
|
||||
VX_writeback_if writeback_if,
|
||||
VX_cmt_to_csr_if cmt_to_csr_if
|
||||
);
|
||||
localparam CMTW = $clog2(3*`NUM_THREADS+1);
|
||||
|
||||
// CSRs update
|
||||
|
||||
wire alu_commit_fire = alu_commit_if.valid && alu_commit_if.ready;
|
||||
wire ld_commit_fire = ld_commit_if.valid && ld_commit_if.ready;
|
||||
wire st_commit_fire = st_commit_if.valid && st_commit_if.ready;
|
||||
wire csr_commit_fire = csr_commit_if.valid && csr_commit_if.ready;
|
||||
wire fpu_commit_fire = fpu_commit_if.valid && fpu_commit_if.ready;
|
||||
wire gpu_commit_fire = gpu_commit_if.valid && gpu_commit_if.ready;
|
||||
|
||||
wire commit_fire = alu_commit_fire
|
||||
|| ld_commit_fire
|
||||
|| st_commit_fire
|
||||
|| csr_commit_fire
|
||||
|| fpu_commit_fire
|
||||
|| gpu_commit_fire;
|
||||
|
||||
wire [`NUM_THREADS-1:0] commit_tmask1, commit_tmask2, commit_tmask3;
|
||||
|
||||
assign commit_tmask1 = alu_commit_fire ? alu_commit_if.tmask:
|
||||
ld_commit_fire ? ld_commit_if.tmask:
|
||||
csr_commit_fire ? csr_commit_if.tmask:
|
||||
fpu_commit_fire ? fpu_commit_if.tmask:
|
||||
0;
|
||||
|
||||
assign commit_tmask2 = st_commit_fire ? st_commit_if.tmask : 0;
|
||||
assign commit_tmask3 = gpu_commit_fire ? gpu_commit_if.tmask : 0;
|
||||
|
||||
wire [CMTW-1:0] commit_size;
|
||||
assign commit_size = $countones({commit_tmask3, commit_tmask2, commit_tmask1});
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + CMTW),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in ({commit_fire, commit_size}),
|
||||
.data_out ({cmt_to_csr_if.valid, cmt_to_csr_if.commit_size})
|
||||
);
|
||||
|
||||
// Writeback
|
||||
|
||||
VX_writeback #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) writeback (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.alu_commit_if (alu_commit_if),
|
||||
.ld_commit_if (ld_commit_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
.fpu_commit_if (fpu_commit_if),
|
||||
.gpu_commit_if (gpu_commit_if),
|
||||
|
||||
.writeback_if (writeback_if)
|
||||
);
|
||||
|
||||
// store doesn't writeback
|
||||
assign st_commit_if.ready = 1'b1;
|
||||
// assign gpu_commit_if.ready = 1'b1;
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (alu_commit_if.valid && alu_commit_if.ready) begin
|
||||
$write("%t: core%0d-commit: wid=%0d, PC=%0h, ex=ALU, tmask=%b, wb=%0d, rd=%0d, data=", $time, CORE_ID, alu_commit_if.wid, alu_commit_if.PC, alu_commit_if.tmask, alu_commit_if.wb, alu_commit_if.rd);
|
||||
`PRINT_ARRAY1D(alu_commit_if.data, `NUM_THREADS);
|
||||
$write("\n");
|
||||
end
|
||||
if (ld_commit_if.valid && ld_commit_if.ready) begin
|
||||
$write("%t: core%0d-commit: wid=%0d, PC=%0h, ex=LSU, tmask=%b, wb=%0d, rd=%0d, data=", $time, CORE_ID, ld_commit_if.wid, ld_commit_if.PC, ld_commit_if.tmask, ld_commit_if.wb, ld_commit_if.rd);
|
||||
`PRINT_ARRAY1D(ld_commit_if.data, `NUM_THREADS);
|
||||
$write("\n");
|
||||
end
|
||||
if (st_commit_if.valid && st_commit_if.ready) begin
|
||||
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=LSU, tmask=%b, wb=%0d, rd=%0d", $time, CORE_ID, st_commit_if.wid, st_commit_if.PC, st_commit_if.tmask, st_commit_if.wb, st_commit_if.rd);
|
||||
end
|
||||
if (csr_commit_if.valid && csr_commit_if.ready) begin
|
||||
$write("%t: core%0d-commit: wid=%0d, PC=%0h, ex=CSR, tmask=%b, wb=%0d, rd=%0d, data=", $time, CORE_ID, csr_commit_if.wid, csr_commit_if.PC, csr_commit_if.tmask, csr_commit_if.wb, csr_commit_if.rd);
|
||||
`PRINT_ARRAY1D(csr_commit_if.data, `NUM_THREADS);
|
||||
$write("\n");
|
||||
end
|
||||
if (fpu_commit_if.valid && fpu_commit_if.ready) begin
|
||||
$write("%t: core%0d-commit: wid=%0d, PC=%0h, ex=FPU, tmask=%b, wb=%0d, rd=%0d, data=", $time, CORE_ID, fpu_commit_if.wid, fpu_commit_if.PC, fpu_commit_if.tmask, fpu_commit_if.wb, fpu_commit_if.rd);
|
||||
`PRINT_ARRAY1D(fpu_commit_if.data, `NUM_THREADS);
|
||||
$write("\n");
|
||||
end
|
||||
if (gpu_commit_if.valid && gpu_commit_if.ready) begin
|
||||
$write("%t: core%0d-commit: wid=%0d, PC=%0h, ex=GPU, tmask=%b, wb=%0d, rd=%0d, data=", $time, CORE_ID, gpu_commit_if.wid, gpu_commit_if.PC, gpu_commit_if.tmask, gpu_commit_if.wb, gpu_commit_if.rd);
|
||||
`PRINT_ARRAY1D(gpu_commit_if.data, `NUM_THREADS);
|
||||
$write("\n");
|
||||
end
|
||||
end
|
||||
`else
|
||||
`UNUSED_VAR (fpu_commit_if.PC)
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -38,7 +38,7 @@
|
|||
`endif
|
||||
|
||||
`ifndef L1_BLOCK_SIZE
|
||||
`define L1_BLOCK_SIZE (`NUM_THREADS * 4)
|
||||
`define L1_BLOCK_SIZE ((`L2_ENABLE || `L3_ENABLE) ? 16 : `MEM_BLOCK_SIZE)
|
||||
`endif
|
||||
|
||||
`ifndef STARTUP_ADDR
|
||||
|
@ -227,6 +227,7 @@
|
|||
`define CSR_LWID 12'hCC3
|
||||
`define CSR_GWID `CSR_MHARTID
|
||||
`define CSR_GCID 12'hCC5
|
||||
`define CSR_TMASK 12'hCC4
|
||||
|
||||
// Machine SIMT CSRs
|
||||
`define CSR_NT 12'hFC0
|
||||
|
@ -250,6 +251,11 @@
|
|||
|
||||
// Pipeline Queues ////////////////////////////////////////////////////////////
|
||||
|
||||
// Size of Instruction Buffer
|
||||
`ifndef IBUF_SIZE
|
||||
`define IBUF_SIZE 2
|
||||
`endif
|
||||
|
||||
// Size of LSU Request Queue
|
||||
`ifndef LSUQ_SIZE
|
||||
`define LSUQ_SIZE (`NUM_WARPS * 2)
|
||||
|
@ -268,28 +274,28 @@
|
|||
`endif
|
||||
|
||||
// Core Request Queue Size
|
||||
`ifndef ICREQ_SIZE
|
||||
`define ICREQ_SIZE 0
|
||||
`ifndef ICACHE_CREQ_SIZE
|
||||
`define ICACHE_CREQ_SIZE 0
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
`ifndef ICRSQ_SIZE
|
||||
`define ICRSQ_SIZE 2
|
||||
`ifndef ICACHE_CRSQ_SIZE
|
||||
`define ICACHE_CRSQ_SIZE 2
|
||||
`endif
|
||||
|
||||
// Miss Handling Register Size
|
||||
`ifndef IMSHR_SIZE
|
||||
`define IMSHR_SIZE `NUM_WARPS
|
||||
`ifndef ICACHE_MSHR_SIZE
|
||||
`define ICACHE_MSHR_SIZE `NUM_WARPS
|
||||
`endif
|
||||
|
||||
// Memory Request Queue Size
|
||||
`ifndef IMREQ_SIZE
|
||||
`define IMREQ_SIZE 4
|
||||
`ifndef ICACHE_MREQ_SIZE
|
||||
`define ICACHE_MREQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Memory Response Queue Size
|
||||
`ifndef IMRSQ_SIZE
|
||||
`define IMRSQ_SIZE 0
|
||||
`ifndef ICACHE_MRSQ_SIZE
|
||||
`define ICACHE_MRSQ_SIZE 0
|
||||
`endif
|
||||
|
||||
// Dcache Configurable Knobs //////////////////////////////////////////////////
|
||||
|
@ -300,38 +306,38 @@
|
|||
`endif
|
||||
|
||||
// Number of banks
|
||||
`ifndef DNUM_BANKS
|
||||
`define DNUM_BANKS `NUM_THREADS
|
||||
`ifndef DCACHE_NUM_BANKS
|
||||
`define DCACHE_NUM_BANKS `NUM_THREADS
|
||||
`endif
|
||||
|
||||
// Number of bank ports
|
||||
`ifndef DNUM_PORTS
|
||||
`define DNUM_PORTS 1
|
||||
// Number of ports per bank
|
||||
`ifndef DCACHE_NUM_PORTS
|
||||
`define DCACHE_NUM_PORTS 1
|
||||
`endif
|
||||
|
||||
// Core Request Queue Size
|
||||
`ifndef DCREQ_SIZE
|
||||
`define DCREQ_SIZE 0
|
||||
`ifndef DCACHE_CREQ_SIZE
|
||||
`define DCACHE_CREQ_SIZE 0
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
`ifndef DCRSQ_SIZE
|
||||
`define DCRSQ_SIZE 2
|
||||
`ifndef DCACHE_CRSQ_SIZE
|
||||
`define DCACHE_CRSQ_SIZE 2
|
||||
`endif
|
||||
|
||||
// Miss Handling Register Size
|
||||
`ifndef DMSHR_SIZE
|
||||
`define DMSHR_SIZE `LSUQ_SIZE
|
||||
`ifndef DCACHE_MSHR_SIZE
|
||||
`define DCACHE_MSHR_SIZE `LSUQ_SIZE
|
||||
`endif
|
||||
|
||||
// Memory Request Queue Size
|
||||
`ifndef DMREQ_SIZE
|
||||
`define DMREQ_SIZE 4
|
||||
`ifndef DCACHE_MREQ_SIZE
|
||||
`define DCACHE_MREQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Memory Response Queue Size
|
||||
`ifndef DMRSQ_SIZE
|
||||
`define DMRSQ_SIZE 0
|
||||
`ifndef DCACHE_MRSQ_SIZE
|
||||
`define DCACHE_MRSQ_SIZE 0
|
||||
`endif
|
||||
|
||||
// SM Configurable Knobs //////////////////////////////////////////////////////
|
||||
|
@ -348,92 +354,102 @@
|
|||
`endif
|
||||
|
||||
// Number of banks
|
||||
`ifndef SNUM_BANKS
|
||||
`define SNUM_BANKS `NUM_THREADS
|
||||
`ifndef SMEM_NUM_BANKS
|
||||
`define SMEM_NUM_BANKS `NUM_THREADS
|
||||
`endif
|
||||
|
||||
// Core Request Queue Size
|
||||
`ifndef SCREQ_SIZE
|
||||
`define SCREQ_SIZE 2
|
||||
`ifndef SMEM_CREQ_SIZE
|
||||
`define SMEM_CREQ_SIZE 2
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
`ifndef SCRSQ_SIZE
|
||||
`define SCRSQ_SIZE 2
|
||||
`ifndef SMEM_CRSQ_SIZE
|
||||
`define SMEM_CRSQ_SIZE 2
|
||||
`endif
|
||||
|
||||
// L2cache Configurable Knobs /////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
`ifndef L2CACHE_SIZE
|
||||
`define L2CACHE_SIZE 131072
|
||||
`ifndef L2_CACHE_SIZE
|
||||
`define L2_CACHE_SIZE 131072
|
||||
`endif
|
||||
|
||||
// Number of banks
|
||||
`ifndef L2NUM_BANKS
|
||||
`define L2NUM_BANKS `MIN(`NUM_CORES, 4)
|
||||
`ifndef L2_NUM_BANKS
|
||||
`define L2_NUM_BANKS `MIN(`NUM_CORES, 4)
|
||||
`endif
|
||||
|
||||
// Number of ports per bank
|
||||
`ifndef L2_NUM_PORTS
|
||||
`define L2_NUM_PORTS 1
|
||||
`endif
|
||||
|
||||
// Core Request Queue Size
|
||||
`ifndef L2CREQ_SIZE
|
||||
`define L2CREQ_SIZE 0
|
||||
`ifndef L2_CREQ_SIZE
|
||||
`define L2_CREQ_SIZE 0
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
`ifndef L2CRSQ_SIZE
|
||||
`define L2CRSQ_SIZE 2
|
||||
`ifndef L2_CRSQ_SIZE
|
||||
`define L2_CRSQ_SIZE 2
|
||||
`endif
|
||||
|
||||
// Miss Handling Register Size
|
||||
`ifndef L2MSHR_SIZE
|
||||
`define L2MSHR_SIZE 16
|
||||
`ifndef L2_MSHR_SIZE
|
||||
`define L2_MSHR_SIZE 16
|
||||
`endif
|
||||
|
||||
// Memory Request Queue Size
|
||||
`ifndef L2MREQ_SIZE
|
||||
`define L2MREQ_SIZE 4
|
||||
`ifndef L2_MREQ_SIZE
|
||||
`define L2_MREQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Memory Response Queue Size
|
||||
`ifndef L2MRSQ_SIZE
|
||||
`define L2MRSQ_SIZE 0
|
||||
`ifndef L2_MRSQ_SIZE
|
||||
`define L2_MRSQ_SIZE 0
|
||||
`endif
|
||||
|
||||
// L3cache Configurable Knobs /////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
`ifndef L3CACHE_SIZE
|
||||
`define L3CACHE_SIZE 1048576
|
||||
`ifndef L3_CACHE_SIZE
|
||||
`define L3_CACHE_SIZE 1048576
|
||||
`endif
|
||||
|
||||
// Number of banks
|
||||
`ifndef L3NUM_BANKS
|
||||
`define L3NUM_BANKS `MIN(`NUM_CLUSTERS, 4)
|
||||
`ifndef L3_NUM_BANKS
|
||||
`define L3_NUM_BANKS `MIN(`NUM_CLUSTERS, 4)
|
||||
`endif
|
||||
|
||||
// Number of ports per bank
|
||||
`ifndef L3_NUM_PORTS
|
||||
`define L3_NUM_PORTS 1
|
||||
`endif
|
||||
|
||||
// Core Request Queue Size
|
||||
`ifndef L3CREQ_SIZE
|
||||
`define L3CREQ_SIZE 0
|
||||
`ifndef L3_CREQ_SIZE
|
||||
`define L3_CREQ_SIZE 0
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
`ifndef L3CRSQ_SIZE
|
||||
`define L3CRSQ_SIZE 2
|
||||
`ifndef L3_CRSQ_SIZE
|
||||
`define L3_CRSQ_SIZE 2
|
||||
`endif
|
||||
|
||||
// Miss Handling Register Size
|
||||
`ifndef L3MSHR_SIZE
|
||||
`define L3MSHR_SIZE 16
|
||||
`ifndef L3_MSHR_SIZE
|
||||
`define L3_MSHR_SIZE 16
|
||||
`endif
|
||||
|
||||
// Memory Request Queue Size
|
||||
`ifndef L3MREQ_SIZE
|
||||
`define L3MREQ_SIZE 4
|
||||
`ifndef L3_MREQ_SIZE
|
||||
`define L3_MREQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Memory Response Queue Size
|
||||
`ifndef L3MRSQ_SIZE
|
||||
`define L3MRSQ_SIZE 0
|
||||
`ifndef L3_MRSQ_SIZE
|
||||
`define L3_MRSQ_SIZE 0
|
||||
`endif
|
||||
|
||||
`endif
|
||||
|
|
|
@ -12,16 +12,16 @@ module VX_core #(
|
|||
// Memory request
|
||||
output wire mem_req_valid,
|
||||
output wire mem_req_rw,
|
||||
output wire [`DMEM_BYTEEN_WIDTH-1:0] mem_req_byteen,
|
||||
output wire [`DMEM_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire [`DMEM_DATA_WIDTH-1:0] mem_req_data,
|
||||
output wire [`XMEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
output wire [`DCACHE_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen,
|
||||
output wire [`DCACHE_MEM_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire [`DCACHE_MEM_DATA_WIDTH-1:0] mem_req_data,
|
||||
output wire [`L1_MEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
input wire mem_req_ready,
|
||||
|
||||
// Memory reponse
|
||||
input wire mem_rsp_valid,
|
||||
input wire [`DMEM_DATA_WIDTH-1:0] mem_rsp_data,
|
||||
input wire [`XMEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
input wire [`DCACHE_MEM_DATA_WIDTH-1:0] mem_rsp_data,
|
||||
input wire [`L1_MEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
output wire mem_rsp_ready,
|
||||
|
||||
// Status
|
||||
|
@ -32,14 +32,14 @@ module VX_core #(
|
|||
`endif
|
||||
|
||||
VX_mem_req_if #(
|
||||
.DATA_WIDTH (`DMEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`DMEM_ADDR_WIDTH),
|
||||
.TAG_WIDTH (`XMEM_TAG_WIDTH)
|
||||
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`DCACHE_MEM_ADDR_WIDTH),
|
||||
.TAG_WIDTH (`L1_MEM_TAG_WIDTH)
|
||||
) mem_req_if();
|
||||
|
||||
VX_mem_rsp_if #(
|
||||
.DATA_WIDTH (`DMEM_DATA_WIDTH),
|
||||
.TAG_WIDTH (`XMEM_TAG_WIDTH)
|
||||
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
|
||||
.TAG_WIDTH (`L1_MEM_TAG_WIDTH)
|
||||
) mem_rsp_if();
|
||||
|
||||
assign mem_req_valid = mem_req_if.valid;
|
||||
|
@ -58,25 +58,25 @@ module VX_core #(
|
|||
//--
|
||||
|
||||
VX_dcache_req_if #(
|
||||
.NUM_REQS (`DNUM_REQS),
|
||||
.WORD_SIZE (`DWORD_SIZE),
|
||||
.TAG_WIDTH (`DCORE_TAG_WIDTH)
|
||||
.NUM_REQS (`DCACHE_NUM_REQS),
|
||||
.WORD_SIZE (`DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH)
|
||||
) dcache_req_if();
|
||||
|
||||
VX_dcache_rsp_if #(
|
||||
.NUM_REQS (`DNUM_REQS),
|
||||
.WORD_SIZE (`DWORD_SIZE),
|
||||
.TAG_WIDTH (`DCORE_TAG_WIDTH)
|
||||
.NUM_REQS (`DCACHE_NUM_REQS),
|
||||
.WORD_SIZE (`DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH)
|
||||
) dcache_rsp_if();
|
||||
|
||||
VX_icache_req_if #(
|
||||
.WORD_SIZE (`IWORD_SIZE),
|
||||
.TAG_WIDTH (`ICORE_TAG_WIDTH)
|
||||
.WORD_SIZE (`ICACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH)
|
||||
) icache_req_if();
|
||||
|
||||
VX_icache_rsp_if #(
|
||||
.WORD_SIZE (`IWORD_SIZE),
|
||||
.TAG_WIDTH (`ICORE_TAG_WIDTH)
|
||||
.WORD_SIZE (`ICACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH)
|
||||
) icache_rsp_if();
|
||||
|
||||
VX_pipeline #(
|
|
@ -7,15 +7,18 @@ module VX_csr_data #(
|
|||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if perf_memsys_if,
|
||||
VX_perf_pipeline_if perf_pipeline_if,
|
||||
VX_perf_memsys_if.slave perf_memsys_if,
|
||||
VX_perf_pipeline_if.slave perf_pipeline_if,
|
||||
`endif
|
||||
|
||||
VX_cmt_to_csr_if cmt_to_csr_if,
|
||||
VX_fpu_to_csr_if fpu_to_csr_if,
|
||||
VX_cmt_to_csr_if.slave cmt_to_csr_if,
|
||||
VX_fetch_to_csr_if.slave fetch_to_csr_if,
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_fpu_to_csr_if.slave fpu_to_csr_if,
|
||||
`endif
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
VX_tex_csr_if tex_csr_if,
|
||||
VX_tex_csr_if.slave tex_csr_if,
|
||||
`endif
|
||||
|
||||
input wire read_enable,
|
||||
|
@ -30,6 +33,8 @@ module VX_csr_data #(
|
|||
|
||||
input wire busy
|
||||
);
|
||||
import fpu_types::*;
|
||||
|
||||
reg [`CSR_WIDTH-1:0] csr_satp;
|
||||
reg [`CSR_WIDTH-1:0] csr_mstatus;
|
||||
reg [`CSR_WIDTH-1:0] csr_medeleg;
|
||||
|
@ -42,38 +47,40 @@ module VX_csr_data #(
|
|||
reg [63:0] csr_cycle;
|
||||
reg [63:0] csr_instret;
|
||||
|
||||
reg [`NUM_WARPS-1:0][`FRM_BITS+`FFG_BITS-1:0] fcsr;
|
||||
|
||||
always @(posedge clk) begin
|
||||
reg [`NUM_WARPS-1:0][`INST_FRM_BITS+`FFLAGS_BITS-1:0] fcsr;
|
||||
|
||||
always @(posedge clk) begin
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (reset) begin
|
||||
fcsr <= '0;
|
||||
end
|
||||
|
||||
end
|
||||
if (fpu_to_csr_if.write_enable) begin
|
||||
fcsr[fpu_to_csr_if.write_wid][`FFG_BITS-1:0] <= fcsr[fpu_to_csr_if.write_wid][`FFG_BITS-1:0]
|
||||
| fpu_to_csr_if.write_fflags;
|
||||
fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0] <= fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0]
|
||||
| fpu_to_csr_if.write_fflags;
|
||||
end
|
||||
|
||||
`endif
|
||||
if (write_enable) begin
|
||||
case (write_addr)
|
||||
`CSR_FFLAGS: fcsr[write_wid][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0];
|
||||
`CSR_FRM: fcsr[write_wid][`FRM_BITS+`FFG_BITS-1:`FFG_BITS] <= write_data[`FRM_BITS-1:0];
|
||||
`CSR_FCSR: fcsr[write_wid] <= write_data[`FFG_BITS+`FRM_BITS-1:0];
|
||||
`CSR_FFLAGS: fcsr[write_wid][`FFLAGS_BITS-1:0] <= write_data[`FFLAGS_BITS-1:0];
|
||||
`CSR_FRM: fcsr[write_wid][`INST_FRM_BITS+`FFLAGS_BITS-1:`FFLAGS_BITS] <= write_data[`INST_FRM_BITS-1:0];
|
||||
`CSR_FCSR: fcsr[write_wid] <= write_data[`FFLAGS_BITS+`INST_FRM_BITS-1:0];
|
||||
|
||||
`CSR_SATP: csr_satp <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_MSTATUS: csr_mstatus <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_MEDELEG: csr_medeleg <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_MIDELEG: csr_mideleg <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_MIE: csr_mie <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_MTVEC: csr_mtvec <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_MEPC: csr_mepc <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_PMPCFG0: csr_pmpcfg[0] <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_PMPADDR0: csr_pmpaddr[0] <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_SATP: csr_satp <= write_data;
|
||||
|
||||
`CSR_MSTATUS: csr_mstatus <= write_data;
|
||||
`CSR_MEDELEG: csr_medeleg <= write_data;
|
||||
`CSR_MIDELEG: csr_mideleg <= write_data;
|
||||
`CSR_MIE: csr_mie <= write_data;
|
||||
`CSR_MTVEC: csr_mtvec <= write_data;
|
||||
|
||||
`CSR_MEPC: csr_mepc <= write_data;
|
||||
|
||||
`CSR_PMPCFG0: csr_pmpcfg[0] <= write_data;
|
||||
`CSR_PMPADDR0: csr_pmpaddr[0] <= write_data;
|
||||
|
||||
default: begin
|
||||
assert (write_addr >= `CSR_TEX_BEGIN(0) && write_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES))
|
||||
else $error("%t: invalid CSR write address: %0h", $time, write_addr);
|
||||
else `ASSERT(~write_enable, ("%t: invalid CSR write address: %0h", $time, write_addr));
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
@ -109,8 +116,8 @@ module VX_csr_data #(
|
|||
read_data_r = 'x;
|
||||
read_addr_valid_r = 1;
|
||||
case (read_addr)
|
||||
`CSR_FFLAGS : read_data_r = 32'(fcsr[read_wid][`FFG_BITS-1:0]);
|
||||
`CSR_FRM : read_data_r = 32'(fcsr[read_wid][`FRM_BITS+`FFG_BITS-1:`FFG_BITS]);
|
||||
`CSR_FFLAGS : read_data_r = 32'(fcsr[read_wid][`FFLAGS_BITS-1:0]);
|
||||
`CSR_FRM : read_data_r = 32'(fcsr[read_wid][`INST_FRM_BITS+`FFLAGS_BITS-1:`FFLAGS_BITS]);
|
||||
`CSR_FCSR : read_data_r = 32'(fcsr[read_wid]);
|
||||
|
||||
`CSR_WTID ,
|
||||
|
@ -120,6 +127,9 @@ module VX_csr_data #(
|
|||
/*`CSR_MHARTID ,*/
|
||||
`CSR_GWID : read_data_r = CORE_ID * `NUM_WARPS + 32'(read_wid);
|
||||
`CSR_GCID : read_data_r = CORE_ID;
|
||||
|
||||
`CSR_TMASK : read_data_r = 32'(fetch_to_csr_if.thread_masks[read_wid]);
|
||||
|
||||
`CSR_NT : read_data_r = `NUM_THREADS;
|
||||
`CSR_NW : read_data_r = `NUM_WARPS;
|
||||
`CSR_NC : read_data_r = `NUM_CORES * `NUM_CLUSTERS;
|
||||
|
@ -223,7 +233,9 @@ module VX_csr_data #(
|
|||
`RUNTIME_ASSERT(~read_enable || read_addr_valid_r, ("invalid CSR read address: %0h", read_addr))
|
||||
|
||||
assign read_data = read_data_r;
|
||||
|
||||
assign fpu_to_csr_if.read_frm = fcsr[fpu_to_csr_if.read_wid][`FRM_BITS+`FFG_BITS-1:`FFG_BITS];
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
assign fpu_to_csr_if.read_frm = fcsr[fpu_to_csr_if.read_wid][`INST_FRM_BITS+`FFLAGS_BITS-1:`FFLAGS_BITS];
|
||||
`endif
|
||||
|
||||
endmodule
|
|
@ -3,28 +3,29 @@
|
|||
module VX_csr_unit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if perf_memsys_if,
|
||||
VX_perf_pipeline_if perf_pipeline_if,
|
||||
VX_perf_memsys_if.slave perf_memsys_if,
|
||||
VX_perf_pipeline_if.slave perf_pipeline_if,
|
||||
`endif
|
||||
|
||||
VX_cmt_to_csr_if cmt_to_csr_if,
|
||||
VX_fpu_to_csr_if fpu_to_csr_if,
|
||||
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
VX_tex_csr_if tex_csr_if,
|
||||
`endif
|
||||
VX_cmt_to_csr_if.slave cmt_to_csr_if,
|
||||
VX_fetch_to_csr_if.slave fetch_to_csr_if,
|
||||
VX_csr_req_if.slave csr_req_if,
|
||||
VX_commit_if.master csr_commit_if,
|
||||
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_commit_if csr_commit_if,
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_fpu_to_csr_if.slave fpu_to_csr_if,
|
||||
input wire[`NUM_WARPS-1:0] fpu_pending,
|
||||
`endif
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
VX_tex_csr_if.slave tex_csr_if,
|
||||
`endif
|
||||
|
||||
input wire busy,
|
||||
|
||||
input wire[`NUM_WARPS-1:0] fpu_pending,
|
||||
output wire[`NUM_WARPS-1:0] pending
|
||||
output wire[`NUM_WARPS-1:0] pending,
|
||||
input wire busy
|
||||
);
|
||||
wire csr_we_s1;
|
||||
wire [`CSR_ADDR_BITS-1:0] csr_addr_s1;
|
||||
|
@ -33,7 +34,7 @@ module VX_csr_unit #(
|
|||
|
||||
wire write_enable = csr_commit_if.valid && csr_we_s1;
|
||||
|
||||
wire [31:0] csr_req_data = csr_req_if.use_imm ? 32'(csr_req_if.rs1) : csr_req_if.rs1_data;
|
||||
wire [31:0] csr_req_data = csr_req_if.use_imm ? 32'(csr_req_if.imm) : csr_req_if.rs1_data;
|
||||
|
||||
VX_csr_data #(
|
||||
.CORE_ID(CORE_ID)
|
||||
|
@ -45,6 +46,8 @@ module VX_csr_unit #(
|
|||
.perf_pipeline_if (perf_pipeline_if),
|
||||
`endif
|
||||
.cmt_to_csr_if (cmt_to_csr_if),
|
||||
.fetch_to_csr_if(fetch_to_csr_if),
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_to_csr_if (fpu_to_csr_if),
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
.tex_csr_if (tex_csr_if),
|
||||
|
@ -72,21 +75,25 @@ module VX_csr_unit #(
|
|||
always @(*) begin
|
||||
csr_we_s0_unqual = (csr_req_data != 0);
|
||||
case (csr_req_if.op_type)
|
||||
`CSR_RW: begin
|
||||
`INST_CSR_RW: begin
|
||||
csr_updated_data = csr_req_data;
|
||||
csr_we_s0_unqual = 1;
|
||||
end
|
||||
`CSR_RS: begin
|
||||
`INST_CSR_RS: begin
|
||||
csr_updated_data = csr_read_data_qual | csr_req_data;
|
||||
end
|
||||
//`CSR_RC
|
||||
//`INST_CSR_RC
|
||||
default: begin
|
||||
csr_updated_data = csr_read_data_qual & ~csr_req_data;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire stall_in = fpu_pending[csr_req_if.wid];
|
||||
`else
|
||||
wire stall_in = 0;
|
||||
`endif
|
||||
|
||||
wire csr_req_valid = csr_req_if.valid && !stall_in;
|
||||
|
|
@ -1,16 +1,17 @@
|
|||
`include "VX_define.vh"
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
`include "VX_print_instr.vh"
|
||||
`endif
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`define USED_IREG(r) \
|
||||
used_regs[{1'b0, r}] = 1
|
||||
`define USED_IREG(r) \
|
||||
r``_r = {1'b0, ``r}
|
||||
|
||||
`define USED_FREG(r) \
|
||||
r``_r[5] = 1; \
|
||||
used_regs[{1'b1, r}] = 1
|
||||
`define USED_FREG(r) \
|
||||
r``_r = {1'b1, ``r}
|
||||
`else
|
||||
`define USED_IREG(r) \
|
||||
used_regs[r] = 1
|
||||
r``_r = ``r
|
||||
`endif
|
||||
|
||||
module VX_decode #(
|
||||
|
@ -20,25 +21,24 @@ module VX_decode #(
|
|||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_ifetch_rsp_if ifetch_rsp_if,
|
||||
VX_ifetch_rsp_if.slave ifetch_rsp_if,
|
||||
|
||||
// outputs
|
||||
VX_decode_if decode_if,
|
||||
VX_wstall_if wstall_if,
|
||||
VX_join_if join_if
|
||||
VX_decode_if.master decode_if,
|
||||
VX_wstall_if.master wstall_if,
|
||||
VX_join_if.master join_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
reg [`EX_BITS-1:0] ex_type;
|
||||
reg [`OP_BITS-1:0] op_type;
|
||||
reg [`MOD_BITS-1:0] op_mod;
|
||||
reg [`INST_OP_BITS-1:0] op_type;
|
||||
reg [`INST_MOD_BITS-1:0] op_mod;
|
||||
reg [`NR_BITS-1:0] rd_r, rs1_r, rs2_r, rs3_r;
|
||||
reg [31:0] imm;
|
||||
reg use_rd, use_PC, use_imm;
|
||||
reg is_join, is_wstall;
|
||||
reg [`NUM_REGS-1:0] used_regs;
|
||||
|
||||
wire [31:0] instr = ifetch_rsp_if.data;
|
||||
wire [6:0] opcode = instr[6:0];
|
||||
|
@ -58,36 +58,37 @@ module VX_decode #(
|
|||
wire [12:0] b_imm = {instr[31], instr[7], instr[30:25], instr[11:8], 1'b0};
|
||||
wire [20:0] jal_imm = {instr[31], instr[19:12], instr[20], instr[30:21], 1'b0};
|
||||
wire [11:0] jalr_imm = {func7, rs2};
|
||||
|
||||
`UNUSED_VAR (rs3)
|
||||
|
||||
always @(*) begin
|
||||
|
||||
ex_type = 0;
|
||||
op_type = 'x;
|
||||
op_mod = 0;
|
||||
rd_r = `NR_BITS'(rd);
|
||||
rs1_r = `NR_BITS'(rs1);
|
||||
rs2_r = `NR_BITS'(rs2);
|
||||
rs3_r = `NR_BITS'(rs3);
|
||||
rd_r = 0;
|
||||
rs1_r = 0;
|
||||
rs2_r = 0;
|
||||
rs3_r = 0;
|
||||
imm = 'x;
|
||||
use_imm = 0;
|
||||
use_PC = 0;
|
||||
use_rd = 0;
|
||||
is_join = 0;
|
||||
is_wstall = 0;
|
||||
used_regs = 0;
|
||||
|
||||
case (opcode)
|
||||
`INST_I: begin
|
||||
ex_type = `EX_ALU;
|
||||
case (func3)
|
||||
3'h0: op_type = `OP_BITS'(`ALU_ADD);
|
||||
3'h1: op_type = `OP_BITS'(`ALU_SLL);
|
||||
3'h2: op_type = `OP_BITS'(`ALU_SLT);
|
||||
3'h3: op_type = `OP_BITS'(`ALU_SLTU);
|
||||
3'h4: op_type = `OP_BITS'(`ALU_XOR);
|
||||
3'h5: op_type = (func7[5]) ? `OP_BITS'(`ALU_SRA) : `OP_BITS'(`ALU_SRL);
|
||||
3'h6: op_type = `OP_BITS'(`ALU_OR);
|
||||
3'h7: op_type = `OP_BITS'(`ALU_AND);
|
||||
3'h0: op_type = `INST_OP_BITS'(`INST_ALU_ADD);
|
||||
3'h1: op_type = `INST_OP_BITS'(`INST_ALU_SLL);
|
||||
3'h2: op_type = `INST_OP_BITS'(`INST_ALU_SLT);
|
||||
3'h3: op_type = `INST_OP_BITS'(`INST_ALU_SLTU);
|
||||
3'h4: op_type = `INST_OP_BITS'(`INST_ALU_XOR);
|
||||
3'h5: op_type = (func7[5]) ? `INST_OP_BITS'(`INST_ALU_SRA) : `INST_OP_BITS'(`INST_ALU_SRL);
|
||||
3'h6: op_type = `INST_OP_BITS'(`INST_ALU_OR);
|
||||
3'h7: op_type = `INST_OP_BITS'(`INST_ALU_AND);
|
||||
default:;
|
||||
endcase
|
||||
use_rd = 1;
|
||||
|
@ -101,14 +102,14 @@ module VX_decode #(
|
|||
`ifdef EXT_F_ENABLE
|
||||
if (func7[0]) begin
|
||||
case (func3)
|
||||
3'h0: op_type = `OP_BITS'(`MUL_MUL);
|
||||
3'h1: op_type = `OP_BITS'(`MUL_MULH);
|
||||
3'h2: op_type = `OP_BITS'(`MUL_MULHSU);
|
||||
3'h3: op_type = `OP_BITS'(`MUL_MULHU);
|
||||
3'h4: op_type = `OP_BITS'(`MUL_DIV);
|
||||
3'h5: op_type = `OP_BITS'(`MUL_DIVU);
|
||||
3'h6: op_type = `OP_BITS'(`MUL_REM);
|
||||
3'h7: op_type = `OP_BITS'(`MUL_REMU);
|
||||
3'h0: op_type = `INST_OP_BITS'(`INST_MUL_MUL);
|
||||
3'h1: op_type = `INST_OP_BITS'(`INST_MUL_MULH);
|
||||
3'h2: op_type = `INST_OP_BITS'(`INST_MUL_MULHSU);
|
||||
3'h3: op_type = `INST_OP_BITS'(`INST_MUL_MULHU);
|
||||
3'h4: op_type = `INST_OP_BITS'(`INST_MUL_DIV);
|
||||
3'h5: op_type = `INST_OP_BITS'(`INST_MUL_DIVU);
|
||||
3'h6: op_type = `INST_OP_BITS'(`INST_MUL_REM);
|
||||
3'h7: op_type = `INST_OP_BITS'(`INST_MUL_REMU);
|
||||
default:;
|
||||
endcase
|
||||
op_mod = 2;
|
||||
|
@ -116,14 +117,14 @@ module VX_decode #(
|
|||
`endif
|
||||
begin
|
||||
case (func3)
|
||||
3'h0: op_type = (func7[5]) ? `OP_BITS'(`ALU_SUB) : `OP_BITS'(`ALU_ADD);
|
||||
3'h1: op_type = `OP_BITS'(`ALU_SLL);
|
||||
3'h2: op_type = `OP_BITS'(`ALU_SLT);
|
||||
3'h3: op_type = `OP_BITS'(`ALU_SLTU);
|
||||
3'h4: op_type = `OP_BITS'(`ALU_XOR);
|
||||
3'h5: op_type = (func7[5]) ? `OP_BITS'(`ALU_SRA) : `OP_BITS'(`ALU_SRL);
|
||||
3'h6: op_type = `OP_BITS'(`ALU_OR);
|
||||
3'h7: op_type = `OP_BITS'(`ALU_AND);
|
||||
3'h0: op_type = (func7[5]) ? `INST_OP_BITS'(`INST_ALU_SUB) : `INST_OP_BITS'(`INST_ALU_ADD);
|
||||
3'h1: op_type = `INST_OP_BITS'(`INST_ALU_SLL);
|
||||
3'h2: op_type = `INST_OP_BITS'(`INST_ALU_SLT);
|
||||
3'h3: op_type = `INST_OP_BITS'(`INST_ALU_SLTU);
|
||||
3'h4: op_type = `INST_OP_BITS'(`INST_ALU_XOR);
|
||||
3'h5: op_type = (func7[5]) ? `INST_OP_BITS'(`INST_ALU_SRA) : `INST_OP_BITS'(`INST_ALU_SRL);
|
||||
3'h6: op_type = `INST_OP_BITS'(`INST_ALU_OR);
|
||||
3'h7: op_type = `INST_OP_BITS'(`INST_ALU_AND);
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
@ -134,7 +135,7 @@ module VX_decode #(
|
|||
end
|
||||
`INST_LUI: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `OP_BITS'(`ALU_LUI);
|
||||
op_type = `INST_OP_BITS'(`INST_ALU_LUI);
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
imm = {upper_imm, 12'(0)};
|
||||
|
@ -143,7 +144,7 @@ module VX_decode #(
|
|||
end
|
||||
`INST_AUIPC: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `OP_BITS'(`ALU_AUIPC);
|
||||
op_type = `INST_OP_BITS'(`INST_ALU_AUIPC);
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
use_PC = 1;
|
||||
|
@ -152,7 +153,7 @@ module VX_decode #(
|
|||
end
|
||||
`INST_JAL: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `OP_BITS'(`BR_JAL);
|
||||
op_type = `INST_OP_BITS'(`INST_BR_JAL);
|
||||
op_mod = 1;
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
|
@ -163,7 +164,7 @@ module VX_decode #(
|
|||
end
|
||||
`INST_JALR: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `OP_BITS'(`BR_JALR);
|
||||
op_type = `INST_OP_BITS'(`INST_BR_JALR);
|
||||
op_mod = 1;
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
|
@ -175,12 +176,12 @@ module VX_decode #(
|
|||
`INST_B: begin
|
||||
ex_type = `EX_ALU;
|
||||
case (func3)
|
||||
3'h0: op_type = `OP_BITS'(`BR_EQ);
|
||||
3'h1: op_type = `OP_BITS'(`BR_NE);
|
||||
3'h4: op_type = `OP_BITS'(`BR_LT);
|
||||
3'h5: op_type = `OP_BITS'(`BR_GE);
|
||||
3'h6: op_type = `OP_BITS'(`BR_LTU);
|
||||
3'h7: op_type = `OP_BITS'(`BR_GEU);
|
||||
3'h0: op_type = `INST_OP_BITS'(`INST_BR_EQ);
|
||||
3'h1: op_type = `INST_OP_BITS'(`INST_BR_NE);
|
||||
3'h4: op_type = `INST_OP_BITS'(`INST_BR_LT);
|
||||
3'h5: op_type = `INST_OP_BITS'(`INST_BR_GE);
|
||||
3'h6: op_type = `INST_OP_BITS'(`INST_BR_LTU);
|
||||
3'h7: op_type = `INST_OP_BITS'(`INST_BR_GEU);
|
||||
default:;
|
||||
endcase
|
||||
op_mod = 1;
|
||||
|
@ -193,35 +194,37 @@ module VX_decode #(
|
|||
end
|
||||
`INST_F: begin
|
||||
ex_type = `EX_LSU;
|
||||
op_mod = `MOD_BITS'(!func3[0]); // data fence
|
||||
op_type = `INST_OP_BITS'(func3[0]);
|
||||
op_mod = `INST_MOD_BITS'(1);
|
||||
end
|
||||
`INST_SYS : begin
|
||||
if (func3[1:0] != 0) begin
|
||||
ex_type = `EX_CSR;
|
||||
op_type = `OP_BITS'(func3[1:0]);
|
||||
op_type = `INST_OP_BITS'(func3[1:0]);
|
||||
use_rd = 1;
|
||||
use_imm = func3[2];
|
||||
imm = 32'(u_12); // addr
|
||||
imm[`CSR_ADDR_BITS-1:0] = u_12; // addr
|
||||
`USED_IREG (rd);
|
||||
if (func3[2]) begin
|
||||
rs1_r = `NR_BITS'(rs1); // imm
|
||||
imm[`CSR_ADDR_BITS +: `NRI_BITS] = rs1; // imm
|
||||
end else begin
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
end else begin
|
||||
ex_type = `EX_ALU;
|
||||
case (u_12)
|
||||
12'h000: op_type = `OP_BITS'(`BR_ECALL);
|
||||
12'h001: op_type = `OP_BITS'(`BR_EBREAK);
|
||||
12'h302: op_type = `OP_BITS'(`BR_MRET);
|
||||
12'h102: op_type = `OP_BITS'(`BR_SRET);
|
||||
12'h7B2: op_type = `OP_BITS'(`BR_DRET);
|
||||
12'h000: op_type = `INST_OP_BITS'(`INST_BR_ECALL);
|
||||
12'h001: op_type = `INST_OP_BITS'(`INST_BR_EBREAK);
|
||||
12'h302: op_type = `INST_OP_BITS'(`INST_BR_MRET);
|
||||
12'h102: op_type = `INST_OP_BITS'(`INST_BR_SRET);
|
||||
12'h7B2: op_type = `INST_OP_BITS'(`INST_BR_DRET);
|
||||
default:;
|
||||
endcase
|
||||
op_mod = 1;
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
use_PC = 1;
|
||||
is_wstall = 1;
|
||||
imm = 32'd4;
|
||||
`USED_IREG (rd);
|
||||
end
|
||||
|
@ -231,7 +234,7 @@ module VX_decode #(
|
|||
`endif
|
||||
`INST_L: begin
|
||||
ex_type = `EX_LSU;
|
||||
op_type = `OP_BITS'({1'b0, func3});
|
||||
op_type = `INST_OP_BITS'({1'b0, func3});
|
||||
use_rd = 1;
|
||||
imm = {{20{u_12[11]}}, u_12};
|
||||
`ifdef EXT_F_ENABLE
|
||||
|
@ -247,7 +250,7 @@ module VX_decode #(
|
|||
`endif
|
||||
`INST_S: begin
|
||||
ex_type = `EX_LSU;
|
||||
op_type = `OP_BITS'({1'b1, func3});
|
||||
op_type = `INST_OP_BITS'({1'b1, func3});
|
||||
imm = {{20{s_imm[11]}}, s_imm};
|
||||
`USED_IREG (rs1);
|
||||
`ifdef EXT_F_ENABLE
|
||||
|
@ -263,7 +266,7 @@ module VX_decode #(
|
|||
`INST_FNMSUB,
|
||||
`INST_FNMADD: begin
|
||||
ex_type = `EX_FPU;
|
||||
op_type = `OP_BITS'(opcode[3:0]);
|
||||
op_type = `INST_OP_BITS'(opcode[3:0]);
|
||||
op_mod = func3;
|
||||
use_rd = 1;
|
||||
`USED_FREG (rd);
|
||||
|
@ -280,35 +283,35 @@ module VX_decode #(
|
|||
7'h04, // FSUB
|
||||
7'h08, // FMUL
|
||||
7'h0C: begin // FDIV
|
||||
op_type = `OP_BITS'(func7[3:0]);
|
||||
op_type = `INST_OP_BITS'(func7[3:0]);
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
`USED_FREG (rs2);
|
||||
end
|
||||
7'h2C: begin
|
||||
op_type = `OP_BITS'(`FPU_SQRT);
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_SQRT);
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
end
|
||||
7'h50: begin
|
||||
op_type = `OP_BITS'(`FPU_CMP);
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_CMP);
|
||||
`USED_IREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
`USED_FREG (rs2);
|
||||
end
|
||||
7'h60: begin
|
||||
op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTWUS) : `OP_BITS'(`FPU_CVTWS);
|
||||
op_type = (instr[20]) ? `INST_OP_BITS'(`INST_FPU_CVTWUS) : `INST_OP_BITS'(`INST_FPU_CVTWS);
|
||||
`USED_IREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
end
|
||||
7'h68: begin
|
||||
op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTSWU) : `OP_BITS'(`FPU_CVTSW);
|
||||
op_type = (instr[20]) ? `INST_OP_BITS'(`INST_FPU_CVTSWU) : `INST_OP_BITS'(`INST_FPU_CVTSW);
|
||||
`USED_FREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
7'h10: begin
|
||||
// FSGNJ=0, FSGNJN=1, FSGNJX=2
|
||||
op_type = `OP_BITS'(`FPU_MISC);
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_mod = {1'b0, func3[1:0]};
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
|
@ -316,7 +319,7 @@ module VX_decode #(
|
|||
end
|
||||
7'h14: begin
|
||||
// FMIN=3, FMAX=4
|
||||
op_type = `OP_BITS'(`FPU_MISC);
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_mod = func3[0] ? 4 : 3;
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
|
@ -325,10 +328,10 @@ module VX_decode #(
|
|||
7'h70: begin
|
||||
if (func3[0]) begin
|
||||
// FCLASS
|
||||
op_type = `OP_BITS'(`FPU_CLASS);
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_CLASS);
|
||||
end else begin
|
||||
// FMV.X.W=5
|
||||
op_type = `OP_BITS'(`FPU_MISC);
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_mod = 5;
|
||||
end
|
||||
`USED_IREG (rd);
|
||||
|
@ -336,7 +339,7 @@ module VX_decode #(
|
|||
end
|
||||
7'h78: begin
|
||||
// FMV.W.X=6
|
||||
op_type = `OP_BITS'(`FPU_MISC);
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_mod = 6;
|
||||
`USED_FREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
|
@ -349,26 +352,26 @@ module VX_decode #(
|
|||
ex_type = `EX_GPU;
|
||||
case (func3)
|
||||
3'h0: begin
|
||||
op_type = `OP_BITS'(`GPU_TMC);
|
||||
op_type = rs2[0] ? `INST_OP_BITS'(`INST_GPU_PRED) : `INST_OP_BITS'(`INST_GPU_TMC);
|
||||
is_wstall = 1;
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
3'h1: begin
|
||||
op_type = `OP_BITS'(`GPU_WSPAWN);
|
||||
op_type = `INST_OP_BITS'(`INST_GPU_WSPAWN);
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
end
|
||||
3'h2: begin
|
||||
op_type = `OP_BITS'(`GPU_SPLIT);
|
||||
op_type = `INST_OP_BITS'(`INST_GPU_SPLIT);
|
||||
is_wstall = 1;
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
3'h3: begin
|
||||
op_type = `OP_BITS'(`GPU_JOIN);
|
||||
op_type = `INST_OP_BITS'(`INST_GPU_JOIN);
|
||||
is_join = 1;
|
||||
end
|
||||
3'h4: begin
|
||||
op_type = `OP_BITS'(`GPU_BAR);
|
||||
op_type = `INST_OP_BITS'(`INST_GPU_BAR);
|
||||
is_wstall = 1;
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
|
@ -410,7 +413,6 @@ module VX_decode #(
|
|||
assign decode_if.imm = imm;
|
||||
assign decode_if.use_PC = use_PC;
|
||||
assign decode_if.use_imm = use_imm;
|
||||
assign decode_if.used_regs = used_regs;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
@ -419,19 +421,20 @@ module VX_decode #(
|
|||
assign join_if.valid = ifetch_rsp_fire && is_join;
|
||||
assign join_if.wid = ifetch_rsp_if.wid;
|
||||
|
||||
assign wstall_if.valid = ifetch_rsp_fire && is_wstall;
|
||||
assign wstall_if.valid = ifetch_rsp_fire;
|
||||
assign wstall_if.wid = ifetch_rsp_if.wid;
|
||||
assign wstall_if.stalled = is_wstall;
|
||||
|
||||
assign ifetch_rsp_if.ready = decode_if.ready;
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (decode_if.valid && decode_if.ready) begin
|
||||
$write("%t: core%0d-decode: wid=%0d, PC=%0h, ex=", $time, CORE_ID, decode_if.wid, decode_if.PC);
|
||||
dpi_trace("%d: core%0d-decode: wid=%0d, PC=%0h, ex=", $time, CORE_ID, decode_if.wid, decode_if.PC);
|
||||
print_ex_type(decode_if.ex_type);
|
||||
$write(", op=");
|
||||
dpi_trace(", op=");
|
||||
print_ex_op(decode_if.ex_type, decode_if.op_type, decode_if.op_mod);
|
||||
$write(", mod=%0d, tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, imm=%0h, use_pc=%b, use_imm=%b, use_regs=%b\n", decode_if.op_mod, decode_if.tmask, decode_if.wb, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.rs3, decode_if.imm, decode_if.use_PC, decode_if.use_imm, decode_if.used_regs);
|
||||
dpi_trace(", mod=%0d, tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, imm=%0h, use_pc=%b, use_imm=%b\n", decode_if.op_mod, decode_if.tmask, decode_if.wb, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.rs3, decode_if.imm, decode_if.use_PC, decode_if.use_imm);
|
||||
end
|
||||
end
|
||||
`endif
|
|
@ -14,14 +14,16 @@
|
|||
|
||||
`define NB_BITS `LOG2UP(`NUM_BARRIERS)
|
||||
|
||||
`define REQS_BITS `LOG2UP(NUM_REQS)
|
||||
`define NUM_IREGS 32
|
||||
|
||||
`define NRI_BITS `LOG2UP(`NUM_IREGS)
|
||||
|
||||
`define NTEX_BITS `LOG2UP(`NUM_TEX_UNITS)
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`define NUM_REGS 64
|
||||
`define NUM_REGS (2 * `NUM_IREGS)
|
||||
`else
|
||||
`define NUM_REGS 32
|
||||
`define NUM_REGS `NUM_IREGS
|
||||
`endif
|
||||
|
||||
`define NR_BITS `LOG2UP(`NUM_REGS)
|
||||
|
@ -34,6 +36,16 @@
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define EX_NOP 3'h0
|
||||
`define EX_ALU 3'h1
|
||||
`define EX_LSU 3'h2
|
||||
`define EX_CSR 3'h3
|
||||
`define EX_FPU 3'h4
|
||||
`define EX_GPU 3'h5
|
||||
`define EX_BITS 3
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define INST_LUI 7'b0110111
|
||||
`define INST_AUIPC 7'b0010111
|
||||
`define INST_JAL 7'b1101111
|
||||
|
@ -60,139 +72,126 @@
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define FRM_RNE 3'b000 // round to nearest even
|
||||
`define FRM_RTZ 3'b001 // round to zero
|
||||
`define FRM_RDN 3'b010 // round to -inf
|
||||
`define FRM_RUP 3'b011 // round to +inf
|
||||
`define FRM_RMM 3'b100 // round to nearest max magnitude
|
||||
`define FRM_DYN 3'b111 // dynamic mode
|
||||
`define FRM_BITS 3
|
||||
`define INST_FRM_RNE 3'b000 // round to nearest even
|
||||
`define INST_FRM_RTZ 3'b001 // round to zero
|
||||
`define INST_FRM_RDN 3'b010 // round to -inf
|
||||
`define INST_FRM_RUP 3'b011 // round to +inf
|
||||
`define INST_FRM_RMM 3'b100 // round to nearest max magnitude
|
||||
`define INST_FRM_DYN 3'b111 // dynamic mode
|
||||
`define INST_FRM_BITS 3
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define EX_NOP 3'h0
|
||||
`define EX_ALU 3'h1
|
||||
`define EX_LSU 3'h2
|
||||
`define EX_CSR 3'h3
|
||||
`define EX_FPU 3'h4
|
||||
`define EX_GPU 3'h5
|
||||
`define EX_BITS 3
|
||||
|
||||
`define NUM_EXS 6
|
||||
`define NE_BITS `LOG2UP(`NUM_EXS)
|
||||
`define INST_OP_BITS 4
|
||||
`define INST_MOD_BITS 3
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define OP_BITS 4
|
||||
`define MOD_BITS 3
|
||||
`define INST_ALU_ADD 4'b0000
|
||||
`define INST_ALU_LUI 4'b0010
|
||||
`define INST_ALU_AUIPC 4'b0011
|
||||
`define INST_ALU_SLTU 4'b0100
|
||||
`define INST_ALU_SLT 4'b0101
|
||||
`define INST_ALU_SRL 4'b1000
|
||||
`define INST_ALU_SRA 4'b1001
|
||||
`define INST_ALU_SUB 4'b1011
|
||||
`define INST_ALU_AND 4'b1100
|
||||
`define INST_ALU_OR 4'b1101
|
||||
`define INST_ALU_XOR 4'b1110
|
||||
`define INST_ALU_SLL 4'b1111
|
||||
`define INST_ALU_OTHER 4'b0111
|
||||
`define INST_ALU_BITS 4
|
||||
`define INST_ALU_OP(x) x[`INST_ALU_BITS-1:0]
|
||||
`define INST_ALU_OP_CLASS(x) x[3:2]
|
||||
`define INST_ALU_SIGNED(x) x[0]
|
||||
`define INST_ALU_IS_BR(x) x[0]
|
||||
`define INST_ALU_IS_MUL(x) x[1]
|
||||
|
||||
`define ALU_ADD 4'b0000
|
||||
`define ALU_LUI 4'b0010
|
||||
`define ALU_AUIPC 4'b0011
|
||||
`define ALU_SLTU 4'b0100
|
||||
`define ALU_SLT 4'b0101
|
||||
`define ALU_SRL 4'b1000
|
||||
`define ALU_SRA 4'b1001
|
||||
`define ALU_SUB 4'b1011
|
||||
`define ALU_AND 4'b1100
|
||||
`define ALU_OR 4'b1101
|
||||
`define ALU_XOR 4'b1110
|
||||
`define ALU_SLL 4'b1111
|
||||
`define ALU_OTHER 4'b0111
|
||||
`define ALU_BITS 4
|
||||
`define ALU_OP(x) x[`ALU_BITS-1:0]
|
||||
`define ALU_OP_CLASS(x) x[3:2]
|
||||
`define ALU_SIGNED(x) x[0]
|
||||
`define ALU_IS_BR(x) x[0]
|
||||
`define ALU_IS_MUL(x) x[1]
|
||||
`define INST_BR_EQ 4'b0000
|
||||
`define INST_BR_NE 4'b0010
|
||||
`define INST_BR_LTU 4'b0100
|
||||
`define INST_BR_GEU 4'b0110
|
||||
`define INST_BR_LT 4'b0101
|
||||
`define INST_BR_GE 4'b0111
|
||||
`define INST_BR_JAL 4'b1000
|
||||
`define INST_BR_JALR 4'b1001
|
||||
`define INST_BR_ECALL 4'b1010
|
||||
`define INST_BR_EBREAK 4'b1011
|
||||
`define INST_BR_MRET 4'b1100
|
||||
`define INST_BR_SRET 4'b1101
|
||||
`define INST_BR_DRET 4'b1110
|
||||
`define INST_BR_OTHER 4'b1111
|
||||
`define INST_BR_BITS 4
|
||||
`define INST_BR_NEG(x) x[1]
|
||||
`define INST_BR_LESS(x) x[2]
|
||||
`define INST_BR_STATIC(x) x[3]
|
||||
|
||||
`define BR_EQ 4'b0000
|
||||
`define BR_NE 4'b0010
|
||||
`define BR_LTU 4'b0100
|
||||
`define BR_GEU 4'b0110
|
||||
`define BR_LT 4'b0101
|
||||
`define BR_GE 4'b0111
|
||||
`define BR_JAL 4'b1000
|
||||
`define BR_JALR 4'b1001
|
||||
`define BR_ECALL 4'b1010
|
||||
`define BR_EBREAK 4'b1011
|
||||
`define BR_MRET 4'b1100
|
||||
`define BR_SRET 4'b1101
|
||||
`define BR_DRET 4'b1110
|
||||
`define BR_OTHER 4'b1111
|
||||
`define BR_BITS 4
|
||||
`define BR_OP(x) x[`BR_BITS-1:0]
|
||||
`define BR_NEG(x) x[1]
|
||||
`define BR_LESS(x) x[2]
|
||||
`define BR_STATIC(x) x[3]
|
||||
`define INST_MUL_MUL 3'h0
|
||||
`define INST_MUL_MULH 3'h1
|
||||
`define INST_MUL_MULHSU 3'h2
|
||||
`define INST_MUL_MULHU 3'h3
|
||||
`define INST_MUL_DIV 3'h4
|
||||
`define INST_MUL_DIVU 3'h5
|
||||
`define INST_MUL_REM 3'h6
|
||||
`define INST_MUL_REMU 3'h7
|
||||
`define INST_MUL_BITS 3
|
||||
`define INST_MUL_IS_DIV(x) x[2]
|
||||
|
||||
`define MUL_MUL 3'h0
|
||||
`define MUL_MULH 3'h1
|
||||
`define MUL_MULHSU 3'h2
|
||||
`define MUL_MULHU 3'h3
|
||||
`define MUL_DIV 3'h4
|
||||
`define MUL_DIVU 3'h5
|
||||
`define MUL_REM 3'h6
|
||||
`define MUL_REMU 3'h7
|
||||
`define MUL_BITS 3
|
||||
`define MUL_OP(x) x[`MUL_BITS-1:0]
|
||||
`define MUL_IS_DIV(x) x[2]
|
||||
`define INST_FMT_B 3'b000
|
||||
`define INST_FMT_H 3'b001
|
||||
`define INST_FMT_W 3'b010
|
||||
`define INST_FMT_BU 3'b100
|
||||
`define INST_FMT_HU 3'b101
|
||||
|
||||
`define FMT_B 3'b000
|
||||
`define FMT_H 3'b001
|
||||
`define FMT_W 3'b010
|
||||
`define FMT_BU 3'b100
|
||||
`define FMT_HU 3'b101
|
||||
`define INST_LSU_LB 4'b0000
|
||||
`define INST_LSU_LH 4'b0001
|
||||
`define INST_LSU_LW 4'b0010
|
||||
`define INST_LSU_LBU 4'b0100
|
||||
`define INST_LSU_LHU 4'b0101
|
||||
`define INST_LSU_SB 4'b1000
|
||||
`define INST_LSU_SH 4'b1001
|
||||
`define INST_LSU_SW 4'b1010
|
||||
`define INST_LSU_BITS 4
|
||||
`define INST_LSU_FMT(x) x[2:0]
|
||||
`define INST_LSU_WSIZE(x) x[1:0]
|
||||
`define INST_LSU_IS_FENCE(x) x[0]
|
||||
|
||||
`define LSU_LB 4'b0000
|
||||
`define LSU_LH 4'b0001
|
||||
`define LSU_LW 4'b0010
|
||||
`define LSU_LBU 4'b0100
|
||||
`define LSU_LHU 4'b0101
|
||||
`define LSU_SB 4'b1000
|
||||
`define LSU_SH 4'b1001
|
||||
`define LSU_SW 4'b1010
|
||||
`define LSU_BITS 4
|
||||
`define LSU_FMT(x) x[2:0]
|
||||
`define LSU_WSIZE(x) x[1:0]
|
||||
`define LSU_OP(x) x[`LSU_BITS-1:0]
|
||||
`define LSU_IS_FENCE(x) x[0]
|
||||
`define INST_FENCE_BITS 1
|
||||
`define INST_FENCE_D 1'h0
|
||||
`define INST_FENCE_I 1'h1
|
||||
|
||||
`define CSR_RW 2'h1
|
||||
`define CSR_RS 2'h2
|
||||
`define CSR_RC 2'h3
|
||||
`define CSR_OTHER 2'h0
|
||||
`define CSR_BITS 2
|
||||
`define CSR_OP(x) x[`CSR_BITS-1:0]
|
||||
`define INST_CSR_RW 2'h1
|
||||
`define INST_CSR_RS 2'h2
|
||||
`define INST_CSR_RC 2'h3
|
||||
`define INST_CSR_OTHER 2'h0
|
||||
`define INST_CSR_BITS 2
|
||||
|
||||
`define FPU_ADD 4'h0
|
||||
`define FPU_SUB 4'h4
|
||||
`define FPU_MUL 4'h8
|
||||
`define FPU_DIV 4'hC
|
||||
`define FPU_CVTWS 4'h1 // FCVT.W.S
|
||||
`define FPU_CVTWUS 4'h5 // FCVT.WU.S
|
||||
`define FPU_CVTSW 4'h9 // FCVT.S.W
|
||||
`define FPU_CVTSWU 4'hD // FCVT.S.WU
|
||||
`define FPU_SQRT 4'h2
|
||||
`define FPU_CLASS 4'h6
|
||||
`define FPU_CMP 4'hA
|
||||
`define FPU_MISC 4'hE // SGNJ, SGNJN, SGNJX, FMIN, FMAX, MVXW, MVWX
|
||||
`define FPU_MADD 4'h3
|
||||
`define FPU_MSUB 4'h7
|
||||
`define FPU_NMSUB 4'hB
|
||||
`define FPU_NMADD 4'hF
|
||||
`define FPU_BITS 4
|
||||
`define FPU_OP(x) x[`FPU_BITS-1:0]
|
||||
`define INST_FPU_ADD 4'h0
|
||||
`define INST_FPU_SUB 4'h4
|
||||
`define INST_FPU_MUL 4'h8
|
||||
`define INST_FPU_DIV 4'hC
|
||||
`define INST_FPU_CVTWS 4'h1 // FCVT.W.S
|
||||
`define INST_FPU_CVTWUS 4'h5 // FCVT.WU.S
|
||||
`define INST_FPU_CVTSW 4'h9 // FCVT.S.W
|
||||
`define INST_FPU_CVTSWU 4'hD // FCVT.S.WU
|
||||
`define INST_FPU_SQRT 4'h2
|
||||
`define INST_FPU_CLASS 4'h6
|
||||
`define INST_FPU_CMP 4'hA
|
||||
`define INST_FPU_MISC 4'hE // SGNJ, SGNJN, SGNJX, FMIN, FMAX, MVXW, MVWX
|
||||
`define INST_FPU_MADD 4'h3
|
||||
`define INST_FPU_MSUB 4'h7
|
||||
`define INST_FPU_NMSUB 4'hB
|
||||
`define INST_FPU_NMADD 4'hF
|
||||
`define INST_FPU_BITS 4
|
||||
|
||||
`define GPU_TMC 3'h0
|
||||
`define GPU_WSPAWN 3'h1
|
||||
`define GPU_SPLIT 3'h2
|
||||
`define GPU_JOIN 3'h3
|
||||
`define GPU_BAR 3'h4
|
||||
`define GPU_TEX 3'h5
|
||||
`define GPU_OTHER 3'h7
|
||||
`define GPU_BITS 3
|
||||
`define GPU_OP(x) x[`GPU_BITS-1:0]
|
||||
`define INST_GPU_TMC 3'h0
|
||||
`define INST_GPU_WSPAWN 3'h1
|
||||
`define INST_GPU_SPLIT 3'h2
|
||||
`define INST_GPU_JOIN 3'h3
|
||||
`define INST_GPU_BAR 3'h4
|
||||
`define INST_GPU_PRED 3'h5
|
||||
`define INST_GPU_TEX 3'h6
|
||||
`define INST_GPU_BITS 3
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
@ -244,59 +243,44 @@
|
|||
`endif
|
||||
|
||||
// non-cacheable address bit
|
||||
`define NC_ADDR_BITS 1
|
||||
`define NC_FLAG_BITS 1
|
||||
|
||||
////////////////////////// Icache Configurable Knobs //////////////////////////
|
||||
|
||||
// Cache ID
|
||||
`define ICACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 0)
|
||||
|
||||
// Block size in bytes
|
||||
`define ICACHE_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `MEM_BLOCK_SIZE)
|
||||
|
||||
// Word size in bytes
|
||||
`define IWORD_SIZE 4
|
||||
`define ICACHE_WORD_SIZE 4
|
||||
|
||||
// Number of banks
|
||||
`define INUM_BANKS 1
|
||||
|
||||
// Core request address bits
|
||||
`define ICORE_ADDR_WIDTH (32-`CLOG2(`IWORD_SIZE))
|
||||
|
||||
// Core request byte enable bits
|
||||
`define ICORE_BYTEEN_WIDTH `DWORD_SIZE
|
||||
// Block size in bytes
|
||||
`define ICACHE_LINE_SIZE `L1_BLOCK_SIZE
|
||||
|
||||
// TAG sharing enable
|
||||
`define ICORE_TAG_ID_BITS `NW_BITS
|
||||
`define ICACHE_CORE_TAG_ID_BITS `NW_BITS
|
||||
|
||||
// Core request tag bits
|
||||
`define ICORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `ICORE_TAG_ID_BITS)
|
||||
`define ICACHE_CORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `ICACHE_CORE_TAG_ID_BITS)
|
||||
|
||||
// Memory request data bits
|
||||
`define IMEM_DATA_WIDTH (`ICACHE_LINE_SIZE * 8)
|
||||
`define ICACHE_MEM_DATA_WIDTH (`ICACHE_LINE_SIZE * 8)
|
||||
|
||||
// Memory request address bits
|
||||
`define IMEM_ADDR_WIDTH (32 - `CLOG2(`ICACHE_LINE_SIZE))
|
||||
|
||||
// Memory byte enable bits
|
||||
`define IMEM_BYTEEN_WIDTH `ICACHE_LINE_SIZE
|
||||
`define ICACHE_MEM_ADDR_WIDTH (32 - `CLOG2(`ICACHE_LINE_SIZE))
|
||||
|
||||
// Memory request tag bits
|
||||
`define IMEM_TAG_WIDTH `IMEM_ADDR_WIDTH
|
||||
`define ICACHE_MEM_TAG_WIDTH `CLOG2(`ICACHE_MSHR_SIZE)
|
||||
|
||||
////////////////////////// Dcache Configurable Knobs //////////////////////////
|
||||
|
||||
// Cache ID
|
||||
`define DCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1)
|
||||
|
||||
// Block size in bytes
|
||||
`define DCACHE_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `MEM_BLOCK_SIZE)
|
||||
|
||||
// Word size in bytes
|
||||
`define DWORD_SIZE 4
|
||||
`define DCACHE_WORD_SIZE 4
|
||||
|
||||
// Core request address bits
|
||||
`define DCORE_ADDR_WIDTH (32-`CLOG2(`DWORD_SIZE))
|
||||
// Block size in bytes
|
||||
`define DCACHE_LINE_SIZE `L1_BLOCK_SIZE
|
||||
|
||||
// Core request tag bits
|
||||
`define LSUQ_ADDR_BITS `LOG2UP(`LSUQ_SIZE)
|
||||
|
@ -304,126 +288,127 @@
|
|||
`define LSU_TAG_ID_BITS (`LSUQ_ADDR_BITS + `NC_ADDR_BITS + `SM_ENABLE)
|
||||
`define TEX_TAG_ID_BITS (2)
|
||||
`define LSU_TEX_TAG_ID_BITS `MAX(`LSU_TAG_ID_BITS, `TEX_TAG_ID_BITS)
|
||||
`define DCORE_TAG_ID_BITS (`LSU_TEX_TAG_ID_BITS + 1)
|
||||
`define DCACHE_DCORE_TAG_ID_BITS (`LSU_TEX_TAG_ID_BITS + `NC_FLAG_BITS)
|
||||
`define LSU_DCACHE_TAG_BITS (`DBG_CACHE_REQ_MDATAW + `LSU_TAG_ID_BITS)
|
||||
`define TEX_DCACHE_TAG_BITS (`DBG_CACHE_REQ_MDATAW + `TEX_TAG_ID_BITS)
|
||||
`define LSU_TEX_DCACHE_TAG_BITS (`DBG_CACHE_REQ_MDATAW + `LSU_TEX_TAG_ID_BITS)
|
||||
`else
|
||||
`define DCORE_TAG_ID_BITS (`LSUQ_ADDR_BITS + `NC_ADDR_BITS + `SM_ENABLE)
|
||||
`define DCACHE_DCORE_TAG_ID_BITS (`LSUQ_ADDR_BITS + `NC_ADDR_BITS + `SM_ENABLE)
|
||||
`endif
|
||||
`define DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS)
|
||||
`define DCACHE_DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCACHE_CORE_TAG_ID_BITS)
|
||||
|
||||
// Memory request data bits
|
||||
`define DMEM_DATA_WIDTH (`DCACHE_LINE_SIZE * 8)
|
||||
`define DCACHE_MEM_DATA_WIDTH (`DCACHE_LINE_SIZE * 8)
|
||||
|
||||
// Memory request address bits
|
||||
`define DMEM_ADDR_WIDTH (32 - `CLOG2(`DCACHE_LINE_SIZE))
|
||||
`define DCACHE_MEM_ADDR_WIDTH (32 - `CLOG2(`DCACHE_LINE_SIZE))
|
||||
|
||||
// Memory byte enable bits
|
||||
`define DMEM_BYTEEN_WIDTH `DCACHE_LINE_SIZE
|
||||
`define DCACHE_MEM_BYTEEN_WIDTH `DCACHE_LINE_SIZE
|
||||
|
||||
// Input request size
|
||||
`define DNUM_REQS `NUM_THREADS
|
||||
`define DCACHE_NUM_REQS `NUM_THREADS
|
||||
|
||||
// Memory request tag bits
|
||||
`define _DMEM_ADDR_RATIO_W $clog2(`DCACHE_LINE_SIZE / `DWORD_SIZE)
|
||||
`define _DNC_MEM_TAG_WIDTH ($clog2(`DNUM_REQS) + `_DMEM_ADDR_RATIO_W + `DCORE_TAG_WIDTH)
|
||||
`define DMEM_TAG_WIDTH `MAX((`DMEM_ADDR_WIDTH + `NC_ADDR_BITS), `_DNC_MEM_TAG_WIDTH)
|
||||
`define _DMEM_ADDR_RATIO_W $clog2(`DCACHE_LINE_SIZE / `DCACHE_WORD_SIZE)
|
||||
`define _DNC_MEM_TAG_WIDTH ($clog2(`DCACHE_NUM_REQS) + `_DMEM_ADDR_RATIO_W + `DCACHE_CORE_TAG_WIDTH)
|
||||
`define DCACHE_MEM_TAG_WIDTH `MAX((`CLOG2(`DCACHE_NUM_BANKS) + `CLOG2(`DCACHE_MSHR_SIZE) + `NC_FLAG_BITS), `_DNC_MEM_TAG_WIDTH)
|
||||
|
||||
// Merged D-cache/I-cache memory tag
|
||||
`define L1_MEM_TAG_WIDTH (`MAX(`ICACHE_MEM_TAG_WIDTH, `DCACHE_MEM_TAG_WIDTH) + `CLOG2(2))
|
||||
|
||||
////////////////////////// SM Configurable Knobs //////////////////////////////
|
||||
|
||||
// Cache ID
|
||||
`define SCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 2)
|
||||
`define SMEM_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 2)
|
||||
|
||||
// Word size in bytes
|
||||
`define SWORD_SIZE 4
|
||||
`define SMEM_WORD_SIZE 4
|
||||
|
||||
// bank address offset
|
||||
`define SBANK_ADDR_OFFSET `CLOG2(`STACK_SIZE / `SWORD_SIZE)
|
||||
`define SMEM_BANK_ADDR_OFFSET `CLOG2(`STACK_SIZE / `SMEM_WORD_SIZE)
|
||||
|
||||
// Input request size
|
||||
`define SNUM_REQS `NUM_THREADS
|
||||
`define SMEM_NUM_REQS `NUM_THREADS
|
||||
|
||||
////////////////////////// L2cache Configurable Knobs /////////////////////////
|
||||
|
||||
// Cache ID
|
||||
`define L2CACHE_ID (32'(`L3_ENABLE) + CLUSTER_ID)
|
||||
|
||||
// Block size in bytes
|
||||
`define L2CACHE_LINE_SIZE `MEM_BLOCK_SIZE
|
||||
`define L2_CACHE_ID (32'(`L3_ENABLE) + CLUSTER_ID)
|
||||
|
||||
// Word size in bytes
|
||||
`define L2WORD_SIZE `DCACHE_LINE_SIZE
|
||||
`define L2_WORD_SIZE `DCACHE_LINE_SIZE
|
||||
|
||||
// Block size in bytes
|
||||
`define L2_CACHE_LINE_SIZE ((`L2_ENABLE) ? `MEM_BLOCK_SIZE : `L2_WORD_SIZE)
|
||||
|
||||
// Input request tag bits
|
||||
`define L2CORE_TAG_WIDTH (`DCORE_TAG_WIDTH + `CLOG2(`NUM_CORES))
|
||||
`define L2_CORE_TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH + `CLOG2(`NUM_CORES))
|
||||
|
||||
// Memory request data bits
|
||||
`define L2MEM_DATA_WIDTH (`L2CACHE_LINE_SIZE * 8)
|
||||
`define L2_MEM_DATA_WIDTH (`L2_CACHE_LINE_SIZE * 8)
|
||||
|
||||
// Memory request address bits
|
||||
`define L2MEM_ADDR_WIDTH (32 - `CLOG2(`L2CACHE_LINE_SIZE))
|
||||
`define L2_MEM_ADDR_WIDTH (32 - `CLOG2(`L2_CACHE_LINE_SIZE))
|
||||
|
||||
// Memory byte enable bits
|
||||
`define L2MEM_BYTEEN_WIDTH `L2CACHE_LINE_SIZE
|
||||
`define L2_MEM_BYTEEN_WIDTH `L2_CACHE_LINE_SIZE
|
||||
|
||||
// Input request size
|
||||
`define L2NUM_REQS `NUM_CORES
|
||||
`define L2_NUM_REQS `NUM_CORES
|
||||
|
||||
// Memory request tag bits
|
||||
`define _L2MEM_ADDR_RATIO_W $clog2(`L2CACHE_LINE_SIZE / `L2WORD_SIZE)
|
||||
`define _L2NC_MEM_TAG_WIDTH ($clog2(`L2NUM_REQS) + `_L2MEM_ADDR_RATIO_W + `XMEM_TAG_WIDTH)
|
||||
`define _L2MEM_TAG_WIDTH `MAX((`L2MEM_ADDR_WIDTH + `NC_ADDR_BITS), `_L2NC_MEM_TAG_WIDTH)
|
||||
`define L2MEM_TAG_WIDTH (`L2_ENABLE ? `_L2MEM_TAG_WIDTH : (`XMEM_TAG_WIDTH + `CLOG2(`L2NUM_REQS)))
|
||||
`define _L2_MEM_ADDR_RATIO_W $clog2(`L2_CACHE_LINE_SIZE / `L2_WORD_SIZE)
|
||||
`define _L2_NC_MEM_TAG_WIDTH ($clog2(`L2_NUM_REQS) + `_L2_MEM_ADDR_RATIO_W + `L1_MEM_TAG_WIDTH)
|
||||
`define _L2_MEM_TAG_WIDTH `MAX((`CLOG2(`L2_NUM_BANKS) + `CLOG2(`L2_MSHR_SIZE) + `NC_FLAG_BITS), `_L2_NC_MEM_TAG_WIDTH)
|
||||
`define L2_MEM_TAG_WIDTH ((`L2_ENABLE) ? `_L2_MEM_TAG_WIDTH : (`L1_MEM_TAG_WIDTH + `CLOG2(`L2_NUM_REQS)))
|
||||
|
||||
////////////////////////// L3cache Configurable Knobs /////////////////////////
|
||||
|
||||
// Cache ID
|
||||
`define L3CACHE_ID 0
|
||||
|
||||
// Block size in bytes
|
||||
`define L3CACHE_LINE_SIZE `MEM_BLOCK_SIZE
|
||||
`define L3_CACHE_ID 0
|
||||
|
||||
// Word size in bytes
|
||||
`define L3WORD_SIZE `L2CACHE_LINE_SIZE
|
||||
`define L3_WORD_SIZE `L2_CACHE_LINE_SIZE
|
||||
|
||||
// Block size in bytes
|
||||
`define L3_CACHE_LINE_SIZE ((`L3_ENABLE) ? `MEM_BLOCK_SIZE : `L3_WORD_SIZE)
|
||||
|
||||
// Input request tag bits
|
||||
`define L3CORE_TAG_WIDTH (`L2CORE_TAG_WIDTH + `CLOG2(`NUM_CLUSTERS))
|
||||
`define L3_CORE_TAG_WIDTH (`L2_CORE_TAG_WIDTH + `CLOG2(`NUM_CLUSTERS))
|
||||
|
||||
// Memory request data bits
|
||||
`define L3MEM_DATA_WIDTH (`L3CACHE_LINE_SIZE * 8)
|
||||
`define L3_MEM_DATA_WIDTH (`L3_CACHE_LINE_SIZE * 8)
|
||||
|
||||
// Memory request address bits
|
||||
`define L3MEM_ADDR_WIDTH (32 - `CLOG2(`L3CACHE_LINE_SIZE))
|
||||
`define L3_MEM_ADDR_WIDTH (32 - `CLOG2(`L3_CACHE_LINE_SIZE))
|
||||
|
||||
// Memory byte enable bits
|
||||
`define L3MEM_BYTEEN_WIDTH `L3CACHE_LINE_SIZE
|
||||
`define L3_MEM_BYTEEN_WIDTH `L3_CACHE_LINE_SIZE
|
||||
|
||||
// Input request size
|
||||
`define L3NUM_REQS `NUM_CLUSTERS
|
||||
`define L3_NUM_REQS `NUM_CLUSTERS
|
||||
|
||||
// Memory request tag bits
|
||||
`define _L3MEM_ADDR_RATIO_W $clog2(`L3CACHE_LINE_SIZE / `L3WORD_SIZE)
|
||||
`define _L3NC_MEM_TAG_WIDTH ($clog2(`L3NUM_REQS) + `_L3MEM_ADDR_RATIO_W + `L2MEM_TAG_WIDTH)
|
||||
`define _L3MEM_TAG_WIDTH `MAX((`L3MEM_ADDR_WIDTH + `NC_ADDR_BITS), `_L3NC_MEM_TAG_WIDTH)
|
||||
`define L3MEM_TAG_WIDTH (`L3_ENABLE ? `_L3MEM_TAG_WIDTH : (`L2MEM_TAG_WIDTH + `CLOG2(`L3NUM_REQS)))
|
||||
`define _L3_MEM_ADDR_RATIO_W $clog2(`L3_CACHE_LINE_SIZE / `L3_WORD_SIZE)
|
||||
`define _L3_NC_MEM_TAG_WIDTH ($clog2(`L3_NUM_REQS) + `_L3_MEM_ADDR_RATIO_W + `L2_MEM_TAG_WIDTH)
|
||||
`define _L3_MEM_TAG_WIDTH `MAX((`CLOG2(`L3_NUM_BANKS) + `CLOG2(`L3_MSHR_SIZE) + `NC_FLAG_BITS), `_L3_NC_MEM_TAG_WIDTH)
|
||||
`define L3_MEM_TAG_WIDTH ((`L3_ENABLE) ? `_L3_MEM_TAG_WIDTH : (`L2_MEM_TAG_WIDTH + `CLOG2(`L3_NUM_REQS)))
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define VX_MEM_BYTEEN_WIDTH `L3MEM_BYTEEN_WIDTH
|
||||
`define VX_MEM_ADDR_WIDTH `L3MEM_ADDR_WIDTH
|
||||
`define VX_MEM_DATA_WIDTH `L3MEM_DATA_WIDTH
|
||||
`define VX_MEM_TAG_WIDTH `L3MEM_TAG_WIDTH
|
||||
`define VX_CORE_TAG_WIDTH `L3CORE_TAG_WIDTH
|
||||
`define VX_MEM_BYTEEN_WIDTH `L3_MEM_BYTEEN_WIDTH
|
||||
`define VX_MEM_ADDR_WIDTH `L3_MEM_ADDR_WIDTH
|
||||
`define VX_MEM_DATA_WIDTH `L3_MEM_DATA_WIDTH
|
||||
`define VX_MEM_TAG_WIDTH `L3_MEM_TAG_WIDTH
|
||||
`define VX_CORE_TAG_WIDTH `L3_CORE_TAG_WIDTH
|
||||
`define VX_CSR_ID_WIDTH `LOG2UP(`NUM_CLUSTERS * `NUM_CORES)
|
||||
|
||||
`define TO_FULL_ADDR(x) {x, (32-$bits(x))'(0)}
|
||||
|
||||
// Merged D-cache/I-cache memory tag
|
||||
`define XMEM_TAG_WIDTH (`DMEM_TAG_WIDTH + `CLOG2(2))
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "VX_types.vh"
|
||||
`include "VX_fpu_types.vh"
|
||||
`include "VX_gpu_types.vh"
|
||||
|
||||
`endif
|
||||
|
|
|
@ -9,35 +9,42 @@ module VX_execute #(
|
|||
input wire reset,
|
||||
|
||||
// Dcache interface
|
||||
VX_dcache_req_if dcache_req_if,
|
||||
VX_dcache_rsp_if dcache_rsp_if,
|
||||
VX_dcache_req_if.master dcache_req_if,
|
||||
VX_dcache_rsp_if.slave dcache_rsp_if,
|
||||
|
||||
// commit status
|
||||
VX_cmt_to_csr_if cmt_to_csr_if,
|
||||
// commit interface
|
||||
VX_cmt_to_csr_if.slave cmt_to_csr_if,
|
||||
|
||||
// fetch interface
|
||||
VX_fetch_to_csr_if.slave fetch_to_csr_if,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if perf_memsys_if,
|
||||
VX_perf_pipeline_if perf_pipeline_if,
|
||||
VX_perf_memsys_if.slave perf_memsys_if,
|
||||
VX_perf_pipeline_if.slave perf_pipeline_if,
|
||||
`endif
|
||||
|
||||
// inputs
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_fpu_req_if fpu_req_if,
|
||||
VX_gpu_req_if gpu_req_if,
|
||||
VX_alu_req_if.slave alu_req_if,
|
||||
VX_lsu_req_if.slave lsu_req_if,
|
||||
VX_csr_req_if.slave csr_req_if,
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_fpu_req_if.slave fpu_req_if,
|
||||
`endif
|
||||
VX_gpu_req_if.slave gpu_req_if,
|
||||
|
||||
// outputs
|
||||
VX_branch_ctl_if branch_ctl_if,
|
||||
VX_warp_ctl_if warp_ctl_if,
|
||||
VX_commit_if alu_commit_if,
|
||||
VX_commit_if ld_commit_if,
|
||||
VX_commit_if st_commit_if,
|
||||
VX_commit_if csr_commit_if,
|
||||
VX_commit_if fpu_commit_if,
|
||||
VX_commit_if gpu_commit_if,
|
||||
VX_branch_ctl_if.master branch_ctl_if,
|
||||
VX_warp_ctl_if.master warp_ctl_if,
|
||||
VX_commit_if.master alu_commit_if,
|
||||
VX_commit_if.master ld_commit_if,
|
||||
VX_commit_if.master st_commit_if,
|
||||
VX_commit_if.master csr_commit_if,
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_commit_if.master fpu_commit_if,
|
||||
`endif
|
||||
VX_commit_if.master gpu_commit_if,
|
||||
|
||||
input wire busy
|
||||
input wire busy
|
||||
);
|
||||
VX_fpu_to_csr_if fpu_to_csr_if();
|
||||
|
||||
|
@ -178,8 +185,8 @@ module VX_execute #(
|
|||
.clk (clk),
|
||||
.reset (csr_reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_memsys_if (perf_memsys_if),
|
||||
.perf_pipeline_if (perf_pipeline_if),
|
||||
.perf_memsys_if (perf_memsys_if),
|
||||
.perf_pipeline_if(perf_pipeline_if),
|
||||
`endif
|
||||
.cmt_to_csr_if (cmt_to_csr_if),
|
||||
.fpu_to_csr_if (fpu_to_csr_if),
|
||||
|
@ -188,8 +195,13 @@ module VX_execute #(
|
|||
`endif
|
||||
.csr_req_if (csr_req_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
.fpu_pending (fpu_pending),
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_to_csr_if (fpu_to_csr_if),
|
||||
.fpu_pending (fpu_pending),
|
||||
.pending (csr_pending),
|
||||
`else
|
||||
`UNUSED_PIN (pending),
|
||||
`endif
|
||||
.busy (busy)
|
||||
);
|
||||
|
||||
|
@ -207,22 +219,6 @@ module VX_execute #(
|
|||
.csr_pending (csr_pending),
|
||||
.pending (fpu_pending)
|
||||
);
|
||||
`else
|
||||
`UNUSED_VAR (csr_pending)
|
||||
`UNUSED_VAR (fpu_to_csr_if.read_frm)
|
||||
assign fpu_req_if.ready = 0;
|
||||
assign fpu_commit_if.valid = 0;
|
||||
assign fpu_commit_if.wid = 0;
|
||||
assign fpu_commit_if.PC = 0;
|
||||
assign fpu_commit_if.tmask = 0;
|
||||
assign fpu_commit_if.wb = 0;
|
||||
assign fpu_commit_if.rd = 0;
|
||||
assign fpu_commit_if.data = 0;
|
||||
assign fpu_to_csr_if.write_enable = 0;
|
||||
assign fpu_to_csr_if.write_wid = 0;
|
||||
assign fpu_to_csr_if.write_fflags = 0;
|
||||
assign fpu_to_csr_if.read_wid = 0;
|
||||
assign fpu_pending = 0;
|
||||
`endif
|
||||
|
||||
VX_gpu_unit #(
|
||||
|
@ -244,8 +240,8 @@ module VX_execute #(
|
|||
// special workaround to get RISC-V tests Pass/Fail status
|
||||
wire ebreak /* verilator public */;
|
||||
assign ebreak = alu_req_if.valid && alu_req_if.ready
|
||||
&& `ALU_IS_BR(alu_req_if.op_mod)
|
||||
&& (`BR_OP(alu_req_if.op_type) == `BR_EBREAK
|
||||
|| `BR_OP(alu_req_if.op_type) == `BR_ECALL);
|
||||
&& `INST_ALU_IS_BR(alu_req_if.op_mod)
|
||||
&& (`INST_BR_BITS'(alu_req_if.op_type) == `INST_BR_EBREAK
|
||||
|| `INST_BR_BITS'(alu_req_if.op_type) == `INST_BR_ECALL);
|
||||
|
||||
endmodule
|
|
@ -9,19 +9,23 @@ module VX_fetch #(
|
|||
input wire reset,
|
||||
|
||||
// Icache interface
|
||||
VX_icache_req_if icache_req_if,
|
||||
VX_icache_rsp_if icache_rsp_if,
|
||||
VX_icache_req_if.master icache_req_if,
|
||||
VX_icache_rsp_if.slave icache_rsp_if,
|
||||
|
||||
// inputs
|
||||
VX_wstall_if wstall_if,
|
||||
VX_join_if join_if,
|
||||
VX_branch_ctl_if branch_ctl_if,
|
||||
VX_warp_ctl_if warp_ctl_if,
|
||||
VX_wstall_if.slave wstall_if,
|
||||
VX_join_if.slave join_if,
|
||||
VX_branch_ctl_if.slave branch_ctl_if,
|
||||
VX_warp_ctl_if.slave warp_ctl_if,
|
||||
|
||||
// outputs
|
||||
VX_ifetch_rsp_if ifetch_rsp_if,
|
||||
VX_ifetch_rsp_if.master ifetch_rsp_if,
|
||||
|
||||
output wire busy
|
||||
// csr interface
|
||||
VX_fetch_to_csr_if.master fetch_to_csr_if,
|
||||
|
||||
// busy status
|
||||
output wire busy
|
||||
);
|
||||
|
||||
VX_ifetch_req_if ifetch_req_if();
|
||||
|
@ -32,13 +36,17 @@ module VX_fetch #(
|
|||
`SCOPE_BIND_VX_fetch_warp_sched
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (reset),
|
||||
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.wstall_if (wstall_if),
|
||||
.join_if (join_if),
|
||||
.branch_ctl_if (branch_ctl_if),
|
||||
|
||||
.ifetch_req_if (ifetch_req_if),
|
||||
.ifetch_rsp_if (ifetch_rsp_if),
|
||||
|
||||
.fetch_to_csr_if (fetch_to_csr_if),
|
||||
|
||||
.busy (busy)
|
||||
);
|
||||
|
|
@ -3,21 +3,18 @@
|
|||
module VX_fpu_unit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
// inputs
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_fpu_req_if fpu_req_if,
|
||||
|
||||
// outputs
|
||||
VX_fpu_to_csr_if fpu_to_csr_if,
|
||||
VX_commit_if fpu_commit_if,
|
||||
VX_fpu_req_if.slave fpu_req_if,
|
||||
VX_fpu_to_csr_if.master fpu_to_csr_if,
|
||||
VX_commit_if.master fpu_commit_if,
|
||||
|
||||
input wire[`NUM_WARPS-1:0] csr_pending,
|
||||
output wire[`NUM_WARPS-1:0] pending
|
||||
);
|
||||
|
||||
import fpu_types::*;
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam FPUQ_BITS = `LOG2UP(`FPUQ_SIZE);
|
||||
|
||||
|
@ -65,7 +62,7 @@ module VX_fpu_unit #(
|
|||
|
||||
// resolve dynamic FRM from CSR
|
||||
assign fpu_to_csr_if.read_wid = fpu_req_if.wid;
|
||||
wire [`FRM_BITS-1:0] fpu_frm = (fpu_req_if.op_mod == `FRM_DYN) ? fpu_to_csr_if.read_frm : fpu_req_if.op_mod;
|
||||
wire [`INST_FRM_BITS-1:0] fpu_frm = (fpu_req_if.op_mod == `INST_FRM_DYN) ? fpu_to_csr_if.read_frm : fpu_req_if.op_mod;
|
||||
|
||||
`ifdef FPU_DPI
|
||||
|
||||
|
@ -183,7 +180,7 @@ module VX_fpu_unit #(
|
|||
wire stall_out = ~fpu_commit_if.ready && fpu_commit_if.valid;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `FFG_BITS),
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `FFLAGS_BITS),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
|
@ -1,37 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
`TRACING_OFF
|
||||
|
||||
module VX_gpr_ram_f #(
|
||||
parameter DATAW = 1,
|
||||
parameter DEPTH = 1,
|
||||
parameter ADDRW = $clog2(DEPTH)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire wren,
|
||||
input wire [ADDRW-1:0] waddr,
|
||||
input wire [DATAW-1:0] wdata,
|
||||
input wire [ADDRW-1:0] raddr1,
|
||||
input wire [ADDRW-1:0] raddr2,
|
||||
input wire [ADDRW-1:0] raddr3,
|
||||
output wire [DATAW-1:0] rdata1,
|
||||
output wire [DATAW-1:0] rdata2,
|
||||
output wire [DATAW-1:0] rdata3
|
||||
);
|
||||
reg [DATAW-1:0] mem [DEPTH-1:0];
|
||||
|
||||
initial mem = '{default: 0};
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (wren) begin
|
||||
mem [waddr] <= wdata;
|
||||
end
|
||||
end
|
||||
|
||||
assign rdata1 = mem [raddr1];
|
||||
assign rdata2 = mem [raddr2];
|
||||
assign rdata3 = mem [raddr3];
|
||||
|
||||
endmodule
|
||||
|
||||
`TRACING_ON
|
|
@ -1,34 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
`TRACING_OFF
|
||||
|
||||
module VX_gpr_ram_i #(
|
||||
parameter DATAW = 1,
|
||||
parameter DEPTH = 1,
|
||||
parameter ADDRW = $clog2(DEPTH)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire wren,
|
||||
input wire [ADDRW-1:0] waddr,
|
||||
input wire [DATAW-1:0] wdata,
|
||||
input wire [ADDRW-1:0] raddr1,
|
||||
input wire [ADDRW-1:0] raddr2,
|
||||
output wire [DATAW-1:0] rdata1,
|
||||
output wire [DATAW-1:0] rdata2
|
||||
);
|
||||
reg [DATAW-1:0] mem [DEPTH-1:0];
|
||||
|
||||
initial mem = '{default: 0};
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (wren) begin
|
||||
mem [waddr] <= wdata;
|
||||
end
|
||||
end
|
||||
|
||||
assign rdata1 = mem [raddr1];
|
||||
assign rdata2 = mem [raddr2];
|
||||
|
||||
endmodule
|
||||
|
||||
`TRACING_ON
|
91
hw/rtl/VX_gpr_stage.sv
Normal file
91
hw/rtl/VX_gpr_stage.sv
Normal file
|
@ -0,0 +1,91 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_gpr_stage #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_writeback_if.slave writeback_if,
|
||||
VX_gpr_req_if.slave gpr_req_if,
|
||||
|
||||
// outputs
|
||||
VX_gpr_rsp_if.master gpr_rsp_if
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
localparam RAM_SIZE = `NUM_WARPS * `NUM_REGS;
|
||||
|
||||
// ensure r0 never gets written, which can happen before the reset
|
||||
wire write_enable = writeback_if.valid && (writeback_if.rd != 0);
|
||||
|
||||
wire [`NUM_THREADS-1:0] wren;
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign wren[i] = write_enable && writeback_if.tmask[i];
|
||||
end
|
||||
|
||||
wire [$clog2(RAM_SIZE)-1:0] waddr, raddr1, raddr2;
|
||||
assign waddr = {writeback_if.wid, writeback_if.rd};
|
||||
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
|
||||
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
VX_dp_ram #(
|
||||
.DATAW (32),
|
||||
.SIZE (RAM_SIZE),
|
||||
.INIT_ENABLE (1),
|
||||
.INIT_VALUE (0)
|
||||
) dp_ram1 (
|
||||
.clk (clk),
|
||||
.wren (wren[i]),
|
||||
.waddr (waddr),
|
||||
.wdata (writeback_if.data[i]),
|
||||
.raddr (raddr1),
|
||||
.rdata (gpr_rsp_if.rs1_data[i])
|
||||
);
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (32),
|
||||
.SIZE (RAM_SIZE),
|
||||
.INIT_ENABLE (1),
|
||||
.INIT_VALUE (0)
|
||||
) dp_ram2 (
|
||||
.clk (clk),
|
||||
.wren (wren[i]),
|
||||
.waddr (waddr),
|
||||
.wdata (writeback_if.data[i]),
|
||||
.raddr (raddr2),
|
||||
.rdata (gpr_rsp_if.rs2_data[i])
|
||||
);
|
||||
end
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire [$clog2(RAM_SIZE)-1:0] raddr3;
|
||||
assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3};
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
VX_dp_ram #(
|
||||
.DATAW (32),
|
||||
.SIZE (RAM_SIZE),
|
||||
.INIT_ENABLE (1),
|
||||
.INIT_VALUE (0)
|
||||
) dp_ram3 (
|
||||
.clk (clk),
|
||||
.wren (wren[i]),
|
||||
.waddr (waddr),
|
||||
.wdata (writeback_if.data[i]),
|
||||
.raddr (raddr3),
|
||||
.rdata (gpr_rsp_if.rs3_data[i])
|
||||
);
|
||||
end
|
||||
`else
|
||||
`UNUSED_VAR (gpr_req_if.rs3)
|
||||
assign gpr_rsp_if.rs3_data = 'x;
|
||||
`endif
|
||||
|
||||
assign writeback_if.ready = 1'b1;
|
||||
|
||||
endmodule
|
|
@ -1,87 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_gpr_stage #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_writeback_if writeback_if,
|
||||
VX_gpr_req_if gpr_req_if,
|
||||
|
||||
// outputs
|
||||
VX_gpr_rsp_if gpr_rsp_if
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
// ensure r0 never gets written, which can happen before the reset
|
||||
wire write_enable = writeback_if.valid && (writeback_if.rd != 0);
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
localparam RAM_DEPTH = `NUM_WARPS * `NUM_REGS;
|
||||
wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2, rdata3;
|
||||
wire [$clog2(RAM_DEPTH)-1:0] waddr, raddr1, raddr2, raddr3;
|
||||
|
||||
assign waddr = {writeback_if.wid, writeback_if.rd};
|
||||
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
|
||||
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
|
||||
assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3};
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
VX_gpr_ram_f #(
|
||||
.DATAW (32),
|
||||
.DEPTH (RAM_DEPTH)
|
||||
) gpr_ram_f (
|
||||
.clk (clk),
|
||||
.wren (write_enable && writeback_if.tmask[i]),
|
||||
.waddr (waddr),
|
||||
.wdata (writeback_if.data[i]),
|
||||
.raddr1 (raddr1),
|
||||
.raddr2 (raddr2),
|
||||
.raddr3 (raddr3),
|
||||
.rdata1 (rdata1[i]),
|
||||
.rdata2 (rdata2[i]),
|
||||
.rdata3 (rdata3[i])
|
||||
);
|
||||
end
|
||||
|
||||
assign gpr_rsp_if.rs1_data = rdata1;
|
||||
assign gpr_rsp_if.rs2_data = rdata2;
|
||||
assign gpr_rsp_if.rs3_data = rdata3;
|
||||
`else
|
||||
localparam RAM_DEPTH = `NUM_WARPS * `NUM_REGS;
|
||||
wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2;
|
||||
wire [$clog2(RAM_DEPTH)-1:0] waddr, raddr1, raddr2;
|
||||
|
||||
assign waddr = {writeback_if.wid, writeback_if.rd};
|
||||
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
|
||||
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
|
||||
`UNUSED_VAR (gpr_req_if.rs3)
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
VX_gpr_ram_i #(
|
||||
.DATAW (32),
|
||||
.DEPTH (RAM_DEPTH)
|
||||
) gpr_ram_i (
|
||||
.clk (clk),
|
||||
.wren (write_enable && writeback_if.tmask[i]),
|
||||
.waddr (waddr),
|
||||
.wdata (writeback_if.data[i]),
|
||||
.raddr1 (raddr1),
|
||||
.raddr2 (raddr2),
|
||||
.rdata1 (rdata1[i]),
|
||||
.rdata2 (rdata2[i])
|
||||
);
|
||||
end
|
||||
|
||||
assign gpr_rsp_if.rs1_data = rdata1;
|
||||
assign gpr_rsp_if.rs2_data = rdata2;
|
||||
assign gpr_rsp_if.rs3_data = 0;
|
||||
`endif
|
||||
|
||||
assign writeback_if.ready = 1'b1;
|
||||
|
||||
endmodule
|
43
hw/rtl/VX_gpu_types.vh
Normal file
43
hw/rtl/VX_gpu_types.vh
Normal file
|
@ -0,0 +1,43 @@
|
|||
`ifndef VX_GPU_TYPES
|
||||
`define VX_GPU_TYPES
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
package gpu_types;
|
||||
|
||||
typedef struct packed {
|
||||
logic valid;
|
||||
logic [`NUM_THREADS-1:0] tmask;
|
||||
} gpu_tmc_t;
|
||||
|
||||
`define GPU_TMC_BITS $bits(gpu_types::gpu_tmc_t)
|
||||
|
||||
typedef struct packed {
|
||||
logic valid;
|
||||
logic [`NUM_WARPS-1:0] wmask;
|
||||
logic [31:0] pc;
|
||||
} gpu_wspawn_t;
|
||||
|
||||
`define GPU_WSPAWN_BITS $bits(gpu_types::gpu_wspawn_t)
|
||||
|
||||
typedef struct packed {
|
||||
logic valid;
|
||||
logic diverged;
|
||||
logic [`NUM_THREADS-1:0] then_tmask;
|
||||
logic [`NUM_THREADS-1:0] else_tmask;
|
||||
logic [31:0] pc;
|
||||
} gpu_split_t;
|
||||
|
||||
`define GPU_SPLIT_BITS $bits(gpu_types::gpu_split_t)
|
||||
|
||||
typedef struct packed {
|
||||
logic valid;
|
||||
logic [`NB_BITS-1:0] id;
|
||||
logic [`NW_BITS-1:0] size_m1;
|
||||
} gpu_barrier_t;
|
||||
|
||||
`define GPU_BARRIER_BITS $bits(gpu_types::gpu_barrier_t)
|
||||
|
||||
endpackage
|
||||
|
||||
`endif
|
|
@ -5,11 +5,11 @@ module VX_gpu_unit #(
|
|||
) (
|
||||
`SCOPE_IO_VX_gpu_unit
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Inputs
|
||||
VX_gpu_req_if gpu_req_if,
|
||||
VX_gpu_req_if.slave gpu_req_if,
|
||||
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
VX_tex_csr_if tex_csr_if,
|
||||
|
@ -19,9 +19,10 @@ module VX_gpu_unit #(
|
|||
`endif
|
||||
|
||||
// Outputs
|
||||
VX_warp_ctl_if warp_ctl_if,
|
||||
VX_commit_if gpu_commit_if
|
||||
VX_warp_ctl_if.master warp_ctl_if,
|
||||
VX_commit_if.master gpu_commit_if
|
||||
);
|
||||
import gpu_types::*;
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
|
@ -47,26 +48,32 @@ module VX_gpu_unit #(
|
|||
|
||||
wire stall_in, stall_out;
|
||||
|
||||
wire is_wspawn = (gpu_req_if.op_type == `GPU_WSPAWN);
|
||||
wire is_tmc = (gpu_req_if.op_type == `GPU_TMC);
|
||||
wire is_split = (gpu_req_if.op_type == `GPU_SPLIT);
|
||||
wire is_bar = (gpu_req_if.op_type == `GPU_BAR);
|
||||
wire is_wspawn = (gpu_req_if.op_type == `INST_GPU_WSPAWN);
|
||||
wire is_tmc = (gpu_req_if.op_type == `INST_GPU_TMC);
|
||||
wire is_split = (gpu_req_if.op_type == `INST_GPU_SPLIT);
|
||||
wire is_bar = (gpu_req_if.op_type == `INST_GPU_BAR);
|
||||
|
||||
// tmc
|
||||
|
||||
wire [`NUM_THREADS-1:0] tmc_new_mask;
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign tmc_new_mask[i] = (i < gpu_req_if.rs1_data[0]);
|
||||
end
|
||||
assign tmc.valid = is_tmc;
|
||||
assign tmc.tmask = tmc_new_mask;
|
||||
wire taken = (gpu_req_if.rs1_data[i] != 0);
|
||||
assign taken_tmask[i] = gpu_req_if.tmask[i] & taken;
|
||||
assign not_taken_tmask[i] = gpu_req_if.tmask[i] & ~taken;
|
||||
end
|
||||
|
||||
// tmc
|
||||
|
||||
wire [`NUM_THREADS-1:0] pred_mask = (taken_tmask != 0) ? taken_tmask : gpu_req_if.tmask;
|
||||
|
||||
assign tmc.valid = is_tmc || is_pred;
|
||||
assign tmc.tmask = is_pred ? pred_mask : rs1_data[`NUM_THREADS-1:0];
|
||||
|
||||
// wspawn
|
||||
|
||||
wire [31:0] wspawn_pc = gpu_req_if.rs2_data[0];
|
||||
wire [`NUM_WARPS-1:0] wspawn_wmask;
|
||||
for (genvar i = 0; i < `NUM_WARPS; i++) begin
|
||||
assign wspawn_wmask[i] = (i < gpu_req_if.rs1_data[0]);
|
||||
assign wspawn_wmask[i] = (i < rs1_data);
|
||||
end
|
||||
assign wspawn.valid = is_wspawn;
|
||||
assign wspawn.wmask = wspawn_wmask;
|
||||
|
@ -74,20 +81,11 @@ module VX_gpu_unit #(
|
|||
|
||||
// split
|
||||
|
||||
wire [`NUM_THREADS-1:0] split_then_mask;
|
||||
wire [`NUM_THREADS-1:0] split_else_mask;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire taken = gpu_req_if.rs1_data[i][0];
|
||||
assign split_then_mask[i] = gpu_req_if.tmask[i] & taken;
|
||||
assign split_else_mask[i] = gpu_req_if.tmask[i] & ~taken;
|
||||
end
|
||||
|
||||
assign split.valid = is_split;
|
||||
assign split.diverged = (| split_then_mask) && (| split_else_mask);
|
||||
assign split.then_mask = split_then_mask;
|
||||
assign split.else_mask = split_else_mask;
|
||||
assign split.pc = gpu_req_if.next_PC;
|
||||
assign split.valid = is_split;
|
||||
assign split.diverged = (| taken_tmask) && (| not_taken_tmask);
|
||||
assign split.then_tmask = taken_tmask;
|
||||
assign split.else_tmask = not_taken_tmask;
|
||||
assign split.pc = gpu_req_if.next_PC;
|
||||
|
||||
// barrier
|
||||
|
||||
|
@ -200,9 +198,9 @@ module VX_gpu_unit #(
|
|||
|
||||
`SCOPE_ASSIGN (gpu_rsp_valid, warp_ctl_if.valid);
|
||||
`SCOPE_ASSIGN (gpu_rsp_wid, warp_ctl_if.wid);
|
||||
`SCOPE_ASSIGN (gpu_rsp_tmc, warp_ctl_if.tmc);
|
||||
`SCOPE_ASSIGN (gpu_rsp_wspawn, warp_ctl_if.wspawn);
|
||||
`SCOPE_ASSIGN (gpu_rsp_split, warp_ctl_if.split);
|
||||
`SCOPE_ASSIGN (gpu_rsp_barrier, warp_ctl_if.barrier);
|
||||
`SCOPE_ASSIGN (gpu_rsp_tmc, warp_ctl_if.tmc.valid);
|
||||
`SCOPE_ASSIGN (gpu_rsp_wspawn, warp_ctl_if.wspawn.valid);
|
||||
`SCOPE_ASSIGN (gpu_rsp_split, warp_ctl_if.split.valid);
|
||||
`SCOPE_ASSIGN (gpu_rsp_barrier, warp_ctl_if.barrier.valid);
|
||||
|
||||
endmodule
|
|
@ -7,17 +7,16 @@ module VX_ibuffer #(
|
|||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_decode_if decode_if,
|
||||
VX_decode_if.slave decode_if,
|
||||
|
||||
// outputs
|
||||
VX_ibuffer_if ibuffer_if
|
||||
VX_ibuffer_if.master ibuffer_if
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
localparam DATAW = `NUM_THREADS + 32 + `EX_BITS + `OP_BITS + `FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1 + `NUM_REGS;
|
||||
localparam SIZE = 3;
|
||||
localparam ADDRW = $clog2(SIZE);
|
||||
localparam DATAW = `NUM_THREADS + 32 + `EX_BITS + `INST_OP_BITS + `INST_FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1;
|
||||
localparam ADDRW = $clog2(`IBUF_SIZE+1);
|
||||
localparam NWARPSW = $clog2(`NUM_WARPS+1);
|
||||
|
||||
reg [`NUM_WARPS-1:0][ADDRW-1:0] used_r;
|
||||
|
@ -36,14 +35,16 @@ module VX_ibuffer #(
|
|||
wire writing = enq_fire && (i == decode_if.wid);
|
||||
wire reading = deq_fire && (i == ibuffer_if.wid);
|
||||
|
||||
wire is_head_ptr = empty_r[i] || (alm_empty_r[i] && reading);
|
||||
wire going_empty = empty_r[i] || (alm_empty_r[i] && reading);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (DATAW)
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (`IBUF_SIZE),
|
||||
.OUT_REG (1)
|
||||
) queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (writing && !is_head_ptr),
|
||||
.valid_in (writing && !going_empty),
|
||||
.data_in (q_data_in),
|
||||
.ready_out(reading),
|
||||
.data_out (q_data_prev[i]),
|
||||
|
@ -63,7 +64,7 @@ module VX_ibuffer #(
|
|||
empty_r[i] <= 0;
|
||||
if (used_r[i] == 1)
|
||||
alm_empty_r[i] <= 0;
|
||||
if (used_r[i] == ADDRW'(SIZE-1))
|
||||
if (used_r[i] == ADDRW'(`IBUF_SIZE))
|
||||
full_r[i] <= 1;
|
||||
end
|
||||
end else if (reading) begin
|
||||
|
@ -76,7 +77,7 @@ module VX_ibuffer #(
|
|||
used_r[i] <= used_r[i] + ADDRW'($signed(2'(writing) - 2'(reading)));
|
||||
end
|
||||
|
||||
if (writing && is_head_ptr) begin
|
||||
if (writing && going_empty) begin
|
||||
q_data_out[i] <= q_data_in;
|
||||
end else if (reading) begin
|
||||
q_data_out[i] <= q_data_prev[i];
|
||||
|
@ -97,6 +98,8 @@ module VX_ibuffer #(
|
|||
reg [DATAW-1:0] deq_instr, deq_instr_n;
|
||||
reg [NWARPSW-1:0] num_warps;
|
||||
|
||||
`UNUSED_VAR (deq_instr)
|
||||
|
||||
// calculate valid table
|
||||
always @(*) begin
|
||||
valid_table_n = valid_table;
|
||||
|
@ -114,11 +117,11 @@ module VX_ibuffer #(
|
|||
) rr_arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (ibuffer_if.ready),
|
||||
.requests (valid_table_n),
|
||||
.grant_index (deq_wid_rr_n),
|
||||
`UNUSED_PIN (grant_onehot),
|
||||
`UNUSED_PIN (grant_valid)
|
||||
`UNUSED_PIN (grant_valid),
|
||||
`UNUSED_PIN (grant_onehot),
|
||||
`UNUSED_PIN (enable)
|
||||
);
|
||||
|
||||
// schedule the next instruction to issue
|
||||
|
@ -146,11 +149,10 @@ module VX_ibuffer #(
|
|||
valid_table <= 0;
|
||||
deq_valid <= 0;
|
||||
num_warps <= 0;
|
||||
deq_wid_rr <= 0;
|
||||
end else begin
|
||||
valid_table <= valid_table_n;
|
||||
deq_valid <= deq_valid_n;
|
||||
deq_wid_rr <= deq_wid_rr_n;
|
||||
|
||||
|
||||
if (warp_added && !warp_removed) begin
|
||||
num_warps <= num_warps + NWARPSW'(1);
|
||||
|
@ -159,42 +161,48 @@ module VX_ibuffer #(
|
|||
end
|
||||
end
|
||||
|
||||
deq_wid <= deq_wid_n;
|
||||
deq_instr <= deq_instr_n;
|
||||
deq_wid <= deq_wid_n;
|
||||
deq_wid_rr <= deq_wid_rr_n;
|
||||
deq_instr <= deq_instr_n;
|
||||
end
|
||||
|
||||
assign decode_if.ready = ~q_full[decode_if.wid];
|
||||
|
||||
assign q_data_in = {decode_if.tmask,
|
||||
decode_if.PC,
|
||||
decode_if.ex_type,
|
||||
decode_if.op_type,
|
||||
decode_if.op_mod,
|
||||
decode_if.wb,
|
||||
decode_if.wb,
|
||||
decode_if.use_PC,
|
||||
decode_if.use_imm,
|
||||
decode_if.imm,
|
||||
decode_if.rd,
|
||||
decode_if.rs1,
|
||||
decode_if.rs2,
|
||||
decode_if.rs3,
|
||||
decode_if.imm,
|
||||
decode_if.use_PC,
|
||||
decode_if.use_imm,
|
||||
decode_if.used_regs};
|
||||
decode_if.rs3};
|
||||
|
||||
assign ibuffer_if.valid = deq_valid;
|
||||
assign ibuffer_if.wid = deq_wid;
|
||||
assign ibuffer_if.wid_n = deq_wid_n;
|
||||
assign {ibuffer_if.tmask,
|
||||
ibuffer_if.PC,
|
||||
ibuffer_if.ex_type,
|
||||
ibuffer_if.op_type,
|
||||
ibuffer_if.op_mod,
|
||||
ibuffer_if.wb,
|
||||
ibuffer_if.wb,
|
||||
ibuffer_if.use_PC,
|
||||
ibuffer_if.use_imm,
|
||||
ibuffer_if.imm,
|
||||
ibuffer_if.rd,
|
||||
ibuffer_if.rs1,
|
||||
ibuffer_if.rs2,
|
||||
ibuffer_if.rs3,
|
||||
ibuffer_if.imm,
|
||||
ibuffer_if.use_PC,
|
||||
ibuffer_if.use_imm,
|
||||
ibuffer_if.used_regs} = deq_instr;
|
||||
ibuffer_if.rs3} = deq_instr;
|
||||
|
||||
// scoreboard forwarding
|
||||
assign ibuffer_if.wid_n = deq_wid_n;
|
||||
assign ibuffer_if.rd_n = deq_instr_n[3*`NR_BITS +: `NR_BITS];
|
||||
assign ibuffer_if.rs1_n = deq_instr_n[2*`NR_BITS +: `NR_BITS];
|
||||
assign ibuffer_if.rs2_n = deq_instr_n[1*`NR_BITS +: `NR_BITS];
|
||||
assign ibuffer_if.rs3_n = deq_instr_n[0*`NR_BITS +: `NR_BITS];
|
||||
|
||||
endmodule
|
|
@ -5,24 +5,24 @@ module VX_icache_stage #(
|
|||
) (
|
||||
`SCOPE_IO_VX_icache_stage
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Icache interface
|
||||
VX_icache_req_if icache_req_if,
|
||||
VX_icache_rsp_if icache_rsp_if,
|
||||
VX_icache_req_if.master icache_req_if,
|
||||
VX_icache_rsp_if.slave icache_rsp_if,
|
||||
|
||||
// request
|
||||
VX_ifetch_req_if ifetch_req_if,
|
||||
VX_ifetch_req_if.slave ifetch_req_if,
|
||||
|
||||
// reponse
|
||||
VX_ifetch_rsp_if ifetch_rsp_if
|
||||
VX_ifetch_rsp_if.master ifetch_rsp_if
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
localparam OUTPUT_REG = 0;
|
||||
localparam OUT_REG = 0;
|
||||
|
||||
wire icache_req_fire = icache_req_if.valid && icache_req_if.ready;
|
||||
|
||||
|
@ -33,20 +33,21 @@ module VX_icache_stage #(
|
|||
wire [`NUM_THREADS-1:0] rsp_tmask;
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW(32 + `NUM_THREADS),
|
||||
.SIZE(`NUM_WARPS),
|
||||
.FASTRAM(1)
|
||||
.DATAW (32 + `NUM_THREADS),
|
||||
.SIZE (`NUM_WARPS),
|
||||
.LUTRAM (1)
|
||||
) req_metadata (
|
||||
.clk(clk),
|
||||
.waddr(req_tag),
|
||||
.raddr(rsp_tag),
|
||||
.wren(icache_req_fire),
|
||||
.byteen(1'b1),
|
||||
.rden(ifetch_rsp_if.valid),
|
||||
.din({ifetch_req_if.PC, ifetch_req_if.tmask}),
|
||||
.dout({rsp_PC, rsp_tmask})
|
||||
.clk (clk),
|
||||
.wren (icache_req_fire),
|
||||
.waddr (req_tag),
|
||||
.wdata ({ifetch_req_if.PC, ifetch_req_if.tmask}),
|
||||
.raddr (rsp_tag),
|
||||
.rdata ({rsp_PC, rsp_tmask})
|
||||
);
|
||||
|
||||
`RUNTIME_ASSERT((!ifetch_req_if.valid || ifetch_req_if.PC >= `STARTUP_ADDR),
|
||||
("invalid PC=%0h, wid=%0d, tmask=%b", ifetch_req_if.PC, ifetch_req_if.wid, ifetch_req_if.tmask))
|
||||
|
||||
// Icache Request
|
||||
assign icache_req_if.valid = ifetch_req_if.valid;
|
||||
assign icache_req_if.addr = ifetch_req_if.PC[31:2];
|
||||
|
@ -62,12 +63,12 @@ module VX_icache_stage #(
|
|||
|
||||
wire [`NW_BITS-1:0] rsp_wid = rsp_tag;
|
||||
|
||||
wire stall_out = ~ifetch_rsp_if.ready && (0 == OUTPUT_REG && ifetch_rsp_if.valid);
|
||||
wire stall_out = ~ifetch_rsp_if.ready && (0 == OUT_REG && ifetch_rsp_if.valid);
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + 32),
|
||||
.RESETW (1),
|
||||
.DEPTH (OUTPUT_REG)
|
||||
.DEPTH (OUT_REG)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -90,10 +91,10 @@ module VX_icache_stage #(
|
|||
`ifdef DBG_PRINT_CORE_ICACHE
|
||||
always @(posedge clk) begin
|
||||
if (icache_req_if.valid && icache_req_if.ready) begin
|
||||
$display("%t: I$%0d req: wid=%0d, PC=%0h", $time, CORE_ID, ifetch_req_if.wid, ifetch_req_if.PC);
|
||||
dpi_trace("%d: I$%0d req: wid=%0d, PC=%0h\n", $time, CORE_ID, ifetch_req_if.wid, ifetch_req_if.PC);
|
||||
end
|
||||
if (ifetch_rsp_if.valid && ifetch_rsp_if.ready) begin
|
||||
$display("%t: I$%0d rsp: wid=%0d, PC=%0h, data=%0h", $time, CORE_ID, ifetch_rsp_if.wid, ifetch_rsp_if.PC, ifetch_rsp_if.data);
|
||||
dpi_trace("%d: I$%0d rsp: wid=%0d, PC=%0h, data=%0h\n", $time, CORE_ID, ifetch_rsp_if.wid, ifetch_rsp_if.PC, ifetch_rsp_if.data);
|
||||
end
|
||||
end
|
||||
`endif
|
159
hw/rtl/VX_instr_demux.sv
Normal file
159
hw/rtl/VX_instr_demux.sv
Normal file
|
@ -0,0 +1,159 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_instr_demux (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_ibuffer_if.slave ibuffer_if,
|
||||
VX_gpr_rsp_if.slave gpr_rsp_if,
|
||||
|
||||
// outputs
|
||||
VX_alu_req_if.master alu_req_if,
|
||||
VX_lsu_req_if.master lsu_req_if,
|
||||
VX_csr_req_if.master csr_req_if,
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_fpu_req_if.master fpu_req_if,
|
||||
`endif
|
||||
VX_gpu_req_if.master gpu_req_if
|
||||
);
|
||||
wire [`NT_BITS-1:0] tid;
|
||||
wire alu_req_ready;
|
||||
wire lsu_req_ready;
|
||||
wire csr_req_ready;
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire fpu_req_ready;
|
||||
`endif
|
||||
wire gpu_req_ready;
|
||||
|
||||
VX_lzc #(
|
||||
.N (`NUM_THREADS)
|
||||
) tid_select (
|
||||
.in_i (ibuffer_if.tmask),
|
||||
.cnt_o (tid),
|
||||
`UNUSED_PIN (valid_o)
|
||||
);
|
||||
|
||||
wire [31:0] next_PC = ibuffer_if.PC + 4;
|
||||
|
||||
// ALU unit
|
||||
|
||||
wire alu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_ALU);
|
||||
wire [`INST_ALU_BITS-1:0] alu_op_type = `INST_ALU_BITS'(ibuffer_if.op_type);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `INST_ALU_BITS + `INST_MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
|
||||
.OUT_REG (1)
|
||||
) alu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (alu_req_valid),
|
||||
.ready_in (alu_req_ready),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, alu_op_type, ibuffer_if.op_mod, ibuffer_if.imm, ibuffer_if.use_PC, ibuffer_if.use_imm, ibuffer_if.rd, ibuffer_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||
.data_out ({alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.op_mod, alu_req_if.imm, alu_req_if.use_PC, alu_req_if.use_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data}),
|
||||
.valid_out (alu_req_if.valid),
|
||||
.ready_out (alu_req_if.ready)
|
||||
);
|
||||
|
||||
// lsu unit
|
||||
|
||||
wire lsu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_LSU);
|
||||
wire [`INST_LSU_BITS-1:0] lsu_op_type = `INST_LSU_BITS'(ibuffer_if.op_type);
|
||||
wire lsu_is_fence = `INST_LSU_IS_FENCE(ibuffer_if.op_mod);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `INST_LSU_BITS + 1 + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
|
||||
.OUT_REG (1)
|
||||
) lsu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (lsu_req_valid),
|
||||
.ready_in (lsu_req_ready),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, lsu_op_type, lsu_is_fence, ibuffer_if.imm, ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||
.data_out ({lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.op_type, lsu_req_if.is_fence, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data}),
|
||||
.valid_out (lsu_req_if.valid),
|
||||
.ready_out (lsu_req_if.ready)
|
||||
);
|
||||
|
||||
// csr unit
|
||||
|
||||
wire csr_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_CSR);
|
||||
wire [`INST_CSR_BITS-1:0] csr_op_type = `INST_CSR_BITS'(ibuffer_if.op_type);
|
||||
wire [`CSR_ADDR_BITS-1:0] csr_addr = ibuffer_if.imm[`CSR_ADDR_BITS-1:0];
|
||||
wire [`NRI_BITS-1:0] csr_imm = ibuffer_if.imm[`CSR_ADDR_BITS +: `NRI_BITS];
|
||||
wire [31:0] csr_rs1_data = gpr_rsp_if.rs1_data[tid];
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `INST_CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NRI_BITS + 32),
|
||||
.OUT_REG (1)
|
||||
) csr_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (csr_req_valid),
|
||||
.ready_in (csr_req_ready),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, csr_op_type, csr_addr, ibuffer_if.rd, ibuffer_if.wb, ibuffer_if.use_imm, csr_imm, csr_rs1_data}),
|
||||
.data_out ({csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.op_type, csr_req_if.addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.use_imm, csr_req_if.imm, csr_req_if.rs1_data}),
|
||||
.valid_out (csr_req_if.valid),
|
||||
.ready_out (csr_req_if.ready)
|
||||
);
|
||||
|
||||
// fpu unit
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire fpu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_FPU);
|
||||
wire [`INST_FPU_BITS-1:0] fpu_op_type = `INST_FPU_BITS'(ibuffer_if.op_type);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `INST_FPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
|
||||
.OUT_REG (1)
|
||||
) fpu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (fpu_req_valid),
|
||||
.ready_in (fpu_req_ready),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, fpu_op_type, ibuffer_if.op_mod, ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
|
||||
.data_out ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.op_type, fpu_req_if.op_mod, fpu_req_if.rd, fpu_req_if.wb, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data}),
|
||||
.valid_out (fpu_req_if.valid),
|
||||
.ready_out (fpu_req_if.ready)
|
||||
);
|
||||
`else
|
||||
`UNUSED_VAR (gpr_rsp_if.rs3_data)
|
||||
`endif
|
||||
|
||||
// gpu unit
|
||||
|
||||
wire gpu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_GPU);
|
||||
wire [`INST_GPU_BITS-1:0] gpu_op_type = `INST_GPU_BITS'(ibuffer_if.op_type);
|
||||
wire [31:0] gpu_rs2_data = gpr_rsp_if.rs2_data[tid];
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `INST_GPU_BITS + `NR_BITS + 1 + + `NT_BITS + (`NUM_THREADS * 32 + 32)),
|
||||
.OUT_REG (1)
|
||||
) gpu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (gpu_req_valid),
|
||||
.ready_in (gpu_req_ready),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, gpu_op_type, ibuffer_if.rd, ibuffer_if.wb, tid, gpr_rsp_if.rs1_data, gpu_rs2_data}),
|
||||
.data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.tid, gpu_req_if.rs1_data, gpu_req_if.rs2_data}),
|
||||
.valid_out (gpu_req_if.valid),
|
||||
.ready_out (gpu_req_if.ready)
|
||||
);
|
||||
|
||||
// can take next request?
|
||||
reg ready_r;
|
||||
always @(*) begin
|
||||
case (ibuffer_if.ex_type)
|
||||
`EX_ALU: ready_r = alu_req_ready;
|
||||
`EX_LSU: ready_r = lsu_req_ready;
|
||||
`EX_CSR: ready_r = csr_req_ready;
|
||||
`ifdef EXT_F_ENABLE
|
||||
`EX_FPU: ready_r = fpu_req_ready;
|
||||
`endif
|
||||
`EX_GPU: ready_r = gpu_req_ready;
|
||||
default: ready_r = 1'b1; // ignore NOPs
|
||||
endcase
|
||||
end
|
||||
assign ibuffer_if.ready = ready_r;
|
||||
|
||||
endmodule
|
|
@ -1,146 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_instr_demux (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_ibuffer_if ibuffer_if,
|
||||
VX_gpr_rsp_if gpr_rsp_if,
|
||||
|
||||
// outputs
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_fpu_req_if fpu_req_if,
|
||||
VX_gpu_req_if gpu_req_if
|
||||
);
|
||||
wire [`NT_BITS-1:0] tid;
|
||||
wire alu_req_ready;
|
||||
wire lsu_req_ready;
|
||||
wire csr_req_ready;
|
||||
wire fpu_req_ready;
|
||||
wire gpu_req_ready;
|
||||
|
||||
VX_priority_encoder #(
|
||||
.N (`NUM_THREADS)
|
||||
) tid_select (
|
||||
.data_in (ibuffer_if.tmask),
|
||||
.index (tid),
|
||||
`UNUSED_PIN (onehot),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
wire [31:0] next_PC = ibuffer_if.PC + 4;
|
||||
|
||||
// ALU unit
|
||||
|
||||
wire alu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_ALU);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BITS + `MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
|
||||
.OUTPUT_REG (1)
|
||||
) alu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (alu_req_valid),
|
||||
.ready_in (alu_req_ready),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, `ALU_OP(ibuffer_if.op_type), ibuffer_if.op_mod, ibuffer_if.imm, ibuffer_if.use_PC, ibuffer_if.use_imm, ibuffer_if.rd, ibuffer_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||
.data_out ({alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.op_mod, alu_req_if.imm, alu_req_if.use_PC, alu_req_if.use_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data}),
|
||||
.valid_out (alu_req_if.valid),
|
||||
.ready_out (alu_req_if.ready)
|
||||
);
|
||||
|
||||
// lsu unit
|
||||
|
||||
wire lsu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_LSU);
|
||||
wire lsu_is_fence = `LSU_IS_FENCE(ibuffer_if.op_mod);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 1 + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
|
||||
.OUTPUT_REG (1)
|
||||
) lsu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (lsu_req_valid),
|
||||
.ready_in (lsu_req_ready),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, `LSU_OP(ibuffer_if.op_type), lsu_is_fence, ibuffer_if.imm, ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||
.data_out ({lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.op_type, lsu_req_if.is_fence, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data}),
|
||||
.valid_out (lsu_req_if.valid),
|
||||
.ready_out (lsu_req_if.ready)
|
||||
);
|
||||
|
||||
// csr unit
|
||||
|
||||
wire csr_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_CSR);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32),
|
||||
.OUTPUT_REG (1)
|
||||
) csr_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (csr_req_valid),
|
||||
.ready_in (csr_req_ready),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, `CSR_OP(ibuffer_if.op_type), ibuffer_if.imm[`CSR_ADDR_BITS-1:0], ibuffer_if.rd, ibuffer_if.wb, ibuffer_if.use_imm, ibuffer_if.rs1, gpr_rsp_if.rs1_data[0]}),
|
||||
.data_out ({csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.op_type, csr_req_if.addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.use_imm, csr_req_if.rs1, csr_req_if.rs1_data}),
|
||||
.valid_out (csr_req_if.valid),
|
||||
.ready_out (csr_req_if.ready)
|
||||
);
|
||||
|
||||
// fpu unit
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire fpu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_FPU);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
|
||||
.OUTPUT_REG (1)
|
||||
) fpu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (fpu_req_valid),
|
||||
.ready_in (fpu_req_ready),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, `FPU_OP(ibuffer_if.op_type), ibuffer_if.op_mod, ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
|
||||
.data_out ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.op_type, fpu_req_if.op_mod, fpu_req_if.rd, fpu_req_if.wb, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data}),
|
||||
.valid_out (fpu_req_if.valid),
|
||||
.ready_out (fpu_req_if.ready)
|
||||
);
|
||||
`else
|
||||
`UNUSED_VAR (gpr_rsp_if.rs3_data)
|
||||
assign fpu_req_ready = 0;
|
||||
`endif
|
||||
|
||||
// gpu unit
|
||||
|
||||
wire gpu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_GPU);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
|
||||
.OUTPUT_REG (1)
|
||||
) gpu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (gpu_req_valid),
|
||||
.ready_in (gpu_req_ready),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, `GPU_OP(ibuffer_if.op_type), ibuffer_if.op_mod, ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
|
||||
.data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.op_mod, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.rs1_data, gpu_req_if.rs2_data, gpu_req_if.rs3_data}),
|
||||
.valid_out (gpu_req_if.valid),
|
||||
.ready_out (gpu_req_if.ready)
|
||||
);
|
||||
|
||||
// can take next request?
|
||||
reg ready_r;
|
||||
always @(*) begin
|
||||
case (ibuffer_if.ex_type)
|
||||
`EX_ALU: ready_r = alu_req_ready;
|
||||
`EX_LSU: ready_r = lsu_req_ready;
|
||||
`EX_CSR: ready_r = csr_req_ready;
|
||||
`EX_FPU: ready_r = fpu_req_ready;
|
||||
`EX_GPU: ready_r = gpu_req_ready;
|
||||
default: ready_r = 1'b1; // ignore NOPs
|
||||
endcase
|
||||
end
|
||||
assign ibuffer_if.ready = ready_r;
|
||||
|
||||
endmodule
|
|
@ -6,11 +6,13 @@ module VX_ipdom_stack #(
|
|||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire pair,
|
||||
input wire [WIDTH - 1:0] q1,
|
||||
input wire [WIDTH - 1:0] q2,
|
||||
output wire [WIDTH - 1:0] d,
|
||||
input wire push,
|
||||
input wire pop,
|
||||
output wire index,
|
||||
output wire empty,
|
||||
output wire full
|
||||
);
|
||||
|
@ -38,32 +40,29 @@ module VX_ipdom_stack #(
|
|||
end
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW(WIDTH * 2),
|
||||
.SIZE(DEPTH),
|
||||
.RWCHECK(1),
|
||||
.FASTRAM(1)
|
||||
.DATAW (WIDTH * 2),
|
||||
.SIZE (DEPTH),
|
||||
.LUTRAM (1)
|
||||
) store (
|
||||
.clk(clk),
|
||||
.waddr(wr_ptr),
|
||||
.raddr(rd_ptr),
|
||||
.wren(push),
|
||||
.byteen(1'b1),
|
||||
.rden(pop),
|
||||
.din({q2, q1}),
|
||||
.dout({d2, d1})
|
||||
.clk (clk),
|
||||
.wren (push),
|
||||
.waddr (wr_ptr),
|
||||
.wdata ({q2, q1}),
|
||||
.raddr (rd_ptr),
|
||||
.rdata ({d2, d1})
|
||||
);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (push) begin
|
||||
is_part[wr_ptr] <= 0;
|
||||
is_part[wr_ptr] <= ~pair;
|
||||
end else if (pop) begin
|
||||
is_part[rd_ptr] <= 1;
|
||||
end
|
||||
end
|
||||
wire p = is_part[rd_ptr];
|
||||
|
||||
assign d = p ? d1 : d2;
|
||||
assign empty = ~(| wr_ptr);
|
||||
assign index = is_part[rd_ptr];
|
||||
assign d = index ? d1 : d2;
|
||||
assign empty = (ADDRW'(0) == wr_ptr);
|
||||
assign full = (ADDRW'(DEPTH-1) == wr_ptr);
|
||||
|
||||
endmodule
|
|
@ -9,30 +9,76 @@ module VX_issue #(
|
|||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_pipeline_if perf_pipeline_if,
|
||||
VX_perf_pipeline_if.master perf_pipeline_if,
|
||||
`endif
|
||||
|
||||
VX_decode_if decode_if,
|
||||
VX_writeback_if writeback_if,
|
||||
VX_decode_if.slave decode_if,
|
||||
VX_writeback_if.slave writeback_if,
|
||||
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_fpu_req_if fpu_req_if,
|
||||
VX_gpu_req_if gpu_req_if
|
||||
VX_alu_req_if.master alu_req_if,
|
||||
VX_lsu_req_if.master lsu_req_if,
|
||||
VX_csr_req_if.master csr_req_if,
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_fpu_req_if.master fpu_req_if,
|
||||
`endif
|
||||
VX_gpu_req_if.master gpu_req_if
|
||||
);
|
||||
VX_ibuffer_if ibuffer_if();
|
||||
VX_ibuffer_if execute_if();
|
||||
VX_gpr_req_if gpr_req_if();
|
||||
VX_gpr_rsp_if gpr_rsp_if();
|
||||
|
||||
wire scoreboard_delay;
|
||||
VX_gpr_req_if gpr_req_if();
|
||||
assign gpr_req_if.wid = ibuffer_if.wid;
|
||||
assign gpr_req_if.rs1 = ibuffer_if.rs1;
|
||||
assign gpr_req_if.rs2 = ibuffer_if.rs2;
|
||||
assign gpr_req_if.rs3 = ibuffer_if.rs3;
|
||||
|
||||
VX_writeback_if sboard_wb_if();
|
||||
assign sboard_wb_if.valid = writeback_if.valid;
|
||||
assign sboard_wb_if.wid = writeback_if.wid;
|
||||
assign sboard_wb_if.PC = writeback_if.PC;
|
||||
assign sboard_wb_if.rd = writeback_if.rd;
|
||||
assign sboard_wb_if.eop = writeback_if.eop;
|
||||
assign sboard_wb_if.ready = writeback_if.ready;
|
||||
|
||||
VX_ibuffer_if sboard_ib_if();
|
||||
assign sboard_ib_if.valid = ibuffer_if.valid && idmux_ib_if.ready;
|
||||
assign sboard_ib_if.wid = ibuffer_if.wid;
|
||||
assign sboard_ib_if.PC = ibuffer_if.PC;
|
||||
assign sboard_ib_if.wb = ibuffer_if.wb;
|
||||
assign sboard_ib_if.rd = ibuffer_if.rd;
|
||||
assign sboard_ib_if.rd_n = ibuffer_if.rd_n;
|
||||
assign sboard_ib_if.rs1_n = ibuffer_if.rs1_n;
|
||||
assign sboard_ib_if.rs2_n = ibuffer_if.rs2_n;
|
||||
assign sboard_ib_if.rs3_n = ibuffer_if.rs3_n;
|
||||
assign sboard_ib_if.wid_n = ibuffer_if.wid_n;
|
||||
|
||||
VX_ibuffer_if idmux_ib_if();
|
||||
assign idmux_ib_if.valid = ibuffer_if.valid && sboard_ib_if.ready;
|
||||
assign idmux_ib_if.wid = ibuffer_if.wid;
|
||||
assign idmux_ib_if.tmask = ibuffer_if.tmask;
|
||||
assign idmux_ib_if.PC = ibuffer_if.PC;
|
||||
assign idmux_ib_if.ex_type = ibuffer_if.ex_type;
|
||||
assign idmux_ib_if.op_type = ibuffer_if.op_type;
|
||||
assign idmux_ib_if.op_mod = ibuffer_if.op_mod;
|
||||
assign idmux_ib_if.wb = ibuffer_if.wb;
|
||||
assign idmux_ib_if.rd = ibuffer_if.rd;
|
||||
assign idmux_ib_if.rs1 = ibuffer_if.rs1;
|
||||
assign idmux_ib_if.imm = ibuffer_if.imm;
|
||||
assign idmux_ib_if.use_PC = ibuffer_if.use_PC;
|
||||
assign idmux_ib_if.use_imm = ibuffer_if.use_imm;
|
||||
|
||||
// issue the instruction
|
||||
assign ibuffer_if.ready = sboard_ib_if.ready && idmux_ib_if.ready;
|
||||
|
||||
`RESET_RELAY (ibuf_reset);
|
||||
`RESET_RELAY (gpr_reset);
|
||||
`RESET_RELAY (demux_reset);
|
||||
|
||||
VX_ibuffer #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) ibuffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (ibuf_reset),
|
||||
.decode_if (decode_if),
|
||||
.ibuffer_if (ibuffer_if)
|
||||
);
|
||||
|
@ -42,54 +88,33 @@ module VX_issue #(
|
|||
) scoreboard (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.ibuffer_if (ibuffer_if),
|
||||
.writeback_if(writeback_if),
|
||||
.delay (scoreboard_delay)
|
||||
.ibuffer_if (sboard_ib_if),
|
||||
.writeback_if(sboard_wb_if)
|
||||
);
|
||||
|
||||
assign gpr_req_if.wid = ibuffer_if.wid;
|
||||
assign gpr_req_if.rs1 = ibuffer_if.rs1;
|
||||
assign gpr_req_if.rs2 = ibuffer_if.rs2;
|
||||
assign gpr_req_if.rs3 = ibuffer_if.rs3;
|
||||
|
||||
VX_gpr_stage #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) gpr_stage (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (gpr_reset),
|
||||
.writeback_if (writeback_if),
|
||||
.gpr_req_if (gpr_req_if),
|
||||
.gpr_rsp_if (gpr_rsp_if)
|
||||
);
|
||||
|
||||
assign execute_if.valid = ibuffer_if.valid && ~scoreboard_delay;
|
||||
assign execute_if.wid = ibuffer_if.wid;
|
||||
assign execute_if.tmask = ibuffer_if.tmask;
|
||||
assign execute_if.PC = ibuffer_if.PC;
|
||||
assign execute_if.ex_type = ibuffer_if.ex_type;
|
||||
assign execute_if.op_type = ibuffer_if.op_type;
|
||||
assign execute_if.op_mod = ibuffer_if.op_mod;
|
||||
assign execute_if.wb = ibuffer_if.wb;
|
||||
assign execute_if.rd = ibuffer_if.rd;
|
||||
assign execute_if.rs1 = ibuffer_if.rs1;
|
||||
assign execute_if.imm = ibuffer_if.imm;
|
||||
assign execute_if.use_PC = ibuffer_if.use_PC;
|
||||
assign execute_if.use_imm = ibuffer_if.use_imm;
|
||||
|
||||
VX_instr_demux instr_demux (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.ibuffer_if (execute_if),
|
||||
.reset (demux_reset),
|
||||
.ibuffer_if (idmux_ib_if),
|
||||
.gpr_rsp_if (gpr_rsp_if),
|
||||
.alu_req_if (alu_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_req_if (fpu_req_if),
|
||||
`endif
|
||||
.gpu_req_if (gpu_req_if)
|
||||
);
|
||||
|
||||
// issue the instruction
|
||||
assign ibuffer_if.ready = !scoreboard_delay && execute_if.ready;
|
||||
);
|
||||
|
||||
`SCOPE_ASSIGN (issue_fire, ibuffer_if.valid && ibuffer_if.ready);
|
||||
`SCOPE_ASSIGN (issue_wid, ibuffer_if.wid);
|
||||
|
@ -106,8 +131,8 @@ module VX_issue #(
|
|||
`SCOPE_ASSIGN (issue_imm, ibuffer_if.imm);
|
||||
`SCOPE_ASSIGN (issue_use_pc, ibuffer_if.use_PC);
|
||||
`SCOPE_ASSIGN (issue_use_imm, ibuffer_if.use_imm);
|
||||
`SCOPE_ASSIGN (scoreboard_delay, scoreboard_delay);
|
||||
`SCOPE_ASSIGN (execute_delay, ~execute_if.ready);
|
||||
`SCOPE_ASSIGN (scoreboard_delay, !sboard_wb_if.ready);
|
||||
`SCOPE_ASSIGN (execute_delay, !idmux_ib_if.ready);
|
||||
`SCOPE_ASSIGN (gpr_rsp_a, gpr_rsp_if.rs1_data);
|
||||
`SCOPE_ASSIGN (gpr_rsp_b, gpr_rsp_if.rs2_data);
|
||||
`SCOPE_ASSIGN (gpr_rsp_c, gpr_rsp_if.rs3_data);
|
||||
|
@ -145,7 +170,7 @@ module VX_issue #(
|
|||
if (decode_if.valid & !decode_if.ready) begin
|
||||
perf_ibf_stalls <= perf_ibf_stalls + `PERF_CTR_BITS'd1;
|
||||
end
|
||||
if (ibuffer_if.valid & scoreboard_delay) begin
|
||||
if (ibuffer_if.valid & !sboard_wb_if.ready) begin
|
||||
perf_scb_stalls <= perf_scb_stalls + `PERF_CTR_BITS'd1;
|
||||
end
|
||||
if (alu_req_if.valid & !alu_req_if.ready) begin
|
||||
|
@ -182,46 +207,48 @@ module VX_issue #(
|
|||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (alu_req_if.valid && alu_req_if.ready) begin
|
||||
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=ALU, tmask=%b, rd=%0d, rs1_data=",
|
||||
dpi_trace("%d: core%0d-issue: wid=%0d, PC=%0h, ex=ALU, tmask=%b, rd=%0d, rs1_data=",
|
||||
$time, CORE_ID, alu_req_if.wid, alu_req_if.PC, alu_req_if.tmask, alu_req_if.rd);
|
||||
`PRINT_ARRAY1D(alu_req_if.rs1_data, `NUM_THREADS);
|
||||
$write(", rs2_data=");
|
||||
`PRINT_ARRAY1D(alu_req_if.rs2_data, `NUM_THREADS);
|
||||
$write("\n");
|
||||
`TRACE_ARRAY1D(alu_req_if.rs1_data, `NUM_THREADS);
|
||||
dpi_trace(", rs2_data=");
|
||||
`TRACE_ARRAY1D(alu_req_if.rs2_data, `NUM_THREADS);
|
||||
dpi_trace("\n");
|
||||
end
|
||||
if (lsu_req_if.valid && lsu_req_if.ready) begin
|
||||
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rd=%0d, offset=%0h, addr=",
|
||||
dpi_trace("%d: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rd=%0d, offset=%0h, addr=",
|
||||
$time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rd, lsu_req_if.offset);
|
||||
`PRINT_ARRAY1D(lsu_req_if.base_addr, `NUM_THREADS);
|
||||
$write(", data=");
|
||||
`PRINT_ARRAY1D(lsu_req_if.store_data, `NUM_THREADS);
|
||||
$write("\n");
|
||||
`TRACE_ARRAY1D(lsu_req_if.base_addr, `NUM_THREADS);
|
||||
dpi_trace(", data=");
|
||||
`TRACE_ARRAY1D(lsu_req_if.store_data, `NUM_THREADS);
|
||||
dpi_trace("\n");
|
||||
end
|
||||
if (csr_req_if.valid && csr_req_if.ready) begin
|
||||
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, rs1_data=",
|
||||
dpi_trace("%d: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, rs1_data=",
|
||||
$time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.addr);
|
||||
`PRINT_ARRAY1D(csr_req_if.rs1_data, `NUM_THREADS);
|
||||
$write("\n");
|
||||
`TRACE_ARRAY1D(csr_req_if.rs1_data, `NUM_THREADS);
|
||||
dpi_trace("\n");
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (fpu_req_if.valid && fpu_req_if.ready) begin
|
||||
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rd=%0d, rs1_data=",
|
||||
dpi_trace("%d: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rd=%0d, rs1_data=",
|
||||
$time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rd);
|
||||
`PRINT_ARRAY1D(fpu_req_if.rs1_data, `NUM_THREADS);
|
||||
$write(", rs2_data=");
|
||||
`PRINT_ARRAY1D(fpu_req_if.rs2_data, `NUM_THREADS);
|
||||
$write(", rs3_data=");
|
||||
`PRINT_ARRAY1D(fpu_req_if.rs3_data, `NUM_THREADS);
|
||||
$write("\n");
|
||||
`TRACE_ARRAY1D(fpu_req_if.rs1_data, `NUM_THREADS);
|
||||
dpi_trace(", rs2_data=");
|
||||
`TRACE_ARRAY1D(fpu_req_if.rs2_data, `NUM_THREADS);
|
||||
dpi_trace(", rs3_data=");
|
||||
`TRACE_ARRAY1D(fpu_req_if.rs3_data, `NUM_THREADS);
|
||||
dpi_trace("\n");
|
||||
end
|
||||
`endif
|
||||
if (gpu_req_if.valid && gpu_req_if.ready) begin
|
||||
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rd=%0d, rs1_data=",
|
||||
dpi_trace("%d: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rd=%0d, rs1_data=",
|
||||
$time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rd);
|
||||
`PRINT_ARRAY1D(gpu_req_if.rs1_data, `NUM_THREADS);
|
||||
$write(", rs2_data=");
|
||||
`PRINT_ARRAY1D(gpu_req_if.rs2_data, `NUM_THREADS);
|
||||
$write(", rs3_data=");
|
||||
`PRINT_ARRAY1D(gpu_req_if.rs3_data, `NUM_THREADS);
|
||||
$write("\n");
|
||||
`TRACE_ARRAY1D(gpu_req_if.rs1_data, `NUM_THREADS);
|
||||
dpi_trace(", rs2_data=");
|
||||
`TRACE_ARRAY1D(gpu_req_if.rs2_data, `NUM_THREADS);
|
||||
dpi_trace(", rs3_data=");
|
||||
`TRACE_ARRAY1D(gpu_req_if.rs3_data, `NUM_THREADS);
|
||||
dpi_trace("\n");
|
||||
end
|
||||
end
|
||||
`endif
|
|
@ -5,27 +5,26 @@ module VX_lsu_unit #(
|
|||
) (
|
||||
`SCOPE_IO_VX_lsu_unit
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Dcache interface
|
||||
VX_dcache_req_if dcache_req_if,
|
||||
VX_dcache_rsp_if dcache_rsp_if,
|
||||
VX_dcache_req_if.master dcache_req_if,
|
||||
VX_dcache_rsp_if.slave dcache_rsp_if,
|
||||
|
||||
// inputs
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_lsu_req_if.slave lsu_req_if,
|
||||
|
||||
// outputs
|
||||
VX_commit_if ld_commit_if,
|
||||
VX_commit_if st_commit_if
|
||||
VX_commit_if.master ld_commit_if,
|
||||
VX_commit_if.master st_commit_if
|
||||
);
|
||||
localparam MEM_ASHIFT = `CLOG2(`MEM_BLOCK_SIZE);
|
||||
localparam MEM_ADDRW = 32 - MEM_ASHIFT;
|
||||
|
||||
localparam REQ_ASHIFT = `CLOG2(`DWORD_SIZE);
|
||||
localparam REQ_ADDRW = 32 - REQ_ASHIFT;
|
||||
localparam REQ_ASHIFT = `CLOG2(`DCACHE_WORD_SIZE);
|
||||
|
||||
localparam ADDR_TYPEW = `NC_ADDR_BITS + `SM_ENABLE;
|
||||
localparam ADDR_TYPEW = `NC_FLAG_BITS + `SM_ENABLE;
|
||||
|
||||
`STATIC_ASSERT(0 == (`IO_BASE_ADDR % MEM_ASHIFT), ("invalid parameter"))
|
||||
`STATIC_ASSERT(0 == (`SMEM_BASE_ADDR % MEM_ASHIFT), ("invalid parameter"))
|
||||
|
@ -34,7 +33,7 @@ module VX_lsu_unit #(
|
|||
wire req_valid;
|
||||
wire [`NUM_THREADS-1:0] req_tmask;
|
||||
wire [`NUM_THREADS-1:0][31:0] req_addr;
|
||||
wire [`LSU_BITS-1:0] req_type;
|
||||
wire [`INST_LSU_BITS-1:0] req_type;
|
||||
wire [`NUM_THREADS-1:0][31:0] req_data;
|
||||
wire [`NR_BITS-1:0] req_rd;
|
||||
wire req_wb;
|
||||
|
@ -52,9 +51,9 @@ module VX_lsu_unit #(
|
|||
end
|
||||
|
||||
// detect duplicate addresses
|
||||
wire [`NUM_THREADS-1:0] addr_matches;
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign addr_matches[i] = (full_addr[0] == full_addr[i]) || ~lsu_req_if.tmask[i];
|
||||
wire [`NUM_THREADS-2:0] addr_matches;
|
||||
for (genvar i = 0; i < (`NUM_THREADS-1); i++) begin
|
||||
assign addr_matches[i] = (lsu_req_if.base_addr[i+1] == lsu_req_if.base_addr[0]) || ~lsu_req_if.tmask[i+1];
|
||||
end
|
||||
wire lsu_is_dup = lsu_req_if.tmask[0] && (& addr_matches);
|
||||
|
||||
|
@ -81,7 +80,7 @@ module VX_lsu_unit #(
|
|||
wire lsu_valid = lsu_req_if.valid && ~fence_wait;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + `NW_BITS + `NUM_THREADS + 32 + (`NUM_THREADS * 32) + (`NUM_THREADS * ADDR_TYPEW) + `LSU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||
.DATAW (1 + 1 + `NW_BITS + `NUM_THREADS + 32 + (`NUM_THREADS * 32) + (`NUM_THREADS * ADDR_TYPEW) + `INST_LSU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||
.RESETW (1)
|
||||
) req_pipe_reg (
|
||||
.clk (clk),
|
||||
|
@ -98,7 +97,7 @@ module VX_lsu_unit #(
|
|||
wire [31:0] rsp_pc;
|
||||
wire [`NR_BITS-1:0] rsp_rd;
|
||||
wire rsp_wb;
|
||||
wire [`LSU_BITS-1:0] rsp_type;
|
||||
wire [`INST_LSU_BITS-1:0] rsp_type;
|
||||
wire rsp_is_dup;
|
||||
|
||||
`UNUSED_VAR (rsp_type)
|
||||
|
@ -120,11 +119,12 @@ module VX_lsu_unit #(
|
|||
|
||||
wire [`NUM_THREADS-1:0] dcache_req_fire = dcache_req_if.valid & dcache_req_if.ready;
|
||||
|
||||
wire dcache_req_fire_any = (| dcache_req_fire);
|
||||
|
||||
wire dcache_rsp_fire = dcache_rsp_if.valid && dcache_rsp_if.ready;
|
||||
|
||||
wire mbuf_push = dcache_req_fire_any
|
||||
wire [`NUM_THREADS-1:0] req_tmask_dup = req_tmask & {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1};
|
||||
|
||||
wire mbuf_push = ~mbuf_full
|
||||
&& (| ({`NUM_THREADS{req_valid}} & req_tmask_dup & dcache_req_if.ready))
|
||||
&& is_req_start // first submission only
|
||||
&& req_wb; // loads only
|
||||
|
||||
|
@ -134,8 +134,8 @@ module VX_lsu_unit #(
|
|||
`UNUSED_VAR (dcache_rsp_if.tag)
|
||||
|
||||
VX_index_buffer #(
|
||||
.DATAW (`NW_BITS + 32 + `NUM_THREADS + `NR_BITS + 1 + `LSU_BITS + (`NUM_THREADS * REQ_ASHIFT) + 1),
|
||||
.SIZE (`LSUQ_SIZE)
|
||||
.DATAW (`NW_BITS + 32 + `NUM_THREADS + `NR_BITS + 1 + `INST_LSU_BITS + (`NUM_THREADS * REQ_ASHIFT) + 1),
|
||||
.SIZE (`LSUQ_SIZE)
|
||||
) req_metadata (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -150,9 +150,7 @@ module VX_lsu_unit #(
|
|||
.empty (mbuf_empty)
|
||||
);
|
||||
|
||||
wire [`NUM_THREADS-1:0] req_tmask_dup = req_tmask & {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1};
|
||||
|
||||
wire req_ready_all = &(dcache_req_if.ready | req_sent_mask | ~req_tmask_dup);
|
||||
wire dcache_req_ready = &(dcache_req_if.ready | req_sent_mask | ~req_tmask_dup);
|
||||
|
||||
wire [`NUM_THREADS-1:0] req_sent_mask_n = req_sent_mask | dcache_req_fire;
|
||||
|
||||
|
@ -161,7 +159,7 @@ module VX_lsu_unit #(
|
|||
req_sent_mask <= 0;
|
||||
is_req_start <= 1;
|
||||
end else begin
|
||||
if (req_ready_all) begin
|
||||
if (dcache_req_ready) begin
|
||||
req_sent_mask <= 0;
|
||||
is_req_start <= 1;
|
||||
end else begin
|
||||
|
@ -204,7 +202,7 @@ module VX_lsu_unit #(
|
|||
|
||||
always @(*) begin
|
||||
mem_req_byteen = {4{req_wb}};
|
||||
case (`LSU_WSIZE(req_type))
|
||||
case (`INST_LSU_WSIZE(req_type))
|
||||
0: mem_req_byteen[req_offset[i]] = 1;
|
||||
1: begin
|
||||
mem_req_byteen[req_offset[i]] = 1;
|
||||
|
@ -237,11 +235,11 @@ module VX_lsu_unit #(
|
|||
`endif
|
||||
end
|
||||
|
||||
assign ready_in = req_dep_ready && req_ready_all;
|
||||
assign ready_in = req_dep_ready && dcache_req_ready;
|
||||
|
||||
// send store commit
|
||||
|
||||
wire is_store_rsp = req_valid && ~req_wb && req_ready_all;
|
||||
wire is_store_rsp = req_valid && ~req_wb && dcache_req_ready;
|
||||
|
||||
assign st_commit_if.valid = is_store_rsp;
|
||||
assign st_commit_if.wid = req_wid;
|
||||
|
@ -263,11 +261,11 @@ module VX_lsu_unit #(
|
|||
wire [7:0] rsp_data8 = rsp_offset[i][0] ? rsp_data16[15:8] : rsp_data16[7:0];
|
||||
|
||||
always @(*) begin
|
||||
case (`LSU_FMT(rsp_type))
|
||||
`FMT_B: rsp_data[i] = 32'(signed'(rsp_data8));
|
||||
`FMT_H: rsp_data[i] = 32'(signed'(rsp_data16));
|
||||
`FMT_BU: rsp_data[i] = 32'(unsigned'(rsp_data8));
|
||||
`FMT_HU: rsp_data[i] = 32'(unsigned'(rsp_data16));
|
||||
case (`INST_LSU_FMT(rsp_type))
|
||||
`INST_FMT_B: rsp_data[i] = 32'(signed'(rsp_data8));
|
||||
`INST_FMT_H: rsp_data[i] = 32'(signed'(rsp_data16));
|
||||
`INST_FMT_BU: rsp_data[i] = 32'(unsigned'(rsp_data8));
|
||||
`INST_FMT_HU: rsp_data[i] = 32'(unsigned'(rsp_data16));
|
||||
default: rsp_data[i] = rsp_data32;
|
||||
endcase
|
||||
end
|
||||
|
@ -307,7 +305,7 @@ module VX_lsu_unit #(
|
|||
`SCOPE_ASSIGN (dcache_rsp_tag, mbuf_raddr);
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
reg [`LSUQ_SIZE-1:0][(`NW_BITS + 32 + 64 + 1)-1:0] pending_reqs;
|
||||
reg [`LSUQ_SIZE-1:0][(`NW_BITS + 32 + `NR_BITS + 64 + 1)-1:0] pending_reqs;
|
||||
wire [63:0] delay_timeout = 10000 * (1 ** (`L2_ENABLE + `L3_ENABLE));
|
||||
|
||||
always @(posedge clk) begin
|
||||
|
@ -315,7 +313,7 @@ module VX_lsu_unit #(
|
|||
pending_reqs <= '0;
|
||||
end begin
|
||||
if (mbuf_push) begin
|
||||
pending_reqs[mbuf_waddr] <= {req_wid, req_pc, $time, 1'b1};
|
||||
pending_reqs[mbuf_waddr] <= {req_wid, req_pc, req_rd, $time, 1'b1};
|
||||
end
|
||||
if (mbuf_pop) begin
|
||||
pending_reqs[mbuf_raddr] <= '0;
|
||||
|
@ -324,40 +322,42 @@ module VX_lsu_unit #(
|
|||
|
||||
for (integer i = 0; i < `LSUQ_SIZE; ++i) begin
|
||||
if (pending_reqs[i][0]) begin
|
||||
assert(($time - pending_reqs[i][1 +: 64]) < delay_timeout) else
|
||||
$error("%t: *** D$%0d response timeout: wid=%0d, PC=%0h", $time, CORE_ID, pending_reqs[i][1+64+32 +: `NW_BITS], pending_reqs[i][1+64 +: 32]);
|
||||
`ASSERT(($time - pending_reqs[i][1 +: 64]) < delay_timeout,
|
||||
("%t: *** D$%0d response timeout: remaining=%b, wid=%0d, PC=%0h, rd=%0d",
|
||||
$time, CORE_ID, rsp_rem_mask[i], pending_reqs[i][1+64+32+`NR_BITS +: `NW_BITS], pending_reqs[i][1+64+`NR_BITS +: 32], pending_reqs[i][1+64 +: `NR_BITS]));
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
`ifdef DBG_PRINT_CORE_DCACHE
|
||||
always @(posedge clk) begin
|
||||
if (lsu_req_if.valid && fence_wait) begin
|
||||
$display("%t: *** D$%0d fence wait", $time, CORE_ID);
|
||||
wire dcache_req_fire_any = (| dcache_req_fire);
|
||||
always @(posedge clk) begin
|
||||
if (lsu_req_if.valid && fence_wait) begin
|
||||
dpi_trace("%d: *** D$%0d fence wait\n", $time, CORE_ID);
|
||||
end
|
||||
if (dcache_req_fire_any) begin
|
||||
if (dcache_req_if.rw[0]) begin
|
||||
$write("%t: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, addr=", $time, CORE_ID, req_wid, req_pc, dcache_req_fire);
|
||||
`PRINT_ARRAY1D(req_addr, `NUM_THREADS);
|
||||
$write(", tag=%0h, byteen=%0h, type=", req_tag, dcache_req_if.byteen);
|
||||
`PRINT_ARRAY1D(req_addr_type, `NUM_THREADS);
|
||||
$write(", data=");
|
||||
`PRINT_ARRAY1D(dcache_req_if.data, `NUM_THREADS);
|
||||
$write("\n");
|
||||
dpi_trace("%d: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, addr=", $time, CORE_ID, req_wid, req_pc, dcache_req_fire);
|
||||
`TRACE_ARRAY1D(req_addr, `NUM_THREADS);
|
||||
dpi_trace(", tag=%0h, byteen=%0h, type=", req_tag, dcache_req_if.byteen);
|
||||
`TRACE_ARRAY1D(req_addr_type, `NUM_THREADS);
|
||||
dpi_trace(", data=");
|
||||
`TRACE_ARRAY1D(dcache_req_if.data, `NUM_THREADS);
|
||||
dpi_trace("\n");
|
||||
end else begin
|
||||
$write("%t: D$%0d Rd Req: wid=%0d, PC=%0h, tmask=%b, addr=", $time, CORE_ID, req_wid, req_pc, dcache_req_fire);
|
||||
`PRINT_ARRAY1D(req_addr, `NUM_THREADS);
|
||||
$write(", tag=%0h, byteen=%0h, type=", req_tag, dcache_req_if.byteen);
|
||||
`PRINT_ARRAY1D(req_addr_type, `NUM_THREADS);
|
||||
$write(", rd=%0d, is_dup=%b\n", req_rd, req_is_dup);
|
||||
dpi_trace("%d: D$%0d Rd Req: wid=%0d, PC=%0h, tmask=%b, addr=", $time, CORE_ID, req_wid, req_pc, dcache_req_fire);
|
||||
`TRACE_ARRAY1D(req_addr, `NUM_THREADS);
|
||||
dpi_trace(", tag=%0h, byteen=%0h, type=", req_tag, dcache_req_if.byteen);
|
||||
`TRACE_ARRAY1D(req_addr_type, `NUM_THREADS);
|
||||
dpi_trace(", rd=%0d, is_dup=%b\n", req_rd, req_is_dup);
|
||||
end
|
||||
end
|
||||
if (dcache_rsp_fire) begin
|
||||
$write("%t: D$%0d Rsp: wid=%0d, PC=%0h, tmask=%b, tag=%0h, rd=%0d, data=",
|
||||
dpi_trace("%d: D$%0d Rsp: wid=%0d, PC=%0h, tmask=%b, tag=%0h, rd=%0d, data=",
|
||||
$time, CORE_ID, rsp_wid, rsp_pc, dcache_rsp_if.tmask, mbuf_raddr, rsp_rd);
|
||||
`PRINT_ARRAY1D(dcache_rsp_if.data, `NUM_THREADS);
|
||||
$write(", is_dup=%b\n", rsp_is_dup);
|
||||
`TRACE_ARRAY1D(dcache_rsp_if.data, `NUM_THREADS);
|
||||
dpi_trace(", is_dup=%b\n", rsp_is_dup);
|
||||
end
|
||||
end
|
||||
`endif
|
|
@ -8,11 +8,11 @@ module VX_mem_arb #(
|
|||
parameter TAG_SEL_IDX = 0,
|
||||
parameter BUFFERED_REQ = 0,
|
||||
parameter BUFFERED_RSP = 0,
|
||||
parameter TYPE = "R",
|
||||
parameter TYPE = "P",
|
||||
|
||||
localparam DATA_SIZE = (DATA_WIDTH / 8),
|
||||
localparam LOG_NUM_REQS = `CLOG2(NUM_REQS),
|
||||
localparam TAG_OUT_WIDTH = TAG_IN_WIDTH + LOG_NUM_REQS
|
||||
parameter DATA_SIZE = (DATA_WIDTH / 8),
|
||||
parameter LOG_NUM_REQS = `CLOG2(NUM_REQS),
|
||||
parameter TAG_OUT_WIDTH = TAG_IN_WIDTH + LOG_NUM_REQS
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
|
@ -5,24 +5,24 @@ module VX_mem_unit # (
|
|||
) (
|
||||
`SCOPE_IO_VX_mem_unit
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if perf_memsys_if,
|
||||
VX_perf_memsys_if.master perf_memsys_if,
|
||||
`endif
|
||||
|
||||
// Core <-> Dcache
|
||||
VX_dcache_req_if dcache_req_if,
|
||||
VX_dcache_rsp_if dcache_rsp_if,
|
||||
VX_dcache_req_if.slave dcache_req_if,
|
||||
VX_dcache_rsp_if.master dcache_rsp_if,
|
||||
|
||||
// Core <-> Icache
|
||||
VX_icache_req_if icache_req_if,
|
||||
VX_icache_rsp_if icache_rsp_if,
|
||||
VX_icache_req_if.slave icache_req_if,
|
||||
VX_icache_rsp_if.master icache_rsp_if,
|
||||
|
||||
// Memory
|
||||
VX_mem_req_if mem_req_if,
|
||||
VX_mem_rsp_if mem_rsp_if
|
||||
VX_mem_req_if.master mem_req_if,
|
||||
VX_mem_rsp_if.slave mem_rsp_if
|
||||
);
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
|
@ -30,58 +30,59 @@ module VX_mem_unit # (
|
|||
`endif
|
||||
|
||||
VX_mem_req_if #(
|
||||
.DATA_WIDTH (`IMEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`IMEM_ADDR_WIDTH),
|
||||
.TAG_WIDTH (`IMEM_TAG_WIDTH)
|
||||
.DATA_WIDTH (`ICACHE_MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`ICACHE_MEM_ADDR_WIDTH),
|
||||
.TAG_WIDTH (`ICACHE_MEM_TAG_WIDTH)
|
||||
) icache_mem_req_if();
|
||||
|
||||
VX_mem_rsp_if #(
|
||||
.DATA_WIDTH (`IMEM_DATA_WIDTH),
|
||||
.TAG_WIDTH (`IMEM_TAG_WIDTH)
|
||||
.DATA_WIDTH (`ICACHE_MEM_DATA_WIDTH),
|
||||
.TAG_WIDTH (`ICACHE_MEM_TAG_WIDTH)
|
||||
) icache_mem_rsp_if();
|
||||
|
||||
VX_mem_req_if #(
|
||||
.DATA_WIDTH (`DMEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`DMEM_ADDR_WIDTH),
|
||||
.TAG_WIDTH (`DMEM_TAG_WIDTH)
|
||||
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`DCACHE_MEM_ADDR_WIDTH),
|
||||
.TAG_WIDTH (`DCACHE_MEM_TAG_WIDTH)
|
||||
) dcache_mem_req_if();
|
||||
|
||||
VX_mem_rsp_if #(
|
||||
.DATA_WIDTH (`DMEM_DATA_WIDTH),
|
||||
.TAG_WIDTH (`DMEM_TAG_WIDTH)
|
||||
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
|
||||
.TAG_WIDTH (`DCACHE_MEM_TAG_WIDTH)
|
||||
) dcache_mem_rsp_if();
|
||||
|
||||
VX_dcache_req_if #(
|
||||
.NUM_REQS (`DNUM_REQS),
|
||||
.WORD_SIZE (`DWORD_SIZE),
|
||||
.TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE)
|
||||
.NUM_REQS (`DCACHE_NUM_REQS),
|
||||
.WORD_SIZE (`DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE)
|
||||
) dcache_req_tmp_if();
|
||||
|
||||
VX_dcache_rsp_if #(
|
||||
.NUM_REQS (`DNUM_REQS),
|
||||
.WORD_SIZE (`DWORD_SIZE),
|
||||
.TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE)
|
||||
.NUM_REQS (`DCACHE_NUM_REQS),
|
||||
.WORD_SIZE (`DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE)
|
||||
) dcache_rsp_tmp_if();
|
||||
|
||||
`RESET_RELAY (icache_reset);
|
||||
`RESET_RELAY (dcache_reset);
|
||||
`RESET_RELAY (mem_arb_reset);
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_ID (`ICACHE_ID),
|
||||
.CACHE_SIZE (`ICACHE_SIZE),
|
||||
.CACHE_LINE_SIZE (`ICACHE_LINE_SIZE),
|
||||
.NUM_BANKS (`INUM_BANKS),
|
||||
.WORD_SIZE (`IWORD_SIZE),
|
||||
.NUM_BANKS (1),
|
||||
.WORD_SIZE (`ICACHE_WORD_SIZE),
|
||||
.NUM_REQS (1),
|
||||
.CREQ_SIZE (`ICREQ_SIZE),
|
||||
.CRSQ_SIZE (`ICRSQ_SIZE),
|
||||
.MSHR_SIZE (`IMSHR_SIZE),
|
||||
.MRSQ_SIZE (`IMRSQ_SIZE),
|
||||
.MREQ_SIZE (`IMREQ_SIZE),
|
||||
.CREQ_SIZE (`ICACHE_CREQ_SIZE),
|
||||
.CRSQ_SIZE (`ICACHE_CRSQ_SIZE),
|
||||
.MSHR_SIZE (`ICACHE_MSHR_SIZE),
|
||||
.MRSQ_SIZE (`ICACHE_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`ICACHE_MREQ_SIZE),
|
||||
.WRITE_ENABLE (0),
|
||||
.CORE_TAG_WIDTH (`ICORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS),
|
||||
.MEM_TAG_WIDTH (`IMEM_TAG_WIDTH)
|
||||
.CORE_TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (`ICACHE_CORE_TAG_ID_BITS),
|
||||
.MEM_TAG_WIDTH (`ICACHE_MEM_TAG_WIDTH)
|
||||
) icache (
|
||||
`SCOPE_BIND_VX_mem_unit_icache
|
||||
|
||||
|
@ -91,7 +92,7 @@ module VX_mem_unit # (
|
|||
// Core request
|
||||
.core_req_valid (icache_req_if.valid),
|
||||
.core_req_rw (1'b0),
|
||||
.core_req_byteen ({`IWORD_SIZE{1'b1}}),
|
||||
.core_req_byteen ('b0),
|
||||
.core_req_addr (icache_req_if.addr),
|
||||
.core_req_data ('x),
|
||||
.core_req_tag (icache_req_if.tag),
|
||||
|
@ -128,19 +129,19 @@ module VX_mem_unit # (
|
|||
.CACHE_ID (`DCACHE_ID),
|
||||
.CACHE_SIZE (`DCACHE_SIZE),
|
||||
.CACHE_LINE_SIZE (`DCACHE_LINE_SIZE),
|
||||
.NUM_BANKS (`DNUM_BANKS),
|
||||
.NUM_PORTS (`DNUM_PORTS),
|
||||
.WORD_SIZE (`DWORD_SIZE),
|
||||
.NUM_REQS (`DNUM_REQS),
|
||||
.CREQ_SIZE (`DCREQ_SIZE),
|
||||
.CRSQ_SIZE (`DCRSQ_SIZE),
|
||||
.MSHR_SIZE (`DMSHR_SIZE),
|
||||
.MRSQ_SIZE (`DMRSQ_SIZE),
|
||||
.MREQ_SIZE (`DMREQ_SIZE),
|
||||
.NUM_BANKS (`DCACHE_NUM_BANKS),
|
||||
.NUM_PORTS (`DCACHE_NUM_PORTS),
|
||||
.WORD_SIZE (`DCACHE_WORD_SIZE),
|
||||
.NUM_REQS (`DCACHE_NUM_REQS),
|
||||
.CREQ_SIZE (`DCACHE_CREQ_SIZE),
|
||||
.CRSQ_SIZE (`DCACHE_CRSQ_SIZE),
|
||||
.MSHR_SIZE (`DCACHE_MSHR_SIZE),
|
||||
.MRSQ_SIZE (`DCACHE_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`DCACHE_MREQ_SIZE),
|
||||
.WRITE_ENABLE (1),
|
||||
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE),
|
||||
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS-`SM_ENABLE),
|
||||
.MEM_TAG_WIDTH (`DMEM_TAG_WIDTH),
|
||||
.CORE_TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE),
|
||||
.CORE_TAG_ID_BITS (`DCACHE_CORE_TAG_ID_BITS-`SM_ENABLE),
|
||||
.MEM_TAG_WIDTH (`DCACHE_MEM_TAG_WIDTH),
|
||||
.NC_ENABLE (1)
|
||||
) dcache (
|
||||
`SCOPE_BIND_VX_mem_unit_dcache
|
||||
|
@ -186,28 +187,31 @@ module VX_mem_unit # (
|
|||
|
||||
if (`SM_ENABLE) begin
|
||||
VX_dcache_req_if #(
|
||||
.NUM_REQS (`DNUM_REQS),
|
||||
.WORD_SIZE (`DWORD_SIZE),
|
||||
.TAG_WIDTH (`DCORE_TAG_WIDTH-1)
|
||||
.NUM_REQS (`DCACHE_NUM_REQS),
|
||||
.WORD_SIZE (`DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE)
|
||||
) smem_req_if();
|
||||
|
||||
VX_dcache_rsp_if #(
|
||||
.NUM_REQS (`DNUM_REQS),
|
||||
.WORD_SIZE (`DWORD_SIZE),
|
||||
.TAG_WIDTH (`DCORE_TAG_WIDTH-1)
|
||||
.NUM_REQS (`DCACHE_NUM_REQS),
|
||||
.WORD_SIZE (`DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE)
|
||||
) smem_rsp_if();
|
||||
|
||||
VX_smem_arb #(
|
||||
`RESET_RELAY (smem_arb_reset);
|
||||
`RESET_RELAY (smem_reset);
|
||||
|
||||
VX_smem_arb #(
|
||||
.NUM_REQS (2),
|
||||
.LANES (`NUM_THREADS),
|
||||
.DATA_SIZE (4),
|
||||
.TAG_IN_WIDTH (`DCORE_TAG_WIDTH),
|
||||
.TYPE ("X"),
|
||||
.TAG_IN_WIDTH (`DCACHE_CORE_TAG_WIDTH),
|
||||
.TYPE ("P"),
|
||||
.BUFFERED_REQ (2),
|
||||
.BUFFERED_RSP (1)
|
||||
) smem_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (smem_arb_reset),
|
||||
|
||||
// input request
|
||||
.req_valid_in (dcache_req_if.valid),
|
||||
|
@ -242,19 +246,17 @@ module VX_mem_unit # (
|
|||
.rsp_ready_out (dcache_rsp_if.ready)
|
||||
);
|
||||
|
||||
`RESET_RELAY (smem_reset);
|
||||
|
||||
VX_shared_mem #(
|
||||
.CACHE_ID (`SCACHE_ID),
|
||||
.CACHE_ID (`SMEM_ID),
|
||||
.CACHE_SIZE (`SMEM_SIZE),
|
||||
.NUM_BANKS (`SNUM_BANKS),
|
||||
.WORD_SIZE (`SWORD_SIZE),
|
||||
.NUM_REQS (`SNUM_REQS),
|
||||
.CREQ_SIZE (`SCREQ_SIZE),
|
||||
.CRSQ_SIZE (`SCRSQ_SIZE),
|
||||
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH-1),
|
||||
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS-1),
|
||||
.BANK_ADDR_OFFSET (`SBANK_ADDR_OFFSET)
|
||||
.NUM_BANKS (`SMEM_NUM_BANKS),
|
||||
.WORD_SIZE (`SMEM_WORD_SIZE),
|
||||
.NUM_REQS (`SMEM_NUM_REQS),
|
||||
.CREQ_SIZE (`SMEM_CREQ_SIZE),
|
||||
.CRSQ_SIZE (`SMEM_CRSQ_SIZE),
|
||||
.CORE_TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE),
|
||||
.CORE_TAG_ID_BITS (`DCACHE_CORE_TAG_ID_BITS-`SM_ENABLE),
|
||||
.BANK_ADDR_OFFSET (`SMEM_BANK_ADDR_OFFSET)
|
||||
) smem (
|
||||
.clk (clk),
|
||||
.reset (smem_reset),
|
||||
|
@ -281,13 +283,20 @@ module VX_mem_unit # (
|
|||
);
|
||||
end else begin
|
||||
// core to D-cache request
|
||||
assign dcache_req_tmp_if.valid = dcache_req_if.valid;
|
||||
assign dcache_req_tmp_if.addr = dcache_req_if.addr;
|
||||
assign dcache_req_tmp_if.rw = dcache_req_if.rw;
|
||||
assign dcache_req_tmp_if.byteen = dcache_req_if.byteen;
|
||||
assign dcache_req_tmp_if.data = dcache_req_if.data;
|
||||
assign dcache_req_tmp_if.tag = dcache_req_if.tag;
|
||||
assign dcache_req_if.ready = dcache_req_tmp_if.ready;
|
||||
for (genvar i = 0; i < `DCACHE_NUM_REQS; ++i) begin
|
||||
VX_skid_buffer #(
|
||||
.DATAW ((32-`CLOG2(`DCACHE_WORD_SIZE)) + 1 + `DCACHE_WORD_SIZE + (8*`DCACHE_WORD_SIZE) + `DCACHE_CORE_TAG_WIDTH)
|
||||
) req_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (dcache_req_if.valid[i]),
|
||||
.data_in ({dcache_req_if.addr[i], dcache_req_if.rw[i], dcache_req_if.byteen[i], dcache_req_if.data[i], dcache_req_if.tag[i]}),
|
||||
.ready_in (dcache_req_if.ready[i]),
|
||||
.valid_out (dcache_req_tmp_if.valid[i]),
|
||||
.data_out ({dcache_req_tmp_if.addr[i], dcache_req_tmp_if.rw[i], dcache_req_tmp_if.byteen[i], dcache_req_tmp_if.data[i], dcache_req_tmp_if.tag[i]}),
|
||||
.ready_out (dcache_req_tmp_if.ready[i])
|
||||
);
|
||||
end
|
||||
|
||||
// D-cache to core reponse
|
||||
assign dcache_rsp_if.valid = dcache_rsp_tmp_if.valid;
|
||||
|
@ -297,21 +306,23 @@ module VX_mem_unit # (
|
|||
assign dcache_rsp_tmp_if.ready = dcache_rsp_if.ready;
|
||||
end
|
||||
|
||||
wire [`DMEM_TAG_WIDTH-1:0] icache_mem_req_tag = `DMEM_TAG_WIDTH'(icache_mem_req_if.tag);
|
||||
wire [`DMEM_TAG_WIDTH-1:0] icache_mem_rsp_tag;
|
||||
assign icache_mem_rsp_if.tag = icache_mem_rsp_tag[`IMEM_TAG_WIDTH-1:0];
|
||||
wire [`DCACHE_MEM_TAG_WIDTH-1:0] icache_mem_req_tag = `DCACHE_MEM_TAG_WIDTH'(icache_mem_req_if.tag);
|
||||
wire [`DCACHE_MEM_TAG_WIDTH-1:0] icache_mem_rsp_tag;
|
||||
assign icache_mem_rsp_if.tag = icache_mem_rsp_tag[`ICACHE_MEM_TAG_WIDTH-1:0];
|
||||
`UNUSED_VAR (icache_mem_rsp_tag)
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_REQS (2),
|
||||
.DATA_WIDTH (`DMEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`DMEM_ADDR_WIDTH),
|
||||
.TAG_IN_WIDTH (`DMEM_TAG_WIDTH),
|
||||
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`DCACHE_MEM_ADDR_WIDTH),
|
||||
.TAG_IN_WIDTH (`DCACHE_MEM_TAG_WIDTH),
|
||||
.TYPE ("R"),
|
||||
.TAG_SEL_IDX (1), // Skip 0 for NC flag
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (2)
|
||||
) mem_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (mem_arb_reset),
|
||||
|
||||
// Source request
|
||||
.req_valid_in ({dcache_mem_req_if.valid, icache_mem_req_if.valid}),
|
|
@ -1,15 +1,11 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
`include "util_dpi.vh"
|
||||
`endif
|
||||
|
||||
module VX_muldiv (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Inputs
|
||||
input wire [`MUL_BITS-1:0] alu_op,
|
||||
input wire [`INST_MUL_BITS-1:0] alu_op,
|
||||
input wire [`NW_BITS-1:0] wid_in,
|
||||
input wire [`NUM_THREADS-1:0] tmask_in,
|
||||
input wire [31:0] PC_in,
|
||||
|
@ -33,7 +29,7 @@ module VX_muldiv (
|
|||
input wire ready_out
|
||||
);
|
||||
|
||||
wire is_div_op = `MUL_IS_DIV(alu_op);
|
||||
wire is_div_op = `INST_MUL_IS_DIV(alu_op);
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] mul_result;
|
||||
wire [`NW_BITS-1:0] mul_wid_out;
|
||||
|
@ -48,18 +44,20 @@ module VX_muldiv (
|
|||
wire mul_valid_in = valid_in && !is_div_op;
|
||||
wire mul_ready_in = ~stall_out || ~mul_valid_out;
|
||||
|
||||
wire is_mulh_in = (alu_op != `MUL_MUL);
|
||||
wire is_signed_mul_a = (alu_op != `MUL_MULHU);
|
||||
wire is_signed_mul_b = (alu_op != `MUL_MULHU && alu_op != `MUL_MULHSU);
|
||||
wire is_mulh_in = (alu_op != `INST_MUL_MUL);
|
||||
wire is_signed_mul_a = (alu_op != `INST_MUL_MULHU);
|
||||
wire is_signed_mul_b = (alu_op != `INST_MUL_MULHU && alu_op != `INST_MUL_MULHSU);
|
||||
|
||||
`ifdef IMUL_DPI
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] mul_result_tmp;
|
||||
wire [`NUM_THREADS-1:0][31:0] mul_result_tmp;
|
||||
|
||||
wire mul_fire_in = mul_valid_in && mul_ready_in;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire [31:0] mul_resultl, mul_resulth;
|
||||
always @(*) begin
|
||||
dpi_imul (alu_in1[i], alu_in2[i], is_signed_mul_a, is_signed_mul_b, mul_resultl, mul_resulth);
|
||||
dpi_imul (mul_fire_in, alu_in1[i], alu_in2[i], is_signed_mul_a, is_signed_mul_b, mul_resultl, mul_resulth);
|
||||
end
|
||||
assign mul_result_tmp[i] = is_mulh_in ? mul_resulth : mul_resultl;
|
||||
end
|
||||
|
@ -83,9 +81,9 @@ module VX_muldiv (
|
|||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire [32:0] mul_in1 = {is_signed_mul_a & alu_in1[i][31], alu_in1[i]};
|
||||
wire [32:0] mul_in2 = {is_signed_mul_b & alu_in2[i][31], alu_in2[i]};
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
wire [65:0] mul_result_tmp;
|
||||
`IGNORE_WARNINGS_END
|
||||
`IGNORE_UNUSED_END
|
||||
|
||||
VX_multiplier #(
|
||||
.WIDTHA (33),
|
||||
|
@ -127,8 +125,8 @@ module VX_muldiv (
|
|||
wire [`NR_BITS-1:0] div_rd_out;
|
||||
wire div_wb_out;
|
||||
|
||||
wire is_rem_op_in = (alu_op == `MUL_REM) || (alu_op == `MUL_REMU);
|
||||
wire is_signed_div = (alu_op == `MUL_DIV) || (alu_op == `MUL_REM);
|
||||
wire is_rem_op_in = (alu_op == `INST_MUL_REM) || (alu_op == `INST_MUL_REMU);
|
||||
wire is_signed_div = (alu_op == `INST_MUL_DIV) || (alu_op == `INST_MUL_REM);
|
||||
wire div_valid_in = valid_in && is_div_op;
|
||||
wire div_ready_out = ~stall_out && ~mul_valid_out; // arbitration prioritizes MUL
|
||||
wire div_ready_in;
|
||||
|
@ -137,11 +135,13 @@ module VX_muldiv (
|
|||
`ifdef IDIV_DPI
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] div_result_tmp;
|
||||
|
||||
wire div_fire_in = div_valid_in && div_ready_in;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire [31:0] div_quotient, div_remainder;
|
||||
always @(*) begin
|
||||
dpi_idiv (alu_in1[i], alu_in2[i], is_signed_div, div_quotient, div_remainder);
|
||||
dpi_idiv (div_fire_in, alu_in1[i], alu_in2[i], is_signed_div, div_quotient, div_remainder);
|
||||
end
|
||||
assign div_result_tmp[i] = is_rem_op_in ? div_remainder : div_quotient;
|
||||
end
|
|
@ -15,30 +15,30 @@ module VX_pipeline #(
|
|||
output wire [`NUM_THREADS-1:0][3:0] dcache_req_byteen,
|
||||
output wire [`NUM_THREADS-1:0][29:0] dcache_req_addr,
|
||||
output wire [`NUM_THREADS-1:0][31:0] dcache_req_data,
|
||||
output wire [`NUM_THREADS-1:0][`DCORE_TAG_WIDTH-1:0] dcache_req_tag,
|
||||
output wire [`NUM_THREADS-1:0][`DCACHE_CORE_TAG_WIDTH-1:0] dcache_req_tag,
|
||||
input wire [`NUM_THREADS-1:0] dcache_req_ready,
|
||||
|
||||
// Dcache core reponse
|
||||
input wire dcache_rsp_valid,
|
||||
input wire [`NUM_THREADS-1:0] dcache_rsp_tmask,
|
||||
input wire [`NUM_THREADS-1:0][31:0] dcache_rsp_data,
|
||||
input wire [`DCORE_TAG_WIDTH-1:0] dcache_rsp_tag,
|
||||
input wire [`DCACHE_CORE_TAG_WIDTH-1:0] dcache_rsp_tag,
|
||||
output wire dcache_rsp_ready,
|
||||
|
||||
// Icache core request
|
||||
output wire icache_req_valid,
|
||||
output wire [29:0] icache_req_addr,
|
||||
output wire [`ICORE_TAG_WIDTH-1:0] icache_req_tag,
|
||||
output wire [`ICACHE_CORE_TAG_WIDTH-1:0] icache_req_tag,
|
||||
input wire icache_req_ready,
|
||||
|
||||
// Icache core response
|
||||
input wire icache_rsp_valid,
|
||||
input wire [31:0] icache_rsp_data,
|
||||
input wire [`ICORE_TAG_WIDTH-1:0] icache_rsp_tag,
|
||||
input wire [`ICACHE_CORE_TAG_WIDTH-1:0] icache_rsp_tag,
|
||||
output wire icache_rsp_ready,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if perf_memsys_if,
|
||||
VX_perf_memsys_if.slave perf_memsys_if,
|
||||
`endif
|
||||
|
||||
// Status
|
||||
|
@ -51,7 +51,7 @@ module VX_pipeline #(
|
|||
VX_dcache_req_if #(
|
||||
.NUM_REQS (`NUM_THREADS),
|
||||
.WORD_SIZE (4),
|
||||
.TAG_WIDTH (`DCORE_TAG_WIDTH)
|
||||
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH)
|
||||
) dcache_req_if();
|
||||
|
||||
assign dcache_req_valid = dcache_req_if.valid;
|
||||
|
@ -69,7 +69,7 @@ module VX_pipeline #(
|
|||
VX_dcache_rsp_if #(
|
||||
.NUM_REQS (`NUM_THREADS),
|
||||
.WORD_SIZE (4),
|
||||
.TAG_WIDTH (`DCORE_TAG_WIDTH)
|
||||
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH)
|
||||
) dcache_rsp_if();
|
||||
|
||||
assign dcache_rsp_if.valid = dcache_rsp_valid;
|
||||
|
@ -84,7 +84,7 @@ module VX_pipeline #(
|
|||
|
||||
VX_icache_req_if #(
|
||||
.WORD_SIZE (4),
|
||||
.TAG_WIDTH (`ICORE_TAG_WIDTH)
|
||||
.TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH)
|
||||
) icache_req_if();
|
||||
|
||||
assign icache_req_valid = icache_req_if.valid;
|
||||
|
@ -98,7 +98,7 @@ module VX_pipeline #(
|
|||
|
||||
VX_icache_rsp_if #(
|
||||
.WORD_SIZE (4),
|
||||
.TAG_WIDTH (`ICORE_TAG_WIDTH)
|
||||
.TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH)
|
||||
) icache_rsp_if();
|
||||
|
||||
assign icache_rsp_if.valid = icache_rsp_valid;
|
||||
|
@ -108,18 +108,18 @@ module VX_pipeline #(
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
VX_cmt_to_csr_if #(
|
||||
.SIZE ($clog2(3*`NUM_THREADS+1))
|
||||
) cmt_to_csr_if();
|
||||
|
||||
VX_fetch_to_csr_if fetch_to_csr_if();
|
||||
VX_cmt_to_csr_if cmt_to_csr_if();
|
||||
VX_decode_if decode_if();
|
||||
VX_branch_ctl_if branch_ctl_if();
|
||||
VX_warp_ctl_if warp_ctl_if();
|
||||
VX_ifetch_rsp_if ifetch_rsp_if();
|
||||
VX_alu_req_if alu_req_if();
|
||||
VX_lsu_req_if lsu_req_if();
|
||||
VX_csr_req_if csr_req_if();
|
||||
VX_csr_req_if csr_req_if();
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_fpu_req_if fpu_req_if();
|
||||
`endif
|
||||
VX_gpu_req_if gpu_req_if();
|
||||
VX_writeback_if writeback_if();
|
||||
VX_wstall_if wstall_if();
|
||||
|
@ -128,7 +128,9 @@ module VX_pipeline #(
|
|||
VX_commit_if ld_commit_if();
|
||||
VX_commit_if st_commit_if();
|
||||
VX_commit_if csr_commit_if();
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_commit_if fpu_commit_if();
|
||||
`endif
|
||||
VX_commit_if gpu_commit_if();
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
|
@ -154,6 +156,7 @@ module VX_pipeline #(
|
|||
.warp_ctl_if (warp_ctl_if),
|
||||
.branch_ctl_if (branch_ctl_if),
|
||||
.ifetch_rsp_if (ifetch_rsp_if),
|
||||
.fetch_to_csr_if(fetch_to_csr_if),
|
||||
.busy (busy)
|
||||
);
|
||||
|
||||
|
@ -186,7 +189,9 @@ module VX_pipeline #(
|
|||
.alu_req_if (alu_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_req_if (fpu_req_if),
|
||||
`endif
|
||||
.gpu_req_if (gpu_req_if)
|
||||
);
|
||||
|
||||
|
@ -206,12 +211,15 @@ module VX_pipeline #(
|
|||
.dcache_req_if (dcache_req_if),
|
||||
.dcache_rsp_if (dcache_rsp_if),
|
||||
|
||||
.cmt_to_csr_if (cmt_to_csr_if),
|
||||
.cmt_to_csr_if (cmt_to_csr_if),
|
||||
.fetch_to_csr_if(fetch_to_csr_if),
|
||||
|
||||
.alu_req_if (alu_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_req_if (fpu_req_if),
|
||||
`endif
|
||||
.gpu_req_if (gpu_req_if),
|
||||
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
|
@ -220,7 +228,9 @@ module VX_pipeline #(
|
|||
.ld_commit_if (ld_commit_if),
|
||||
.st_commit_if (st_commit_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_commit_if (fpu_commit_if),
|
||||
`endif
|
||||
.gpu_commit_if (gpu_commit_if),
|
||||
|
||||
.busy (busy)
|
||||
|
@ -236,7 +246,9 @@ module VX_pipeline #(
|
|||
.ld_commit_if (ld_commit_if),
|
||||
.st_commit_if (st_commit_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_commit_if (fpu_commit_if),
|
||||
`endif
|
||||
.gpu_commit_if (gpu_commit_if),
|
||||
|
||||
.writeback_if (writeback_if),
|
|
@ -1,10 +1,16 @@
|
|||
`ifndef VX_PLATFORM
|
||||
`define VX_PLATFORM
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
`include "util_dpi.vh"
|
||||
`endif
|
||||
|
||||
`include "VX_scope.vh"
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
|
||||
`ifndef NDEBUG
|
||||
`define DEBUG_BLOCK(x) /* verilator lint_off UNUSED */ \
|
||||
x \
|
||||
|
@ -13,9 +19,9 @@
|
|||
`define DEBUG_BLOCK(x)
|
||||
`endif
|
||||
|
||||
`define DEBUG_BEGIN /* verilator lint_off UNUSED */
|
||||
`define IGNORE_UNUSED_BEGIN /* verilator lint_off UNUSED */
|
||||
|
||||
`define DEBUG_END /* verilator lint_on UNUSED */
|
||||
`define IGNORE_UNUSED_END /* verilator lint_on UNUSED */
|
||||
|
||||
`define IGNORE_WARNINGS_BEGIN /* verilator lint_off UNUSED */ \
|
||||
/* verilator lint_off PINCONNECTEMPTY */ \
|
||||
|
@ -23,7 +29,8 @@
|
|||
/* verilator lint_off UNOPTFLAT */ \
|
||||
/* verilator lint_off UNDRIVEN */ \
|
||||
/* verilator lint_off DECLFILENAME */ \
|
||||
/* verilator lint_off IMPLICIT */
|
||||
/* verilator lint_off IMPLICIT */ \
|
||||
/* verilator lint_off IMPORTSTAR */
|
||||
|
||||
`define IGNORE_WARNINGS_END /* verilator lint_on UNUSED */ \
|
||||
/* verilator lint_on PINCONNECTEMPTY */ \
|
||||
|
@ -31,7 +38,8 @@
|
|||
/* verilator lint_on UNOPTFLAT */ \
|
||||
/* verilator lint_on UNDRIVEN */ \
|
||||
/* verilator lint_on DECLFILENAME */ \
|
||||
/* verilator lint_on IMPLICIT */
|
||||
/* verilator lint_on IMPLICIT */ \
|
||||
/* verilator lint_on IMPORTSTAR */
|
||||
|
||||
`define UNUSED_PARAM(x) /* verilator lint_off UNUSED */ \
|
||||
localparam __``x = x; \
|
||||
|
@ -43,7 +51,11 @@
|
|||
. x () \
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
|
||||
`define STRINGIFY(x) `"x`"
|
||||
`define ERROR(msg) \
|
||||
$error msg
|
||||
|
||||
`define ASSERT(cond, msg) \
|
||||
assert(cond) else $error msg
|
||||
|
||||
`define STATIC_ASSERT(cond, msg) \
|
||||
generate \
|
||||
|
@ -51,41 +63,61 @@
|
|||
endgenerate
|
||||
|
||||
`define RUNTIME_ASSERT(cond, msg) \
|
||||
always @(posedge clk) \
|
||||
always @(posedge clk) begin \
|
||||
assert(cond) else $error msg; \
|
||||
end
|
||||
|
||||
`define TRACING_ON /* verilator tracing_on */
|
||||
`define TRACING_OFF /* verilator tracing_off */
|
||||
|
||||
`define RESET_RELAY(signal) \
|
||||
wire signal; \
|
||||
VX_reset_relay __``signal ( \
|
||||
.clk (clk), \
|
||||
.reset (reset), \
|
||||
.reset_o (signal) \
|
||||
)
|
||||
`else // SYNTHESIS
|
||||
|
||||
`define DEBUG_BLOCK(x)
|
||||
`define IGNORE_UNUSED_BEGIN
|
||||
`define IGNORE_UNUSED_END
|
||||
`define IGNORE_WARNINGS_BEGIN
|
||||
`define IGNORE_WARNINGS_END
|
||||
`define UNUSED_PARAM(x)
|
||||
`define UNUSED_VAR(x)
|
||||
`define UNUSED_PIN(x) . x ()
|
||||
`define ERROR(msg)
|
||||
`define ASSERT(cond, msg) if (cond);
|
||||
`define STATIC_ASSERT(cond, msg)
|
||||
`define RUNTIME_ASSERT(cond, msg)
|
||||
`define TRACING_ON
|
||||
`define TRACING_OFF
|
||||
|
||||
`endif // SYNTHESIS
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef QUARTUS
|
||||
`define USE_FAST_BRAM (* ramstyle = "MLAB, no_rw_check" *)
|
||||
`define NO_RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams off" *)
|
||||
`define DISABLE_BRAM (* ramstyle = "logic" *)
|
||||
`define PRESERVE_REG (* preserve *)
|
||||
`else
|
||||
`define USE_FAST_BRAM
|
||||
`define NO_RW_RAM_CHECK
|
||||
`define DISABLE_BRAM
|
||||
`define PRESERVE_REG
|
||||
`endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define STRINGIFY(x) `"x`"
|
||||
|
||||
`define CLOG2(x) $clog2(x)
|
||||
`define FLOG2(x) ($clog2(x) - (((1 << $clog2(x)) > (x)) ? 1 : 0))
|
||||
`define LOG2UP(x) (((x) > 1) ? $clog2(x) : 1)
|
||||
`define ISPOW2(x) (((x) != 0) && (0 == ((x) & ((x) - 1))))
|
||||
|
||||
`define ABS(x) (($signed(x) < 0) ? (-$signed(x)) : x);
|
||||
`define ABS(x) (($signed(x) < 0) ? (-$signed(x)) : (x));
|
||||
|
||||
`define MIN(x, y) ((x < y) ? (x) : (y))
|
||||
`define MAX(x, y) ((x > y) ? (x) : (y))
|
||||
`define MIN(x, y) (((x) < (y)) ? (x) : (y))
|
||||
`define MAX(x, y) (((x) > (y)) ? (x) : (y))
|
||||
|
||||
`define UP(x) (((x) > 0) ? x : 1)
|
||||
|
||||
`define SAFE_RNG(h, l) `MAX(h,l) : l
|
||||
`define UP(x) (((x) > 0) ? (x) : 1)
|
||||
|
||||
`define RTRIM(x, s) x[$bits(x)-1:($bits(x)-s)]
|
||||
|
||||
|
@ -112,25 +144,41 @@
|
|||
end \
|
||||
$write("}")
|
||||
|
||||
`define PRINT_ARRAY1D(a, m) \
|
||||
$write("{"); \
|
||||
`define TRACE_ARRAY1D(a, m) \
|
||||
dpi_trace("{"); \
|
||||
for (integer i = (m-1); i >= 0; --i) begin \
|
||||
if (i != (m-1)) $write(", "); \
|
||||
$write("0x%0h", a[i]); \
|
||||
if (i != (m-1)) dpi_trace(", "); \
|
||||
dpi_trace("0x%0h", a[i]); \
|
||||
end \
|
||||
$write("}"); \
|
||||
dpi_trace("}"); \
|
||||
|
||||
`define PRINT_ARRAY2D(a, m, n) \
|
||||
$write("{"); \
|
||||
`define TRACE_ARRAY2D(a, m, n) \
|
||||
dpi_trace("{"); \
|
||||
for (integer i = n-1; i >= 0; --i) begin \
|
||||
if (i != (n-1)) $write(", "); \
|
||||
$write("{"); \
|
||||
if (i != (n-1)) dpi_trace(", "); \
|
||||
dpi_trace("{"); \
|
||||
for (integer j = (m-1); j >= 0; --j) begin \
|
||||
if (j != (m-1)) $write(", "); \
|
||||
$write("0x%0h", a[i][j]); \
|
||||
if (j != (m-1)) dpi_trace(", "); \
|
||||
dpi_trace("0x%0h", a[i][j]); \
|
||||
end \
|
||||
$write("}"); \
|
||||
dpi_trace("}"); \
|
||||
end \
|
||||
$write("}")
|
||||
dpi_trace("}")
|
||||
|
||||
`define RESET_RELAY(signal) \
|
||||
wire signal; \
|
||||
VX_reset_relay __``signal ( \
|
||||
.clk (clk), \
|
||||
.reset (reset), \
|
||||
.reset_o (signal) \
|
||||
)
|
||||
|
||||
`define POP_COUNT(out, in) \
|
||||
VX_popcount #( \
|
||||
.N ($bits(in)) \
|
||||
) __``out ( \
|
||||
.in_i (in), \
|
||||
.cnt_o (out) \
|
||||
)
|
||||
|
||||
`endif
|
|
@ -7,132 +7,141 @@ task print_ex_type (
|
|||
input [`EX_BITS-1:0] ex_type
|
||||
);
|
||||
case (ex_type)
|
||||
`EX_ALU: $write("ALU");
|
||||
`EX_LSU: $write("LSU");
|
||||
`EX_CSR: $write("CSR");
|
||||
`EX_FPU: $write("FPU");
|
||||
`EX_GPU: $write("GPU");
|
||||
default: $write("NOP");
|
||||
`EX_ALU: dpi_trace("ALU");
|
||||
`EX_LSU: dpi_trace("LSU");
|
||||
`EX_CSR: dpi_trace("CSR");
|
||||
`EX_FPU: dpi_trace("FPU");
|
||||
`EX_GPU: dpi_trace("GPU");
|
||||
default: dpi_trace("NOP");
|
||||
endcase
|
||||
endtask
|
||||
|
||||
task print_ex_op (
|
||||
input [`EX_BITS-1:0] ex_type,
|
||||
input [`OP_BITS-1:0] op_type,
|
||||
input [`MOD_BITS-1:0] op_mod
|
||||
input [`INST_OP_BITS-1:0] op_type,
|
||||
input [`INST_MOD_BITS-1:0] op_mod
|
||||
);
|
||||
case (ex_type)
|
||||
`EX_ALU: begin
|
||||
if (`ALU_IS_BR(op_mod)) begin
|
||||
case (`BR_BITS'(op_type))
|
||||
`BR_EQ: $write("BEQ");
|
||||
`BR_NE: $write("BNE");
|
||||
`BR_LT: $write("BLT");
|
||||
`BR_GE: $write("BGE");
|
||||
`BR_LTU: $write("BLTU");
|
||||
`BR_GEU: $write("BGEU");
|
||||
`BR_JAL: $write("JAL");
|
||||
`BR_JALR: $write("JALR");
|
||||
`BR_ECALL: $write("ECALL");
|
||||
`BR_EBREAK:$write("EBREAK");
|
||||
`BR_MRET: $write("MRET");
|
||||
`BR_SRET: $write("SRET");
|
||||
`BR_DRET: $write("DRET");
|
||||
default: $write("?");
|
||||
if (`INST_ALU_IS_BR(op_mod)) begin
|
||||
case (`INST_BR_BITS'(op_type))
|
||||
`INST_BR_EQ: dpi_trace("BEQ");
|
||||
`INST_BR_NE: dpi_trace("BNE");
|
||||
`INST_BR_LT: dpi_trace("BLT");
|
||||
`INST_BR_GE: dpi_trace("BGE");
|
||||
`INST_BR_LTU: dpi_trace("BLTU");
|
||||
`INST_BR_GEU: dpi_trace("BGEU");
|
||||
`INST_BR_JAL: dpi_trace("JAL");
|
||||
`INST_BR_JALR: dpi_trace("JALR");
|
||||
`INST_BR_ECALL: dpi_trace("ECALL");
|
||||
`INST_BR_EBREAK:dpi_trace("EBREAK");
|
||||
`INST_BR_MRET: dpi_trace("MRET");
|
||||
`INST_BR_SRET: dpi_trace("SRET");
|
||||
`INST_BR_DRET: dpi_trace("DRET");
|
||||
default: dpi_trace("?");
|
||||
endcase
|
||||
end else if (`ALU_IS_MUL(op_mod)) begin
|
||||
case (`MUL_BITS'(op_type))
|
||||
`MUL_MUL: $write("MUL");
|
||||
`MUL_MULH: $write("MULH");
|
||||
`MUL_MULHSU:$write("MULHSU");
|
||||
`MUL_MULHU: $write("MULHU");
|
||||
`MUL_DIV: $write("DIV");
|
||||
`MUL_DIVU: $write("DIVU");
|
||||
`MUL_REM: $write("REM");
|
||||
`MUL_REMU: $write("REMU");
|
||||
default: $write("?");
|
||||
end else if (`INST_ALU_IS_MUL(op_mod)) begin
|
||||
case (`INST_MUL_BITS'(op_type))
|
||||
`INST_MUL_MUL: dpi_trace("MUL");
|
||||
`INST_MUL_MULH: dpi_trace("MULH");
|
||||
`INST_MUL_MULHSU:dpi_trace("MULHSU");
|
||||
`INST_MUL_MULHU: dpi_trace("MULHU");
|
||||
`INST_MUL_DIV: dpi_trace("DIV");
|
||||
`INST_MUL_DIVU: dpi_trace("DIVU");
|
||||
`INST_MUL_REM: dpi_trace("REM");
|
||||
`INST_MUL_REMU: dpi_trace("REMU");
|
||||
default: dpi_trace("?");
|
||||
endcase
|
||||
end else begin
|
||||
case (`ALU_BITS'(op_type))
|
||||
`ALU_ADD: $write("ADD");
|
||||
`ALU_SUB: $write("SUB");
|
||||
`ALU_SLL: $write("SLL");
|
||||
`ALU_SRL: $write("SRL");
|
||||
`ALU_SRA: $write("SRA");
|
||||
`ALU_SLT: $write("SLT");
|
||||
`ALU_SLTU: $write("SLTU");
|
||||
`ALU_XOR: $write("XOR");
|
||||
`ALU_OR: $write("OR");
|
||||
`ALU_AND: $write("AND");
|
||||
`ALU_LUI: $write("LUI");
|
||||
`ALU_AUIPC: $write("AUIPC");
|
||||
default: $write("?");
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: dpi_trace("ADD");
|
||||
`INST_ALU_SUB: dpi_trace("SUB");
|
||||
`INST_ALU_SLL: dpi_trace("SLL");
|
||||
`INST_ALU_SRL: dpi_trace("SRL");
|
||||
`INST_ALU_SRA: dpi_trace("SRA");
|
||||
`INST_ALU_SLT: dpi_trace("SLT");
|
||||
`INST_ALU_SLTU: dpi_trace("SLTU");
|
||||
`INST_ALU_XOR: dpi_trace("XOR");
|
||||
`INST_ALU_OR: dpi_trace("OR");
|
||||
`INST_ALU_AND: dpi_trace("AND");
|
||||
`INST_ALU_LUI: dpi_trace("LUI");
|
||||
`INST_ALU_AUIPC: dpi_trace("AUIPC");
|
||||
default: dpi_trace("?");
|
||||
endcase
|
||||
end
|
||||
end
|
||||
`EX_LSU: begin
|
||||
case (`LSU_BITS'(op_type))
|
||||
`LSU_LB: $write("LB");
|
||||
`LSU_LH: $write("LH");
|
||||
`LSU_LW: $write("LW");
|
||||
`LSU_LBU:$write("LBU");
|
||||
`LSU_LHU:$write("LHU");
|
||||
`LSU_SB: $write("SB");
|
||||
`LSU_SH: $write("SH");
|
||||
`LSU_SW: $write("SW");
|
||||
default: $write("?");
|
||||
endcase
|
||||
if (op_mod == 0) begin
|
||||
case (`INST_LSU_BITS'(op_type))
|
||||
`INST_LSU_LB: dpi_trace("LB");
|
||||
`INST_LSU_LH: dpi_trace("LH");
|
||||
`INST_LSU_LW: dpi_trace("LW");
|
||||
`INST_LSU_LBU:dpi_trace("LBU");
|
||||
`INST_LSU_LHU:dpi_trace("LHU");
|
||||
`INST_LSU_SB: dpi_trace("SB");
|
||||
`INST_LSU_SH: dpi_trace("SH");
|
||||
`INST_LSU_SW: dpi_trace("SW");
|
||||
default: dpi_trace("?");
|
||||
endcase
|
||||
end else if (op_mod == 1) begin
|
||||
case (`INST_FENCE_BITS'(op_type))
|
||||
`INST_FENCE_D: dpi_trace("DFENCE");
|
||||
`INST_FENCE_I: dpi_trace("IFENCE");
|
||||
default: dpi_trace("?");
|
||||
endcase
|
||||
end
|
||||
end
|
||||
`EX_CSR: begin
|
||||
case (`CSR_BITS'(op_type))
|
||||
`CSR_RW: $write("CSRW");
|
||||
`CSR_RS: $write("CSRS");
|
||||
`CSR_RC: $write("CSRC");
|
||||
default: $write("?");
|
||||
case (`INST_CSR_BITS'(op_type))
|
||||
`INST_CSR_RW: dpi_trace("CSRW");
|
||||
`INST_CSR_RS: dpi_trace("CSRS");
|
||||
`INST_CSR_RC: dpi_trace("CSRC");
|
||||
default: dpi_trace("?");
|
||||
endcase
|
||||
end
|
||||
`EX_FPU: begin
|
||||
case (`FPU_BITS'(op_type))
|
||||
`FPU_ADD: $write("ADD");
|
||||
`FPU_SUB: $write("SUB");
|
||||
`FPU_MUL: $write("MUL");
|
||||
`FPU_DIV: $write("DIV");
|
||||
`FPU_SQRT: $write("SQRT");
|
||||
`FPU_MADD: $write("MADD");
|
||||
`FPU_NMSUB: $write("NMSUB");
|
||||
`FPU_NMADD: $write("NMADD");
|
||||
`FPU_CVTWS: $write("CVTWS");
|
||||
`FPU_CVTWUS:$write("CVTWUS");
|
||||
`FPU_CVTSW: $write("CVTSW");
|
||||
`FPU_CVTSWU:$write("CVTSWU");
|
||||
`FPU_CLASS: $write("CLASS");
|
||||
`FPU_CMP: $write("CMP");
|
||||
`FPU_MISC: begin
|
||||
case (`INST_FPU_BITS'(op_type))
|
||||
`INST_FPU_ADD: dpi_trace("ADD");
|
||||
`INST_FPU_SUB: dpi_trace("SUB");
|
||||
`INST_FPU_MUL: dpi_trace("MUL");
|
||||
`INST_FPU_DIV: dpi_trace("DIV");
|
||||
`INST_FPU_SQRT: dpi_trace("SQRT");
|
||||
`INST_FPU_MADD: dpi_trace("MADD");
|
||||
`INST_FPU_NMSUB: dpi_trace("NMSUB");
|
||||
`INST_FPU_NMADD: dpi_trace("NMADD");
|
||||
`INST_FPU_CVTWS: dpi_trace("CVTWS");
|
||||
`INST_FPU_CVTWUS:dpi_trace("CVTWUS");
|
||||
`INST_FPU_CVTSW: dpi_trace("CVTSW");
|
||||
`INST_FPU_CVTSWU:dpi_trace("CVTSWU");
|
||||
`INST_FPU_CLASS: dpi_trace("CLASS");
|
||||
`INST_FPU_CMP: dpi_trace("CMP");
|
||||
`INST_FPU_MISC: begin
|
||||
case (op_mod)
|
||||
0: $write("SGNJ");
|
||||
1: $write("SGNJN");
|
||||
2: $write("SGNJX");
|
||||
3: $write("MIN");
|
||||
4: $write("MAX");
|
||||
5: $write("MVXW");
|
||||
6: $write("MVWX");
|
||||
0: dpi_trace("SGNJ");
|
||||
1: dpi_trace("SGNJN");
|
||||
2: dpi_trace("SGNJX");
|
||||
3: dpi_trace("MIN");
|
||||
4: dpi_trace("MAX");
|
||||
5: dpi_trace("MVXW");
|
||||
6: dpi_trace("MVWX");
|
||||
endcase
|
||||
end
|
||||
default: $write("?");
|
||||
default: dpi_trace("?");
|
||||
endcase
|
||||
end
|
||||
`EX_GPU: begin
|
||||
case (`GPU_BITS'(op_type))
|
||||
`GPU_TMC: $write("TMC");
|
||||
`GPU_WSPAWN:$write("WSPAWN");
|
||||
`GPU_SPLIT: $write("SPLIT");
|
||||
`GPU_JOIN: $write("JOIN");
|
||||
`GPU_BAR: $write("BAR");
|
||||
`GPU_TEX: $write("TEX");
|
||||
default: $write("?");
|
||||
case (`INST_GPU_BITS'(op_type))
|
||||
`INST_GPU_TMC: dpi_trace("TMC");
|
||||
`INST_GPU_WSPAWN:dpi_trace("WSPAWN");
|
||||
`INST_GPU_SPLIT: dpi_trace("SPLIT");
|
||||
`INST_GPU_JOIN: dpi_trace("JOIN");
|
||||
`INST_GPU_BAR: dpi_trace("BAR");
|
||||
`INST_GPU_PRED: dpi_trace("PRED");
|
||||
`INST_GPU_TEX: dpi_trace("TEX");
|
||||
default: dpi_trace("?");
|
||||
endcase
|
||||
end
|
||||
default: $write("?");
|
||||
default: dpi_trace("?");
|
||||
endcase
|
||||
endtask
|
||||
|
||||
|
|
84
hw/rtl/VX_scoreboard.sv
Normal file
84
hw/rtl/VX_scoreboard.sv
Normal file
|
@ -0,0 +1,84 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_scoreboard #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_ibuffer_if.scoreboard ibuffer_if,
|
||||
VX_writeback_if.scoreboard writeback_if
|
||||
);
|
||||
reg [`NUM_WARPS-1:0][`NUM_REGS-1:0] inuse_regs, inuse_regs_n;
|
||||
|
||||
wire reserve_reg = ibuffer_if.valid && ibuffer_if.ready && ibuffer_if.wb;
|
||||
|
||||
wire release_reg = writeback_if.valid && writeback_if.ready && writeback_if.eop;
|
||||
|
||||
always @(*) begin
|
||||
inuse_regs_n = inuse_regs;
|
||||
if (reserve_reg) begin
|
||||
inuse_regs_n[ibuffer_if.wid][ibuffer_if.rd] = 1;
|
||||
end
|
||||
if (release_reg) begin
|
||||
inuse_regs_n[writeback_if.wid][writeback_if.rd] = 0;
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
inuse_regs <= '0;
|
||||
end else begin
|
||||
inuse_regs <= inuse_regs_n;
|
||||
end
|
||||
end
|
||||
|
||||
reg deq_inuse_rd, deq_inuse_rs1, deq_inuse_rs2, deq_inuse_rs3;
|
||||
|
||||
always @(posedge clk) begin
|
||||
deq_inuse_rd <= inuse_regs_n[ibuffer_if.wid_n][ibuffer_if.rd_n];
|
||||
deq_inuse_rs1 <= inuse_regs_n[ibuffer_if.wid_n][ibuffer_if.rs1_n];
|
||||
deq_inuse_rs2 <= inuse_regs_n[ibuffer_if.wid_n][ibuffer_if.rs2_n];
|
||||
deq_inuse_rs3 <= inuse_regs_n[ibuffer_if.wid_n][ibuffer_if.rs3_n];
|
||||
end
|
||||
|
||||
assign writeback_if.ready = 1'b1;
|
||||
|
||||
assign ibuffer_if.ready = ~(deq_inuse_rd
|
||||
| deq_inuse_rs1
|
||||
| deq_inuse_rs2
|
||||
| deq_inuse_rs3);
|
||||
|
||||
`UNUSED_VAR (writeback_if.PC)
|
||||
|
||||
reg [31:0] deadlock_ctr;
|
||||
wire [31:0] deadlock_timeout = 10000 * (1 ** (`L2_ENABLE + `L3_ENABLE));
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
deadlock_ctr <= 0;
|
||||
end else begin
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
if (ibuffer_if.valid && ~ibuffer_if.ready) begin
|
||||
dpi_trace("%d: *** core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b\n",
|
||||
$time, CORE_ID, ibuffer_if.wid, ibuffer_if.PC, ibuffer_if.rd, ibuffer_if.wb,
|
||||
deq_inuse_rd, deq_inuse_rs1, deq_inuse_rs2, deq_inuse_rs3);
|
||||
end
|
||||
`endif
|
||||
if (release_reg) begin
|
||||
`ASSERT(inuse_regs[writeback_if.wid][writeback_if.rd] != 0,
|
||||
("%t: *** core%0d: invalid writeback register: wid=%0d, PC=%0h, rd=%0d",
|
||||
$time, CORE_ID, writeback_if.wid, writeback_if.PC, writeback_if.rd));
|
||||
end
|
||||
if (ibuffer_if.valid && ~ibuffer_if.ready) begin
|
||||
deadlock_ctr <= deadlock_ctr + 1;
|
||||
`ASSERT(deadlock_ctr < deadlock_timeout,
|
||||
("%t: *** core%0d-deadlock: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b",
|
||||
$time, CORE_ID, ibuffer_if.wid, ibuffer_if.PC, ibuffer_if.rd, ibuffer_if.wb,
|
||||
deq_inuse_rd, deq_inuse_rs1, deq_inuse_rs2, deq_inuse_rs3));
|
||||
end else if (ibuffer_if.valid && ibuffer_if.ready) begin
|
||||
deadlock_ctr <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
|
@ -1,71 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_scoreboard #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_ibuffer_if ibuffer_if,
|
||||
VX_writeback_if writeback_if,
|
||||
output wire delay
|
||||
);
|
||||
reg [`NUM_WARPS-1:0][`NUM_REGS-1:0] inuse_regs, inuse_regs_n;
|
||||
|
||||
reg [`NUM_REGS-1:0] deq_inuse_regs;
|
||||
|
||||
assign delay = |(deq_inuse_regs & ibuffer_if.used_regs);
|
||||
|
||||
wire reserve_reg = ibuffer_if.valid && ibuffer_if.ready && ibuffer_if.wb;
|
||||
|
||||
wire release_reg = writeback_if.valid && writeback_if.ready && writeback_if.eop;
|
||||
|
||||
always @(*) begin
|
||||
inuse_regs_n = inuse_regs;
|
||||
if (reserve_reg) begin
|
||||
inuse_regs_n[ibuffer_if.wid][ibuffer_if.rd] = 1;
|
||||
end
|
||||
if (release_reg) begin
|
||||
inuse_regs_n[writeback_if.wid][writeback_if.rd] = 0;
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
inuse_regs <= '0;
|
||||
end else begin
|
||||
inuse_regs <= inuse_regs_n;
|
||||
end
|
||||
deq_inuse_regs <= inuse_regs_n[ibuffer_if.wid_n];
|
||||
end
|
||||
|
||||
reg [31:0] deadlock_ctr;
|
||||
wire [31:0] deadlock_timeout = 10000 * (1 ** (`L2_ENABLE + `L3_ENABLE));
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
deadlock_ctr <= 0;
|
||||
end else begin
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
if (ibuffer_if.valid && ~ibuffer_if.ready) begin
|
||||
$display("%t: *** core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b",
|
||||
$time, CORE_ID, ibuffer_if.wid, ibuffer_if.PC, ibuffer_if.rd, ibuffer_if.wb,
|
||||
deq_inuse_regs[ibuffer_if.rd], deq_inuse_regs[ibuffer_if.rs1], deq_inuse_regs[ibuffer_if.rs2], deq_inuse_regs[ibuffer_if.rs3]);
|
||||
end
|
||||
`endif
|
||||
if (release_reg) begin
|
||||
assert(inuse_regs[writeback_if.wid][writeback_if.rd] != 0)
|
||||
else $error("%t: *** core%0d: invalid writeback register: wid=%0d, PC=%0h, rd=%0d",
|
||||
$time, CORE_ID, writeback_if.wid, writeback_if.PC, writeback_if.rd);
|
||||
end
|
||||
if (ibuffer_if.valid && ~ibuffer_if.ready) begin
|
||||
deadlock_ctr <= deadlock_ctr + 1;
|
||||
assert(deadlock_ctr < deadlock_timeout) else $error("%t: *** core%0d-deadlock: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b",
|
||||
$time, CORE_ID, ibuffer_if.wid, ibuffer_if.PC, ibuffer_if.rd, ibuffer_if.wb,
|
||||
deq_inuse_regs[ibuffer_if.rd], deq_inuse_regs[ibuffer_if.rs1], deq_inuse_regs[ibuffer_if.rs2], deq_inuse_regs[ibuffer_if.rs3]);
|
||||
end else if (ibuffer_if.valid && ibuffer_if.ready) begin
|
||||
deadlock_ctr <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
|
@ -8,12 +8,12 @@ module VX_smem_arb #(
|
|||
parameter TAG_SEL_IDX = 0,
|
||||
parameter BUFFERED_REQ = 0,
|
||||
parameter BUFFERED_RSP = 0,
|
||||
parameter TYPE = "R",
|
||||
parameter TYPE = "P",
|
||||
|
||||
localparam ADDR_WIDTH = (32-`CLOG2(DATA_SIZE)),
|
||||
localparam DATA_WIDTH = (8 * DATA_SIZE),
|
||||
localparam LOG_NUM_REQS = `CLOG2(NUM_REQS),
|
||||
localparam TAG_OUT_WIDTH = TAG_IN_WIDTH - LOG_NUM_REQS
|
||||
parameter ADDR_WIDTH = (32-`CLOG2(DATA_SIZE)),
|
||||
parameter DATA_WIDTH = (8 * DATA_SIZE),
|
||||
parameter LOG_NUM_REQS = `CLOG2(NUM_REQS),
|
||||
parameter TAG_OUT_WIDTH = TAG_IN_WIDTH - LOG_NUM_REQS
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
|
@ -1,59 +0,0 @@
|
|||
`ifndef VX_TYPES
|
||||
`define VX_TYPES
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
typedef struct packed {
|
||||
logic is_normal;
|
||||
logic is_zero;
|
||||
logic is_subnormal;
|
||||
logic is_inf;
|
||||
logic is_nan;
|
||||
logic is_quiet;
|
||||
logic is_signaling;
|
||||
} fp_type_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic NV; // 4-Invalid
|
||||
logic DZ; // 3-Divide by zero
|
||||
logic OF; // 2-Overflow
|
||||
logic UF; // 1-Underflow
|
||||
logic NX; // 0-Inexact
|
||||
} fflags_t;
|
||||
|
||||
`define FFG_BITS $bits(fflags_t)
|
||||
|
||||
typedef struct packed {
|
||||
logic valid;
|
||||
logic [`NUM_THREADS-1:0] tmask;
|
||||
} gpu_tmc_t;
|
||||
|
||||
`define GPU_TMC_BITS (1+`NUM_THREADS)
|
||||
|
||||
typedef struct packed {
|
||||
logic valid;
|
||||
logic [`NUM_WARPS-1:0] wmask;
|
||||
logic [31:0] pc;
|
||||
} gpu_wspawn_t;
|
||||
|
||||
`define GPU_WSPAWN_BITS (1+`NUM_WARPS+32)
|
||||
|
||||
typedef struct packed {
|
||||
logic valid;
|
||||
logic diverged;
|
||||
logic [`NUM_THREADS-1:0] then_mask;
|
||||
logic [`NUM_THREADS-1:0] else_mask;
|
||||
logic [31:0] pc;
|
||||
} gpu_split_t;
|
||||
|
||||
`define GPU_SPLIT_BITS (1+1+`NUM_THREADS+`NUM_THREADS+32)
|
||||
|
||||
typedef struct packed {
|
||||
logic valid;
|
||||
logic [`NB_BITS-1:0] id;
|
||||
logic [`NW_BITS-1:0] size_m1;
|
||||
} gpu_barrier_t;
|
||||
|
||||
`define GPU_BARRIER_BITS (1+`NB_BITS+`NW_BITS)
|
||||
|
||||
`endif
|
246
hw/rtl/VX_warp_sched.sv
Normal file
246
hw/rtl/VX_warp_sched.sv
Normal file
|
@ -0,0 +1,246 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_warp_sched #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`SCOPE_IO_VX_warp_sched
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_warp_ctl_if.slave warp_ctl_if,
|
||||
VX_wstall_if.slave wstall_if,
|
||||
VX_join_if.slave join_if,
|
||||
VX_branch_ctl_if.slave branch_ctl_if,
|
||||
|
||||
VX_ifetch_req_if.master ifetch_req_if,
|
||||
|
||||
VX_fetch_to_csr_if.master fetch_to_csr_if,
|
||||
|
||||
output wire busy
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
wire join_else;
|
||||
wire [31:0] join_pc;
|
||||
wire [`NUM_THREADS-1:0] join_tmask;
|
||||
|
||||
reg [`NUM_WARPS-1:0] active_warps, active_warps_n; // real active warps (updated when a warp is activated or disabled)
|
||||
reg [`NUM_WARPS-1:0] stalled_warps; // asserted when a branch/gpgpu instructions are issued
|
||||
|
||||
reg [`NUM_WARPS-1:0][`NUM_THREADS-1:0] thread_masks;
|
||||
reg [`NUM_WARPS-1:0][31:0] warp_pcs;
|
||||
|
||||
// barriers
|
||||
reg [`NUM_BARRIERS-1:0][`NUM_WARPS-1:0] barrier_masks; // warps waiting on barrier
|
||||
wire reached_barrier_limit; // the expected number of warps reached the barrier
|
||||
|
||||
// wspawn
|
||||
reg [31:0] wspawn_pc;
|
||||
reg [`NUM_WARPS-1:0] use_wspawn;
|
||||
|
||||
wire [`NW_BITS-1:0] schedule_wid;
|
||||
wire [`NUM_THREADS-1:0] schedule_tmask;
|
||||
wire [31:0] schedule_pc;
|
||||
wire schedule_valid;
|
||||
wire warp_scheduled;
|
||||
|
||||
wire ifetch_req_fire = ifetch_req_if.valid && ifetch_req_if.ready;
|
||||
|
||||
wire tmc_active = (warp_ctl_if.tmc.tmask != 0);
|
||||
|
||||
always @(*) begin
|
||||
active_warps_n = active_warps;
|
||||
if (warp_ctl_if.valid && warp_ctl_if.wspawn.valid) begin
|
||||
active_warps_n = warp_ctl_if.wspawn.wmask;
|
||||
end
|
||||
if (warp_ctl_if.valid && warp_ctl_if.tmc.valid) begin
|
||||
active_warps_n[warp_ctl_if.wid] = tmc_active;
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
barrier_masks <= 0;
|
||||
use_wspawn <= 0;
|
||||
stalled_warps <= 0;
|
||||
warp_pcs <= '0;
|
||||
active_warps <= '0;
|
||||
thread_masks <= '0;
|
||||
|
||||
// activate first warp
|
||||
warp_pcs[0] <= `STARTUP_ADDR;
|
||||
active_warps[0] <= '1;
|
||||
thread_masks[0] <= '1;
|
||||
end else begin
|
||||
if (warp_ctl_if.valid && warp_ctl_if.wspawn.valid) begin
|
||||
use_wspawn <= warp_ctl_if.wspawn.wmask & (~`NUM_WARPS'(1));
|
||||
wspawn_pc <= warp_ctl_if.wspawn.pc;
|
||||
end
|
||||
|
||||
if (warp_ctl_if.valid && warp_ctl_if.barrier.valid) begin
|
||||
stalled_warps[warp_ctl_if.wid] <= 0;
|
||||
if (reached_barrier_limit) begin
|
||||
barrier_masks[warp_ctl_if.barrier.id] <= 0;
|
||||
end else begin
|
||||
barrier_masks[warp_ctl_if.barrier.id][warp_ctl_if.wid] <= 1;
|
||||
end
|
||||
end
|
||||
|
||||
if (warp_ctl_if.valid && warp_ctl_if.tmc.valid) begin
|
||||
thread_masks[warp_ctl_if.wid] <= warp_ctl_if.tmc.tmask;
|
||||
stalled_warps[warp_ctl_if.wid] <= 0;
|
||||
end
|
||||
|
||||
if (warp_ctl_if.valid && warp_ctl_if.split.valid) begin
|
||||
stalled_warps[warp_ctl_if.wid] <= 0;
|
||||
if (warp_ctl_if.split.diverged) begin
|
||||
thread_masks[warp_ctl_if.wid] <= warp_ctl_if.split.then_tmask;
|
||||
end
|
||||
end
|
||||
|
||||
// Branch
|
||||
if (branch_ctl_if.valid) begin
|
||||
if (branch_ctl_if.taken) begin
|
||||
warp_pcs[branch_ctl_if.wid] <= branch_ctl_if.dest;
|
||||
end
|
||||
stalled_warps[branch_ctl_if.wid] <= 0;
|
||||
end
|
||||
|
||||
if (warp_scheduled) begin
|
||||
// stall the warp until decode stage
|
||||
stalled_warps[schedule_wid] <= 1;
|
||||
|
||||
// release wspawn
|
||||
use_wspawn[schedule_wid] <= 0;
|
||||
if (use_wspawn[schedule_wid]) begin
|
||||
thread_masks[schedule_wid] <= 1;
|
||||
end
|
||||
end
|
||||
|
||||
if (ifetch_req_fire) begin
|
||||
warp_pcs[ifetch_req_if.wid] <= ifetch_req_if.PC + 4;
|
||||
end
|
||||
|
||||
if (wstall_if.valid) begin
|
||||
stalled_warps[wstall_if.wid] <= wstall_if.stalled;
|
||||
end
|
||||
|
||||
// join handling
|
||||
if (join_if.valid) begin
|
||||
if (join_else) begin
|
||||
warp_pcs[join_if.wid] <= join_pc;
|
||||
end
|
||||
thread_masks[join_if.wid] <= join_tmask;
|
||||
end
|
||||
|
||||
active_warps <= active_warps_n;
|
||||
end
|
||||
end
|
||||
|
||||
// export thread mask register
|
||||
assign fetch_to_csr_if.thread_masks = thread_masks;
|
||||
|
||||
// calculate active barrier status
|
||||
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
wire [`NW_BITS:0] active_barrier_count;
|
||||
`IGNORE_UNUSED_END
|
||||
wire [`NUM_WARPS-1:0] barrier_mask = barrier_masks[warp_ctl_if.barrier.id];
|
||||
`POP_COUNT(active_barrier_count, barrier_mask);
|
||||
|
||||
assign reached_barrier_limit = (active_barrier_count[`NW_BITS-1:0] == warp_ctl_if.barrier.size_m1);
|
||||
|
||||
reg [`NUM_WARPS-1:0] barrier_stalls;
|
||||
always @(*) begin
|
||||
barrier_stalls = barrier_masks[0];
|
||||
for (integer i = 1; i < `NUM_BARRIERS; ++i) begin
|
||||
barrier_stalls |= barrier_masks[i];
|
||||
end
|
||||
end
|
||||
|
||||
// split/join stack management
|
||||
|
||||
wire [(32+`NUM_THREADS)-1:0] ipdom_data [`NUM_WARPS-1:0];
|
||||
wire ipdom_index [`NUM_WARPS-1:0];
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; i++) begin
|
||||
wire push = warp_ctl_if.valid
|
||||
&& warp_ctl_if.split.valid
|
||||
&& (i == warp_ctl_if.wid);
|
||||
|
||||
wire pop = join_if.valid && (i == join_if.wid);
|
||||
|
||||
wire [`NUM_THREADS-1:0] else_tmask = warp_ctl_if.split.else_tmask;
|
||||
wire [`NUM_THREADS-1:0] orig_tmask = thread_masks[warp_ctl_if.wid];
|
||||
|
||||
wire [(32+`NUM_THREADS)-1:0] q_else = {warp_ctl_if.split.pc, else_tmask};
|
||||
wire [(32+`NUM_THREADS)-1:0] q_end = {32'b0, orig_tmask};
|
||||
|
||||
VX_ipdom_stack #(
|
||||
.WIDTH (32+`NUM_THREADS),
|
||||
.DEPTH (2 ** (`NT_BITS+1))
|
||||
) ipdom_stack (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (push),
|
||||
.pop (pop),
|
||||
.pair (warp_ctl_if.split.diverged),
|
||||
.q1 (q_end),
|
||||
.q2 (q_else),
|
||||
.d (ipdom_data[i]),
|
||||
.index (ipdom_index[i]),
|
||||
`UNUSED_PIN (empty),
|
||||
`UNUSED_PIN (full)
|
||||
);
|
||||
end
|
||||
|
||||
assign {join_pc, join_tmask} = ipdom_data[join_if.wid];
|
||||
assign join_else = ~ipdom_index[join_if.wid];
|
||||
|
||||
// schedule the next ready warp
|
||||
|
||||
wire [`NUM_WARPS-1:0] ready_warps = active_warps & ~(stalled_warps | barrier_stalls);
|
||||
|
||||
VX_lzc #(
|
||||
.N (`NUM_WARPS)
|
||||
) wid_select (
|
||||
.in_i (ready_warps),
|
||||
.cnt_o (schedule_wid),
|
||||
.valid_o (schedule_valid)
|
||||
);
|
||||
|
||||
wire [`NUM_WARPS-1:0][(`NUM_THREADS + 32)-1:0] schedule_data;
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
assign schedule_data[i] = {(use_wspawn[i] ? `NUM_THREADS'(1) : thread_masks[i]),
|
||||
(use_wspawn[i] ? wspawn_pc : warp_pcs[i])};
|
||||
end
|
||||
|
||||
assign {schedule_tmask, schedule_pc} = schedule_data[schedule_wid];
|
||||
|
||||
wire stall_out = ~ifetch_req_if.ready && ifetch_req_if.valid;
|
||||
|
||||
assign warp_scheduled = schedule_valid && ~stall_out;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NUM_THREADS + 32 + `NW_BITS),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!stall_out),
|
||||
.data_in ({schedule_valid, schedule_tmask, schedule_pc, schedule_wid}),
|
||||
.data_out ({ifetch_req_if.valid, ifetch_req_if.tmask, ifetch_req_if.PC, ifetch_req_if.wid})
|
||||
);
|
||||
|
||||
assign busy = (active_warps != 0);
|
||||
|
||||
`SCOPE_ASSIGN (wsched_scheduled, warp_scheduled);
|
||||
`SCOPE_ASSIGN (wsched_active_warps, active_warps);
|
||||
`SCOPE_ASSIGN (wsched_stalled_warps, stalled_warps);
|
||||
`SCOPE_ASSIGN (wsched_schedule_wid, schedule_wid);
|
||||
`SCOPE_ASSIGN (wsched_schedule_tmask, schedule_tmask);
|
||||
`SCOPE_ASSIGN (wsched_schedule_pc, schedule_pc);
|
||||
|
||||
endmodule
|
|
@ -1,254 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_warp_sched #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`SCOPE_IO_VX_warp_sched
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_warp_ctl_if warp_ctl_if,
|
||||
VX_wstall_if wstall_if,
|
||||
VX_join_if join_if,
|
||||
VX_branch_ctl_if branch_ctl_if,
|
||||
|
||||
VX_ifetch_rsp_if ifetch_rsp_if,
|
||||
VX_ifetch_req_if ifetch_req_if,
|
||||
|
||||
output wire busy
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
wire join_else;
|
||||
wire [31:0] join_pc;
|
||||
wire [`NUM_THREADS-1:0] join_tm;
|
||||
|
||||
reg [`NUM_WARPS-1:0] active_warps, active_warps_n; // real active warps (updated when a warp is activated or disabled)
|
||||
reg [`NUM_WARPS-1:0] schedule_table, schedule_table_n; // enforces round-robin, barrier, and non-speculating branches
|
||||
reg [`NUM_WARPS-1:0] stalled_warps; // asserted when a branch/gpgpu instructions are issued
|
||||
|
||||
// Lock warp until instruction decode to resolve branches
|
||||
reg [`NUM_WARPS-1:0] fetch_lock;
|
||||
|
||||
reg [`NUM_THREADS-1:0] thread_masks [`NUM_WARPS-1:0];
|
||||
reg [31:0] warp_pcs [`NUM_WARPS-1:0];
|
||||
|
||||
// barriers
|
||||
reg [`NUM_WARPS-1:0] barrier_stall_mask [`NUM_BARRIERS-1:0]; // warps waiting on barrier
|
||||
wire reached_barrier_limit; // the expected number of warps reached the barrier
|
||||
|
||||
// wspawn
|
||||
reg [31:0] use_wspawn_pc;
|
||||
reg [`NUM_WARPS-1:0] use_wspawn;
|
||||
reg [`NW_BITS-1:0] scheduled_warp;
|
||||
wire warp_scheduled;
|
||||
|
||||
wire ifetch_rsp_fire = ifetch_rsp_if.valid && ifetch_rsp_if.ready;
|
||||
|
||||
always @(*) begin
|
||||
active_warps_n = active_warps;
|
||||
if (warp_ctl_if.valid && warp_ctl_if.wspawn.valid) begin
|
||||
active_warps_n = warp_ctl_if.wspawn.wmask;
|
||||
end
|
||||
if (warp_ctl_if.valid && warp_ctl_if.tmc.valid) begin
|
||||
active_warps_n[warp_ctl_if.wid] = (warp_ctl_if.tmc.tmask != 0);
|
||||
end
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
schedule_table_n = schedule_table;
|
||||
if (warp_ctl_if.valid && warp_ctl_if.tmc.valid) begin
|
||||
schedule_table_n[warp_ctl_if.wid] = (warp_ctl_if.tmc.tmask != 0);
|
||||
end
|
||||
if (warp_scheduled) begin // remove scheduled warp (round-robin)
|
||||
schedule_table_n[scheduled_warp] = 0;
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
for (integer i = 0; i < `NUM_BARRIERS; i++) begin
|
||||
barrier_stall_mask[i] <= 0;
|
||||
end
|
||||
|
||||
use_wspawn_pc <= 0;
|
||||
use_wspawn <= 0;
|
||||
warp_pcs[0] <= `STARTUP_ADDR;
|
||||
active_warps[0] <= 1; // Activating first warp
|
||||
schedule_table[0] <= 1; // set first warp as ready
|
||||
thread_masks[0] <= 1; // Activating first thread in first warp
|
||||
stalled_warps <= 0;
|
||||
fetch_lock <= 0;
|
||||
|
||||
for (integer i = 1; i < `NUM_WARPS; i++) begin
|
||||
warp_pcs[i] <= 0;
|
||||
active_warps[i] <= 0;
|
||||
schedule_table[i] <= 0;
|
||||
thread_masks[i] <= 0;
|
||||
end
|
||||
end else begin
|
||||
if (warp_ctl_if.valid && warp_ctl_if.wspawn.valid) begin
|
||||
use_wspawn <= warp_ctl_if.wspawn.wmask & (~`NUM_WARPS'(1));
|
||||
use_wspawn_pc <= warp_ctl_if.wspawn.pc;
|
||||
end
|
||||
|
||||
if (warp_ctl_if.valid && warp_ctl_if.barrier.valid) begin
|
||||
stalled_warps[warp_ctl_if.wid] <= 0;
|
||||
if (reached_barrier_limit) begin
|
||||
barrier_stall_mask[warp_ctl_if.barrier.id] <= 0;
|
||||
end else begin
|
||||
barrier_stall_mask[warp_ctl_if.barrier.id][warp_ctl_if.wid] <= 1;
|
||||
end
|
||||
end else if (warp_ctl_if.valid && warp_ctl_if.tmc.valid) begin
|
||||
thread_masks[warp_ctl_if.wid] <= warp_ctl_if.tmc.tmask;
|
||||
stalled_warps[warp_ctl_if.wid] <= 0;
|
||||
end else if (warp_ctl_if.valid && warp_ctl_if.split.valid) begin
|
||||
stalled_warps[warp_ctl_if.wid] <= 0;
|
||||
if (warp_ctl_if.split.diverged) begin
|
||||
thread_masks[warp_ctl_if.wid] <= warp_ctl_if.split.then_mask;
|
||||
end
|
||||
end
|
||||
|
||||
if (use_wspawn[scheduled_warp] && warp_scheduled) begin
|
||||
use_wspawn[scheduled_warp] <= 0;
|
||||
thread_masks[scheduled_warp] <= 1;
|
||||
end
|
||||
|
||||
// Stalling the scheduling of warps
|
||||
if (wstall_if.valid) begin
|
||||
stalled_warps[wstall_if.wid] <= 1;
|
||||
end
|
||||
|
||||
// Branch
|
||||
if (branch_ctl_if.valid) begin
|
||||
if (branch_ctl_if.taken) begin
|
||||
warp_pcs[branch_ctl_if.wid] <= branch_ctl_if.dest;
|
||||
end
|
||||
stalled_warps[branch_ctl_if.wid] <= 0;
|
||||
end
|
||||
|
||||
// Lock warp until instruction decode to resolve branches
|
||||
if (warp_scheduled) begin
|
||||
fetch_lock[scheduled_warp] <= 1;
|
||||
end
|
||||
if (ifetch_rsp_fire) begin
|
||||
fetch_lock[ifetch_rsp_if.wid] <= 0;
|
||||
warp_pcs[ifetch_rsp_if.wid] <= ifetch_rsp_if.PC + 4;
|
||||
end
|
||||
|
||||
// join handling
|
||||
if (join_if.valid) begin
|
||||
if (join_else) begin
|
||||
warp_pcs[join_if.wid] <= join_pc;
|
||||
end
|
||||
thread_masks[join_if.wid] <= join_tm;
|
||||
end
|
||||
|
||||
active_warps <= active_warps_n;
|
||||
|
||||
// reset 'schedule_table' when it goes to zero
|
||||
schedule_table <= (| schedule_table_n) ? schedule_table_n : active_warps_n;
|
||||
end
|
||||
end
|
||||
|
||||
// calculate active barrier status
|
||||
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire [`NW_BITS:0] active_barrier_count;
|
||||
`IGNORE_WARNINGS_END
|
||||
assign active_barrier_count = $countones(barrier_stall_mask[warp_ctl_if.barrier.id]);
|
||||
|
||||
assign reached_barrier_limit = (active_barrier_count[`NW_BITS-1:0] == warp_ctl_if.barrier.size_m1);
|
||||
|
||||
reg [`NUM_WARPS-1:0] total_barrier_stall;
|
||||
always @(*) begin
|
||||
total_barrier_stall = barrier_stall_mask[0];
|
||||
for (integer i = 1; i < `NUM_BARRIERS; ++i) begin
|
||||
total_barrier_stall |= barrier_stall_mask[i];
|
||||
end
|
||||
end
|
||||
|
||||
// split/join stack management
|
||||
|
||||
wire [(1+32+`NUM_THREADS-1):0] ipdom [`NUM_WARPS-1:0];
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; i++) begin
|
||||
wire push = warp_ctl_if.valid
|
||||
&& warp_ctl_if.split.valid
|
||||
&& (i == warp_ctl_if.wid);
|
||||
|
||||
wire pop = join_if.valid && (i == join_if.wid);
|
||||
|
||||
wire [`NUM_THREADS-1:0] else_mask = warp_ctl_if.split.diverged ? warp_ctl_if.split.else_mask : thread_masks[warp_ctl_if.wid];
|
||||
wire [(1+32+`NUM_THREADS-1):0] q_end = {1'b0, 32'b0, thread_masks[warp_ctl_if.wid]};
|
||||
wire [(1+32+`NUM_THREADS-1):0] q_else = {1'b1, warp_ctl_if.split.pc, else_mask};
|
||||
|
||||
VX_ipdom_stack #(
|
||||
.WIDTH (1+32+`NUM_THREADS),
|
||||
.DEPTH (2 ** (`NT_BITS+1))
|
||||
) ipdom_stack (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (push),
|
||||
.pop (pop),
|
||||
.q1 (q_end),
|
||||
.q2 (q_else),
|
||||
.d (ipdom[i]),
|
||||
`UNUSED_PIN (empty),
|
||||
`UNUSED_PIN (full)
|
||||
);
|
||||
end
|
||||
|
||||
assign {join_else, join_pc, join_tm} = ipdom [join_if.wid];
|
||||
|
||||
// calculate next warp schedule
|
||||
|
||||
reg [`NUM_THREADS-1:0] thread_mask;
|
||||
reg schedule_valid;
|
||||
reg [31:0] warp_pc;
|
||||
|
||||
wire [`NUM_WARPS-1:0] schedule_ready = schedule_table & ~(stalled_warps | total_barrier_stall | fetch_lock);
|
||||
|
||||
always @(*) begin
|
||||
schedule_valid = 0;
|
||||
thread_mask = 'x;
|
||||
warp_pc = 'x;
|
||||
scheduled_warp = 'x;
|
||||
for (integer i = 0; i < `NUM_WARPS; ++i) begin
|
||||
if (schedule_ready[i]) begin
|
||||
schedule_valid = 1;
|
||||
thread_mask = use_wspawn[i] ? `NUM_THREADS'(1) : thread_masks[i];
|
||||
warp_pc = use_wspawn[i] ? use_wspawn_pc : warp_pcs[i];
|
||||
scheduled_warp = `NW_BITS'(i);
|
||||
break;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire stall_out = ~ifetch_req_if.ready && ifetch_req_if.valid;
|
||||
|
||||
assign warp_scheduled = schedule_valid && ~stall_out;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NUM_THREADS + 32 + `NW_BITS),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!stall_out),
|
||||
.data_in ({warp_scheduled, thread_mask, warp_pc, scheduled_warp}),
|
||||
.data_out ({ifetch_req_if.valid, ifetch_req_if.tmask, ifetch_req_if.PC, ifetch_req_if.wid})
|
||||
);
|
||||
|
||||
assign busy = (active_warps != 0);
|
||||
|
||||
`SCOPE_ASSIGN (wsched_scheduled_warp, warp_scheduled);
|
||||
`SCOPE_ASSIGN (wsched_active_warps, active_warps);
|
||||
`SCOPE_ASSIGN (wsched_schedule_table, schedule_table);
|
||||
`SCOPE_ASSIGN (wsched_schedule_ready, schedule_ready);
|
||||
`SCOPE_ASSIGN (wsched_warp_to_schedule, scheduled_warp);
|
||||
`SCOPE_ASSIGN (wsched_warp_pc, warp_pc);
|
||||
|
||||
endmodule
|
130
hw/rtl/VX_writeback.sv
Normal file
130
hw/rtl/VX_writeback.sv
Normal file
|
@ -0,0 +1,130 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_writeback #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_commit_if.slave alu_commit_if,
|
||||
VX_commit_if.slave ld_commit_if,
|
||||
VX_commit_if.slave csr_commit_if,
|
||||
VX_commit_if.slave csr_commit_if,
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_commit_if.slave fpu_commit_if,
|
||||
`endif
|
||||
VX_commit_if.slave gpu_commit_if,
|
||||
|
||||
// outputs
|
||||
VX_writeback_if.master writeback_if
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
localparam DATAW = `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32) + 1;
|
||||
`ifdef EXT_F_ENABLE
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
localparam NUM_RSPS = 5;
|
||||
`else
|
||||
localparam NUM_RSPS = 4;
|
||||
`endif
|
||||
`else
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
localparam NUM_RSPS = 4;
|
||||
`else
|
||||
localparam NUM_RSPS = 3;
|
||||
`endif
|
||||
`endif
|
||||
|
||||
wire wb_valid;
|
||||
wire [`NW_BITS-1:0] wb_wid;
|
||||
wire [31:0] wb_PC;
|
||||
wire [`NUM_THREADS-1:0] wb_tmask;
|
||||
wire [`NR_BITS-1:0] wb_rd;
|
||||
wire [`NUM_THREADS-1:0][31:0] wb_data;
|
||||
wire wb_eop;
|
||||
|
||||
wire [NUM_RSPS-1:0] rsp_valid;
|
||||
wire [NUM_RSPS-1:0][DATAW-1:0] rsp_data;
|
||||
wire [NUM_RSPS-1:0] rsp_ready;
|
||||
wire stall;
|
||||
|
||||
assign rsp_valid = {
|
||||
csr_commit_if.valid && csr_commit_if.wb,
|
||||
alu_commit_if.valid && alu_commit_if.wb,
|
||||
`ifdef EXT_F_ENABLE
|
||||
fpu_commit_if.valid && fpu_commit_if.wb,
|
||||
`endif
|
||||
ld_commit_if.valid && ld_commit_if.wb,
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
gpu_commit_if.valid && gpu_commit_if.wb,
|
||||
`ifend
|
||||
};
|
||||
|
||||
assign rsp_data = {
|
||||
{csr_commit_if.wid, csr_commit_if.PC, csr_commit_if.tmask, csr_commit_if.rd, csr_commit_if.data, csr_commit_if.eop},
|
||||
{alu_commit_if.wid, alu_commit_if.PC, alu_commit_if.tmask, alu_commit_if.rd, alu_commit_if.data, alu_commit_if.eop},
|
||||
`ifdef EXT_F_ENABLE
|
||||
{fpu_commit_if.wid, fpu_commit_if.PC, fpu_commit_if.tmask, fpu_commit_if.rd, fpu_commit_if.data, fpu_commit_if.eop},
|
||||
`endif
|
||||
{ ld_commit_if.wid, ld_commit_if.PC, ld_commit_if.tmask, ld_commit_if.rd, ld_commit_if.data, ld_commit_if.eop},
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
{gpu_commit_if.wid, gpu_commit_if.PC, gpu_commit_if.tmask, gpu_commit_if.rd, gpu_commit_if.data, gpu_commit_if.eop},
|
||||
`endif
|
||||
};
|
||||
|
||||
VX_stream_arbiter #(
|
||||
.NUM_REQS (NUM_RSPS),
|
||||
.DATAW (DATAW),
|
||||
.TYPE ("P")
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (rsp_valid),
|
||||
.data_in (rsp_data),
|
||||
.ready_in (rsp_ready),
|
||||
.valid_out (wb_valid),
|
||||
.data_out ({wb_wid, wb_PC, wb_tmask, wb_rd, wb_data, wb_eop}),
|
||||
.ready_out (~stall)
|
||||
);
|
||||
|
||||
assign ld_commit_if.ready = rsp_ready[0] || ~ld_commit_if.wb;
|
||||
`ifdef EXT_F_ENABLE
|
||||
assign fpu_commit_if.ready = rsp_ready[1] || ~fpu_commit_if.wb;
|
||||
assign alu_commit_if.ready = rsp_ready[2] || ~alu_commit_if.wb;
|
||||
assign csr_commit_if.ready = rsp_ready[3] || ~csr_commit_if.wb;
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
assign gpu_commit_if.ready = rsp_ready[4] || ~gpu_commit_if.wb;
|
||||
`endif
|
||||
`else
|
||||
assign alu_commit_if.ready = rsp_ready[1] || ~alu_commit_if.wb;
|
||||
assign csr_commit_if.ready = rsp_ready[2] || ~csr_commit_if.wb;
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
assign gpu_commit_if.ready = rsp_ready[3] || ~gpu_commit_if.wb;
|
||||
`endif
|
||||
`endif
|
||||
|
||||
|
||||
assign stall = ~writeback_if.ready && writeback_if.valid;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + DATAW),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.data_in ({wb_valid, wb_wid, wb_PC, wb_tmask, wb_rd, wb_data, wb_eop}),
|
||||
.data_out ({writeback_if.valid, writeback_if.wid, writeback_if.PC, writeback_if.tmask, writeback_if.rd, writeback_if.data, writeback_if.eop})
|
||||
);
|
||||
|
||||
// special workaround to get RISC-V tests Pass/Fail status
|
||||
reg [31:0] last_wb_value [`NUM_REGS-1:0] /* verilator public */;
|
||||
always @(posedge clk) begin
|
||||
if (writeback_if.valid && writeback_if.ready) begin
|
||||
last_wb_value[writeback_if.rd] <= writeback_if.data[0];
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
|
@ -1,90 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_writeback #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_commit_if alu_commit_if,
|
||||
VX_commit_if ld_commit_if,
|
||||
VX_commit_if csr_commit_if,
|
||||
VX_commit_if fpu_commit_if,
|
||||
VX_commit_if gpu_commit_if,
|
||||
|
||||
// outputs
|
||||
VX_writeback_if writeback_if
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
localparam DATAW = `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32) + 1;
|
||||
|
||||
wire wb_valid;
|
||||
wire [`NW_BITS-1:0] wb_wid;
|
||||
wire [31:0] wb_PC;
|
||||
wire [`NUM_THREADS-1:0] wb_tmask;
|
||||
wire [`NR_BITS-1:0] wb_rd;
|
||||
wire [`NUM_THREADS-1:0][31:0] wb_data;
|
||||
wire wb_eop;
|
||||
|
||||
wire [4:0][DATAW-1:0] rsp_data;
|
||||
wire [4:0] rsp_ready;
|
||||
wire stall;
|
||||
|
||||
wire ld_valid = ld_commit_if.valid && ld_commit_if.wb;
|
||||
wire fpu_valid = fpu_commit_if.valid && fpu_commit_if.wb;
|
||||
wire csr_valid = csr_commit_if.valid && csr_commit_if.wb;
|
||||
wire alu_valid = alu_commit_if.valid && alu_commit_if.wb;
|
||||
wire gpu_valid = gpu_commit_if.valid && gpu_commit_if.wb;
|
||||
|
||||
assign rsp_data[0] = { ld_commit_if.wid, ld_commit_if.PC, ld_commit_if.tmask, ld_commit_if.rd, ld_commit_if.data, ld_commit_if.eop};
|
||||
assign rsp_data[1] = {fpu_commit_if.wid, fpu_commit_if.PC, fpu_commit_if.tmask, fpu_commit_if.rd, fpu_commit_if.data, fpu_commit_if.eop};
|
||||
assign rsp_data[2] = {csr_commit_if.wid, csr_commit_if.PC, csr_commit_if.tmask, csr_commit_if.rd, csr_commit_if.data, csr_commit_if.eop};
|
||||
assign rsp_data[3] = {alu_commit_if.wid, alu_commit_if.PC, alu_commit_if.tmask, alu_commit_if.rd, alu_commit_if.data, alu_commit_if.eop};
|
||||
assign rsp_data[4] = {gpu_commit_if.wid, gpu_commit_if.PC, gpu_commit_if.tmask, gpu_commit_if.rd, gpu_commit_if.data, gpu_commit_if.eop};
|
||||
|
||||
VX_stream_arbiter #(
|
||||
.NUM_REQS (5),
|
||||
.DATAW (DATAW),
|
||||
.TYPE ("X")
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in ({gpu_valid, alu_valid, csr_valid, fpu_valid, ld_valid}),
|
||||
.data_in (rsp_data),
|
||||
.ready_in (rsp_ready),
|
||||
.valid_out (wb_valid),
|
||||
.data_out ({wb_wid, wb_PC, wb_tmask, wb_rd, wb_data, wb_eop}),
|
||||
.ready_out (~stall)
|
||||
);
|
||||
|
||||
assign ld_commit_if.ready = rsp_ready[0] || ~ld_commit_if.wb;
|
||||
assign fpu_commit_if.ready = rsp_ready[1] || ~fpu_commit_if.wb;
|
||||
assign csr_commit_if.ready = rsp_ready[2] || ~csr_commit_if.wb;
|
||||
assign alu_commit_if.ready = rsp_ready[3] || ~alu_commit_if.wb;
|
||||
assign gpu_commit_if.ready = rsp_ready[4] || ~gpu_commit_if.wb;
|
||||
|
||||
assign stall = ~writeback_if.ready && writeback_if.valid;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + DATAW),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.data_in ({wb_valid, wb_wid, wb_PC, wb_tmask, wb_rd, wb_data, wb_eop}),
|
||||
.data_out ({writeback_if.valid, writeback_if.wid, writeback_if.PC, writeback_if.tmask, writeback_if.rd, writeback_if.data, writeback_if.eop})
|
||||
);
|
||||
|
||||
// special workaround to get RISC-V tests Pass/Fail status
|
||||
reg [31:0] last_wb_value [`NUM_REGS-1:0] /* verilator public */;
|
||||
always @(posedge clk) begin
|
||||
if (writeback_if.valid && writeback_if.ready) begin
|
||||
last_wb_value[writeback_if.rd] <= writeback_if.data[0];
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
|
@ -29,15 +29,15 @@ module Vortex (
|
|||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_req_valid;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_req_rw;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2MEM_BYTEEN_WIDTH-1:0] per_cluster_mem_req_byteen;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2MEM_ADDR_WIDTH-1:0] per_cluster_mem_req_addr;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2MEM_DATA_WIDTH-1:0] per_cluster_mem_req_data;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2MEM_TAG_WIDTH-1:0] per_cluster_mem_req_tag;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_BYTEEN_WIDTH-1:0] per_cluster_mem_req_byteen;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_ADDR_WIDTH-1:0] per_cluster_mem_req_addr;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_DATA_WIDTH-1:0] per_cluster_mem_req_data;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_TAG_WIDTH-1:0] per_cluster_mem_req_tag;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_req_ready;
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_rsp_valid;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2MEM_DATA_WIDTH-1:0] per_cluster_mem_rsp_data;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2MEM_TAG_WIDTH-1:0] per_cluster_mem_rsp_tag;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_DATA_WIDTH-1:0] per_cluster_mem_rsp_data;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_TAG_WIDTH-1:0] per_cluster_mem_rsp_tag;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_rsp_ready;
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
|
||||
|
@ -81,21 +81,22 @@ module Vortex (
|
|||
`RESET_RELAY (l3_reset);
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_ID (`L3CACHE_ID),
|
||||
.CACHE_SIZE (`L3CACHE_SIZE),
|
||||
.CACHE_LINE_SIZE (`L3CACHE_LINE_SIZE),
|
||||
.NUM_BANKS (`L3NUM_BANKS),
|
||||
.WORD_SIZE (`L3WORD_SIZE),
|
||||
.NUM_REQS (`L3NUM_REQS),
|
||||
.CREQ_SIZE (`L3CREQ_SIZE),
|
||||
.CRSQ_SIZE (`L3CRSQ_SIZE),
|
||||
.MSHR_SIZE (`L3MSHR_SIZE),
|
||||
.MRSQ_SIZE (`L3MRSQ_SIZE),
|
||||
.MREQ_SIZE (`L3MREQ_SIZE),
|
||||
.CACHE_ID (`L3_CACHE_ID),
|
||||
.CACHE_SIZE (`L3_CACHE_SIZE),
|
||||
.CACHE_LINE_SIZE (`L3_CACHE_LINE_SIZE),
|
||||
.NUM_BANKS (`L3_NUM_BANKS),
|
||||
.NUM_PORTS (`L3_NUM_PORTS),
|
||||
.WORD_SIZE (`L3_WORD_SIZE),
|
||||
.NUM_REQS (`L3_NUM_REQS),
|
||||
.CREQ_SIZE (`L3_CREQ_SIZE),
|
||||
.CRSQ_SIZE (`L3_CRSQ_SIZE),
|
||||
.MSHR_SIZE (`L3_MSHR_SIZE),
|
||||
.MRSQ_SIZE (`L3_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`L3_MREQ_SIZE),
|
||||
.WRITE_ENABLE (1),
|
||||
.CORE_TAG_WIDTH (`L2MEM_TAG_WIDTH),
|
||||
.CORE_TAG_WIDTH (`L2_MEM_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (0),
|
||||
.MEM_TAG_WIDTH (`L3MEM_TAG_WIDTH),
|
||||
.MEM_TAG_WIDTH (`L3_MEM_TAG_WIDTH),
|
||||
.NC_ENABLE (1)
|
||||
) l3cache (
|
||||
`SCOPE_BIND_Vortex_l3cache
|
||||
|
@ -141,16 +142,19 @@ module Vortex (
|
|||
|
||||
end else begin
|
||||
|
||||
`RESET_RELAY (mem_arb_reset);
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_REQS (`NUM_CLUSTERS),
|
||||
.DATA_WIDTH (`L3MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`L3MEM_ADDR_WIDTH),
|
||||
.TAG_IN_WIDTH (`L2MEM_TAG_WIDTH),
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (1)
|
||||
.NUM_REQS (`NUM_CLUSTERS),
|
||||
.DATA_WIDTH (`L3_MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`L3_MEM_ADDR_WIDTH),
|
||||
.TAG_IN_WIDTH (`L2_MEM_TAG_WIDTH),
|
||||
.TYPE ("R"),
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (1)
|
||||
) mem_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (mem_arb_reset),
|
||||
|
||||
// Core request
|
||||
.req_valid_in (per_cluster_mem_req_valid),
|
||||
|
@ -201,12 +205,12 @@ module Vortex (
|
|||
always @(posedge clk) begin
|
||||
if (mem_req_valid && mem_req_ready) begin
|
||||
if (mem_req_rw)
|
||||
$display("%t: MEM Wr Req: addr=%0h, tag=%0h, byteen=%0h data=%0h", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data);
|
||||
dpi_trace("%d: MEM Wr Req: addr=%0h, tag=%0h, byteen=%0h data=%0h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data);
|
||||
else
|
||||
$display("%t: MEM Rd Req: addr=%0h, tag=%0h, byteen=%0h", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen);
|
||||
dpi_trace("%d: MEM Rd Req: addr=%0h, tag=%0h, byteen=%0h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen);
|
||||
end
|
||||
if (mem_rsp_valid && mem_rsp_ready) begin
|
||||
$display("%t: MEM Rsp: tag=%0h, data=%0h", $time, mem_rsp_tag, mem_rsp_data);
|
||||
dpi_trace("%d: MEM Rsp: tag=%0h, data=%0h\n", $time, mem_rsp_tag, mem_rsp_data);
|
||||
end
|
||||
end
|
||||
`endif
|
165
hw/rtl/Vortex_axi.sv
Normal file
165
hw/rtl/Vortex_axi.sv
Normal file
|
@ -0,0 +1,165 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module Vortex_axi #(
|
||||
parameter AXI_DATA_WIDTH = `VX_MEM_DATA_WIDTH,
|
||||
parameter AXI_ADDR_WIDTH = 32,
|
||||
parameter AXI_TID_WIDTH = `VX_MEM_TAG_WIDTH,
|
||||
parameter AXI_STROBE_WIDTH = (`VX_MEM_DATA_WIDTH / 8)
|
||||
)(
|
||||
// Clock
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// AXI write request address channel
|
||||
output wire [AXI_TID_WIDTH-1:0] m_axi_awid,
|
||||
output wire [AXI_ADDR_WIDTH-1:0] m_axi_awaddr,
|
||||
output wire [7:0] m_axi_awlen,
|
||||
output wire [2:0] m_axi_awsize,
|
||||
output wire [1:0] m_axi_awburst,
|
||||
output wire m_axi_awlock,
|
||||
output wire [3:0] m_axi_awcache,
|
||||
output wire [2:0] m_axi_awprot,
|
||||
output wire [3:0] m_axi_awqos,
|
||||
output wire m_axi_awvalid,
|
||||
input wire m_axi_awready,
|
||||
|
||||
// AXI write request data channel
|
||||
output wire [AXI_DATA_WIDTH-1:0] m_axi_wdata,
|
||||
output wire [AXI_STROBE_WIDTH-1:0] m_axi_wstrb,
|
||||
output wire m_axi_wlast,
|
||||
output wire m_axi_wvalid,
|
||||
input wire m_axi_wready,
|
||||
|
||||
// AXI write response channel
|
||||
input wire [AXI_TID_WIDTH-1:0] m_axi_bid,
|
||||
input wire [1:0] m_axi_bresp,
|
||||
input wire m_axi_bvalid,
|
||||
output wire m_axi_bready,
|
||||
|
||||
// AXI read request channel
|
||||
output wire [AXI_TID_WIDTH-1:0] m_axi_arid,
|
||||
output wire [AXI_ADDR_WIDTH-1:0] m_axi_araddr,
|
||||
output wire [7:0] m_axi_arlen,
|
||||
output wire [2:0] m_axi_arsize,
|
||||
output wire [1:0] m_axi_arburst,
|
||||
output wire m_axi_arlock,
|
||||
output wire [3:0] m_axi_arcache,
|
||||
output wire [2:0] m_axi_arprot,
|
||||
output wire [3:0] m_axi_arqos,
|
||||
output wire m_axi_arvalid,
|
||||
input wire m_axi_arready,
|
||||
|
||||
// AXI read response channel
|
||||
input wire [AXI_TID_WIDTH-1:0] m_axi_rid,
|
||||
input wire [AXI_DATA_WIDTH-1:0] m_axi_rdata,
|
||||
input wire [1:0] m_axi_rresp,
|
||||
input wire m_axi_rlast,
|
||||
input wire m_axi_rvalid,
|
||||
output wire m_axi_rready,
|
||||
|
||||
// Status
|
||||
output wire busy
|
||||
);
|
||||
wire mem_req_valid;
|
||||
wire mem_req_rw;
|
||||
wire [`VX_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen;
|
||||
wire [`VX_MEM_ADDR_WIDTH-1:0] mem_req_addr;
|
||||
wire [`VX_MEM_DATA_WIDTH-1:0] mem_req_data;
|
||||
wire [`VX_MEM_TAG_WIDTH-1:0] mem_req_tag;
|
||||
wire mem_req_ready;
|
||||
|
||||
wire mem_rsp_valid;
|
||||
wire [`VX_MEM_DATA_WIDTH-1:0] mem_rsp_data;
|
||||
wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag;
|
||||
wire mem_rsp_ready;
|
||||
|
||||
VX_axi_adapter #(
|
||||
.VX_DATA_WIDTH (`VX_MEM_DATA_WIDTH),
|
||||
.VX_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH),
|
||||
.VX_TAG_WIDTH (`VX_MEM_TAG_WIDTH),
|
||||
.VX_BYTEEN_WIDTH (AXI_STROBE_WIDTH),
|
||||
.AXI_DATA_WIDTH (AXI_DATA_WIDTH),
|
||||
.AXI_ADDR_WIDTH (AXI_ADDR_WIDTH),
|
||||
.AXI_TID_WIDTH (AXI_TID_WIDTH),
|
||||
.AXI_STROBE_WIDTH (AXI_STROBE_WIDTH)
|
||||
) axi_adapter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.mem_req_valid (mem_req_valid),
|
||||
.mem_req_rw (mem_req_rw),
|
||||
.mem_req_byteen (mem_req_byteen),
|
||||
.mem_req_addr (mem_req_addr),
|
||||
.mem_req_data (mem_req_data),
|
||||
.mem_req_tag (mem_req_tag),
|
||||
.mem_req_ready (mem_req_ready),
|
||||
|
||||
.mem_rsp_valid (mem_rsp_valid),
|
||||
.mem_rsp_data (mem_rsp_data),
|
||||
.mem_rsp_tag (mem_rsp_tag),
|
||||
.mem_rsp_ready (mem_rsp_ready),
|
||||
|
||||
.m_axi_awid (m_axi_awid),
|
||||
.m_axi_awaddr (m_axi_awaddr),
|
||||
.m_axi_awlen (m_axi_awlen),
|
||||
.m_axi_awsize (m_axi_awsize),
|
||||
.m_axi_awburst (m_axi_awburst),
|
||||
.m_axi_awlock (m_axi_awlock),
|
||||
.m_axi_awcache (m_axi_awcache),
|
||||
.m_axi_awprot (m_axi_awprot),
|
||||
.m_axi_awqos (m_axi_awqos),
|
||||
.m_axi_awvalid (m_axi_awvalid),
|
||||
.m_axi_awready (m_axi_awready),
|
||||
|
||||
.m_axi_wdata (m_axi_wdata),
|
||||
.m_axi_wstrb (m_axi_wstrb),
|
||||
.m_axi_wlast (m_axi_wlast),
|
||||
.m_axi_wvalid (m_axi_wvalid),
|
||||
.m_axi_wready (m_axi_wready),
|
||||
|
||||
.m_axi_bid (m_axi_bid),
|
||||
.m_axi_bresp (m_axi_bresp),
|
||||
.m_axi_bvalid (m_axi_bvalid),
|
||||
.m_axi_bready (m_axi_bready),
|
||||
|
||||
.m_axi_arid (m_axi_arid),
|
||||
.m_axi_araddr (m_axi_araddr),
|
||||
.m_axi_arlen (m_axi_arlen),
|
||||
.m_axi_arsize (m_axi_arsize),
|
||||
.m_axi_arburst (m_axi_arburst),
|
||||
.m_axi_arlock (m_axi_arlock),
|
||||
.m_axi_arcache (m_axi_arcache),
|
||||
.m_axi_arprot (m_axi_arprot),
|
||||
.m_axi_arqos (m_axi_arqos),
|
||||
.m_axi_arvalid (m_axi_arvalid),
|
||||
.m_axi_arready (m_axi_arready),
|
||||
|
||||
.m_axi_rid (m_axi_rid),
|
||||
.m_axi_rdata (m_axi_rdata),
|
||||
.m_axi_rresp (m_axi_rresp),
|
||||
.m_axi_rlast (m_axi_rlast),
|
||||
.m_axi_rvalid (m_axi_rvalid),
|
||||
.m_axi_rready (m_axi_rready)
|
||||
);
|
||||
|
||||
Vortex vortex (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.mem_req_valid (mem_req_valid),
|
||||
.mem_req_rw (mem_req_rw),
|
||||
.mem_req_byteen (mem_req_byteen),
|
||||
.mem_req_addr (mem_req_addr),
|
||||
.mem_req_data (mem_req_data),
|
||||
.mem_req_tag (mem_req_tag),
|
||||
.mem_req_ready (mem_req_ready),
|
||||
|
||||
.mem_rsp_valid (mem_rsp_valid),
|
||||
.mem_rsp_data (mem_rsp_data),
|
||||
.mem_rsp_tag (mem_rsp_tag),
|
||||
.mem_rsp_ready (mem_rsp_ready),
|
||||
|
||||
.busy (busy)
|
||||
);
|
||||
|
||||
endmodule
|
|
@ -1,17 +1,15 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_avs_wrapper #(
|
||||
parameter NUM_BANKS = 1,
|
||||
module VX_avs_wrapper #(
|
||||
parameter AVS_DATA_WIDTH = 1,
|
||||
parameter AVS_ADDR_WIDTH = 1,
|
||||
parameter AVS_BURST_WIDTH = 1,
|
||||
parameter AVS_BANKS = 1,
|
||||
parameter AVS_BANKS = 1,
|
||||
parameter REQ_TAG_WIDTH = 1,
|
||||
parameter RD_QUEUE_SIZE = 1,
|
||||
|
||||
parameter AVS_BYTEENW = (AVS_DATA_WIDTH / 8),
|
||||
parameter RD_QUEUE_ADDR_WIDTH = $clog2(RD_QUEUE_SIZE+1),
|
||||
parameter AVS_BANKS_BITS = $clog2(AVS_BANKS)
|
||||
parameter RD_QUEUE_ADDR_WIDTH = $clog2(RD_QUEUE_SIZE+1)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -32,47 +30,46 @@ module VX_avs_wrapper #(
|
|||
input wire mem_rsp_ready,
|
||||
|
||||
// AVS bus
|
||||
output wire [AVS_DATA_WIDTH-1:0] avs_writedata [NUM_BANKS],
|
||||
input wire [AVS_DATA_WIDTH-1:0] avs_readdata [NUM_BANKS],
|
||||
output wire [AVS_ADDR_WIDTH-1:0] avs_address [NUM_BANKS],
|
||||
input wire avs_waitrequest [NUM_BANKS],
|
||||
output wire avs_write [NUM_BANKS],
|
||||
output wire avs_read [NUM_BANKS],
|
||||
output wire [AVS_BYTEENW-1:0] avs_byteenable [NUM_BANKS],
|
||||
output wire [AVS_BURST_WIDTH-1:0] avs_burstcount [NUM_BANKS],
|
||||
input avs_readdatavalid [NUM_BANKS]
|
||||
output wire [AVS_DATA_WIDTH-1:0] avs_writedata [AVS_BANKS],
|
||||
input wire [AVS_DATA_WIDTH-1:0] avs_readdata [AVS_BANKS],
|
||||
output wire [AVS_ADDR_WIDTH-1:0] avs_address [AVS_BANKS],
|
||||
input wire avs_waitrequest [AVS_BANKS],
|
||||
output wire avs_write [AVS_BANKS],
|
||||
output wire avs_read [AVS_BANKS],
|
||||
output wire [AVS_BYTEENW-1:0] avs_byteenable [AVS_BANKS],
|
||||
output wire [AVS_BURST_WIDTH-1:0] avs_burstcount [AVS_BANKS],
|
||||
input avs_readdatavalid [AVS_BANKS]
|
||||
);
|
||||
|
||||
localparam BANK_ADDRW = `LOG2UP(NUM_BANKS);
|
||||
localparam OUTPUT_REG = (NUM_BANKS > 2);
|
||||
localparam BANK_ADDRW = `LOG2UP(AVS_BANKS);
|
||||
|
||||
// Requests handling
|
||||
|
||||
wire [NUM_BANKS-1:0] avs_reqq_push, avs_reqq_pop, avs_reqq_ready;
|
||||
wire [NUM_BANKS-1:0][REQ_TAG_WIDTH-1:0] avs_reqq_tag_out;
|
||||
wire [NUM_BANKS-1:0] req_queue_going_full;
|
||||
wire [NUM_BANKS-1:0][RD_QUEUE_ADDR_WIDTH-1:0] req_queue_size;
|
||||
wire [AVS_BANKS-1:0] avs_reqq_push, avs_reqq_pop, avs_reqq_ready;
|
||||
wire [AVS_BANKS-1:0][REQ_TAG_WIDTH-1:0] avs_reqq_tag_out;
|
||||
wire [AVS_BANKS-1:0] req_queue_going_full;
|
||||
wire [AVS_BANKS-1:0][RD_QUEUE_ADDR_WIDTH-1:0] req_queue_size;
|
||||
wire [BANK_ADDRW-1:0] req_bank_sel;
|
||||
|
||||
if (NUM_BANKS >= 2) begin
|
||||
if (AVS_BANKS >= 2) begin
|
||||
assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0];
|
||||
end else begin
|
||||
assign req_bank_sel = 0;
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; i++) begin
|
||||
for (genvar i = 0; i < AVS_BANKS; i++) begin
|
||||
assign avs_reqq_ready[i] = !req_queue_going_full[i] && !avs_waitrequest[i];
|
||||
assign avs_reqq_push[i] = mem_req_valid && !mem_req_rw && avs_reqq_ready[i] && (req_bank_sel == i);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; i++) begin
|
||||
for (genvar i = 0; i < AVS_BANKS; i++) begin
|
||||
VX_pending_size #(
|
||||
.SIZE (RD_QUEUE_SIZE)
|
||||
) pending_size (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (avs_reqq_push[i]),
|
||||
.pop (avs_reqq_pop[i]),
|
||||
.incr (avs_reqq_push[i]),
|
||||
.decr (avs_reqq_pop[i]),
|
||||
.full (req_queue_going_full[i]),
|
||||
.size (req_queue_size[i]),
|
||||
`UNUSED_PIN (empty)
|
||||
|
@ -80,9 +77,8 @@ module VX_avs_wrapper #(
|
|||
`UNUSED_VAR (req_queue_size)
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (REQ_TAG_WIDTH),
|
||||
.SIZE (RD_QUEUE_SIZE),
|
||||
.OUTPUT_REG (!OUTPUT_REG)
|
||||
.DATAW (REQ_TAG_WIDTH),
|
||||
.SIZE (RD_QUEUE_SIZE)
|
||||
) rd_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -98,7 +94,7 @@ module VX_avs_wrapper #(
|
|||
);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; i++) begin
|
||||
for (genvar i = 0; i < AVS_BANKS; i++) begin
|
||||
assign avs_read[i] = mem_req_valid && !mem_req_rw && !req_queue_going_full[i] && (req_bank_sel == i);
|
||||
assign avs_write[i] = mem_req_valid && mem_req_rw && !req_queue_going_full[i] && (req_bank_sel == i);
|
||||
assign avs_address[i] = mem_req_addr;
|
||||
|
@ -107,7 +103,7 @@ module VX_avs_wrapper #(
|
|||
assign avs_burstcount[i] = AVS_BURST_WIDTH'(1);
|
||||
end
|
||||
|
||||
if (NUM_BANKS >= 2) begin
|
||||
if (AVS_BANKS >= 2) begin
|
||||
assign mem_req_ready = avs_reqq_ready[req_bank_sel];
|
||||
end else begin
|
||||
assign mem_req_ready = avs_reqq_ready;
|
||||
|
@ -115,18 +111,17 @@ module VX_avs_wrapper #(
|
|||
|
||||
// Responses handling
|
||||
|
||||
wire [NUM_BANKS-1:0] rsp_arb_valid_in;
|
||||
wire [NUM_BANKS-1:0][AVS_DATA_WIDTH+REQ_TAG_WIDTH-1:0] rsp_arb_data_in;
|
||||
wire [NUM_BANKS-1:0] rsp_arb_ready_in;
|
||||
wire [AVS_BANKS-1:0] rsp_arb_valid_in;
|
||||
wire [AVS_BANKS-1:0][AVS_DATA_WIDTH+REQ_TAG_WIDTH-1:0] rsp_arb_data_in;
|
||||
wire [AVS_BANKS-1:0] rsp_arb_ready_in;
|
||||
|
||||
wire [NUM_BANKS-1:0][AVS_DATA_WIDTH-1:0] avs_rspq_data_out;
|
||||
wire [NUM_BANKS-1:0] avs_rspq_empty;
|
||||
wire [AVS_BANKS-1:0][AVS_DATA_WIDTH-1:0] avs_rspq_data_out;
|
||||
wire [AVS_BANKS-1:0] avs_rspq_empty;
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; i++) begin
|
||||
for (genvar i = 0; i < AVS_BANKS; i++) begin
|
||||
VX_fifo_queue #(
|
||||
.DATAW (AVS_DATA_WIDTH),
|
||||
.SIZE (RD_QUEUE_SIZE),
|
||||
.OUTPUT_REG (!OUTPUT_REG)
|
||||
.DATAW (AVS_DATA_WIDTH),
|
||||
.SIZE (RD_QUEUE_SIZE)
|
||||
) rd_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -140,18 +135,18 @@ module VX_avs_wrapper #(
|
|||
`UNUSED_PIN (alm_full),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; i++) begin
|
||||
for (genvar i = 0; i < AVS_BANKS; i++) begin
|
||||
assign rsp_arb_valid_in[i] = !avs_rspq_empty[i];
|
||||
assign rsp_arb_data_in[i] = {avs_rspq_data_out[i], avs_reqq_tag_out[i]};
|
||||
assign avs_reqq_pop[i] = rsp_arb_valid_in[i] && rsp_arb_ready_in[i];
|
||||
end
|
||||
|
||||
VX_stream_arbiter #(
|
||||
.NUM_REQS (NUM_BANKS),
|
||||
.NUM_REQS (AVS_BANKS),
|
||||
.DATAW (AVS_DATA_WIDTH + REQ_TAG_WIDTH),
|
||||
.BUFFERED (OUTPUT_REG ? 1 : 0)
|
||||
.TYPE ("R")
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -166,13 +161,14 @@ module VX_avs_wrapper #(
|
|||
`ifdef DBG_PRINT_AVS
|
||||
always @(posedge clk) begin
|
||||
if (mem_req_valid && mem_req_ready) begin
|
||||
if (mem_req_rw)
|
||||
$display("%t: AVS Wr Req: addr=%0h, byteen=%0h, tag=%0h, data=%0h", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_byteen, mem_req_tag, mem_req_data);
|
||||
else
|
||||
$display("%t: AVS Rd Req: addr=%0h, byteen=%0h, tag=%0h, pending=%0d", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_byteen, mem_req_tag, req_queue_size);
|
||||
if (mem_req_rw) begin
|
||||
dpi_trace("%d: AVS Wr Req: addr=%0h, byteen=%0h, tag=%0h, data=%0h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_byteen, mem_req_tag, mem_req_data);
|
||||
end else begin
|
||||
dpi_trace("%d: AVS Rd Req: addr=%0h, byteen=%0h, tag=%0h, pending=%0d\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_byteen, mem_req_tag, req_queue_size);
|
||||
end
|
||||
end
|
||||
if (mem_rsp_valid && mem_rsp_ready) begin
|
||||
$display("%t: AVS Rd Rsp: tag=%0h, data=%0h, pending=%0d", $time, mem_rsp_tag, mem_rsp_data, req_queue_size);
|
||||
dpi_trace("%d: AVS Rd Rsp: tag=%0h, data=%0h, pending=%0d\n", $time, mem_rsp_tag, mem_rsp_data, req_queue_size);
|
||||
end
|
||||
end
|
||||
`endif
|
|
@ -47,9 +47,10 @@ localparam CCI_ADDR_WIDTH = 32 - $clog2(CCI_DATA_SIZE);
|
|||
|
||||
|
||||
localparam AVS_RD_QUEUE_SIZE = 4;
|
||||
localparam _AVS_REQ_TAGW_VX = `VX_MEM_TAG_WIDTH + $clog2(LMEM_DATA_WIDTH) - $clog2(`VX_MEM_DATA_WIDTH);
|
||||
localparam AVS_REQ_TAGW_VX = `MAX(`VX_MEM_TAG_WIDTH, _AVS_REQ_TAGW_VX);
|
||||
localparam AVS_REQ_TAGW_CCI = `MAX(CCI_ADDR_WIDTH, CCI_ADDR_WIDTH + $clog2(LMEM_DATA_WIDTH) - $clog2(CCI_DATA_WIDTH));
|
||||
localparam AVS_REQ_TAGW_VX_ = `VX_MEM_TAG_WIDTH + $clog2(LMEM_DATA_WIDTH) - $clog2(`VX_MEM_DATA_WIDTH);
|
||||
localparam AVS_REQ_TAGW_VX = `MAX(`VX_MEM_TAG_WIDTH, AVS_REQ_TAGW_VX_);
|
||||
localparam AVS_REQ_TAGW_CCI_ = CCI_ADDR_WIDTH + $clog2(LMEM_DATA_WIDTH) - $clog2(CCI_DATA_WIDTH);
|
||||
localparam AVS_REQ_TAGW_CCI = `MAX(CCI_ADDR_WIDTH, AVS_REQ_TAGW_CCI_);
|
||||
localparam AVS_REQ_TAGW = `MAX(AVS_REQ_TAGW_VX, AVS_REQ_TAGW_CCI);
|
||||
|
||||
localparam CCI_RD_WINDOW_SIZE = 8;
|
||||
|
@ -94,7 +95,7 @@ localparam STATE_WIDTH = $clog2(STATE_MAX_VALUE);
|
|||
|
||||
wire [127:0] afu_id = `AFU_ACCEL_UUID;
|
||||
|
||||
wire [63:0] dev_caps = {16'(`NUM_THREADS), 16'(`NUM_WARPS), 16'(`NUM_CORES), 16'(`IMPLEMENTATION_ID)};
|
||||
wire [63:0] dev_caps = {16'(`NUM_THREADS), 16'(`NUM_WARPS), 16'(`NUM_CORES * `NUM_CLUSTERS), 16'(`IMPLEMENTATION_ID)};
|
||||
|
||||
reg [STATE_WIDTH-1:0] state;
|
||||
|
||||
|
@ -131,9 +132,9 @@ wire cmd_scope_write;
|
|||
|
||||
// MMIO controller ////////////////////////////////////////////////////////////
|
||||
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
t_ccip_c0_ReqMmioHdr mmio_hdr;
|
||||
`IGNORE_WARNINGS_END
|
||||
`IGNORE_UNUSED_END
|
||||
assign mmio_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr);
|
||||
|
||||
`STATIC_ASSERT(($bits(t_ccip_c0_ReqMmioHdr)-$bits(mmio_hdr.address)) == 12, ("Oops!"))
|
||||
|
@ -150,21 +151,6 @@ assign cmd_scope_write = cp2af_sRxPort.c0.mmioWrValid && (MMIO_SCOPE_WRITE == mm
|
|||
wire [COUT_QUEUE_DATAW-1:0] cout_q_dout;
|
||||
wire cout_q_full, cout_q_empty;
|
||||
|
||||
/*
|
||||
`DEBUG_BEGIN
|
||||
wire cp2af_sRxPort_c0_mmioWrValid = cp2af_sRxPort.c0.mmioWrValid;
|
||||
wire cp2af_sRxPort_c0_mmioRdValid = cp2af_sRxPort.c0.mmioRdValid;
|
||||
wire cp2af_sRxPort_c0_rspValid = cp2af_sRxPort.c0.rspValid;
|
||||
wire cp2af_sRxPort_c1_rspValid = cp2af_sRxPort.c1.rspValid;
|
||||
wire cp2af_sRxPort_c0TxAlmFull = cp2af_sRxPort.c0TxAlmFull;
|
||||
wire cp2af_sRxPort_c1TxAlmFull = cp2af_sRxPort.c1TxAlmFull;
|
||||
wire[$bits(mmio_hdr.address)-1:0] mmio_hdr_address = mmio_hdr.address;
|
||||
wire[$bits(mmio_hdr.length)-1:0] mmio_hdr_length = mmio_hdr.length;
|
||||
wire[$bits(mmio_hdr.tid)-1:0] mmio_hdr_tid = mmio_hdr.tid;
|
||||
wire[$bits(cp2af_sRxPort.c0.hdr.mdata)-1:0] cp2af_sRxPort_c0_hdr_mdata = cp2af_sRxPort.c0.hdr.mdata;
|
||||
`DEBUG_END
|
||||
*/
|
||||
|
||||
wire [2:0] cmd_type = (cp2af_sRxPort.c0.mmioWrValid
|
||||
&& (MMIO_CMD_TYPE == mmio_hdr.address)) ? 3'(cp2af_sRxPort.c0.data) : 3'h0;
|
||||
|
||||
|
@ -201,36 +187,36 @@ always @(posedge clk) begin
|
|||
MMIO_IO_ADDR: begin
|
||||
cmd_io_addr <= t_ccip_clAddr'(cp2af_sRxPort.c0.data);
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: MMIO_IO_ADDR: addr=%0h, data=0x%0h", $time, mmio_hdr.address, t_ccip_clAddr'(cp2af_sRxPort.c0.data));
|
||||
dpi_trace("%d: MMIO_IO_ADDR: addr=%0h, data=0x%0h\n", $time, mmio_hdr.address, t_ccip_clAddr'(cp2af_sRxPort.c0.data));
|
||||
`endif
|
||||
end
|
||||
MMIO_MEM_ADDR: begin
|
||||
cmd_mem_addr <= $bits(cmd_mem_addr)'(cp2af_sRxPort.c0.data);
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: MMIO_MEM_ADDR: addr=%0h, data=0x%0h", $time, mmio_hdr.address, $bits(cmd_mem_addr)'(cp2af_sRxPort.c0.data));
|
||||
dpi_trace("%d: MMIO_MEM_ADDR: addr=%0h, data=0x%0h\n", $time, mmio_hdr.address, $bits(cmd_mem_addr)'(cp2af_sRxPort.c0.data));
|
||||
`endif
|
||||
end
|
||||
MMIO_DATA_SIZE: begin
|
||||
cmd_data_size <= $bits(cmd_data_size)'(cp2af_sRxPort.c0.data);
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: MMIO_DATA_SIZE: addr=%0h, data=%0d", $time, mmio_hdr.address, $bits(cmd_data_size)'(cp2af_sRxPort.c0.data));
|
||||
dpi_trace("%d: MMIO_DATA_SIZE: addr=%0h, data=%0d\n", $time, mmio_hdr.address, $bits(cmd_data_size)'(cp2af_sRxPort.c0.data));
|
||||
`endif
|
||||
end
|
||||
MMIO_CMD_TYPE: begin
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: MMIO_CMD_TYPE: addr=%0h, data=%0d", $time, mmio_hdr.address, $bits(cmd_type)'(cp2af_sRxPort.c0.data));
|
||||
dpi_trace("%d: MMIO_CMD_TYPE: addr=%0h, data=%0d\n", $time, mmio_hdr.address, $bits(cmd_type)'(cp2af_sRxPort.c0.data));
|
||||
`endif
|
||||
end
|
||||
`ifdef SCOPE
|
||||
MMIO_SCOPE_WRITE: begin
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: MMIO_SCOPE_WRITE: addr=%0h, data=%0h", $time, mmio_hdr.address, 64'(cp2af_sRxPort.c0.data));
|
||||
dpi_trace("%d: MMIO_SCOPE_WRITE: addr=%0h, data=%0h\n", $time, mmio_hdr.address, 64'(cp2af_sRxPort.c0.data));
|
||||
`endif
|
||||
end
|
||||
`endif
|
||||
default: begin
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: Unknown MMIO Wr: addr=%0h, data=%0h", $time, mmio_hdr.address, $bits(cmd_data_size)'(cp2af_sRxPort.c0.data));
|
||||
dpi_trace("%d: Unknown MMIO Wr: addr=%0h, data=%0h\n", $time, mmio_hdr.address, $bits(cmd_data_size)'(cp2af_sRxPort.c0.data));
|
||||
`endif
|
||||
end
|
||||
endcase
|
||||
|
@ -258,7 +244,7 @@ always @(posedge clk) begin
|
|||
mmio_tx.data <= 64'({cout_q_dout, !cout_q_empty, 8'(state)});
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
if (state != STATE_WIDTH'(mmio_tx.data)) begin
|
||||
$display("%t: MMIO_STATUS: addr=%0h, state=%0d", $time, mmio_hdr.address, state);
|
||||
dpi_trace("%d: MMIO_STATUS: addr=%0h, state=%0d\n", $time, mmio_hdr.address, state);
|
||||
end
|
||||
`endif
|
||||
end
|
||||
|
@ -266,20 +252,20 @@ always @(posedge clk) begin
|
|||
MMIO_SCOPE_READ: begin
|
||||
mmio_tx.data <= cmd_scope_rdata;
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: MMIO_SCOPE_READ: addr=%0h, data=%0h", $time, mmio_hdr.address, cmd_scope_rdata);
|
||||
dpi_trace("%d: MMIO_SCOPE_READ: addr=%0h, data=%0h\n", $time, mmio_hdr.address, cmd_scope_rdata);
|
||||
`endif
|
||||
end
|
||||
`endif
|
||||
MMIO_DEV_CAPS: begin
|
||||
mmio_tx.data <= dev_caps;
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: MMIO_DEV_CAPS: addr=%0h, data=%0h", $time, mmio_hdr.address, dev_caps);
|
||||
dpi_trace("%d: MMIO_DEV_CAPS: addr=%0h, data=%0h\n", $time, mmio_hdr.address, dev_caps);
|
||||
`endif
|
||||
end
|
||||
default: begin
|
||||
mmio_tx.data <= 64'h0;
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: Unknown MMIO Rd: addr=%0h", $time, mmio_hdr.address);
|
||||
dpi_trace("%d: Unknown MMIO Rd: addr=%0h\n", $time, mmio_hdr.address);
|
||||
`endif
|
||||
end
|
||||
endcase
|
||||
|
@ -313,19 +299,19 @@ always @(posedge clk) begin
|
|||
case (cmd_type)
|
||||
CMD_MEM_READ: begin
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: STATE READ: ia=%0h addr=%0h size=%0d", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size);
|
||||
dpi_trace("%d: STATE READ: ia=%0h addr=%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size);
|
||||
`endif
|
||||
state <= STATE_READ;
|
||||
end
|
||||
CMD_MEM_WRITE: begin
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: STATE WRITE: ia=%0h addr=%0h size=%0d", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size);
|
||||
dpi_trace("%d: STATE WRITE: ia=%0h addr=%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size);
|
||||
`endif
|
||||
state <= STATE_WRITE;
|
||||
end
|
||||
CMD_RUN: begin
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: STATE START", $time);
|
||||
dpi_trace("%d: STATE START\n", $time);
|
||||
`endif
|
||||
vx_reset <= 1;
|
||||
state <= STATE_START;
|
||||
|
@ -340,7 +326,7 @@ always @(posedge clk) begin
|
|||
if (cmd_read_done) begin
|
||||
state <= STATE_IDLE;
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: STATE IDLE", $time);
|
||||
dpi_trace("%d: STATE IDLE\n", $time);
|
||||
`endif
|
||||
end
|
||||
end
|
||||
|
@ -349,7 +335,7 @@ always @(posedge clk) begin
|
|||
if (cmd_write_done) begin
|
||||
state <= STATE_IDLE;
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: STATE IDLE", $time);
|
||||
dpi_trace("%d: STATE IDLE\n", $time);
|
||||
`endif
|
||||
end
|
||||
end
|
||||
|
@ -361,7 +347,7 @@ always @(posedge clk) begin
|
|||
vx_started <= 0;
|
||||
state <= STATE_IDLE;
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: STATE IDLE", $time);
|
||||
dpi_trace("%d: STATE IDLE\n", $time);
|
||||
`endif
|
||||
end
|
||||
end else begin
|
||||
|
@ -527,17 +513,19 @@ t_local_mem_data mem_rsp_data;
|
|||
wire [AVS_REQ_TAGW:0] mem_rsp_tag;
|
||||
wire mem_rsp_ready;
|
||||
|
||||
`RESET_RELAY (mem_arb_reset);
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_REQS (2),
|
||||
.DATA_WIDTH (LMEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
|
||||
.TAG_IN_WIDTH (AVS_REQ_TAGW),
|
||||
.BUFFERED_REQ (0),
|
||||
.BUFFERED_RSP (0),
|
||||
.TYPE ("X")
|
||||
.TYPE ("P"),
|
||||
.BUFFERED_REQ (2),
|
||||
.BUFFERED_RSP (2)
|
||||
) mem_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (mem_arb_reset),
|
||||
|
||||
// Source request
|
||||
.req_valid_in ({vx_mem_req_arb_valid, cci_mem_req_arb_valid}),
|
||||
|
@ -572,8 +560,9 @@ VX_mem_arb #(
|
|||
|
||||
//--
|
||||
|
||||
`RESET_RELAY (avs_wrapper_reset);
|
||||
|
||||
VX_avs_wrapper #(
|
||||
.NUM_BANKS (NUM_LOCAL_MEM_BANKS),
|
||||
.AVS_DATA_WIDTH (LMEM_DATA_WIDTH),
|
||||
.AVS_ADDR_WIDTH (LMEM_ADDR_WIDTH),
|
||||
.AVS_BURST_WIDTH (LMEM_BURST_CTRW),
|
||||
|
@ -582,7 +571,7 @@ VX_avs_wrapper #(
|
|||
.RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE)
|
||||
) avs_wrapper (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (avs_wrapper_reset),
|
||||
|
||||
// Memory request
|
||||
.mem_req_valid (mem_req_valid),
|
||||
|
@ -657,8 +646,8 @@ VX_pending_size #(
|
|||
) cci_rd_pending_size (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (cci_rd_req_fire),
|
||||
.pop (cci_rdq_pop),
|
||||
.incr (cci_rd_req_fire),
|
||||
.decr (cci_rdq_pop),
|
||||
.full (cci_pending_reads_full),
|
||||
.size (cci_pending_reads),
|
||||
`UNUSED_PIN (empty)
|
||||
|
@ -712,7 +701,7 @@ always @(posedge clk) begin
|
|||
cci_rd_req_addr <= cci_rd_req_addr + 1;
|
||||
cci_rd_req_ctr <= cci_rd_req_ctr + 1;
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: CCI Rd Req: addr=%0h, tag=%0h, rem=%0d, pending=%0d", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr - 1), cci_pending_reads);
|
||||
dpi_trace("%d: CCI Rd Req: addr=%0h, tag=%0h, rem=%0d, pending=%0d\n", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr - 1), cci_pending_reads);
|
||||
`endif
|
||||
end
|
||||
|
||||
|
@ -722,13 +711,13 @@ always @(posedge clk) begin
|
|||
cci_mem_wr_req_addr_base <= cci_mem_wr_req_addr_base + CCI_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE);
|
||||
end
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: CCI Rd Rsp: idx=%0d, ctr=%0d, data=%0h", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data);
|
||||
dpi_trace("%d: CCI Rd Rsp: idx=%0d, ctr=%0d, data=%0h\n", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data);
|
||||
`endif
|
||||
end
|
||||
|
||||
if (cci_rdq_pop) begin
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: CCI Rd Queue Pop: pending=%0d", $time, cci_pending_reads);
|
||||
dpi_trace("%d: CCI Rd Queue Pop: pending=%0d\n", $time, cci_pending_reads);
|
||||
`endif
|
||||
end
|
||||
|
||||
|
@ -740,13 +729,15 @@ always @(posedge clk) begin
|
|||
end
|
||||
end
|
||||
|
||||
`RESET_RELAY (cci_rdq_reset);
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (CCI_RD_QUEUE_DATAW),
|
||||
.SIZE (CCI_RD_QUEUE_SIZE),
|
||||
.OUTPUT_REG (1)
|
||||
.DATAW (CCI_RD_QUEUE_DATAW),
|
||||
.SIZE (CCI_RD_QUEUE_SIZE),
|
||||
.OUT_REG (1)
|
||||
) cci_rd_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (cci_rdq_reset),
|
||||
.push (cci_rdq_push),
|
||||
.pop (cci_rdq_pop),
|
||||
.data_in (cci_rdq_din),
|
||||
|
@ -814,8 +805,8 @@ VX_pending_size #(
|
|||
) cci_wr_pending_size (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (cci_mem_rd_rsp_fire),
|
||||
.pop (cci_wr_rsp_fire),
|
||||
.incr (cci_mem_rd_rsp_fire),
|
||||
.decr (cci_wr_rsp_fire),
|
||||
.empty (cci_pending_writes_empty),
|
||||
.full (cci_pending_writes_full),
|
||||
.size (cci_pending_writes)
|
||||
|
@ -861,19 +852,19 @@ begin
|
|||
cci_wr_req_data <= t_ccip_clData'(cci_mem_rsp_data);
|
||||
|
||||
if (cci_wr_req_fire) begin
|
||||
assert(cci_wr_req_ctr != 0);
|
||||
`ASSERT(cci_wr_req_ctr != 0, ("runtime error"));
|
||||
cci_wr_req_ctr <= cci_wr_req_ctr - CCI_ADDR_WIDTH'(1);
|
||||
if (cci_wr_req_ctr == CCI_ADDR_WIDTH'(1)) begin
|
||||
cci_wr_req_done <= 1;
|
||||
end
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d, data=%0h", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data);
|
||||
dpi_trace("%d: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d, data=%0h\n", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data);
|
||||
`endif
|
||||
end
|
||||
|
||||
if (cci_wr_rsp_fire) begin
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: CCI Wr Rsp: pending=%0d", $time, cci_pending_writes);
|
||||
dpi_trace("%d: CCI Wr Rsp: pending=%0d\n", $time, cci_pending_writes);
|
||||
`endif
|
||||
end
|
||||
end
|
||||
|
@ -890,11 +881,11 @@ assign cci_mem_req_tag = cci_mem_req_rw ? cci_mem_wr_req_ctr : cci_mem_rd_req_
|
|||
|
||||
assign cmd_run_done = !vx_busy;
|
||||
|
||||
Vortex #() vortex (
|
||||
Vortex vortex (
|
||||
`SCOPE_BIND_afu_vortex
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset | vx_reset),
|
||||
.reset (reset || vx_reset),
|
||||
|
||||
// Memory request
|
||||
.mem_req_valid (vx_mem_req_valid),
|
||||
|
@ -1013,6 +1004,8 @@ VX_fifo_queue #(
|
|||
|
||||
wire scope_changed = `SCOPE_TRIGGER;
|
||||
|
||||
`RESET_RELAY (scope_reset);
|
||||
|
||||
VX_scope #(
|
||||
.DATAW ($bits({`SCOPE_DATA_LIST,`SCOPE_UPDATE_LIST})),
|
||||
.BUSW (64),
|
||||
|
@ -1020,7 +1013,7 @@ VX_scope #(
|
|||
.UPDW ($bits({`SCOPE_UPDATE_LIST}))
|
||||
) scope (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (scope_reset),
|
||||
.start (1'b0),
|
||||
.stop (1'b0),
|
||||
.changed (scope_changed),
|
||||
|
|
548
hw/rtl/cache/VX_bank.sv
vendored
Normal file
548
hw/rtl/cache/VX_bank.sv
vendored
Normal file
|
@ -0,0 +1,548 @@
|
|||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_bank #(
|
||||
parameter CACHE_ID = 0,
|
||||
parameter BANK_ID = 0,
|
||||
|
||||
// Number of Word requests per cycle
|
||||
parameter NUM_REQS = 1,
|
||||
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter CACHE_LINE_SIZE = 1,
|
||||
// Number of bankS
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of ports per banks
|
||||
parameter NUM_PORTS = 1,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 1,
|
||||
|
||||
// Core Request Queue Size
|
||||
parameter CREQ_SIZE = 1,
|
||||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 1,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MSHR_SIZE = 1,
|
||||
// Memory Request Queue Size
|
||||
parameter MREQ_SIZE = 1,
|
||||
|
||||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
// core request tag size
|
||||
parameter CORE_TAG_WIDTH = 1,
|
||||
|
||||
// size of tag id in core request tag
|
||||
parameter CORE_TAG_ID_BITS = 0,
|
||||
|
||||
// bank offset from beginning of index range
|
||||
parameter BANK_ADDR_OFFSET = 0,
|
||||
|
||||
parameter MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE),
|
||||
parameter WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS)
|
||||
) (
|
||||
`SCOPE_IO_VX_bank
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
output wire perf_read_misses,
|
||||
output wire perf_write_misses,
|
||||
output wire perf_mshr_stalls,
|
||||
output wire perf_pipe_stalls,
|
||||
`endif
|
||||
|
||||
// Core Request
|
||||
input wire core_req_valid,
|
||||
input wire [NUM_PORTS-1:0] core_req_pmask,
|
||||
input wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] core_req_wsel,
|
||||
input wire [NUM_PORTS-1:0][WORD_SIZE-1:0] core_req_byteen,
|
||||
input wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] core_req_data,
|
||||
input wire [NUM_PORTS-1:0][`REQS_BITS-1:0] core_req_tid,
|
||||
input wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag,
|
||||
input wire core_req_rw,
|
||||
input wire [`LINE_ADDR_WIDTH-1:0] core_req_addr,
|
||||
output wire core_req_ready,
|
||||
|
||||
// Core Response
|
||||
output wire core_rsp_valid,
|
||||
output wire [NUM_PORTS-1:0] core_rsp_pmask,
|
||||
output wire [NUM_PORTS-1:0][`REQS_BITS-1:0] core_rsp_tid,
|
||||
output wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
|
||||
output wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
|
||||
input wire core_rsp_ready,
|
||||
|
||||
// Memory request
|
||||
output wire mem_req_valid,
|
||||
output wire mem_req_rw,
|
||||
output wire [NUM_PORTS-1:0] mem_req_pmask,
|
||||
output wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mem_req_byteen,
|
||||
output wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mem_req_wsel,
|
||||
output wire [`LINE_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire [MSHR_ADDR_WIDTH-1:0] mem_req_id,
|
||||
output wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mem_req_data,
|
||||
input wire mem_req_ready,
|
||||
|
||||
// Memory response
|
||||
input wire mem_rsp_valid,
|
||||
input wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id,
|
||||
input wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data,
|
||||
output wire mem_rsp_ready,
|
||||
|
||||
// flush
|
||||
input wire flush_enable,
|
||||
input wire [`LINE_SELECT_BITS-1:0] flush_addr
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_TAG_ID_BITS)
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
wire [31:0] debug_pc_sel, debug_pc_st0, debug_pc_st1;
|
||||
wire [`NW_BITS-1:0] debug_wid_sel, debug_wid_st0, debug_wid_st1;
|
||||
`IGNORE_UNUSED_END
|
||||
`endif
|
||||
|
||||
wire [NUM_PORTS-1:0] creq_pmask;
|
||||
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] creq_wsel;
|
||||
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] creq_byteen;
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] creq_data;
|
||||
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] creq_tid;
|
||||
wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] creq_tag;
|
||||
wire creq_rw;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] creq_addr;
|
||||
|
||||
wire creq_valid, creq_ready;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + `LINE_ADDR_WIDTH + NUM_PORTS * (1 + WORD_SELECT_BITS + WORD_SIZE + `WORD_WIDTH + `REQS_BITS + CORE_TAG_WIDTH)),
|
||||
.SIZE (CREQ_SIZE)
|
||||
) core_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.ready_in (core_req_ready),
|
||||
.valid_in (core_req_valid),
|
||||
.data_in ({core_req_rw, core_req_addr, core_req_pmask, core_req_wsel, core_req_byteen, core_req_data, core_req_tid, core_req_tag}),
|
||||
.data_out ({creq_rw, creq_addr, creq_pmask, creq_wsel, creq_byteen, creq_data, creq_tid, creq_tag}),
|
||||
.ready_out (creq_ready),
|
||||
.valid_out (creq_valid)
|
||||
);
|
||||
|
||||
wire mreq_alm_full;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] mem_rsp_addr;
|
||||
wire crsq_valid, crsq_ready;
|
||||
wire crsq_stall;
|
||||
|
||||
wire mshr_valid;
|
||||
wire mshr_ready;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id;
|
||||
wire mshr_alm_full;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_dequeue_id;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] mshr_addr;
|
||||
wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] mshr_tag;
|
||||
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mshr_wsel;
|
||||
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] mshr_tid;
|
||||
wire [NUM_PORTS-1:0] mshr_pmask;
|
||||
|
||||
wire [`LINE_ADDR_WIDTH-1:0] addr_st0, addr_st1;
|
||||
wire is_read_st0, is_read_st1;
|
||||
wire is_write_st0, is_write_st1;
|
||||
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] wsel_st0, wsel_st1;
|
||||
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] byteen_st0, byteen_st1;
|
||||
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] req_tid_st0, req_tid_st1;
|
||||
wire [NUM_PORTS-1:0] pmask_st0, pmask_st1;
|
||||
wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] tag_st0, tag_st1;
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] rdata_st1;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] wdata_st0, wdata_st1;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_id_st0, mshr_id_st1;
|
||||
wire valid_st0, valid_st1;
|
||||
wire is_fill_st0, is_fill_st1;
|
||||
wire is_mshr_st0, is_mshr_st1;
|
||||
wire miss_st0, miss_st1;
|
||||
wire is_flush_st0;
|
||||
wire mshr_pending_st0, mshr_pending_st1;
|
||||
|
||||
// prevent read-during-write hazard when accessing tags/data block RAMs
|
||||
wire rdw_fill_hazard = valid_st0 && is_fill_st0;
|
||||
wire rdw_write_hazard = valid_st0 && is_write_st0 && ~creq_rw;
|
||||
|
||||
// determine which queue to pop next in priority order
|
||||
wire mshr_grant = !flush_enable;
|
||||
wire mshr_enable = mshr_grant && mshr_valid;
|
||||
|
||||
wire mrsq_grant = !flush_enable && !mshr_enable;
|
||||
wire mrsq_enable = mrsq_grant && mem_rsp_valid;
|
||||
wire creq_grant = !flush_enable && !mshr_enable && !mrsq_enable;
|
||||
|
||||
wire creq_enable = creq_grant && creq_valid;
|
||||
|
||||
assign mshr_ready = mshr_grant
|
||||
&& !rdw_fill_hazard // prevent read-during-write hazard
|
||||
&& !crsq_stall; // ensure core_rsp_queue not full
|
||||
|
||||
|
||||
assign mem_rsp_ready = mrsq_grant
|
||||
&& !crsq_stall; // ensure core_rsp_queue not full
|
||||
|
||||
assign creq_ready = creq_grant
|
||||
&& !rdw_write_hazard // prevent read-during-write hazard
|
||||
&& !mreq_alm_full // ensure mem_req_queue not full
|
||||
&& !mshr_alm_full // ensure mshr not full
|
||||
&& !crsq_stall; // ensure core_rsp_queue not full
|
||||
|
||||
wire flush_fire = flush_enable;
|
||||
wire mshr_fire = mshr_valid && mshr_ready;
|
||||
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
|
||||
wire creq_fire = creq_valid && creq_ready;
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
|
||||
assign {debug_wid_sel, debug_pc_sel} = mshr_enable ? mshr_tag[0][`CACHE_REQ_INFO_RNG] : creq_tag[0][`CACHE_REQ_INFO_RNG];
|
||||
end else begin
|
||||
assign {debug_wid_sel, debug_pc_sel} = 0;
|
||||
end
|
||||
`endif
|
||||
|
||||
wire [`CACHE_LINE_WIDTH-1:0] wdata_sel;
|
||||
assign wdata_sel[(NUM_PORTS * `WORD_WIDTH)-1:0] = (mem_rsp_valid || !WRITE_ENABLE) ? mem_rsp_data[(NUM_PORTS * `WORD_WIDTH)-1:0] : creq_data;
|
||||
for (genvar i = NUM_PORTS * `WORD_WIDTH; i < `CACHE_LINE_WIDTH; ++i) begin
|
||||
assign wdata_sel[i] = mem_rsp_data[i];
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + NUM_PORTS * (WORD_SELECT_BITS + WORD_SIZE + `REQS_BITS + 1 + CORE_TAG_WIDTH) + MSHR_ADDR_WIDTH),
|
||||
.RESETW (1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!crsq_stall),
|
||||
.data_in ({
|
||||
flush_fire || mshr_fire || mem_rsp_fire || creq_fire,
|
||||
flush_enable,
|
||||
mshr_enable,
|
||||
mrsq_enable,
|
||||
creq_enable && ~creq_rw,
|
||||
creq_enable && creq_rw,
|
||||
flush_enable ? `LINE_ADDR_WIDTH'(flush_addr) : (mshr_valid ? mshr_addr : (mem_rsp_valid ? mem_rsp_addr : creq_addr)),
|
||||
wdata_sel,
|
||||
mshr_valid ? mshr_wsel : creq_wsel,
|
||||
creq_byteen,
|
||||
mshr_valid ? mshr_tid : creq_tid,
|
||||
mshr_valid ? mshr_pmask : creq_pmask,
|
||||
mshr_valid ? mshr_tag : creq_tag,
|
||||
mshr_valid ? mshr_dequeue_id : mem_rsp_id
|
||||
}),
|
||||
.data_out ({valid_st0, is_flush_st0, is_mshr_st0, is_fill_st0, is_read_st0, is_write_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_id_st0})
|
||||
);
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
|
||||
assign {debug_wid_st0, debug_pc_st0} = tag_st0[0][`CACHE_REQ_INFO_RNG];
|
||||
end else begin
|
||||
assign {debug_wid_st0, debug_pc_st0} = 0;
|
||||
end
|
||||
`endif
|
||||
|
||||
wire do_fill_st0 = valid_st0 && is_fill_st0;
|
||||
wire do_flush_st0 = valid_st0 && is_flush_st0;
|
||||
wire do_lookup_st0 = valid_st0 && ~(is_fill_st0 || is_flush_st0);
|
||||
|
||||
wire tag_match_st0;
|
||||
|
||||
VX_tag_access #(
|
||||
.BANK_ID (BANK_ID),
|
||||
.CACHE_ID (CACHE_ID),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.CACHE_LINE_SIZE (CACHE_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.BANK_ADDR_OFFSET (BANK_ADDR_OFFSET)
|
||||
) tag_access (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
.debug_pc (debug_pc_st0),
|
||||
.debug_wid (debug_wid_st0),
|
||||
`endif
|
||||
.stall (crsq_stall),
|
||||
|
||||
// read/Fill
|
||||
.lookup (do_lookup_st0),
|
||||
.addr (addr_st0),
|
||||
.fill (do_fill_st0),
|
||||
.flush (do_flush_st0),
|
||||
.tag_match (tag_match_st0)
|
||||
);
|
||||
|
||||
// we have a core request hit
|
||||
assign miss_st0 = (is_read_st0 || is_write_st0) && ~tag_match_st0;
|
||||
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_id_a_st0 = (is_read_st0 || is_write_st0) ? mshr_alloc_id : mshr_id_st0;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + NUM_PORTS * (WORD_SELECT_BITS + WORD_SIZE + `REQS_BITS + 1 + CORE_TAG_WIDTH) + MSHR_ADDR_WIDTH + 1),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!crsq_stall),
|
||||
.data_in ({valid_st0, is_mshr_st0, is_fill_st0, is_read_st0, is_write_st0, miss_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_id_a_st0, mshr_pending_st0}),
|
||||
.data_out ({valid_st1, is_mshr_st1, is_fill_st1, is_read_st1, is_write_st1, miss_st1, addr_st1, wdata_st1, wsel_st1, byteen_st1, req_tid_st1, pmask_st1, tag_st1, mshr_id_st1, mshr_pending_st1})
|
||||
);
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
|
||||
assign {debug_wid_st1, debug_pc_st1} = tag_st1[0][`CACHE_REQ_INFO_RNG];
|
||||
end else begin
|
||||
assign {debug_wid_st1, debug_pc_st1} = 0;
|
||||
end
|
||||
`endif
|
||||
|
||||
wire do_read_st0 = valid_st0 && is_read_st0;
|
||||
wire do_read_st1 = valid_st1 && is_read_st1;
|
||||
wire do_fill_st1 = valid_st1 && is_fill_st1;
|
||||
wire do_write_st1 = valid_st1 && is_write_st1;
|
||||
wire do_mshr_st1 = valid_st1 && is_mshr_st1;
|
||||
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] creq_data_st1 = wdata_st1[0 +: NUM_PORTS * `WORD_WIDTH];
|
||||
`UNUSED_VAR (wdata_st1)
|
||||
|
||||
VX_data_access #(
|
||||
.BANK_ID (BANK_ID),
|
||||
.CACHE_ID (CACHE_ID),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.CACHE_LINE_SIZE(CACHE_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_PORTS (NUM_PORTS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE)
|
||||
) data_access (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
.debug_pc (debug_pc_st1),
|
||||
.debug_wid (debug_wid_st1),
|
||||
`endif
|
||||
.stall (crsq_stall),
|
||||
|
||||
.read (do_read_st1 || do_mshr_st1),
|
||||
.fill (do_fill_st1),
|
||||
.write (do_write_st1 && !miss_st1),
|
||||
.addr (addr_st1),
|
||||
.wsel (wsel_st1),
|
||||
.pmask (pmask_st1),
|
||||
.byteen (byteen_st1),
|
||||
.fill_data (wdata_st1),
|
||||
.write_data (creq_data_st1),
|
||||
.read_data (rdata_st1)
|
||||
);
|
||||
|
||||
wire mshr_allocate = do_read_st0 && !crsq_stall;
|
||||
wire mshr_replay = do_fill_st0 && !crsq_stall;
|
||||
wire mshr_lookup = mshr_allocate;
|
||||
wire mshr_release = do_read_st1 && !miss_st1 && !crsq_stall;
|
||||
|
||||
VX_pending_size #(
|
||||
.SIZE (MSHR_SIZE)
|
||||
) mshr_pending_size (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.incr (creq_fire && ~creq_rw),
|
||||
.decr (mshr_fire || mshr_release),
|
||||
.full (mshr_alm_full),
|
||||
`UNUSED_PIN (size),
|
||||
`UNUSED_PIN (empty)
|
||||
);
|
||||
|
||||
VX_miss_resrv #(
|
||||
.BANK_ID (BANK_ID),
|
||||
.CACHE_ID (CACHE_ID),
|
||||
.CACHE_LINE_SIZE (CACHE_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_PORTS (NUM_PORTS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.MSHR_SIZE (MSHR_SIZE),
|
||||
.CORE_TAG_WIDTH (CORE_TAG_WIDTH)
|
||||
) miss_resrv (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
.deq_debug_pc (debug_pc_sel),
|
||||
.deq_debug_wid (debug_wid_sel),
|
||||
.lkp_debug_pc (debug_pc_st0),
|
||||
.lkp_debug_wid (debug_wid_st0),
|
||||
.rel_debug_pc (debug_pc_st1),
|
||||
.rel_debug_wid (debug_wid_st1),
|
||||
`endif
|
||||
|
||||
// allocate
|
||||
.allocate_valid (mshr_allocate),
|
||||
.allocate_addr (addr_st0),
|
||||
.allocate_data ({wsel_st0, tag_st0, req_tid_st0, pmask_st0}),
|
||||
.allocate_id (mshr_alloc_id),
|
||||
`UNUSED_PIN (allocate_ready),
|
||||
|
||||
// lookup
|
||||
.lookup_valid (mshr_lookup),
|
||||
.lookup_replay (mshr_replay),
|
||||
.lookup_id (mshr_alloc_id),
|
||||
.lookup_addr (addr_st0),
|
||||
.lookup_match (mshr_pending_st0),
|
||||
|
||||
// fill
|
||||
.fill_valid (mem_rsp_fire),
|
||||
.fill_id (mem_rsp_id),
|
||||
.fill_addr (mem_rsp_addr),
|
||||
|
||||
// dequeue
|
||||
.dequeue_valid (mshr_valid),
|
||||
.dequeue_id (mshr_dequeue_id),
|
||||
.dequeue_addr (mshr_addr),
|
||||
.dequeue_data ({mshr_wsel, mshr_tag, mshr_tid, mshr_pmask}),
|
||||
.dequeue_ready (mshr_ready),
|
||||
|
||||
// release
|
||||
.release_valid (mshr_release),
|
||||
.release_id (mshr_id_st1)
|
||||
);
|
||||
|
||||
// Enqueue core response
|
||||
|
||||
wire [NUM_PORTS-1:0] crsq_pmask;
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] crsq_data;
|
||||
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] crsq_tid;
|
||||
wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] crsq_tag;
|
||||
|
||||
assign crsq_valid = (do_read_st1 && !miss_st1)
|
||||
|| do_mshr_st1;
|
||||
|
||||
assign crsq_stall = crsq_valid && !crsq_ready;
|
||||
|
||||
assign crsq_pmask = pmask_st1;
|
||||
assign crsq_tid = req_tid_st1;
|
||||
assign crsq_data = rdata_st1;
|
||||
assign crsq_tag = tag_st1;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (NUM_PORTS * (CORE_TAG_WIDTH + 1 + `WORD_WIDTH + `REQS_BITS)),
|
||||
.SIZE (CRSQ_SIZE),
|
||||
.OUT_REG (1)
|
||||
) core_rsp_req (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (crsq_valid),
|
||||
.data_in ({crsq_tag, crsq_pmask, crsq_data, crsq_tid}),
|
||||
.ready_in (crsq_ready),
|
||||
.valid_out (core_rsp_valid),
|
||||
.data_out ({core_rsp_tag, core_rsp_pmask, core_rsp_data, core_rsp_tid}),
|
||||
.ready_out (core_rsp_ready)
|
||||
);
|
||||
|
||||
// Enqueue memory request
|
||||
|
||||
wire mreq_push, mreq_pop, mreq_empty;
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mreq_data;
|
||||
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mreq_byteen;
|
||||
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mreq_wsel;
|
||||
wire [NUM_PORTS-1:0] mreq_pmask;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] mreq_addr;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mreq_id;
|
||||
wire mreq_rw;
|
||||
|
||||
assign mreq_push = (do_read_st1 && miss_st1 && !mshr_pending_st1)
|
||||
|| do_write_st1;
|
||||
|
||||
assign mreq_pop = mem_req_valid && mem_req_ready;
|
||||
|
||||
assign mreq_rw = WRITE_ENABLE && is_write_st1;
|
||||
assign mreq_addr = addr_st1;
|
||||
assign mreq_id = mshr_id_st1;
|
||||
assign mreq_pmask= pmask_st1;
|
||||
assign mreq_wsel = wsel_st1;
|
||||
assign mreq_byteen = byteen_st1;
|
||||
assign mreq_data = creq_data_st1;
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (1 + `LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_PORTS * (1 + WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH)),
|
||||
.SIZE (MREQ_SIZE),
|
||||
.ALM_FULL (MREQ_SIZE-2),
|
||||
.OUT_REG (1 == NUM_BANKS)
|
||||
) mem_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (mreq_push),
|
||||
.pop (mreq_pop),
|
||||
.data_in ({mreq_rw, mreq_addr, mreq_id, mreq_pmask, mreq_byteen, mreq_wsel, mreq_data}),
|
||||
.data_out ({mem_req_rw, mem_req_addr, mem_req_id, mem_req_pmask, mem_req_byteen, mem_req_wsel, mem_req_data}),
|
||||
.empty (mreq_empty),
|
||||
.alm_full (mreq_alm_full),
|
||||
`UNUSED_PIN (full),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
assign mem_req_valid = !mreq_empty;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`SCOPE_ASSIGN (valid_st0, valid_st0);
|
||||
`SCOPE_ASSIGN (valid_st1, valid_st1);
|
||||
`SCOPE_ASSIGN (is_fill_st0, is_fill_st0);
|
||||
`SCOPE_ASSIGN (is_mshr_st0, is_mshr_st0);
|
||||
`SCOPE_ASSIGN (miss_st0, miss_st0);
|
||||
`SCOPE_ASSIGN (crsq_stall, crsq_stall);
|
||||
`SCOPE_ASSIGN (mreq_alm_full, mreq_alm_full);
|
||||
`SCOPE_ASSIGN (mshr_alm_full, mshr_alm_full);
|
||||
`SCOPE_ASSIGN (addr_st0, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID));
|
||||
`SCOPE_ASSIGN (addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
assign perf_read_misses = do_read_st1 && miss_st1;
|
||||
assign perf_write_misses = do_write_st1 && miss_st1;
|
||||
assign perf_pipe_stalls = crsq_stall || mreq_alm_full || mshr_alm_full;
|
||||
assign perf_mshr_stalls = mshr_alm_full;
|
||||
`endif
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_BANK
|
||||
wire crsq_fire = crsq_valid && crsq_ready;
|
||||
wire pipeline_stall = (mshr_valid || mem_rsp_valid || creq_valid)
|
||||
&& ~(mshr_fire || mem_rsp_fire || creq_fire);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (pipeline_stall) begin
|
||||
dpi_trace("%d: *** cache%0d:%0d stall: crsq=%b, mreq=%b, mshr=%b\n", $time, CACHE_ID, BANK_ID, crsq_stall, mreq_alm_full, mshr_alm_full);
|
||||
end
|
||||
if (flush_enable) begin
|
||||
dpi_trace("%d: cache%0d:%0d flush: addr=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(flush_addr, BANK_ID));
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
dpi_trace("%d: cache%0d:%0d fill-rsp: addr=%0h, id=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data);
|
||||
end
|
||||
if (mshr_fire) begin
|
||||
dpi_trace("%d: cache%0d:%0d mshr-pop: addr=%0h, tag=%0h, pmask=%b, tid=%0d, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mshr_addr, BANK_ID), mshr_tag, mshr_pmask, mshr_tid, debug_wid_sel, debug_pc_sel);
|
||||
end
|
||||
if (creq_fire) begin
|
||||
if (creq_rw)
|
||||
dpi_trace("%d: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, byteen=%b, data=%0h, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, creq_data, debug_wid_sel, debug_pc_sel);
|
||||
else
|
||||
dpi_trace("%d: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, byteen=%b, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, debug_wid_sel, debug_pc_sel);
|
||||
end
|
||||
if (crsq_fire) begin
|
||||
dpi_trace("%d: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, pmask=%b, tid=%0d, data=%0h, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag, crsq_pmask, crsq_tid, crsq_data, debug_wid_st1, debug_pc_st1);
|
||||
end
|
||||
if (mreq_push) begin
|
||||
if (is_write_st1)
|
||||
dpi_trace("%d: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_data, mreq_byteen, debug_wid_st1, debug_pc_st1);
|
||||
else
|
||||
dpi_trace("%d: cache%0d:%0d fill-req: addr=%0h, id=%0d, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_id, debug_wid_st1, debug_pc_st1);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
585
hw/rtl/cache/VX_bank.v
vendored
585
hw/rtl/cache/VX_bank.v
vendored
|
@ -1,585 +0,0 @@
|
|||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_bank #(
|
||||
parameter CACHE_ID = 0,
|
||||
parameter BANK_ID = 0,
|
||||
|
||||
// Number of Word requests per cycle
|
||||
parameter NUM_REQS = 1,
|
||||
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter CACHE_LINE_SIZE = 1,
|
||||
// Number of bankS
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of ports per banks
|
||||
parameter NUM_PORTS = 1,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 1,
|
||||
|
||||
// Core Request Queue Size
|
||||
parameter CREQ_SIZE = 1,
|
||||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 1,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MSHR_SIZE = 1,
|
||||
// Memory Request Queue Size
|
||||
parameter MREQ_SIZE = 1,
|
||||
|
||||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
// core request tag size
|
||||
parameter CORE_TAG_WIDTH = 1,
|
||||
|
||||
// size of tag id in core request tag
|
||||
parameter CORE_TAG_ID_BITS = 0,
|
||||
|
||||
// bank offset from beginning of index range
|
||||
parameter BANK_ADDR_OFFSET = 0
|
||||
) (
|
||||
`SCOPE_IO_VX_bank
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
output wire perf_read_misses,
|
||||
output wire perf_write_misses,
|
||||
output wire perf_mshr_stalls,
|
||||
output wire perf_pipe_stalls,
|
||||
`endif
|
||||
|
||||
// Core Request
|
||||
input wire core_req_valid,
|
||||
input wire [NUM_PORTS-1:0] core_req_pmask,
|
||||
input wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] core_req_wsel,
|
||||
input wire [NUM_PORTS-1:0][WORD_SIZE-1:0] core_req_byteen,
|
||||
input wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] core_req_data,
|
||||
input wire [NUM_PORTS-1:0][`REQS_BITS-1:0] core_req_tid,
|
||||
input wire core_req_rw,
|
||||
input wire [`LINE_ADDR_WIDTH-1:0] core_req_addr,
|
||||
input wire [CORE_TAG_WIDTH-1:0] core_req_tag,
|
||||
output wire core_req_ready,
|
||||
|
||||
// Core Response
|
||||
output wire core_rsp_valid,
|
||||
output wire [NUM_PORTS-1:0] core_rsp_pmask,
|
||||
output wire [NUM_PORTS-1:0][`REQS_BITS-1:0] core_rsp_tid,
|
||||
output wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
|
||||
output wire [CORE_TAG_WIDTH-1:0] core_rsp_tag,
|
||||
input wire core_rsp_ready,
|
||||
|
||||
// Memory request
|
||||
output wire mem_req_valid,
|
||||
output wire mem_req_rw,
|
||||
output wire [CACHE_LINE_SIZE-1:0] mem_req_byteen,
|
||||
output wire [`LINE_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire [`CACHE_LINE_WIDTH-1:0] mem_req_data,
|
||||
input wire mem_req_ready,
|
||||
|
||||
// Memory response
|
||||
input wire mem_rsp_valid,
|
||||
input wire [`LINE_ADDR_WIDTH-1:0] mem_rsp_addr,
|
||||
input wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data,
|
||||
output wire mem_rsp_ready,
|
||||
|
||||
// flush
|
||||
input wire flush_enable,
|
||||
input wire [`LINE_SELECT_BITS-1:0] flush_addr
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_TAG_ID_BITS)
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire [31:0] debug_pc_sel, debug_pc_st0, debug_pc_st1;
|
||||
wire [`NW_BITS-1:0] debug_wid_sel, debug_wid_st0, debug_wid_st1;
|
||||
`IGNORE_WARNINGS_END
|
||||
`endif
|
||||
|
||||
wire [NUM_PORTS-1:0] creq_pmask;
|
||||
wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] creq_wsel;
|
||||
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] creq_byteen;
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] creq_data;
|
||||
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] creq_tid;
|
||||
wire creq_rw;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] creq_addr;
|
||||
wire [CORE_TAG_WIDTH-1:0] creq_tag;
|
||||
|
||||
wire creq_out_valid, creq_out_ready;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (CORE_TAG_WIDTH + 1 + `LINE_ADDR_WIDTH + (1 + `UP(`WORD_SELECT_BITS) + WORD_SIZE + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS),
|
||||
.SIZE (CREQ_SIZE),
|
||||
.OUTPUT_REG (CREQ_SIZE > 2)
|
||||
) core_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.ready_in (core_req_ready),
|
||||
.valid_in (core_req_valid),
|
||||
.data_in ({core_req_tag, core_req_rw, core_req_addr, core_req_pmask, core_req_wsel, core_req_byteen, core_req_data, core_req_tid}),
|
||||
.data_out ({creq_tag, creq_rw, creq_addr, creq_pmask, creq_wsel, creq_byteen, creq_data, creq_tid}),
|
||||
.ready_out (creq_out_ready),
|
||||
.valid_out (creq_out_valid)
|
||||
);
|
||||
|
||||
wire mshr_alm_full;
|
||||
wire mshr_pop;
|
||||
wire mshr_valid;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] mshr_addr;
|
||||
wire [CORE_TAG_WIDTH-1:0] mshr_tag;
|
||||
wire [NUM_PORTS-1:0] mshr_pmask;
|
||||
wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] mshr_wsel;
|
||||
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] mshr_tid;
|
||||
|
||||
wire [`LINE_ADDR_WIDTH-1:0] addr_st0, addr_st1;
|
||||
wire mem_rw_st0, mem_rw_st1;
|
||||
wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] wsel_st0, wsel_st1;
|
||||
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] byteen_st0, byteen_st1;
|
||||
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] req_tid_st0, req_tid_st1;
|
||||
wire [NUM_PORTS-1:0] pmask_st0, pmask_st1;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] rdata_st1;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] wdata_st0, wdata_st1;
|
||||
wire [CORE_TAG_WIDTH-1:0] tag_st0, tag_st1;
|
||||
wire valid_st0, valid_st1;
|
||||
wire is_fill_st0, is_fill_st1;
|
||||
wire is_mshr_st0, is_mshr_st1;
|
||||
wire miss_st0, miss_st1;
|
||||
wire prev_miss_dep_st0;
|
||||
wire force_miss_st0, force_miss_st1;
|
||||
wire not_same_prev_mshr_st0, not_same_prev_mshr_st1;
|
||||
wire writeen_unqual_st0, writeen_unqual_st1;
|
||||
wire incoming_fill_unqual_st0, incoming_fill_unqual_st1;
|
||||
wire mshr_pending_st0;
|
||||
wire is_flush_st0;
|
||||
|
||||
wire crsq_in_valid, crsq_in_ready, crsq_in_stall;
|
||||
wire mreq_alm_full;
|
||||
|
||||
wire creq_out_fire = creq_out_valid && creq_out_ready;
|
||||
wire crsq_in_fire = crsq_in_valid && crsq_in_ready;
|
||||
|
||||
VX_pending_size #(
|
||||
.SIZE (MSHR_SIZE)
|
||||
) mshr_pending_size (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (creq_out_fire && !creq_rw),
|
||||
.pop (crsq_in_fire),
|
||||
.full (mshr_alm_full),
|
||||
`UNUSED_PIN (empty),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
// determine which queue to pop next in priority order
|
||||
wire mshr_grant = !mreq_alm_full; // ensure memory request queue not full (deadlock prevention)
|
||||
wire mshr_enable = mshr_grant && mshr_valid;
|
||||
|
||||
wire mrsq_grant = !mshr_enable;
|
||||
wire mrsq_enable = mrsq_grant && mem_rsp_valid;
|
||||
|
||||
wire creq_grant = !mshr_enable && !mrsq_enable && !flush_enable;
|
||||
|
||||
wire is_miss_st1 = (miss_st1 || force_miss_st1);
|
||||
|
||||
assign mshr_pop = mshr_enable
|
||||
&& !(valid_st1 && is_mshr_st1 && is_miss_st1) // do not schedule another mshr request if the previous one missed
|
||||
&& !crsq_in_stall; // ensure core response ready
|
||||
|
||||
assign creq_out_ready = creq_grant
|
||||
&& !mreq_alm_full // ensure memory request ready
|
||||
&& !mshr_alm_full // ensure mshr enqueue ready
|
||||
&& !crsq_in_stall; // ensure core response ready
|
||||
|
||||
assign mem_rsp_ready = mrsq_grant
|
||||
&& !crsq_in_stall; // ensure core response ready
|
||||
|
||||
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
|
||||
assign {debug_wid_sel, debug_pc_sel} = mshr_enable ? mshr_tag[`CACHE_REQ_INFO_RNG] : creq_tag[`CACHE_REQ_INFO_RNG];
|
||||
end else begin
|
||||
assign {debug_wid_sel, debug_pc_sel} = 0;
|
||||
end
|
||||
`endif
|
||||
|
||||
wire [`CACHE_LINE_WIDTH-1:0] creq_line_data;
|
||||
|
||||
if (`WORDS_PER_LINE > 1) begin
|
||||
if (NUM_PORTS > 1) begin
|
||||
reg [`CACHE_LINE_WIDTH-1:0] creq_line_data_r;
|
||||
always @(*) begin
|
||||
creq_line_data_r = 'x;
|
||||
for (integer p = 0; p < NUM_PORTS; p++) begin
|
||||
if (creq_pmask[p]) begin
|
||||
creq_line_data_r[creq_wsel[p] * `WORD_WIDTH +: `WORD_WIDTH] = creq_data[p];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign creq_line_data = creq_line_data_r;
|
||||
end else begin
|
||||
assign creq_line_data = {`WORDS_PER_LINE{creq_data}};
|
||||
end
|
||||
end else begin
|
||||
assign creq_line_data = creq_data;
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + (`UP(`WORD_SELECT_BITS) + WORD_SIZE + `REQS_BITS + 1) * NUM_PORTS + CORE_TAG_WIDTH + 1),
|
||||
.RESETW (1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!crsq_in_stall),
|
||||
.data_in ({
|
||||
flush_enable || mshr_pop || mem_rsp_fire || creq_out_fire,
|
||||
flush_enable,
|
||||
mshr_enable,
|
||||
mrsq_enable || flush_enable,
|
||||
mshr_enable ? 1'b0 : creq_rw,
|
||||
mshr_enable ? mshr_addr : (mem_rsp_valid ? mem_rsp_addr : (flush_enable ? `LINE_ADDR_WIDTH'(flush_addr) : creq_addr)),
|
||||
(mem_rsp_valid || !WRITE_ENABLE) ? mem_rsp_data : creq_line_data,
|
||||
mshr_enable ? mshr_wsel : creq_wsel,
|
||||
creq_byteen,
|
||||
mshr_enable ? mshr_tid : creq_tid,
|
||||
mshr_enable ? mshr_pmask : creq_pmask,
|
||||
mshr_enable ? mshr_tag : creq_tag
|
||||
}),
|
||||
.data_out ({valid_st0, is_flush_st0, is_mshr_st0, is_fill_st0, mem_rw_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0})
|
||||
);
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
|
||||
assign {debug_wid_st0, debug_pc_st0} = tag_st0[`CACHE_REQ_INFO_RNG];
|
||||
end else begin
|
||||
assign {debug_wid_st0, debug_pc_st0} = 0;
|
||||
end
|
||||
`endif
|
||||
|
||||
wire do_lookup_st0 = valid_st0 && ~is_fill_st0;
|
||||
wire do_fill_st0 = valid_st0 && is_fill_st0 && !crsq_in_stall;
|
||||
|
||||
wire tag_match_st0;
|
||||
|
||||
VX_tag_access #(
|
||||
.BANK_ID (BANK_ID),
|
||||
.CACHE_ID (CACHE_ID),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.CACHE_LINE_SIZE (CACHE_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.BANK_ADDR_OFFSET (BANK_ADDR_OFFSET)
|
||||
) tag_access (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
.debug_pc (debug_pc_st0),
|
||||
.debug_wid (debug_wid_st0),
|
||||
`endif
|
||||
|
||||
// read/Fill
|
||||
.lookup (do_lookup_st0),
|
||||
.addr (addr_st0),
|
||||
.fill (do_fill_st0),
|
||||
.is_flush (is_flush_st0),
|
||||
.tag_match (tag_match_st0)
|
||||
);
|
||||
|
||||
// we had a miss with prior request for the current address
|
||||
assign prev_miss_dep_st0 = valid_st1 && is_miss_st1 && (addr_st0 == addr_st1);
|
||||
|
||||
// we have a core request hit
|
||||
assign miss_st0 = !is_fill_st0 && !tag_match_st0;
|
||||
|
||||
// force a miss to ensure commit order when a new request has pending previous requests to same block
|
||||
// also force a miss for mshr requests when previous request was a missed
|
||||
assign force_miss_st0 = (!is_fill_st0 && !is_mshr_st0 && (mshr_pending_st0 || prev_miss_dep_st0))
|
||||
|| (is_mshr_st0 && valid_st1 && is_mshr_st1 && is_miss_st1);
|
||||
|
||||
// previous mshr request doesn't have same address
|
||||
assign not_same_prev_mshr_st0 = valid_st1 && is_mshr_st1 && (addr_st1 != addr_st0);
|
||||
|
||||
// enable write when we have a fill request that is not redundant
|
||||
assign writeen_unqual_st0 = is_fill_st0 && !tag_match_st0;
|
||||
|
||||
// check if incoming memory response match current address
|
||||
assign incoming_fill_unqual_st0 = mem_rsp_valid && (addr_st0 == mem_rsp_addr);
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + (`UP(`WORD_SELECT_BITS) + WORD_SIZE + `REQS_BITS + 1) * NUM_PORTS + CORE_TAG_WIDTH),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!crsq_in_stall),
|
||||
.data_in ({valid_st0, is_mshr_st0, is_fill_st0, writeen_unqual_st0, incoming_fill_unqual_st0, miss_st0, force_miss_st0, mem_rw_st0, not_same_prev_mshr_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0}),
|
||||
.data_out ({valid_st1, is_mshr_st1, is_fill_st1, writeen_unqual_st1, incoming_fill_unqual_st1, miss_st1, force_miss_st1, mem_rw_st1, not_same_prev_mshr_st1, addr_st1, wdata_st1, wsel_st1, byteen_st1, req_tid_st1, pmask_st1, tag_st1})
|
||||
);
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
|
||||
assign {debug_wid_st1, debug_pc_st1} = tag_st1[`CACHE_REQ_INFO_RNG];
|
||||
end else begin
|
||||
assign {debug_wid_st1, debug_pc_st1} = 0;
|
||||
end
|
||||
`endif
|
||||
|
||||
wire writeen_st1 = (WRITE_ENABLE && !is_fill_st1 && mem_rw_st1 && ~is_miss_st1)
|
||||
|| writeen_unqual_st1;
|
||||
|
||||
wire readen_st1 = !is_fill_st1 && !mem_rw_st1;
|
||||
|
||||
wire crsq_push_st1 = readen_st1 && ~is_miss_st1;
|
||||
|
||||
wire mshr_push_st1 = readen_st1 && is_miss_st1;
|
||||
|
||||
wire incoming_fill_st1 = (mem_rsp_valid && (addr_st1 == mem_rsp_addr))
|
||||
|| incoming_fill_unqual_st1;
|
||||
|
||||
wire do_writeback_st1 = !is_fill_st1 && mem_rw_st1;
|
||||
|
||||
wire mreq_push_st1 = (readen_st1 && miss_st1 && (~force_miss_st1 || not_same_prev_mshr_st1) && !incoming_fill_st1)
|
||||
|| do_writeback_st1;
|
||||
|
||||
wire [`WORDS_PER_LINE-1:0][WORD_SIZE-1:0] line_byteen_st1;
|
||||
|
||||
if (`WORDS_PER_LINE > 1) begin
|
||||
reg [CACHE_LINE_SIZE-1:0] line_byteen_r;
|
||||
always @(*) begin
|
||||
line_byteen_r = 0;
|
||||
for (integer p = 0; p < NUM_PORTS; p++) begin
|
||||
if ((NUM_PORTS == 1) || pmask_st1[p]) begin
|
||||
line_byteen_r[wsel_st1[p] * WORD_SIZE +: WORD_SIZE] = byteen_st1[p];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign line_byteen_st1 = line_byteen_r;
|
||||
end else begin
|
||||
assign line_byteen_st1 = byteen_st1;
|
||||
end
|
||||
|
||||
VX_data_access #(
|
||||
.BANK_ID (BANK_ID),
|
||||
.CACHE_ID (CACHE_ID),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.CACHE_LINE_SIZE(CACHE_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE)
|
||||
) data_access (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
.debug_pc (debug_pc_st1),
|
||||
.debug_wid (debug_wid_st1),
|
||||
`endif
|
||||
|
||||
.addr (addr_st1),
|
||||
|
||||
// reading
|
||||
.readen (valid_st1 && readen_st1),
|
||||
.rdata (rdata_st1),
|
||||
|
||||
// writing
|
||||
.writeen (valid_st1 && writeen_st1),
|
||||
.is_fill (is_fill_st1),
|
||||
.byteen (line_byteen_st1),
|
||||
.wdata (wdata_st1)
|
||||
);
|
||||
|
||||
wire mshr_push = valid_st1 && mshr_push_st1;
|
||||
wire mshr_dequeue = valid_st1 && is_mshr_st1 && !mshr_push_st1 && crsq_in_ready;
|
||||
wire mshr_restore = is_mshr_st1;
|
||||
|
||||
// push a missed request as 'ready' if it was a forced miss that actually had a hit
|
||||
// or the fill request for this block is comming
|
||||
wire mshr_init_ready_state = !miss_st1 || incoming_fill_unqual_st1;
|
||||
|
||||
VX_miss_resrv #(
|
||||
.BANK_ID (BANK_ID),
|
||||
.CACHE_ID (CACHE_ID),
|
||||
.CACHE_LINE_SIZE (CACHE_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_PORTS (NUM_PORTS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.MSHR_SIZE (MSHR_SIZE),
|
||||
.ALM_FULL (MSHR_SIZE-2),
|
||||
.CORE_TAG_WIDTH (CORE_TAG_WIDTH)
|
||||
) miss_resrv (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
.deq_debug_pc (debug_pc_sel),
|
||||
.deq_debug_wid (debug_wid_sel),
|
||||
.enq_debug_pc (debug_pc_st1),
|
||||
.enq_debug_wid (debug_wid_st1),
|
||||
`endif
|
||||
|
||||
// enqueue
|
||||
.enqueue (mshr_push),
|
||||
.enqueue_addr (addr_st1),
|
||||
.enqueue_data ({wsel_st1, tag_st1, req_tid_st1, pmask_st1}),
|
||||
.enqueue_is_mshr (mshr_restore),
|
||||
.enqueue_as_ready (mshr_init_ready_state),
|
||||
`UNUSED_PIN (enqueue_almfull),
|
||||
`UNUSED_PIN (enqueue_full),
|
||||
|
||||
// fill
|
||||
.fill_start (mem_rsp_fire),
|
||||
.fill_addr (mem_rsp_addr),
|
||||
|
||||
// lookup
|
||||
.lookup_addr (addr_st0),
|
||||
.lookup_match (mshr_pending_st0),
|
||||
.lookup_fill (do_fill_st0),
|
||||
|
||||
// schedule
|
||||
.schedule (mshr_pop),
|
||||
.schedule_valid (mshr_valid),
|
||||
.schedule_addr (mshr_addr),
|
||||
.schedule_data ({mshr_wsel, mshr_tag, mshr_tid, mshr_pmask}),
|
||||
|
||||
// dequeue
|
||||
.dequeue (mshr_dequeue)
|
||||
);
|
||||
|
||||
// Enqueue core response
|
||||
|
||||
wire [NUM_PORTS-1:0] crsq_pmask;
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] crsq_data;
|
||||
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] crsq_tid;
|
||||
wire [CORE_TAG_WIDTH-1:0] crsq_tag;
|
||||
|
||||
assign crsq_in_valid = valid_st1 && crsq_push_st1;
|
||||
assign crsq_in_stall = crsq_in_valid && !crsq_in_ready;
|
||||
|
||||
assign crsq_pmask = pmask_st1;
|
||||
assign crsq_tid = req_tid_st1;
|
||||
assign crsq_tag = tag_st1;
|
||||
|
||||
if (`WORDS_PER_LINE > 1) begin
|
||||
for (genvar p = 0; p < NUM_PORTS; ++p) begin
|
||||
assign crsq_data[p] = rdata_st1[wsel_st1[p] * `WORD_WIDTH +: `WORD_WIDTH];
|
||||
end
|
||||
end else begin
|
||||
assign crsq_data = rdata_st1;
|
||||
end
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS),
|
||||
.SIZE (CRSQ_SIZE),
|
||||
.OUTPUT_REG (1 == NUM_BANKS)
|
||||
) core_rsp_req (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (crsq_in_valid),
|
||||
.data_in ({crsq_tag, crsq_pmask, crsq_data, crsq_tid}),
|
||||
.ready_in (crsq_in_ready),
|
||||
.valid_out (core_rsp_valid),
|
||||
.data_out ({core_rsp_tag, core_rsp_pmask, core_rsp_data, core_rsp_tid}),
|
||||
.ready_out (core_rsp_ready)
|
||||
);
|
||||
|
||||
// Enqueue memory request
|
||||
|
||||
wire [CACHE_LINE_SIZE-1:0] mreq_byteen;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] mreq_addr;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] mreq_data;
|
||||
wire mreq_push, mreq_pop, mreq_empty, mreq_rw;
|
||||
|
||||
assign mreq_push = valid_st1 && mreq_push_st1;
|
||||
|
||||
assign mreq_pop = mem_req_valid && mem_req_ready;
|
||||
|
||||
assign mreq_rw = WRITE_ENABLE && do_writeback_st1;
|
||||
assign mreq_byteen = mreq_rw ? line_byteen_st1 : {CACHE_LINE_SIZE{1'b1}};
|
||||
assign mreq_addr = addr_st1;
|
||||
assign mreq_data = wdata_st1;
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (1 + CACHE_LINE_SIZE + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH),
|
||||
.SIZE (MREQ_SIZE),
|
||||
.ALM_FULL (MREQ_SIZE-2)
|
||||
) mem_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (mreq_push),
|
||||
.pop (mreq_pop),
|
||||
.data_in ({mreq_rw, mreq_byteen, mreq_addr, mreq_data}),
|
||||
.data_out ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data}),
|
||||
.empty (mreq_empty),
|
||||
.alm_full (mreq_alm_full),
|
||||
`UNUSED_PIN (full),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
assign mem_req_valid = !mreq_empty;
|
||||
|
||||
`SCOPE_ASSIGN (valid_st0, valid_st0);
|
||||
`SCOPE_ASSIGN (valid_st1, valid_st1);
|
||||
`SCOPE_ASSIGN (is_fill_st0, is_fill_st0);
|
||||
`SCOPE_ASSIGN (is_mshr_st0, is_mshr_st0);
|
||||
`SCOPE_ASSIGN (miss_st0, miss_st0);
|
||||
`SCOPE_ASSIGN (force_miss_st0, force_miss_st0);
|
||||
`SCOPE_ASSIGN (mshr_push, mshr_push);
|
||||
`SCOPE_ASSIGN (crsq_in_stall, crsq_in_stall);
|
||||
`SCOPE_ASSIGN (mreq_alm_full, mreq_alm_full);
|
||||
`SCOPE_ASSIGN (mshr_alm_full, mshr_alm_full);
|
||||
`SCOPE_ASSIGN (addr_st0, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID));
|
||||
`SCOPE_ASSIGN (addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
assign perf_read_misses = valid_st1 && !is_fill_st1 && !is_mshr_st1 && miss_st1 && !mem_rw_st1;
|
||||
assign perf_write_misses = valid_st1 && !is_fill_st1 && !is_mshr_st1 && miss_st1 && mem_rw_st1;
|
||||
assign perf_pipe_stalls = crsq_in_stall || mreq_alm_full || mshr_alm_full;
|
||||
assign perf_mshr_stalls = mshr_alm_full;
|
||||
`endif
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_BANK
|
||||
always @(posedge clk) begin
|
||||
/*if (crsq_in_fire && (NUM_PORTS > 1) && $countones(crsq_pmask) > 1) begin
|
||||
$display("%t: *** cache%0d:%0d multi-port-out: pmask=%b, addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, crsq_pmask, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag);
|
||||
end*/
|
||||
if (valid_st1 && !is_fill_st1 && miss_st1 && incoming_fill_st1) begin
|
||||
$display("%t: *** cache%0d:%0d miss with incoming fill - addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
|
||||
assert(!is_mshr_st1);
|
||||
end
|
||||
if (crsq_in_stall || mreq_alm_full || mshr_alm_full) begin
|
||||
$display("%t: *** cache%0d:%0d pipeline-stall: cwbq=%b, dwbq=%b, mshr=%b", $time, CACHE_ID, BANK_ID, crsq_in_stall, mreq_alm_full, mshr_alm_full);
|
||||
end
|
||||
if (flush_enable) begin
|
||||
$display("%t: cache%0d:%0d flush: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(flush_addr, BANK_ID));
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
$display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_data);
|
||||
end
|
||||
if (mshr_pop) begin
|
||||
$display("%t: cache%0d:%0d mshr-pop: addr=%0h, tag=%0h, pmask=%b, tid=%0d, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mshr_addr, BANK_ID), mshr_tag, mshr_pmask, mshr_tid, debug_wid_sel, debug_pc_sel);
|
||||
end
|
||||
if (creq_out_fire) begin
|
||||
if (creq_rw)
|
||||
$display("%t: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, creq_data, debug_wid_sel, debug_pc_sel);
|
||||
else
|
||||
$display("%t: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, debug_wid_sel, debug_pc_sel);
|
||||
end
|
||||
if (crsq_in_fire) begin
|
||||
$display("%t: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, pmask=%b, tid=%0d, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag, crsq_pmask, crsq_tid, crsq_data, debug_wid_st1, debug_pc_st1);
|
||||
end
|
||||
if (mreq_push) begin
|
||||
if (do_writeback_st1)
|
||||
$display("%t: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_data, mreq_byteen, debug_wid_st1, debug_pc_st1);
|
||||
else
|
||||
$display("%t: cache%0d:%0d fill-req: addr=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), debug_wid_st1, debug_pc_st1);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
343
hw/rtl/cache/VX_cache.v → hw/rtl/cache/VX_cache.sv
vendored
343
hw/rtl/cache/VX_cache.v → hw/rtl/cache/VX_cache.sv
vendored
|
@ -18,15 +18,15 @@ module VX_cache #(
|
|||
parameter WORD_SIZE = 4,
|
||||
|
||||
// Core Request Queue Size
|
||||
parameter CREQ_SIZE = 2,
|
||||
parameter CREQ_SIZE = 0,
|
||||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 2,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MSHR_SIZE = 8,
|
||||
// Memory Response Queue Size
|
||||
parameter MRSQ_SIZE = 4,
|
||||
parameter MRSQ_SIZE = 0,
|
||||
// Memory Request Queue Size
|
||||
parameter MREQ_SIZE = 2,
|
||||
parameter MREQ_SIZE = 4,
|
||||
|
||||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
@ -44,13 +44,15 @@ module VX_cache #(
|
|||
parameter BANK_ADDR_OFFSET = 0,
|
||||
|
||||
// enable bypass for non-cacheable addresses
|
||||
parameter NC_ENABLE = 0
|
||||
parameter NC_ENABLE = 0,
|
||||
|
||||
parameter WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS)
|
||||
) (
|
||||
`SCOPE_IO_VX_cache
|
||||
|
||||
// PERF
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_cache_if perf_cache_if,
|
||||
VX_perf_cache_if.master perf_cache_if,
|
||||
`endif
|
||||
|
||||
input wire clk,
|
||||
|
@ -91,6 +93,8 @@ module VX_cache #(
|
|||
`STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value"))
|
||||
`STATIC_ASSERT(NUM_PORTS <= NUM_BANKS, ("invalid value"))
|
||||
|
||||
localparam MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE);
|
||||
localparam MEM_TAG_IN_WIDTH = `BANK_SELECT_BITS + MSHR_ADDR_WIDTH;
|
||||
localparam CORE_TAG_X_WIDTH = CORE_TAG_WIDTH - NC_ENABLE;
|
||||
localparam CORE_TAG_ID_X_BITS = (CORE_TAG_ID_BITS != 0) ? (CORE_TAG_ID_BITS - NC_ENABLE) : CORE_TAG_ID_BITS;
|
||||
|
||||
|
@ -103,6 +107,117 @@ module VX_cache #(
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire mem_req_valid_sb;
|
||||
wire mem_req_rw_sb;
|
||||
wire [CACHE_LINE_SIZE-1:0] mem_req_byteen_sb;
|
||||
wire [`MEM_ADDR_WIDTH-1:0] mem_req_addr_sb;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] mem_req_data_sb;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_sb;
|
||||
wire mem_req_ready_sb;
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (1+CACHE_LINE_SIZE+`MEM_ADDR_WIDTH+`CACHE_LINE_WIDTH+MEM_TAG_WIDTH),
|
||||
.PASSTHRU (1 == NUM_BANKS)
|
||||
) mem_req_sbuf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mem_req_valid_sb),
|
||||
.ready_in (mem_req_ready_sb),
|
||||
.data_in ({mem_req_rw_sb, mem_req_byteen_sb, mem_req_addr_sb, mem_req_data_sb, mem_req_tag_sb}),
|
||||
.data_out ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data, mem_req_tag}),
|
||||
.valid_out (mem_req_valid),
|
||||
.ready_out (mem_req_ready)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [`CORE_RSP_TAGS-1:0] core_rsp_valid_sb;
|
||||
wire [NUM_REQS-1:0] core_rsp_tmask_sb;
|
||||
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_sb;
|
||||
wire [`CORE_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_sb;
|
||||
wire [`CORE_RSP_TAGS-1:0] core_rsp_ready_sb;
|
||||
|
||||
if (CORE_TAG_ID_BITS != 0) begin
|
||||
VX_skid_buffer #(
|
||||
.DATAW (NUM_REQS + NUM_REQS*`WORD_WIDTH + CORE_TAG_WIDTH),
|
||||
.PASSTHRU (1 == NUM_BANKS)
|
||||
) core_rsp_sbuf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (core_rsp_valid_sb),
|
||||
.ready_in (core_rsp_ready_sb),
|
||||
.data_in ({core_rsp_tmask_sb, core_rsp_data_sb, core_rsp_tag_sb}),
|
||||
.data_out ({core_rsp_tmask, core_rsp_data, core_rsp_tag}),
|
||||
.valid_out (core_rsp_valid),
|
||||
.ready_out (core_rsp_ready)
|
||||
);
|
||||
end else begin
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
VX_skid_buffer #(
|
||||
.DATAW (1 + `WORD_WIDTH + CORE_TAG_WIDTH),
|
||||
.PASSTHRU (1 == NUM_BANKS)
|
||||
) core_rsp_sbuf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (core_rsp_valid_sb[i]),
|
||||
.ready_in (core_rsp_ready_sb[i]),
|
||||
.data_in ({core_rsp_tmask_sb[i], core_rsp_data_sb[i], core_rsp_tag_sb[i]}),
|
||||
.data_out ({core_rsp_tmask[i], core_rsp_data[i], core_rsp_tag[i]}),
|
||||
.valid_out (core_rsp_valid[i]),
|
||||
.ready_out (core_rsp_ready[i])
|
||||
);
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mem_req_byteen_p;
|
||||
wire [NUM_PORTS-1:0] mem_req_pmask_p;
|
||||
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mem_req_wsel_p;
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mem_req_data_p;
|
||||
wire mem_req_rw_p;
|
||||
|
||||
if (WRITE_ENABLE) begin
|
||||
if (`WORDS_PER_LINE > 1) begin
|
||||
reg [CACHE_LINE_SIZE-1:0] mem_req_byteen_r;
|
||||
reg [`CACHE_LINE_WIDTH-1:0] mem_req_data_r;
|
||||
|
||||
always @(*) begin
|
||||
mem_req_byteen_r = 0;
|
||||
mem_req_data_r = 'x;
|
||||
for (integer i = 0; i < NUM_PORTS; ++i) begin
|
||||
if ((1 == NUM_PORTS) || mem_req_pmask_p[i]) begin
|
||||
mem_req_byteen_r[mem_req_wsel_p[i] * WORD_SIZE +: WORD_SIZE] = mem_req_byteen_p[i];
|
||||
mem_req_data_r[mem_req_wsel_p[i] * `WORD_WIDTH +: `WORD_WIDTH] = mem_req_data_p[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign mem_req_rw_sb = mem_req_rw_p;
|
||||
assign mem_req_byteen_sb = mem_req_byteen_r;
|
||||
assign mem_req_data_sb = mem_req_data_r;
|
||||
end else begin
|
||||
`UNUSED_VAR (mem_req_pmask_p)
|
||||
`UNUSED_VAR (mem_req_wsel_p)
|
||||
assign mem_req_rw_sb = mem_req_rw_p;
|
||||
assign mem_req_byteen_sb = mem_req_byteen_p;
|
||||
assign mem_req_data_sb = mem_req_data_p;
|
||||
end
|
||||
end else begin
|
||||
`UNUSED_VAR (mem_req_byteen_p)
|
||||
`UNUSED_VAR (mem_req_pmask_p)
|
||||
`UNUSED_VAR (mem_req_wsel_p)
|
||||
`UNUSED_VAR (mem_req_data_p)
|
||||
`UNUSED_VAR (mem_req_rw_p)
|
||||
|
||||
assign mem_req_rw_sb = 0;
|
||||
assign mem_req_byteen_sb = 'x;
|
||||
assign mem_req_data_sb = 'x;
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Core request
|
||||
wire [NUM_REQS-1:0] core_req_valid_nc;
|
||||
wire [NUM_REQS-1:0] core_req_rw_nc;
|
||||
|
@ -122,20 +237,23 @@ module VX_cache #(
|
|||
// Memory request
|
||||
wire mem_req_valid_nc;
|
||||
wire mem_req_rw_nc;
|
||||
wire [CACHE_LINE_SIZE-1:0] mem_req_byteen_nc;
|
||||
wire [`MEM_ADDR_WIDTH-1:0] mem_req_addr_nc;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] mem_req_data_nc;
|
||||
wire [`MEM_ADDR_WIDTH-1:0] mem_req_tag_nc;
|
||||
wire [NUM_PORTS-1:0] mem_req_pmask_nc;
|
||||
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mem_req_byteen_nc;
|
||||
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mem_req_wsel_nc;
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mem_req_data_nc;
|
||||
wire [MEM_TAG_IN_WIDTH-1:0] mem_req_tag_nc;
|
||||
wire mem_req_ready_nc;
|
||||
|
||||
// Memory response
|
||||
wire mem_rsp_valid_nc;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data_nc;
|
||||
wire [`MEM_ADDR_WIDTH-1:0] mem_rsp_tag_nc;
|
||||
wire mem_rsp_ready_nc;
|
||||
wire [MEM_TAG_IN_WIDTH-1:0] mem_rsp_tag_nc;
|
||||
wire mem_rsp_ready_nc;
|
||||
|
||||
if (NC_ENABLE) begin
|
||||
VX_nc_bypass #(
|
||||
.NUM_PORTS (NUM_PORTS),
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.NUM_RSP_TAGS (`CORE_RSP_TAGS),
|
||||
.NC_TAG_BIT (0),
|
||||
|
@ -145,12 +263,12 @@ module VX_cache #(
|
|||
.CORE_TAG_IN_WIDTH (CORE_TAG_WIDTH),
|
||||
|
||||
.MEM_ADDR_WIDTH (`MEM_ADDR_WIDTH),
|
||||
.MEM_DATA_SIZE (CACHE_LINE_SIZE),
|
||||
.MEM_TAG_IN_WIDTH (`MEM_ADDR_WIDTH),
|
||||
.MEM_DATA_SIZE (CACHE_LINE_SIZE),
|
||||
.MEM_TAG_IN_WIDTH (MEM_TAG_IN_WIDTH),
|
||||
.MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH)
|
||||
) nc_bypass (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Core request in
|
||||
.core_req_valid_in (core_req_valid),
|
||||
|
@ -178,29 +296,33 @@ module VX_cache #(
|
|||
.core_rsp_ready_in (core_rsp_ready_nc),
|
||||
|
||||
// Core response out
|
||||
.core_rsp_valid_out (core_rsp_valid),
|
||||
.core_rsp_tmask_out (core_rsp_tmask),
|
||||
.core_rsp_data_out (core_rsp_data),
|
||||
.core_rsp_tag_out (core_rsp_tag),
|
||||
.core_rsp_ready_out (core_rsp_ready),
|
||||
.core_rsp_valid_out (core_rsp_valid_sb),
|
||||
.core_rsp_tmask_out (core_rsp_tmask_sb),
|
||||
.core_rsp_data_out (core_rsp_data_sb),
|
||||
.core_rsp_tag_out (core_rsp_tag_sb),
|
||||
.core_rsp_ready_out (core_rsp_ready_sb),
|
||||
|
||||
// Memory request in
|
||||
.mem_req_valid_in (mem_req_valid_nc),
|
||||
.mem_req_rw_in (mem_req_rw_nc),
|
||||
.mem_req_byteen_in (mem_req_byteen_nc),
|
||||
.mem_req_rw_in (mem_req_rw_nc),
|
||||
.mem_req_addr_in (mem_req_addr_nc),
|
||||
.mem_req_pmask_in (mem_req_pmask_nc),
|
||||
.mem_req_byteen_in (mem_req_byteen_nc),
|
||||
.mem_req_wsel_in (mem_req_wsel_nc),
|
||||
.mem_req_data_in (mem_req_data_nc),
|
||||
.mem_req_tag_in (mem_req_tag_nc),
|
||||
.mem_req_ready_in (mem_req_ready_nc),
|
||||
|
||||
// Memory request out
|
||||
.mem_req_valid_out (mem_req_valid),
|
||||
.mem_req_rw_out (mem_req_rw),
|
||||
.mem_req_byteen_out (mem_req_byteen),
|
||||
.mem_req_addr_out (mem_req_addr),
|
||||
.mem_req_data_out (mem_req_data),
|
||||
.mem_req_tag_out (mem_req_tag),
|
||||
.mem_req_ready_out (mem_req_ready),
|
||||
.mem_req_valid_out (mem_req_valid_sb),
|
||||
.mem_req_addr_out (mem_req_addr_sb),
|
||||
.mem_req_rw_out (mem_req_rw_p),
|
||||
.mem_req_pmask_out (mem_req_pmask_p),
|
||||
.mem_req_byteen_out (mem_req_byteen_p),
|
||||
.mem_req_wsel_out (mem_req_wsel_p),
|
||||
.mem_req_data_out (mem_req_data_p),
|
||||
.mem_req_tag_out (mem_req_tag_sb),
|
||||
.mem_req_ready_out (mem_req_ready_sb),
|
||||
|
||||
// Memory response in
|
||||
.mem_rsp_valid_in (mem_rsp_valid),
|
||||
|
@ -223,19 +345,21 @@ module VX_cache #(
|
|||
assign core_req_tag_nc = core_req_tag;
|
||||
assign core_req_ready = core_req_ready_nc;
|
||||
|
||||
assign core_rsp_valid = core_rsp_valid_nc;
|
||||
assign core_rsp_tmask = core_rsp_tmask_nc;
|
||||
assign core_rsp_data = core_rsp_data_nc;
|
||||
assign core_rsp_tag = core_rsp_tag_nc;
|
||||
assign core_rsp_ready_nc = core_rsp_ready;
|
||||
assign core_rsp_valid_sb = core_rsp_valid_nc;
|
||||
assign core_rsp_tmask_sb = core_rsp_tmask_nc;
|
||||
assign core_rsp_data_sb = core_rsp_data_nc;
|
||||
assign core_rsp_tag_sb = core_rsp_tag_nc;
|
||||
assign core_rsp_ready_nc = core_rsp_ready_sb;
|
||||
|
||||
assign mem_req_valid = mem_req_valid_nc;
|
||||
assign mem_req_rw = mem_req_rw_nc;
|
||||
assign mem_req_addr = mem_req_addr_nc;
|
||||
assign mem_req_byteen = mem_req_byteen_nc;
|
||||
assign mem_req_data = mem_req_data_nc;
|
||||
assign mem_req_tag = mem_req_tag_nc;
|
||||
assign mem_req_ready_nc = mem_req_ready;
|
||||
assign mem_req_valid_sb = mem_req_valid_nc;
|
||||
assign mem_req_addr_sb = mem_req_addr_nc;
|
||||
assign mem_req_rw_p = mem_req_rw_nc;
|
||||
assign mem_req_pmask_p = mem_req_pmask_nc;
|
||||
assign mem_req_byteen_p = mem_req_byteen_nc;
|
||||
assign mem_req_wsel_p = mem_req_wsel_nc;
|
||||
assign mem_req_data_p = mem_req_data_nc;
|
||||
assign mem_req_tag_sb = mem_req_tag_nc;
|
||||
assign mem_req_ready_nc = mem_req_ready_sb;
|
||||
|
||||
assign mem_rsp_valid_nc = mem_rsp_valid;
|
||||
assign mem_rsp_data_nc = mem_rsp_data;
|
||||
|
@ -246,17 +370,19 @@ module VX_cache #(
|
|||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data_qual;
|
||||
wire [`MEM_ADDR_WIDTH-1:0] mem_rsp_tag_qual;
|
||||
wire [MEM_TAG_IN_WIDTH-1:0] mem_rsp_tag_qual;
|
||||
|
||||
wire mrsq_out_valid, mrsq_out_ready;
|
||||
|
||||
`RESET_RELAY (mrsq_reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`MEM_ADDR_WIDTH + `CACHE_LINE_WIDTH),
|
||||
.SIZE (MRSQ_SIZE),
|
||||
.OUTPUT_REG (MRSQ_SIZE > 2)
|
||||
.DATAW (MEM_TAG_IN_WIDTH + `CACHE_LINE_WIDTH),
|
||||
.SIZE (MRSQ_SIZE),
|
||||
.OUT_REG (MRSQ_SIZE > 2)
|
||||
) mem_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (mrsq_reset),
|
||||
.ready_in (mem_rsp_ready_nc),
|
||||
.valid_in (mem_rsp_valid_nc),
|
||||
.data_in ({mem_rsp_tag_nc, mem_rsp_data_nc}),
|
||||
|
@ -272,13 +398,15 @@ module VX_cache #(
|
|||
wire [`LINE_SELECT_BITS-1:0] flush_addr;
|
||||
wire flush_enable;
|
||||
|
||||
`RESET_RELAY (flush_reset);
|
||||
|
||||
VX_flush_ctrl #(
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.CACHE_LINE_SIZE (CACHE_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS)
|
||||
) flush_ctrl (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (flush_reset),
|
||||
.addr_out (flush_addr),
|
||||
.valid_out (flush_enable)
|
||||
);
|
||||
|
@ -287,36 +415,38 @@ module VX_cache #(
|
|||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_valid;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_req_pmask;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] per_bank_core_req_wsel;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] per_bank_core_req_wsel;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][CORE_TAG_X_WIDTH-1:0] per_bank_core_req_tag;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_rw;
|
||||
wire [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr;
|
||||
wire [NUM_BANKS-1:0][CORE_TAG_X_WIDTH-1:0] per_bank_core_req_tag;
|
||||
wire [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_rsp_valid;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_pmask;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid;
|
||||
wire [NUM_BANKS-1:0][CORE_TAG_X_WIDTH-1:0] per_bank_core_rsp_tag;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][CORE_TAG_X_WIDTH-1:0] per_bank_core_rsp_tag;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_rsp_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_valid;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_rw;
|
||||
wire [NUM_BANKS-1:0][CACHE_LINE_SIZE-1:0] per_bank_mem_req_byteen;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_mem_req_pmask;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_mem_req_byteen;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] per_bank_mem_req_wsel;
|
||||
wire [NUM_BANKS-1:0][`MEM_ADDR_WIDTH-1:0] per_bank_mem_req_addr;
|
||||
wire [NUM_BANKS-1:0][`CACHE_LINE_WIDTH-1:0] per_bank_mem_req_data;
|
||||
wire [NUM_BANKS-1:0][MSHR_ADDR_WIDTH-1:0] per_bank_mem_req_id;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_mem_req_data;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready;
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
`UNUSED_VAR (mem_rsp_tag_qual)
|
||||
assign mrsq_out_ready = per_bank_mem_rsp_ready;
|
||||
end else begin
|
||||
assign mrsq_out_ready = per_bank_mem_rsp_ready[`MEM_ADDR_BANK(mem_rsp_tag_qual)];
|
||||
assign mrsq_out_ready = per_bank_mem_rsp_ready[`MEM_TAG_TO_BANK_ID(mem_rsp_tag_qual)];
|
||||
end
|
||||
|
||||
VX_core_req_bank_sel #(
|
||||
|
@ -358,31 +488,34 @@ module VX_cache #(
|
|||
for (genvar i = 0; i < NUM_BANKS; i++) begin
|
||||
wire curr_bank_core_req_valid;
|
||||
wire [NUM_PORTS-1:0] curr_bank_core_req_pmask;
|
||||
wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] curr_bank_core_req_wsel;
|
||||
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] curr_bank_core_req_wsel;
|
||||
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] curr_bank_core_req_byteen;
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] curr_bank_core_req_data;
|
||||
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] curr_bank_core_req_tid;
|
||||
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] curr_bank_core_req_tid;
|
||||
wire [NUM_PORTS-1:0][CORE_TAG_X_WIDTH-1:0] curr_bank_core_req_tag;
|
||||
wire curr_bank_core_req_rw;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_core_req_addr;
|
||||
wire [CORE_TAG_X_WIDTH-1:0] curr_bank_core_req_tag;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_core_req_addr;
|
||||
wire curr_bank_core_req_ready;
|
||||
|
||||
wire curr_bank_core_rsp_valid;
|
||||
wire [NUM_PORTS-1:0] curr_bank_core_rsp_pmask;
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] curr_bank_core_rsp_data;
|
||||
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] curr_bank_core_rsp_tid;
|
||||
wire [CORE_TAG_X_WIDTH-1:0] curr_bank_core_rsp_tag;
|
||||
wire [NUM_PORTS-1:0][CORE_TAG_X_WIDTH-1:0] curr_bank_core_rsp_tag;
|
||||
wire curr_bank_core_rsp_ready;
|
||||
|
||||
wire curr_bank_mem_req_valid;
|
||||
wire curr_bank_mem_req_rw;
|
||||
wire [CACHE_LINE_SIZE-1:0] curr_bank_mem_req_byteen;
|
||||
wire [NUM_PORTS-1:0] curr_bank_mem_req_pmask;
|
||||
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] curr_bank_mem_req_byteen;
|
||||
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] curr_bank_mem_req_wsel;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr;
|
||||
wire[`CACHE_LINE_WIDTH-1:0] curr_bank_mem_req_data;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] curr_bank_mem_req_id;
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] curr_bank_mem_req_data;
|
||||
wire curr_bank_mem_req_ready;
|
||||
|
||||
wire curr_bank_mem_rsp_valid;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_mem_rsp_addr;
|
||||
wire curr_bank_mem_rsp_valid;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] curr_bank_mem_rsp_id;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] curr_bank_mem_rsp_data;
|
||||
wire curr_bank_mem_rsp_ready;
|
||||
|
||||
|
@ -407,27 +540,31 @@ module VX_cache #(
|
|||
assign per_bank_core_rsp_data [i] = curr_bank_core_rsp_data;
|
||||
|
||||
// Memory request
|
||||
assign per_bank_mem_req_valid[i] = curr_bank_mem_req_valid;
|
||||
assign per_bank_mem_req_rw[i] = curr_bank_mem_req_rw;
|
||||
assign per_bank_mem_req_valid[i] = curr_bank_mem_req_valid;
|
||||
assign per_bank_mem_req_rw[i] = curr_bank_mem_req_rw;
|
||||
assign per_bank_mem_req_pmask[i] = curr_bank_mem_req_pmask;
|
||||
assign per_bank_mem_req_byteen[i] = curr_bank_mem_req_byteen;
|
||||
assign per_bank_mem_req_wsel[i] = curr_bank_mem_req_wsel;
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign per_bank_mem_req_addr[i] = curr_bank_mem_req_addr;
|
||||
end else begin
|
||||
assign per_bank_mem_req_addr[i] = `LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, i);
|
||||
end
|
||||
assign per_bank_mem_req_id[i] = curr_bank_mem_req_id;
|
||||
assign per_bank_mem_req_data[i] = curr_bank_mem_req_data;
|
||||
assign curr_bank_mem_req_ready = per_bank_mem_req_ready[i];
|
||||
assign curr_bank_mem_req_ready = per_bank_mem_req_ready[i];
|
||||
|
||||
// Memory response
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign curr_bank_mem_rsp_valid = mrsq_out_valid;
|
||||
assign curr_bank_mem_rsp_addr = mem_rsp_tag_qual;
|
||||
assign curr_bank_mem_rsp_valid = mrsq_out_valid;
|
||||
end else begin
|
||||
assign curr_bank_mem_rsp_valid = mrsq_out_valid && (`MEM_ADDR_BANK(mem_rsp_tag_qual) == i);
|
||||
assign curr_bank_mem_rsp_addr = `MEM_TO_LINE_ADDR(mem_rsp_tag_qual);
|
||||
assign curr_bank_mem_rsp_valid = mrsq_out_valid && (`MEM_TAG_TO_BANK_ID(mem_rsp_tag_qual) == i);
|
||||
end
|
||||
assign curr_bank_mem_rsp_id = `MEM_TAG_TO_REQ_ID(mem_rsp_tag_qual);
|
||||
assign curr_bank_mem_rsp_data = mem_rsp_data_qual;
|
||||
assign per_bank_mem_rsp_ready[i] = curr_bank_mem_rsp_ready;
|
||||
|
||||
`RESET_RELAY (bank_reset);
|
||||
|
||||
VX_bank #(
|
||||
.BANK_ID (i),
|
||||
|
@ -450,7 +587,7 @@ module VX_cache #(
|
|||
`SCOPE_BIND_VX_cache_bank(i)
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (bank_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_read_misses (perf_read_miss_per_bank[i]),
|
||||
|
@ -482,14 +619,17 @@ module VX_cache #(
|
|||
// Memory request
|
||||
.mem_req_valid (curr_bank_mem_req_valid),
|
||||
.mem_req_rw (curr_bank_mem_req_rw),
|
||||
.mem_req_pmask (curr_bank_mem_req_pmask),
|
||||
.mem_req_byteen (curr_bank_mem_req_byteen),
|
||||
.mem_req_wsel (curr_bank_mem_req_wsel),
|
||||
.mem_req_addr (curr_bank_mem_req_addr),
|
||||
.mem_req_id (curr_bank_mem_req_id),
|
||||
.mem_req_data (curr_bank_mem_req_data),
|
||||
.mem_req_ready (curr_bank_mem_req_ready),
|
||||
|
||||
// Memory response
|
||||
.mem_rsp_valid (curr_bank_mem_rsp_valid),
|
||||
.mem_rsp_addr (curr_bank_mem_rsp_addr),
|
||||
.mem_rsp_id (curr_bank_mem_rsp_id),
|
||||
.mem_rsp_data (curr_bank_mem_rsp_data),
|
||||
.mem_rsp_ready (curr_bank_mem_rsp_ready),
|
||||
|
||||
|
@ -523,53 +663,66 @@ module VX_cache #(
|
|||
.core_rsp_ready (core_rsp_ready_nc)
|
||||
);
|
||||
|
||||
wire [NUM_BANKS-1:0][(`MEM_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH)-1:0] data_in;
|
||||
for (genvar i = 0; i < NUM_BANKS; i++) begin
|
||||
assign data_in[i] = {per_bank_mem_req_addr[i], per_bank_mem_req_rw[i], per_bank_mem_req_byteen[i], per_bank_mem_req_data[i]};
|
||||
wire [NUM_BANKS-1:0][(`MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + NUM_PORTS * (1 + WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH))-1:0] data_in;
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign data_in[i] = {per_bank_mem_req_addr[i], per_bank_mem_req_id[i], per_bank_mem_req_rw[i], per_bank_mem_req_pmask[i], per_bank_mem_req_byteen[i], per_bank_mem_req_wsel[i], per_bank_mem_req_data[i]};
|
||||
end
|
||||
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mem_req_id;
|
||||
|
||||
`RESET_RELAY (mreq_reset);
|
||||
|
||||
VX_stream_arbiter #(
|
||||
.NUM_REQS (NUM_BANKS),
|
||||
.DATAW (`MEM_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH),
|
||||
.BUFFERED (1)
|
||||
.DATAW (`MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + NUM_PORTS * (1 + WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH)),
|
||||
.TYPE ("R")
|
||||
) mem_req_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (mreq_reset),
|
||||
.valid_in (per_bank_mem_req_valid),
|
||||
.data_in (data_in),
|
||||
.ready_in (per_bank_mem_req_ready),
|
||||
.valid_out (mem_req_valid_nc),
|
||||
.data_out ({mem_req_addr_nc, mem_req_rw_nc, mem_req_byteen_nc, mem_req_data_nc}),
|
||||
.data_out ({mem_req_addr_nc, mem_req_id, mem_req_rw_nc, mem_req_pmask_nc, mem_req_byteen_nc, mem_req_wsel_nc, mem_req_data_nc}),
|
||||
.ready_out (mem_req_ready_nc)
|
||||
);
|
||||
|
||||
assign mem_req_tag_nc = mem_req_addr_nc;
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign mem_req_tag_nc = MEM_TAG_IN_WIDTH'(mem_req_id);
|
||||
end else begin
|
||||
assign mem_req_tag_nc = MEM_TAG_IN_WIDTH'({`MEM_ADDR_TO_BANK_ID(mem_req_addr_nc), mem_req_id});
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
// per cycle: core_reads, core_writes
|
||||
reg [($clog2(NUM_REQS+1)-1):0] perf_core_reads_per_cycle;
|
||||
reg [($clog2(NUM_REQS+1)-1):0] perf_core_writes_per_cycle;
|
||||
reg [($clog2(NUM_REQS+1)-1):0] perf_crsp_stall_per_cycle;
|
||||
wire [$clog2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle;
|
||||
wire [$clog2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle;
|
||||
wire [$clog2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle;
|
||||
|
||||
assign perf_core_reads_per_cycle = $countones(core_req_valid & core_req_ready & ~core_req_rw);
|
||||
assign perf_core_writes_per_cycle = $countones(core_req_valid & core_req_ready & core_req_rw);
|
||||
wire [NUM_REQS-1:0] perf_core_reads_per_mask = core_req_valid & core_req_ready & ~core_req_rw;
|
||||
wire [NUM_REQS-1:0] perf_core_writes_per_mask = core_req_valid & core_req_ready & core_req_rw;
|
||||
|
||||
`POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_mask);
|
||||
`POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_mask);
|
||||
|
||||
if (CORE_TAG_ID_BITS != 0) begin
|
||||
assign perf_crsp_stall_per_cycle = $countones(core_rsp_tmask & {NUM_REQS{core_rsp_valid && ~core_rsp_ready}});
|
||||
wire [NUM_REQS-1:0] perf_crsp_stall_per_mask = core_rsp_tmask & {NUM_REQS{core_rsp_valid && ~core_rsp_ready}};
|
||||
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_mask);
|
||||
end else begin
|
||||
assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & ~core_rsp_ready);
|
||||
wire [NUM_REQS-1:0] perf_crsp_stall_per_mask = core_rsp_valid & ~core_rsp_ready;
|
||||
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_mask);
|
||||
end
|
||||
|
||||
// per cycle: read misses, write misses, msrq stalls, pipeline stalls
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_read_miss_per_cycle;
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_write_miss_per_cycle;
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_mshr_stall_per_cycle;
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_pipe_stall_per_cycle;
|
||||
wire [$clog2(NUM_BANKS+1)-1:0] perf_read_miss_per_cycle;
|
||||
wire [$clog2(NUM_BANKS+1)-1:0] perf_write_miss_per_cycle;
|
||||
wire [$clog2(NUM_BANKS+1)-1:0] perf_mshr_stall_per_cycle;
|
||||
wire [$clog2(NUM_BANKS+1)-1:0] perf_pipe_stall_per_cycle;
|
||||
|
||||
assign perf_read_miss_per_cycle = $countones(perf_read_miss_per_bank);
|
||||
assign perf_write_miss_per_cycle = $countones(perf_write_miss_per_bank);
|
||||
assign perf_mshr_stall_per_cycle = $countones(perf_mshr_stall_per_bank);
|
||||
assign perf_pipe_stall_per_cycle = $countones(perf_pipe_stall_per_bank);
|
||||
`POP_COUNT(perf_read_miss_per_cycle, perf_read_miss_per_bank);
|
||||
`POP_COUNT(perf_write_miss_per_cycle, perf_write_miss_per_bank);
|
||||
`POP_COUNT(perf_mshr_stall_per_cycle, perf_mshr_stall_per_bank);
|
||||
`POP_COUNT(perf_pipe_stall_per_cycle, perf_pipe_stall_per_bank);
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] perf_core_reads;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_core_writes;
|
18
hw/rtl/cache/VX_cache_define.vh
vendored
18
hw/rtl/cache/VX_cache_define.vh
vendored
|
@ -9,8 +9,10 @@
|
|||
|
||||
`define REQS_BITS `LOG2UP(NUM_REQS)
|
||||
|
||||
// tag valid tid word_sel
|
||||
`define MSHR_DATA_WIDTH (CORE_TAG_WIDTH + (1 + `REQS_BITS + `UP(`WORD_SELECT_BITS)) * NUM_PORTS)
|
||||
`define PORTS_BITS `LOG2UP(NUM_PORTS)
|
||||
|
||||
// tag valid tid word_sel
|
||||
`define MSHR_DATA_WIDTH ((CORE_TAG_WIDTH + 1 + `REQS_BITS + `UP(`WORD_SELECT_BITS)) * NUM_PORTS)
|
||||
|
||||
`define WORD_WIDTH (8 * WORD_SIZE)
|
||||
|
||||
|
@ -57,14 +59,14 @@
|
|||
|
||||
`define CORE_RSP_TAGS ((CORE_TAG_ID_BITS != 0) ? 1 : NUM_REQS)
|
||||
|
||||
`define BANK_READY_COUNT ((SHARED_BANK_READY != 0) ? 1 : NUM_BANKS)
|
||||
|
||||
`define MEM_ADDR_BANK(x) x[`BANK_SELECT_BITS+BANK_ADDR_OFFSET-1 : BANK_ADDR_OFFSET]
|
||||
|
||||
`define MEM_TO_LINE_ADDR(x) x[`MEM_ADDR_WIDTH-1 : `BANK_SELECT_BITS]
|
||||
|
||||
`define LINE_TO_MEM_ADDR(x, i) {x, `BANK_SELECT_BITS'(i)}
|
||||
|
||||
`define MEM_ADDR_TO_BANK_ID(x) x[0 +: `BANK_SELECT_BITS]
|
||||
|
||||
`define MEM_TAG_TO_REQ_ID(x) x[MSHR_ADDR_WIDTH-1:0]
|
||||
|
||||
`define MEM_TAG_TO_BANK_ID(x) x[MSHR_ADDR_WIDTH +: `BANK_SELECT_BITS]
|
||||
|
||||
`define LINE_TO_BYTE_ADDR(x, i) {x, (32-$bits(x))'(i << (32-$bits(x)-`BANK_SELECT_BITS))}
|
||||
|
||||
`define TO_FULL_ADDR(x) {x, (32-$bits(x))'(0)}
|
||||
|
|
|
@ -16,9 +16,7 @@ module VX_core_req_bank_sel #(
|
|||
// core request tag size
|
||||
parameter CORE_TAG_WIDTH = 3,
|
||||
// bank offset from beginning of index range
|
||||
parameter BANK_ADDR_OFFSET = 0,
|
||||
// shared bank ready signal
|
||||
parameter SHARED_BANK_READY = 0
|
||||
parameter BANK_ADDR_OFFSET = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -43,8 +41,8 @@ module VX_core_req_bank_sel #(
|
|||
output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen,
|
||||
output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data,
|
||||
output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid,
|
||||
output wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag,
|
||||
input wire [`BANK_READY_COUNT-1:0] per_bank_core_req_ready
|
||||
output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag,
|
||||
input wire [NUM_BANKS-1:0] per_bank_core_req_ready
|
||||
);
|
||||
`UNUSED_PARAM (CACHE_ID)
|
||||
`STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value"))
|
||||
|
@ -80,9 +78,9 @@ module VX_core_req_bank_sel #(
|
|||
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen_r;
|
||||
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data_r;
|
||||
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid_r;
|
||||
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag_r;
|
||||
reg [NUM_BANKS-1:0] per_bank_core_req_rw_r;
|
||||
reg [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr_r;
|
||||
reg [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag_r;
|
||||
reg [NUM_REQS-1:0] core_req_ready_r;
|
||||
|
||||
if (NUM_REQS > 1) begin
|
||||
|
@ -101,7 +99,7 @@ module VX_core_req_bank_sel #(
|
|||
end
|
||||
end
|
||||
|
||||
for (genvar i = NUM_REQS-1; i >= 0; --i) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_line_match[i] = (core_req_line_addr[i] == per_bank_line_addr_r[core_req_bid[i]]);
|
||||
end
|
||||
|
||||
|
@ -129,30 +127,19 @@ module VX_core_req_bank_sel #(
|
|||
per_bank_core_req_byteen_r[core_req_bid[i]][i % NUM_PORTS] = core_req_byteen[i];
|
||||
per_bank_core_req_data_r[core_req_bid[i]][i % NUM_PORTS] = core_req_data[i];
|
||||
per_bank_core_req_tid_r[core_req_bid[i]][i % NUM_PORTS] = `REQS_BITS'(i);
|
||||
per_bank_core_req_tag_r[core_req_bid[i]][i % NUM_PORTS] = core_req_tag[i];
|
||||
per_bank_core_req_rw_r[core_req_bid[i]] = core_req_rw[i];
|
||||
per_bank_core_req_addr_r[core_req_bid[i]] = core_req_line_addr[i];
|
||||
per_bank_core_req_tag_r[core_req_bid[i]] = core_req_tag[i];
|
||||
|
||||
req_select_table_r[core_req_bid[i]][i % NUM_PORTS] = (1 << i);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
if (SHARED_BANK_READY == 0) begin
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_REQS; ++i) begin
|
||||
core_req_ready_r[i] = per_bank_core_req_ready[core_req_bid[i]]
|
||||
&& core_req_line_match[i]
|
||||
&& req_select_table_r[core_req_bid[i]][i % NUM_PORTS][i];
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_REQS; ++i) begin
|
||||
core_req_ready_r[i] = per_bank_core_req_ready
|
||||
&& core_req_line_match[i]
|
||||
&& req_select_table_r[core_req_bid[i]][i % NUM_PORTS][i];
|
||||
end
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_REQS; ++i) begin
|
||||
core_req_ready_r[i] = per_bank_core_req_ready[core_req_bid[i]]
|
||||
&& core_req_line_match[i]
|
||||
&& req_select_table_r[core_req_bid[i]][i % NUM_PORTS][i];
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -177,32 +164,17 @@ module VX_core_req_bank_sel #(
|
|||
per_bank_core_req_byteen_r[core_req_bid[i]][i % NUM_PORTS] = core_req_byteen[i];
|
||||
per_bank_core_req_data_r[core_req_bid[i]][i % NUM_PORTS] = core_req_data[i];
|
||||
per_bank_core_req_tid_r[core_req_bid[i]][i % NUM_PORTS] = `REQS_BITS'(i);
|
||||
per_bank_core_req_tag_r[core_req_bid[i]][i % NUM_PORTS] = core_req_tag[i];
|
||||
per_bank_core_req_rw_r[core_req_bid[i]] = core_req_rw[i];
|
||||
per_bank_core_req_addr_r[core_req_bid[i]] = core_req_line_addr[i];
|
||||
per_bank_core_req_tag_r[core_req_bid[i]] = core_req_tag[i];
|
||||
per_bank_core_req_addr_r[core_req_bid[i]] = core_req_line_addr[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
if (SHARED_BANK_READY == 0) begin
|
||||
always @(*) begin
|
||||
core_req_ready_r = 'x;
|
||||
for (integer i = NUM_REQS-1; i >= 0; --i) begin
|
||||
if (core_req_valid[i]) begin
|
||||
core_req_ready_r[i] = per_bank_core_req_ready[core_req_bid[i]]
|
||||
&& core_req_line_match[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
always @(*) begin
|
||||
core_req_ready_r = 'x;
|
||||
for (integer i = NUM_REQS-1; i >= 0; --i) begin
|
||||
if (core_req_valid[i]) begin
|
||||
core_req_ready_r[i] = per_bank_core_req_ready
|
||||
&& core_req_line_match[i];
|
||||
end
|
||||
end
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_REQS; ++i) begin
|
||||
core_req_ready_r[i] = per_bank_core_req_ready[core_req_bid[i]]
|
||||
&& core_req_line_match[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -236,22 +208,11 @@ module VX_core_req_bank_sel #(
|
|||
end
|
||||
|
||||
if (NUM_BANKS > 1) begin
|
||||
if (SHARED_BANK_READY == 0) begin
|
||||
always @(*) begin
|
||||
core_req_ready_r = 0;
|
||||
for (integer i = 0; i < NUM_BANKS; ++i) begin
|
||||
if (per_bank_core_req_valid_r[i]) begin
|
||||
core_req_ready_r[per_bank_core_req_tid_r[i]] = per_bank_core_req_ready[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
always @(*) begin
|
||||
core_req_ready_r = 0;
|
||||
for (integer i = 0; i < NUM_BANKS; ++i) begin
|
||||
if (per_bank_core_req_valid_r[i]) begin
|
||||
core_req_ready_r[per_bank_core_req_tid_r[i]] = per_bank_core_req_ready;
|
||||
end
|
||||
always @(*) begin
|
||||
core_req_ready_r = 0;
|
||||
for (integer i = 0; i < NUM_BANKS; ++i) begin
|
||||
if (per_bank_core_req_valid_r[i]) begin
|
||||
core_req_ready_r[per_bank_core_req_tid_r[i]] = per_bank_core_req_ready[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -320,33 +281,26 @@ module VX_core_req_bank_sel #(
|
|||
`ifdef PERF_ENABLE
|
||||
reg [NUM_REQS-1:0] core_req_sel_r;
|
||||
|
||||
if (SHARED_BANK_READY == 0) begin
|
||||
always @(*) begin
|
||||
core_req_sel_r = 0;
|
||||
for (integer i = 0; i < NUM_REQS; ++i) begin
|
||||
if (core_req_valid[i]) begin
|
||||
core_req_sel_r[i] = per_bank_core_req_ready[core_req_bid[i]];
|
||||
end
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
always @(*) begin
|
||||
core_req_sel_r = 0;
|
||||
for (integer i = 0; i < NUM_REQS; ++i) begin
|
||||
if (core_req_valid[i]) begin
|
||||
core_req_sel_r[i] = per_bank_core_req_ready;
|
||||
end
|
||||
always @(*) begin
|
||||
core_req_sel_r = 0;
|
||||
for (integer i = 0; i < NUM_REQS; ++i) begin
|
||||
if (core_req_valid[i]) begin
|
||||
core_req_sel_r[i] = per_bank_core_req_ready[core_req_bid[i]];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] bank_stalls_r;
|
||||
wire [$clog2(NUM_REQS+1)-1:0] bank_stall_cnt;
|
||||
|
||||
wire [NUM_REQS-1:0] bank_stall_mask = core_req_sel_r & ~core_req_ready;
|
||||
`POP_COUNT(bank_stall_cnt, bank_stall_mask);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
bank_stalls_r <= 0;
|
||||
end else begin
|
||||
bank_stalls_r <= bank_stalls_r + `PERF_CTR_BITS'($countones(core_req_sel_r & ~core_req_ready));
|
||||
bank_stalls_r <= bank_stalls_r + `PERF_CTR_BITS'(bank_stall_cnt);
|
||||
end
|
||||
end
|
||||
|
350
hw/rtl/cache/VX_core_rsp_merge.sv
vendored
Normal file
350
hw/rtl/cache/VX_core_rsp_merge.sv
vendored
Normal file
|
@ -0,0 +1,350 @@
|
|||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_core_rsp_merge #(
|
||||
parameter CACHE_ID = 0,
|
||||
|
||||
// Number of Word requests per cycle
|
||||
parameter NUM_REQS = 1,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of ports per banks
|
||||
parameter NUM_PORTS = 1,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 1,
|
||||
// core request tag size
|
||||
parameter CORE_TAG_WIDTH = 1,
|
||||
// size of tag id in core request tag
|
||||
parameter CORE_TAG_ID_BITS = 0,
|
||||
// output register
|
||||
parameter OUT_REG = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Per Bank WB
|
||||
input wire [NUM_BANKS-1:0] per_bank_core_rsp_valid,
|
||||
input wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_pmask,
|
||||
input wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data,
|
||||
input wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid,
|
||||
input wire [NUM_BANKS-1:0][NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag,
|
||||
output wire [NUM_BANKS-1:0] per_bank_core_rsp_ready,
|
||||
|
||||
// Core Response
|
||||
output wire [`CORE_RSP_TAGS-1:0] core_rsp_valid,
|
||||
output wire [NUM_REQS-1:0] core_rsp_tmask,
|
||||
output wire [`CORE_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
|
||||
output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
|
||||
input wire [`CORE_RSP_TAGS-1:0] core_rsp_ready
|
||||
);
|
||||
`UNUSED_PARAM (CACHE_ID)
|
||||
|
||||
if (NUM_BANKS > 1) begin
|
||||
|
||||
reg [NUM_REQS-1:0] core_rsp_valid_unqual;
|
||||
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual;
|
||||
reg [NUM_BANKS-1:0] per_bank_core_rsp_ready_r;
|
||||
|
||||
if (CORE_TAG_ID_BITS != 0) begin
|
||||
|
||||
// The core response bus handles a single tag at the time
|
||||
// We first need to select the current tag to process,
|
||||
// then send all bank responses for that tag as a batch
|
||||
|
||||
wire [CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
|
||||
wire core_rsp_ready_unqual;
|
||||
|
||||
if (NUM_PORTS > 1) begin
|
||||
|
||||
reg [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_sent_r, per_bank_core_rsp_sent;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_sent_n;
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign per_bank_core_rsp_sent_n[i] = per_bank_core_rsp_sent_r[i] | per_bank_core_rsp_sent[i];
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
per_bank_core_rsp_sent_r <= '0;
|
||||
end else begin
|
||||
for (integer i = 0; i < NUM_BANKS; ++i) begin
|
||||
if (per_bank_core_rsp_sent_n[i] == per_bank_core_rsp_pmask[i]) begin
|
||||
per_bank_core_rsp_sent_r[i] <= '0;
|
||||
end else begin
|
||||
per_bank_core_rsp_sent_r[i] <= per_bank_core_rsp_sent_n[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_valid_p;
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
for (genvar p = 0; p < NUM_PORTS; ++p) begin
|
||||
assign per_bank_core_rsp_valid_p[i][p] = per_bank_core_rsp_valid[i]
|
||||
&& per_bank_core_rsp_pmask[i][p]
|
||||
&& !per_bank_core_rsp_sent_r[i][p];
|
||||
end
|
||||
end
|
||||
|
||||
VX_find_first #(
|
||||
.N (NUM_BANKS * NUM_PORTS),
|
||||
.DATAW (CORE_TAG_WIDTH)
|
||||
) find_first (
|
||||
.valid_i (per_bank_core_rsp_valid_p),
|
||||
.data_i (per_bank_core_rsp_tag),
|
||||
.data_o (core_rsp_tag_unqual),
|
||||
`UNUSED_PIN (valid_o)
|
||||
);
|
||||
|
||||
always @(*) begin
|
||||
core_rsp_valid_unqual = 0;
|
||||
core_rsp_data_unqual = 'x;
|
||||
per_bank_core_rsp_sent = 0;
|
||||
|
||||
for (integer i = 0; i < NUM_BANKS; ++i) begin
|
||||
for (integer p = 0; p < NUM_PORTS; ++p) begin
|
||||
if (per_bank_core_rsp_valid[i]
|
||||
&& per_bank_core_rsp_pmask[i][p]
|
||||
&& !per_bank_core_rsp_sent_r[i][p]
|
||||
&& (per_bank_core_rsp_tag[i][p][CORE_TAG_ID_BITS-1:0] == core_rsp_tag_unqual[CORE_TAG_ID_BITS-1:0])) begin
|
||||
core_rsp_valid_unqual[per_bank_core_rsp_tid[i][p]] = 1;
|
||||
core_rsp_data_unqual[per_bank_core_rsp_tid[i][p]] = per_bank_core_rsp_data[i][p];
|
||||
per_bank_core_rsp_sent[i][p] = core_rsp_ready_unqual;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_BANKS; ++i) begin
|
||||
per_bank_core_rsp_ready_r[i] = (per_bank_core_rsp_sent_n[i] == per_bank_core_rsp_pmask[i]);
|
||||
end
|
||||
end
|
||||
|
||||
end else begin
|
||||
|
||||
`UNUSED_VAR (per_bank_core_rsp_pmask)
|
||||
|
||||
VX_find_first #(
|
||||
.N (NUM_BANKS),
|
||||
.DATAW (CORE_TAG_WIDTH)
|
||||
) find_first (
|
||||
.valid_i (per_bank_core_rsp_valid),
|
||||
.data_i (per_bank_core_rsp_tag),
|
||||
.data_o (core_rsp_tag_unqual),
|
||||
`UNUSED_PIN (valid_o)
|
||||
);
|
||||
|
||||
always @(*) begin
|
||||
core_rsp_valid_unqual = 0;
|
||||
core_rsp_data_unqual = 'x;
|
||||
per_bank_core_rsp_ready_r = 0;
|
||||
|
||||
for (integer i = 0; i < NUM_BANKS; i++) begin
|
||||
if (per_bank_core_rsp_valid[i]
|
||||
&& (per_bank_core_rsp_tag[i][0][CORE_TAG_ID_BITS-1:0] == core_rsp_tag_unqual[CORE_TAG_ID_BITS-1:0])) begin
|
||||
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
|
||||
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
|
||||
per_bank_core_rsp_ready_r[i] = core_rsp_ready_unqual;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire core_rsp_valid_any = (| per_bank_core_rsp_valid);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH)),
|
||||
.PASSTHRU (0 == OUT_REG)
|
||||
) out_sbuf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (core_rsp_valid_any),
|
||||
.data_in ({core_rsp_valid_unqual, core_rsp_tag_unqual, core_rsp_data_unqual}),
|
||||
.ready_in (core_rsp_ready_unqual),
|
||||
.valid_out (core_rsp_valid),
|
||||
.data_out ({core_rsp_tmask, core_rsp_tag, core_rsp_data}),
|
||||
.ready_out (core_rsp_ready)
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
||||
reg [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
|
||||
wire [NUM_REQS-1:0] core_rsp_ready_unqual;
|
||||
|
||||
if (NUM_PORTS > 1) begin
|
||||
|
||||
reg [NUM_REQS-1:0][(`PORTS_BITS + `BANK_SELECT_BITS)-1:0] bank_select_table;
|
||||
|
||||
reg [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_sent_r, per_bank_core_rsp_sent;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_sent_n;
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign per_bank_core_rsp_sent_n[i] = per_bank_core_rsp_sent_r[i] | per_bank_core_rsp_sent[i];
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
per_bank_core_rsp_sent_r <= '0;
|
||||
end else begin
|
||||
for (integer i = 0; i < NUM_BANKS; ++i) begin
|
||||
if (per_bank_core_rsp_sent_n[i] == per_bank_core_rsp_pmask[i]) begin
|
||||
per_bank_core_rsp_sent_r[i] <= '0;
|
||||
end else begin
|
||||
per_bank_core_rsp_sent_r[i] <= per_bank_core_rsp_sent_n[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
core_rsp_valid_unqual = '0;
|
||||
core_rsp_tag_unqual = 'x;
|
||||
core_rsp_data_unqual = 'x;
|
||||
bank_select_table = 'x;
|
||||
|
||||
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
|
||||
for (integer p = 0; p < NUM_PORTS; ++p) begin
|
||||
if (per_bank_core_rsp_valid[i]
|
||||
&& per_bank_core_rsp_pmask[i][p]
|
||||
&& !per_bank_core_rsp_sent_r[i][p]) begin
|
||||
core_rsp_valid_unqual[per_bank_core_rsp_tid[i][p]] = 1;
|
||||
core_rsp_tag_unqual[per_bank_core_rsp_tid[i][p]] = per_bank_core_rsp_tag[i][p];
|
||||
core_rsp_data_unqual[per_bank_core_rsp_tid[i][p]] = per_bank_core_rsp_data[i][p];
|
||||
bank_select_table[per_bank_core_rsp_tid[i][p]] = {`PORTS_BITS'(p), `BANK_SELECT_BITS'(i)};
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
per_bank_core_rsp_sent = '0;
|
||||
for (integer i = 0; i < NUM_REQS; i++) begin
|
||||
if (core_rsp_valid_unqual[i]) begin
|
||||
per_bank_core_rsp_sent[bank_select_table[i][0 +: `BANK_SELECT_BITS]][bank_select_table[i][`BANK_SELECT_BITS +: `PORTS_BITS]] = core_rsp_ready_unqual[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_BANKS; i++) begin
|
||||
per_bank_core_rsp_ready_r[i] = (per_bank_core_rsp_sent_n[i] == per_bank_core_rsp_pmask[i]);
|
||||
end
|
||||
end
|
||||
|
||||
end else begin
|
||||
|
||||
`UNUSED_VAR (per_bank_core_rsp_pmask)
|
||||
reg [NUM_REQS-1:0][NUM_BANKS-1:0] bank_select_table;
|
||||
|
||||
always @(*) begin
|
||||
core_rsp_valid_unqual = 0;
|
||||
core_rsp_tag_unqual = 'x;
|
||||
core_rsp_data_unqual = 'x;
|
||||
bank_select_table = 'x;
|
||||
|
||||
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
|
||||
if (per_bank_core_rsp_valid[i]) begin
|
||||
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
|
||||
core_rsp_tag_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i];
|
||||
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
|
||||
bank_select_table[per_bank_core_rsp_tid[i]] = (1 << i);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_BANKS; ++i) begin
|
||||
per_bank_core_rsp_ready_r[i] = core_rsp_ready_unqual[per_bank_core_rsp_tid[i]]
|
||||
&& bank_select_table[per_bank_core_rsp_tid[i]][i];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
VX_skid_buffer #(
|
||||
.DATAW (CORE_TAG_WIDTH + `WORD_WIDTH),
|
||||
.PASSTHRU (0 == OUT_REG)
|
||||
) out_sbuf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (core_rsp_valid_unqual[i]),
|
||||
.data_in ({core_rsp_tag_unqual[i], core_rsp_data_unqual[i]}),
|
||||
.ready_in (core_rsp_ready_unqual[i]),
|
||||
.valid_out (core_rsp_valid[i]),
|
||||
.data_out ({core_rsp_tag[i],core_rsp_data[i]}),
|
||||
.ready_out (core_rsp_ready[i])
|
||||
);
|
||||
end
|
||||
|
||||
assign core_rsp_tmask = core_rsp_valid;
|
||||
|
||||
end
|
||||
|
||||
assign per_bank_core_rsp_ready = per_bank_core_rsp_ready_r;
|
||||
|
||||
end else begin
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (per_bank_core_rsp_pmask)
|
||||
|
||||
if (NUM_REQS > 1) begin
|
||||
|
||||
reg [`CORE_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
|
||||
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual;
|
||||
|
||||
if (CORE_TAG_ID_BITS != 0) begin
|
||||
|
||||
reg [NUM_REQS-1:0] core_rsp_tmask_unqual;
|
||||
|
||||
always @(*) begin
|
||||
core_rsp_tmask_unqual = 0;
|
||||
core_rsp_tmask_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_valid;
|
||||
|
||||
core_rsp_tag_unqual = per_bank_core_rsp_tag;
|
||||
|
||||
core_rsp_data_unqual = 'x;
|
||||
core_rsp_data_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_data;
|
||||
end
|
||||
|
||||
assign core_rsp_valid = per_bank_core_rsp_valid;
|
||||
assign core_rsp_tmask = core_rsp_tmask_unqual;
|
||||
assign per_bank_core_rsp_ready = core_rsp_ready;
|
||||
|
||||
end else begin
|
||||
|
||||
reg [`CORE_RSP_TAGS-1:0] core_rsp_valid_unqual;
|
||||
|
||||
always @(*) begin
|
||||
core_rsp_valid_unqual = 0;
|
||||
core_rsp_valid_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_valid;
|
||||
|
||||
core_rsp_tag_unqual = 'x;
|
||||
core_rsp_tag_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_tag;
|
||||
|
||||
core_rsp_data_unqual = 'x;
|
||||
core_rsp_data_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_data;
|
||||
end
|
||||
|
||||
assign core_rsp_valid = core_rsp_valid_unqual;
|
||||
assign core_rsp_tmask = core_rsp_valid_unqual;
|
||||
assign per_bank_core_rsp_ready = core_rsp_ready[per_bank_core_rsp_tid];
|
||||
|
||||
end
|
||||
|
||||
assign core_rsp_tag = core_rsp_tag_unqual;
|
||||
assign core_rsp_data = core_rsp_data_unqual;
|
||||
|
||||
end else begin
|
||||
|
||||
`UNUSED_VAR(per_bank_core_rsp_tid)
|
||||
assign core_rsp_valid = per_bank_core_rsp_valid;
|
||||
assign core_rsp_tmask = per_bank_core_rsp_valid;
|
||||
assign core_rsp_tag = per_bank_core_rsp_tag;
|
||||
assign core_rsp_data = per_bank_core_rsp_data;
|
||||
assign per_bank_core_rsp_ready = core_rsp_ready;
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
239
hw/rtl/cache/VX_core_rsp_merge.v
vendored
239
hw/rtl/cache/VX_core_rsp_merge.v
vendored
|
@ -1,239 +0,0 @@
|
|||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_core_rsp_merge #(
|
||||
parameter CACHE_ID = 0,
|
||||
|
||||
// Number of Word requests per cycle
|
||||
parameter NUM_REQS = 1,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of ports per banks
|
||||
parameter NUM_PORTS = 1,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 1,
|
||||
// core request tag size
|
||||
parameter CORE_TAG_WIDTH = 1,
|
||||
// size of tag id in core request tag
|
||||
parameter CORE_TAG_ID_BITS = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Per Bank WB
|
||||
input wire [NUM_BANKS-1:0] per_bank_core_rsp_valid,
|
||||
input wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_pmask,
|
||||
input wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data,
|
||||
input wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid,
|
||||
input wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag,
|
||||
output wire [NUM_BANKS-1:0] per_bank_core_rsp_ready,
|
||||
|
||||
// Core Response
|
||||
output wire [`CORE_RSP_TAGS-1:0] core_rsp_valid,
|
||||
output wire [NUM_REQS-1:0] core_rsp_tmask,
|
||||
output wire [`CORE_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
|
||||
output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
|
||||
input wire [`CORE_RSP_TAGS-1:0] core_rsp_ready
|
||||
);
|
||||
`UNUSED_PARAM (CACHE_ID)
|
||||
|
||||
if (NUM_BANKS > 1) begin
|
||||
|
||||
reg [NUM_REQS-1:0] core_rsp_valid_unqual;
|
||||
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual;
|
||||
reg [NUM_BANKS-1:0] core_rsp_bank_select;
|
||||
|
||||
if (CORE_TAG_ID_BITS != 0) begin
|
||||
|
||||
// The core response bus handles a single tag at the time
|
||||
// We first need to select the current tag to process,
|
||||
// then send all bank responses for that tag as a batch
|
||||
|
||||
reg [CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
|
||||
wire core_rsp_ready_unqual;
|
||||
|
||||
always @(*) begin
|
||||
core_rsp_tag_unqual = 'x;
|
||||
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
|
||||
if (per_bank_core_rsp_valid[i]) begin
|
||||
core_rsp_tag_unqual = per_bank_core_rsp_tag[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
if (NUM_PORTS > 1) begin
|
||||
|
||||
always @(*) begin
|
||||
core_rsp_valid_unqual = 0;
|
||||
core_rsp_data_unqual = 'x;
|
||||
core_rsp_bank_select = 0;
|
||||
|
||||
for (integer i = 0; i < NUM_BANKS; i++) begin
|
||||
for (integer p = 0; p < NUM_PORTS; p++) begin
|
||||
if (per_bank_core_rsp_valid[i]
|
||||
&& per_bank_core_rsp_pmask[i][p]
|
||||
&& (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == core_rsp_tag_unqual[CORE_TAG_ID_BITS-1:0])) begin
|
||||
core_rsp_valid_unqual[per_bank_core_rsp_tid[i][p]] = 1;
|
||||
core_rsp_data_unqual[per_bank_core_rsp_tid[i][p]] = per_bank_core_rsp_data[i][p];
|
||||
core_rsp_bank_select[i] = core_rsp_ready_unqual;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
end else begin
|
||||
|
||||
`UNUSED_VAR (per_bank_core_rsp_pmask)
|
||||
|
||||
always @(*) begin
|
||||
core_rsp_valid_unqual = 0;
|
||||
core_rsp_data_unqual = 'x;
|
||||
core_rsp_bank_select = 0;
|
||||
|
||||
for (integer i = 0; i < NUM_BANKS; i++) begin
|
||||
if (per_bank_core_rsp_valid[i]
|
||||
&& (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == core_rsp_tag_unqual[CORE_TAG_ID_BITS-1:0])) begin
|
||||
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
|
||||
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
|
||||
core_rsp_bank_select[i] = core_rsp_ready_unqual;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
wire core_rsp_valid_any = (| per_bank_core_rsp_valid);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH))
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (core_rsp_valid_any),
|
||||
.data_in ({core_rsp_valid_unqual, core_rsp_tag_unqual, core_rsp_data_unqual}),
|
||||
.ready_in (core_rsp_ready_unqual),
|
||||
.valid_out (core_rsp_valid),
|
||||
.data_out ({core_rsp_tmask, core_rsp_tag, core_rsp_data}),
|
||||
.ready_out (core_rsp_ready)
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
||||
`UNUSED_VAR (per_bank_core_rsp_pmask)
|
||||
|
||||
reg [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
|
||||
reg [NUM_REQS-1:0][NUM_BANKS-1:0] bank_select_table;
|
||||
|
||||
wire [NUM_REQS-1:0] core_rsp_ready_unqual;
|
||||
|
||||
always @(*) begin
|
||||
core_rsp_valid_unqual = 0;
|
||||
core_rsp_tag_unqual = 'x;
|
||||
core_rsp_data_unqual = 'x;
|
||||
bank_select_table = 'x;
|
||||
|
||||
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
|
||||
if (per_bank_core_rsp_valid[i]) begin
|
||||
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
|
||||
core_rsp_tag_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i];
|
||||
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
|
||||
bank_select_table[per_bank_core_rsp_tid[i]] = (1 << i);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_BANKS; i++) begin
|
||||
core_rsp_bank_select[i] = core_rsp_ready_unqual[per_bank_core_rsp_tid[i]]
|
||||
&& bank_select_table[per_bank_core_rsp_tid[i]][i];
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
VX_skid_buffer #(
|
||||
.DATAW (CORE_TAG_WIDTH + `WORD_WIDTH)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (core_rsp_valid_unqual[i]),
|
||||
.data_in ({core_rsp_tag_unqual[i], core_rsp_data_unqual[i]}),
|
||||
.ready_in (core_rsp_ready_unqual[i]),
|
||||
.valid_out (core_rsp_valid[i]),
|
||||
.data_out ({core_rsp_tag[i],core_rsp_data[i]}),
|
||||
.ready_out (core_rsp_ready[i])
|
||||
);
|
||||
end
|
||||
|
||||
assign core_rsp_tmask = core_rsp_valid;
|
||||
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; i++) begin
|
||||
assign per_bank_core_rsp_ready[i] = core_rsp_bank_select[i];
|
||||
end
|
||||
|
||||
end else begin
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (per_bank_core_rsp_pmask)
|
||||
|
||||
if (NUM_REQS > 1) begin
|
||||
|
||||
reg [`CORE_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
|
||||
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual;
|
||||
|
||||
if (CORE_TAG_ID_BITS != 0) begin
|
||||
|
||||
reg [NUM_REQS-1:0] core_rsp_tmask_unqual;
|
||||
|
||||
always @(*) begin
|
||||
core_rsp_tmask_unqual = 0;
|
||||
core_rsp_tmask_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_valid;
|
||||
|
||||
core_rsp_tag_unqual = per_bank_core_rsp_tag;
|
||||
|
||||
core_rsp_data_unqual = 'x;
|
||||
core_rsp_data_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_data;
|
||||
end
|
||||
|
||||
assign core_rsp_valid = per_bank_core_rsp_valid;
|
||||
assign core_rsp_tmask = core_rsp_tmask_unqual;
|
||||
assign per_bank_core_rsp_ready = core_rsp_ready;
|
||||
|
||||
end else begin
|
||||
|
||||
reg [`CORE_RSP_TAGS-1:0] core_rsp_valid_unqual;
|
||||
|
||||
always @(*) begin
|
||||
core_rsp_valid_unqual = 0;
|
||||
core_rsp_valid_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_valid;
|
||||
|
||||
core_rsp_tag_unqual = 'x;
|
||||
core_rsp_tag_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_tag;
|
||||
|
||||
core_rsp_data_unqual = 'x;
|
||||
core_rsp_data_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_data;
|
||||
end
|
||||
|
||||
assign core_rsp_valid = core_rsp_valid_unqual;
|
||||
assign core_rsp_tmask = core_rsp_valid_unqual;
|
||||
assign per_bank_core_rsp_ready = core_rsp_ready[per_bank_core_rsp_tid];
|
||||
|
||||
end
|
||||
|
||||
assign core_rsp_tag = core_rsp_tag_unqual;
|
||||
assign core_rsp_data = core_rsp_data_unqual;
|
||||
|
||||
end else begin
|
||||
|
||||
`UNUSED_VAR(per_bank_core_rsp_tid)
|
||||
assign core_rsp_valid = per_bank_core_rsp_valid;
|
||||
assign core_rsp_tmask = per_bank_core_rsp_valid;
|
||||
assign core_rsp_tag = per_bank_core_rsp_tag;
|
||||
assign core_rsp_data = per_bank_core_rsp_data;
|
||||
assign per_bank_core_rsp_ready = core_rsp_ready;
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
136
hw/rtl/cache/VX_data_access.sv
vendored
Normal file
136
hw/rtl/cache/VX_data_access.sv
vendored
Normal file
|
@ -0,0 +1,136 @@
|
|||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_data_access #(
|
||||
parameter CACHE_ID = 0,
|
||||
parameter BANK_ID = 0,
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter CACHE_LINE_SIZE = 1,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of ports per banks
|
||||
parameter NUM_PORTS = 1,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 1,
|
||||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
parameter WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
input wire[31:0] debug_pc,
|
||||
input wire[`NW_BITS-1:0] debug_wid,
|
||||
`IGNORE_UNUSED_END
|
||||
`endif
|
||||
|
||||
input wire stall,
|
||||
|
||||
input wire read,
|
||||
input wire fill,
|
||||
input wire write,
|
||||
input wire[`LINE_ADDR_WIDTH-1:0] addr,
|
||||
input wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] wsel,
|
||||
input wire [NUM_PORTS-1:0] pmask,
|
||||
input wire [NUM_PORTS-1:0][WORD_SIZE-1:0] byteen,
|
||||
input wire [`WORDS_PER_LINE-1:0][`WORD_WIDTH-1:0] fill_data,
|
||||
input wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] write_data,
|
||||
output wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] read_data
|
||||
);
|
||||
`UNUSED_PARAM (CACHE_ID)
|
||||
`UNUSED_PARAM (BANK_ID)
|
||||
`UNUSED_PARAM (WORD_SIZE)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (addr)
|
||||
`UNUSED_VAR (read)
|
||||
|
||||
localparam BYTEENW = WRITE_ENABLE ? CACHE_LINE_SIZE : 1;
|
||||
|
||||
wire [`WORDS_PER_LINE-1:0][`WORD_WIDTH-1:0] rdata;
|
||||
wire [`WORDS_PER_LINE-1:0][`WORD_WIDTH-1:0] wdata;
|
||||
wire [BYTEENW-1:0] wren;
|
||||
|
||||
wire [`LINE_SELECT_BITS-1:0] line_addr = addr[`LINE_SELECT_BITS-1:0];
|
||||
|
||||
if (WRITE_ENABLE) begin
|
||||
if (`WORDS_PER_LINE > 1) begin
|
||||
reg [`WORDS_PER_LINE-1:0][`WORD_WIDTH-1:0] wdata_r;
|
||||
reg [`WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wren_r;
|
||||
if (NUM_PORTS > 1) begin
|
||||
always @(*) begin
|
||||
wdata_r = 'x;
|
||||
wren_r = 0;
|
||||
for (integer i = 0; i < NUM_PORTS; ++i) begin
|
||||
if (pmask[i]) begin
|
||||
wdata_r[wsel[i]] = write_data[i];
|
||||
wren_r[wsel[i]] = byteen[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
`UNUSED_VAR (pmask)
|
||||
always @(*) begin
|
||||
wdata_r = {`WORDS_PER_LINE{write_data}};
|
||||
wren_r = 0;
|
||||
wren_r[wsel] = byteen;
|
||||
end
|
||||
end
|
||||
assign wdata = write ? wdata_r : fill_data;
|
||||
assign wren = write ? wren_r : {BYTEENW{fill}};
|
||||
end else begin
|
||||
`UNUSED_VAR (wsel)
|
||||
`UNUSED_VAR (pmask)
|
||||
assign wdata = write ? write_data : fill_data;
|
||||
assign wren = write ? byteen : {BYTEENW{fill}};
|
||||
end
|
||||
end else begin
|
||||
`UNUSED_VAR (write)
|
||||
`UNUSED_VAR (byteen)
|
||||
`UNUSED_VAR (pmask)
|
||||
`UNUSED_VAR (write_data)
|
||||
assign wdata = fill_data;
|
||||
assign wren = fill;
|
||||
end
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (`CACHE_LINE_WIDTH),
|
||||
.SIZE (`LINES_PER_BANK),
|
||||
.BYTEENW (BYTEENW),
|
||||
.NO_RWCHECK (1)
|
||||
) data_store (
|
||||
.clk (clk),
|
||||
.addr (line_addr),
|
||||
.wren (wren),
|
||||
.wdata (wdata),
|
||||
.rdata (rdata)
|
||||
);
|
||||
|
||||
if (`WORDS_PER_LINE > 1) begin
|
||||
for (genvar i = 0; i < NUM_PORTS; ++i) begin
|
||||
assign read_data[i] = rdata[wsel[i]];
|
||||
end
|
||||
end else begin
|
||||
assign read_data = rdata;
|
||||
end
|
||||
|
||||
`UNUSED_VAR (stall)
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_DATA
|
||||
always @(posedge clk) begin
|
||||
if (fill && ~stall) begin
|
||||
dpi_trace("%d: cache%0d:%0d data-fill: addr=%0h, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, fill_data);
|
||||
end
|
||||
if (read && ~stall) begin
|
||||
dpi_trace("%d: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, line_addr, read_data);
|
||||
end
|
||||
if (write && ~stall) begin
|
||||
dpi_trace("%d: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, byteen=%b, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, byteen, line_addr, write_data);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
93
hw/rtl/cache/VX_data_access.v
vendored
93
hw/rtl/cache/VX_data_access.v
vendored
|
@ -1,93 +0,0 @@
|
|||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_data_access #(
|
||||
parameter CACHE_ID = 0,
|
||||
parameter BANK_ID = 0,
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter CACHE_LINE_SIZE = 1,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 1,
|
||||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
input wire[31:0] debug_pc,
|
||||
input wire[`NW_BITS-1:0] debug_wid,
|
||||
`IGNORE_WARNINGS_END
|
||||
`endif
|
||||
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
input wire[`LINE_ADDR_WIDTH-1:0] addr,
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
// reading
|
||||
input wire readen,
|
||||
output wire [`CACHE_LINE_WIDTH-1:0] rdata,
|
||||
|
||||
// writing
|
||||
input wire writeen,
|
||||
input wire is_fill,
|
||||
input wire [CACHE_LINE_SIZE-1:0] byteen,
|
||||
input wire [`CACHE_LINE_WIDTH-1:0] wdata
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CACHE_ID)
|
||||
`UNUSED_PARAM (BANK_ID)
|
||||
`UNUSED_PARAM (WORD_SIZE)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (readen)
|
||||
|
||||
localparam BYTEENW = WRITE_ENABLE ? CACHE_LINE_SIZE : 1;
|
||||
|
||||
wire [`LINE_SELECT_BITS-1:0] line_addr;
|
||||
wire [BYTEENW-1:0] byte_enable;
|
||||
|
||||
assign line_addr = addr[`LINE_SELECT_BITS-1:0];
|
||||
|
||||
if (WRITE_ENABLE) begin
|
||||
assign byte_enable = is_fill ? {BYTEENW{1'b1}} : byteen;
|
||||
end else begin
|
||||
`UNUSED_VAR (byteen)
|
||||
`UNUSED_VAR (is_fill)
|
||||
assign byte_enable = 1'b1;
|
||||
end
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (CACHE_LINE_SIZE * 8),
|
||||
.SIZE (`LINES_PER_BANK),
|
||||
.BYTEENW (BYTEENW),
|
||||
.RWCHECK (1)
|
||||
) data_store (
|
||||
.clk(clk),
|
||||
.addr(line_addr),
|
||||
.wren(writeen),
|
||||
.byteen(byte_enable),
|
||||
.rden(1'b1),
|
||||
.din(wdata),
|
||||
.dout(rdata)
|
||||
);
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_DATA
|
||||
always @(posedge clk) begin
|
||||
if (writeen) begin
|
||||
if (is_fill) begin
|
||||
$display("%t: cache%0d:%0d data-fill: addr=%0h, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, wdata);
|
||||
end else begin
|
||||
$display("%t: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, byteen=%b, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, byte_enable, line_addr, wdata);
|
||||
end
|
||||
end
|
||||
if (readen) begin
|
||||
$display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, line_addr, rdata);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
240
hw/rtl/cache/VX_miss_resrv.sv
vendored
Normal file
240
hw/rtl/cache/VX_miss_resrv.sv
vendored
Normal file
|
@ -0,0 +1,240 @@
|
|||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_miss_resrv #(
|
||||
parameter CACHE_ID = 0,
|
||||
parameter BANK_ID = 0,
|
||||
|
||||
// Number of Word requests per cycle
|
||||
parameter NUM_REQS = 1,
|
||||
|
||||
// Size of line inside a bank in bytes
|
||||
parameter CACHE_LINE_SIZE = 1,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of ports per banks
|
||||
parameter NUM_PORTS = 1,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 1,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MSHR_SIZE = 1,
|
||||
// core request tag size
|
||||
parameter CORE_TAG_WIDTH = 1,
|
||||
|
||||
parameter MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
input wire[31:0] deq_debug_pc,
|
||||
input wire[`NW_BITS-1:0] deq_debug_wid,
|
||||
input wire[31:0] lkp_debug_pc,
|
||||
input wire[`NW_BITS-1:0] lkp_debug_wid,
|
||||
input wire[31:0] rel_debug_pc,
|
||||
input wire[`NW_BITS-1:0] rel_debug_wid,
|
||||
`IGNORE_UNUSED_END
|
||||
`endif
|
||||
|
||||
// allocate
|
||||
input wire allocate_valid,
|
||||
input wire [`LINE_ADDR_WIDTH-1:0] allocate_addr,
|
||||
input wire [`MSHR_DATA_WIDTH-1:0] allocate_data,
|
||||
output wire [MSHR_ADDR_WIDTH-1:0] allocate_id,
|
||||
output wire allocate_ready,
|
||||
|
||||
// fill
|
||||
input wire fill_valid,
|
||||
input wire [MSHR_ADDR_WIDTH-1:0] fill_id,
|
||||
output wire [`LINE_ADDR_WIDTH-1:0] fill_addr,
|
||||
|
||||
// lookup
|
||||
input wire lookup_valid,
|
||||
input wire lookup_replay,
|
||||
input wire [MSHR_ADDR_WIDTH-1:0] lookup_id,
|
||||
input wire [`LINE_ADDR_WIDTH-1:0] lookup_addr,
|
||||
output wire lookup_match,
|
||||
|
||||
// dequeue
|
||||
output wire dequeue_valid,
|
||||
output wire [MSHR_ADDR_WIDTH-1:0] dequeue_id,
|
||||
output wire [`LINE_ADDR_WIDTH-1:0] dequeue_addr,
|
||||
output wire [`MSHR_DATA_WIDTH-1:0] dequeue_data,
|
||||
input wire dequeue_ready,
|
||||
|
||||
// release
|
||||
input wire release_valid,
|
||||
input wire [MSHR_ADDR_WIDTH-1:0] release_id
|
||||
);
|
||||
`UNUSED_PARAM (CACHE_ID)
|
||||
`UNUSED_PARAM (BANK_ID)
|
||||
|
||||
reg [MSHR_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table, addr_table_n;
|
||||
reg [MSHR_SIZE-1:0] valid_table, valid_table_n;
|
||||
reg [MSHR_SIZE-1:0] ready_table, ready_table_n;
|
||||
|
||||
reg allocate_rdy_r, allocate_rdy_n;
|
||||
reg [MSHR_ADDR_WIDTH-1:0] allocate_id_r, allocate_id_n;
|
||||
|
||||
reg dequeue_val_r, dequeue_val_n, dequeue_val_x;
|
||||
reg [MSHR_ADDR_WIDTH-1:0] dequeue_id_r, dequeue_id_n, dequeue_id_x;
|
||||
|
||||
reg [MSHR_SIZE-1:0] valid_table_x;
|
||||
reg [MSHR_SIZE-1:0] ready_table_x;
|
||||
|
||||
wire [MSHR_SIZE-1:0] addr_matches;
|
||||
|
||||
wire allocate_fire = allocate_valid && allocate_ready;
|
||||
|
||||
wire dequeue_fire = dequeue_valid && dequeue_ready;
|
||||
|
||||
for (genvar i = 0; i < MSHR_SIZE; ++i) begin
|
||||
assign addr_matches[i] = (addr_table[i] == lookup_addr);
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
valid_table_x = valid_table;
|
||||
ready_table_x = ready_table;
|
||||
if (dequeue_fire) begin
|
||||
valid_table_x[dequeue_id] = 0;
|
||||
end
|
||||
if (lookup_replay) begin
|
||||
ready_table_x |= addr_matches;
|
||||
end
|
||||
end
|
||||
|
||||
VX_lzc #(
|
||||
.N (MSHR_SIZE)
|
||||
) dequeue_sel (
|
||||
.in_i (valid_table_x & ready_table_x),
|
||||
.cnt_o (dequeue_id_x),
|
||||
.valid_o (dequeue_val_x)
|
||||
);
|
||||
|
||||
VX_lzc #(
|
||||
.N (MSHR_SIZE)
|
||||
) allocate_sel (
|
||||
.in_i (~valid_table_n),
|
||||
.cnt_o (allocate_id_n),
|
||||
.valid_o (allocate_rdy_n)
|
||||
);
|
||||
|
||||
always @(*) begin
|
||||
valid_table_n = valid_table_x;
|
||||
ready_table_n = ready_table_x;
|
||||
addr_table_n = addr_table;
|
||||
dequeue_val_n = dequeue_val_r;
|
||||
dequeue_id_n = dequeue_id_r;
|
||||
|
||||
if (dequeue_fire) begin
|
||||
dequeue_val_n = dequeue_val_x;
|
||||
dequeue_id_n = dequeue_id_x;
|
||||
end
|
||||
|
||||
if (allocate_fire) begin
|
||||
valid_table_n[allocate_id] = 1;
|
||||
ready_table_n[allocate_id] = 0;
|
||||
addr_table_n[allocate_id] = allocate_addr;
|
||||
end
|
||||
|
||||
if (fill_valid) begin
|
||||
dequeue_val_n = 1;
|
||||
dequeue_id_n = fill_id;
|
||||
end
|
||||
|
||||
if (release_valid) begin
|
||||
valid_table_n[release_id] = 0;
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
valid_table <= 0;
|
||||
allocate_rdy_r <= 0;
|
||||
dequeue_val_r <= 0;
|
||||
end else begin
|
||||
valid_table <= valid_table_n;
|
||||
allocate_rdy_r <= allocate_rdy_n;
|
||||
dequeue_val_r <= dequeue_val_n;
|
||||
end
|
||||
ready_table <= ready_table_n;
|
||||
addr_table <= addr_table_n;
|
||||
dequeue_id_r <= dequeue_id_n;
|
||||
allocate_id_r <= allocate_id_n;
|
||||
|
||||
`ASSERT(!allocate_fire || !valid_table[allocate_id_r], ("runtime error"));
|
||||
`ASSERT(!release_valid || valid_table[release_id], ("runtime error"));
|
||||
end
|
||||
|
||||
`RUNTIME_ASSERT((!allocate_fire || ~valid_table[allocate_id]), ("%t: *** cache%0d:%0d in-use allocation: addr=%0h, id=%0d", $time, CACHE_ID, BANK_ID,
|
||||
`LINE_TO_BYTE_ADDR(allocate_addr, BANK_ID), allocate_id))
|
||||
|
||||
`RUNTIME_ASSERT((!fill_valid || valid_table[fill_id]), ("%t: *** cache%0d:%0d invalid fill: addr=%0h, id=%0d", $time, CACHE_ID, BANK_ID,
|
||||
`LINE_TO_BYTE_ADDR(addr_table[fill_id], BANK_ID), fill_id))
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (`MSHR_DATA_WIDTH),
|
||||
.SIZE (MSHR_SIZE),
|
||||
.LUTRAM (1)
|
||||
) entries (
|
||||
.clk (clk),
|
||||
.waddr (allocate_id_r),
|
||||
.raddr (dequeue_id_r),
|
||||
.wren (allocate_valid),
|
||||
.wdata (allocate_data),
|
||||
.rdata (dequeue_data)
|
||||
);
|
||||
|
||||
assign fill_addr = addr_table[fill_id];
|
||||
|
||||
assign allocate_ready = allocate_rdy_r;
|
||||
assign allocate_id = allocate_id_r;
|
||||
|
||||
assign dequeue_valid = dequeue_val_r;
|
||||
assign dequeue_id = dequeue_id_r;
|
||||
assign dequeue_addr = addr_table[dequeue_id_r];
|
||||
|
||||
wire [MSHR_SIZE-1:0] lookup_entries;
|
||||
for (genvar i = 0; i < MSHR_SIZE; ++i) begin
|
||||
assign lookup_entries[i] = (i != lookup_id);
|
||||
end
|
||||
assign lookup_match = |(lookup_entries & valid_table & addr_matches);
|
||||
|
||||
`UNUSED_VAR (lookup_valid)
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_MSHR
|
||||
always @(posedge clk) begin
|
||||
if (allocate_fire || fill_valid || dequeue_fire || lookup_replay || lookup_valid || release_valid) begin
|
||||
if (allocate_fire)
|
||||
dpi_trace("%d: cache%0d:%0d mshr-allocate: addr=%0h, id=%0d, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID,
|
||||
`LINE_TO_BYTE_ADDR(allocate_addr, BANK_ID), allocate_id, deq_debug_wid, deq_debug_pc);
|
||||
if (fill_valid)
|
||||
dpi_trace("%d: cache%0d:%0d mshr-fill: addr=%0h, id=%0d, addr=%0h\n", $time, CACHE_ID, BANK_ID,
|
||||
`LINE_TO_BYTE_ADDR(addr_table[fill_id], BANK_ID), fill_id, `LINE_TO_BYTE_ADDR(fill_addr, BANK_ID));
|
||||
if (dequeue_fire)
|
||||
dpi_trace("%d: cache%0d:%0d mshr-dequeue: addr=%0h, id=%0d, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID,
|
||||
`LINE_TO_BYTE_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_debug_wid, deq_debug_pc);
|
||||
if (lookup_replay)
|
||||
dpi_trace("%d: cache%0d:%0d mshr-replay: addr=%0h, id=%0d\n", $time, CACHE_ID, BANK_ID,
|
||||
`LINE_TO_BYTE_ADDR(lookup_addr, BANK_ID), lookup_id);
|
||||
if (lookup_valid)
|
||||
dpi_trace("%d: cache%0d:%0d mshr-lookup: addr=%0h, id=%0d, match=%b, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID,
|
||||
`LINE_TO_BYTE_ADDR(lookup_addr, BANK_ID), lookup_id, lookup_match, lkp_debug_wid, lkp_debug_pc);
|
||||
if (release_valid)
|
||||
dpi_trace("%d: cache%0d:%0d mshr-release id=%0d, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID,
|
||||
release_id, rel_debug_wid, rel_debug_pc);
|
||||
dpi_trace("%d: cache%0d:%0d mshr-table", $time, CACHE_ID, BANK_ID);
|
||||
for (integer i = 0; i < MSHR_SIZE; ++i) begin
|
||||
if (valid_table[i]) begin
|
||||
dpi_trace(" ");
|
||||
if (ready_table[i])
|
||||
dpi_trace("*");
|
||||
dpi_trace("%0d=%0h", i, `LINE_TO_BYTE_ADDR(addr_table[i], BANK_ID));
|
||||
end
|
||||
end
|
||||
dpi_trace("\n");
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
233
hw/rtl/cache/VX_miss_resrv.v
vendored
233
hw/rtl/cache/VX_miss_resrv.v
vendored
|
@ -1,233 +0,0 @@
|
|||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_miss_resrv #(
|
||||
parameter CACHE_ID = 0,
|
||||
parameter BANK_ID = 0,
|
||||
|
||||
// Number of Word requests per cycle
|
||||
parameter NUM_REQS = 1,
|
||||
|
||||
// Size of line inside a bank in bytes
|
||||
parameter CACHE_LINE_SIZE = 1,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of ports per banks
|
||||
parameter NUM_PORTS = 1,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 1,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MSHR_SIZE = 1,
|
||||
parameter ALM_FULL = (MSHR_SIZE-1),
|
||||
// core request tag size
|
||||
parameter CORE_TAG_WIDTH = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
input wire[31:0] deq_debug_pc,
|
||||
input wire[`NW_BITS-1:0] deq_debug_wid,
|
||||
input wire[31:0] enq_debug_pc,
|
||||
input wire[`NW_BITS-1:0] enq_debug_wid,
|
||||
`IGNORE_WARNINGS_END
|
||||
`endif
|
||||
|
||||
// enqueue
|
||||
input wire enqueue,
|
||||
input wire [`LINE_ADDR_WIDTH-1:0] enqueue_addr,
|
||||
input wire [`MSHR_DATA_WIDTH-1:0] enqueue_data,
|
||||
input wire enqueue_is_mshr,
|
||||
input wire enqueue_as_ready,
|
||||
output wire enqueue_full,
|
||||
output wire enqueue_almfull,
|
||||
|
||||
// fill
|
||||
input wire fill_start,
|
||||
input wire [`LINE_ADDR_WIDTH-1:0] fill_addr,
|
||||
|
||||
// lookup
|
||||
input wire [`LINE_ADDR_WIDTH-1:0] lookup_addr,
|
||||
output wire lookup_match,
|
||||
input wire lookup_fill,
|
||||
|
||||
// schedule
|
||||
input wire schedule,
|
||||
output wire schedule_valid,
|
||||
output wire [`LINE_ADDR_WIDTH-1:0] schedule_addr,
|
||||
output wire [`MSHR_DATA_WIDTH-1:0] schedule_data,
|
||||
|
||||
// dequeue
|
||||
input wire dequeue
|
||||
);
|
||||
`UNUSED_PARAM (CACHE_ID)
|
||||
`UNUSED_PARAM (BANK_ID)
|
||||
localparam ADDRW = $clog2(MSHR_SIZE);
|
||||
|
||||
reg [MSHR_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table;
|
||||
|
||||
reg [MSHR_SIZE-1:0] valid_table, valid_table_n;
|
||||
reg [MSHR_SIZE-1:0] ready_table, ready_table_n;
|
||||
reg [ADDRW-1:0] head_ptr, head_ptr_n;
|
||||
reg [ADDRW-1:0] tail_ptr, tail_ptr_n;
|
||||
reg [ADDRW-1:0] restore_ptr, restore_ptr_n;
|
||||
reg [ADDRW-1:0] schedule_ptr, schedule_ptr_n;
|
||||
reg [ADDRW-1:0] used_r;
|
||||
reg alm_full_r, full_r;
|
||||
reg valid_out_r;
|
||||
|
||||
wire [MSHR_SIZE-1:0] valid_address_match;
|
||||
for (genvar i = 0; i < MSHR_SIZE; i++) begin
|
||||
assign valid_address_match[i] = valid_table[i] && (addr_table[i] == lookup_addr);
|
||||
end
|
||||
|
||||
wire push_new = enqueue && !enqueue_is_mshr;
|
||||
|
||||
wire restore = enqueue && enqueue_is_mshr;
|
||||
|
||||
always @(*) begin
|
||||
valid_table_n = valid_table;
|
||||
ready_table_n = ready_table;
|
||||
head_ptr_n = head_ptr;
|
||||
tail_ptr_n = tail_ptr;
|
||||
schedule_ptr_n = schedule_ptr;
|
||||
restore_ptr_n = restore_ptr;
|
||||
|
||||
if (lookup_fill) begin
|
||||
// unlock pending requests for scheduling
|
||||
ready_table_n |= valid_address_match;
|
||||
end
|
||||
|
||||
if (schedule) begin
|
||||
// schedule next entry
|
||||
schedule_ptr_n = schedule_ptr + 1;
|
||||
valid_table_n[schedule_ptr] = 0;
|
||||
ready_table_n[schedule_ptr] = 0;
|
||||
end
|
||||
|
||||
if (fill_start && (fill_addr == addr_table[schedule_ptr])) begin
|
||||
ready_table_n[schedule_ptr] = valid_table[schedule_ptr];
|
||||
end
|
||||
|
||||
if (push_new) begin
|
||||
// push new entry
|
||||
valid_table_n[tail_ptr] = 1;
|
||||
ready_table_n[tail_ptr] = enqueue_as_ready;
|
||||
tail_ptr_n = tail_ptr + 1;
|
||||
end else if (restore) begin
|
||||
// restore schedule, returning missed mshr entry
|
||||
valid_table_n[restore_ptr] = 1;
|
||||
ready_table_n[restore_ptr] = enqueue_as_ready;
|
||||
restore_ptr_n = restore_ptr + 1;
|
||||
schedule_ptr_n = head_ptr;
|
||||
end else if (dequeue) begin
|
||||
// clear scheduled entry
|
||||
head_ptr_n = head_ptr + 1;
|
||||
restore_ptr_n = head_ptr_n;
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
valid_table <= 0;
|
||||
ready_table <= 0;
|
||||
head_ptr <= 0;
|
||||
tail_ptr <= 0;
|
||||
schedule_ptr <= 0;
|
||||
restore_ptr <= 0;
|
||||
used_r <= 0;
|
||||
alm_full_r <= 0;
|
||||
full_r <= 0;
|
||||
valid_out_r <= 0;
|
||||
end else begin
|
||||
if (schedule) begin
|
||||
assert(schedule_valid);
|
||||
assert(!fill_start);
|
||||
assert(!restore);
|
||||
end
|
||||
|
||||
if (push_new) begin
|
||||
assert(!full_r);
|
||||
end else if (restore) begin
|
||||
assert(!schedule);
|
||||
end
|
||||
|
||||
if (push_new) begin
|
||||
if (!dequeue) begin
|
||||
if (used_r == ADDRW'(ALM_FULL-1))
|
||||
alm_full_r <= 1;
|
||||
if (used_r == ADDRW'(MSHR_SIZE-1))
|
||||
full_r <= 1;
|
||||
end
|
||||
end else if (dequeue) begin
|
||||
if (used_r == ADDRW'(ALM_FULL))
|
||||
alm_full_r <= 0;
|
||||
full_r <= 0;
|
||||
end
|
||||
|
||||
used_r <= used_r + ADDRW'($signed(2'(push_new) - 2'(dequeue)));
|
||||
|
||||
valid_table <= valid_table_n;
|
||||
ready_table <= ready_table_n;
|
||||
head_ptr <= head_ptr_n;
|
||||
tail_ptr <= tail_ptr_n;
|
||||
schedule_ptr <= schedule_ptr_n;
|
||||
restore_ptr <= restore_ptr_n;
|
||||
valid_out_r <= ready_table_n[schedule_ptr_n];
|
||||
end
|
||||
|
||||
if (push_new) begin
|
||||
addr_table[tail_ptr] <= enqueue_addr;
|
||||
end
|
||||
end
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (`MSHR_DATA_WIDTH),
|
||||
.SIZE (MSHR_SIZE),
|
||||
.RWCHECK (1),
|
||||
.FASTRAM (1)
|
||||
) entries (
|
||||
.clk (clk),
|
||||
.waddr (tail_ptr),
|
||||
.raddr (schedule_ptr),
|
||||
.wren (push_new),
|
||||
.byteen (1'b1),
|
||||
.rden (1'b1),
|
||||
.din (enqueue_data),
|
||||
.dout (schedule_data)
|
||||
);
|
||||
|
||||
assign lookup_match = (| valid_address_match);
|
||||
assign schedule_valid = valid_out_r;
|
||||
assign schedule_addr = addr_table[schedule_ptr];
|
||||
assign enqueue_almfull = alm_full_r;
|
||||
assign enqueue_full = full_r;
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_MSHR
|
||||
always @(posedge clk) begin
|
||||
if (lookup_fill || schedule || enqueue || dequeue) begin
|
||||
if (schedule)
|
||||
$display("%t: cache%0d:%0d mshr-schedule: addr%0d=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, schedule_ptr, `LINE_TO_BYTE_ADDR(schedule_addr, BANK_ID), deq_debug_wid, deq_debug_pc);
|
||||
if (enqueue) begin
|
||||
if (enqueue_is_mshr)
|
||||
$display("%t: cache%0d:%0d mshr-restore: addr%0d=%0h, ready=%b", $time, CACHE_ID, BANK_ID, restore_ptr, `LINE_TO_BYTE_ADDR(enqueue_addr, BANK_ID), enqueue_as_ready);
|
||||
else
|
||||
$display("%t: cache%0d:%0d mshr-enqueue: addr%0d=%0h, ready=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, tail_ptr, `LINE_TO_BYTE_ADDR(enqueue_addr, BANK_ID), enqueue_as_ready, enq_debug_wid, enq_debug_pc);
|
||||
end
|
||||
if (dequeue)
|
||||
$display("%t: cache%0d:%0d mshr-dequeue addr%0d, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, head_ptr, enq_debug_wid, enq_debug_pc);
|
||||
$write("%t: cache%0d:%0d mshr-table", $time, CACHE_ID, BANK_ID);
|
||||
for (integer j = 0; j < MSHR_SIZE; j++) begin
|
||||
if (valid_table[j]) begin
|
||||
$write(" ");
|
||||
if (schedule_ptr == $bits(schedule_ptr)'(j)) $write("*");
|
||||
if (~ready_table[j]) $write("!");
|
||||
$write("addr%0d=%0h", j, `LINE_TO_BYTE_ADDR(addr_table[j], BANK_ID));
|
||||
end
|
||||
end
|
||||
$write("\n");
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
|
@ -1,6 +1,7 @@
|
|||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_nc_bypass #(
|
||||
parameter NUM_PORTS = 1,
|
||||
parameter NUM_REQS = 1,
|
||||
parameter NUM_RSP_TAGS = 0,
|
||||
parameter NC_TAG_BIT = 0,
|
||||
|
@ -10,13 +11,14 @@ module VX_nc_bypass #(
|
|||
parameter CORE_TAG_IN_WIDTH = 1,
|
||||
|
||||
parameter MEM_ADDR_WIDTH = 1,
|
||||
parameter MEM_DATA_SIZE = 1,
|
||||
parameter MEM_DATA_SIZE = 1,
|
||||
parameter MEM_TAG_IN_WIDTH = 1,
|
||||
parameter MEM_TAG_OUT_WIDTH = 1,
|
||||
|
||||
localparam CORE_DATA_WIDTH = CORE_DATA_SIZE * 8,
|
||||
localparam MEM_DATA_WIDTH = MEM_DATA_SIZE * 8,
|
||||
localparam CORE_TAG_OUT_WIDTH = CORE_TAG_IN_WIDTH - 1
|
||||
parameter CORE_DATA_WIDTH = CORE_DATA_SIZE * 8,
|
||||
parameter MEM_DATA_WIDTH = MEM_DATA_SIZE * 8,
|
||||
parameter CORE_TAG_OUT_WIDTH = CORE_TAG_IN_WIDTH - 1,
|
||||
parameter MEM_SELECT_BITS = `UP(`CLOG2(MEM_DATA_SIZE / CORE_DATA_SIZE))
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -57,8 +59,10 @@ module VX_nc_bypass #(
|
|||
input wire mem_req_valid_in,
|
||||
input wire mem_req_rw_in,
|
||||
input wire [MEM_ADDR_WIDTH-1:0] mem_req_addr_in,
|
||||
input wire [MEM_DATA_SIZE-1:0] mem_req_byteen_in,
|
||||
input wire [MEM_DATA_WIDTH-1:0] mem_req_data_in,
|
||||
input wire [NUM_PORTS-1:0] mem_req_pmask_in,
|
||||
input wire [NUM_PORTS-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_in,
|
||||
input wire [NUM_PORTS-1:0][MEM_SELECT_BITS-1:0] mem_req_wsel_in,
|
||||
input wire [NUM_PORTS-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in,
|
||||
input wire [MEM_TAG_IN_WIDTH-1:0] mem_req_tag_in,
|
||||
output wire mem_req_ready_in,
|
||||
|
||||
|
@ -66,8 +70,10 @@ module VX_nc_bypass #(
|
|||
output wire mem_req_valid_out,
|
||||
output wire mem_req_rw_out,
|
||||
output wire [MEM_ADDR_WIDTH-1:0] mem_req_addr_out,
|
||||
output wire [MEM_DATA_SIZE-1:0] mem_req_byteen_out,
|
||||
output wire [MEM_DATA_WIDTH-1:0] mem_req_data_out,
|
||||
output wire [NUM_PORTS-1:0] mem_req_pmask_out,
|
||||
output wire [NUM_PORTS-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_out,
|
||||
output wire [NUM_PORTS-1:0][MEM_SELECT_BITS-1:0] mem_req_wsel_out,
|
||||
output wire [NUM_PORTS-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_out,
|
||||
output wire [MEM_TAG_OUT_WIDTH-1:0] mem_req_tag_out,
|
||||
input wire mem_req_ready_out,
|
||||
|
||||
|
@ -99,9 +105,9 @@ module VX_nc_bypass #(
|
|||
// core request handling
|
||||
|
||||
wire [NUM_REQS-1:0] core_req_valid_in_nc;
|
||||
wire [NUM_REQS-1:0] core_req_nc_sel;
|
||||
wire [NUM_REQS-1:0] core_req_nc_tids;
|
||||
wire [`UP(CORE_REQ_TIDW)-1:0] core_req_nc_tid;
|
||||
wire [NUM_REQS-1:0] core_req_nc_sel;
|
||||
wire core_req_nc_valid;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
|
@ -142,7 +148,6 @@ module VX_nc_bypass #(
|
|||
(~mem_req_valid_in && mem_req_ready_out && core_req_nc_sel[i]) : core_req_ready_out[i];
|
||||
end
|
||||
end else begin
|
||||
`UNUSED_VAR (core_req_nc_sel)
|
||||
assign core_req_ready_in = core_req_valid_in_nc ? (~mem_req_valid_in && mem_req_ready_out) : core_req_ready_out;
|
||||
end
|
||||
|
||||
|
@ -151,7 +156,7 @@ module VX_nc_bypass #(
|
|||
assign mem_req_valid_out = mem_req_valid_in || core_req_nc_valid;
|
||||
assign mem_req_ready_in = mem_req_ready_out;
|
||||
|
||||
wire [(MEM_TAG_IN_WIDTH+1)-1:0] mem_req_tag_in_nc;
|
||||
wire [(MEM_TAG_IN_WIDTH+1)-1:0] mem_req_tag_in_c;
|
||||
|
||||
VX_bits_insert #(
|
||||
.N (MEM_TAG_IN_WIDTH),
|
||||
|
@ -160,81 +165,69 @@ module VX_nc_bypass #(
|
|||
) mem_req_tag_insert (
|
||||
.data_in (mem_req_tag_in),
|
||||
.sel_in ('0),
|
||||
.data_out (mem_req_tag_in_nc)
|
||||
.data_out (mem_req_tag_in_c)
|
||||
);
|
||||
|
||||
wire [CORE_TAG_IN_WIDTH-1:0] core_req_tag_in_sel;
|
||||
wire [CORE_DATA_WIDTH-1:0] core_req_data_in_sel;
|
||||
wire [CORE_DATA_SIZE-1:0] core_req_byteen_in_sel;
|
||||
wire [CORE_ADDR_WIDTH-1:0] core_req_addr_in_sel;
|
||||
wire core_req_rw_in_sel;
|
||||
|
||||
if (NUM_REQS > 1) begin
|
||||
|
||||
wire [CORE_TAG_IN_WIDTH-1:0] core_req_tag_in_sel;
|
||||
wire [CORE_DATA_WIDTH-1:0] core_req_data_in_sel;
|
||||
wire [CORE_DATA_SIZE-1:0] core_req_byteen_in_sel;
|
||||
wire [CORE_ADDR_WIDTH-1:0] core_req_addr_in_sel;
|
||||
wire core_req_rw_in_sel;
|
||||
|
||||
wire [NUM_REQS-1:0][MUX_DATAW-1:0] core_req_nc_mux_in;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_nc_mux_in[i] = {core_req_tag_in[i], core_req_data_in[i], core_req_byteen_in[i], core_req_addr_in[i], core_req_rw_in[i]};
|
||||
end
|
||||
|
||||
VX_onehot_mux #(
|
||||
.DATAW (MUX_DATAW),
|
||||
.N (NUM_REQS)
|
||||
) core_req_nc_mux (
|
||||
.data_in (core_req_nc_mux_in),
|
||||
.sel_in (core_req_nc_sel),
|
||||
.data_out ({core_req_tag_in_sel, core_req_data_in_sel, core_req_byteen_in_sel, core_req_addr_in_sel, core_req_rw_in_sel})
|
||||
);
|
||||
|
||||
assign mem_req_rw_out = mem_req_valid_in ? mem_req_rw_in : core_req_rw_in_sel;
|
||||
assign mem_req_addr_out = mem_req_valid_in ? mem_req_addr_in : core_req_addr_in_sel[D +: MEM_ADDR_WIDTH];
|
||||
|
||||
for (genvar i = 0; i < P; ++i) begin
|
||||
assign mem_req_data_out[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = mem_req_valid_in ?
|
||||
mem_req_data_in[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] : core_req_data_in_sel;
|
||||
end
|
||||
|
||||
if (D != 0) begin
|
||||
wire [D-1:0] req_addr_idx = core_req_addr_in_sel[D-1:0];
|
||||
reg [MEM_DATA_SIZE-1:0] mem_req_byteen_in_r;
|
||||
always @(*) begin
|
||||
mem_req_byteen_in_r = 0;
|
||||
mem_req_byteen_in_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in_sel;
|
||||
end
|
||||
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r;
|
||||
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_nc) : MEM_TAG_OUT_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in_sel});
|
||||
end else begin
|
||||
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in_sel;
|
||||
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_nc) : MEM_TAG_OUT_WIDTH'({core_req_nc_tid, core_req_tag_in_sel});
|
||||
end
|
||||
assign {core_req_tag_in_sel, core_req_data_in_sel, core_req_byteen_in_sel, core_req_addr_in_sel, core_req_rw_in_sel} = core_req_nc_mux_in[core_req_nc_tid];
|
||||
end else begin
|
||||
`UNUSED_VAR (core_req_nc_tid)
|
||||
assign core_req_tag_in_sel = core_req_tag_in;
|
||||
assign core_req_data_in_sel = core_req_data_in;
|
||||
assign core_req_byteen_in_sel = core_req_byteen_in;
|
||||
assign core_req_addr_in_sel = core_req_addr_in;
|
||||
assign core_req_rw_in_sel = core_req_rw_in;
|
||||
end
|
||||
|
||||
assign mem_req_rw_out = mem_req_valid_in ? mem_req_rw_in : core_req_rw_in_sel;
|
||||
assign mem_req_addr_out = mem_req_valid_in ? mem_req_addr_in : core_req_addr_in_sel[D +: MEM_ADDR_WIDTH];
|
||||
|
||||
if (D != 0) begin
|
||||
reg [NUM_PORTS-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_in_r;
|
||||
reg [NUM_PORTS-1:0][MEM_SELECT_BITS-1:0] mem_req_wsel_in_r;
|
||||
reg [NUM_PORTS-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_r;
|
||||
|
||||
wire [D-1:0] req_addr_idx = core_req_addr_in_sel[D-1:0];
|
||||
|
||||
assign mem_req_rw_out = mem_req_valid_in ? mem_req_rw_in : core_req_rw_in;
|
||||
assign mem_req_addr_out = mem_req_valid_in ? mem_req_addr_in : core_req_addr_in[0][D +: MEM_ADDR_WIDTH];
|
||||
always @(*) begin
|
||||
mem_req_byteen_in_r = 0;
|
||||
mem_req_byteen_in_r[0] = core_req_byteen_in_sel;
|
||||
|
||||
for (genvar i = 0; i < P; ++i) begin
|
||||
assign mem_req_data_out[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = mem_req_valid_in ?
|
||||
mem_req_data_in[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] : core_req_data_in;
|
||||
mem_req_wsel_in_r = 'x;
|
||||
mem_req_wsel_in_r[0] = req_addr_idx;
|
||||
|
||||
mem_req_data_in_r = 'x;
|
||||
mem_req_data_in_r[0] = core_req_data_in_sel;
|
||||
end
|
||||
|
||||
if (D != 0) begin
|
||||
wire [D-1:0] req_addr_idx = core_req_addr_in[0][D-1:0];
|
||||
reg [MEM_DATA_SIZE-1:0] mem_req_byteen_in_r;
|
||||
always @(*) begin
|
||||
mem_req_byteen_in_r = 0;
|
||||
mem_req_byteen_in_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in;
|
||||
end
|
||||
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r;
|
||||
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_nc) : MEM_TAG_OUT_WIDTH'({req_addr_idx, core_req_tag_in});
|
||||
end else begin
|
||||
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in;
|
||||
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_nc) : MEM_TAG_OUT_WIDTH'(core_req_tag_in);
|
||||
end
|
||||
assign mem_req_pmask_out = mem_req_valid_in ? mem_req_pmask_in : NUM_PORTS'(1'b1);
|
||||
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r;
|
||||
assign mem_req_wsel_out = mem_req_valid_in ? mem_req_wsel_in : mem_req_wsel_in_r;
|
||||
assign mem_req_data_out = mem_req_valid_in ? mem_req_data_in : mem_req_data_in_r;
|
||||
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_c) : MEM_TAG_OUT_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in_sel});
|
||||
end else begin
|
||||
`UNUSED_VAR (mem_req_wsel_in)
|
||||
`UNUSED_VAR (mem_req_pmask_in)
|
||||
assign mem_req_pmask_out = 0;
|
||||
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in_sel;
|
||||
assign mem_req_data_out = mem_req_valid_in ? mem_req_data_in : core_req_data_in_sel;
|
||||
assign mem_req_wsel_out = 0;
|
||||
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_c) : MEM_TAG_OUT_WIDTH'({core_req_nc_tid, core_req_tag_in_sel});
|
||||
end
|
||||
|
||||
// core response handling
|
||||
|
||||
wire [NUM_RSP_TAGS-1:0][CORE_TAG_IN_WIDTH-1:0] core_rsp_tag_out_unqual;
|
||||
wire [NUM_RSP_TAGS-1:0][CORE_TAG_IN_WIDTH-1:0] core_rsp_tag_out_c;
|
||||
|
||||
wire is_mem_rsp_nc = mem_rsp_valid_in && mem_rsp_tag_in[NC_TAG_BIT];
|
||||
|
||||
|
@ -246,7 +239,7 @@ module VX_nc_bypass #(
|
|||
) core_rsp_tag_insert (
|
||||
.data_in (core_rsp_tag_in[i]),
|
||||
.sel_in ('0),
|
||||
.data_out (core_rsp_tag_out_unqual[i])
|
||||
.data_out (core_rsp_tag_out_c[i])
|
||||
);
|
||||
end
|
||||
|
||||
|
@ -272,14 +265,14 @@ module VX_nc_bypass #(
|
|||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_data_out[i] = core_rsp_valid_in[i] ? core_rsp_data_in[i] : mem_rsp_data_in;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_tag_out[i] = core_rsp_valid_in[i] ? core_rsp_tag_out_unqual[i] : mem_rsp_tag_in[CORE_TAG_IN_WIDTH-1:0];
|
||||
assign core_rsp_tag_out[i] = core_rsp_valid_in[i] ? core_rsp_tag_out_c[i] : mem_rsp_tag_in[CORE_TAG_IN_WIDTH-1:0];
|
||||
end
|
||||
end else begin
|
||||
assign core_rsp_valid_out = core_rsp_valid_in || is_mem_rsp_nc;
|
||||
assign core_rsp_tag_out = core_rsp_valid_in ? core_rsp_tag_out_unqual : mem_rsp_tag_in[CORE_TAG_IN_WIDTH-1:0];
|
||||
assign core_rsp_tag_out = core_rsp_valid_in ? core_rsp_tag_out_c : mem_rsp_tag_in[CORE_TAG_IN_WIDTH-1:0];
|
||||
assign core_rsp_ready_in = core_rsp_ready_out;
|
||||
|
||||
if (NUM_REQS > 1) begin
|
|
@ -24,14 +24,14 @@ module VX_shared_mem #(
|
|||
parameter CORE_TAG_WIDTH = (2 + CORE_TAG_ID_BITS),
|
||||
|
||||
// bank offset from beginning of index range
|
||||
parameter BANK_ADDR_OFFSET = `CLOG2(256)
|
||||
parameter BANK_ADDR_OFFSET = `CLOG2(256)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// PERF
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_cache_if perf_cache_if,
|
||||
VX_perf_cache_if.master perf_cache_if,
|
||||
`endif
|
||||
|
||||
// Core request
|
||||
|
@ -64,7 +64,7 @@ module VX_shared_mem #(
|
|||
wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data_unqual;
|
||||
wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag_unqual;
|
||||
wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid_unqual;
|
||||
wire per_bank_core_req_ready_unqual;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_ready_unqual;
|
||||
|
||||
VX_core_req_bank_sel #(
|
||||
.CACHE_ID (CACHE_ID),
|
||||
|
@ -74,8 +74,7 @@ module VX_shared_mem #(
|
|||
.WORD_SIZE (WORD_SIZE),
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
|
||||
.BANK_ADDR_OFFSET(BANK_ADDR_OFFSET),
|
||||
.SHARED_BANK_READY(1)
|
||||
.BANK_ADDR_OFFSET(BANK_ADDR_OFFSET)
|
||||
) core_req_bank_sel (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -103,41 +102,34 @@ module VX_shared_mem #(
|
|||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_valid;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_rw;
|
||||
wire [NUM_BANKS-1:0][`LINE_SELECT_BITS-1:0] per_bank_core_req_addr;
|
||||
wire [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr;
|
||||
wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen;
|
||||
wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data;
|
||||
wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag;
|
||||
wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid;
|
||||
|
||||
wire creq_in_ready;
|
||||
wire creq_out_valid;
|
||||
wire crsq_in_fire_last;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_req_reads = per_bank_core_req_valid & ~per_bank_core_req_rw;
|
||||
|
||||
wire per_bank_req_has_reads = (| per_bank_req_reads);
|
||||
|
||||
wire creq_in_valid = (| core_req_valid);
|
||||
|
||||
wire creq_out_ready = ~per_bank_req_has_reads // is write only
|
||||
|| crsq_in_fire_last; // is sending last read response
|
||||
|
||||
assign per_bank_core_req_ready_unqual = creq_in_ready;
|
||||
wire creq_out_valid, creq_out_ready;
|
||||
wire creq_in_valid, creq_in_ready;
|
||||
|
||||
wire creq_in_fire = creq_in_valid && creq_in_ready;
|
||||
`UNUSED_VAR (creq_in_fire)
|
||||
|
||||
wire creq_out_fire = creq_out_valid && creq_out_ready;
|
||||
`UNUSED_VAR (creq_out_fire)
|
||||
|
||||
wire [NUM_BANKS-1:0][`LINE_SELECT_BITS-1:0] per_bank_core_req_addr_qual;
|
||||
`UNUSED_VAR (per_bank_core_req_addr_unqual)
|
||||
for (genvar i = 0; i < NUM_BANKS; i++) begin
|
||||
assign per_bank_core_req_addr_qual[i] = per_bank_core_req_addr_unqual[i][`LINE_SELECT_BITS-1:0];
|
||||
end
|
||||
assign creq_in_valid = (| core_req_valid);
|
||||
assign per_bank_core_req_ready_unqual = {NUM_BANKS{creq_in_ready}};
|
||||
|
||||
wire [NUM_BANKS-1:0] core_req_read_mask, core_req_read_mask_unqual;
|
||||
wire core_req_writeonly, core_req_writeonly_unqual;
|
||||
|
||||
assign core_req_read_mask_unqual = per_bank_core_req_valid_unqual & ~per_bank_core_req_rw_unqual;
|
||||
assign core_req_writeonly_unqual = ~(| core_req_read_mask_unqual);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (NUM_BANKS * (1 + 1 + `LINE_SELECT_BITS + WORD_SIZE + `WORD_WIDTH + CORE_TAG_WIDTH + `REQS_BITS)),
|
||||
.SIZE (CREQ_SIZE),
|
||||
.OUTPUT_REG (1) // output should be registered for the data_store addr port
|
||||
.DATAW (NUM_BANKS * (1 + 1 + `LINE_ADDR_WIDTH + WORD_SIZE + `WORD_WIDTH + CORE_TAG_WIDTH + `REQS_BITS) + NUM_BANKS + 1),
|
||||
.SIZE (CREQ_SIZE),
|
||||
.OUT_REG (1) // output should be registered for the data_store addr port
|
||||
) core_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -145,44 +137,53 @@ module VX_shared_mem #(
|
|||
.valid_in (creq_in_valid),
|
||||
.data_in ({per_bank_core_req_valid_unqual,
|
||||
per_bank_core_req_rw_unqual,
|
||||
per_bank_core_req_addr_qual,
|
||||
per_bank_core_req_addr_unqual,
|
||||
per_bank_core_req_byteen_unqual,
|
||||
per_bank_core_req_data_unqual,
|
||||
per_bank_core_req_tag_unqual,
|
||||
per_bank_core_req_tid_unqual}),
|
||||
per_bank_core_req_tid_unqual,
|
||||
core_req_read_mask_unqual,
|
||||
core_req_writeonly_unqual}),
|
||||
.data_out ({per_bank_core_req_valid,
|
||||
per_bank_core_req_rw,
|
||||
per_bank_core_req_addr,
|
||||
per_bank_core_req_byteen,
|
||||
per_bank_core_req_data,
|
||||
per_bank_core_req_tag,
|
||||
per_bank_core_req_tid}),
|
||||
per_bank_core_req_tid,
|
||||
core_req_read_mask,
|
||||
core_req_writeonly}),
|
||||
.ready_out (creq_out_ready),
|
||||
.valid_out (creq_out_valid)
|
||||
);
|
||||
`UNUSED_VAR (creq_in_fire)
|
||||
|
||||
wire crsq_in_valid, crsq_in_ready;
|
||||
wire crsq_last_read;
|
||||
|
||||
assign creq_out_ready = core_req_writeonly
|
||||
|| (crsq_in_ready && crsq_last_read);
|
||||
|
||||
wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data;
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; i++) begin
|
||||
|
||||
wire wren = per_bank_core_req_rw[i]
|
||||
&& per_bank_core_req_valid[i]
|
||||
&& creq_out_fire;
|
||||
wire [WORD_SIZE-1:0] wren = per_bank_core_req_byteen[i]
|
||||
& {WORD_SIZE{per_bank_core_req_valid[i]
|
||||
&& per_bank_core_req_rw[i]}};
|
||||
|
||||
wire [`LINE_SELECT_BITS-1:0] addr = per_bank_core_req_addr[i][`LINE_SELECT_BITS-1:0];
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (`WORD_WIDTH),
|
||||
.SIZE (`LINES_PER_BANK),
|
||||
.BYTEENW (WORD_SIZE),
|
||||
.RWCHECK (1)
|
||||
.DATAW (`WORD_WIDTH),
|
||||
.SIZE (`LINES_PER_BANK),
|
||||
.BYTEENW (WORD_SIZE),
|
||||
.NO_RWCHECK (1)
|
||||
) data_store (
|
||||
.clk (clk),
|
||||
.addr (per_bank_core_req_addr[i]),
|
||||
.wren (wren),
|
||||
.byteen (per_bank_core_req_byteen[i]),
|
||||
.rden (1'b1),
|
||||
.din (per_bank_core_req_data[i]),
|
||||
.dout (per_bank_core_rsp_data[i])
|
||||
.clk (clk),
|
||||
.addr (addr),
|
||||
.wren (wren),
|
||||
.wdata (per_bank_core_req_data[i]),
|
||||
.rdata (per_bank_core_rsp_data[i])
|
||||
);
|
||||
end
|
||||
|
||||
|
@ -190,57 +191,54 @@ module VX_shared_mem #(
|
|||
// We first need to select the current tag to process,
|
||||
// then send all bank responses for that tag as a batch
|
||||
|
||||
wire crsq_in_valid, crsq_in_ready;
|
||||
|
||||
reg [NUM_BANKS-1:0] bank_rsp_sel_prv, bank_rsp_sel_cur;
|
||||
|
||||
wire [NUM_BANKS-1:0] bank_rsp_sel_n = bank_rsp_sel_prv | bank_rsp_sel_cur;
|
||||
reg [NUM_REQS-1:0] core_rsp_valids_in;
|
||||
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_in;
|
||||
wire [CORE_TAG_WIDTH-1:0] core_rsp_tag_in;
|
||||
reg [NUM_BANKS-1:0] bank_rsp_sel_r, bank_rsp_sel_n;
|
||||
|
||||
wire crsq_in_fire = crsq_in_valid && crsq_in_ready;
|
||||
|
||||
assign crsq_in_fire_last = crsq_in_fire && (bank_rsp_sel_n == per_bank_req_reads);
|
||||
assign crsq_last_read = (bank_rsp_sel_n == core_req_read_mask);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
bank_rsp_sel_prv <= 0;
|
||||
bank_rsp_sel_r <= 0;
|
||||
end else begin
|
||||
if (crsq_in_fire) begin
|
||||
if (bank_rsp_sel_n == per_bank_req_reads) begin
|
||||
bank_rsp_sel_prv <= 0;
|
||||
if (crsq_last_read) begin
|
||||
bank_rsp_sel_r <= 0;
|
||||
end else begin
|
||||
bank_rsp_sel_prv <= bank_rsp_sel_n;
|
||||
bank_rsp_sel_r <= bank_rsp_sel_n;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
reg [NUM_REQS-1:0] core_rsp_valids_in;
|
||||
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_in;
|
||||
reg [CORE_TAG_WIDTH-1:0] core_rsp_tag_in;
|
||||
|
||||
always @(*) begin
|
||||
end
|
||||
|
||||
VX_find_first #(
|
||||
.N (NUM_BANKS),
|
||||
.DATAW (CORE_TAG_WIDTH)
|
||||
) find_first (
|
||||
.valid_i (core_req_read_mask & ~bank_rsp_sel_r),
|
||||
.data_i (per_bank_core_req_tag),
|
||||
.data_o (core_rsp_tag_in),
|
||||
`UNUSED_PIN (valid_o)
|
||||
);
|
||||
|
||||
always @(*) begin
|
||||
core_rsp_valids_in = 0;
|
||||
core_rsp_data_in = 'x;
|
||||
core_rsp_tag_in = 'x;
|
||||
bank_rsp_sel_cur = 0;
|
||||
|
||||
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
|
||||
if (per_bank_req_reads[i] && ~bank_rsp_sel_prv[i]) begin
|
||||
core_rsp_tag_in = per_bank_core_req_tag[i];
|
||||
end
|
||||
end
|
||||
|
||||
bank_rsp_sel_n = bank_rsp_sel_r;
|
||||
for (integer i = 0; i < NUM_BANKS; i++) begin
|
||||
if (per_bank_core_req_valid[i]
|
||||
&& (core_rsp_tag_in[CORE_TAG_ID_BITS-1:0] == per_bank_core_req_tag[i][CORE_TAG_ID_BITS-1:0])) begin
|
||||
core_rsp_valids_in[per_bank_core_req_tid[i]] = 1;
|
||||
core_rsp_data_in[per_bank_core_req_tid[i]] = per_bank_core_rsp_data[i];
|
||||
bank_rsp_sel_cur[i] = 1;
|
||||
bank_rsp_sel_n[i] = 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign crsq_in_valid = creq_out_valid && per_bank_req_has_reads;
|
||||
assign crsq_in_valid = creq_out_valid && ~core_req_writeonly;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (NUM_BANKS * (1 + `WORD_WIDTH) + CORE_TAG_WIDTH),
|
||||
|
@ -257,10 +255,10 @@ module VX_shared_mem #(
|
|||
);
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
wire [NUM_BANKS-1:0][31:0] debug_pc_st0, debug_pc_st1;
|
||||
wire [NUM_BANKS-1:0][`NW_BITS-1:0] debug_wid_st0, debug_wid_st1;
|
||||
`IGNORE_WARNINGS_END
|
||||
`IGNORE_UNUSED_END
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
|
||||
|
@ -276,17 +274,21 @@ module VX_shared_mem #(
|
|||
`ifdef DBG_PRINT_CACHE_BANK
|
||||
|
||||
reg is_multi_tag_req;
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
reg [CORE_TAG_WIDTH-1:0] core_req_tag_sel;
|
||||
`IGNORE_WARNINGS_END
|
||||
`IGNORE_UNUSED_END
|
||||
|
||||
always @(*) begin
|
||||
core_req_tag_sel ='x;
|
||||
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
|
||||
if (per_bank_core_req_valid[i]) begin
|
||||
core_req_tag_sel = per_bank_core_req_tag[i];
|
||||
end
|
||||
end
|
||||
VX_find_first #(
|
||||
.N (NUM_BANKS),
|
||||
.DATAW (CORE_TAG_WIDTH)
|
||||
) find_first_d (
|
||||
.valid_i (per_bank_core_req_valid),
|
||||
.data_i (per_bank_core_req_tag),
|
||||
.data_o (core_req_tag_sel),
|
||||
`UNUSED_PIN (valid_o)
|
||||
);
|
||||
|
||||
always @(*) begin
|
||||
is_multi_tag_req = 0;
|
||||
for (integer i = 0; i < NUM_BANKS; ++i) begin
|
||||
if (per_bank_core_req_valid[i]
|
||||
|
@ -298,22 +300,20 @@ module VX_shared_mem #(
|
|||
|
||||
always @(posedge clk) begin
|
||||
if (!crsq_in_ready) begin
|
||||
$display("%t: *** cache%0d pipeline-stall", $time, CACHE_ID);
|
||||
dpi_trace("%d: *** cache%0d pipeline-stall\n", $time, CACHE_ID);
|
||||
end
|
||||
if (is_multi_tag_req) begin
|
||||
$display("%t: *** cache%0d multi-tag request!", $time, CACHE_ID);
|
||||
dpi_trace("%d: *** cache%0d multi-tag request!\n", $time, CACHE_ID);
|
||||
end
|
||||
if (creq_in_fire) begin
|
||||
for (integer i = 0; i < NUM_BANKS; ++i) begin
|
||||
if (per_bank_core_req_valid_unqual[i]) begin
|
||||
if (per_bank_core_req_rw_unqual[i]) begin
|
||||
$display("%t: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h",
|
||||
$time, CACHE_ID, i, per_bank_core_req_addr_unqual[i], per_bank_core_req_tag_unqual[i], per_bank_core_req_byteen_unqual[i], per_bank_core_req_data_unqual[i],
|
||||
debug_wid_st0[i], debug_pc_st0[i]);
|
||||
dpi_trace("%d: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h\n",
|
||||
$time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr_unqual[i], i), per_bank_core_req_tag_unqual[i], per_bank_core_req_byteen_unqual[i], per_bank_core_req_data_unqual[i], debug_wid_st0[i], debug_pc_st0[i]);
|
||||
end else begin
|
||||
$display("%t: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, byteen=%b, wid=%0d, PC=%0h",
|
||||
$time, CACHE_ID, i, per_bank_core_req_addr_unqual[i], per_bank_core_req_tag_unqual[i], per_bank_core_req_byteen_unqual[i],
|
||||
debug_wid_st0[i], debug_pc_st0[i]);
|
||||
dpi_trace("%d: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, byteen=%b, wid=%0d, PC=%0h\n",
|
||||
$time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr_unqual[i], i), per_bank_core_req_tag_unqual[i], per_bank_core_req_byteen_unqual[i], debug_wid_st0[i], debug_pc_st0[i]);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -322,13 +322,11 @@ module VX_shared_mem #(
|
|||
for (integer i = 0; i < NUM_BANKS; ++i) begin
|
||||
if (per_bank_core_req_valid[i]) begin
|
||||
if (per_bank_core_req_rw[i]) begin
|
||||
$display("%t: cache%0d:%0d core-wr-rsp: addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h",
|
||||
$time, CACHE_ID, i, per_bank_core_req_addr[i], per_bank_core_req_tag[i], per_bank_core_req_byteen[i], per_bank_core_req_data[i],
|
||||
debug_wid_st1[i], debug_pc_st1[i]);
|
||||
dpi_trace("%d: cache%0d:%0d core-wr-rsp: addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h\n",
|
||||
$time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr[i], i), per_bank_core_req_tag[i], per_bank_core_req_byteen[i], per_bank_core_req_data[i], debug_wid_st1[i], debug_pc_st1[i]);
|
||||
end else begin
|
||||
$display("%t: cache%0d:%0d core-rd-rsp: addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h",
|
||||
$time, CACHE_ID, i, per_bank_core_req_addr[i], per_bank_core_req_tag[i], per_bank_core_req_byteen[i], per_bank_core_rsp_data[i],
|
||||
debug_wid_st1[i], debug_pc_st1[i]);
|
||||
dpi_trace("%d: cache%0d:%0d core-rd-rsp: addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h\n",
|
||||
$time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr[i], i), per_bank_core_req_tag[i], per_bank_core_req_byteen[i], per_bank_core_rsp_data[i], debug_wid_st1[i], debug_pc_st1[i]);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -338,16 +336,22 @@ module VX_shared_mem #(
|
|||
|
||||
`ifdef PERF_ENABLE
|
||||
// per cycle: core_reads, core_writes
|
||||
reg [($clog2(NUM_REQS+1)-1):0] perf_core_reads_per_cycle, perf_core_writes_per_cycle;
|
||||
reg [($clog2(NUM_REQS+1)-1):0] perf_crsp_stall_per_cycle;
|
||||
wire [$clog2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle;
|
||||
wire [$clog2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle;
|
||||
wire [$clog2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle;
|
||||
|
||||
assign perf_core_reads_per_cycle = $countones(core_req_valid & core_req_ready & ~core_req_rw);
|
||||
assign perf_core_writes_per_cycle = $countones(core_req_valid & core_req_ready & core_req_rw);
|
||||
wire [NUM_REQS-1:0] perf_core_reads_per_mask = core_req_valid & core_req_ready & ~core_req_rw;
|
||||
wire [NUM_REQS-1:0] perf_core_writes_per_mask = core_req_valid & core_req_ready & core_req_rw;
|
||||
|
||||
`POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_mask);
|
||||
`POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_mask);
|
||||
|
||||
if (CORE_TAG_ID_BITS != 0) begin
|
||||
assign perf_crsp_stall_per_cycle = $countones(core_rsp_tmask & {NUM_REQS{core_rsp_valid && ~core_rsp_ready}});
|
||||
wire [NUM_REQS-1:0] perf_crsp_stall_per_mask = core_rsp_tmask & {NUM_REQS{core_rsp_valid && ~core_rsp_ready}};
|
||||
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_mask);
|
||||
end else begin
|
||||
assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & ~core_rsp_ready);
|
||||
wire [NUM_REQS-1:0] perf_crsp_stall_per_mask = core_rsp_valid & ~core_rsp_ready;
|
||||
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_mask);
|
||||
end
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] perf_core_reads;
|
82
hw/rtl/cache/VX_tag_access.sv
vendored
Normal file
82
hw/rtl/cache/VX_tag_access.sv
vendored
Normal file
|
@ -0,0 +1,82 @@
|
|||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_tag_access #(
|
||||
parameter CACHE_ID = 0,
|
||||
parameter BANK_ID = 0,
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter CACHE_LINE_SIZE = 1,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 1,
|
||||
// bank offset from beginning of index range
|
||||
parameter BANK_ADDR_OFFSET = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
input wire[31:0] debug_pc,
|
||||
input wire[`NW_BITS-1:0] debug_wid,
|
||||
`IGNORE_UNUSED_END
|
||||
`endif
|
||||
|
||||
input wire stall,
|
||||
|
||||
// read/fill
|
||||
input wire lookup,
|
||||
input wire[`LINE_ADDR_WIDTH-1:0] addr,
|
||||
input wire fill,
|
||||
input wire flush,
|
||||
output wire tag_match
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CACHE_ID)
|
||||
`UNUSED_PARAM (BANK_ID)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (lookup)
|
||||
|
||||
wire [`TAG_SELECT_BITS-1:0] read_tag;
|
||||
wire read_valid;
|
||||
|
||||
wire [`LINE_SELECT_BITS-1:0] line_addr = addr[`LINE_SELECT_BITS-1:0];
|
||||
wire [`TAG_SELECT_BITS-1:0] line_tag = `LINE_TAG_ADDR(addr);
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (`TAG_SELECT_BITS + 1),
|
||||
.SIZE (`LINES_PER_BANK),
|
||||
.NO_RWCHECK (1)
|
||||
) tag_store (
|
||||
.clk( clk),
|
||||
.addr (line_addr),
|
||||
.wren (fill || flush),
|
||||
.wdata ({!flush, line_tag}),
|
||||
.rdata ({read_valid, read_tag})
|
||||
);
|
||||
|
||||
assign tag_match = read_valid && (line_tag == read_tag);
|
||||
|
||||
`UNUSED_VAR (stall)
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_TAG
|
||||
always @(posedge clk) begin
|
||||
if (fill && ~stall) begin
|
||||
dpi_trace("%d: cache%0d:%0d tag-fill: addr=%0h, blk_addr=%0d, tag_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, line_tag);
|
||||
end
|
||||
if (flush) begin
|
||||
dpi_trace("%d: cache%0d:%0d tag-flush: addr=%0h, blk_addr=%0d\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr);
|
||||
end
|
||||
if (lookup && ~stall) begin
|
||||
if (tag_match) begin
|
||||
dpi_trace("%d: cache%0d:%0d tag-hit: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, tag_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, line_addr, line_tag);
|
||||
end else begin
|
||||
dpi_trace("%d: cache%0d:%0d tag-miss: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, line_addr, line_tag, read_tag);
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
84
hw/rtl/cache/VX_tag_access.v
vendored
84
hw/rtl/cache/VX_tag_access.v
vendored
|
@ -1,84 +0,0 @@
|
|||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_tag_access #(
|
||||
parameter CACHE_ID = 0,
|
||||
parameter BANK_ID = 0,
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter CACHE_LINE_SIZE = 1,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 1,
|
||||
// bank offset from beginning of index range
|
||||
parameter BANK_ADDR_OFFSET = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
input wire[31:0] debug_pc,
|
||||
input wire[`NW_BITS-1:0] debug_wid,
|
||||
`IGNORE_WARNINGS_END
|
||||
`endif
|
||||
|
||||
// read/fill
|
||||
input wire lookup,
|
||||
input wire[`LINE_ADDR_WIDTH-1:0] addr,
|
||||
input wire fill,
|
||||
input wire is_flush,
|
||||
output wire tag_match
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CACHE_ID)
|
||||
`UNUSED_PARAM (BANK_ID)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (lookup)
|
||||
|
||||
wire read_valid;
|
||||
wire [`TAG_SELECT_BITS-1:0] read_tag;
|
||||
|
||||
wire [`TAG_SELECT_BITS-1:0] line_tag = `LINE_TAG_ADDR(addr);
|
||||
wire [`LINE_SELECT_BITS-1:0] line_addr = addr [`LINE_SELECT_BITS-1:0];
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW(`TAG_SELECT_BITS + 1),
|
||||
.SIZE(`LINES_PER_BANK),
|
||||
.INITZERO(1),
|
||||
.RWCHECK(1)
|
||||
) tag_store (
|
||||
.clk(clk),
|
||||
.addr(line_addr),
|
||||
.wren(fill),
|
||||
.byteen(1'b1),
|
||||
.rden(1'b1),
|
||||
.din({!is_flush, line_tag}),
|
||||
.dout({read_valid, read_tag})
|
||||
);
|
||||
|
||||
assign tag_match = read_valid && (line_tag == read_tag);
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_TAG
|
||||
always @(posedge clk) begin
|
||||
if (fill) begin
|
||||
if (is_flush) begin
|
||||
$display("%t: cache%0d:%0d tag-flush: addr=%0h, blk_addr=%0d", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr);
|
||||
end else begin
|
||||
$display("%t: cache%0d:%0d tag-fill: addr=%0h, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, line_tag, read_tag);
|
||||
if (tag_match) begin
|
||||
$display("%t: warning: redundant fill - addr=%0h", $time, `LINE_TO_BYTE_ADDR(addr, BANK_ID));
|
||||
end
|
||||
end
|
||||
end else if (lookup) begin
|
||||
if (tag_match) begin
|
||||
$display("%t: cache%0d:%0d tag-hit: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, line_addr, line_tag);
|
||||
end else begin
|
||||
$display("%t: cache%0d:%0d tag-miss: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, line_addr, line_tag, read_tag);
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
28
hw/rtl/fp_cores/VX_fp_class.sv
Normal file
28
hw/rtl/fp_cores/VX_fp_class.sv
Normal file
|
@ -0,0 +1,28 @@
|
|||
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fp_class # (
|
||||
parameter MAN_BITS = 23,
|
||||
parameter EXP_BITS = 8
|
||||
) (
|
||||
input [EXP_BITS-1:0] exp_i,
|
||||
input [MAN_BITS-1:0] man_i,
|
||||
output fp_class_t clss_o
|
||||
);
|
||||
wire is_normal = (exp_i != '0) && (exp_i != '1);
|
||||
wire is_zero = (exp_i == '0) && (man_i == '0);
|
||||
wire is_subnormal = (exp_i == '0) && (man_i != '0);
|
||||
wire is_inf = (exp_i == '1) && (man_i == '0);
|
||||
wire is_nan = (exp_i == '1) && (man_i != '0);
|
||||
wire is_signaling = is_nan && ~man_i[MAN_BITS-1];
|
||||
wire is_quiet = is_nan && ~is_signaling;
|
||||
|
||||
assign clss_o.is_normal = is_normal;
|
||||
assign clss_o.is_zero = is_zero;
|
||||
assign clss_o.is_subnormal = is_subnormal;
|
||||
assign clss_o.is_inf = is_inf;
|
||||
assign clss_o.is_nan = is_nan;
|
||||
assign clss_o.is_quiet = is_quiet;
|
||||
assign clss_o.is_signaling = is_signaling;
|
||||
|
||||
endmodule
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_define.vh"
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
/// Modified port of cast module from fpnew Libray
|
||||
/// reference: https://github.com/pulp-platform/fpnew
|
||||
|
@ -15,7 +15,7 @@ module VX_fp_cvt #(
|
|||
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`FRM_BITS-1:0] frm,
|
||||
input wire [`INST_FRM_BITS-1:0] frm,
|
||||
|
||||
input wire is_itof,
|
||||
input wire is_signed,
|
||||
|
@ -59,13 +59,16 @@ module VX_fp_cvt #(
|
|||
|
||||
// Input processing
|
||||
|
||||
fp_type_t [LANES-1:0] in_a_type;
|
||||
fp_class_t [LANES-1:0] fp_clss;
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
VX_fp_type fp_type (
|
||||
VX_fp_class #(
|
||||
.EXP_BITS (EXP_BITS),
|
||||
.MAN_BITS (MAN_BITS)
|
||||
) fp_class (
|
||||
.exp_i (dataa[i][30:23]),
|
||||
.man_i (dataa[i][22:0]),
|
||||
.type_o (in_a_type[i])
|
||||
.clss_o (fp_clss[i])
|
||||
);
|
||||
end
|
||||
|
||||
|
@ -74,16 +77,18 @@ module VX_fp_cvt #(
|
|||
wire [LANES-1:0] input_sign;
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire [INT_MAN_WIDTH-1:0] int_mantissa;
|
||||
wire [INT_MAN_WIDTH-1:0] fmt_mantissa;
|
||||
wire fmt_sign = dataa[i][31];
|
||||
wire int_sign = dataa[i][31] & is_signed;
|
||||
assign int_mantissa = int_sign ? (-dataa[i]) : dataa[i];
|
||||
assign fmt_mantissa = INT_MAN_WIDTH'({in_a_type[i].is_normal, dataa[i][MAN_BITS-1:0]});
|
||||
|
||||
assign fmt_exponent[i] = {1'b0, dataa[i][MAN_BITS+EXP_BITS-1:MAN_BITS]};
|
||||
assign fmt_mantissa = INT_MAN_WIDTH'({fp_clss[i].is_normal, dataa[i][MAN_BITS-1:0]});
|
||||
assign fmt_exponent[i] = {1'b0, dataa[i][MAN_BITS +: EXP_BITS]} +
|
||||
{1'b0, fp_clss[i].is_subnormal};
|
||||
assign encoded_mant[i] = is_itof ? int_mantissa : fmt_mantissa;
|
||||
assign input_sign[i] = is_itof ? int_sign : fmt_sign;
|
||||
`IGNORE_WARNINGS_END
|
||||
end
|
||||
|
||||
// Pipeline stage0
|
||||
|
@ -93,7 +98,7 @@ module VX_fp_cvt #(
|
|||
wire is_itof_s0;
|
||||
wire unsigned_s0;
|
||||
wire [2:0] rnd_mode_s0;
|
||||
fp_type_t [LANES-1:0] in_a_type_s0;
|
||||
fp_class_t [LANES-1:0] fp_clss_s0;
|
||||
wire [LANES-1:0] input_sign_s0;
|
||||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent_s0;
|
||||
wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant_s0;
|
||||
|
@ -101,14 +106,14 @@ module VX_fp_cvt #(
|
|||
wire stall;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + 1 + `FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + INT_EXP_WIDTH + INT_MAN_WIDTH)),
|
||||
.DATAW (1 + TAGW + 1 + `INST_FRM_BITS + 1 + LANES * ($bits(fp_class_t) + 1 + INT_EXP_WIDTH + INT_MAN_WIDTH)),
|
||||
.RESETW (1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.data_in ({valid_in, tag_in, is_itof, !is_signed, frm, in_a_type, input_sign, fmt_exponent, encoded_mant}),
|
||||
.data_out ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, input_sign_s0, fmt_exponent_s0, encoded_mant_s0})
|
||||
.data_in ({valid_in, tag_in, is_itof, !is_signed, frm, fp_clss, input_sign, fmt_exponent, encoded_mant}),
|
||||
.data_out ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, fp_clss_s0, input_sign_s0, fmt_exponent_s0, encoded_mant_s0})
|
||||
);
|
||||
|
||||
// Normalization
|
||||
|
@ -119,8 +124,8 @@ module VX_fp_cvt #(
|
|||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
wire mant_is_nonzero;
|
||||
VX_lzc #(
|
||||
.WIDTH (INT_MAN_WIDTH),
|
||||
.MODE (1)
|
||||
.N (INT_MAN_WIDTH),
|
||||
.MODE (1)
|
||||
) lzc (
|
||||
.in_i (encoded_mant_s0[i]),
|
||||
.cnt_o (renorm_shamt_s0[i]),
|
||||
|
@ -134,20 +139,12 @@ module VX_fp_cvt #(
|
|||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
// Input mantissa needs to be normalized
|
||||
wire [INT_EXP_WIDTH-1:0] fp_input_exp;
|
||||
wire [INT_EXP_WIDTH-1:0] int_input_exp;
|
||||
|
||||
// Realign input mantissa, append zeroes if destination is wider
|
||||
// Realign input mantissa, append zeroes if destination is wider
|
||||
assign input_mant_s0[i] = encoded_mant_s0[i] << renorm_shamt_s0[i];
|
||||
|
||||
// Unbias exponent and compensate for shift
|
||||
assign fp_input_exp = fmt_exponent_s0[i] +
|
||||
{1'b0, in_a_type_s0[i].is_subnormal} +
|
||||
(FMT_SHIFT_COMPENSATION - EXP_BIAS) -
|
||||
{1'b0, renorm_shamt_s0[i]};
|
||||
|
||||
assign int_input_exp = (INT_MAN_WIDTH-1) - {1'b0, renorm_shamt_s0[i]};
|
||||
wire [INT_EXP_WIDTH-1:0] fp_input_exp = fmt_exponent_s0[i] + (FMT_SHIFT_COMPENSATION - EXP_BIAS) - {1'b0, renorm_shamt_s0[i]};
|
||||
wire [INT_EXP_WIDTH-1:0] int_input_exp = (INT_MAN_WIDTH-1) - {1'b0, renorm_shamt_s0[i]};
|
||||
|
||||
assign input_exp_s0[i] = is_itof_s0 ? int_input_exp : fp_input_exp;
|
||||
`IGNORE_WARNINGS_END
|
||||
|
@ -160,21 +157,21 @@ module VX_fp_cvt #(
|
|||
wire is_itof_s1;
|
||||
wire unsigned_s1;
|
||||
wire [2:0] rnd_mode_s1;
|
||||
fp_type_t [LANES-1:0] in_a_type_s1;
|
||||
fp_class_t [LANES-1:0] fp_clss_s1;
|
||||
wire [LANES-1:0] input_sign_s1;
|
||||
wire [LANES-1:0] mant_is_zero_s1;
|
||||
wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant_s1;
|
||||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp_s1;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + 1 + `FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + INT_MAN_WIDTH + INT_EXP_WIDTH)),
|
||||
.DATAW (1 + TAGW + 1 + `INST_FRM_BITS + 1 + LANES * ($bits(fp_class_t) + 1 + 1 + INT_MAN_WIDTH + INT_EXP_WIDTH)),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.data_in ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, input_sign_s0, mant_is_zero_s0, input_mant_s0, input_exp_s0}),
|
||||
.data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, input_sign_s1, mant_is_zero_s1, input_mant_s1, input_exp_s1})
|
||||
.data_in ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, fp_clss_s0, input_sign_s0, mant_is_zero_s0, input_mant_s0, input_exp_s0}),
|
||||
.data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, fp_clss_s1, input_sign_s1, mant_is_zero_s1, input_mant_s1, input_exp_s1})
|
||||
);
|
||||
|
||||
// Perform adjustments to mantissa and exponent
|
||||
|
@ -183,39 +180,35 @@ module VX_fp_cvt #(
|
|||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp_s1;
|
||||
wire [LANES-1:0] of_before_round_s1;
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
wire [INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
reg [2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift
|
||||
reg [SHAMT_BITS-1:0] denorm_shamt; // shift amount for denormalization
|
||||
reg [INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments
|
||||
reg of_before_round;
|
||||
|
||||
// Rebias the exponent
|
||||
assign destination_exp = input_exp_s1[i] + EXP_BIAS;
|
||||
|
||||
always @(*) begin
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
// Default assignment
|
||||
final_exp = destination_exp; // take exponent as is, only look at lower bits
|
||||
preshift_mant = {input_mant_s1[i], 33'b0}; // Place mantissa to the left of the shifter
|
||||
final_exp = input_exp_s1[i] + EXP_BIAS; // take exponent as is, only look at lower bits
|
||||
preshift_mant = {input_mant_s1[i], 33'b0}; // Place mantissa to the left of the shifter
|
||||
denorm_shamt = 0; // right of mantissa
|
||||
of_before_round = 1'b0;
|
||||
|
||||
// Handle INT casts
|
||||
if (is_itof_s1) begin
|
||||
if ($signed(destination_exp) >= $signed(2**EXP_BITS-1)) begin
|
||||
if ($signed(input_exp_s1[i]) >= $signed(2**EXP_BITS-1-EXP_BIAS)) begin
|
||||
// Overflow or infinities (for proper rounding)
|
||||
final_exp = (2**EXP_BITS-2); // largest normal value
|
||||
preshift_mant = ~0; // largest normal value and RS bits set
|
||||
of_before_round = 1'b1;
|
||||
end else if ($signed(destination_exp) < $signed(-MAN_BITS)) begin
|
||||
end else if ($signed(input_exp_s1[i]) < $signed(-MAN_BITS-EXP_BIAS)) begin
|
||||
// Limit the shift to retain sticky bits
|
||||
final_exp = 0; // denormal result
|
||||
denorm_shamt = denorm_shamt + (2 + MAN_BITS); // to sticky
|
||||
end else if ($signed(destination_exp) < $signed(1)) begin
|
||||
denorm_shamt = (2 + MAN_BITS); // to sticky
|
||||
end else if ($signed(input_exp_s1[i]) < $signed(1-EXP_BIAS)) begin
|
||||
// Denormalize underflowing values
|
||||
final_exp = 0; // denormal result
|
||||
denorm_shamt = denorm_shamt + 1 - destination_exp; // adjust right shifting
|
||||
denorm_shamt = (1-EXP_BIAS) - input_exp_s1[i]; // adjust right shifting
|
||||
end
|
||||
end else begin
|
||||
if ($signed(input_exp_s1[i]) >= $signed((MAX_INT_WIDTH-1) + unsigned_s1)) begin
|
||||
|
@ -224,7 +217,7 @@ module VX_fp_cvt #(
|
|||
of_before_round = 1'b1;
|
||||
end else if ($signed(input_exp_s1[i]) < $signed(-1)) begin
|
||||
// underflow
|
||||
denorm_shamt = MAX_INT_WIDTH + 1; // all bits go to the sticky
|
||||
denorm_shamt = MAX_INT_WIDTH+1; // all bits go to the sticky
|
||||
end else begin
|
||||
// By default right shift mantissa to be an integer
|
||||
denorm_shamt = (MAX_INT_WIDTH-1) - input_exp_s1[i];
|
||||
|
@ -245,7 +238,7 @@ module VX_fp_cvt #(
|
|||
wire is_itof_s2;
|
||||
wire unsigned_s2;
|
||||
wire [2:0] rnd_mode_s2;
|
||||
fp_type_t [LANES-1:0] in_a_type_s2;
|
||||
fp_class_t [LANES-1:0] fp_clss_s2;
|
||||
wire [LANES-1:0] mant_is_zero_s2;
|
||||
wire [LANES-1:0] input_sign_s2;
|
||||
wire [LANES-1:0][2*INT_MAN_WIDTH:0] destination_mant_s2;
|
||||
|
@ -253,14 +246,14 @@ module VX_fp_cvt #(
|
|||
wire [LANES-1:0] of_before_round_s2;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + 1 + 1 + `FRM_BITS + LANES * ($bits(fp_type_t) + 1 + 1 + (2*INT_MAN_WIDTH+1) + INT_EXP_WIDTH + 1)),
|
||||
.DATAW (1 + TAGW + 1 + 1 + `INST_FRM_BITS + LANES * ($bits(fp_class_t) + 1 + 1 + (2*INT_MAN_WIDTH+1) + INT_EXP_WIDTH + 1)),
|
||||
.RESETW (1)
|
||||
) pipe_reg2 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.data_in ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, destination_mant_s1, final_exp_s1, of_before_round_s1}),
|
||||
.data_out ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, rnd_mode_s2, in_a_type_s2, mant_is_zero_s2, input_sign_s2, destination_mant_s2, final_exp_s2, of_before_round_s2})
|
||||
.data_in ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, fp_clss_s1, mant_is_zero_s1, input_sign_s1, destination_mant_s1, final_exp_s1, of_before_round_s1}),
|
||||
.data_out ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, rnd_mode_s2, fp_clss_s2, mant_is_zero_s2, input_sign_s2, destination_mant_s2, final_exp_s2, of_before_round_s2})
|
||||
);
|
||||
|
||||
wire [LANES-1:0] rounded_sign;
|
||||
|
@ -314,7 +307,7 @@ module VX_fp_cvt #(
|
|||
wire [TAGW-1:0] tag_in_s3;
|
||||
wire is_itof_s3;
|
||||
wire unsigned_s3;
|
||||
fp_type_t [LANES-1:0] in_a_type_s3;
|
||||
fp_class_t [LANES-1:0] fp_clss_s3;
|
||||
wire [LANES-1:0] mant_is_zero_s3;
|
||||
wire [LANES-1:0] input_sign_s3;
|
||||
wire [LANES-1:0] rounded_sign_s3;
|
||||
|
@ -322,14 +315,14 @@ module VX_fp_cvt #(
|
|||
wire [LANES-1:0] of_before_round_s3;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + 1 + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + 32 + 1 + 1)),
|
||||
.DATAW (1 + TAGW + 1 + 1 + LANES * ($bits(fp_class_t) + 1 + 1 + 32 + 1 + 1)),
|
||||
.RESETW (1)
|
||||
) pipe_reg3 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.data_in ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, in_a_type_s2, mant_is_zero_s2, input_sign_s2, rounded_abs, rounded_sign, of_before_round_s2}),
|
||||
.data_out ({valid_in_s3, tag_in_s3, is_itof_s3, unsigned_s3, in_a_type_s3, mant_is_zero_s3, input_sign_s3, rounded_abs_s3, rounded_sign_s3, of_before_round_s3})
|
||||
.data_in ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, fp_clss_s2, mant_is_zero_s2, input_sign_s2, rounded_abs, rounded_sign, of_before_round_s2}),
|
||||
.data_out ({valid_in_s3, tag_in_s3, is_itof_s3, unsigned_s3, fp_clss_s3, mant_is_zero_s3, input_sign_s3, rounded_abs_s3, rounded_sign_s3, of_before_round_s3})
|
||||
);
|
||||
|
||||
wire [LANES-1:0] of_after_round;
|
||||
|
@ -362,14 +355,14 @@ module VX_fp_cvt #(
|
|||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
// Detect special case from source format, I2F casts don't produce a special result
|
||||
assign fp_result_is_special[i] = ~is_itof_s3 & (in_a_type_s3[i].is_zero | in_a_type_s3[i].is_nan);
|
||||
assign fp_result_is_special[i] = ~is_itof_s3 & (fp_clss_s3[i].is_zero | fp_clss_s3[i].is_nan);
|
||||
|
||||
// Signalling input NaNs raise invalid flag, otherwise no flags set
|
||||
assign fp_special_status[i] = in_a_type_s3[i].is_signaling ? {1'b1, 4'h0} : 5'h0; // invalid operation
|
||||
assign fp_special_status[i] = fp_clss_s3[i].is_signaling ? {1'b1, 4'h0} : 5'h0; // invalid operation
|
||||
|
||||
// Assemble result according to destination format
|
||||
assign fp_special_result[i] = in_a_type_s3[i].is_zero ? (32'(input_sign_s3) << 31) // signed zero
|
||||
: {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN
|
||||
assign fp_special_result[i] = fp_clss_s3[i].is_zero ? (32'(input_sign_s3) << 31) // signed zero
|
||||
: {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN
|
||||
end
|
||||
|
||||
// INT Special case handling
|
||||
|
@ -381,7 +374,7 @@ module VX_fp_cvt #(
|
|||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
// Assemble result according to destination format
|
||||
always @(*) begin
|
||||
if (input_sign_s3[i] && !in_a_type_s3[i].is_nan) begin
|
||||
if (input_sign_s3[i] && !fp_clss_s3[i].is_nan) begin
|
||||
int_special_result[i][30:0] = 0; // alone yields 2**(31)-1
|
||||
int_special_result[i][31] = ~unsigned_s3; // for unsigned casts yields 2**31
|
||||
end else begin
|
||||
|
@ -391,8 +384,8 @@ module VX_fp_cvt #(
|
|||
end
|
||||
|
||||
// Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned)
|
||||
assign int_result_is_special[i] = in_a_type_s3[i].is_nan
|
||||
| in_a_type_s3[i].is_inf
|
||||
assign int_result_is_special[i] = fp_clss_s3[i].is_nan
|
||||
| fp_clss_s3[i].is_inf
|
||||
| of_before_round_s3[i]
|
||||
| (input_sign_s3[i] & unsigned_s3 & ~rounded_int_res_zero[i]);
|
||||
|
||||
|
@ -411,11 +404,11 @@ module VX_fp_cvt #(
|
|||
wire [31:0] fp_result, int_result;
|
||||
|
||||
wire inexact = is_itof_s3 ? (| fp_round_sticky_bits[i]) // overflow is invalid in i2f;
|
||||
: (| fp_round_sticky_bits[i]) | (~in_a_type_s3[i].is_inf & (of_before_round_s3[i] | of_after_round[i]));
|
||||
: (| fp_round_sticky_bits[i]) | (~fp_clss_s3[i].is_inf & (of_before_round_s3[i] | of_after_round[i]));
|
||||
|
||||
assign fp_regular_status.NV = is_itof_s3 & (of_before_round_s3[i] | of_after_round[i]); // overflow is invalid for I2F casts
|
||||
assign fp_regular_status.DZ = 1'b0; // no divisions
|
||||
assign fp_regular_status.OF = ~is_itof_s3 & (~in_a_type_s3[i].is_inf & (of_before_round_s3[i] | of_after_round[i])); // inf casts no OF
|
||||
assign fp_regular_status.OF = ~is_itof_s3 & (~fp_clss_s3[i].is_inf & (of_before_round_s3[i] | of_after_round[i])); // inf casts no OF
|
||||
assign fp_regular_status.UF = uf_after_round[i] & inexact;
|
||||
assign fp_regular_status.NX = inexact;
|
||||
|
||||
|
@ -435,7 +428,7 @@ module VX_fp_cvt #(
|
|||
assign stall = ~ready_out && valid_out;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + (LANES * 32) + (LANES * `FFG_BITS)),
|
||||
.DATAW (1 + TAGW + (LANES * 32) + (LANES * `FFLAGS_BITS)),
|
||||
.RESETW (1)
|
||||
) pipe_reg4 (
|
||||
.clk (clk),
|
|
@ -1,8 +1,4 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
`include "float_dpi.vh"
|
||||
`endif
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fp_div #(
|
||||
parameter TAGW = 1,
|
||||
|
@ -16,7 +12,7 @@ module VX_fp_div #(
|
|||
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`FRM_BITS-1:0] frm,
|
||||
input wire [`INST_FRM_BITS-1:0] frm,
|
||||
|
||||
input wire [LANES-1:0][31:0] dataa,
|
||||
input wire [LANES-1:0][31:0] datab,
|
||||
|
@ -39,7 +35,7 @@ module VX_fp_div #(
|
|||
fflags_t f;
|
||||
|
||||
always @(*) begin
|
||||
dpi_fdiv (dataa[i], datab[i], frm, r, f);
|
||||
dpi_fdiv (enable && valid_in, dataa[i], datab[i], frm, r, f);
|
||||
end
|
||||
`UNUSED_VAR (f)
|
||||
|
|
@ -1,8 +1,4 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
`include "float_dpi.vh"
|
||||
`endif
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fp_fma #(
|
||||
parameter TAGW = 1,
|
||||
|
@ -16,7 +12,7 @@ module VX_fp_fma #(
|
|||
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`FRM_BITS-1:0] frm,
|
||||
input wire [`INST_FRM_BITS-1:0] frm,
|
||||
|
||||
input wire do_madd,
|
||||
input wire do_sub,
|
||||
|
@ -68,7 +64,7 @@ module VX_fp_fma #(
|
|||
fflags_t f;
|
||||
|
||||
always @(*) begin
|
||||
dpi_fmadd (a, b, c, frm, r, f);
|
||||
dpi_fmadd (enable && valid_in, a, b, c, frm, r, f);
|
||||
end
|
||||
`UNUSED_VAR (f)
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_define.vh"
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
/// Modified port of noncomp module from fpnew Libray
|
||||
/// reference: https://github.com/pulp-platform/fpnew
|
||||
|
@ -15,8 +15,8 @@ module VX_fp_ncomp #(
|
|||
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`FPU_BITS-1:0] op_type,
|
||||
input wire [`FRM_BITS-1:0] frm,
|
||||
input wire [`INST_FPU_BITS-1:0] op_type,
|
||||
input wire [`INST_FRM_BITS-1:0] frm,
|
||||
|
||||
input wire [LANES-1:0][31:0] dataa,
|
||||
input wire [LANES-1:0][31:0] datab,
|
||||
|
@ -30,6 +30,9 @@ module VX_fp_ncomp #(
|
|||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
localparam EXP_BITS = 8;
|
||||
localparam MAN_BITS = 23;
|
||||
|
||||
localparam NEG_INF = 32'h00000001,
|
||||
NEG_NORM = 32'h00000002,
|
||||
NEG_SUBNORM = 32'h00000004,
|
||||
|
@ -38,86 +41,92 @@ module VX_fp_ncomp #(
|
|||
POS_SUBNORM = 32'h00000020,
|
||||
POS_NORM = 32'h00000040,
|
||||
POS_INF = 32'h00000080,
|
||||
SIG_NAN = 32'h00000100,
|
||||
//SIG_NAN = 32'h00000100,
|
||||
QUT_NAN = 32'h00000200;
|
||||
|
||||
wire [LANES-1:0] tmp_a_sign, tmp_b_sign;
|
||||
wire [LANES-1:0][7:0] tmp_a_exponent, tmp_b_exponent;
|
||||
wire [LANES-1:0][22:0] tmp_a_mantissa, tmp_b_mantissa;
|
||||
fp_type_t [LANES-1:0] tmp_a_type, tmp_b_type;
|
||||
wire [LANES-1:0] tmp_a_smaller, tmp_ab_equal;
|
||||
wire [LANES-1:0] a_sign, b_sign;
|
||||
wire [LANES-1:0][7:0] a_exponent, b_exponent;
|
||||
wire [LANES-1:0][22:0] a_mantissa, b_mantissa;
|
||||
fp_class_t [LANES-1:0] a_clss, b_clss;
|
||||
wire [LANES-1:0] a_smaller, ab_equal;
|
||||
|
||||
// Setup
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
assign tmp_a_sign[i] = dataa[i][31];
|
||||
assign tmp_a_exponent[i] = dataa[i][30:23];
|
||||
assign tmp_a_mantissa[i] = dataa[i][22:0];
|
||||
assign a_sign[i] = dataa[i][31];
|
||||
assign a_exponent[i] = dataa[i][30:23];
|
||||
assign a_mantissa[i] = dataa[i][22:0];
|
||||
|
||||
assign tmp_b_sign[i] = datab[i][31];
|
||||
assign tmp_b_exponent[i] = datab[i][30:23];
|
||||
assign tmp_b_mantissa[i] = datab[i][22:0];
|
||||
assign b_sign[i] = datab[i][31];
|
||||
assign b_exponent[i] = datab[i][30:23];
|
||||
assign b_mantissa[i] = datab[i][22:0];
|
||||
|
||||
VX_fp_type fp_type_a (
|
||||
.exp_i (tmp_a_exponent[i]),
|
||||
.man_i (tmp_a_mantissa[i]),
|
||||
.type_o (tmp_a_type[i])
|
||||
VX_fp_class #(
|
||||
.EXP_BITS (EXP_BITS),
|
||||
.MAN_BITS (MAN_BITS)
|
||||
) fp_class_a (
|
||||
.exp_i (a_exponent[i]),
|
||||
.man_i (a_mantissa[i]),
|
||||
.clss_o (a_clss[i])
|
||||
);
|
||||
|
||||
VX_fp_type fp_type_b (
|
||||
.exp_i (tmp_b_exponent[i]),
|
||||
.man_i (tmp_b_mantissa[i]),
|
||||
.type_o (tmp_b_type[i])
|
||||
VX_fp_class #(
|
||||
.EXP_BITS (EXP_BITS),
|
||||
.MAN_BITS (MAN_BITS)
|
||||
) fp_class_b (
|
||||
.exp_i (b_exponent[i]),
|
||||
.man_i (b_mantissa[i]),
|
||||
.clss_o (b_clss[i])
|
||||
);
|
||||
|
||||
assign tmp_a_smaller[i] = $signed(dataa[i]) < $signed(datab[i]);
|
||||
assign tmp_ab_equal[i] = (dataa[i] == datab[i]) | (tmp_a_type[i].is_zero & tmp_b_type[i].is_zero);
|
||||
assign a_smaller[i] = $signed(dataa[i]) < $signed(datab[i]);
|
||||
assign ab_equal[i] = (dataa[i] == datab[i]) | (a_clss[i].is_zero & b_clss[i].is_zero);
|
||||
end
|
||||
|
||||
// Pipeline stage0
|
||||
|
||||
wire valid_in_s0;
|
||||
wire [TAGW-1:0] tag_in_s0;
|
||||
wire [`FPU_BITS-1:0] op_type_s0;
|
||||
wire [`FRM_BITS-1:0] frm_s0;
|
||||
wire [`INST_FPU_BITS-1:0] op_type_s0;
|
||||
wire [`INST_FRM_BITS-1:0] frm_s0;
|
||||
wire [LANES-1:0][31:0] dataa_s0, datab_s0;
|
||||
wire [LANES-1:0] a_sign_s0, b_sign_s0;
|
||||
wire [LANES-1:0][7:0] a_exponent_s0;
|
||||
wire [LANES-1:0][22:0] a_mantissa_s0;
|
||||
fp_type_t [LANES-1:0] a_type_s0, b_type_s0;
|
||||
fp_class_t [LANES-1:0] a_clss_s0, b_clss_s0;
|
||||
wire [LANES-1:0] a_smaller_s0, ab_equal_s0;
|
||||
|
||||
wire stall;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + `FPU_BITS + `FRM_BITS + LANES * (2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fp_type_t) + 1 + 1)),
|
||||
.DATAW (1 + TAGW + `INST_FPU_BITS + `INST_FRM_BITS + LANES * (2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fp_class_t) + 1 + 1)),
|
||||
.RESETW (1),
|
||||
.DEPTH (0)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!stall),
|
||||
.data_in ({valid_in, tag_in, op_type, frm, dataa, datab, tmp_a_sign, tmp_b_sign, tmp_a_exponent, tmp_a_mantissa, tmp_a_type, tmp_b_type, tmp_a_smaller, tmp_ab_equal}),
|
||||
.data_out ({valid_in_s0, tag_in_s0, op_type_s0, frm_s0, dataa_s0, datab_s0, a_sign_s0, b_sign_s0, a_exponent_s0, a_mantissa_s0, a_type_s0, b_type_s0, a_smaller_s0, ab_equal_s0})
|
||||
.data_in ({valid_in, tag_in, op_type, frm, dataa, datab, a_sign, b_sign, a_exponent, a_mantissa, a_clss, b_clss, a_smaller, ab_equal}),
|
||||
.data_out ({valid_in_s0, tag_in_s0, op_type_s0, frm_s0, dataa_s0, datab_s0, a_sign_s0, b_sign_s0, a_exponent_s0, a_mantissa_s0, a_clss_s0, b_clss_s0, a_smaller_s0, ab_equal_s0})
|
||||
);
|
||||
|
||||
// FCLASS
|
||||
reg [LANES-1:0][31:0] fclass_mask; // generate a 10-bit mask for integer reg
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
always @(*) begin
|
||||
if (a_type_s0[i].is_normal) begin
|
||||
if (a_clss_s0[i].is_normal) begin
|
||||
fclass_mask[i] = a_sign_s0[i] ? NEG_NORM : POS_NORM;
|
||||
end
|
||||
else if (a_type_s0[i].is_inf) begin
|
||||
else if (a_clss_s0[i].is_inf) begin
|
||||
fclass_mask[i] = a_sign_s0[i] ? NEG_INF : POS_INF;
|
||||
end
|
||||
else if (a_type_s0[i].is_zero) begin
|
||||
else if (a_clss_s0[i].is_zero) begin
|
||||
fclass_mask[i] = a_sign_s0[i] ? NEG_ZERO : POS_ZERO;
|
||||
end
|
||||
else if (a_type_s0[i].is_subnormal) begin
|
||||
else if (a_clss_s0[i].is_subnormal) begin
|
||||
fclass_mask[i] = a_sign_s0[i] ? NEG_SUBNORM : POS_SUBNORM;
|
||||
end
|
||||
else if (a_type_s0[i].is_nan) begin
|
||||
fclass_mask[i] = {22'h0, a_type_s0[i].is_quiet, a_type_s0[i].is_signaling, 8'h0};
|
||||
else if (a_clss_s0[i].is_nan) begin
|
||||
fclass_mask[i] = {22'h0, a_clss_s0[i].is_quiet, a_clss_s0[i].is_signaling, 8'h0};
|
||||
end
|
||||
else begin
|
||||
fclass_mask[i] = QUT_NAN;
|
||||
|
@ -129,11 +138,11 @@ module VX_fp_ncomp #(
|
|||
reg [LANES-1:0][31:0] fminmax_res; // result of fmin/fmax
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
always @(*) begin
|
||||
if (a_type_s0[i].is_nan && b_type_s0[i].is_nan)
|
||||
if (a_clss_s0[i].is_nan && b_clss_s0[i].is_nan)
|
||||
fminmax_res[i] = {1'b0, 8'hff, 1'b1, 22'd0}; // canonical qNaN
|
||||
else if (a_type_s0[i].is_nan)
|
||||
else if (a_clss_s0[i].is_nan)
|
||||
fminmax_res[i] = datab_s0[i];
|
||||
else if (b_type_s0[i].is_nan)
|
||||
else if (b_clss_s0[i].is_nan)
|
||||
fminmax_res[i] = dataa_s0[i];
|
||||
else begin
|
||||
case (frm_s0) // use LSB to distinguish MIN and MAX
|
||||
|
@ -160,33 +169,33 @@ module VX_fp_ncomp #(
|
|||
|
||||
// Comparison
|
||||
reg [LANES-1:0][31:0] fcmp_res; // result of comparison
|
||||
fflags_t [LANES-1:0] fcmp_fflags; // comparison fflags
|
||||
fflags_t [LANES-1:0] fcmp_fflags; // comparison fflags
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
always @(*) begin
|
||||
case (frm_s0)
|
||||
`FRM_RNE: begin // LE
|
||||
`INST_FRM_RNE: begin // LE
|
||||
fcmp_fflags[i] = 5'h0;
|
||||
if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin
|
||||
if (a_clss_s0[i].is_nan || b_clss_s0[i].is_nan) begin
|
||||
fcmp_res[i] = 32'h0;
|
||||
fcmp_fflags[i].NV = 1'b1;
|
||||
end else begin
|
||||
fcmp_res[i] = {31'h0, (a_smaller_s0[i] | ab_equal_s0[i])};
|
||||
end
|
||||
end
|
||||
`FRM_RTZ: begin // LS
|
||||
`INST_FRM_RTZ: begin // LS
|
||||
fcmp_fflags[i] = 5'h0;
|
||||
if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin
|
||||
if (a_clss_s0[i].is_nan || b_clss_s0[i].is_nan) begin
|
||||
fcmp_res[i] = 32'h0;
|
||||
fcmp_fflags[i].NV = 1'b1;
|
||||
end else begin
|
||||
fcmp_res[i] = {31'h0, (a_smaller_s0[i] & ~ab_equal_s0[i])};
|
||||
end
|
||||
end
|
||||
`FRM_RDN: begin // EQ
|
||||
`INST_FRM_RDN: begin // EQ
|
||||
fcmp_fflags[i] = 5'h0;
|
||||
if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin
|
||||
if (a_clss_s0[i].is_nan || b_clss_s0[i].is_nan) begin
|
||||
fcmp_res[i] = 32'h0;
|
||||
fcmp_fflags[i].NV = a_type_s0[i].is_signaling | b_type_s0[i].is_signaling;
|
||||
fcmp_fflags[i].NV = a_clss_s0[i].is_signaling | b_clss_s0[i].is_signaling;
|
||||
end else begin
|
||||
fcmp_res[i] = {31'h0, ab_equal_s0[i]};
|
||||
end
|
||||
|
@ -207,11 +216,11 @@ module VX_fp_ncomp #(
|
|||
for (genvar i = 0; i < LANES; i++) begin
|
||||
always @(*) begin
|
||||
case (op_type_s0)
|
||||
`FPU_CLASS: begin
|
||||
`INST_FPU_CLASS: begin
|
||||
tmp_result[i] = fclass_mask[i];
|
||||
tmp_fflags[i] = 'x;
|
||||
end
|
||||
`FPU_CMP: begin
|
||||
`INST_FPU_CMP: begin
|
||||
tmp_result[i] = fcmp_res[i];
|
||||
tmp_fflags[i] = fcmp_fflags[i];
|
||||
end
|
||||
|
@ -225,11 +234,11 @@ module VX_fp_ncomp #(
|
|||
3,4: begin
|
||||
tmp_result[i] = fminmax_res[i];
|
||||
tmp_fflags[i] = 0;
|
||||
tmp_fflags[i].NV = a_type_s0[i].is_signaling | b_type_s0[i].is_signaling;
|
||||
tmp_fflags[i].NV = a_clss_s0[i].is_signaling | b_clss_s0[i].is_signaling;
|
||||
end
|
||||
//5,6,7: MOVE
|
||||
default: begin
|
||||
tmp_result[i] = dataa[i];
|
||||
tmp_result[i] = dataa_s0[i];
|
||||
tmp_fflags[i] = 'x;
|
||||
end
|
||||
endcase
|
||||
|
@ -238,15 +247,15 @@ module VX_fp_ncomp #(
|
|||
end
|
||||
end
|
||||
|
||||
wire has_fflags_s0 = ((op_type_s0 == `FPU_MISC)
|
||||
&& (frm_s0 == 3 // MIN
|
||||
|| frm_s0 == 4)) // MAX
|
||||
|| (op_type_s0 == `FPU_CMP); // CMP
|
||||
wire has_fflags_s0 = ((op_type_s0 == `INST_FPU_MISC)
|
||||
&& (frm_s0 == 3 // MIN
|
||||
|| frm_s0 == 4)) // MAX
|
||||
|| (op_type_s0 == `INST_FPU_CMP); // CMP
|
||||
|
||||
assign stall = ~ready_out && valid_out;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + (LANES * 32) + 1 + (LANES * `FFG_BITS)),
|
||||
.DATAW (1 + TAGW + (LANES * 32) + 1 + (LANES * `FFLAGS_BITS)),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
|
@ -1,5 +1,4 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
/// Modified port of rouding module from fpnew Libray
|
||||
/// reference: https://github.com/pulp-platform/fpnew
|
||||
|
@ -34,7 +33,7 @@ module VX_fp_rounding #(
|
|||
|
||||
always @(*) begin
|
||||
case (rnd_mode_i)
|
||||
`FRM_RNE: // Decide accoring to round/sticky bits
|
||||
`INST_FRM_RNE: // Decide accoring to round/sticky bits
|
||||
case (round_sticky_bits_i)
|
||||
2'b00,
|
||||
2'b01: round_up = 1'b0; // < ulp/2 away, round down
|
||||
|
@ -42,10 +41,10 @@ module VX_fp_rounding #(
|
|||
2'b11: round_up = 1'b1; // > ulp/2 away, round up
|
||||
default: round_up = 1'bx;
|
||||
endcase
|
||||
`FRM_RTZ: round_up = 1'b0; // always round down
|
||||
`FRM_RDN: round_up = (| round_sticky_bits_i) & sign_i; // to 0 if +, away if -
|
||||
`FRM_RUP: round_up = (| round_sticky_bits_i) & ~sign_i; // to 0 if -, away if +
|
||||
`FRM_RMM: round_up = round_sticky_bits_i[1]; // round down if < ulp/2 away, else up
|
||||
`INST_FRM_RTZ: round_up = 1'b0; // always round down
|
||||
`INST_FRM_RDN: round_up = (| round_sticky_bits_i) & sign_i; // to 0 if +, away if -
|
||||
`INST_FRM_RUP: round_up = (| round_sticky_bits_i) & ~sign_i; // to 0 if -, away if +
|
||||
`INST_FRM_RMM: round_up = round_sticky_bits_i[1]; // round down if < ulp/2 away, else up
|
||||
default: round_up = 1'bx; // propagate x
|
||||
endcase
|
||||
end
|
||||
|
@ -58,7 +57,7 @@ module VX_fp_rounding #(
|
|||
|
||||
// In case of effective subtraction (thus signs of addition operands must have differed) and a
|
||||
// true zero result, the result sign is '-' in case of RDN and '+' for other modes.
|
||||
assign sign_o = (exact_zero_o && effective_subtraction_i) ? (rnd_mode_i == `FRM_RDN)
|
||||
assign sign_o = (exact_zero_o && effective_subtraction_i) ? (rnd_mode_i == `INST_FRM_RDN)
|
||||
: sign_i;
|
||||
|
||||
endmodule
|
|
@ -1,8 +1,4 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
`include "float_dpi.vh"
|
||||
`endif
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fp_sqrt #(
|
||||
parameter TAGW = 1,
|
||||
|
@ -16,7 +12,7 @@ module VX_fp_sqrt #(
|
|||
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`FRM_BITS-1:0] frm,
|
||||
input wire [`INST_FRM_BITS-1:0] frm,
|
||||
|
||||
input wire [LANES-1:0][31:0] dataa,
|
||||
output wire [LANES-1:0][31:0] result,
|
||||
|
@ -38,7 +34,7 @@ module VX_fp_sqrt #(
|
|||
fflags_t f;
|
||||
|
||||
always @(*) begin
|
||||
dpi_fsqrt (dataa[i], frm, r, f);
|
||||
dpi_fsqrt (enable && valid_in, dataa[i], frm, r, f);
|
||||
end
|
||||
`UNUSED_VAR (f)
|
||||
|
|
@ -1,27 +0,0 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_fp_type (
|
||||
// inputs
|
||||
input [7:0] exp_i,
|
||||
input [22:0] man_i,
|
||||
// outputs
|
||||
output fp_type_t type_o
|
||||
);
|
||||
wire is_normal = (exp_i != 8'd0) && (exp_i != 8'hff);
|
||||
wire is_zero = (exp_i == 8'd0) && (man_i == 23'd0);
|
||||
wire is_subnormal = (exp_i == 8'd0) && (man_i != 23'd0);
|
||||
wire is_inf = (exp_i == 8'hff) && (man_i == 23'd0);
|
||||
wire is_nan = (exp_i == 8'hff) && (man_i != 23'd0);
|
||||
wire is_signaling = is_nan && (man_i[22] == 1'b0);
|
||||
wire is_quiet = is_nan && !is_signaling;
|
||||
|
||||
assign type_o.is_normal = is_normal;
|
||||
assign type_o.is_zero = is_zero;
|
||||
assign type_o.is_subnormal = is_subnormal;
|
||||
assign type_o.is_inf = is_inf;
|
||||
assign type_o.is_nan = is_nan;
|
||||
assign type_o.is_quiet = is_quiet;
|
||||
assign type_o.is_signaling = is_signaling;
|
||||
|
||||
endmodule
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue