mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
Merge pull request #185 from vortexgpgpu/tensor-core
Merge tensor-core and devel branch into master
This commit is contained in:
commit
91c135ac15
323 changed files with 11284 additions and 25978 deletions
36
.github/workflows/ci.yml
vendored
36
.github/workflows/ci.yml
vendored
|
@ -21,13 +21,13 @@ jobs:
|
|||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
- name: Cache Toolchain Directory
|
||||
id: cache-toolchain
|
||||
uses: actions/cache@v4
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: tools
|
||||
key: ${{ runner.os }}-toolchain-v0.1
|
||||
|
@ -36,7 +36,7 @@ jobs:
|
|||
|
||||
- name: Cache Third Party Directory
|
||||
id: cache-thirdparty
|
||||
uses: actions/cache@v4
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: third_party
|
||||
key: ${{ runner.os }}-thirdparty-v0.1
|
||||
|
@ -46,7 +46,7 @@ jobs:
|
|||
- name: Install Dependencies
|
||||
if: steps.cache-toolchain.outputs.cache-hit != 'true' || steps.cache-thirdparty.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
sudo bash ./ci/system_updates.sh
|
||||
sudo bash ./ci/install_dependencies.sh
|
||||
|
||||
- name: Setup Toolchain
|
||||
if: steps.cache-toolchain.outputs.cache-hit != 'true'
|
||||
|
@ -71,15 +71,15 @@ jobs:
|
|||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
sudo bash ./ci/system_updates.sh
|
||||
sudo bash ./ci/install_dependencies.sh
|
||||
|
||||
- name: Cache Toolchain Directory
|
||||
id: cache-toolchain
|
||||
uses: actions/cache@v4
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: tools
|
||||
key: ${{ runner.os }}-toolchain-v0.1
|
||||
|
@ -88,7 +88,7 @@ jobs:
|
|||
|
||||
- name: Cache Third Party Directory
|
||||
id: cache-thirdparty
|
||||
uses: actions/cache@v4
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: third_party
|
||||
key: ${{ runner.os }}-thirdparty-v0.1
|
||||
|
@ -106,31 +106,31 @@ jobs:
|
|||
make tests -s > /dev/null
|
||||
|
||||
- name: Upload Build Artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: build-${{ matrix.xlen }}
|
||||
path: build${{ matrix.xlen }}
|
||||
|
||||
test:
|
||||
tests:
|
||||
runs-on: ubuntu-20.04
|
||||
needs: build
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
name: [regression, opencl, cache, config1, config2, debug, stress, vm]
|
||||
name: [regression, opencl, cache, config1, config2, debug, scope, stress, synthesis, vm]
|
||||
xlen: [32, 64]
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
sudo bash ./ci/system_updates.sh
|
||||
sudo bash ./ci/install_dependencies.sh
|
||||
|
||||
- name: Cache Toolchain Directory
|
||||
id: cache-toolchain
|
||||
uses: actions/cache@v4
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: tools
|
||||
key: ${{ runner.os }}-toolchain-v0.1
|
||||
|
@ -139,7 +139,7 @@ jobs:
|
|||
|
||||
- name: Cache Third Party Directory
|
||||
id: cache-thirdparty
|
||||
uses: actions/cache@v4
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: third_party
|
||||
key: ${{ runner.os }}-thirdparty-v0.1
|
||||
|
@ -147,10 +147,11 @@ jobs:
|
|||
${{ runner.os }}-thirdparty-
|
||||
|
||||
- name: Download Build Artifact
|
||||
uses: actions/download-artifact@v4
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: build-${{ matrix.xlen }}
|
||||
path: build${{ matrix.xlen }}
|
||||
|
||||
- name: Run tests
|
||||
run: |
|
||||
cd build${{ matrix.xlen }}
|
||||
|
@ -160,7 +161,6 @@ jobs:
|
|||
./ci/regression.sh --unittest
|
||||
./ci/regression.sh --isa
|
||||
./ci/regression.sh --kernel
|
||||
./ci/regression.sh --synthesis
|
||||
./ci/regression.sh --regression
|
||||
else
|
||||
./ci/regression.sh --${{ matrix.name }}
|
||||
|
@ -168,7 +168,7 @@ jobs:
|
|||
|
||||
complete:
|
||||
runs-on: ubuntu-20.04
|
||||
needs: test
|
||||
needs: tests
|
||||
|
||||
steps:
|
||||
- name: Check Completion
|
||||
|
|
6
.gitmodules
vendored
6
.gitmodules
vendored
|
@ -1,9 +1,9 @@
|
|||
[submodule "third_party/fpnew"]
|
||||
path = third_party/fpnew
|
||||
url = https://github.com/pulp-platform/fpnew.git
|
||||
[submodule "third_party/softfloat"]
|
||||
path = third_party/softfloat
|
||||
url = https://github.com/ucb-bar/berkeley-softfloat-3.git
|
||||
[submodule "third_party/ramulator"]
|
||||
path = third_party/ramulator
|
||||
url = https://github.com/CMU-SAFARI/ramulator2.git
|
||||
[submodule "third_party/cvfpu"]
|
||||
path = third_party/cvfpu
|
||||
url = https://github.com/openhwgroup/cvfpu.git
|
||||
|
|
26
README.md
26
README.md
|
@ -5,6 +5,7 @@ Vortex is a full-stack open-source RISC-V GPGPU.
|
|||
## Specifications
|
||||
|
||||
- Support RISC-V RV32IMAF and RV64IMAFD
|
||||
|
||||
- Microarchitecture:
|
||||
- configurable number of cores, warps, and threads.
|
||||
- configurable number of ALU, FPU, LSU, and SFU units per core.
|
||||
|
@ -32,31 +33,28 @@ Vortex is a full-stack open-source RISC-V GPGPU.
|
|||
## Build Instructions
|
||||
More detailed build instructions can be found [here](docs/install_vortex.md).
|
||||
### Supported OS Platforms
|
||||
- Ubuntu 18.04, 20.04
|
||||
- Ubuntu 18.04, 20.04, 22.04, 24.04
|
||||
- Centos 7
|
||||
### Toolchain Dependencies
|
||||
- [POCL](http://portablecl.org/)
|
||||
- [LLVM](https://llvm.org/)
|
||||
- [RISCV-GNU-TOOLCHAIN](https://github.com/riscv-collab/riscv-gnu-toolchain)
|
||||
- [Verilator](https://www.veripool.org/verilator)
|
||||
- [FpNew](https://github.com/pulp-platform/fpnew.git)
|
||||
- [cvfpu](https://github.com/openhwgroup/cvfpu.git)
|
||||
- [SoftFloat](https://github.com/ucb-bar/berkeley-softfloat-3.git)
|
||||
- [Ramulator](https://github.com/CMU-SAFARI/ramulator.git)
|
||||
- [Yosys](https://github.com/YosysHQ/yosys)
|
||||
- [Sv2v](https://github.com/zachjs/sv2v)
|
||||
### Install development tools
|
||||
```sh
|
||||
sudo apt-get install build-essential
|
||||
sudo apt-get install binutils
|
||||
sudo apt-get install python
|
||||
sudo apt-get install uuid-dev
|
||||
sudo apt-get install git
|
||||
```
|
||||
### Install Vortex codebase
|
||||
```sh
|
||||
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
|
||||
cd vortex
|
||||
```
|
||||
### Install system dependencies
|
||||
```sh
|
||||
# ensure dependent libraries are present
|
||||
sudo ./ci/install_dependencies.sh
|
||||
```
|
||||
### Configure your build folder
|
||||
```sh
|
||||
mkdir build
|
||||
|
@ -91,19 +89,19 @@ make -s
|
|||
make -s
|
||||
make install
|
||||
```
|
||||
- Building Vortex 64-bit simply requires using --xlen=64 configure option.
|
||||
- Building Vortex 64-bit requires setting --xlen=64 configure option.
|
||||
```sh
|
||||
../configure --xlen=32 --tooldir=$HOME/tools
|
||||
../configure --xlen=64 --tooldir=$HOME/tools
|
||||
```
|
||||
- Sourcing "./ci/toolchain_env.sh" is required everytime you start a new terminal. we recommend adding "source <build-path>/ci/toolchain_env.sh" to your ~/.bashrc file to automate the process at login.
|
||||
```sh
|
||||
echo "source <build-path>/ci/toolchain_env.sh" >> ~/.bashrc
|
||||
```
|
||||
- Making changes to Makefiles in your source tree or adding new folders will require executing the "configure" script again to get it propagated into your build folder.
|
||||
- Making changes to Makefiles in your source tree or adding new folders will require executing the "configure" script again without any options to get changes propagated to your build folder.
|
||||
```sh
|
||||
../configure
|
||||
```
|
||||
- To debug the GPU, you can generate a "run.log" trace. see /docs/debugging.md for more information.
|
||||
- To debug the GPU, the simulation can generate a runtime trace for analysis. See /docs/debugging.md for more information.
|
||||
```sh
|
||||
./ci/blackbox.sh --app=demo --debug=3
|
||||
```
|
||||
|
|
463
ci/blackbox.sh
463
ci/blackbox.sh
|
@ -13,6 +13,9 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
SCRIPT_DIR=$(dirname "$0")
|
||||
ROOT_DIR=$SCRIPT_DIR/..
|
||||
|
||||
show_usage()
|
||||
{
|
||||
echo "Vortex BlackBox Test Driver v1.0"
|
||||
|
@ -29,302 +32,174 @@ show_help()
|
|||
echo "--rebuild: 0=disable, 1=force, 2=auto, 3=temp"
|
||||
}
|
||||
|
||||
SCRIPT_DIR=$(dirname "$0")
|
||||
ROOT_DIR=$SCRIPT_DIR/..
|
||||
|
||||
DRIVER=simx
|
||||
APP=sgemm
|
||||
CLUSTERS=1
|
||||
CORES=1
|
||||
WARPS=4
|
||||
THREADS=4
|
||||
L2=
|
||||
L3=
|
||||
DEBUG=0
|
||||
DEBUG_LEVEL=0
|
||||
SCOPE=0
|
||||
HAS_ARGS=0
|
||||
PERF_CLASS=0
|
||||
REBUILD=2
|
||||
TEMPBUILD=0
|
||||
LOGFILE=run.log
|
||||
|
||||
for i in "$@"
|
||||
do
|
||||
case $i in
|
||||
--driver=*)
|
||||
DRIVER=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--app=*)
|
||||
APP=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--clusters=*)
|
||||
CLUSTERS=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--cores=*)
|
||||
CORES=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--warps=*)
|
||||
WARPS=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--threads=*)
|
||||
THREADS=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--l2cache)
|
||||
L2=-DL2_ENABLE
|
||||
shift
|
||||
;;
|
||||
--l3cache)
|
||||
L3=-DL3_ENABLE
|
||||
shift
|
||||
;;
|
||||
--debug=*)
|
||||
DEBUG_LEVEL=${i#*=}
|
||||
DEBUG=1
|
||||
shift
|
||||
;;
|
||||
--scope)
|
||||
SCOPE=1
|
||||
CORES=1
|
||||
shift
|
||||
;;
|
||||
--perf=*)
|
||||
PERF_FLAG=-DPERF_ENABLE
|
||||
PERF_CLASS=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--args=*)
|
||||
ARGS=${i#*=}
|
||||
HAS_ARGS=1
|
||||
shift
|
||||
;;
|
||||
--rebuild=*)
|
||||
REBUILD=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--log=*)
|
||||
LOGFILE=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--help)
|
||||
show_help
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
show_usage
|
||||
exit -1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [ $REBUILD -eq 3 ];
|
||||
then
|
||||
REBUILD=1
|
||||
TEMPBUILD=1
|
||||
fi
|
||||
|
||||
case $DRIVER in
|
||||
gpu)
|
||||
DRIVER_PATH=
|
||||
;;
|
||||
simx)
|
||||
DRIVER_PATH=$ROOT_DIR/runtime/simx
|
||||
;;
|
||||
rtlsim)
|
||||
DRIVER_PATH=$ROOT_DIR/runtime/rtlsim
|
||||
;;
|
||||
opae)
|
||||
DRIVER_PATH=$ROOT_DIR/runtime/opae
|
||||
;;
|
||||
xrt)
|
||||
DRIVER_PATH=$ROOT_DIR/runtime/xrt
|
||||
;;
|
||||
*)
|
||||
echo "invalid driver: $DRIVER"
|
||||
exit -1
|
||||
;;
|
||||
esac
|
||||
|
||||
if [ -d "$ROOT_DIR/tests/opencl/$APP" ];
|
||||
then
|
||||
APP_PATH=$ROOT_DIR/tests/opencl/$APP
|
||||
elif [ -d "$ROOT_DIR/tests/regression/$APP" ];
|
||||
then
|
||||
APP_PATH=$ROOT_DIR/tests/regression/$APP
|
||||
else
|
||||
echo "Application folder not found: $APP"
|
||||
exit -1
|
||||
fi
|
||||
|
||||
if [ "$DRIVER" = "gpu" ];
|
||||
then
|
||||
# running application
|
||||
if [ $HAS_ARGS -eq 1 ]
|
||||
then
|
||||
echo "running: OPTS=$ARGS make -C $APP_PATH run-$DRIVER"
|
||||
OPTS=$ARGS make -C $APP_PATH run-$DRIVER
|
||||
status=$?
|
||||
add_option() {
|
||||
if [ -n "$1" ]; then
|
||||
echo "$1 $2"
|
||||
else
|
||||
echo "running: make -C $APP_PATH run-$DRIVER"
|
||||
make -C $APP_PATH run-$DRIVER
|
||||
status=$?
|
||||
echo "$2"
|
||||
fi
|
||||
}
|
||||
|
||||
DEFAULTS() {
|
||||
DRIVER=simx
|
||||
APP=sgemm
|
||||
DEBUG=0
|
||||
DEBUG_LEVEL=0
|
||||
SCOPE=0
|
||||
HAS_ARGS=0
|
||||
PERF_CLASS=0
|
||||
CONFIGS="$CONFIGS"
|
||||
REBUILD=2
|
||||
TEMPBUILD=0
|
||||
LOGFILE=run.log
|
||||
}
|
||||
|
||||
parse_args() {
|
||||
DEFAULTS
|
||||
for i in "$@"; do
|
||||
case $i in
|
||||
--driver=*) DRIVER=${i#*=} ;;
|
||||
--app=*) APP=${i#*=} ;;
|
||||
--clusters=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_CLUSTERS=${i#*=}") ;;
|
||||
--cores=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_CORES=${i#*=}") ;;
|
||||
--warps=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_WARPS=${i#*=}") ;;
|
||||
--threads=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_THREADS=${i#*=}") ;;
|
||||
--l2cache) CONFIGS=$(add_option "$CONFIGS" "-DL2_ENABLE") ;;
|
||||
--l3cache) CONFIGS=$(add_option "$CONFIGS" "-DL3_ENABLE") ;;
|
||||
--perf=*) CONFIGS=$(add_option "$CONFIGS" "-DPERF_ENABLE"); PERF_CLASS=${i#*=} ;;
|
||||
--debug=*) DEBUG=1; DEBUG_LEVEL=${i#*=} ;;
|
||||
--scope) SCOPE=1; ;;
|
||||
--args=*) HAS_ARGS=1; ARGS=${i#*=} ;;
|
||||
--rebuild=*) REBUILD=${i#*=} ;;
|
||||
--log=*) LOGFILE=${i#*=} ;;
|
||||
--help) show_help; exit 0 ;;
|
||||
*) show_usage; exit 1 ;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [ $REBUILD -eq 3 ];
|
||||
then
|
||||
REBUILD=1
|
||||
TEMPBUILD=1
|
||||
fi
|
||||
}
|
||||
|
||||
set_driver_path() {
|
||||
case $DRIVER in
|
||||
gpu) DRIVER_PATH="" ;;
|
||||
simx|rtlsim|opae|xrt) DRIVER_PATH="$ROOT_DIR/runtime/$DRIVER" ;;
|
||||
*) echo "Invalid driver: $DRIVER"; exit 1 ;;
|
||||
esac
|
||||
}
|
||||
|
||||
set_app_path() {
|
||||
if [ -d "$ROOT_DIR/tests/opencl/$APP" ]; then
|
||||
APP_PATH="$ROOT_DIR/tests/opencl/$APP"
|
||||
elif [ -d "$ROOT_DIR/tests/regression/$APP" ]; then
|
||||
APP_PATH="$ROOT_DIR/tests/regression/$APP"
|
||||
else
|
||||
echo "Application folder not found: $APP"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
build_driver() {
|
||||
local cmd_opts=""
|
||||
[ $DEBUG -ne 0 ] && cmd_opts=$(add_option "$cmd_opts" "DEBUG=$DEBUG_LEVEL")
|
||||
[ $SCOPE -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "SCOPE=1")
|
||||
[ $TEMPBUILD -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "DESTDIR=\"$TEMPDIR\"")
|
||||
[ -n "$CONFIGS" ] && cmd_opts=$(add_option "$cmd_opts" "CONFIGS=\"$CONFIGS\"")
|
||||
|
||||
if [ -n "$cmd_opts" ]; then
|
||||
echo "Running: $cmd_opts make -C $DRIVER_PATH > /dev/null"
|
||||
eval "$cmd_opts make -C $DRIVER_PATH > /dev/null"
|
||||
else
|
||||
echo "Running: make -C $DRIVER_PATH > /dev/null"
|
||||
make -C $DRIVER_PATH > /dev/null
|
||||
fi
|
||||
}
|
||||
|
||||
run_app() {
|
||||
local cmd_opts=""
|
||||
[ $DEBUG -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "DEBUG=1")
|
||||
[ $TEMPBUILD -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "VORTEX_RT_PATH=\"$TEMPDIR\"")
|
||||
[ $HAS_ARGS -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "OPTS=\"$ARGS\"")
|
||||
|
||||
if [ $DEBUG -ne 0 ]; then
|
||||
if [ -n "$cmd_opts" ]; then
|
||||
echo "Running: $cmd_opts make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1"
|
||||
eval "$cmd_opts make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1"
|
||||
else
|
||||
echo "Running: make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1"
|
||||
make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1
|
||||
fi
|
||||
else
|
||||
if [ -n "$cmd_opts" ]; then
|
||||
echo "Running: $cmd_opts make -C $APP_PATH run-$DRIVER"
|
||||
eval "$cmd_opts make -C $APP_PATH run-$DRIVER"
|
||||
else
|
||||
echo "Running: make -C $APP_PATH run-$DRIVER"
|
||||
make -C $APP_PATH run-$DRIVER
|
||||
fi
|
||||
fi
|
||||
status=$?
|
||||
return $status
|
||||
}
|
||||
|
||||
main() {
|
||||
parse_args "$@"
|
||||
set_driver_path
|
||||
set_app_path
|
||||
|
||||
# execute on default installed GPU
|
||||
if [ "$DRIVER" = "gpu" ]; then
|
||||
run_app
|
||||
exit $?
|
||||
fi
|
||||
|
||||
if [ -n "$CONFIGS" ]; then
|
||||
echo "CONFIGS=$CONFIGS"
|
||||
fi
|
||||
|
||||
if [ $REBUILD -ne 0 ]; then
|
||||
BLACKBOX_CACHE=blackbox.$DRIVER.cache
|
||||
LAST_CONFIGS=$(cat "$BLACKBOX_CACHE" 2>/dev/null || echo "")
|
||||
|
||||
if [ $REBUILD -eq 1 ] || [ "$CONFIGS+$DEBUG+$SCOPE" != "$LAST_CONFIGS" ]; then
|
||||
make -C $DRIVER_PATH clean-driver > /dev/null
|
||||
echo "$CONFIGS+$DEBUG+$SCOPE" > "$BLACKBOX_CACHE"
|
||||
fi
|
||||
fi
|
||||
|
||||
export VORTEX_PROFILING=$PERF_CLASS
|
||||
|
||||
make -C "$ROOT_DIR/hw" config > /dev/null
|
||||
make -C "$ROOT_DIR/runtime/stub" > /dev/null
|
||||
|
||||
if [ $TEMPBUILD -eq 1 ]; then
|
||||
# setup temp directory
|
||||
TEMPDIR=$(mktemp -d)
|
||||
mkdir -p "$TEMPDIR"
|
||||
# build stub driver
|
||||
echo "running: DESTDIR=$TEMPDIR make -C $ROOT_DIR/runtime/stub"
|
||||
DESTDIR="$TEMPDIR" make -C $ROOT_DIR/runtime/stub > /dev/null
|
||||
# register tempdir cleanup on exit
|
||||
trap "rm -rf $TEMPDIR" EXIT
|
||||
fi
|
||||
|
||||
build_driver
|
||||
run_app
|
||||
status=$?
|
||||
|
||||
if [ $DEBUG -eq 1 ] && [ -f "$APP_PATH/trace.vcd" ]; then
|
||||
mv -f $APP_PATH/trace.vcd .
|
||||
fi
|
||||
|
||||
if [ $SCOPE -eq 1 ] && [ -f "$APP_PATH/scope.vcd" ]; then
|
||||
mv -f $APP_PATH/scope.vcd .
|
||||
fi
|
||||
|
||||
exit $status
|
||||
fi
|
||||
}
|
||||
|
||||
CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS $L2 $L3 $PERF_FLAG $CONFIGS"
|
||||
|
||||
echo "CONFIGS=$CONFIGS"
|
||||
|
||||
if [ $REBUILD -ne 0 ]
|
||||
then
|
||||
BLACKBOX_CACHE=blackbox.$DRIVER.cache
|
||||
if [ -f "$BLACKBOX_CACHE" ]
|
||||
then
|
||||
LAST_CONFIGS=`cat $BLACKBOX_CACHE`
|
||||
fi
|
||||
|
||||
if [ $REBUILD -eq 1 ] || [ "$CONFIGS+$DEBUG+$SCOPE" != "$LAST_CONFIGS" ];
|
||||
then
|
||||
make -C $DRIVER_PATH clean-driver > /dev/null
|
||||
echo "$CONFIGS+$DEBUG+$SCOPE" > $BLACKBOX_CACHE
|
||||
fi
|
||||
fi
|
||||
|
||||
# export performance monitor class identifier
|
||||
export VORTEX_PROFILING=$PERF_CLASS
|
||||
|
||||
status=0
|
||||
|
||||
# ensure config update
|
||||
make -C $ROOT_DIR/hw config > /dev/null
|
||||
|
||||
# ensure the stub driver is present
|
||||
make -C $ROOT_DIR/runtime/stub > /dev/null
|
||||
|
||||
if [ $DEBUG -ne 0 ]
|
||||
then
|
||||
# running application
|
||||
if [ $TEMPBUILD -eq 1 ]
|
||||
then
|
||||
# setup temp directory
|
||||
TEMPDIR=$(mktemp -d)
|
||||
mkdir -p "$TEMPDIR/$DRIVER"
|
||||
|
||||
# driver initialization
|
||||
if [ $SCOPE -eq 1 ]
|
||||
then
|
||||
echo "running: DESTDIR=$TEMPDIR/$DRIVER DEBUG=$DEBUG_LEVEL SCOPE=1 CONFIGS=$CONFIGS make -C $DRIVER_PATH"
|
||||
DESTDIR="$TEMPDIR/$DRIVER" DEBUG=$DEBUG_LEVEL SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null
|
||||
else
|
||||
echo "running: DESTDIR=$TEMPDIR/$DRIVER DEBUG=$DEBUG_LEVEL CONFIGS=$CONFIGS make -C $DRIVER_PATH"
|
||||
DESTDIR="$TEMPDIR/$DRIVER" DEBUG=$DEBUG_LEVEL CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null
|
||||
fi
|
||||
|
||||
# running application
|
||||
if [ $HAS_ARGS -eq 1 ]
|
||||
then
|
||||
echo "running: VORTEX_RT_PATH=$TEMPDIR OPTS=$ARGS make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1"
|
||||
DEBUG=1 VORTEX_RT_PATH=$TEMPDIR OPTS=$ARGS make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1
|
||||
status=$?
|
||||
else
|
||||
echo "running: VORTEX_RT_PATH=$TEMPDIR make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1"
|
||||
DEBUG=1 VORTEX_RT_PATH=$TEMPDIR make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1
|
||||
status=$?
|
||||
fi
|
||||
|
||||
# cleanup temp directory
|
||||
trap "rm -rf $TEMPDIR" EXIT
|
||||
else
|
||||
# driver initialization
|
||||
if [ $SCOPE -eq 1 ]
|
||||
then
|
||||
echo "running: DEBUG=$DEBUG_LEVEL SCOPE=1 CONFIGS=$CONFIGS make -C $DRIVER_PATH"
|
||||
DEBUG=$DEBUG_LEVEL SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null
|
||||
else
|
||||
echo "running: DEBUG=$DEBUG_LEVEL CONFIGS=$CONFIGS make -C $DRIVER_PATH"
|
||||
DEBUG=$DEBUG_LEVEL CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null
|
||||
fi
|
||||
|
||||
# running application
|
||||
if [ $HAS_ARGS -eq 1 ]
|
||||
then
|
||||
echo "running: OPTS=$ARGS make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1"
|
||||
DEBUG=1 OPTS=$ARGS make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1
|
||||
status=$?
|
||||
else
|
||||
echo "running: make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1"
|
||||
DEBUG=1 make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1
|
||||
status=$?
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -f "$APP_PATH/trace.vcd" ]
|
||||
then
|
||||
mv -f $APP_PATH/trace.vcd .
|
||||
fi
|
||||
else
|
||||
if [ $TEMPBUILD -eq 1 ]
|
||||
then
|
||||
# setup temp directory
|
||||
TEMPDIR=$(mktemp -d)
|
||||
mkdir -p "$TEMPDIR/$DRIVER"
|
||||
|
||||
# driver initialization
|
||||
if [ $SCOPE -eq 1 ]
|
||||
then
|
||||
echo "running: DESTDIR=$TEMPDIR/$DRIVER SCOPE=1 CONFIGS=$CONFIGS make -C $DRIVER_PATH"
|
||||
DESTDIR="$TEMPDIR/$DRIVER" SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null
|
||||
else
|
||||
echo "running: DESTDIR=$TEMPDIR/$DRIVER CONFIGS=$CONFIGS make -C $DRIVER_PATH"
|
||||
DESTDIR="$TEMPDIR/$DRIVER" CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null
|
||||
fi
|
||||
|
||||
# running application
|
||||
if [ $HAS_ARGS -eq 1 ]
|
||||
then
|
||||
echo "running: VORTEX_RT_PATH=$TEMPDIR OPTS=$ARGS make -C $APP_PATH run-$DRIVER"
|
||||
VORTEX_RT_PATH=$TEMPDIR OPTS=$ARGS make -C $APP_PATH run-$DRIVER
|
||||
status=$?
|
||||
else
|
||||
echo "running: VORTEX_RT_PATH=$TEMPDIR make -C $APP_PATH run-$DRIVER"
|
||||
VORTEX_RT_PATH=$TEMPDIR make -C $APP_PATH run-$DRIVER
|
||||
status=$?
|
||||
fi
|
||||
|
||||
# cleanup temp directory
|
||||
trap "rm -rf $TEMPDIR" EXIT
|
||||
else
|
||||
|
||||
# driver initialization
|
||||
if [ $SCOPE -eq 1 ]
|
||||
then
|
||||
echo "running: SCOPE=1 CONFIGS=$CONFIGS make -C $DRIVER_PATH"
|
||||
SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null
|
||||
else
|
||||
echo "running: CONFIGS=$CONFIGS make -C $DRIVER_PATH"
|
||||
CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null
|
||||
fi
|
||||
|
||||
# running application
|
||||
if [ $HAS_ARGS -eq 1 ]
|
||||
then
|
||||
echo "running: OPTS=$ARGS make -C $APP_PATH run-$DRIVER"
|
||||
OPTS=$ARGS make -C $APP_PATH run-$DRIVER
|
||||
status=$?
|
||||
else
|
||||
echo "running: make -C $APP_PATH run-$DRIVER"
|
||||
make -C $APP_PATH run-$DRIVER
|
||||
status=$?
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
exit $status
|
||||
main "$@"
|
46
ci/install_dependencies.sh
Executable file
46
ci/install_dependencies.sh
Executable file
|
@ -0,0 +1,46 @@
|
|||
#!/bin/sh
|
||||
|
||||
# Copyright 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
set -e
|
||||
|
||||
# Function to check if GCC version is less than 11
|
||||
check_gcc_version() {
|
||||
local gcc_version
|
||||
gcc_version=$(gcc -dumpversion)
|
||||
if dpkg --compare-versions "$gcc_version" lt 11; then
|
||||
return 0 # GCC version is less than 11
|
||||
else
|
||||
return 1 # GCC version is 11 or greater
|
||||
fi
|
||||
}
|
||||
|
||||
# Update package list
|
||||
apt-get update -y
|
||||
|
||||
# install system dependencies
|
||||
apt-get install -y build-essential valgrind libstdc++6 binutils python3 uuid-dev ccache
|
||||
|
||||
# Check and install GCC 11 if necessary
|
||||
if check_gcc_version; then
|
||||
echo "GCC version is less than 11. Installing GCC 11..."
|
||||
add-apt-repository -y ppa:ubuntu-toolchain-r/test
|
||||
apt-get update
|
||||
apt-get install -y g++-11 gcc-11
|
||||
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 100
|
||||
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100
|
||||
else
|
||||
echo "GCC version is 11 or greater. No need to install GCC 11."
|
||||
fi
|
|
@ -43,31 +43,23 @@ isa()
|
|||
make -C tests/riscv/isa run-simx
|
||||
make -C tests/riscv/isa run-rtlsim
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-32f
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-32f
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-32f
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32f
|
||||
|
||||
if [ "$XLEN" == "64" ]
|
||||
then
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64d
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64d
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64d
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64d
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64f
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64f
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64fx
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64fx
|
||||
fi
|
||||
|
||||
# clean build
|
||||
|
@ -102,10 +94,18 @@ regression()
|
|||
# test global barrier
|
||||
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tgbar" --cores=2
|
||||
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=opae --app=dogfood --args="-n1 -tgbar" --cores=2
|
||||
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=xrt --app=dogfood --args="-n1 -tgbar" --cores=2
|
||||
|
||||
# test local barrier
|
||||
./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar"
|
||||
./ci/blackbox.sh --driver=opae --app=dogfood --args="-n1 -tbar"
|
||||
./ci/blackbox.sh --driver=xrt --app=dogfood --args="-n1 -tbar"
|
||||
|
||||
# test temp driver mode for
|
||||
./ci/blackbox.sh --driver=simx --app=vecadd --rebuild=3
|
||||
|
||||
# test for matmul
|
||||
CONFIGS="-DTC_NUM=4 -DTC_SIZE=8" ./ci/blackbox.sh --cores=4 --app=matmul --driver=simx --threads=32 --warps=32 --args="-n128 -d1"
|
||||
|
||||
echo "regression tests done!"
|
||||
}
|
||||
|
@ -253,37 +253,39 @@ config2()
|
|||
# test opaesim
|
||||
./ci/blackbox.sh --driver=opae --app=printf
|
||||
./ci/blackbox.sh --driver=opae --app=diverge
|
||||
./ci/blackbox.sh --driver=xrt --app=diverge
|
||||
|
||||
# disable DPI
|
||||
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood
|
||||
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood
|
||||
if [ "$XLEN" == "64" ]; then
|
||||
# need to disable trig on 64-bit due to a bug inside fpnew's sqrt core.
|
||||
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-xtrig -xbar -xgbar"
|
||||
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood --args="-xtrig -xbar -xgbar"
|
||||
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=xrt --app=dogfood --args="-xtrig -xbar -xgbar"
|
||||
else
|
||||
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood
|
||||
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood
|
||||
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=xrt --app=dogfood
|
||||
fi
|
||||
|
||||
# custom program startup address
|
||||
make -C tests/regression/dogfood clean-kernel
|
||||
if [ "$XLEN" == "64" ]; then
|
||||
STARTUP_ADDR=0x180000000 make -C tests/regression/dogfood
|
||||
else
|
||||
STARTUP_ADDR=0x80000000 make -C tests/regression/dogfood
|
||||
fi
|
||||
STARTUP_ADDR=0x80000000 make -C tests/regression/dogfood
|
||||
./ci/blackbox.sh --driver=simx --app=dogfood
|
||||
./ci/blackbox.sh --driver=rtlsim --app=dogfood
|
||||
make -C tests/regression/dogfood clean-kernel
|
||||
|
||||
# disabling M & F extensions
|
||||
make -C sim/rtlsim clean && CONFIGS="-DEXT_M_DISABLE -DEXT_F_DISABLE" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-32i
|
||||
make -C sim/rtlsim clean && CONFIGS="-DEXT_M_DISABLE -DEXT_F_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32i
|
||||
make -C sim/rtlsim clean
|
||||
|
||||
# disabling ZICOND extension
|
||||
CONFIGS="-DEXT_ZICOND_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo
|
||||
|
||||
# test AXI bus
|
||||
AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --app=mstress
|
||||
|
||||
# test 128-bit MEM block
|
||||
# test 128-bit memory block
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=xrt --app=mstress
|
||||
|
||||
# test XLEN-bit MEM block
|
||||
# test XLEN-bit memory block
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=simx --app=mstress
|
||||
|
||||
|
@ -291,11 +293,27 @@ config2()
|
|||
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=rtlsim --app=mstress --threads=8
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8
|
||||
|
||||
# test single-bank DRAM
|
||||
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
# test single-bank memory
|
||||
if [ "$XLEN" == "64" ]; then
|
||||
CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=48" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=48" ./ci/blackbox.sh --driver=xrt --app=mstress
|
||||
else
|
||||
CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32" ./ci/blackbox.sh --driver=xrt --app=mstress
|
||||
fi
|
||||
|
||||
# test 27-bit DRAM address
|
||||
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
# test larger memory address
|
||||
if [ "$XLEN" == "64" ]; then
|
||||
CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=49" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=49" ./ci/blackbox.sh --driver=xrt --app=mstress
|
||||
else
|
||||
CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=xrt --app=mstress
|
||||
fi
|
||||
|
||||
# test memory banks interleaving
|
||||
CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=1" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=0" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
|
||||
echo "configuration-2 tests done!"
|
||||
}
|
||||
|
@ -321,20 +339,32 @@ debug()
|
|||
|
||||
test_csv_trace
|
||||
|
||||
CONFIGS="-O0" ./ci/blackbox.sh --driver=opae --app=demo --args="-n1"
|
||||
CONFIGS="-O0" ./ci/blackbox.sh --driver=xrt --app=demo --args="-n1"
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=xrt --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=opae --cores=1 --scope --app=demo --args="-n1"
|
||||
|
||||
echo "debugging tests done!"
|
||||
}
|
||||
|
||||
scope()
|
||||
{
|
||||
echo "begin scope tests..."
|
||||
|
||||
SCOPE_DEPTH=256 ./ci/blackbox.sh --driver=opae --app=demo --args="-n1" --scope
|
||||
SCOPE_DEPTH=256 ./ci/blackbox.sh --driver=xrt --app=demo --args="-n1" --scope
|
||||
|
||||
echo "debugging scope done!"
|
||||
}
|
||||
|
||||
stress()
|
||||
{
|
||||
echo "begin stress tests..."
|
||||
|
||||
# test verilator reset values
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1 -DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --args="-n128" --l2cache
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=xrt --app=sgemmx --args="-n128" --l2cache
|
||||
|
||||
echo "stress tests done!"
|
||||
}
|
||||
|
@ -352,7 +382,7 @@ synthesis()
|
|||
show_usage()
|
||||
{
|
||||
echo "Vortex Regression Test"
|
||||
echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--stress] [--synthesis] [--all] [--h|--help]"
|
||||
echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--scope] [--stress] [--synthesis] [--all] [--h|--help]"
|
||||
}
|
||||
|
||||
declare -a tests=()
|
||||
|
@ -393,6 +423,9 @@ while [ "$1" != "" ]; do
|
|||
--debug )
|
||||
tests+=("debug")
|
||||
;;
|
||||
--scope )
|
||||
tests+=("scope")
|
||||
;;
|
||||
--stress )
|
||||
tests+=("stress")
|
||||
;;
|
||||
|
@ -411,6 +444,7 @@ while [ "$1" != "" ]; do
|
|||
tests+=("config1")
|
||||
tests+=("config2")
|
||||
tests+=("debug")
|
||||
tests+=("scope")
|
||||
tests+=("stress")
|
||||
tests+=("synthesis")
|
||||
;;
|
||||
|
|
|
@ -1,27 +0,0 @@
|
|||
#!/bin/sh
|
||||
|
||||
# Copyright 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
set -e
|
||||
|
||||
apt-get update -y
|
||||
|
||||
add-apt-repository -y ppa:ubuntu-toolchain-r/test
|
||||
apt-get update
|
||||
apt-get install -y g++-11 gcc-11
|
||||
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 100
|
||||
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100
|
||||
|
||||
apt-get install -y build-essential valgrind libstdc++6 binutils python uuid-dev ccache
|
|
@ -23,9 +23,9 @@ OSVERSION=${OSVERSION:=@OSVERSION@}
|
|||
riscv32()
|
||||
{
|
||||
case $OSVERSION in
|
||||
"centos/7") parts=$(eval echo {a..h}) ;;
|
||||
"ubuntu/focal") parts=$(eval echo {a..k}) ;;
|
||||
*) parts=$(eval echo {a..j}) ;;
|
||||
"centos/7") parts=$(eval echo {a..l}) ;;
|
||||
"ubuntu/bionic") parts=$(eval echo {a..j}) ;;
|
||||
*) parts=$(eval echo {a..k}) ;;
|
||||
esac
|
||||
rm -f riscv32-gnu-toolchain.tar.bz2.parta*
|
||||
for x in $parts
|
||||
|
@ -41,7 +41,7 @@ riscv32()
|
|||
riscv64()
|
||||
{
|
||||
case $OSVERSION in
|
||||
"centos/7") parts=$(eval echo {a..h}) ;;
|
||||
"centos/7") parts=$(eval echo {a..l}) ;;
|
||||
*) parts=$(eval echo {a..j}) ;;
|
||||
esac
|
||||
rm -f riscv64-gnu-toolchain.tar.bz2.parta*
|
||||
|
|
|
@ -44,7 +44,8 @@ def load_config(filename):
|
|||
'num_barriers': int(config_match.group(7)),
|
||||
}
|
||||
return config
|
||||
return None
|
||||
print("Error: missing CONFIGS: header")
|
||||
sys.exit(1)
|
||||
|
||||
def parse_simx(log_lines):
|
||||
pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
|
||||
|
@ -274,6 +275,8 @@ def split_log_file(log_filename):
|
|||
|
||||
if current_sublog is not None:
|
||||
sublogs.append(current_sublog)
|
||||
else:
|
||||
sublogs.append(log_lines)
|
||||
|
||||
return sublogs
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#!/usr/bin/env python
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright 2019-2023
|
||||
#
|
||||
|
|
|
@ -31,7 +31,4 @@ RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv$(XLEN)-gnu-toolchain
|
|||
RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf
|
||||
RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX)
|
||||
|
||||
VORTEX_RT_PATH ?= $(VORTEX_HOME)/runtime
|
||||
VORTEX_KN_PATH ?= $(VORTEX_HOME)/kernel
|
||||
|
||||
THIRD_PARTY_DIR ?= $(VORTEX_HOME)/third_party
|
||||
|
|
4
configure
vendored
4
configure
vendored
|
@ -26,6 +26,8 @@ detect_osversion() {
|
|||
case "$VERSION_CODENAME" in
|
||||
bionic) osversion="ubuntu/bionic";;
|
||||
focal) osversion="ubuntu/focal";;
|
||||
jammy) osversion="ubuntu/focal";;
|
||||
noble) osversion="ubuntu/focal";;
|
||||
# Add new versions as needed
|
||||
esac
|
||||
;;
|
||||
|
@ -63,7 +65,7 @@ copy_files() {
|
|||
filename_no_ext="${filename%.in}"
|
||||
dest_file="$dest_dir/$filename_no_ext"
|
||||
mkdir -p "$dest_dir"
|
||||
sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g" "$file" > "$dest_file"
|
||||
sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g; s|@CURRENTDIR@|$CURRENT_DIR|g" "$file" > "$dest_file"
|
||||
# apply permissions to bash scripts
|
||||
read -r firstline < "$dest_file"
|
||||
if [[ "$firstline" =~ ^#!.*bash ]]; then
|
||||
|
|
|
@ -34,7 +34,7 @@ The hardware configuration file `/hw/rtl/VX_config.vh` defines all the hardware
|
|||
- `NUM_THREADS`: Number of threads per warps
|
||||
- `PERF_ENABLE`: enable the use of all profile counters
|
||||
|
||||
You configure the syntesis build from the command line:
|
||||
You can configure the synthesis build from the command line:
|
||||
|
||||
$ CONFIGS="-DPERF_ENABLE -DNUM_THREADS=8" make
|
||||
|
||||
|
@ -43,7 +43,7 @@ OPAE Build Progress
|
|||
|
||||
You could check the last 10 lines in the build log for possible errors until build completion.
|
||||
|
||||
$ tail -n 10 <build_dir>/build.log
|
||||
$ tail -n 10 <build_dir>/synth/build.log
|
||||
|
||||
Check if the build is still running by looking for quartus_sh, quartus_syn, or quartus_fit programs.
|
||||
|
||||
|
@ -70,10 +70,23 @@ Sample FPGA Run Test
|
|||
|
||||
Ensure you have the correct opae runtime for the FPGA target
|
||||
|
||||
$ make -C runtime/opae clean
|
||||
$ TARGET=FPGA make -C runtime/opae
|
||||
|
||||
Run the following from your Vortex build directory
|
||||
|
||||
$ TARGET=fpga ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n128"
|
||||
|
||||
Testing Vortex using OPAE with Intel ASE Simulation
|
||||
---------------------------------------------------
|
||||
|
||||
Building ASE synthesis
|
||||
|
||||
$ TARGET=asesim make -C runtime/opae
|
||||
|
||||
Building ASE runtime
|
||||
|
||||
$ TARGET=asesim make -C runtime/opae
|
||||
|
||||
Running ASE simulation
|
||||
|
||||
$ ASE_LOG=0 ASE_WORKDIR=<build_dir>/synth/work TARGET=asesim ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n16"
|
|
@ -33,4 +33,20 @@ Ensure you have the correct opae runtime for the FPGA target
|
|||
|
||||
Run the following from your Vortex build directory
|
||||
|
||||
$ TARGET=hw FPGA_BIN_DIR=<BUILD_DIR>/bin ./ci/blackbox.sh --driver=xrt --app=sgemm --args="-n128"
|
||||
$ TARGET=hw FPGA_BIN_DIR=<BUILD_DIR>/bin ./ci/blackbox.sh --driver=xrt --app=sgemm --args="-n128"
|
||||
|
||||
Testing Vortex using XRT Hardware Emulation
|
||||
-------------------------------------------
|
||||
|
||||
Building XRT's hw_emu target
|
||||
|
||||
$ cd hw/syn/xilinx/xrt
|
||||
$ PREFIX=test2 PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 TARGET=hw_emu make
|
||||
|
||||
Building XRT hw_meu runtime
|
||||
|
||||
$ TARGET=hw_emu make -C runtime/xrt
|
||||
|
||||
Running XRT hw_emu simulation
|
||||
|
||||
$ TARGET=hw_emu FPGA_BIN_DIR=<BUILD_DIR>/bin ./ci/blackbox.sh --driver=xrt --app=sgemm
|
|
@ -47,8 +47,6 @@ extern "C" {
|
|||
void dpi_trace(int level, const char* format, ...);
|
||||
void dpi_trace_start();
|
||||
void dpi_trace_stop();
|
||||
|
||||
uint64_t dpi_uuid_gen(bool reset, int wid);
|
||||
}
|
||||
|
||||
bool sim_trace_enabled();
|
||||
|
@ -204,17 +202,3 @@ void dpi_trace_start() {
|
|||
void dpi_trace_stop() {
|
||||
sim_trace_enable(false);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
std::unordered_map<uint32_t, uint32_t> g_uuid_gens;
|
||||
|
||||
uint64_t dpi_uuid_gen(bool reset, int wid) {
|
||||
if (reset) {
|
||||
g_uuid_gens.clear();
|
||||
return 0;
|
||||
}
|
||||
uint32_t instr_uuid = g_uuid_gens[wid]++;
|
||||
uint64_t uuid = (uint64_t(wid) << 32) | instr_uuid;
|
||||
return uuid;
|
||||
}
|
|
@ -30,6 +30,4 @@ import "DPI-C" function void dpi_trace(input int level, input string format /*ve
|
|||
import "DPI-C" function void dpi_trace_start();
|
||||
import "DPI-C" function void dpi_trace_stop();
|
||||
|
||||
import "DPI-C" function longint dpi_uuid_gen(input logic reset, input int wid);
|
||||
|
||||
`endif
|
||||
|
|
|
@ -56,14 +56,12 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
VX_gbar_bus_if per_socket_gbar_bus_if[`NUM_SOCKETS]();
|
||||
VX_gbar_bus_if gbar_bus_if();
|
||||
|
||||
`RESET_RELAY (gbar_reset, reset);
|
||||
|
||||
VX_gbar_arb #(
|
||||
.NUM_REQS (`NUM_SOCKETS),
|
||||
.OUT_BUF ((`NUM_SOCKETS > 2) ? 1 : 0) // bgar_unit has no backpressure
|
||||
) gbar_arb (
|
||||
.clk (clk),
|
||||
.reset (gbar_reset),
|
||||
.reset (reset),
|
||||
.bus_in_if (per_socket_gbar_bus_if),
|
||||
.bus_out_if (gbar_bus_if)
|
||||
);
|
||||
|
@ -72,7 +70,7 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
.INSTANCE_ID ($sformatf("gbar%0d", CLUSTER_ID))
|
||||
) gbar_unit (
|
||||
.clk (clk),
|
||||
.reset (gbar_reset),
|
||||
.reset (reset),
|
||||
.gbar_bus_if (gbar_bus_if)
|
||||
);
|
||||
|
||||
|
@ -102,8 +100,8 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
.WRITEBACK (`L2_WRITEBACK),
|
||||
.DIRTY_BYTES (`L2_WRITEBACK),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.CORE_OUT_BUF (2),
|
||||
.MEM_OUT_BUF (2),
|
||||
.CORE_OUT_BUF (3),
|
||||
.MEM_OUT_BUF (3),
|
||||
.NC_ENABLE (1),
|
||||
.PASSTHRU (!`L2_ENABLED)
|
||||
) l2cache (
|
||||
|
@ -118,21 +116,17 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
VX_dcr_bus_if socket_dcr_bus_tmp_if();
|
||||
assign socket_dcr_bus_tmp_if.write_valid = dcr_bus_if.write_valid && (dcr_bus_if.write_addr >= `VX_DCR_BASE_STATE_BEGIN && dcr_bus_if.write_addr < `VX_DCR_BASE_STATE_END);
|
||||
assign socket_dcr_bus_tmp_if.write_addr = dcr_bus_if.write_addr;
|
||||
assign socket_dcr_bus_tmp_if.write_data = dcr_bus_if.write_data;
|
||||
|
||||
wire [`NUM_SOCKETS-1:0] per_socket_busy;
|
||||
|
||||
VX_dcr_bus_if socket_dcr_bus_if();
|
||||
`BUFFER_DCR_BUS_IF (socket_dcr_bus_if, socket_dcr_bus_tmp_if, (`NUM_SOCKETS > 1));
|
||||
|
||||
// Generate all sockets
|
||||
for (genvar socket_id = 0; socket_id < `NUM_SOCKETS; ++socket_id) begin : sockets
|
||||
for (genvar socket_id = 0; socket_id < `NUM_SOCKETS; ++socket_id) begin : g_sockets
|
||||
|
||||
`RESET_RELAY (socket_reset, reset);
|
||||
|
||||
VX_dcr_bus_if socket_dcr_bus_if();
|
||||
wire is_base_dcr_addr = (dcr_bus_if.write_addr >= `VX_DCR_BASE_STATE_BEGIN && dcr_bus_if.write_addr < `VX_DCR_BASE_STATE_END);
|
||||
`BUFFER_DCR_BUS_IF (socket_dcr_bus_if, dcr_bus_if, is_base_dcr_addr, (`NUM_SOCKETS > 1))
|
||||
|
||||
VX_socket #(
|
||||
.SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + socket_id),
|
||||
.INSTANCE_ID ($sformatf("%s-socket%0d", INSTANCE_ID, socket_id))
|
||||
|
|
|
@ -111,6 +111,24 @@
|
|||
`define SOCKET_SIZE `MIN(4, `NUM_CORES)
|
||||
`endif
|
||||
|
||||
// Size of Tensor Core
|
||||
`ifndef TC_SIZE
|
||||
`define TC_SIZE 8
|
||||
`endif
|
||||
|
||||
// Number of TCs per Warp
|
||||
`ifndef TC_NUM
|
||||
`define TC_NUM 4
|
||||
`endif
|
||||
|
||||
`ifndef NUM_TCU_LANES
|
||||
`define NUM_TCU_LANES `TC_NUM
|
||||
`endif
|
||||
|
||||
`ifndef NUM_TCU_BLOCKS
|
||||
`define NUM_TCU_BLOCKS `ISSUE_WIDTH
|
||||
`endif
|
||||
|
||||
`ifdef L2_ENABLE
|
||||
`define L2_ENABLED 1
|
||||
`else
|
||||
|
@ -159,7 +177,7 @@
|
|||
`endif
|
||||
|
||||
`ifndef STARTUP_ADDR
|
||||
`define STARTUP_ADDR 64'h180000000
|
||||
`define STARTUP_ADDR 64'h080000000
|
||||
`endif
|
||||
|
||||
`ifndef USER_BASE_ADDR
|
||||
|
@ -172,7 +190,7 @@
|
|||
|
||||
`ifdef VM_ENABLE
|
||||
`ifndef PAGE_TABLE_BASE_ADDR
|
||||
`define PAGE_TABLE_BASE_ADDR 64'h1F0000000
|
||||
`define PAGE_TABLE_BASE_ADDR 64'h0F0000000
|
||||
`endif
|
||||
|
||||
`endif
|
||||
|
@ -229,15 +247,17 @@
|
|||
`endif
|
||||
`define STACK_SIZE (1 << `STACK_LOG2_SIZE)
|
||||
|
||||
`define RESET_DELAY 8
|
||||
`define RESET_DELAY 8
|
||||
|
||||
`ifndef STALL_TIMEOUT
|
||||
`define STALL_TIMEOUT (100000 * (1 ** (`L2_ENABLED + `L3_ENABLED)))
|
||||
`endif
|
||||
|
||||
`ifndef SV_DPI
|
||||
`ifndef DPI_DISABLE
|
||||
`define DPI_DISABLE
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifndef FPU_FPNEW
|
||||
`ifndef FPU_DSP
|
||||
|
@ -719,7 +739,7 @@
|
|||
`endif
|
||||
|
||||
`ifndef MEMORY_BANKS
|
||||
`define MEMORY_BANKS 8
|
||||
`define MEMORY_BANKS 2
|
||||
`endif
|
||||
|
||||
// Number of Memory Ports from LLC
|
||||
|
|
|
@ -50,10 +50,16 @@
|
|||
`define PERF_CTR_BITS 44
|
||||
|
||||
`ifndef NDEBUG
|
||||
`define UUID_ENABLE
|
||||
`define UUID_WIDTH 44
|
||||
`else
|
||||
`ifdef SCOPE
|
||||
`define UUID_ENABLE
|
||||
`define UUID_WIDTH 44
|
||||
`else
|
||||
`define UUID_WIDTH 1
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`define PC_BITS (`XLEN-1)
|
||||
`define OFFSET_BITS 12
|
||||
|
@ -227,22 +233,19 @@
|
|||
`define INST_FENCE_D 1'h0
|
||||
`define INST_FENCE_I 1'h1
|
||||
|
||||
`define INST_FPU_ADD 4'b0000
|
||||
`define INST_FPU_SUB 4'b0001
|
||||
`define INST_FPU_MUL 4'b0010
|
||||
`define INST_FPU_DIV 4'b0011
|
||||
`define INST_FPU_SQRT 4'b0100
|
||||
`define INST_FPU_CMP 4'b0101 // frm: LE=0, LT=1, EQ=2
|
||||
`define INST_FPU_F2F 4'b0110
|
||||
`define INST_FPU_MISC 4'b0111 // frm: SGNJ=0, SGNJN=1, SGNJX=2, CLASS=3, MVXW=4, MVWX=5, FMIN=6, FMAX=7
|
||||
`define INST_FPU_F2I 4'b1000
|
||||
`define INST_FPU_F2U 4'b1001
|
||||
`define INST_FPU_I2F 4'b1010
|
||||
`define INST_FPU_U2F 4'b1011
|
||||
`define INST_FPU_MADD 4'b1100
|
||||
`define INST_FPU_MSUB 4'b1101
|
||||
`define INST_FPU_NMSUB 4'b1110
|
||||
`define INST_FPU_NMADD 4'b1111
|
||||
`define INST_FPU_ADD 4'b0000 // SUB=fmt[1]
|
||||
`define INST_FPU_MUL 4'b0001
|
||||
`define INST_FPU_MADD 4'b0010 // SUB=fmt[1]
|
||||
`define INST_FPU_NMADD 4'b0011 // SUB=fmt[1]
|
||||
`define INST_FPU_DIV 4'b0100
|
||||
`define INST_FPU_SQRT 4'b0101
|
||||
`define INST_FPU_F2I 4'b1000 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1
|
||||
`define INST_FPU_F2U 4'b1001 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1
|
||||
`define INST_FPU_I2F 4'b1010 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1
|
||||
`define INST_FPU_U2F 4'b1011 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1
|
||||
`define INST_FPU_CMP 4'b1100 // frm: LE=0, LT=1, EQ=2
|
||||
`define INST_FPU_F2F 4'b1101 // fmt[0]: F32=0, F64=1
|
||||
`define INST_FPU_MISC 4'b1110 // frm: SGNJ=0, SGNJN=1, SGNJX=2, CLASS=3, MVXW=4, MVWX=5, FMIN=6, FMAX=7
|
||||
`define INST_FPU_BITS 4
|
||||
`define INST_FPU_IS_CLASS(op, frm) (op == `INST_FPU_MISC && frm == 3)
|
||||
`define INST_FPU_IS_MVXW(op, frm) (op == `INST_FPU_MISC && frm == 4)
|
||||
|
@ -267,14 +270,14 @@
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define CACHE_MEM_TAG_WIDTH(mshr_size, num_banks) \
|
||||
(`CLOG2(mshr_size) + `CLOG2(num_banks))
|
||||
`define CACHE_MEM_TAG_WIDTH(mshr_size, num_banks, uuid_width) \
|
||||
(uuid_width + `CLOG2(mshr_size) + `CLOG2(num_banks))
|
||||
|
||||
`define CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width) \
|
||||
(`CLOG2(num_reqs) + `CLOG2(line_size / word_size) + tag_width)
|
||||
|
||||
`define CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, tag_width) \
|
||||
(`MAX(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks), `CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width)) + 1)
|
||||
`define CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, tag_width, uuid_width) \
|
||||
(`MAX(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks, uuid_width), `CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width)) + 1)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
@ -284,14 +287,14 @@
|
|||
`define CACHE_CLUSTER_MEM_ARB_TAG(tag_width, num_caches) \
|
||||
(tag_width + `ARB_SEL_BITS(`UP(num_caches), 1))
|
||||
|
||||
`define CACHE_CLUSTER_MEM_TAG_WIDTH(mshr_size, num_banks, num_caches) \
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks), num_caches)
|
||||
`define CACHE_CLUSTER_MEM_TAG_WIDTH(mshr_size, num_banks, num_caches, uuid_width) \
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks, uuid_width), num_caches)
|
||||
|
||||
`define CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(num_reqs, line_size, word_size, tag_width, num_inputs, num_caches) \
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches)), num_caches)
|
||||
|
||||
`define CACHE_CLUSTER_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, tag_width, num_inputs, num_caches) \
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches)), num_caches)
|
||||
`define CACHE_CLUSTER_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, tag_width, num_inputs, num_caches, uuid_width) \
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches), uuid_width), num_caches)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
@ -303,10 +306,10 @@
|
|||
`define L1_ENABLE
|
||||
`endif
|
||||
|
||||
`define ADDR_TYPE_FLUSH 0
|
||||
`define ADDR_TYPE_IO 1
|
||||
`define ADDR_TYPE_LOCAL 2 // shoud be last since optional
|
||||
`define ADDR_TYPE_WIDTH (`ADDR_TYPE_LOCAL + `LMEM_ENABLED)
|
||||
`define MEM_REQ_FLAG_FLUSH 0
|
||||
`define MEM_REQ_FLAG_IO 1
|
||||
`define MEM_REQ_FLAG_LOCAL 2 // shoud be last since optional
|
||||
`define MEM_REQ_FLAGS_WIDTH (`MEM_REQ_FLAG_LOCAL + `LMEM_ENABLED)
|
||||
|
||||
`define VX_MEM_BYTEEN_WIDTH `L3_LINE_SIZE
|
||||
`define VX_MEM_ADDR_WIDTH (`MEM_ADDR_WIDTH - `CLOG2(`L3_LINE_SIZE))
|
||||
|
@ -320,6 +323,18 @@
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define NEG_EDGE(dst, src) \
|
||||
wire dst; \
|
||||
VX_edge_trigger #( \
|
||||
.POS (0), \
|
||||
.INIT (0) \
|
||||
) __``dst``__ ( \
|
||||
.clk (clk), \
|
||||
.reset (1'b0), \
|
||||
.data_in (src), \
|
||||
.data_out (dst) \
|
||||
)
|
||||
|
||||
`define BUFFER_EX(dst, src, ena, latency) \
|
||||
VX_pipe_register #( \
|
||||
.DATAW ($bits(dst)), \
|
||||
|
@ -359,43 +374,60 @@
|
|||
assign src.rsp_data = dst.rsp_data; \
|
||||
assign dst.rsp_ready = src.rsp_ready
|
||||
|
||||
`define ASSIGN_VX_MEM_BUS_RO_IF(dst, src) \
|
||||
assign dst.req_valid = src.req_valid; \
|
||||
assign dst.req_data.rw = 0; \
|
||||
assign dst.req_data.addr = src.req_data.addr; \
|
||||
assign dst.req_data.data = '0; \
|
||||
assign dst.req_data.byteen = '1; \
|
||||
assign dst.req_data.flags = src.req_data.flags; \
|
||||
assign dst.req_data.tag = src.req_data.tag; \
|
||||
assign src.req_ready = dst.req_ready; \
|
||||
assign src.rsp_valid = dst.rsp_valid; \
|
||||
assign src.rsp_data.data = dst.rsp_data.data; \
|
||||
assign src.rsp_data.tag = dst.rsp_data.tag; \
|
||||
assign dst.rsp_ready = src.rsp_ready
|
||||
|
||||
`define ASSIGN_VX_MEM_BUS_IF_X(dst, src, TD, TS) \
|
||||
assign dst.req_valid = src.req_valid; \
|
||||
assign dst.req_data.rw = src.req_data.rw; \
|
||||
assign dst.req_data.byteen = src.req_data.byteen; \
|
||||
assign dst.req_data.addr = src.req_data.addr; \
|
||||
assign dst.req_data.atype = src.req_data.atype; \
|
||||
assign dst.req_data.data = src.req_data.data; \
|
||||
if (TD != TS) \
|
||||
assign dst.req_data.byteen = src.req_data.byteen; \
|
||||
assign dst.req_data.flags = src.req_data.flags; \
|
||||
/* verilator lint_off GENUNNAMED */ \
|
||||
if (TD != TS) begin \
|
||||
assign dst.req_data.tag = {src.req_data.tag, {(TD-TS){1'b0}}}; \
|
||||
else \
|
||||
end else begin \
|
||||
assign dst.req_data.tag = src.req_data.tag; \
|
||||
end \
|
||||
/* verilator lint_on GENUNNAMED */ \
|
||||
assign src.req_ready = dst.req_ready; \
|
||||
assign src.rsp_valid = dst.rsp_valid; \
|
||||
assign src.rsp_data.data = dst.rsp_data.data; \
|
||||
assign src.rsp_data.tag = dst.rsp_data.tag[TD-1 -: TS]; \
|
||||
assign dst.rsp_ready = src.rsp_ready
|
||||
|
||||
`define ASSIGN_VX_LSU_MEM_IF(dst, src) \
|
||||
assign dst.req_valid = src.req_valid; \
|
||||
assign dst.req_data = src.req_data; \
|
||||
assign src.req_ready = dst.req_ready; \
|
||||
assign src.rsp_valid = dst.rsp_valid; \
|
||||
assign src.rsp_data = dst.rsp_data; \
|
||||
assign dst.rsp_ready = src.rsp_ready
|
||||
|
||||
`define BUFFER_DCR_BUS_IF(dst, src, enable) \
|
||||
if (enable) begin \
|
||||
reg [(1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH)-1:0] __dst; \
|
||||
always @(posedge clk) begin \
|
||||
__dst <= {src.write_valid, src.write_addr, src.write_data}; \
|
||||
end \
|
||||
assign {dst.write_valid, dst.write_addr, dst.write_data} = __dst; \
|
||||
`define BUFFER_DCR_BUS_IF(dst, src, ena, latency) \
|
||||
/* verilator lint_off GENUNNAMED */ \
|
||||
if (latency != 0) begin \
|
||||
VX_pipe_register #( \
|
||||
.DATAW (1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH), \
|
||||
.DEPTH (latency) \
|
||||
) pipe_reg ( \
|
||||
.clk (clk), \
|
||||
.reset (1'b0), \
|
||||
.enable (1'b1), \
|
||||
.data_in ({src.write_valid && ena, src.write_addr, src.write_data}), \
|
||||
.data_out ({dst.write_valid, dst.write_addr, dst.write_data}) \
|
||||
); \
|
||||
end else begin \
|
||||
assign {dst.write_valid, dst.write_addr, dst.write_data} = {src.write_valid, src.write_addr, src.write_data}; \
|
||||
end
|
||||
assign {dst.write_valid, dst.write_addr, dst.write_data} = {src.write_valid && ena, src.write_addr, src.write_data}; \
|
||||
end \
|
||||
/* verilator lint_on GENUNNAMED */
|
||||
|
||||
`define PERF_COUNTER_ADD(dst, src, field, width, count, reg_enable) \
|
||||
/* verilator lint_off GENUNNAMED */ \
|
||||
if (count > 1) begin \
|
||||
wire [count-1:0][width-1:0] __reduce_add_i_field; \
|
||||
wire [width-1:0] __reduce_add_o_field; \
|
||||
|
@ -421,9 +453,11 @@
|
|||
end \
|
||||
end else begin \
|
||||
assign dst.``field = src[0].``field; \
|
||||
end
|
||||
end \
|
||||
/* verilator lint_on GENUNNAMED */
|
||||
|
||||
`define ASSIGN_BLOCKED_WID(dst, src, block_idx, block_size) \
|
||||
/* verilator lint_off GENUNNAMED */ \
|
||||
if (block_size != 1) begin \
|
||||
if (block_size != `NUM_WARPS) begin \
|
||||
assign dst = {src[`NW_WIDTH-1:`CLOG2(block_size)], `CLOG2(block_size)'(block_idx)}; \
|
||||
|
@ -432,6 +466,7 @@
|
|||
end \
|
||||
end else begin \
|
||||
assign dst = src; \
|
||||
end
|
||||
end \
|
||||
/* verilator lint_on GENUNNAMED */
|
||||
|
||||
`endif // VX_DEFINE_VH
|
||||
|
|
|
@ -166,7 +166,7 @@ package VX_gpu_pkg;
|
|||
|
||||
// Memory request tag bits
|
||||
`ifdef ICACHE_ENABLE
|
||||
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_MEM_TAG_WIDTH(`ICACHE_MSHR_SIZE, 1, `NUM_ICACHES);
|
||||
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_MEM_TAG_WIDTH(`ICACHE_MSHR_SIZE, 1, `NUM_ICACHES, `UUID_WIDTH);
|
||||
`else
|
||||
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(1, ICACHE_LINE_SIZE, ICACHE_WORD_SIZE, ICACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_ICACHES);
|
||||
`endif
|
||||
|
@ -197,7 +197,7 @@ package VX_gpu_pkg;
|
|||
|
||||
// Memory request tag bits
|
||||
`ifdef DCACHE_ENABLE
|
||||
localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_NC_MEM_TAG_WIDTH(`DCACHE_MSHR_SIZE, `DCACHE_NUM_BANKS, DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES);
|
||||
localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_NC_MEM_TAG_WIDTH(`DCACHE_MSHR_SIZE, `DCACHE_NUM_BANKS, DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES, `UUID_WIDTH);
|
||||
`else
|
||||
localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES);
|
||||
`endif
|
||||
|
@ -226,7 +226,7 @@ package VX_gpu_pkg;
|
|||
|
||||
// Memory request tag bits
|
||||
`ifdef L2_ENABLE
|
||||
localparam L2_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L2_MSHR_SIZE, `L2_NUM_BANKS, L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH);
|
||||
localparam L2_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L2_MSHR_SIZE, `L2_NUM_BANKS, L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH, `UUID_WIDTH);
|
||||
`else
|
||||
localparam L2_MEM_TAG_WIDTH = `CACHE_BYPASS_TAG_WIDTH(L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH);
|
||||
`endif
|
||||
|
@ -247,7 +247,7 @@ package VX_gpu_pkg;
|
|||
|
||||
// Memory request tag bits
|
||||
`ifdef L3_ENABLE
|
||||
localparam L3_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L3_MSHR_SIZE, `L3_NUM_BANKS, L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH);
|
||||
localparam L3_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L3_MSHR_SIZE, `L3_NUM_BANKS, L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH, `UUID_WIDTH);
|
||||
`else
|
||||
localparam L3_MEM_TAG_WIDTH = `CACHE_BYPASS_TAG_WIDTH(L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH);
|
||||
`endif
|
||||
|
@ -308,6 +308,430 @@ package VX_gpu_pkg;
|
|||
|
||||
`IGNORE_UNUSED_END
|
||||
|
||||
////////////////////////////////// Tracing ////////////////////////////////////
|
||||
|
||||
`ifdef SIMULATION
|
||||
|
||||
`ifdef SV_DPI
|
||||
import "DPI-C" function void dpi_trace(input int level, input string format /*verilator sformat*/);
|
||||
`endif
|
||||
|
||||
task trace_ex_type(input int level, input [`EX_BITS-1:0] ex_type);
|
||||
case (ex_type)
|
||||
`EX_ALU: `TRACE(level, ("ALU"))
|
||||
`EX_LSU: `TRACE(level, ("LSU"))
|
||||
`EX_SFU: `TRACE(level, ("SFU"))
|
||||
`ifdef EXT_F_ENABLE
|
||||
`EX_FPU: `TRACE(level, ("FPU"))
|
||||
`endif
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
endtask
|
||||
|
||||
task trace_ex_op(input int level,
|
||||
input [`EX_BITS-1:0] ex_type,
|
||||
input [`INST_OP_BITS-1:0] op_type,
|
||||
input VX_gpu_pkg::op_args_t op_args
|
||||
);
|
||||
case (ex_type)
|
||||
`EX_ALU: begin
|
||||
case (op_args.alu.xtype)
|
||||
`ALU_TYPE_ARITH: begin
|
||||
if (op_args.alu.is_w) begin
|
||||
if (op_args.alu.use_imm) begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADDIW"))
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLLIW"))
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRLIW"))
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRAIW"))
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADDW"))
|
||||
`INST_ALU_SUB: `TRACE(level, ("SUBW"))
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLLW"))
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRLW"))
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRAW"))
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
end
|
||||
end else begin
|
||||
if (op_args.alu.use_imm) begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADDI"))
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLLI"))
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRLI"))
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRAI"))
|
||||
`INST_ALU_SLT: `TRACE(level, ("SLTI"))
|
||||
`INST_ALU_SLTU: `TRACE(level, ("SLTIU"))
|
||||
`INST_ALU_XOR: `TRACE(level, ("XORI"))
|
||||
`INST_ALU_OR: `TRACE(level, ("ORI"))
|
||||
`INST_ALU_AND: `TRACE(level, ("ANDI"))
|
||||
`INST_ALU_LUI: `TRACE(level, ("LUI"))
|
||||
`INST_ALU_AUIPC: `TRACE(level, ("AUIPC"))
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADD"))
|
||||
`INST_ALU_SUB: `TRACE(level, ("SUB"))
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLL"))
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRL"))
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRA"))
|
||||
`INST_ALU_SLT: `TRACE(level, ("SLT"))
|
||||
`INST_ALU_SLTU: `TRACE(level, ("SLTU"))
|
||||
`INST_ALU_XOR: `TRACE(level, ("XOR"))
|
||||
`INST_ALU_OR: `TRACE(level, ("OR"))
|
||||
`INST_ALU_AND: `TRACE(level, ("AND"))
|
||||
`INST_ALU_CZEQ: `TRACE(level, ("CZERO.EQZ"))
|
||||
`INST_ALU_CZNE: `TRACE(level, ("CZERO.NEZ"))
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
end
|
||||
end
|
||||
end
|
||||
`ALU_TYPE_BRANCH: begin
|
||||
case (`INST_BR_BITS'(op_type))
|
||||
`INST_BR_EQ: `TRACE(level, ("BEQ"))
|
||||
`INST_BR_NE: `TRACE(level, ("BNE"))
|
||||
`INST_BR_LT: `TRACE(level, ("BLT"))
|
||||
`INST_BR_GE: `TRACE(level, ("BGE"))
|
||||
`INST_BR_LTU: `TRACE(level, ("BLTU"))
|
||||
`INST_BR_GEU: `TRACE(level, ("BGEU"))
|
||||
`INST_BR_JAL: `TRACE(level, ("JAL"))
|
||||
`INST_BR_JALR: `TRACE(level, ("JALR"))
|
||||
`INST_BR_ECALL: `TRACE(level, ("ECALL"))
|
||||
`INST_BR_EBREAK:`TRACE(level, ("EBREAK"))
|
||||
`INST_BR_URET: `TRACE(level, ("URET"))
|
||||
`INST_BR_SRET: `TRACE(level, ("SRET"))
|
||||
`INST_BR_MRET: `TRACE(level, ("MRET"))
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
end
|
||||
`ALU_TYPE_MULDIV: begin
|
||||
if (op_args.alu.is_w) begin
|
||||
case (`INST_M_BITS'(op_type))
|
||||
`INST_M_MUL: `TRACE(level, ("MULW"))
|
||||
`INST_M_DIV: `TRACE(level, ("DIVW"))
|
||||
`INST_M_DIVU: `TRACE(level, ("DIVUW"))
|
||||
`INST_M_REM: `TRACE(level, ("REMW"))
|
||||
`INST_M_REMU: `TRACE(level, ("REMUW"))
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_M_BITS'(op_type))
|
||||
`INST_M_MUL: `TRACE(level, ("MUL"))
|
||||
`INST_M_MULH: `TRACE(level, ("MULH"))
|
||||
`INST_M_MULHSU:`TRACE(level, ("MULHSU"))
|
||||
`INST_M_MULHU: `TRACE(level, ("MULHU"))
|
||||
`INST_M_DIV: `TRACE(level, ("DIV"))
|
||||
`INST_M_DIVU: `TRACE(level, ("DIVU"))
|
||||
`INST_M_REM: `TRACE(level, ("REM"))
|
||||
`INST_M_REMU: `TRACE(level, ("REMU"))
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
end
|
||||
end
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
end
|
||||
`EX_LSU: begin
|
||||
if (op_args.lsu.is_float) begin
|
||||
case (`INST_LSU_BITS'(op_type))
|
||||
`INST_LSU_LW: `TRACE(level, ("FLW"))
|
||||
`INST_LSU_LD: `TRACE(level, ("FLD"))
|
||||
`INST_LSU_SW: `TRACE(level, ("FSW"))
|
||||
`INST_LSU_SD: `TRACE(level, ("FSD"))
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_LSU_BITS'(op_type))
|
||||
`INST_LSU_LB: `TRACE(level, ("LB"))
|
||||
`INST_LSU_LH: `TRACE(level, ("LH"))
|
||||
`INST_LSU_LW: `TRACE(level, ("LW"))
|
||||
`INST_LSU_LD: `TRACE(level, ("LD"))
|
||||
`INST_LSU_LBU:`TRACE(level, ("LBU"))
|
||||
`INST_LSU_LHU:`TRACE(level, ("LHU"))
|
||||
`INST_LSU_LWU:`TRACE(level, ("LWU"))
|
||||
`INST_LSU_SB: `TRACE(level, ("SB"))
|
||||
`INST_LSU_SH: `TRACE(level, ("SH"))
|
||||
`INST_LSU_SW: `TRACE(level, ("SW"))
|
||||
`INST_LSU_SD: `TRACE(level, ("SD"))
|
||||
`INST_LSU_FENCE:`TRACE(level,("FENCE"))
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
end
|
||||
end
|
||||
`EX_SFU: begin
|
||||
case (`INST_SFU_BITS'(op_type))
|
||||
`INST_SFU_TMC: `TRACE(level, ("TMC"))
|
||||
`INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN"))
|
||||
`INST_SFU_SPLIT: begin
|
||||
if (op_args.wctl.is_neg) begin
|
||||
`TRACE(level, ("SPLIT.N"))
|
||||
end else begin
|
||||
`TRACE(level, ("SPLIT"))
|
||||
end
|
||||
end
|
||||
`INST_SFU_JOIN: `TRACE(level, ("JOIN"))
|
||||
`INST_SFU_BAR: `TRACE(level, ("BAR"))
|
||||
`INST_SFU_PRED: begin
|
||||
if (op_args.wctl.is_neg) begin
|
||||
`TRACE(level, ("PRED.N"))
|
||||
end else begin
|
||||
`TRACE(level, ("PRED"))
|
||||
end
|
||||
end
|
||||
`INST_SFU_CSRRW: begin
|
||||
if (op_args.csr.use_imm) begin
|
||||
`TRACE(level, ("CSRRWI"))
|
||||
end else begin
|
||||
`TRACE(level, ("CSRRW"))
|
||||
end
|
||||
end
|
||||
`INST_SFU_CSRRS: begin
|
||||
if (op_args.csr.use_imm) begin
|
||||
`TRACE(level, ("CSRRSI"))
|
||||
end else begin
|
||||
`TRACE(level, ("CSRRS"))
|
||||
end
|
||||
end
|
||||
`INST_SFU_CSRRC: begin
|
||||
if (op_args.csr.use_imm) begin
|
||||
`TRACE(level, ("CSRRCI"))
|
||||
end else begin
|
||||
`TRACE(level, ("CSRRC"))
|
||||
end
|
||||
end
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
`EX_FPU: begin
|
||||
case (`INST_FPU_BITS'(op_type))
|
||||
`INST_FPU_ADD: begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
`TRACE(level, ("FSUB.D"))
|
||||
end else begin
|
||||
`TRACE(level, ("FSUB.S"))
|
||||
end
|
||||
end else begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
`TRACE(level, ("FADD.D"))
|
||||
end else begin
|
||||
`TRACE(level, ("FADD.S"))
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_MADD: begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
`TRACE(level, ("FMSUB.D"))
|
||||
end else begin
|
||||
`TRACE(level, ("FMSUB.S"))
|
||||
end
|
||||
end else begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
`TRACE(level, ("FMADD.D"))
|
||||
end else begin
|
||||
`TRACE(level, ("FMADD.S"))
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_NMADD: begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
`TRACE(level, ("FNMSUB.D"))
|
||||
end else begin
|
||||
`TRACE(level, ("FNMSUB.S"))
|
||||
end
|
||||
end else begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
`TRACE(level, ("FNMADD.D"))
|
||||
end else begin
|
||||
`TRACE(level, ("FNMADD.S"))
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_MUL: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
`TRACE(level, ("FMUL.D"))
|
||||
end else begin
|
||||
`TRACE(level, ("FMUL.S"))
|
||||
end
|
||||
end
|
||||
`INST_FPU_DIV: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
`TRACE(level, ("FDIV.D"))
|
||||
end else begin
|
||||
`TRACE(level, ("FDIV.S"))
|
||||
end
|
||||
end
|
||||
`INST_FPU_SQRT: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
`TRACE(level, ("FSQRT.D"))
|
||||
end else begin
|
||||
`TRACE(level, ("FSQRT.S"))
|
||||
end
|
||||
end
|
||||
`INST_FPU_CMP: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
case (op_args.fpu.frm[1:0])
|
||||
0: `TRACE(level, ("FLE.D"))
|
||||
1: `TRACE(level, ("FLT.D"))
|
||||
2: `TRACE(level, ("FEQ.D"))
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
end else begin
|
||||
case (op_args.fpu.frm[1:0])
|
||||
0: `TRACE(level, ("FLE.S"))
|
||||
1: `TRACE(level, ("FLT.S"))
|
||||
2: `TRACE(level, ("FEQ.S"))
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
end
|
||||
end
|
||||
`INST_FPU_F2F: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
`TRACE(level, ("FCVT.D.S"))
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.S.D"))
|
||||
end
|
||||
end
|
||||
`INST_FPU_F2I: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.L.D"))
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.W.D"))
|
||||
end
|
||||
end else begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.L.S"))
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.W.S"))
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_F2U: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.LU.D"))
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.WU.D"))
|
||||
end
|
||||
end else begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.LU.S"))
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.WU.S"))
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_I2F: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.D.L"))
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.D.W"))
|
||||
end
|
||||
end else begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.S.L"))
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.S.W"))
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_U2F: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.D.LU"))
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.D.WU"))
|
||||
end
|
||||
end else begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.S.LU"))
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.S.WU"))
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_MISC: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
case (op_args.fpu.frm)
|
||||
0: `TRACE(level, ("FSGNJ.D"))
|
||||
1: `TRACE(level, ("FSGNJN.D"))
|
||||
2: `TRACE(level, ("FSGNJX.D"))
|
||||
3: `TRACE(level, ("FCLASS.D"))
|
||||
4: `TRACE(level, ("FMV.X.D"))
|
||||
5: `TRACE(level, ("FMV.D.X"))
|
||||
6: `TRACE(level, ("FMIN.D"))
|
||||
7: `TRACE(level, ("FMAX.D"))
|
||||
endcase
|
||||
end else begin
|
||||
case (op_args.fpu.frm)
|
||||
0: `TRACE(level, ("FSGNJ.S"))
|
||||
1: `TRACE(level, ("FSGNJN.S"))
|
||||
2: `TRACE(level, ("FSGNJX.S"))
|
||||
3: `TRACE(level, ("FCLASS.S"))
|
||||
4: `TRACE(level, ("FMV.X.S"))
|
||||
5: `TRACE(level, ("FMV.S.X"))
|
||||
6: `TRACE(level, ("FMIN.S"))
|
||||
7: `TRACE(level, ("FMAX.S"))
|
||||
endcase
|
||||
end
|
||||
end
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
end
|
||||
`endif
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
endtask
|
||||
|
||||
task trace_op_args(input int level,
|
||||
input [`EX_BITS-1:0] ex_type,
|
||||
input [`INST_OP_BITS-1:0] op_type,
|
||||
input VX_gpu_pkg::op_args_t op_args
|
||||
);
|
||||
case (ex_type)
|
||||
`EX_ALU: begin
|
||||
`TRACE(level, (", use_PC=%b, use_imm=%b, imm=0x%0h", op_args.alu.use_PC, op_args.alu.use_imm, op_args.alu.imm))
|
||||
end
|
||||
`EX_LSU: begin
|
||||
`TRACE(level, (", offset=0x%0h", op_args.lsu.offset))
|
||||
end
|
||||
`EX_SFU: begin
|
||||
if (`INST_SFU_IS_CSR(op_type)) begin
|
||||
`TRACE(level, (", addr=0x%0h, use_imm=%b, imm=0x%0h", op_args.csr.addr, op_args.csr.use_imm, op_args.csr.imm))
|
||||
end
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
`EX_FPU: begin
|
||||
`TRACE(level, (", fmt=0x%0h, frm=0x%0h", op_args.fpu.fmt, op_args.fpu.frm))
|
||||
end
|
||||
`endif
|
||||
default:;
|
||||
endcase
|
||||
endtask
|
||||
|
||||
task trace_base_dcr(input int level, input [`VX_DCR_ADDR_WIDTH-1:0] addr);
|
||||
case (addr)
|
||||
`VX_DCR_BASE_STARTUP_ADDR0: `TRACE(level, ("STARTUP_ADDR0"))
|
||||
`VX_DCR_BASE_STARTUP_ADDR1: `TRACE(level, ("STARTUP_ADDR1"))
|
||||
`VX_DCR_BASE_STARTUP_ARG0: `TRACE(level, ("STARTUP_ARG0"))
|
||||
`VX_DCR_BASE_STARTUP_ARG1: `TRACE(level, ("STARTUP_ARG1"))
|
||||
`VX_DCR_BASE_MPM_CLASS: `TRACE(level, ("MPM_CLASS"))
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
endtask
|
||||
|
||||
`endif
|
||||
|
||||
endpackage
|
||||
|
||||
`endif // VX_GPU_PKG_VH
|
||||
|
|
|
@ -22,36 +22,39 @@
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef VIVADO
|
||||
`define STRING
|
||||
`else
|
||||
`define STRING string
|
||||
`endif
|
||||
`ifdef SIMULATION
|
||||
|
||||
`ifdef SYNTHESIS
|
||||
`define TRACING_ON
|
||||
`define TRACING_OFF
|
||||
`ifndef NDEBUG
|
||||
`define DEBUG_BLOCK(x) x
|
||||
`else
|
||||
`define DEBUG_BLOCK(x)
|
||||
`endif
|
||||
`define IGNORE_UNOPTFLAT_BEGIN
|
||||
`define IGNORE_UNOPTFLAT_END
|
||||
`define IGNORE_UNUSED_BEGIN
|
||||
`define IGNORE_UNUSED_END
|
||||
`define IGNORE_WARNINGS_BEGIN
|
||||
`define IGNORE_WARNINGS_END
|
||||
`define UNUSED_PARAM(x)
|
||||
`define UNUSED_SPARAM(x)
|
||||
`define UNUSED_VAR(x)
|
||||
`define UNUSED_PIN(x) . x ()
|
||||
`define UNUSED_ARG(x) x
|
||||
`define TRACE(level, args) if (level <= `DEBUG_LEVEL) $write args
|
||||
`else
|
||||
`ifdef VERILATOR
|
||||
`define STATIC_ASSERT(cond, msg) \
|
||||
generate \
|
||||
/* verilator lint_off GENUNNAMED */ \
|
||||
if (!(cond)) $error msg; \
|
||||
/* verilator lint_on GENUNNAMED */ \
|
||||
endgenerate
|
||||
|
||||
`define ERROR(msg) \
|
||||
$error msg
|
||||
|
||||
`define ASSERT(cond, msg) \
|
||||
assert(cond) else $error msg
|
||||
|
||||
`define RUNTIME_ASSERT(cond, msg) \
|
||||
always @(posedge clk) begin \
|
||||
assert(cond) else $error msg; \
|
||||
end
|
||||
|
||||
`define __SCOPE
|
||||
`define __SCOPE_X
|
||||
`define __SCOPE_ON
|
||||
`define __SCOPE_OFF
|
||||
|
||||
`ifndef TRACING_ALL
|
||||
`define TRACING_ON /* verilator tracing_on */
|
||||
`define TRACING_OFF /* verilator tracing_off */
|
||||
`else
|
||||
`define TRACING_ON
|
||||
`define TRACING_OFF
|
||||
`endif
|
||||
|
||||
`ifndef NDEBUG
|
||||
`define DEBUG_BLOCK(x) /* verilator lint_off UNUSED */ \
|
||||
x \
|
||||
|
@ -100,49 +103,68 @@
|
|||
localparam `STRING __``x = x; \
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
`define UNUSED_VAR(x) if (1) begin \
|
||||
`define UNUSED_VAR(x) /* verilator lint_off GENUNNAMED */ \
|
||||
if (1) begin \
|
||||
/* verilator lint_off UNUSED */ \
|
||||
wire [$bits(x)-1:0] __x = x; \
|
||||
/* verilator lint_on UNUSED */ \
|
||||
end
|
||||
end \
|
||||
/* verilator lint_on GENUNNAMED */
|
||||
|
||||
`define UNUSED_PIN(x) /* verilator lint_off PINCONNECTEMPTY */ \
|
||||
. x () \
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
|
||||
`define UNUSED_ARG(x) /* verilator lint_off UNUSED */ \
|
||||
x \
|
||||
/* verilator lint_on UNUSED */
|
||||
`endif
|
||||
|
||||
`ifdef SV_DPI
|
||||
`define TRACE(level, args) dpi_trace(level, $sformatf args)
|
||||
`define TRACE(level, args) dpi_trace(level, $sformatf args);
|
||||
`else
|
||||
`define TRACE(level, args) if (level <= `DEBUG_LEVEL) $write args
|
||||
`define TRACE(level, args) \
|
||||
if (level <= `DEBUG_LEVEL) begin \
|
||||
$write args; \
|
||||
end
|
||||
`endif
|
||||
|
||||
`endif
|
||||
`else // SYNTHESIS
|
||||
|
||||
`ifdef SIMULATION
|
||||
`define STATIC_ASSERT(cond, msg) \
|
||||
generate \
|
||||
if (!(cond)) $error msg; \
|
||||
endgenerate
|
||||
`define STATIC_ASSERT(cond, msg)
|
||||
`define ERROR(msg) //
|
||||
`define ASSERT(cond, msg) //
|
||||
`define RUNTIME_ASSERT(cond, msg)
|
||||
|
||||
`define ERROR(msg) \
|
||||
$error msg
|
||||
`define DEBUG_BLOCK(x)
|
||||
`define TRACE(level, args)
|
||||
|
||||
`define ASSERT(cond, msg) \
|
||||
assert(cond) else $error msg
|
||||
`define TRACING_ON
|
||||
`define TRACING_OFF
|
||||
|
||||
`define IGNORE_UNOPTFLAT_BEGIN
|
||||
`define IGNORE_UNOPTFLAT_END
|
||||
`define IGNORE_UNUSED_BEGIN
|
||||
`define IGNORE_UNUSED_END
|
||||
`define IGNORE_WARNINGS_BEGIN
|
||||
`define IGNORE_WARNINGS_END
|
||||
`define UNUSED_PARAM(x)
|
||||
`define UNUSED_SPARAM(x)
|
||||
`define UNUSED_VAR(x)
|
||||
`define UNUSED_PIN(x) . x ()
|
||||
`define UNUSED_ARG(x) x
|
||||
|
||||
`define __SCOPE (* mark_debug="true" *)
|
||||
|
||||
`define __SCOPE_X
|
||||
|
||||
`define __SCOPE_ON \
|
||||
`undef __SCOPE_X \
|
||||
`define __SCOPE_X `__SCOPE
|
||||
|
||||
`define __SCOPE_OFF \
|
||||
`undef __SCOPE_X \
|
||||
`define __SCOPE_X
|
||||
|
||||
`define RUNTIME_ASSERT(cond, msg) \
|
||||
always @(posedge clk) begin \
|
||||
assert(cond) else $error msg; \
|
||||
end
|
||||
`else
|
||||
`define STATIC_ASSERT(cond, msg)
|
||||
`define ERROR(msg) //
|
||||
`define ASSERT(cond, msg) //
|
||||
`define RUNTIME_ASSERT(cond, msg)
|
||||
`endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -154,6 +176,7 @@
|
|||
`define NO_RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams off" *)
|
||||
`define DISABLE_BRAM (* ramstyle = "logic" *)
|
||||
`define PRESERVE_NET (* preserve *)
|
||||
`define STRING string
|
||||
`elsif VIVADO
|
||||
`define MAX_FANOUT 8
|
||||
`define IF_DATA_SIZE(x) $bits(x.data)
|
||||
|
@ -161,6 +184,7 @@
|
|||
`define NO_RW_RAM_CHECK (* rw_addr_collision = "no" *)
|
||||
`define DISABLE_BRAM (* ram_style = "registers" *)
|
||||
`define PRESERVE_NET (* keep = "true" *)
|
||||
`define STRING
|
||||
`else
|
||||
`define MAX_FANOUT 8
|
||||
`define IF_DATA_SIZE(x) x.DATA_WIDTH
|
||||
|
@ -168,6 +192,7 @@
|
|||
`define NO_RW_RAM_CHECK
|
||||
`define DISABLE_BRAM
|
||||
`define PRESERVE_NET
|
||||
`define STRING string
|
||||
`endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -204,23 +229,23 @@
|
|||
`define SEXT(len, x) {{(len-$bits(x)+1){x[$bits(x)-1]}}, x[$bits(x)-2:0]}
|
||||
|
||||
`define TRACE_ARRAY1D(lvl, fmt, arr, n) \
|
||||
`TRACE(lvl, ("{")); \
|
||||
`TRACE(lvl, ("{")) \
|
||||
for (integer __i = (n-1); __i >= 0; --__i) begin \
|
||||
if (__i != (n-1)) `TRACE(lvl, (", ")); \
|
||||
`TRACE(lvl, (fmt, arr[__i])); \
|
||||
if (__i != (n-1)) `TRACE(lvl, (", ")) \
|
||||
`TRACE(lvl, (fmt, arr[__i])) \
|
||||
end \
|
||||
`TRACE(lvl, ("}"));
|
||||
`TRACE(lvl, ("}"))
|
||||
|
||||
`define TRACE_ARRAY2D(lvl, fmt, arr, m, n) \
|
||||
`TRACE(lvl, ("{")); \
|
||||
`TRACE(lvl, ("{")) \
|
||||
for (integer __i = n-1; __i >= 0; --__i) begin \
|
||||
if (__i != (n-1)) `TRACE(lvl, (", ")); \
|
||||
`TRACE(lvl, ("{")); \
|
||||
if (__i != (n-1)) `TRACE(lvl, (", ")) \
|
||||
`TRACE(lvl, ("{")) \
|
||||
for (integer __j = (m-1); __j >= 0; --__j) begin \
|
||||
if (__j != (m-1)) `TRACE(lvl, (", "));\
|
||||
`TRACE(lvl, (fmt, arr[__i][__j])); \
|
||||
if (__j != (m-1)) `TRACE(lvl, (", "))\
|
||||
`TRACE(lvl, (fmt, arr[__i][__j])) \
|
||||
end \
|
||||
`TRACE(lvl, ("}")); \
|
||||
`TRACE(lvl, ("}")) \
|
||||
end \
|
||||
`TRACE(lvl, ("}"))
|
||||
|
||||
|
@ -239,10 +264,13 @@
|
|||
`RESET_RELAY_EX (dst, src, 1, 0)
|
||||
|
||||
// size(x): 0 -> 0, 1 -> 1, 2 -> 2, 3 -> 2, 4-> 2, 5 -> 2
|
||||
`define TO_OUT_BUF_SIZE(s) `MIN(s, 2)
|
||||
`define TO_OUT_BUF_SIZE(s) `MIN(s & 7, 2)
|
||||
|
||||
// reg(x): 0 -> 0, 1 -> 1, 2 -> 0, 3 -> 1, 4 -> 2, 5 > 3
|
||||
`define TO_OUT_BUF_REG(s) ((s < 2) ? s : (s - 2))
|
||||
`define TO_OUT_BUF_REG(s) (((s & 7) < 2) ? (s & 7) : ((s & 7) - 2))
|
||||
|
||||
// lut(x): (x & 8) != 0
|
||||
`define TO_OUT_BUF_LUTRAM(s) ((s & 8) != 0)
|
||||
|
||||
`define REPEAT(n,f,s) `_REPEAT_``n(f,s)
|
||||
`define _REPEAT_0(f,s)
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -21,48 +21,67 @@
|
|||
input wire scope_bus_in, \
|
||||
output wire scope_bus_out,
|
||||
|
||||
`define SCOPE_IO_SWITCH(__count) \
|
||||
wire scope_bus_in_w [__count]; \
|
||||
wire scope_bus_out_w [__count]; \
|
||||
`RESET_RELAY_EX(scope_reset_w, scope_reset, __count, 4); \
|
||||
VX_scope_switch #( \
|
||||
.N (__count) \
|
||||
) scope_switch ( \
|
||||
.clk (clk), \
|
||||
.reset (scope_reset), \
|
||||
.req_in (scope_bus_in), \
|
||||
.rsp_out (scope_bus_out), \
|
||||
.req_out (scope_bus_in_w), \
|
||||
.rsp_in (scope_bus_out_w) \
|
||||
);
|
||||
|
||||
`define SCOPE_IO_BIND(__i) \
|
||||
.scope_reset (scope_reset_w[__i]), \
|
||||
.scope_bus_in (scope_bus_in_w[__i]), \
|
||||
.scope_bus_out (scope_bus_out_w[__i]),
|
||||
|
||||
`define SCOPE_IO_UNUSED() \
|
||||
`UNUSED_VAR (scope_reset); \
|
||||
`UNUSED_VAR (scope_bus_in); \
|
||||
assign scope_bus_out = 0;
|
||||
|
||||
`define SCOPE_IO_UNUSED_W(__i) \
|
||||
`define SCOPE_IO_UNUSED(__i) \
|
||||
`UNUSED_VAR (scope_reset_w[__i]); \
|
||||
`UNUSED_VAR (scope_bus_in_w[__i]); \
|
||||
assign scope_bus_out_w[__i] = 0;
|
||||
|
||||
`define SCOPE_IO_SWITCH(__count) \
|
||||
wire [__count-1:0] scope_bus_in_w; \
|
||||
wire [__count-1:0] scope_bus_out_w; \
|
||||
wire [__count-1:0] scope_reset_w = {__count{scope_reset}}; \
|
||||
VX_scope_switch #( \
|
||||
.N (__count) \
|
||||
) scope_switch ( \
|
||||
.clk (clk), \
|
||||
.reset (scope_reset), \
|
||||
.req_in (scope_bus_in), \
|
||||
.rsp_out (scope_bus_out), \
|
||||
.req_out (scope_bus_in_w), \
|
||||
.rsp_in (scope_bus_out_w) \
|
||||
)
|
||||
|
||||
`define SCOPE_TAP_EX(__idx, __id, __xtriggers_w, __htriggers_w, __probes_w, __xtriggers, __htriggers, __probes, __start, __stop, __depth) \
|
||||
VX_scope_tap #( \
|
||||
.SCOPE_ID (__id), \
|
||||
.XTRIGGERW(__xtriggers_w), \
|
||||
.HTRIGGERW(__htriggers_w), \
|
||||
.PROBEW (__probes_w), \
|
||||
.DEPTH (__depth) \
|
||||
) scope_tap_``idx ( \
|
||||
.clk (clk), \
|
||||
.reset (scope_reset_w[__idx]), \
|
||||
.start (__start), \
|
||||
.stop (__stop), \
|
||||
.xtriggers(__xtriggers), \
|
||||
.htriggers(__htriggers), \
|
||||
.probes (__probes), \
|
||||
.bus_in (scope_bus_in_w[__idx]), \
|
||||
.bus_out(scope_bus_out_w[__idx]) \
|
||||
)
|
||||
|
||||
`define SCOPE_TAP(__idx, __id, __xtriggers, __htriggers, __probes, __start, __stop, __depth) \
|
||||
`SCOPE_TAP_EX(__idx, __id, $bits(__xtriggers), $bits(__htriggers), $bits(__probes), __xtriggers, __htriggers, __probes, __start, __stop, __depth)
|
||||
|
||||
`else
|
||||
|
||||
`define SCOPE_IO_DECL
|
||||
|
||||
`define SCOPE_IO_SWITCH(__count)
|
||||
|
||||
`define SCOPE_IO_BIND(__i)
|
||||
|
||||
`define SCOPE_IO_UNUSED_W(__i)
|
||||
|
||||
`define SCOPE_IO_UNUSED(__i)
|
||||
|
||||
`define SCOPE_IO_SWITCH(__count)
|
||||
|
||||
`define SCOPE_TAP(__idx, __id, __xtriggers, __probes, __depth)
|
||||
|
||||
`define SCOPE_TAP_EX(__idx, __id, __xtriggers_w, __probes_w, __xtriggers, __probes, __depth)
|
||||
|
||||
`endif
|
||||
|
||||
`endif // VX_SCOPE_VH
|
||||
|
|
|
@ -49,14 +49,12 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
`ifdef GBAR_ENABLE
|
||||
VX_gbar_bus_if per_core_gbar_bus_if[`SOCKET_SIZE]();
|
||||
|
||||
`RESET_RELAY (gbar_arb_reset, reset);
|
||||
|
||||
VX_gbar_arb #(
|
||||
.NUM_REQS (`SOCKET_SIZE),
|
||||
.OUT_BUF ((`SOCKET_SIZE > 1) ? 2 : 0)
|
||||
) gbar_arb (
|
||||
.clk (clk),
|
||||
.reset (gbar_arb_reset),
|
||||
.reset (reset),
|
||||
.bus_in_if (per_core_gbar_bus_if),
|
||||
.bus_out_if (gbar_bus_if)
|
||||
);
|
||||
|
@ -105,7 +103,7 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.WRITE_ENABLE (0),
|
||||
.NC_ENABLE (0),
|
||||
.CORE_OUT_BUF (2),
|
||||
.CORE_OUT_BUF (3),
|
||||
.MEM_OUT_BUF (2)
|
||||
) icache (
|
||||
`ifdef PERF_ENABLE
|
||||
|
@ -152,7 +150,7 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
.WRITEBACK (`DCACHE_WRITEBACK),
|
||||
.DIRTY_BYTES (`DCACHE_WRITEBACK),
|
||||
.NC_ENABLE (1),
|
||||
.CORE_OUT_BUF (2),
|
||||
.CORE_OUT_BUF (3),
|
||||
.MEM_OUT_BUF (2)
|
||||
) dcache (
|
||||
`ifdef PERF_ENABLE
|
||||
|
@ -180,13 +178,13 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
`ASSIGN_VX_MEM_BUS_IF_X (l1_mem_bus_if[1], dcache_mem_bus_if, L1_MEM_TAG_WIDTH, DCACHE_MEM_TAG_WIDTH);
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATA_SIZE (`L1_LINE_SIZE),
|
||||
.TAG_WIDTH (L1_MEM_TAG_WIDTH),
|
||||
.TAG_SEL_IDX (0),
|
||||
.ARBITER ("R"),
|
||||
.REQ_OUT_BUF (2),
|
||||
.RSP_OUT_BUF (2)
|
||||
.NUM_INPUTS (2),
|
||||
.DATA_SIZE (`L1_LINE_SIZE),
|
||||
.TAG_WIDTH (L1_MEM_TAG_WIDTH),
|
||||
.TAG_SEL_IDX(0),
|
||||
.ARBITER ("P"), // prioritize the icache
|
||||
.REQ_OUT_BUF(3),
|
||||
.RSP_OUT_BUF(3)
|
||||
) mem_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -200,14 +198,14 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
|
||||
wire [`SOCKET_SIZE-1:0] per_core_busy;
|
||||
|
||||
VX_dcr_bus_if core_dcr_bus_if();
|
||||
`BUFFER_DCR_BUS_IF (core_dcr_bus_if, dcr_bus_if, (`SOCKET_SIZE > 1));
|
||||
|
||||
// Generate all cores
|
||||
for (genvar core_id = 0; core_id < `SOCKET_SIZE; ++core_id) begin : cores
|
||||
for (genvar core_id = 0; core_id < `SOCKET_SIZE; ++core_id) begin : g_cores
|
||||
|
||||
`RESET_RELAY (core_reset, reset);
|
||||
|
||||
VX_dcr_bus_if core_dcr_bus_if();
|
||||
`BUFFER_DCR_BUS_IF (core_dcr_bus_if, dcr_bus_if, 1'b1, (`SOCKET_SIZE > 1))
|
||||
|
||||
VX_core #(
|
||||
.CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + core_id),
|
||||
.INSTANCE_ID ($sformatf("%s-core%0d", INSTANCE_ID, core_id))
|
||||
|
|
|
@ -201,4 +201,10 @@
|
|||
`define VX_CSR_NUM_CORES 12'hFC2
|
||||
`define VX_CSR_LOCAL_MEM_BASE 12'hFC3
|
||||
|
||||
`define VX_MAT_MUL_SIZE 12'hFC4 // VX_MAT_MUL_SIZE = Matrix Size / TC Size
|
||||
`define VX_TC_NUM 12'hFC5
|
||||
`define VX_TC_SIZE 12'hFC6
|
||||
|
||||
|
||||
|
||||
`endif // VX_TYPES_VH
|
||||
|
|
|
@ -86,8 +86,8 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
.WRITEBACK (`L3_WRITEBACK),
|
||||
.DIRTY_BYTES (`L3_WRITEBACK),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.CORE_OUT_BUF (2),
|
||||
.MEM_OUT_BUF (2),
|
||||
.CORE_OUT_BUF (3),
|
||||
.MEM_OUT_BUF (3),
|
||||
.NC_ENABLE (1),
|
||||
.PASSTHRU (!`L3_ENABLED)
|
||||
) l3cache (
|
||||
|
@ -109,7 +109,7 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
assign mem_req_data = mem_bus_if.req_data.data;
|
||||
assign mem_req_tag = mem_bus_if.req_data.tag;
|
||||
assign mem_bus_if.req_ready = mem_req_ready;
|
||||
`UNUSED_VAR (mem_bus_if.req_data.atype)
|
||||
`UNUSED_VAR (mem_bus_if.req_data.flags)
|
||||
|
||||
assign mem_bus_if.rsp_valid = mem_rsp_valid;
|
||||
assign mem_bus_if.rsp_data.data = mem_rsp_data;
|
||||
|
@ -129,12 +129,12 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
|
||||
|
||||
// Generate all clusters
|
||||
for (genvar cluster_id = 0; cluster_id < `NUM_CLUSTERS; ++cluster_id) begin : clusters
|
||||
for (genvar cluster_id = 0; cluster_id < `NUM_CLUSTERS; ++cluster_id) begin : g_clusters
|
||||
|
||||
`RESET_RELAY (cluster_reset, reset);
|
||||
|
||||
VX_dcr_bus_if cluster_dcr_bus_if();
|
||||
`BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, (`NUM_CLUSTERS > 1));
|
||||
`BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, 1'b1, (`NUM_CLUSTERS > 1))
|
||||
|
||||
VX_cluster #(
|
||||
.CLUSTER_ID (cluster_id),
|
||||
|
@ -189,16 +189,26 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
|
||||
`endif
|
||||
|
||||
// dump device configuration
|
||||
initial begin
|
||||
`TRACE(0, ("CONFIGS: num_threads=%0d, num_warps=%0d, num_cores=%0d, num_clusters=%0d, socket_size=%0d, local_mem_base=0x%0h, num_barriers=%0d\n",
|
||||
`NUM_THREADS, `NUM_WARPS, `NUM_CORES, `NUM_CLUSTERS, `SOCKET_SIZE, `LMEM_BASE_ADDR, `NUM_BARRIERS))
|
||||
end
|
||||
|
||||
`ifdef DBG_TRACE_MEM
|
||||
wire [`UUID_WIDTH-1:0] mem_req_uuid = mem_req_tag[`VX_MEM_TAG_WIDTH-1 -: `UUID_WIDTH];
|
||||
wire [`UUID_WIDTH-1:0] mem_rsp_uuid = mem_rsp_tag[`VX_MEM_TAG_WIDTH-1 -: `UUID_WIDTH];
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (mem_req_fire) begin
|
||||
if (mem_req_rw)
|
||||
`TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data));
|
||||
else
|
||||
`TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen));
|
||||
if (mem_req_rw) begin
|
||||
`TRACE(1, ("%t: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h (#%0d)\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data, mem_req_uuid))
|
||||
end else begin
|
||||
`TRACE(1, ("%t: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h (#%0d)\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_uuid))
|
||||
end
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(1, ("%d: MEM Rd Rsp: tag=0x%0h, data=0x%h\n", $time, mem_rsp_tag, mem_rsp_data));
|
||||
`TRACE(1, ("%t: MEM Rd Rsp: tag=0x%0h, data=0x%h (#%0d)\n", $time, mem_rsp_tag, mem_rsp_data, mem_rsp_uuid))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
|
||||
module Vortex_axi import VX_gpu_pkg::*; #(
|
||||
parameter AXI_DATA_WIDTH = `VX_MEM_DATA_WIDTH,
|
||||
parameter AXI_ADDR_WIDTH = `MEM_ADDR_WIDTH,
|
||||
parameter AXI_ADDR_WIDTH = `MEM_ADDR_WIDTH + (`VX_MEM_DATA_WIDTH/8),
|
||||
parameter AXI_TID_WIDTH = `VX_MEM_TAG_WIDTH,
|
||||
parameter AXI_NUM_BANKS = 1
|
||||
)(
|
||||
|
@ -82,9 +82,11 @@ module Vortex_axi import VX_gpu_pkg::*; #(
|
|||
// Status
|
||||
output wire busy
|
||||
);
|
||||
`STATIC_ASSERT((AXI_DATA_WIDTH == `VX_MEM_DATA_WIDTH), ("invalid memory data size: current=%0d, expected=%0d", AXI_DATA_WIDTH, `VX_MEM_DATA_WIDTH))
|
||||
`STATIC_ASSERT((AXI_ADDR_WIDTH >= `MEM_ADDR_WIDTH), ("invalid memory address size: current=%0d, expected=%0d", AXI_ADDR_WIDTH, `VX_MEM_ADDR_WIDTH))
|
||||
//`STATIC_ASSERT((AXI_TID_WIDTH >= `VX_MEM_TAG_WIDTH), ("invalid memory tag size: current=%0d, expected=%0d", AXI_TID_WIDTH, `VX_MEM_TAG_WIDTH))
|
||||
localparam DST_LDATAW = `CLOG2(AXI_DATA_WIDTH);
|
||||
localparam SRC_LDATAW = `CLOG2(`VX_MEM_DATA_WIDTH);
|
||||
localparam SUB_LDATAW = DST_LDATAW - SRC_LDATAW;
|
||||
localparam VX_MEM_TAG_A_WIDTH = `VX_MEM_TAG_WIDTH + `MAX(SUB_LDATAW, 0);
|
||||
localparam VX_MEM_ADDR_A_WIDTH = `VX_MEM_ADDR_WIDTH - SUB_LDATAW;
|
||||
|
||||
wire mem_req_valid;
|
||||
wire mem_req_rw;
|
||||
|
@ -99,95 +101,7 @@ module Vortex_axi import VX_gpu_pkg::*; #(
|
|||
wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag;
|
||||
wire mem_rsp_ready;
|
||||
|
||||
wire [`MEM_ADDR_WIDTH-1:0] m_axi_awaddr_unqual [AXI_NUM_BANKS];
|
||||
wire [`MEM_ADDR_WIDTH-1:0] m_axi_araddr_unqual [AXI_NUM_BANKS];
|
||||
|
||||
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_awid_unqual [AXI_NUM_BANKS];
|
||||
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_arid_unqual [AXI_NUM_BANKS];
|
||||
|
||||
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_bid_unqual [AXI_NUM_BANKS];
|
||||
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_rid_unqual [AXI_NUM_BANKS];
|
||||
|
||||
for (genvar i = 0; i < AXI_NUM_BANKS; ++i) begin
|
||||
assign m_axi_awaddr[i] = `MEM_ADDR_WIDTH'(m_axi_awaddr_unqual[i]);
|
||||
assign m_axi_araddr[i] = `MEM_ADDR_WIDTH'(m_axi_araddr_unqual[i]);
|
||||
|
||||
assign m_axi_awid[i] = AXI_TID_WIDTH'(m_axi_awid_unqual[i]);
|
||||
assign m_axi_arid[i] = AXI_TID_WIDTH'(m_axi_arid_unqual[i]);
|
||||
|
||||
assign m_axi_rid_unqual[i] = `VX_MEM_TAG_WIDTH'(m_axi_rid[i]);
|
||||
assign m_axi_bid_unqual[i] = `VX_MEM_TAG_WIDTH'(m_axi_bid[i]);
|
||||
end
|
||||
|
||||
VX_axi_adapter #(
|
||||
.DATA_WIDTH (`VX_MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`MEM_ADDR_WIDTH),
|
||||
.TAG_WIDTH (`VX_MEM_TAG_WIDTH),
|
||||
.NUM_BANKS (AXI_NUM_BANKS),
|
||||
.RSP_OUT_BUF((AXI_NUM_BANKS > 1) ? 2 : 0)
|
||||
) axi_adapter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.mem_req_valid (mem_req_valid),
|
||||
.mem_req_rw (mem_req_rw),
|
||||
.mem_req_byteen (mem_req_byteen),
|
||||
.mem_req_addr (mem_req_addr),
|
||||
.mem_req_data (mem_req_data),
|
||||
.mem_req_tag (mem_req_tag),
|
||||
.mem_req_ready (mem_req_ready),
|
||||
|
||||
.mem_rsp_valid (mem_rsp_valid),
|
||||
.mem_rsp_data (mem_rsp_data),
|
||||
.mem_rsp_tag (mem_rsp_tag),
|
||||
.mem_rsp_ready (mem_rsp_ready),
|
||||
|
||||
.m_axi_awvalid (m_axi_awvalid),
|
||||
.m_axi_awready (m_axi_awready),
|
||||
.m_axi_awaddr (m_axi_awaddr_unqual),
|
||||
.m_axi_awid (m_axi_awid_unqual),
|
||||
.m_axi_awlen (m_axi_awlen),
|
||||
.m_axi_awsize (m_axi_awsize),
|
||||
.m_axi_awburst (m_axi_awburst),
|
||||
.m_axi_awlock (m_axi_awlock),
|
||||
.m_axi_awcache (m_axi_awcache),
|
||||
.m_axi_awprot (m_axi_awprot),
|
||||
.m_axi_awqos (m_axi_awqos),
|
||||
.m_axi_awregion (m_axi_awregion),
|
||||
|
||||
.m_axi_wvalid (m_axi_wvalid),
|
||||
.m_axi_wready (m_axi_wready),
|
||||
.m_axi_wdata (m_axi_wdata),
|
||||
.m_axi_wstrb (m_axi_wstrb),
|
||||
.m_axi_wlast (m_axi_wlast),
|
||||
|
||||
.m_axi_bvalid (m_axi_bvalid),
|
||||
.m_axi_bready (m_axi_bready),
|
||||
.m_axi_bid (m_axi_bid_unqual),
|
||||
.m_axi_bresp (m_axi_bresp),
|
||||
|
||||
.m_axi_arvalid (m_axi_arvalid),
|
||||
.m_axi_arready (m_axi_arready),
|
||||
.m_axi_araddr (m_axi_araddr_unqual),
|
||||
.m_axi_arid (m_axi_arid_unqual),
|
||||
.m_axi_arlen (m_axi_arlen),
|
||||
.m_axi_arsize (m_axi_arsize),
|
||||
.m_axi_arburst (m_axi_arburst),
|
||||
.m_axi_arlock (m_axi_arlock),
|
||||
.m_axi_arcache (m_axi_arcache),
|
||||
.m_axi_arprot (m_axi_arprot),
|
||||
.m_axi_arqos (m_axi_arqos),
|
||||
.m_axi_arregion (m_axi_arregion),
|
||||
|
||||
.m_axi_rvalid (m_axi_rvalid),
|
||||
.m_axi_rready (m_axi_rready),
|
||||
.m_axi_rdata (m_axi_rdata),
|
||||
.m_axi_rlast (m_axi_rlast) ,
|
||||
.m_axi_rid (m_axi_rid_unqual),
|
||||
.m_axi_rresp (m_axi_rresp)
|
||||
);
|
||||
|
||||
`SCOPE_IO_SWITCH (1)
|
||||
`SCOPE_IO_SWITCH (1);
|
||||
|
||||
Vortex vortex (
|
||||
`SCOPE_IO_BIND (0)
|
||||
|
@ -215,4 +129,128 @@ module Vortex_axi import VX_gpu_pkg::*; #(
|
|||
.busy (busy)
|
||||
);
|
||||
|
||||
wire mem_req_valid_a;
|
||||
wire mem_req_rw_a;
|
||||
wire [(AXI_DATA_WIDTH/8)-1:0] mem_req_byteen_a;
|
||||
wire [VX_MEM_ADDR_A_WIDTH-1:0] mem_req_addr_a;
|
||||
wire [AXI_DATA_WIDTH-1:0] mem_req_data_a;
|
||||
wire [VX_MEM_TAG_A_WIDTH-1:0] mem_req_tag_a;
|
||||
wire mem_req_ready_a;
|
||||
|
||||
wire mem_rsp_valid_a;
|
||||
wire [AXI_DATA_WIDTH-1:0] mem_rsp_data_a;
|
||||
wire [VX_MEM_TAG_A_WIDTH-1:0] mem_rsp_tag_a;
|
||||
wire mem_rsp_ready_a;
|
||||
|
||||
VX_mem_adapter #(
|
||||
.SRC_DATA_WIDTH (`VX_MEM_DATA_WIDTH),
|
||||
.DST_DATA_WIDTH (AXI_DATA_WIDTH),
|
||||
.SRC_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH),
|
||||
.DST_ADDR_WIDTH (VX_MEM_ADDR_A_WIDTH),
|
||||
.SRC_TAG_WIDTH (`VX_MEM_TAG_WIDTH),
|
||||
.DST_TAG_WIDTH (VX_MEM_TAG_A_WIDTH),
|
||||
.REQ_OUT_BUF (0),
|
||||
.RSP_OUT_BUF (0)
|
||||
) mem_adapter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.mem_req_valid_in (mem_req_valid),
|
||||
.mem_req_addr_in (mem_req_addr),
|
||||
.mem_req_rw_in (mem_req_rw),
|
||||
.mem_req_byteen_in (mem_req_byteen),
|
||||
.mem_req_data_in (mem_req_data),
|
||||
.mem_req_tag_in (mem_req_tag),
|
||||
.mem_req_ready_in (mem_req_ready),
|
||||
|
||||
.mem_rsp_valid_in (mem_rsp_valid),
|
||||
.mem_rsp_data_in (mem_rsp_data),
|
||||
.mem_rsp_tag_in (mem_rsp_tag),
|
||||
.mem_rsp_ready_in (mem_rsp_ready),
|
||||
|
||||
.mem_req_valid_out (mem_req_valid_a),
|
||||
.mem_req_addr_out (mem_req_addr_a),
|
||||
.mem_req_rw_out (mem_req_rw_a),
|
||||
.mem_req_byteen_out (mem_req_byteen_a),
|
||||
.mem_req_data_out (mem_req_data_a),
|
||||
.mem_req_tag_out (mem_req_tag_a),
|
||||
.mem_req_ready_out (mem_req_ready_a),
|
||||
|
||||
.mem_rsp_valid_out (mem_rsp_valid_a),
|
||||
.mem_rsp_data_out (mem_rsp_data_a),
|
||||
.mem_rsp_tag_out (mem_rsp_tag_a),
|
||||
.mem_rsp_ready_out (mem_rsp_ready_a)
|
||||
);
|
||||
|
||||
VX_axi_adapter #(
|
||||
.DATA_WIDTH (AXI_DATA_WIDTH),
|
||||
.ADDR_WIDTH_IN (VX_MEM_ADDR_A_WIDTH),
|
||||
.ADDR_WIDTH_OUT (AXI_ADDR_WIDTH),
|
||||
.TAG_WIDTH_IN (VX_MEM_TAG_A_WIDTH),
|
||||
.TAG_WIDTH_OUT (AXI_TID_WIDTH),
|
||||
.NUM_BANKS (AXI_NUM_BANKS),
|
||||
.BANK_INTERLEAVE(0),
|
||||
.RSP_OUT_BUF ((AXI_NUM_BANKS > 1) ? 2 : 0)
|
||||
) axi_adapter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.mem_req_valid (mem_req_valid_a),
|
||||
.mem_req_rw (mem_req_rw_a),
|
||||
.mem_req_byteen (mem_req_byteen_a),
|
||||
.mem_req_addr (mem_req_addr_a),
|
||||
.mem_req_data (mem_req_data_a),
|
||||
.mem_req_tag (mem_req_tag_a),
|
||||
.mem_req_ready (mem_req_ready_a),
|
||||
|
||||
.mem_rsp_valid (mem_rsp_valid_a),
|
||||
.mem_rsp_data (mem_rsp_data_a),
|
||||
.mem_rsp_tag (mem_rsp_tag_a),
|
||||
.mem_rsp_ready (mem_rsp_ready_a),
|
||||
|
||||
.m_axi_awvalid (m_axi_awvalid),
|
||||
.m_axi_awready (m_axi_awready),
|
||||
.m_axi_awaddr (m_axi_awaddr),
|
||||
.m_axi_awid (m_axi_awid),
|
||||
.m_axi_awlen (m_axi_awlen),
|
||||
.m_axi_awsize (m_axi_awsize),
|
||||
.m_axi_awburst (m_axi_awburst),
|
||||
.m_axi_awlock (m_axi_awlock),
|
||||
.m_axi_awcache (m_axi_awcache),
|
||||
.m_axi_awprot (m_axi_awprot),
|
||||
.m_axi_awqos (m_axi_awqos),
|
||||
.m_axi_awregion (m_axi_awregion),
|
||||
|
||||
.m_axi_wvalid (m_axi_wvalid),
|
||||
.m_axi_wready (m_axi_wready),
|
||||
.m_axi_wdata (m_axi_wdata),
|
||||
.m_axi_wstrb (m_axi_wstrb),
|
||||
.m_axi_wlast (m_axi_wlast),
|
||||
|
||||
.m_axi_bvalid (m_axi_bvalid),
|
||||
.m_axi_bready (m_axi_bready),
|
||||
.m_axi_bid (m_axi_bid),
|
||||
.m_axi_bresp (m_axi_bresp),
|
||||
|
||||
.m_axi_arvalid (m_axi_arvalid),
|
||||
.m_axi_arready (m_axi_arready),
|
||||
.m_axi_araddr (m_axi_araddr),
|
||||
.m_axi_arid (m_axi_arid),
|
||||
.m_axi_arlen (m_axi_arlen),
|
||||
.m_axi_arsize (m_axi_arsize),
|
||||
.m_axi_arburst (m_axi_arburst),
|
||||
.m_axi_arlock (m_axi_arlock),
|
||||
.m_axi_arcache (m_axi_arcache),
|
||||
.m_axi_arprot (m_axi_arprot),
|
||||
.m_axi_arqos (m_axi_arqos),
|
||||
.m_axi_arregion (m_axi_arregion),
|
||||
|
||||
.m_axi_rvalid (m_axi_rvalid),
|
||||
.m_axi_rready (m_axi_rready),
|
||||
.m_axi_rdata (m_axi_rdata),
|
||||
.m_axi_rlast (m_axi_rlast),
|
||||
.m_axi_rid (m_axi_rid),
|
||||
.m_axi_rresp (m_axi_rresp)
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -30,7 +30,17 @@
|
|||
|
||||
//`include "platform_afu_top_config.vh"
|
||||
|
||||
`ifdef PLATFORM_PROVIDES_LOCAL_MEMORY
|
||||
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH
|
||||
`define PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH (`PLATFORM_MEMORY_ADDR_WIDTH - $clog2(`PLATFORM_MEMORY_DATA_WIDTH/8))
|
||||
`endif
|
||||
|
||||
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH
|
||||
`define PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH `PLATFORM_MEMORY_DATA_WIDTH
|
||||
`endif
|
||||
|
||||
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH
|
||||
`define PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH `PLATFORM_MEMORY_BURST_CNT_WIDTH
|
||||
`endif
|
||||
|
||||
package local_mem_cfg_pkg;
|
||||
|
||||
|
@ -57,5 +67,3 @@ package local_mem_cfg_pkg;
|
|||
typedef logic [LOCAL_MEM_DATA_N_BYTES-1:0] t_local_mem_byte_mask;
|
||||
|
||||
endpackage // local_mem_cfg_pkg
|
||||
|
||||
`endif // PLATFORM_PROVIDES_LOCAL_MEMORY
|
||||
|
|
|
@ -18,6 +18,10 @@
|
|||
`endif
|
||||
`include "VX_define.vh"
|
||||
|
||||
`ifndef PLATFORM_MEMORY_INTERLEAVE
|
||||
`define PLATFORM_MEMORY_INTERLEAVE 1
|
||||
`endif
|
||||
|
||||
module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_gpu_pkg::*; #(
|
||||
parameter NUM_LOCAL_MEM_BANKS = 2
|
||||
) (
|
||||
|
@ -40,16 +44,17 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
output t_local_mem_burst_cnt avs_burstcount [NUM_LOCAL_MEM_BANKS],
|
||||
input wire avs_readdatavalid [NUM_LOCAL_MEM_BANKS]
|
||||
);
|
||||
|
||||
localparam LMEM_DATA_WIDTH = $bits(t_local_mem_data);
|
||||
localparam LMEM_DATA_SIZE = LMEM_DATA_WIDTH / 8;
|
||||
localparam LMEM_ADDR_WIDTH = $bits(t_local_mem_addr);
|
||||
localparam LMEM_ADDR_WIDTH = `VX_MEM_ADDR_WIDTH + ($clog2(`VX_MEM_DATA_WIDTH) - $clog2(LMEM_DATA_WIDTH));
|
||||
localparam LMEM_BURST_CTRW = $bits(t_local_mem_burst_cnt);
|
||||
|
||||
localparam CCI_DATA_WIDTH = $bits(t_ccip_clData);
|
||||
localparam CCI_DATA_SIZE = CCI_DATA_WIDTH / 8;
|
||||
localparam CCI_ADDR_WIDTH = $bits(t_ccip_clAddr);
|
||||
|
||||
localparam RESET_CTR_WIDTH = `CLOG2(`RESET_DELAY+1);
|
||||
|
||||
localparam AVS_RD_QUEUE_SIZE = 32;
|
||||
localparam _VX_MEM_TAG_WIDTH = `VX_MEM_TAG_WIDTH;
|
||||
localparam _AVS_REQ_TAGW_VX = _VX_MEM_TAG_WIDTH + `CLOG2(LMEM_DATA_WIDTH) - `CLOG2(`VX_MEM_DATA_WIDTH);
|
||||
|
@ -64,6 +69,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
localparam AFU_ID_L = 16'h0002; // AFU ID Lower
|
||||
localparam AFU_ID_H = 16'h0004; // AFU ID Higher
|
||||
|
||||
localparam CMD_IDLE = 0;
|
||||
localparam CMD_MEM_READ = `AFU_IMAGE_CMD_MEM_READ;
|
||||
localparam CMD_MEM_WRITE = `AFU_IMAGE_CMD_MEM_WRITE;
|
||||
localparam CMD_DCR_WRITE = `AFU_IMAGE_CMD_DCR_WRITE;
|
||||
|
@ -78,7 +84,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
|
||||
localparam COUT_TID_WIDTH = `CLOG2(`VX_MEM_BYTEEN_WIDTH);
|
||||
localparam COUT_QUEUE_DATAW = COUT_TID_WIDTH + 8;
|
||||
localparam COUT_QUEUE_SIZE = 64;
|
||||
localparam COUT_QUEUE_SIZE = 1024;
|
||||
|
||||
localparam MMIO_DEV_CAPS = `AFU_IMAGE_MMIO_DEV_CAPS;
|
||||
localparam MMIO_ISA_CAPS = `AFU_IMAGE_MMIO_ISA_CAPS;
|
||||
|
@ -96,7 +102,9 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
|
||||
wire [127:0] afu_id = `AFU_ACCEL_UUID;
|
||||
|
||||
wire [63:0] dev_caps = {16'b0,
|
||||
wire [63:0] dev_caps = {8'b0,
|
||||
5'(`PLATFORM_MEMORY_ADDR_WIDTH-20),
|
||||
3'(`CLOG2(`PLATFORM_MEMORY_BANKS)),
|
||||
8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0),
|
||||
16'(`NUM_CORES * `NUM_CLUSTERS),
|
||||
8'(`NUM_WARPS),
|
||||
|
@ -139,14 +147,12 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
|
||||
// MMIO controller ////////////////////////////////////////////////////////////
|
||||
|
||||
t_ccip_c0_ReqMmioHdr mmio_hdr;
|
||||
assign mmio_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr);
|
||||
`UNUSED_VAR (mmio_hdr)
|
||||
t_ccip_c0_ReqMmioHdr mmio_req_hdr;
|
||||
assign mmio_req_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr[$bits(t_ccip_c0_ReqMmioHdr)-1:0]);
|
||||
`UNUSED_VAR (mmio_req_hdr)
|
||||
|
||||
`STATIC_ASSERT(($bits(t_ccip_c0_ReqMmioHdr)-$bits(mmio_hdr.address)) == 12, ("Oops!"))
|
||||
|
||||
t_if_ccip_c2_Tx mmio_tx;
|
||||
assign af2cp_sTxPort.c2 = mmio_tx;
|
||||
t_if_ccip_c2_Tx mmio_rsp;
|
||||
assign af2cp_sTxPort.c2 = mmio_rsp;
|
||||
|
||||
`ifdef SCOPE
|
||||
|
||||
|
@ -170,33 +176,35 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
if (reset) begin
|
||||
cmd_scope_reading <= 0;
|
||||
cmd_scope_writing <= 0;
|
||||
scope_bus_in <= 0;
|
||||
scope_bus_in <= 0;
|
||||
end else begin
|
||||
scope_bus_in <= 0;
|
||||
if (scope_bus_out) begin
|
||||
cmd_scope_reading <= 1;
|
||||
scope_bus_ctr <= 63;
|
||||
end
|
||||
scope_bus_in <= 0;
|
||||
if (cp2af_sRxPort.c0.mmioWrValid
|
||||
&& (MMIO_SCOPE_WRITE == mmio_hdr.address)) begin
|
||||
&& (MMIO_SCOPE_WRITE == mmio_req_hdr.address)) begin
|
||||
cmd_scope_wdata <= 64'(cp2af_sRxPort.c0.data);
|
||||
cmd_scope_writing <= 1;
|
||||
scope_bus_ctr <= 63;
|
||||
scope_bus_in <= 1;
|
||||
end
|
||||
end
|
||||
if (cmd_scope_writing) begin
|
||||
scope_bus_in <= 1'(cmd_scope_wdata >> scope_bus_ctr);
|
||||
scope_bus_ctr <= scope_bus_ctr - 1;
|
||||
if (scope_bus_ctr == 0) begin
|
||||
cmd_scope_writing <= 0;
|
||||
if (cmd_scope_writing) begin
|
||||
scope_bus_in <= cmd_scope_wdata[scope_bus_ctr];
|
||||
scope_bus_ctr <= scope_bus_ctr - 6'd1;
|
||||
if (scope_bus_ctr == 0) begin
|
||||
cmd_scope_writing <= 0;
|
||||
scope_bus_ctr <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
if (cmd_scope_reading) begin
|
||||
cmd_scope_rdata <= {cmd_scope_rdata[62:0], scope_bus_out};
|
||||
scope_bus_ctr <= scope_bus_ctr - 1;
|
||||
if (scope_bus_ctr == 0) begin
|
||||
cmd_scope_reading <= 0;
|
||||
if (cmd_scope_reading) begin
|
||||
cmd_scope_rdata <= {cmd_scope_rdata[62:0], scope_bus_out};
|
||||
scope_bus_ctr <= scope_bus_ctr - 6'd1;
|
||||
if (scope_bus_ctr == 0) begin
|
||||
cmd_scope_reading <= 0;
|
||||
scope_bus_ctr <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -206,6 +214,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
wire [COUT_QUEUE_DATAW-1:0] cout_q_dout;
|
||||
wire cout_q_full, cout_q_empty;
|
||||
|
||||
wire [COUT_QUEUE_DATAW-1:0] cout_q_dout_s = cout_q_dout & {COUT_QUEUE_DATAW{!cout_q_empty}};
|
||||
|
||||
`ifdef SIMULATION
|
||||
`ifndef VERILATOR
|
||||
// disable assertions until full reset
|
||||
|
@ -226,60 +236,22 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
`endif
|
||||
`endif
|
||||
|
||||
// MMIO controller ////////////////////////////////////////////////////////////
|
||||
|
||||
// Handle MMIO read requests
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
mmio_tx.mmioRdValid <= 0;
|
||||
mmio_tx.hdr <= '0;
|
||||
mmio_rsp.mmioRdValid <= 0;
|
||||
end else begin
|
||||
mmio_tx.mmioRdValid <= cp2af_sRxPort.c0.mmioRdValid;
|
||||
mmio_tx.hdr.tid <= mmio_hdr.tid;
|
||||
end
|
||||
// serve MMIO write request
|
||||
if (cp2af_sRxPort.c0.mmioWrValid) begin
|
||||
case (mmio_hdr.address)
|
||||
MMIO_CMD_ARG0: begin
|
||||
cmd_args[0] <= 64'(cp2af_sRxPort.c0.data);
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: MMIO_CMD_ARG0: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data)));
|
||||
`endif
|
||||
end
|
||||
MMIO_CMD_ARG1: begin
|
||||
cmd_args[1] <= 64'(cp2af_sRxPort.c0.data);
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: MMIO_CMD_ARG1: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data)));
|
||||
`endif
|
||||
end
|
||||
MMIO_CMD_ARG2: begin
|
||||
cmd_args[2] <= 64'(cp2af_sRxPort.c0.data);
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: MMIO_CMD_ARG2: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data)));
|
||||
`endif
|
||||
end
|
||||
MMIO_CMD_TYPE: begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: MMIO_CMD_TYPE: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data)));
|
||||
`endif
|
||||
end
|
||||
`ifdef SCOPE
|
||||
MMIO_SCOPE_WRITE: begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: MMIO_SCOPE_WRITE: data=0x%h\n", $time, cmd_scope_wdata));
|
||||
`endif
|
||||
end
|
||||
`endif
|
||||
default: begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: Unknown MMIO Wr: addr=0x%0h, data=0x%h\n", $time, mmio_hdr.address, 64'(cp2af_sRxPort.c0.data)));
|
||||
`endif
|
||||
end
|
||||
endcase
|
||||
mmio_rsp.mmioRdValid <= cp2af_sRxPort.c0.mmioRdValid;
|
||||
end
|
||||
|
||||
// serve MMIO read requests
|
||||
mmio_rsp.hdr.tid <= mmio_req_hdr.tid;
|
||||
|
||||
if (cp2af_sRxPort.c0.mmioRdValid) begin
|
||||
case (mmio_hdr.address)
|
||||
case (mmio_req_hdr.address)
|
||||
// AFU header
|
||||
16'h0000: mmio_tx.data <= {
|
||||
16'h0000: mmio_rsp.data <= {
|
||||
4'b0001, // Feature type = AFU
|
||||
8'b0, // reserved
|
||||
4'b0, // afu minor revision = 0
|
||||
|
@ -289,105 +261,140 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
4'b0, // afu major revision = 0
|
||||
12'b0 // feature ID = 0
|
||||
};
|
||||
AFU_ID_L: mmio_tx.data <= afu_id[63:0]; // afu id low
|
||||
AFU_ID_H: mmio_tx.data <= afu_id[127:64]; // afu id hi
|
||||
16'h0006: mmio_tx.data <= 64'h0; // next AFU
|
||||
16'h0008: mmio_tx.data <= 64'h0; // reserved
|
||||
AFU_ID_L: mmio_rsp.data <= afu_id[63:0]; // afu id low
|
||||
AFU_ID_H: mmio_rsp.data <= afu_id[127:64]; // afu id hi
|
||||
16'h0006: mmio_rsp.data <= 64'h0; // next AFU
|
||||
16'h0008: mmio_rsp.data <= 64'h0; // reserved
|
||||
MMIO_STATUS: begin
|
||||
mmio_tx.data <= 64'({cout_q_dout, !cout_q_empty, 8'(state)});
|
||||
mmio_rsp.data <= 64'({cout_q_dout_s, !cout_q_empty, 8'(state)});
|
||||
`ifdef DBG_TRACE_AFU
|
||||
if (state != STATE_WIDTH'(mmio_tx.data)) begin
|
||||
`TRACE(2, ("%d: MMIO_STATUS: addr=0x%0h, state=%0d\n", $time, mmio_hdr.address, state));
|
||||
if (state != STATE_WIDTH'(mmio_rsp.data)) begin
|
||||
`TRACE(2, ("%t: AFU: MMIO_STATUS: addr=0x%0h, state=%0d\n", $time, mmio_req_hdr.address, state))
|
||||
end
|
||||
`endif
|
||||
end
|
||||
`ifdef SCOPE
|
||||
MMIO_SCOPE_READ: begin
|
||||
mmio_tx.data <= cmd_scope_rdata;
|
||||
mmio_rsp.data <= cmd_scope_rdata;
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: MMIO_SCOPE_READ: data=0x%h\n", $time, cmd_scope_rdata));
|
||||
`TRACE(2, ("%t: AFU: MMIO_SCOPE_READ: data=0x%h\n", $time, cmd_scope_rdata))
|
||||
`endif
|
||||
end
|
||||
`endif
|
||||
MMIO_DEV_CAPS: begin
|
||||
mmio_tx.data <= dev_caps;
|
||||
mmio_rsp.data <= dev_caps;
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: MMIO_DEV_CAPS: data=0x%h\n", $time, dev_caps));
|
||||
`TRACE(2, ("%t: AFU: MMIO_DEV_CAPS: data=0x%h\n", $time, dev_caps))
|
||||
`endif
|
||||
end
|
||||
MMIO_ISA_CAPS: begin
|
||||
mmio_tx.data <= isa_caps;
|
||||
mmio_rsp.data <= isa_caps;
|
||||
`ifdef DBG_TRACE_AFU
|
||||
if (state != STATE_WIDTH'(mmio_tx.data)) begin
|
||||
`TRACE(2, ("%d: MMIO_ISA_CAPS: data=%0d\n", $time, isa_caps));
|
||||
if (state != STATE_WIDTH'(mmio_rsp.data)) begin
|
||||
`TRACE(2, ("%t: AFU: MMIO_ISA_CAPS: data=%0d\n", $time, isa_caps))
|
||||
end
|
||||
`endif
|
||||
end
|
||||
default: begin
|
||||
mmio_tx.data <= 64'h0;
|
||||
mmio_rsp.data <= 64'h0;
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: Unknown MMIO Rd: addr=0x%0h\n", $time, mmio_hdr.address));
|
||||
`TRACE(2, ("%t: AFU: Unknown MMIO Rd: addr=0x%0h\n", $time, mmio_req_hdr.address))
|
||||
`endif
|
||||
end
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// Handle MMIO write requests
|
||||
always @(posedge clk) begin
|
||||
if (cp2af_sRxPort.c0.mmioWrValid) begin
|
||||
case (mmio_req_hdr.address)
|
||||
MMIO_CMD_ARG0: begin
|
||||
cmd_args[0] <= 64'(cp2af_sRxPort.c0.data);
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%t: AFU: MMIO_CMD_ARG0: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data)))
|
||||
`endif
|
||||
end
|
||||
MMIO_CMD_ARG1: begin
|
||||
cmd_args[1] <= 64'(cp2af_sRxPort.c0.data);
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%t: AFU: MMIO_CMD_ARG1: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data)))
|
||||
`endif
|
||||
end
|
||||
MMIO_CMD_ARG2: begin
|
||||
cmd_args[2] <= 64'(cp2af_sRxPort.c0.data);
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%t: AFU: MMIO_CMD_ARG2: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data)))
|
||||
`endif
|
||||
end
|
||||
MMIO_CMD_TYPE: begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%t: AFU: MMIO_CMD_TYPE: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data)))
|
||||
`endif
|
||||
end
|
||||
`ifdef SCOPE
|
||||
MMIO_SCOPE_WRITE: begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%t: AFU: MMIO_SCOPE_WRITE: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data)))
|
||||
`endif
|
||||
end
|
||||
`endif
|
||||
default: begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%t: Unknown MMIO Wr: addr=0x%0h, data=0x%h\n", $time, mmio_req_hdr.address, 64'(cp2af_sRxPort.c0.data)))
|
||||
`endif
|
||||
end
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// COMMAND FSM ////////////////////////////////////////////////////////////////
|
||||
|
||||
wire cmd_mem_rd_done;
|
||||
reg cmd_mem_wr_done;
|
||||
|
||||
reg [RESET_CTR_WIDTH-1:0] vx_reset_ctr;
|
||||
reg vx_busy_wait;
|
||||
reg vx_running;
|
||||
reg vx_reset = 1; // asserted at initialization
|
||||
wire vx_busy;
|
||||
|
||||
reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr;
|
||||
always @(posedge clk) begin
|
||||
if (state == STATE_RUN) begin
|
||||
vx_reset_ctr <= vx_reset_ctr + $bits(vx_reset_ctr)'(1);
|
||||
end else begin
|
||||
vx_reset_ctr <= '0;
|
||||
end
|
||||
end
|
||||
|
||||
wire is_mmio_wr_cmd = cp2af_sRxPort.c0.mmioWrValid && (MMIO_CMD_TYPE == mmio_hdr.address);
|
||||
wire is_mmio_wr_cmd = cp2af_sRxPort.c0.mmioWrValid && (MMIO_CMD_TYPE == mmio_req_hdr.address);
|
||||
wire [CMD_TYPE_WIDTH-1:0] cmd_type = is_mmio_wr_cmd ?
|
||||
CMD_TYPE_WIDTH'(cp2af_sRxPort.c0.data) : CMD_TYPE_WIDTH'(0);
|
||||
CMD_TYPE_WIDTH'(cp2af_sRxPort.c0.data) : CMD_TYPE_WIDTH'(CMD_IDLE);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= STATE_IDLE;
|
||||
vx_busy_wait <= 0;
|
||||
vx_running <= 0;
|
||||
state <= STATE_IDLE;
|
||||
vx_reset <= 1;
|
||||
end else begin
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
case (cmd_type)
|
||||
CMD_MEM_READ: begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: STATE MEM_READ: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size));
|
||||
`TRACE(2, ("%t: AFU: Goto STATE MEM_READ: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size))
|
||||
`endif
|
||||
state <= STATE_MEM_READ;
|
||||
end
|
||||
CMD_MEM_WRITE: begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: STATE MEM_WRITE: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size));
|
||||
`TRACE(2, ("%t: AFU: Goto STATE MEM_WRITE: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size))
|
||||
`endif
|
||||
state <= STATE_MEM_WRITE;
|
||||
end
|
||||
CMD_DCR_WRITE: begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: STATE DCR_WRITE: addr=0x%0h data=%0d\n", $time, cmd_dcr_addr, cmd_dcr_data));
|
||||
`TRACE(2, ("%t: AFU: Goto STATE DCR_WRITE: addr=0x%0h data=%0d\n", $time, cmd_dcr_addr, cmd_dcr_data))
|
||||
`endif
|
||||
state <= STATE_DCR_WRITE;
|
||||
end
|
||||
CMD_RUN: begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: STATE RUN\n", $time));
|
||||
`TRACE(2, ("%t: AFU: Goto STATE RUN\n", $time))
|
||||
`endif
|
||||
state <= STATE_RUN;
|
||||
vx_running <= 0;
|
||||
vx_reset_ctr <= RESET_CTR_WIDTH'(`RESET_DELAY-1);
|
||||
vx_reset <= 1;
|
||||
end
|
||||
default: begin
|
||||
state <= state;
|
||||
|
@ -398,54 +405,56 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
if (cmd_mem_rd_done) begin
|
||||
state <= STATE_IDLE;
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: STATE IDLE\n", $time));
|
||||
`TRACE(2, ("%t: AFU: Goto STATE IDLE\n", $time))
|
||||
`endif
|
||||
end
|
||||
end
|
||||
STATE_MEM_WRITE: begin
|
||||
if (cmd_mem_wr_done) begin
|
||||
state <= STATE_IDLE;
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: STATE IDLE\n", $time));
|
||||
`endif
|
||||
end
|
||||
end
|
||||
STATE_DCR_WRITE: begin
|
||||
state <= STATE_IDLE;
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: STATE IDLE\n", $time));
|
||||
`TRACE(2, ("%t: AFU: Goto STATE IDLE\n", $time))
|
||||
`endif
|
||||
end
|
||||
STATE_RUN: begin
|
||||
if (vx_running) begin
|
||||
if (vx_busy_wait) begin
|
||||
// wait until the gpu goes busy
|
||||
if (vx_busy) begin
|
||||
vx_busy_wait <= 0;
|
||||
end
|
||||
end else begin
|
||||
// wait until the gpu is not busy
|
||||
if (~vx_busy) begin
|
||||
state <= STATE_IDLE;
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: AFU: End execution\n", $time));
|
||||
`TRACE(2, ("%d: STATE IDLE\n", $time));
|
||||
`endif
|
||||
end
|
||||
end
|
||||
if (vx_reset) begin
|
||||
// wait until the reset network is ready
|
||||
if (vx_reset_ctr == RESET_CTR_WIDTH'(0)) begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%t: AFU: Begin execution\n", $time))
|
||||
`endif
|
||||
vx_busy_wait <= 1;
|
||||
vx_reset <= 0;
|
||||
end
|
||||
end else begin
|
||||
// wait until the reset sequence is complete
|
||||
if (vx_reset_ctr == (`RESET_DELAY-1)) begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: AFU: Begin execution\n", $time));
|
||||
`endif
|
||||
vx_running <= 1;
|
||||
vx_busy_wait <= 1;
|
||||
end
|
||||
if (vx_busy_wait) begin
|
||||
// wait until processor goes busy
|
||||
if (vx_busy) begin
|
||||
vx_busy_wait <= 0;
|
||||
end
|
||||
end else begin
|
||||
// wait until the processor is not busy
|
||||
if (~vx_busy) begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%t: AFU: End execution\n", $time))
|
||||
`TRACE(2, ("%t: AFU: Goto STATE IDLE\n", $time))
|
||||
`endif
|
||||
state <= STATE_IDLE;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
|
||||
// ensure reset network initialization
|
||||
if (vx_reset_ctr != RESET_CTR_WIDTH'(0)) begin
|
||||
vx_reset_ctr <= vx_reset_ctr - RESET_CTR_WIDTH'(1);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -475,8 +484,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.TAG_WIDTH (AVS_REQ_TAGW)
|
||||
) cci_vx_mem_bus_if[2]();
|
||||
|
||||
`RESET_RELAY (cci_adapter_reset, reset);
|
||||
|
||||
VX_mem_adapter #(
|
||||
.SRC_DATA_WIDTH (CCI_DATA_WIDTH),
|
||||
.DST_DATA_WIDTH (LMEM_DATA_WIDTH),
|
||||
|
@ -488,7 +495,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.RSP_OUT_BUF (0)
|
||||
) cci_mem_adapter (
|
||||
.clk (clk),
|
||||
.reset (cci_adapter_reset),
|
||||
.reset (reset),
|
||||
|
||||
.mem_req_valid_in (cci_mem_req_valid),
|
||||
.mem_req_addr_in (cci_mem_req_addr),
|
||||
|
@ -517,8 +524,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.mem_rsp_ready_out (cci_vx_mem_bus_if[1].rsp_ready)
|
||||
);
|
||||
|
||||
assign cci_vx_mem_bus_if[1].req_data.atype = '0;
|
||||
`UNUSED_VAR (cci_vx_mem_bus_if[1].req_data.atype)
|
||||
assign cci_vx_mem_bus_if[1].req_data.flags = '0;
|
||||
|
||||
//--
|
||||
|
||||
|
@ -528,8 +534,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
|
||||
assign vx_mem_req_valid_qual = vx_mem_req_valid && ~vx_mem_is_cout;
|
||||
|
||||
`RESET_RELAY (vx_adapter_reset, reset);
|
||||
|
||||
VX_mem_adapter #(
|
||||
.SRC_DATA_WIDTH (`VX_MEM_DATA_WIDTH),
|
||||
.DST_DATA_WIDTH (LMEM_DATA_WIDTH),
|
||||
|
@ -541,7 +545,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.RSP_OUT_BUF (2)
|
||||
) vx_mem_adapter (
|
||||
.clk (clk),
|
||||
.reset (vx_adapter_reset),
|
||||
.reset (reset),
|
||||
|
||||
.mem_req_valid_in (vx_mem_req_valid_qual),
|
||||
.mem_req_addr_in (vx_mem_req_addr),
|
||||
|
@ -570,8 +574,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.mem_rsp_ready_out (cci_vx_mem_bus_if[0].rsp_ready)
|
||||
);
|
||||
|
||||
assign cci_vx_mem_bus_if[0].req_data.atype = '0;
|
||||
`UNUSED_VAR (cci_vx_mem_bus_if[0].req_data.atype)
|
||||
assign cci_vx_mem_bus_if[0].req_data.flags = '0;
|
||||
|
||||
//--
|
||||
VX_mem_bus_if #(
|
||||
|
@ -597,20 +600,20 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
|
||||
//--
|
||||
|
||||
`RESET_RELAY (avs_adapter_reset, reset);
|
||||
|
||||
VX_avs_adapter #(
|
||||
.DATA_WIDTH (LMEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
|
||||
.ADDR_WIDTH_IN (LMEM_ADDR_WIDTH),
|
||||
.ADDR_WIDTH_OUT($bits(t_local_mem_addr)),
|
||||
.BURST_WIDTH (LMEM_BURST_CTRW),
|
||||
.NUM_BANKS (NUM_LOCAL_MEM_BANKS),
|
||||
.TAG_WIDTH (AVS_REQ_TAGW + 1),
|
||||
.RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE),
|
||||
.BANK_INTERLEAVE(`PLATFORM_MEMORY_INTERLEAVE),
|
||||
.REQ_OUT_BUF (2),
|
||||
.RSP_OUT_BUF (0)
|
||||
) avs_adapter (
|
||||
.clk (clk),
|
||||
.reset (avs_adapter_reset),
|
||||
.reset (reset),
|
||||
|
||||
// Memory request
|
||||
.mem_req_valid (mem_bus_if[0].req_valid),
|
||||
|
@ -639,8 +642,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.avs_readdatavalid(avs_readdatavalid)
|
||||
);
|
||||
|
||||
assign mem_bus_if[0].req_data.atype = '0;
|
||||
`UNUSED_VAR (mem_bus_if[0].req_data.atype)
|
||||
`UNUSED_VAR (mem_bus_if[0].req_data.flags)
|
||||
|
||||
// CCI-P Read Request ///////////////////////////////////////////////////////////
|
||||
|
||||
|
@ -748,7 +750,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
cci_rd_req_addr <= cci_rd_req_addr + 1;
|
||||
cci_rd_req_ctr <= cci_rd_req_ctr + $bits(cci_rd_req_ctr)'(1);
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: CCI Rd Req: addr=0x%0h, tag=0x%0h, rem=%0d, pending=%0d\n", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr - 1), cci_pending_reads));
|
||||
`TRACE(2, ("%t: AFU: CCI Rd Req: addr=0x%0h, tag=0x%0h, rem=%0d, pending=%0d\n", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr - 1), cci_pending_reads))
|
||||
`endif
|
||||
end
|
||||
|
||||
|
@ -758,13 +760,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
cci_mem_wr_req_addr_base <= cci_mem_wr_req_addr_base + CCI_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE);
|
||||
end
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: CCI Rd Rsp: idx=%0d, ctr=%0d, data=0x%h\n", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data));
|
||||
`TRACE(2, ("%t: AFU: CCI Rd Rsp: idx=%0d, ctr=%0d, data=0x%h\n", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data))
|
||||
`endif
|
||||
end
|
||||
|
||||
if (cci_rdq_pop) begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: CCI Rd Queue Pop: pending=%0d\n", $time, cci_pending_reads));
|
||||
`TRACE(2, ("%t: AFU: CCI Rd Queue Pop: pending=%0d\n", $time, cci_pending_reads))
|
||||
`endif
|
||||
end
|
||||
|
||||
|
@ -902,13 +904,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
cci_wr_req_done <= 1;
|
||||
end
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: CCI Wr Req: addr=0x%0h, rem=%0d, pending=%0d, data=0x%h\n", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data));
|
||||
`TRACE(2, ("%t: AFU: CCI Wr Req: addr=0x%0h, rem=%0d, pending=%0d, data=0x%h\n", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data))
|
||||
`endif
|
||||
end
|
||||
|
||||
if (cci_wr_rsp_fire) begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: CCI Wr Rsp: pending=%0d\n", $time, cci_pending_writes));
|
||||
`TRACE(2, ("%t: AFU: CCI Wr Rsp: pending=%0d\n", $time, cci_pending_writes))
|
||||
`endif
|
||||
end
|
||||
end
|
||||
|
@ -926,17 +928,17 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
|
||||
// Vortex ///////////////////////////////////////////////////////////////////
|
||||
|
||||
wire vx_dcr_wr_valid = (STATE_DCR_WRITE == state);
|
||||
wire vx_dcr_wr_valid = (STATE_DCR_WRITE == state);
|
||||
wire [`VX_DCR_ADDR_WIDTH-1:0] vx_dcr_wr_addr = cmd_dcr_addr;
|
||||
wire [`VX_DCR_DATA_WIDTH-1:0] vx_dcr_wr_data = cmd_dcr_data;
|
||||
|
||||
`SCOPE_IO_SWITCH (2)
|
||||
`SCOPE_IO_SWITCH (2);
|
||||
|
||||
Vortex vortex (
|
||||
`SCOPE_IO_BIND (1)
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset || ~vx_running),
|
||||
.reset (vx_reset),
|
||||
|
||||
// Memory request
|
||||
.mem_req_valid (vx_mem_req_valid),
|
||||
|
@ -966,7 +968,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
|
||||
wire [COUT_TID_WIDTH-1:0] cout_tid;
|
||||
|
||||
VX_onehot_encoder #(
|
||||
VX_encoder #(
|
||||
.N (`VX_MEM_BYTEEN_WIDTH)
|
||||
) cout_tid_enc (
|
||||
.data_in (vx_mem_req_byteen),
|
||||
|
@ -987,7 +989,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
wire cout_q_push = vx_mem_req_valid && vx_mem_is_cout && ~cout_q_full;
|
||||
|
||||
wire cout_q_pop = cp2af_sRxPort.c0.mmioRdValid
|
||||
&& (mmio_hdr.address == MMIO_STATUS)
|
||||
&& (mmio_req_hdr.address == MMIO_STATUS)
|
||||
&& ~cout_q_empty;
|
||||
|
||||
VX_fifo_queue #(
|
||||
|
@ -1010,58 +1012,59 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
// SCOPE //////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef DBG_SCOPE_AFU
|
||||
wire mem_req_fire = mem_bus_if[0].req_valid && mem_bus_if[0].req_ready;
|
||||
wire mem_rsp_fire = mem_bus_if[0].rsp_valid && mem_bus_if[0].rsp_ready;
|
||||
wire avs_write_fire = avs_write[0] && ~avs_waitrequest[0];
|
||||
wire avs_read_fire = avs_read[0] && ~avs_waitrequest[0];
|
||||
wire [$bits(t_local_mem_addr)-1:0] mem_bus_if_addr = mem_bus_if[0].req_data.addr;
|
||||
|
||||
reg [STATE_WIDTH-1:0] state_prev;
|
||||
always @(posedge clk) begin
|
||||
state_prev <= state;
|
||||
end
|
||||
wire state_changed = (state != state_prev);
|
||||
wire state_changed = (state != state_prev);
|
||||
wire vx_mem_req_fire = vx_mem_req_valid && vx_mem_req_ready;
|
||||
wire vx_mem_rsp_fire = vx_mem_rsp_valid && vx_mem_rsp_ready;
|
||||
wire avs_req_fire = (avs_write[0] || avs_read[0]) && ~avs_waitrequest[0];
|
||||
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (0),
|
||||
.TRIGGERW (24),
|
||||
.PROBEW (431)
|
||||
) scope_tap (
|
||||
.clk(clk),
|
||||
.reset(scope_reset_w[0]),
|
||||
.start(1'b0),
|
||||
.stop(1'b0),
|
||||
.triggers({
|
||||
reset,
|
||||
state_changed,
|
||||
mem_req_fire,
|
||||
mem_rsp_fire,
|
||||
avs_write_fire,
|
||||
avs_read_fire,
|
||||
`NEG_EDGE (reset_negedge, reset);
|
||||
`SCOPE_TAP (0, 0, {
|
||||
vx_reset,
|
||||
vx_busy,
|
||||
vx_mem_req_valid,
|
||||
vx_mem_req_ready,
|
||||
vx_mem_rsp_valid,
|
||||
vx_mem_rsp_ready,
|
||||
avs_read[0],
|
||||
avs_write[0],
|
||||
avs_waitrequest[0],
|
||||
avs_readdatavalid[0],
|
||||
cp2af_sRxPort.c0.mmioRdValid,
|
||||
cp2af_sRxPort.c0.mmioWrValid,
|
||||
cp2af_sRxPort.c0.rspValid,
|
||||
cp2af_sRxPort.c1.rspValid,
|
||||
af2cp_sTxPort.c0.valid,
|
||||
af2cp_sTxPort.c1.valid,
|
||||
cp2af_sRxPort.c0TxAlmFull,
|
||||
cp2af_sRxPort.c1TxAlmFull,
|
||||
af2cp_sTxPort.c2.mmioRdValid,
|
||||
cci_wr_req_fire,
|
||||
cci_wr_rsp_fire,
|
||||
cp2af_sRxPort.c1TxAlmFull
|
||||
},{
|
||||
state_changed,
|
||||
vx_dcr_wr_valid, // ack-free
|
||||
avs_readdatavalid[0], // ack-free
|
||||
cp2af_sRxPort.c0.mmioRdValid, // ack-free
|
||||
cp2af_sRxPort.c0.mmioWrValid, // ack-free
|
||||
af2cp_sTxPort.c2.mmioRdValid, // ack-free
|
||||
cp2af_sRxPort.c0.rspValid, // ack-free
|
||||
cp2af_sRxPort.c1.rspValid, // ack-free
|
||||
cci_rd_req_fire,
|
||||
cci_rd_rsp_fire,
|
||||
cci_pending_reads_full,
|
||||
cci_pending_writes_empty,
|
||||
cci_pending_writes_full
|
||||
}),
|
||||
.probes({
|
||||
cci_wr_req_fire,
|
||||
avs_req_fire,
|
||||
vx_mem_req_fire,
|
||||
vx_mem_rsp_fire
|
||||
},{
|
||||
cmd_type,
|
||||
state,
|
||||
mmio_hdr.address,
|
||||
mmio_hdr.length,
|
||||
vx_mem_req_rw,
|
||||
vx_mem_req_byteen,
|
||||
vx_mem_req_addr,
|
||||
vx_mem_req_data,
|
||||
vx_mem_req_tag,
|
||||
vx_mem_rsp_data,
|
||||
vx_mem_rsp_tag,
|
||||
vx_dcr_wr_addr,
|
||||
vx_dcr_wr_data,
|
||||
mmio_req_hdr.address,
|
||||
cp2af_sRxPort.c0.hdr.mdata,
|
||||
af2cp_sTxPort.c0.hdr.address,
|
||||
af2cp_sTxPort.c0.hdr.mdata,
|
||||
|
@ -1073,14 +1076,12 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
cci_mem_wr_req_ctr,
|
||||
cci_rd_req_ctr,
|
||||
cci_rd_rsp_ctr,
|
||||
cci_wr_req_ctr,
|
||||
mem_bus_if_addr
|
||||
}),
|
||||
.bus_in(scope_bus_in_w[0]),
|
||||
.bus_out(scope_bus_out_w[0])
|
||||
);
|
||||
cci_wr_req_ctr
|
||||
},
|
||||
reset_negedge, 1'b0, 4096
|
||||
);
|
||||
`else
|
||||
`SCOPE_IO_UNUSED_W(0)
|
||||
`SCOPE_IO_UNUSED(0)
|
||||
`endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -1089,13 +1090,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < NUM_LOCAL_MEM_BANKS; ++i) begin
|
||||
if (avs_write[i] && ~avs_waitrequest[i]) begin
|
||||
`TRACE(2, ("%d: AVS Wr Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h, data=0x%h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i], avs_writedata[i]));
|
||||
`TRACE(2, ("%t: AVS Wr Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h, data=0x%h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i], avs_writedata[i]))
|
||||
end
|
||||
if (avs_read[i] && ~avs_waitrequest[i]) begin
|
||||
`TRACE(2, ("%d: AVS Rd Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i]));
|
||||
`TRACE(2, ("%t: AVS Rd Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i]))
|
||||
end
|
||||
if (avs_readdatavalid[i]) begin
|
||||
`TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%h\n", $time, i, avs_readdata[i]));
|
||||
`TRACE(2, ("%t: AVS Rd Rsp [%0d]: data=0x%h\n", $time, i, avs_readdata[i]))
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -17,9 +17,9 @@
|
|||
`define AFU_ACCEL_NAME "vortex_afu"
|
||||
`define AFU_ACCEL_UUID 128'h35F9452B_25C2_434C_93D5_6F8C60DB361C
|
||||
|
||||
`define AFU_IMAGE_CMD_MEM_READ 1
|
||||
`define AFU_IMAGE_CMD_MEM_READ 1
|
||||
`define AFU_IMAGE_CMD_MEM_WRITE 2
|
||||
`define AFU_IMAGE_CMD_RUN 3
|
||||
`define AFU_IMAGE_CMD_RUN 3
|
||||
`define AFU_IMAGE_CMD_DCR_WRITE 4
|
||||
`define AFU_IMAGE_CMD_MAX_VALUE 4
|
||||
|
||||
|
|
|
@ -14,22 +14,20 @@
|
|||
`include "vortex_afu.vh"
|
||||
|
||||
module VX_afu_ctrl #(
|
||||
parameter AXI_ADDR_WIDTH = 8,
|
||||
parameter AXI_DATA_WIDTH = 32,
|
||||
parameter AXI_NUM_BANKS = 1
|
||||
parameter S_AXI_ADDR_WIDTH = 8,
|
||||
parameter S_AXI_DATA_WIDTH = 32
|
||||
) (
|
||||
// axi4 lite slave signals
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk_en,
|
||||
|
||||
input wire s_axi_awvalid,
|
||||
input wire [AXI_ADDR_WIDTH-1:0] s_axi_awaddr,
|
||||
input wire [S_AXI_ADDR_WIDTH-1:0] s_axi_awaddr,
|
||||
output wire s_axi_awready,
|
||||
|
||||
input wire s_axi_wvalid,
|
||||
input wire [AXI_DATA_WIDTH-1:0] s_axi_wdata,
|
||||
input wire [AXI_DATA_WIDTH/8-1:0] s_axi_wstrb,
|
||||
input wire [S_AXI_DATA_WIDTH-1:0] s_axi_wdata,
|
||||
input wire [S_AXI_DATA_WIDTH/8-1:0]s_axi_wstrb,
|
||||
output wire s_axi_wready,
|
||||
|
||||
output wire s_axi_bvalid,
|
||||
|
@ -37,11 +35,11 @@ module VX_afu_ctrl #(
|
|||
input wire s_axi_bready,
|
||||
|
||||
input wire s_axi_arvalid,
|
||||
input wire [AXI_ADDR_WIDTH-1:0] s_axi_araddr,
|
||||
input wire [S_AXI_ADDR_WIDTH-1:0] s_axi_araddr,
|
||||
output wire s_axi_arready,
|
||||
|
||||
output wire s_axi_rvalid,
|
||||
output wire [AXI_DATA_WIDTH-1:0] s_axi_rdata,
|
||||
output wire [S_AXI_DATA_WIDTH-1:0] s_axi_rdata,
|
||||
output wire [1:0] s_axi_rresp,
|
||||
input wire s_axi_rready,
|
||||
|
||||
|
@ -57,8 +55,6 @@ module VX_afu_ctrl #(
|
|||
output wire scope_bus_out,
|
||||
`endif
|
||||
|
||||
output wire [63:0] mem_base [AXI_NUM_BANKS],
|
||||
|
||||
output wire dcr_wr_valid,
|
||||
output wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr,
|
||||
output wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data
|
||||
|
@ -110,39 +106,36 @@ module VX_afu_ctrl #(
|
|||
|
||||
ADDR_DEV_0 = 8'h10,
|
||||
ADDR_DEV_1 = 8'h14,
|
||||
//ADDR_DEV_CTRL = 8'h18,
|
||||
|
||||
ADDR_ISA_0 = 8'h1C,
|
||||
ADDR_ISA_1 = 8'h20,
|
||||
//ADDR_ISA_CTRL = 8'h24,
|
||||
ADDR_ISA_0 = 8'h18,
|
||||
ADDR_ISA_1 = 8'h1C,
|
||||
|
||||
ADDR_DCR_0 = 8'h28,
|
||||
ADDR_DCR_1 = 8'h2C,
|
||||
//ADDR_DCR_CTRL = 8'h30,
|
||||
ADDR_DCR_0 = 8'h20,
|
||||
ADDR_DCR_1 = 8'h24,
|
||||
|
||||
`ifdef SCOPE
|
||||
ADDR_SCP_0 = 8'h34,
|
||||
ADDR_SCP_1 = 8'h38,
|
||||
//ADDR_SCP_CTRL = 8'h3C,
|
||||
ADDR_SCP_0 = 8'h28,
|
||||
ADDR_SCP_1 = 8'h2C,
|
||||
`endif
|
||||
|
||||
ADDR_MEM_0 = 8'h40,
|
||||
ADDR_MEM_1 = 8'h44,
|
||||
//ADDR_MEM_CTRL = 8'h48,
|
||||
|
||||
ADDR_BITS = 8;
|
||||
|
||||
localparam
|
||||
WSTATE_IDLE = 2'd0,
|
||||
WSTATE_ADDR = 2'd0,
|
||||
WSTATE_DATA = 2'd1,
|
||||
WSTATE_RESP = 2'd2;
|
||||
WSTATE_RESP = 2'd2,
|
||||
WSTATE_WIDTH = 2;
|
||||
|
||||
localparam
|
||||
RSTATE_IDLE = 2'd0,
|
||||
RSTATE_DATA = 2'd1;
|
||||
RSTATE_ADDR = 2'd0,
|
||||
RSTATE_DATA = 2'd1,
|
||||
RSTATE_RESP = 2'd2,
|
||||
RSTATE_WIDTH = 2;
|
||||
|
||||
// device caps
|
||||
wire [63:0] dev_caps = {16'b0,
|
||||
wire [63:0] dev_caps = {8'b0,
|
||||
5'(`PLATFORM_MEMORY_ADDR_WIDTH-20),
|
||||
3'(`CLOG2(`PLATFORM_MEMORY_BANKS)),
|
||||
8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0),
|
||||
16'(`NUM_CORES * `NUM_CLUSTERS),
|
||||
8'(`NUM_WARPS),
|
||||
|
@ -153,16 +146,18 @@ module VX_afu_ctrl #(
|
|||
2'(`CLOG2(`XLEN)-4),
|
||||
30'(`MISA_STD)};
|
||||
|
||||
reg [1:0] wstate;
|
||||
reg [WSTATE_WIDTH-1:0] wstate;
|
||||
reg [ADDR_BITS-1:0] waddr;
|
||||
wire [31:0] wmask;
|
||||
wire s_axi_aw_fire;
|
||||
wire s_axi_w_fire;
|
||||
wire s_axi_b_fire;
|
||||
|
||||
reg [1:0] rstate;
|
||||
logic [RSTATE_WIDTH-1:0] rstate;
|
||||
reg [31:0] rdata;
|
||||
wire [ADDR_BITS-1:0] raddr;
|
||||
reg [ADDR_BITS-1:0] raddr;
|
||||
wire s_axi_ar_fire;
|
||||
wire s_axi_r_fire;
|
||||
|
||||
reg ap_reset_r;
|
||||
reg ap_start_r;
|
||||
|
@ -170,20 +165,23 @@ module VX_afu_ctrl #(
|
|||
reg gie_r;
|
||||
reg [1:0] ier_r;
|
||||
reg [1:0] isr_r;
|
||||
reg [63:0] mem_r [AXI_NUM_BANKS];
|
||||
reg [31:0] dcra_r;
|
||||
reg [31:0] dcrv_r;
|
||||
reg dcr_wr_valid_r;
|
||||
|
||||
logic wready_stall;
|
||||
logic rvalid_stall;
|
||||
|
||||
`ifdef SCOPE
|
||||
|
||||
reg [63:0] scope_bus_wdata;
|
||||
reg [63:0] scope_bus_rdata;
|
||||
reg [63:0] scope_bus_wdata, scope_bus_rdata;
|
||||
reg [5:0] scope_bus_ctr;
|
||||
|
||||
reg cmd_scope_reading;
|
||||
reg cmd_scope_writing;
|
||||
reg cmd_scope_writing, cmd_scope_reading;
|
||||
reg scope_bus_out_r;
|
||||
reg scope_rdata_valid;
|
||||
|
||||
reg is_scope_waddr, is_scope_raddr;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
@ -191,18 +189,33 @@ module VX_afu_ctrl #(
|
|||
cmd_scope_writing <= 0;
|
||||
scope_bus_ctr <= '0;
|
||||
scope_bus_out_r <= 0;
|
||||
end else if (clk_en) begin
|
||||
is_scope_waddr <= 0;
|
||||
is_scope_raddr <= 0;
|
||||
scope_bus_rdata <= '0;
|
||||
scope_rdata_valid <= 0;
|
||||
end else begin
|
||||
scope_bus_out_r <= 0;
|
||||
if (s_axi_aw_fire) begin
|
||||
is_scope_waddr <= (s_axi_awaddr[ADDR_BITS-1:0] == ADDR_SCP_0)
|
||||
|| (s_axi_awaddr[ADDR_BITS-1:0] == ADDR_SCP_1);
|
||||
end
|
||||
if (s_axi_ar_fire) begin
|
||||
is_scope_raddr <= (s_axi_araddr[ADDR_BITS-1:0] == ADDR_SCP_0)
|
||||
|| (s_axi_araddr[ADDR_BITS-1:0] == ADDR_SCP_1);
|
||||
end
|
||||
if (s_axi_w_fire && waddr == ADDR_SCP_0) begin
|
||||
scope_bus_wdata[31:0] <= (s_axi_wdata & wmask) | (scope_bus_wdata[31:0] & ~wmask);
|
||||
end
|
||||
if (s_axi_w_fire && waddr == ADDR_SCP_1) begin
|
||||
scope_bus_wdata[63:32] <= (s_axi_wdata & wmask) | (scope_bus_wdata[63:32] & ~wmask);
|
||||
cmd_scope_writing <= 1;
|
||||
scope_rdata_valid <= 0;
|
||||
scope_bus_out_r <= 1;
|
||||
scope_bus_ctr <= 63;
|
||||
end
|
||||
if (scope_bus_in) begin
|
||||
cmd_scope_reading <= 1;
|
||||
scope_bus_rdata <= '0;
|
||||
scope_bus_ctr <= 63;
|
||||
end
|
||||
if (cmd_scope_reading) begin
|
||||
|
@ -210,13 +223,16 @@ module VX_afu_ctrl #(
|
|||
scope_bus_ctr <= scope_bus_ctr - 1;
|
||||
if (scope_bus_ctr == 0) begin
|
||||
cmd_scope_reading <= 0;
|
||||
scope_rdata_valid <= 1;
|
||||
scope_bus_ctr <= 0;
|
||||
end
|
||||
end
|
||||
if (cmd_scope_writing) begin
|
||||
scope_bus_out_r <= 1'(scope_bus_wdata >> scope_bus_ctr);
|
||||
scope_bus_out_r <= scope_bus_wdata[scope_bus_ctr];
|
||||
scope_bus_ctr <= scope_bus_ctr - 1;
|
||||
if (scope_bus_ctr == 0) begin
|
||||
cmd_scope_writing <= 0;
|
||||
scope_bus_ctr <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -224,41 +240,50 @@ module VX_afu_ctrl #(
|
|||
|
||||
assign scope_bus_out = scope_bus_out_r;
|
||||
|
||||
assign wready_stall = is_scope_waddr && cmd_scope_writing;
|
||||
assign rvalid_stall = is_scope_raddr && ~scope_rdata_valid;
|
||||
|
||||
`else
|
||||
|
||||
assign wready_stall = 0;
|
||||
assign rvalid_stall = 0;
|
||||
|
||||
`endif
|
||||
|
||||
// AXI Write
|
||||
// AXI Write Request
|
||||
assign s_axi_awready = (wstate == WSTATE_ADDR);
|
||||
assign s_axi_wready = (wstate == WSTATE_DATA) && ~wready_stall;
|
||||
|
||||
assign s_axi_awready = (wstate == WSTATE_IDLE);
|
||||
assign s_axi_wready = (wstate == WSTATE_DATA);
|
||||
// AXI Write Response
|
||||
assign s_axi_bvalid = (wstate == WSTATE_RESP);
|
||||
assign s_axi_bresp = 2'b00; // OKAY
|
||||
|
||||
assign s_axi_aw_fire = s_axi_awvalid && s_axi_awready;
|
||||
assign s_axi_w_fire = s_axi_wvalid && s_axi_wready;
|
||||
|
||||
for (genvar i = 0; i < 4; ++i) begin
|
||||
for (genvar i = 0; i < 4; ++i) begin : g_wmask
|
||||
assign wmask[8 * i +: 8] = {8{s_axi_wstrb[i]}};
|
||||
end
|
||||
|
||||
assign s_axi_aw_fire = s_axi_awvalid && s_axi_awready;
|
||||
assign s_axi_w_fire = s_axi_wvalid && s_axi_wready;
|
||||
assign s_axi_b_fire = s_axi_bvalid && s_axi_bready;
|
||||
|
||||
// wstate
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
wstate <= WSTATE_IDLE;
|
||||
end else if (clk_en) begin
|
||||
wstate <= WSTATE_ADDR;
|
||||
end else begin
|
||||
case (wstate)
|
||||
WSTATE_IDLE: wstate <= s_axi_awvalid ? WSTATE_DATA : WSTATE_IDLE;
|
||||
WSTATE_DATA: wstate <= s_axi_wvalid ? WSTATE_RESP : WSTATE_DATA;
|
||||
WSTATE_RESP: wstate <= s_axi_bready ? WSTATE_IDLE : WSTATE_RESP;
|
||||
default: wstate <= WSTATE_IDLE;
|
||||
WSTATE_ADDR: wstate <= s_axi_aw_fire ? WSTATE_DATA : WSTATE_ADDR;
|
||||
WSTATE_DATA: wstate <= s_axi_w_fire ? WSTATE_RESP : WSTATE_DATA;
|
||||
WSTATE_RESP: wstate <= s_axi_b_fire ? WSTATE_ADDR : WSTATE_RESP;
|
||||
default: wstate <= WSTATE_ADDR;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// waddr
|
||||
always @(posedge clk) begin
|
||||
if (clk_en) begin
|
||||
if (s_axi_aw_fire)
|
||||
waddr <= s_axi_awaddr[ADDR_BITS-1:0];
|
||||
if (s_axi_aw_fire) begin
|
||||
waddr <= s_axi_awaddr[ADDR_BITS-1:0];
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -276,16 +301,13 @@ module VX_afu_ctrl #(
|
|||
dcra_r <= '0;
|
||||
dcrv_r <= '0;
|
||||
dcr_wr_valid_r <= 0;
|
||||
end else begin
|
||||
dcr_wr_valid_r <= 0;
|
||||
ap_reset_r <= 0;
|
||||
|
||||
for (integer i = 0; i < AXI_NUM_BANKS; ++i) begin
|
||||
mem_r[i] <= '0;
|
||||
end
|
||||
end else if (clk_en) begin
|
||||
if (ap_ready)
|
||||
ap_start_r <= auto_restart_r;
|
||||
|
||||
dcr_wr_valid_r <= 0;
|
||||
|
||||
if (s_axi_w_fire) begin
|
||||
case (waddr)
|
||||
ADDR_AP_CTRL: begin
|
||||
|
@ -317,16 +339,7 @@ module VX_afu_ctrl #(
|
|||
dcrv_r <= (s_axi_wdata & wmask) | (dcrv_r & ~wmask);
|
||||
dcr_wr_valid_r <= 1;
|
||||
end
|
||||
default: begin
|
||||
for (integer i = 0; i < AXI_NUM_BANKS; ++i) begin
|
||||
if (waddr == (ADDR_MEM_0 + 8'(i) * 8'd12)) begin
|
||||
mem_r[i][31:0] <= (s_axi_wdata & wmask) | (mem_r[i][31:0] & ~wmask);
|
||||
end
|
||||
if (waddr == (ADDR_MEM_1 + 8'(i) * 8'd12)) begin
|
||||
mem_r[i][63:32] <= (s_axi_wdata & wmask) | (mem_r[i][63:32] & ~wmask);
|
||||
end
|
||||
end
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
|
||||
if (ier_r[0] & ap_done)
|
||||
|
@ -337,83 +350,86 @@ module VX_afu_ctrl #(
|
|||
end
|
||||
end
|
||||
|
||||
// AXI Read
|
||||
// AXI Read Request
|
||||
assign s_axi_arready = (rstate == RSTATE_ADDR);
|
||||
|
||||
assign s_axi_arready = (rstate == RSTATE_IDLE);
|
||||
assign s_axi_rvalid = (rstate == RSTATE_DATA);
|
||||
// AXI Read Response
|
||||
assign s_axi_rvalid = (rstate == RSTATE_RESP);
|
||||
assign s_axi_rdata = rdata;
|
||||
assign s_axi_rresp = 2'b00; // OKAY
|
||||
|
||||
assign s_axi_ar_fire = s_axi_arvalid && s_axi_arready;
|
||||
assign raddr = s_axi_araddr[ADDR_BITS-1:0];
|
||||
assign s_axi_r_fire = s_axi_rvalid && s_axi_rready;
|
||||
|
||||
// rstate
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
rstate <= RSTATE_IDLE;
|
||||
end else if (clk_en) begin
|
||||
rstate <= RSTATE_ADDR;
|
||||
end else begin
|
||||
case (rstate)
|
||||
RSTATE_IDLE: rstate <= s_axi_arvalid ? RSTATE_DATA : RSTATE_IDLE;
|
||||
RSTATE_DATA: rstate <= (s_axi_rready & s_axi_rvalid) ? RSTATE_IDLE : RSTATE_DATA;
|
||||
default: rstate <= RSTATE_IDLE;
|
||||
RSTATE_ADDR: rstate <= s_axi_ar_fire ? RSTATE_DATA : RSTATE_ADDR;
|
||||
RSTATE_DATA: rstate <= (~rvalid_stall) ? RSTATE_RESP : RSTATE_DATA;
|
||||
RSTATE_RESP: rstate <= s_axi_r_fire ? RSTATE_ADDR : RSTATE_RESP;
|
||||
default: rstate <= RSTATE_ADDR;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// raddr
|
||||
always @(posedge clk) begin
|
||||
if (s_axi_ar_fire) begin
|
||||
raddr <= s_axi_araddr[ADDR_BITS-1:0];
|
||||
end
|
||||
end
|
||||
|
||||
// rdata
|
||||
always @(posedge clk) begin
|
||||
if (clk_en) begin
|
||||
if (s_axi_ar_fire) begin
|
||||
rdata <= '0;
|
||||
case (raddr)
|
||||
ADDR_AP_CTRL: begin
|
||||
rdata[0] <= ap_start_r;
|
||||
rdata[1] <= ap_done;
|
||||
rdata[2] <= ap_idle;
|
||||
rdata[3] <= ap_ready;
|
||||
rdata[7] <= auto_restart_r;
|
||||
end
|
||||
ADDR_GIE: begin
|
||||
rdata <= 32'(gie_r);
|
||||
end
|
||||
ADDR_IER: begin
|
||||
rdata <= 32'(ier_r);
|
||||
end
|
||||
ADDR_ISR: begin
|
||||
rdata <= 32'(isr_r);
|
||||
end
|
||||
ADDR_DEV_0: begin
|
||||
rdata <= dev_caps[31:0];
|
||||
end
|
||||
ADDR_DEV_1: begin
|
||||
rdata <= dev_caps[63:32];
|
||||
end
|
||||
ADDR_ISA_0: begin
|
||||
rdata <= isa_caps[31:0];
|
||||
end
|
||||
ADDR_ISA_1: begin
|
||||
rdata <= isa_caps[63:32];
|
||||
end
|
||||
`ifdef SCOPE
|
||||
ADDR_SCP_0: begin
|
||||
rdata <= scope_bus_rdata[31:0];
|
||||
end
|
||||
ADDR_SCP_1: begin
|
||||
rdata <= scope_bus_rdata[63:32];
|
||||
end
|
||||
`endif
|
||||
default:;
|
||||
endcase
|
||||
rdata <= '0;
|
||||
case (raddr)
|
||||
ADDR_AP_CTRL: begin
|
||||
rdata[0] <= ap_start_r;
|
||||
rdata[1] <= ap_done;
|
||||
rdata[2] <= ap_idle;
|
||||
rdata[3] <= ap_ready;
|
||||
rdata[7] <= auto_restart_r;
|
||||
end
|
||||
end
|
||||
ADDR_GIE: begin
|
||||
rdata <= 32'(gie_r);
|
||||
end
|
||||
ADDR_IER: begin
|
||||
rdata <= 32'(ier_r);
|
||||
end
|
||||
ADDR_ISR: begin
|
||||
rdata <= 32'(isr_r);
|
||||
end
|
||||
ADDR_DEV_0: begin
|
||||
rdata <= dev_caps[31:0];
|
||||
end
|
||||
ADDR_DEV_1: begin
|
||||
rdata <= dev_caps[63:32];
|
||||
end
|
||||
ADDR_ISA_0: begin
|
||||
rdata <= isa_caps[31:0];
|
||||
end
|
||||
ADDR_ISA_1: begin
|
||||
rdata <= isa_caps[63:32];
|
||||
end
|
||||
`ifdef SCOPE
|
||||
ADDR_SCP_0: begin
|
||||
rdata <= scope_bus_rdata[31:0];
|
||||
end
|
||||
ADDR_SCP_1: begin
|
||||
rdata <= scope_bus_rdata[63:32];
|
||||
end
|
||||
`endif
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
||||
assign ap_reset = ap_reset_r;
|
||||
assign ap_start = ap_start_r;
|
||||
assign interrupt = gie_r & (| isr_r);
|
||||
|
||||
assign mem_base = mem_r;
|
||||
|
||||
assign dcr_wr_valid = dcr_wr_valid_r;
|
||||
assign dcr_wr_addr = `VX_DCR_ADDR_WIDTH'(dcra_r);
|
||||
assign dcr_wr_data = `VX_DCR_DATA_WIDTH'(dcrv_r);
|
||||
|
|
|
@ -16,17 +16,21 @@
|
|||
module VX_afu_wrap #(
|
||||
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
|
||||
parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
|
||||
parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH,
|
||||
parameter C_M_AXI_MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH,
|
||||
parameter C_M_AXI_MEM_DATA_WIDTH = `VX_MEM_DATA_WIDTH
|
||||
parameter C_M_AXI_MEM_ID_WIDTH = 32,
|
||||
parameter C_M_AXI_MEM_DATA_WIDTH = 512,
|
||||
parameter C_M_AXI_MEM_ADDR_WIDTH = 25,
|
||||
parameter C_M_AXI_MEM_NUM_BANKS = 2
|
||||
) (
|
||||
// System signals
|
||||
input wire ap_clk,
|
||||
input wire ap_rst_n,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// AXI4 master interface
|
||||
`REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
|
||||
|
||||
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
||||
`REPEAT (1, GEN_AXI_MEM, REPEAT_COMMA),
|
||||
`else
|
||||
`REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
|
||||
`endif
|
||||
// AXI4-Lite slave interface
|
||||
input wire s_axi_ctrl_awvalid,
|
||||
output wire s_axi_ctrl_awready,
|
||||
|
@ -48,11 +52,18 @@ module VX_afu_wrap #(
|
|||
|
||||
output wire interrupt
|
||||
);
|
||||
localparam C_M_AXI_MEM_NUM_BANKS = `M_AXI_MEM_NUM_BANKS;
|
||||
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
||||
localparam M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + $clog2(`PLATFORM_MEMORY_BANKS);
|
||||
`else
|
||||
localparam M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH;
|
||||
`endif
|
||||
|
||||
localparam STATE_IDLE = 0;
|
||||
localparam STATE_RUN = 1;
|
||||
|
||||
localparam PENDING_SIZEW = 12; // max outstanding requests size
|
||||
localparam C_M_AXI_MEM_NUM_BANKS_SW = `CLOG2(C_M_AXI_MEM_NUM_BANKS+1);
|
||||
|
||||
wire m_axi_mem_awvalid_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire m_axi_mem_awready_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [C_M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
|
@ -80,19 +91,18 @@ module VX_afu_wrap #(
|
|||
wire [1:0] m_axi_mem_rresp_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
|
||||
// convert memory interface to array
|
||||
`REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON);
|
||||
|
||||
wire reset = ~ap_rst_n;
|
||||
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
||||
`REPEAT (1, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON);
|
||||
`else
|
||||
`REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON);
|
||||
`endif
|
||||
|
||||
reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr;
|
||||
reg [15:0] vx_pending_writes;
|
||||
reg [PENDING_SIZEW-1:0] vx_pending_writes;
|
||||
reg vx_busy_wait;
|
||||
reg vx_running;
|
||||
|
||||
reg vx_reset = 1; // asserted at initialization
|
||||
wire vx_busy;
|
||||
|
||||
wire [63:0] mem_base [C_M_AXI_MEM_NUM_BANKS];
|
||||
|
||||
wire dcr_wr_valid;
|
||||
wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr;
|
||||
wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data;
|
||||
|
@ -101,8 +111,8 @@ module VX_afu_wrap #(
|
|||
|
||||
wire ap_reset;
|
||||
wire ap_start;
|
||||
wire ap_idle = ~vx_running;
|
||||
wire ap_done = ~(state == STATE_RUN || vx_pending_writes != 0);
|
||||
wire ap_idle = vx_reset;
|
||||
wire ap_done = (state == STATE_IDLE) && (vx_pending_writes == '0);
|
||||
wire ap_ready = 1'b1;
|
||||
|
||||
`ifdef SCOPE
|
||||
|
@ -111,24 +121,33 @@ module VX_afu_wrap #(
|
|||
wire scope_reset = reset;
|
||||
`endif
|
||||
|
||||
always @(posedge ap_clk) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset || ap_reset) begin
|
||||
state <= STATE_IDLE;
|
||||
vx_busy_wait <= 0;
|
||||
vx_running <= 0;
|
||||
state <= STATE_IDLE;
|
||||
vx_reset <= 1;
|
||||
end else begin
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
if (ap_start) begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: STATE RUN\n", $time));
|
||||
`TRACE(2, ("%t: AFU: Goto STATE RUN\n", $time))
|
||||
`endif
|
||||
state <= STATE_RUN;
|
||||
vx_running <= 0;
|
||||
vx_reset_ctr <= (`RESET_DELAY-1);
|
||||
vx_reset <= 1;
|
||||
end
|
||||
end
|
||||
STATE_RUN: begin
|
||||
if (vx_running) begin
|
||||
if (vx_reset) begin
|
||||
// wait until the reset network is ready
|
||||
if (vx_reset_ctr == 0) begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%t: AFU: Begin execution\n", $time))
|
||||
`endif
|
||||
vx_busy_wait <= 1;
|
||||
vx_reset <= 0;
|
||||
end
|
||||
end else begin
|
||||
if (vx_busy_wait) begin
|
||||
// wait until processor goes busy
|
||||
if (vx_busy) begin
|
||||
|
@ -137,67 +156,63 @@ module VX_afu_wrap #(
|
|||
end else begin
|
||||
// wait until the processor is not busy
|
||||
if (~vx_busy) begin
|
||||
state <= STATE_IDLE;
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: AFU: End execution\n", $time));
|
||||
`TRACE(2, ("%d: STATE IDLE\n", $time));
|
||||
`TRACE(2, ("%t: AFU: End execution\n", $time))
|
||||
`TRACE(2, ("%t: AFU: Goto STATE IDLE\n", $time))
|
||||
`endif
|
||||
state <= STATE_IDLE;
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
// wait until the reset sequence is complete
|
||||
if (vx_reset_ctr == (`RESET_DELAY-1)) begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: AFU: Begin execution\n", $time));
|
||||
`endif
|
||||
vx_running <= 1;
|
||||
vx_busy_wait <= 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
endcase
|
||||
|
||||
// ensure reset network initialization
|
||||
if (vx_reset_ctr != '0) begin
|
||||
vx_reset_ctr <= vx_reset_ctr - 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
reg m_axi_mem_wfire;
|
||||
reg m_axi_mem_bfire;
|
||||
wire [C_M_AXI_MEM_NUM_BANKS-1:0] m_axi_wr_req_fire, m_axi_wr_rsp_fire;
|
||||
wire [C_M_AXI_MEM_NUM_BANKS_SW-1:0] cur_wr_reqs, cur_wr_rsps;
|
||||
|
||||
always @(*) begin
|
||||
m_axi_mem_wfire = 0;
|
||||
m_axi_mem_bfire = 0;
|
||||
for (integer i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin
|
||||
m_axi_mem_wfire |= m_axi_mem_wvalid_a[i] && m_axi_mem_wready_a[i];
|
||||
m_axi_mem_bfire |= m_axi_mem_bvalid_a[i] && m_axi_mem_bready_a[i];
|
||||
end
|
||||
for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin : g_awfire
|
||||
VX_axi_write_ack axi_write_ack (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.awvalid(m_axi_mem_awvalid_a[i]),
|
||||
.awready(m_axi_mem_awready_a[i]),
|
||||
.wvalid (m_axi_mem_wvalid_a[i]),
|
||||
.wready (m_axi_mem_wready_a[i]),
|
||||
.tx_ack (m_axi_wr_req_fire[i]),
|
||||
`UNUSED_PIN (aw_ack),
|
||||
`UNUSED_PIN (w_ack),
|
||||
`UNUSED_PIN (tx_rdy)
|
||||
);
|
||||
assign m_axi_wr_rsp_fire[i] = m_axi_mem_bvalid_a[i] & m_axi_mem_bready_a[i];
|
||||
end
|
||||
|
||||
always @(posedge ap_clk) begin
|
||||
if (reset || ap_reset) begin
|
||||
`POP_COUNT(cur_wr_reqs, m_axi_wr_req_fire);
|
||||
`POP_COUNT(cur_wr_rsps, m_axi_wr_rsp_fire);
|
||||
|
||||
wire signed [C_M_AXI_MEM_NUM_BANKS_SW:0] reqs_sub = (C_M_AXI_MEM_NUM_BANKS_SW+1)'(cur_wr_reqs) -
|
||||
(C_M_AXI_MEM_NUM_BANKS_SW+1)'(cur_wr_rsps);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
vx_pending_writes <= '0;
|
||||
end else begin
|
||||
if (m_axi_mem_wfire && ~m_axi_mem_bfire)
|
||||
vx_pending_writes <= vx_pending_writes + 1;
|
||||
if (~m_axi_mem_wfire && m_axi_mem_bfire)
|
||||
vx_pending_writes <= vx_pending_writes - 1;
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge ap_clk) begin
|
||||
if (state == STATE_RUN) begin
|
||||
vx_reset_ctr <= vx_reset_ctr + 1;
|
||||
end else begin
|
||||
vx_reset_ctr <= '0;
|
||||
vx_pending_writes <= vx_pending_writes + PENDING_SIZEW'(reqs_sub);
|
||||
end
|
||||
end
|
||||
|
||||
VX_afu_ctrl #(
|
||||
.AXI_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH),
|
||||
.AXI_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH),
|
||||
.AXI_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS)
|
||||
.S_AXI_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH),
|
||||
.S_AXI_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH)
|
||||
) afu_ctrl (
|
||||
.clk (ap_clk),
|
||||
.reset (reset || ap_reset),
|
||||
.clk_en (1'b1),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.s_axi_awvalid (s_axi_ctrl_awvalid),
|
||||
.s_axi_awready (s_axi_ctrl_awready),
|
||||
|
@ -229,37 +244,36 @@ module VX_afu_wrap #(
|
|||
.scope_bus_out (scope_bus_in),
|
||||
`endif
|
||||
|
||||
.mem_base (mem_base),
|
||||
|
||||
.dcr_wr_valid (dcr_wr_valid),
|
||||
.dcr_wr_addr (dcr_wr_addr),
|
||||
.dcr_wr_data (dcr_wr_data)
|
||||
);
|
||||
|
||||
wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_w [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_w [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_u [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_u [C_M_AXI_MEM_NUM_BANKS];
|
||||
|
||||
for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin
|
||||
assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_w[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]);
|
||||
assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_w[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]);
|
||||
for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin : g_addressing
|
||||
localparam [C_M_AXI_MEM_ADDR_WIDTH-1:0] BANK_OFFSET = C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET) + C_M_AXI_MEM_ADDR_WIDTH'(i) << M_AXI_MEM_ADDR_WIDTH;
|
||||
assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_u[i]) + BANK_OFFSET;
|
||||
assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_u[i]) + BANK_OFFSET;
|
||||
end
|
||||
|
||||
`SCOPE_IO_SWITCH (2)
|
||||
`SCOPE_IO_SWITCH (2);
|
||||
|
||||
Vortex_axi #(
|
||||
.AXI_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH),
|
||||
.AXI_ADDR_WIDTH (`MEM_ADDR_WIDTH),
|
||||
.AXI_ADDR_WIDTH (M_AXI_MEM_ADDR_WIDTH),
|
||||
.AXI_TID_WIDTH (C_M_AXI_MEM_ID_WIDTH),
|
||||
.AXI_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS)
|
||||
) vortex_axi (
|
||||
`SCOPE_IO_BIND (1)
|
||||
|
||||
.clk (ap_clk),
|
||||
.reset (reset || ap_reset || ~vx_running),
|
||||
.clk (clk),
|
||||
.reset (vx_reset),
|
||||
|
||||
.m_axi_awvalid (m_axi_mem_awvalid_a),
|
||||
.m_axi_awready (m_axi_mem_awready_a),
|
||||
.m_axi_awaddr (m_axi_mem_awaddr_w),
|
||||
.m_axi_awaddr (m_axi_mem_awaddr_u),
|
||||
.m_axi_awid (m_axi_mem_awid_a),
|
||||
.m_axi_awlen (m_axi_mem_awlen_a),
|
||||
`UNUSED_PIN (m_axi_awsize),
|
||||
|
@ -283,7 +297,7 @@ module VX_afu_wrap #(
|
|||
|
||||
.m_axi_arvalid (m_axi_mem_arvalid_a),
|
||||
.m_axi_arready (m_axi_mem_arready_a),
|
||||
.m_axi_araddr (m_axi_mem_araddr_w),
|
||||
.m_axi_araddr (m_axi_mem_araddr_u),
|
||||
.m_axi_arid (m_axi_mem_arid_a),
|
||||
.m_axi_arlen (m_axi_mem_arlen_a),
|
||||
`UNUSED_PIN (m_axi_arsize),
|
||||
|
@ -310,38 +324,75 @@ module VX_afu_wrap #(
|
|||
|
||||
// SCOPE //////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef SCOPE
|
||||
`ifdef DBG_SCOPE_AFU
|
||||
`define TRIGGERS { \
|
||||
reset, \
|
||||
ap_start, \
|
||||
ap_done, \
|
||||
ap_idle, \
|
||||
interrupt, \
|
||||
vx_busy_wait, \
|
||||
vx_busy, \
|
||||
vx_running \
|
||||
}
|
||||
wire m_axi_mem_awfire_0 = m_axi_mem_awvalid_a[0] & m_axi_mem_awready_a[0];
|
||||
wire m_axi_mem_arfire_0 = m_axi_mem_arvalid_a[0] & m_axi_mem_arready_a[0];
|
||||
wire m_axi_mem_wfire_0 = m_axi_mem_wvalid_a[0] & m_axi_mem_wready_a[0];
|
||||
wire m_axi_mem_bfire_0 = m_axi_mem_bvalid_a[0] & m_axi_mem_bready_a[0];
|
||||
|
||||
`define PROBES { \
|
||||
vx_pending_writes \
|
||||
}
|
||||
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (0),
|
||||
.TRIGGERW ($bits(`TRIGGERS)),
|
||||
.PROBEW ($bits(`PROBES))
|
||||
) scope_tap (
|
||||
.clk (clk),
|
||||
.reset (scope_reset_w[0]),
|
||||
.start (1'b0),
|
||||
.stop (1'b0),
|
||||
.triggers (`TRIGGERS),
|
||||
.probes (`PROBES),
|
||||
.bus_in (scope_bus_in_w[0]),
|
||||
.bus_out (scope_bus_out_w[0])
|
||||
);
|
||||
`NEG_EDGE (reset_negedge, reset);
|
||||
`SCOPE_TAP (0, 0, {
|
||||
ap_reset,
|
||||
ap_start,
|
||||
ap_done,
|
||||
ap_idle,
|
||||
interrupt,
|
||||
vx_reset,
|
||||
vx_busy,
|
||||
m_axi_mem_awvalid_a[0],
|
||||
m_axi_mem_awready_a[0],
|
||||
m_axi_mem_wvalid_a[0],
|
||||
m_axi_mem_wready_a[0],
|
||||
m_axi_mem_bvalid_a[0],
|
||||
m_axi_mem_bready_a[0],
|
||||
m_axi_mem_arvalid_a[0],
|
||||
m_axi_mem_arready_a[0],
|
||||
m_axi_mem_rvalid_a[0],
|
||||
m_axi_mem_rready_a[0]
|
||||
}, {
|
||||
dcr_wr_valid,
|
||||
m_axi_mem_awfire_0,
|
||||
m_axi_mem_arfire_0,
|
||||
m_axi_mem_wfire_0,
|
||||
m_axi_mem_bfire_0
|
||||
},{
|
||||
dcr_wr_addr,
|
||||
dcr_wr_data,
|
||||
vx_pending_writes,
|
||||
m_axi_mem_awaddr_u[0],
|
||||
m_axi_mem_awid_a[0],
|
||||
m_axi_mem_bid_a[0],
|
||||
m_axi_mem_araddr_u[0],
|
||||
m_axi_mem_arid_a[0],
|
||||
m_axi_mem_rid_a[0]
|
||||
},
|
||||
reset_negedge, 1'b0, 4096
|
||||
);
|
||||
`else
|
||||
`SCOPE_IO_UNUSED_W(0)
|
||||
`SCOPE_IO_UNUSED(0)
|
||||
`endif
|
||||
`endif
|
||||
`ifdef CHIPSCOPE
|
||||
ila_afu ila_afu_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({
|
||||
ap_reset,
|
||||
ap_start,
|
||||
ap_done,
|
||||
ap_idle,
|
||||
interrupt
|
||||
}),
|
||||
.probe1 ({
|
||||
vx_pending_writes,
|
||||
vx_busy_wait,
|
||||
vx_busy,
|
||||
vx_reset,
|
||||
dcr_wr_valid,
|
||||
dcr_wr_addr,
|
||||
dcr_wr_data
|
||||
})
|
||||
);
|
||||
`endif
|
||||
|
||||
`ifdef SIMULATION
|
||||
|
@ -352,7 +403,7 @@ module VX_afu_wrap #(
|
|||
initial begin
|
||||
$assertoff(0, vortex_axi);
|
||||
end
|
||||
always @(posedge ap_clk) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
assert_delay_ctr <= '0;
|
||||
assert_enabled <= 0;
|
||||
|
@ -371,19 +422,19 @@ module VX_afu_wrap #(
|
|||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_AFU
|
||||
always @(posedge ap_clk) begin
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin
|
||||
if (m_axi_mem_awvalid_a[i] && m_axi_mem_awready_a[i]) begin
|
||||
`TRACE(2, ("%d: AFU Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i]));
|
||||
`TRACE(2, ("%t: AXI Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i]))
|
||||
end
|
||||
if (m_axi_mem_wvalid_a[i] && m_axi_mem_wready_a[i]) begin
|
||||
`TRACE(2, ("%d: AFU Wr Req [%0d]: data=0x%h\n", $time, i, m_axi_mem_wdata_a[i]));
|
||||
`TRACE(2, ("%t: AXI Wr Req [%0d]: data=0x%h\n", $time, i, m_axi_mem_wdata_a[i]))
|
||||
end
|
||||
if (m_axi_mem_arvalid_a[i] && m_axi_mem_arready_a[i]) begin
|
||||
`TRACE(2, ("%d: AFU Rd Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_araddr_a[i], m_axi_mem_arid_a[i]));
|
||||
`TRACE(2, ("%t: AXI Rd Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_araddr_a[i], m_axi_mem_arid_a[i]))
|
||||
end
|
||||
if (m_axi_mem_rvalid_a[i] && m_axi_mem_rready_a[i]) begin
|
||||
`TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%h, tag=0x%0h\n", $time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i]));
|
||||
`TRACE(2, ("%t: AXI Rd Rsp [%0d]: data=0x%h, tag=0x%0h\n", $time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i]))
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -16,16 +16,25 @@
|
|||
module vortex_afu #(
|
||||
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
|
||||
parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
|
||||
parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH,
|
||||
parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH,
|
||||
parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH,
|
||||
parameter C_M_AXI_MEM_ADDR_WIDTH = 64,
|
||||
parameter C_M_AXI_MEM_DATA_WIDTH = `VX_MEM_DATA_WIDTH
|
||||
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
||||
parameter C_M_AXI_MEM_NUM_BANKS = 1
|
||||
`else
|
||||
parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS
|
||||
`endif
|
||||
) (
|
||||
// System signals
|
||||
input wire ap_clk,
|
||||
input wire ap_rst_n,
|
||||
|
||||
|
||||
// AXI4 master interface
|
||||
`REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
|
||||
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
||||
`REPEAT (1, GEN_AXI_MEM, REPEAT_COMMA),
|
||||
`else
|
||||
`REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
|
||||
`endif
|
||||
|
||||
// AXI4-Lite slave interface
|
||||
input wire s_axi_ctrl_awvalid,
|
||||
|
@ -45,8 +54,8 @@ module vortex_afu #(
|
|||
output wire s_axi_ctrl_bvalid,
|
||||
input wire s_axi_ctrl_bready,
|
||||
output wire [1:0] s_axi_ctrl_bresp,
|
||||
|
||||
output wire interrupt
|
||||
|
||||
output wire interrupt
|
||||
);
|
||||
|
||||
VX_afu_wrap #(
|
||||
|
@ -54,16 +63,19 @@ module vortex_afu #(
|
|||
.C_S_AXI_CTRL_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH),
|
||||
.C_M_AXI_MEM_ID_WIDTH (C_M_AXI_MEM_ID_WIDTH),
|
||||
.C_M_AXI_MEM_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH),
|
||||
.C_M_AXI_MEM_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH)
|
||||
.C_M_AXI_MEM_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH),
|
||||
.C_M_AXI_MEM_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS)
|
||||
) afu_wrap (
|
||||
.ap_clk (ap_clk),
|
||||
.ap_rst_n (ap_rst_n),
|
||||
|
||||
`REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_ARGS, REPEAT_COMMA),
|
||||
|
||||
.clk (ap_clk),
|
||||
.reset (~ap_rst_n),
|
||||
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
||||
`REPEAT (1, AXI_MEM_ARGS, REPEAT_COMMA),
|
||||
`else
|
||||
`REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_ARGS, REPEAT_COMMA),
|
||||
`endif
|
||||
.s_axi_ctrl_awvalid (s_axi_ctrl_awvalid),
|
||||
.s_axi_ctrl_awready (s_axi_ctrl_awready),
|
||||
.s_axi_ctrl_awaddr (s_axi_ctrl_awaddr),
|
||||
.s_axi_ctrl_awaddr (s_axi_ctrl_awaddr),
|
||||
.s_axi_ctrl_wvalid (s_axi_ctrl_wvalid),
|
||||
.s_axi_ctrl_wready (s_axi_ctrl_wready),
|
||||
.s_axi_ctrl_wdata (s_axi_ctrl_wdata),
|
||||
|
@ -81,5 +93,5 @@ module vortex_afu #(
|
|||
|
||||
.interrupt (interrupt)
|
||||
);
|
||||
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,12 +14,24 @@
|
|||
`ifndef VORTEX_AFU_VH
|
||||
`define VORTEX_AFU_VH
|
||||
|
||||
`ifndef M_AXI_MEM_NUM_BANKS
|
||||
`define M_AXI_MEM_NUM_BANKS 1
|
||||
`ifndef PLATFORM_MEMORY_BANKS
|
||||
`define PLATFORM_MEMORY_BANKS 2
|
||||
`endif
|
||||
|
||||
`ifndef M_AXI_MEM_ID_WIDTH
|
||||
`define M_AXI_MEM_ID_WIDTH 32
|
||||
`ifndef PLATFORM_MEMORY_ADDR_WIDTH
|
||||
`define PLATFORM_MEMORY_ADDR_WIDTH 31
|
||||
`endif
|
||||
|
||||
`ifndef PLATFORM_MEMORY_DATA_WIDTH
|
||||
`define PLATFORM_MEMORY_DATA_WIDTH 512
|
||||
`endif
|
||||
|
||||
`ifndef PLATFORM_MEMORY_OFFSET
|
||||
`define PLATFORM_MEMORY_OFFSET 0
|
||||
`endif
|
||||
|
||||
`ifndef PLATFORM_MEMORY_ID_WIDTH
|
||||
`define PLATFORM_MEMORY_ID_WIDTH 32
|
||||
`endif
|
||||
|
||||
`define GEN_AXI_MEM(i) \
|
||||
|
|
18
hw/rtl/cache/VX_bank_flush.sv
vendored
18
hw/rtl/cache/VX_bank_flush.sv
vendored
|
@ -113,14 +113,16 @@ module VX_bank_flush #(
|
|||
assign flush_valid = (state_r == STATE_FLUSH);
|
||||
assign flush_line = counter_r[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin
|
||||
reg [NUM_WAYS-1:0] flush_way_r;
|
||||
always @(*) begin
|
||||
flush_way_r = '0;
|
||||
flush_way_r[counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]] = 1;
|
||||
end
|
||||
assign flush_way = flush_way_r;
|
||||
end else begin
|
||||
if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin : g_flush_way
|
||||
VX_decoder #(
|
||||
.N (`CS_WAY_SEL_BITS),
|
||||
.D (NUM_WAYS)
|
||||
) ctr_decoder (
|
||||
.data_in (counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]),
|
||||
.valid_in (1'b1),
|
||||
.data_out (flush_way)
|
||||
);
|
||||
end else begin : g_flush_way_all
|
||||
assign flush_way = {NUM_WAYS{1'b1}};
|
||||
end
|
||||
|
||||
|
|
248
hw/rtl/cache/VX_cache.sv
vendored
248
hw/rtl/cache/VX_cache.sv
vendored
|
@ -83,7 +83,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
localparam REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS);
|
||||
localparam WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS);
|
||||
localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE);
|
||||
localparam MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS;
|
||||
localparam MEM_TAG_WIDTH = `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, UUID_WIDTH);
|
||||
localparam WORDS_PER_LINE = LINE_SIZE / WORD_SIZE;
|
||||
localparam WORD_WIDTH = WORD_SIZE * 8;
|
||||
localparam WORD_SEL_BITS = `CLOG2(WORDS_PER_LINE);
|
||||
|
@ -92,9 +92,10 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
localparam LINE_ADDR_WIDTH = (`CS_WORD_ADDR_WIDTH - BANK_SEL_BITS - WORD_SEL_BITS);
|
||||
localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + 1;
|
||||
localparam CORE_RSP_DATAW = WORD_WIDTH + TAG_WIDTH;
|
||||
localparam BANK_MEM_TAG_WIDTH = UUID_WIDTH + MSHR_ADDR_WIDTH;
|
||||
|
||||
localparam CORE_REQ_BUF_ENABLE = (NUM_BANKS != 1) || (NUM_REQS != 1);
|
||||
localparam MEM_REQ_BUF_ENABLE = (NUM_BANKS != 1);
|
||||
localparam CORE_RSP_REG_DISABLE = (NUM_BANKS != 1) || (NUM_REQS != 1);
|
||||
localparam MEM_REQ_REG_DISABLE = (NUM_BANKS != 1);
|
||||
|
||||
localparam REQ_XBAR_BUF = (NUM_REQS > 4) ? 2 : 0;
|
||||
|
||||
|
@ -110,6 +111,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
) core_bus2_if[NUM_REQS]();
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_flush_begin;
|
||||
wire [`UP(UUID_WIDTH)-1:0] flush_uuid;
|
||||
wire [NUM_BANKS-1:0] per_bank_flush_end;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_fire;
|
||||
|
@ -117,6 +119,8 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
VX_cache_flush #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.UUID_WIDTH(UUID_WIDTH),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.BANK_SEL_LATENCY (`TO_OUT_BUF_REG(REQ_XBAR_BUF)) // bank xbar latency
|
||||
) flush_unit (
|
||||
.clk (clk),
|
||||
|
@ -125,6 +129,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.core_bus_out_if (core_bus2_if),
|
||||
.bank_req_fire (per_bank_core_req_fire),
|
||||
.flush_begin (per_bank_flush_begin),
|
||||
.flush_uuid (flush_uuid),
|
||||
.flush_end (per_bank_flush_end)
|
||||
);
|
||||
|
||||
|
@ -136,17 +141,14 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s;
|
||||
wire [NUM_REQS-1:0] core_rsp_ready_s;
|
||||
|
||||
`RESET_RELAY_EX (core_rsp_reset, reset, NUM_REQS, `MAX_FANOUT);
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_buf
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`CS_WORD_WIDTH + TAG_WIDTH),
|
||||
.SIZE (CORE_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0),
|
||||
.SIZE (CORE_RSP_REG_DISABLE ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF))
|
||||
) core_rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (core_rsp_reset[i]),
|
||||
.reset (reset),
|
||||
.valid_in (core_rsp_valid_s[i]),
|
||||
.ready_in (core_rsp_ready_s[i]),
|
||||
.data_in ({core_rsp_data_s[i], core_rsp_tag_s[i]}),
|
||||
|
@ -158,38 +160,13 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Memory request buffering
|
||||
wire mem_req_valid_s;
|
||||
wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_s;
|
||||
wire mem_req_rw_s;
|
||||
wire [LINE_SIZE-1:0] mem_req_byteen_s;
|
||||
wire [`CS_LINE_WIDTH-1:0] mem_req_data_s;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_s;
|
||||
wire mem_req_flush_s;
|
||||
wire mem_req_ready_s;
|
||||
|
||||
wire mem_bus_if_flush;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1),
|
||||
.SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
) mem_req_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mem_req_valid_s),
|
||||
.ready_in (mem_req_ready_s),
|
||||
.data_in ({mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_data_s, mem_req_tag_s, mem_req_flush_s}),
|
||||
.data_out ({mem_bus_if.req_data.rw, mem_bus_if.req_data.byteen, mem_bus_if.req_data.addr, mem_bus_if.req_data.data, mem_bus_if.req_data.tag, mem_bus_if_flush}),
|
||||
.valid_out (mem_bus_if.req_valid),
|
||||
.ready_out (mem_bus_if.req_ready)
|
||||
);
|
||||
|
||||
assign mem_bus_if.req_data.atype = mem_bus_if_flush ? `ADDR_TYPE_WIDTH'(1 << `ADDR_TYPE_FLUSH) : '0;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (LINE_SIZE),
|
||||
.TAG_WIDTH (MEM_TAG_WIDTH)
|
||||
) mem_bus_tmp_if();
|
||||
|
||||
// Memory response buffering
|
||||
|
||||
wire mem_rsp_valid_s;
|
||||
wire [`CS_LINE_WIDTH-1:0] mem_rsp_data_s;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_s;
|
||||
|
@ -202,14 +179,61 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
) mem_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mem_bus_if.rsp_valid),
|
||||
.ready_in (mem_bus_if.rsp_ready),
|
||||
.data_in ({mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data}),
|
||||
.valid_in (mem_bus_tmp_if.rsp_valid),
|
||||
.ready_in (mem_bus_tmp_if.rsp_ready),
|
||||
.data_in ({mem_bus_tmp_if.rsp_data.tag, mem_bus_tmp_if.rsp_data.data}),
|
||||
.data_out ({mem_rsp_tag_s, mem_rsp_data_s}),
|
||||
.valid_out (mem_rsp_valid_s),
|
||||
.ready_out (mem_rsp_ready_s)
|
||||
);
|
||||
|
||||
wire [BANK_MEM_TAG_WIDTH-1:0] bank_mem_rsp_tag;
|
||||
wire [`UP(`CS_BANK_SEL_BITS)-1:0] mem_rsp_bank_id;
|
||||
|
||||
if (NUM_BANKS > 1) begin : g_mem_rsp_tag_s_with_banks
|
||||
assign bank_mem_rsp_tag = mem_rsp_tag_s[MEM_TAG_WIDTH-1:`CS_BANK_SEL_BITS];
|
||||
assign mem_rsp_bank_id = mem_rsp_tag_s[`CS_BANK_SEL_BITS-1:0];
|
||||
end else begin : g_mem_rsp_tag_s_no_bank
|
||||
assign bank_mem_rsp_tag = mem_rsp_tag_s;
|
||||
assign mem_rsp_bank_id = 0;
|
||||
end
|
||||
|
||||
// Memory request buffering
|
||||
|
||||
wire mem_req_valid;
|
||||
wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr;
|
||||
wire mem_req_rw;
|
||||
wire [LINE_SIZE-1:0] mem_req_byteen;
|
||||
wire [`CS_LINE_WIDTH-1:0] mem_req_data;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_req_tag;
|
||||
wire mem_req_flush;
|
||||
wire mem_req_ready;
|
||||
|
||||
wire mem_req_flush_b;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1),
|
||||
.SIZE (MEM_REQ_REG_DISABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
) mem_req_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mem_req_valid),
|
||||
.ready_in (mem_req_ready),
|
||||
.data_in ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data, mem_req_tag, mem_req_flush}),
|
||||
.data_out ({mem_bus_tmp_if.req_data.rw, mem_bus_tmp_if.req_data.byteen, mem_bus_tmp_if.req_data.addr, mem_bus_tmp_if.req_data.data, mem_bus_tmp_if.req_data.tag, mem_req_flush_b}),
|
||||
.valid_out (mem_bus_tmp_if.req_valid),
|
||||
.ready_out (mem_bus_tmp_if.req_ready)
|
||||
);
|
||||
|
||||
assign mem_bus_tmp_if.req_data.flags = mem_req_flush_b ? `MEM_REQ_FLAGS_WIDTH'(1 << `MEM_REQ_FLAG_FLUSH) : '0;
|
||||
|
||||
if (WRITE_ENABLE) begin : g_mem_bus_if
|
||||
`ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if);
|
||||
end else begin : g_mem_bus_if_ro
|
||||
`ASSIGN_VX_MEM_BUS_RO_IF (mem_bus_if, mem_bus_tmp_if);
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_valid;
|
||||
|
@ -234,7 +258,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [NUM_BANKS-1:0] per_bank_mem_req_rw;
|
||||
wire [NUM_BANKS-1:0][LINE_SIZE-1:0] per_bank_mem_req_byteen;
|
||||
wire [NUM_BANKS-1:0][`CS_LINE_WIDTH-1:0] per_bank_mem_req_data;
|
||||
wire [NUM_BANKS-1:0][MSHR_ADDR_WIDTH-1:0] per_bank_mem_req_id;
|
||||
wire [NUM_BANKS-1:0][BANK_MEM_TAG_WIDTH-1:0] per_bank_mem_req_tag;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_flush;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_ready;
|
||||
|
||||
|
@ -242,11 +266,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
|
||||
assign per_bank_core_req_fire = per_bank_core_req_valid & per_bank_mem_req_ready;
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign mem_rsp_ready_s = per_bank_mem_rsp_ready;
|
||||
end else begin
|
||||
assign mem_rsp_ready_s = per_bank_mem_rsp_ready[`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s)];
|
||||
end
|
||||
assign mem_rsp_ready_s = per_bank_mem_rsp_ready[mem_rsp_bank_id];
|
||||
|
||||
// Bank requests dispatch
|
||||
|
||||
|
@ -266,35 +286,38 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [NUM_REQS-1:0][CORE_REQ_DATAW-1:0] core_req_data_in;
|
||||
wire [NUM_BANKS-1:0][CORE_REQ_DATAW-1:0] core_req_data_out;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req
|
||||
assign core_req_valid[i] = core_bus2_if[i].req_valid;
|
||||
assign core_req_rw[i] = core_bus2_if[i].req_data.rw;
|
||||
assign core_req_byteen[i] = core_bus2_if[i].req_data.byteen;
|
||||
assign core_req_addr[i] = core_bus2_if[i].req_data.addr;
|
||||
assign core_req_data[i] = core_bus2_if[i].req_data.data;
|
||||
assign core_req_tag[i] = core_bus2_if[i].req_data.tag;
|
||||
assign core_req_flush[i] = core_bus2_if[i].req_data.atype[`ADDR_TYPE_FLUSH];
|
||||
assign core_req_flush[i] = core_bus2_if[i].req_data.flags[`MEM_REQ_FLAG_FLUSH];
|
||||
assign core_bus2_if[i].req_ready = core_req_ready[i];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
if (WORDS_PER_LINE > 1) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_wsel
|
||||
if (WORDS_PER_LINE > 1) begin : g_wsel
|
||||
assign core_req_wsel[i] = core_req_addr[i][0 +: WORD_SEL_BITS];
|
||||
end else begin
|
||||
end else begin : g_no_wsel
|
||||
assign core_req_wsel[i] = '0;
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_line_addr
|
||||
assign core_req_line_addr[i] = core_req_addr[i][(BANK_SEL_BITS + WORD_SEL_BITS) +: LINE_ADDR_WIDTH];
|
||||
end
|
||||
|
||||
if (NUM_BANKS > 1) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_bid
|
||||
if (NUM_BANKS > 1) begin : g_multibanks
|
||||
assign core_req_bid[i] = core_req_addr[i][WORD_SEL_BITS +: BANK_SEL_BITS];
|
||||
end else begin : g_singlebank
|
||||
assign core_req_bid[i] = '0;
|
||||
end
|
||||
end else begin
|
||||
assign core_req_bid = '0;
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_data_in
|
||||
assign core_req_data_in[i] = {
|
||||
core_req_line_addr[i],
|
||||
core_req_rw[i],
|
||||
|
@ -310,18 +333,16 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [`PERF_CTR_BITS-1:0] perf_collisions;
|
||||
`endif
|
||||
|
||||
`RESET_RELAY (req_xbar_reset, reset);
|
||||
|
||||
VX_stream_xbar #(
|
||||
.NUM_INPUTS (NUM_REQS),
|
||||
.NUM_OUTPUTS (NUM_BANKS),
|
||||
.DATAW (CORE_REQ_DATAW),
|
||||
.PERF_CTR_BITS (`PERF_CTR_BITS),
|
||||
.ARBITER ("F"),
|
||||
.ARBITER ("R"),
|
||||
.OUT_BUF (REQ_XBAR_BUF)
|
||||
) req_xbar (
|
||||
.clk (clk),
|
||||
.reset (req_xbar_reset),
|
||||
.reset (reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.collisions(perf_collisions),
|
||||
`else
|
||||
|
@ -337,7 +358,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.ready_out (per_bank_core_req_ready)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_core_req_data_out
|
||||
assign {
|
||||
per_bank_core_req_addr[i],
|
||||
per_bank_core_req_rw[i],
|
||||
|
@ -350,17 +371,10 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
end
|
||||
|
||||
// Banks access
|
||||
for (genvar bank_id = 0; bank_id < NUM_BANKS; ++bank_id) begin : banks
|
||||
for (genvar bank_id = 0; bank_id < NUM_BANKS; ++bank_id) begin : g_banks
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr;
|
||||
wire curr_bank_mem_rsp_valid;
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign curr_bank_mem_rsp_valid = mem_rsp_valid_s;
|
||||
end else begin
|
||||
assign curr_bank_mem_rsp_valid = mem_rsp_valid_s && (`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s) == bank_id);
|
||||
end
|
||||
|
||||
`RESET_RELAY (bank_reset, reset);
|
||||
wire curr_bank_mem_rsp_valid = mem_rsp_valid_s && (mem_rsp_bank_id == bank_id);
|
||||
|
||||
VX_cache_bank #(
|
||||
.BANK_ID (bank_id),
|
||||
|
@ -379,11 +393,11 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.WRITEBACK (WRITEBACK),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.CORE_OUT_BUF (CORE_REQ_BUF_ENABLE ? 0 : CORE_OUT_BUF),
|
||||
.MEM_OUT_BUF (MEM_REQ_BUF_ENABLE ? 0 : MEM_OUT_BUF)
|
||||
.CORE_OUT_REG (CORE_RSP_REG_DISABLE ? 0 : `TO_OUT_BUF_REG(CORE_OUT_BUF)),
|
||||
.MEM_OUT_REG (MEM_REQ_REG_DISABLE ? 0 : `TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
) bank (
|
||||
.clk (clk),
|
||||
.reset (bank_reset),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_read_misses (perf_read_miss_per_bank[bank_id]),
|
||||
|
@ -416,23 +430,25 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.mem_req_rw (per_bank_mem_req_rw[bank_id]),
|
||||
.mem_req_byteen (per_bank_mem_req_byteen[bank_id]),
|
||||
.mem_req_data (per_bank_mem_req_data[bank_id]),
|
||||
.mem_req_id (per_bank_mem_req_id[bank_id]),
|
||||
.mem_req_tag (per_bank_mem_req_tag[bank_id]),
|
||||
.mem_req_flush (per_bank_mem_req_flush[bank_id]),
|
||||
.mem_req_ready (per_bank_mem_req_ready[bank_id]),
|
||||
|
||||
// Memory response
|
||||
.mem_rsp_valid (curr_bank_mem_rsp_valid),
|
||||
.mem_rsp_data (mem_rsp_data_s),
|
||||
.mem_rsp_id (`CS_MEM_TAG_TO_REQ_ID(mem_rsp_tag_s)),
|
||||
.mem_rsp_tag (bank_mem_rsp_tag),
|
||||
.mem_rsp_ready (per_bank_mem_rsp_ready[bank_id]),
|
||||
|
||||
// Flush request
|
||||
.flush_begin (per_bank_flush_begin[bank_id]),
|
||||
.flush_uuid (flush_uuid),
|
||||
.flush_end (per_bank_flush_end[bank_id])
|
||||
);
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
if (NUM_BANKS == 1) begin : g_per_bank_mem_req_addr_multibanks
|
||||
assign per_bank_mem_req_addr[bank_id] = curr_bank_mem_req_addr;
|
||||
end else begin
|
||||
end else begin : g_per_bank_mem_req_addr_singlebank
|
||||
assign per_bank_mem_req_addr[bank_id] = `CS_LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, bank_id);
|
||||
end
|
||||
end
|
||||
|
@ -442,20 +458,18 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [NUM_BANKS-1:0][CORE_RSP_DATAW-1:0] core_rsp_data_in;
|
||||
wire [NUM_REQS-1:0][CORE_RSP_DATAW-1:0] core_rsp_data_out;
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_core_rsp_data_in
|
||||
assign core_rsp_data_in[i] = {per_bank_core_rsp_data[i], per_bank_core_rsp_tag[i]};
|
||||
end
|
||||
|
||||
`RESET_RELAY (rsp_xbar_reset, reset);
|
||||
|
||||
VX_stream_xbar #(
|
||||
.NUM_INPUTS (NUM_BANKS),
|
||||
.NUM_OUTPUTS (NUM_REQS),
|
||||
.DATAW (CORE_RSP_DATAW),
|
||||
.ARBITER ("F")
|
||||
.ARBITER ("R")
|
||||
) rsp_xbar (
|
||||
.clk (clk),
|
||||
.reset (rsp_xbar_reset),
|
||||
.reset (reset),
|
||||
`UNUSED_PIN (collisions),
|
||||
.valid_in (per_bank_core_rsp_valid),
|
||||
.data_in (core_rsp_data_in),
|
||||
|
@ -467,80 +481,48 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_data_s
|
||||
assign {core_rsp_data_s[i], core_rsp_tag_s[i]} = core_rsp_data_out[i];
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire mem_req_valid_p;
|
||||
wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_p;
|
||||
wire mem_req_rw_p;
|
||||
wire [LINE_SIZE-1:0] mem_req_byteen_p;
|
||||
wire [`CS_LINE_WIDTH-1:0] mem_req_data_p;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_p;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mem_req_id_p;
|
||||
wire mem_req_flush_p;
|
||||
wire mem_req_ready_p;
|
||||
|
||||
// Memory request arbitration
|
||||
|
||||
wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + 1)-1:0] data_in;
|
||||
wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + 1)-1:0] data_in;
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_data_in
|
||||
assign data_in[i] = {
|
||||
per_bank_mem_req_addr[i],
|
||||
per_bank_mem_req_rw[i],
|
||||
per_bank_mem_req_byteen[i],
|
||||
per_bank_mem_req_data[i],
|
||||
per_bank_mem_req_id[i],
|
||||
per_bank_mem_req_tag[i],
|
||||
per_bank_mem_req_flush[i]
|
||||
};
|
||||
end
|
||||
|
||||
wire [BANK_MEM_TAG_WIDTH-1:0] bank_mem_req_tag;
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (NUM_BANKS),
|
||||
.DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + MSHR_ADDR_WIDTH + 1),
|
||||
.ARBITER ("F")
|
||||
.DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + 1),
|
||||
.ARBITER ("R")
|
||||
) mem_req_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (per_bank_mem_req_valid),
|
||||
.ready_in (per_bank_mem_req_ready),
|
||||
.data_in (data_in),
|
||||
.data_out ({mem_req_addr_p, mem_req_rw_p, mem_req_byteen_p, mem_req_data_p, mem_req_id_p, mem_req_flush_p}),
|
||||
.valid_out (mem_req_valid_p),
|
||||
.ready_out (mem_req_ready_p),
|
||||
.data_out ({mem_req_addr, mem_req_rw, mem_req_byteen, mem_req_data, bank_mem_req_tag, mem_req_flush}),
|
||||
.valid_out (mem_req_valid),
|
||||
.ready_out (mem_req_ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
if (NUM_BANKS > 1) begin
|
||||
wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr_p);
|
||||
assign mem_req_tag_p = MEM_TAG_WIDTH'({mem_req_bank_id, mem_req_id_p});
|
||||
end else begin
|
||||
assign mem_req_tag_p = MEM_TAG_WIDTH'(mem_req_id_p);
|
||||
end
|
||||
|
||||
// Memory request multi-port handling
|
||||
|
||||
assign mem_req_valid_s = mem_req_valid_p;
|
||||
assign mem_req_addr_s = mem_req_addr_p;
|
||||
assign mem_req_tag_s = mem_req_tag_p;
|
||||
assign mem_req_flush_s = mem_req_flush_p;
|
||||
assign mem_req_ready_p = mem_req_ready_s;
|
||||
|
||||
if (WRITE_ENABLE != 0) begin
|
||||
assign mem_req_rw_s = mem_req_rw_p;
|
||||
assign mem_req_byteen_s = mem_req_byteen_p;
|
||||
assign mem_req_data_s = mem_req_data_p;
|
||||
end else begin
|
||||
`UNUSED_VAR (mem_req_byteen_p)
|
||||
`UNUSED_VAR (mem_req_data_p)
|
||||
`UNUSED_VAR (mem_req_rw_p)
|
||||
|
||||
assign mem_req_rw_s = 0;
|
||||
assign mem_req_byteen_s = {LINE_SIZE{1'b1}};
|
||||
assign mem_req_data_s = '0;
|
||||
if (NUM_BANKS > 1) begin : g_mem_req_tag_multibanks
|
||||
wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr);
|
||||
assign mem_req_tag = MEM_TAG_WIDTH'({bank_mem_req_tag, mem_req_bank_id});
|
||||
end else begin : g_mem_req_tag
|
||||
assign mem_req_tag = MEM_TAG_WIDTH'(bank_mem_req_tag);
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
|
@ -567,7 +549,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
`POP_COUNT(perf_mshr_stall_per_cycle, perf_mshr_stall_per_bank);
|
||||
|
||||
wire [NUM_REQS-1:0] perf_crsp_stall_per_req;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_perf_crsp_stall_per_req
|
||||
assign perf_crsp_stall_per_req[i] = core_bus2_if[i].rsp_valid && ~core_bus2_if[i].rsp_ready;
|
||||
end
|
||||
|
||||
|
|
170
hw/rtl/cache/VX_cache_bank.sv
vendored
170
hw/rtl/cache/VX_cache_bank.sv
vendored
|
@ -53,13 +53,14 @@ module VX_cache_bank #(
|
|||
// core request tag size
|
||||
parameter TAG_WIDTH = UUID_WIDTH + 1,
|
||||
|
||||
// Core response output buffer
|
||||
parameter CORE_OUT_BUF = 0,
|
||||
// Core response output register
|
||||
parameter CORE_OUT_REG = 0,
|
||||
|
||||
// Memory request output buffer
|
||||
parameter MEM_OUT_BUF = 0,
|
||||
// Memory request output register
|
||||
parameter MEM_OUT_REG = 0,
|
||||
|
||||
parameter MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE),
|
||||
parameter MEM_TAG_WIDTH = UUID_WIDTH + MSHR_ADDR_WIDTH,
|
||||
parameter REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS),
|
||||
parameter WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS)
|
||||
) (
|
||||
|
@ -97,18 +98,19 @@ module VX_cache_bank #(
|
|||
output wire mem_req_rw,
|
||||
output wire [LINE_SIZE-1:0] mem_req_byteen,
|
||||
output wire [`CS_LINE_WIDTH-1:0] mem_req_data,
|
||||
output wire [MSHR_ADDR_WIDTH-1:0] mem_req_id, // index of the head entry in the mshr
|
||||
output wire [MEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
output wire mem_req_flush,
|
||||
input wire mem_req_ready,
|
||||
|
||||
// Memory response
|
||||
input wire mem_rsp_valid,
|
||||
input wire [`CS_LINE_WIDTH-1:0] mem_rsp_data,
|
||||
input wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id,
|
||||
input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
output wire mem_rsp_ready,
|
||||
|
||||
// flush
|
||||
input wire flush_begin,
|
||||
input wire [`UP(UUID_WIDTH)-1:0] flush_uuid,
|
||||
output wire flush_end
|
||||
);
|
||||
|
||||
|
@ -241,32 +243,54 @@ module VX_cache_bank #(
|
|||
wire flush_fire = flush_valid && flush_ready;
|
||||
wire core_req_fire = core_req_valid && core_req_ready;
|
||||
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id = mem_rsp_tag[MSHR_ADDR_WIDTH-1:0];
|
||||
|
||||
wire [TAG_WIDTH-1:0] mem_rsp_tag_s;
|
||||
if (TAG_WIDTH > MEM_TAG_WIDTH) begin : g_mem_rsp_tag_s_pad
|
||||
assign mem_rsp_tag_s = {mem_rsp_tag, (TAG_WIDTH-MEM_TAG_WIDTH)'(1'b0)};
|
||||
end else begin : g_mem_rsp_tag_s_cut
|
||||
assign mem_rsp_tag_s = mem_rsp_tag[MEM_TAG_WIDTH-1 -: TAG_WIDTH];
|
||||
`UNUSED_VAR (mem_rsp_tag)
|
||||
end
|
||||
|
||||
wire [TAG_WIDTH-1:0] flush_tag;
|
||||
if (UUID_WIDTH != 0) begin : g_flush_tag_uuid
|
||||
assign flush_tag = {flush_uuid, (TAG_WIDTH-UUID_WIDTH)'(1'b0)};
|
||||
end else begin : g_flush_tag_0
|
||||
`UNUSED_VAR (flush_uuid)
|
||||
assign flush_tag = '0;
|
||||
end
|
||||
|
||||
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || flush_fire || core_req_fire;
|
||||
assign rw_sel = replay_valid ? replay_rw : core_req_rw;
|
||||
assign byteen_sel = replay_valid ? replay_byteen : core_req_byteen;
|
||||
assign wsel_sel = replay_valid ? replay_wsel : core_req_wsel;
|
||||
assign req_idx_sel = replay_valid ? replay_idx : core_req_idx;
|
||||
assign tag_sel = replay_valid ? replay_tag : core_req_tag;
|
||||
assign tag_sel = (init_valid | flush_valid) ? (flush_valid ? flush_tag : '0) :
|
||||
(replay_valid ? replay_tag : (mem_rsp_valid ? mem_rsp_tag_s : core_req_tag));
|
||||
assign creq_flush_sel = core_req_valid && core_req_flush;
|
||||
|
||||
assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) :
|
||||
(replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr));
|
||||
|
||||
if (WRITE_ENABLE) begin
|
||||
assign data_sel[`CS_WORD_WIDTH-1:0] = replay_valid ? replay_data : (mem_rsp_valid ? mem_rsp_data[`CS_WORD_WIDTH-1:0] : core_req_data);
|
||||
end else begin
|
||||
assign data_sel[`CS_WORD_WIDTH-1:0] = mem_rsp_data[`CS_WORD_WIDTH-1:0];
|
||||
if (WRITE_ENABLE) begin : g_data_sel
|
||||
for (genvar i = 0; i < `CS_LINE_WIDTH; ++i) begin : g_i
|
||||
if (i < `CS_WORD_WIDTH) begin : g_lo
|
||||
assign data_sel[i] = replay_valid ? replay_data[i] : (mem_rsp_valid ? mem_rsp_data[i] : core_req_data[i]);
|
||||
end else begin : g_hi
|
||||
assign data_sel[i] = mem_rsp_data[i]; // only the memory response fills the upper words of data_sel
|
||||
end
|
||||
end
|
||||
end else begin : g_data_sel_ro
|
||||
assign data_sel = mem_rsp_data;
|
||||
`UNUSED_VAR (core_req_data)
|
||||
`UNUSED_VAR (replay_data)
|
||||
end
|
||||
for (genvar i = `CS_WORD_WIDTH; i < `CS_LINE_WIDTH; ++i) begin
|
||||
assign data_sel[i] = mem_rsp_data[i]; // only the memory response fills the upper words of data_sel
|
||||
end
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
if (UUID_WIDTH != 0) begin : g_req_uuid_sel
|
||||
assign req_uuid_sel = tag_sel[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
end else begin
|
||||
assign req_uuid_sel = 0;
|
||||
end else begin : g_req_uuid_sel_0
|
||||
assign req_uuid_sel = '0;
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
|
@ -280,10 +304,10 @@ module VX_cache_bank #(
|
|||
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, creq_flush_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0})
|
||||
);
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
if (UUID_WIDTH != 0) begin : g_req_uuid_st0
|
||||
assign req_uuid_st0 = tag_st0[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
end else begin
|
||||
assign req_uuid_st0 = 0;
|
||||
end else begin : g_req_uuid_st0_0
|
||||
assign req_uuid_st0 = '0;
|
||||
end
|
||||
|
||||
wire do_init_st0 = valid_st0 && is_init_st0;
|
||||
|
@ -362,10 +386,10 @@ module VX_cache_bank #(
|
|||
// we have a tag hit
|
||||
wire is_hit_st1 = (| way_sel_st1);
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
if (UUID_WIDTH != 0) begin : g_req_uuid_st1
|
||||
assign req_uuid_st1 = tag_st1[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
end else begin
|
||||
assign req_uuid_st1 = 0;
|
||||
end else begin : g_req_uuid_st1_0
|
||||
assign req_uuid_st1 = '0;
|
||||
end
|
||||
|
||||
wire is_read_st1 = is_creq_st1 && ~rw_st1;
|
||||
|
@ -394,7 +418,7 @@ module VX_cache_bank #(
|
|||
`UNUSED_VAR (do_write_miss_st1)
|
||||
|
||||
// ensure mshr replay always get a hit
|
||||
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("missed mshr replay"));
|
||||
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("%t: missed mshr replay", $time))
|
||||
|
||||
// both tag and data stores use BRAM with no read-during-write protection.
|
||||
// we ned to stall the pipeline to prevent read-after-write hazards.
|
||||
|
@ -413,14 +437,14 @@ module VX_cache_bank #(
|
|||
wire [`CS_LINE_WIDTH-1:0] dirty_data_st1;
|
||||
wire [LINE_SIZE-1:0] dirty_byteen_st1;
|
||||
|
||||
if (`CS_WORDS_PER_LINE > 1) begin
|
||||
reg [LINE_SIZE-1:0] write_byteen_r;
|
||||
if (`CS_WORDS_PER_LINE > 1) begin : g_write_byteen_st1_wsel
|
||||
reg [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen_w;
|
||||
always @(*) begin
|
||||
write_byteen_r = '0;
|
||||
write_byteen_r[wsel_st1 * WORD_SIZE +: WORD_SIZE] = byteen_st1;
|
||||
write_byteen_w = '0;
|
||||
write_byteen_w[wsel_st1] = byteen_st1;
|
||||
end
|
||||
assign write_byteen_st1 = write_byteen_r;
|
||||
end else begin
|
||||
assign write_byteen_st1 = write_byteen_w;
|
||||
end else begin : g_write_byteen_st1
|
||||
assign write_byteen_st1 = byteen_st1;
|
||||
end
|
||||
|
||||
|
@ -468,9 +492,9 @@ module VX_cache_bank #(
|
|||
|
||||
// release allocated mshr entry if we had a hit
|
||||
wire mshr_release_st1;
|
||||
if (WRITEBACK) begin
|
||||
if (WRITEBACK) begin : g_mshr_release_st1
|
||||
assign mshr_release_st1 = is_hit_st1;
|
||||
end else begin
|
||||
end else begin : g_mshr_release_st1_ro
|
||||
// we need to keep missed write requests in MSHR if there is already a pending entry to the same address
|
||||
// this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content
|
||||
// this can happen when writes are sent late, when the fill was already in flight.
|
||||
|
@ -545,7 +569,7 @@ module VX_cache_bank #(
|
|||
|
||||
// check if there are pending requests to same line in the MSHR
|
||||
wire [MSHR_SIZE-1:0] lookup_matches;
|
||||
for (genvar i = 0; i < MSHR_SIZE; ++i) begin
|
||||
for (genvar i = 0; i < MSHR_SIZE; ++i) begin : g_lookup_matches
|
||||
assign lookup_matches[i] = mshr_lookup_pending_st0[i]
|
||||
&& (i != mshr_alloc_id_st0) // exclude current mshr id
|
||||
&& (WRITEBACK || ~mshr_lookup_rw_st0[i]); // exclude write requests if writethrough
|
||||
|
@ -567,7 +591,7 @@ module VX_cache_bank #(
|
|||
VX_elastic_buffer #(
|
||||
.DATAW (TAG_WIDTH + `CS_WORD_WIDTH + REQ_SEL_WIDTH),
|
||||
.SIZE (CRSQ_SIZE),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF))
|
||||
.OUT_REG (CORE_OUT_REG)
|
||||
) core_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -587,7 +611,7 @@ module VX_cache_bank #(
|
|||
wire [`CS_LINE_WIDTH-1:0] mreq_queue_data;
|
||||
wire [LINE_SIZE-1:0] mreq_queue_byteen;
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] mreq_queue_addr;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mreq_queue_id;
|
||||
wire [MEM_TAG_WIDTH-1:0] mreq_queue_tag;
|
||||
wire mreq_queue_rw;
|
||||
wire mreq_queue_flush;
|
||||
|
||||
|
@ -595,16 +619,16 @@ module VX_cache_bank #(
|
|||
wire do_fill_or_flush_st1 = valid_st1 && is_fill_or_flush_st1;
|
||||
wire do_writeback_st1 = do_fill_or_flush_st1 && evict_dirty_st1;
|
||||
|
||||
if (WRITEBACK) begin
|
||||
if (DIRTY_BYTES) begin
|
||||
if (WRITEBACK) begin : g_mreq_queue_push
|
||||
if (DIRTY_BYTES) begin : g_dirty_bytes
|
||||
// ensure dirty bytes match the tag info
|
||||
wire has_dirty_bytes = (| dirty_byteen_st1);
|
||||
`RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID)));
|
||||
`RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID)))
|
||||
end
|
||||
assign mreq_queue_push = (((do_read_miss_st1 || do_write_miss_st1) && ~mshr_pending_st1)
|
||||
|| do_writeback_st1)
|
||||
&& ~rdw_hazard3_st1;
|
||||
end else begin
|
||||
end else begin : g_mreq_queue_push_ro
|
||||
`UNUSED_VAR (do_writeback_st1)
|
||||
assign mreq_queue_push = ((do_read_miss_st1 && ~mshr_pending_st1)
|
||||
|| do_creq_wr_st1)
|
||||
|
@ -613,33 +637,47 @@ module VX_cache_bank #(
|
|||
|
||||
assign mreq_queue_pop = mem_req_valid && mem_req_ready;
|
||||
assign mreq_queue_addr = addr_st1;
|
||||
assign mreq_queue_id = mshr_id_st1;
|
||||
assign mreq_queue_flush = creq_flush_st1;
|
||||
|
||||
if (WRITE_ENABLE) begin
|
||||
assign mreq_queue_rw = WRITEBACK ? is_fill_or_flush_st1 : rw_st1;
|
||||
assign mreq_queue_data = WRITEBACK ? dirty_data_st1 : write_data_st1;
|
||||
assign mreq_queue_byteen = WRITEBACK ? dirty_byteen_st1 : write_byteen_st1;
|
||||
end else begin
|
||||
if (WRITE_ENABLE) begin : g_mreq_queue
|
||||
if (WRITEBACK) begin : g_writeback
|
||||
assign mreq_queue_rw = is_fill_or_flush_st1;
|
||||
assign mreq_queue_data = dirty_data_st1;
|
||||
assign mreq_queue_byteen = is_fill_or_flush_st1 ? dirty_byteen_st1 : '1;
|
||||
end else begin : g_writethrough
|
||||
assign mreq_queue_rw = rw_st1;
|
||||
assign mreq_queue_data = write_data_st1;
|
||||
assign mreq_queue_byteen = rw_st1 ? write_byteen_st1 : '1;
|
||||
`UNUSED_VAR (is_fill_or_flush_st1)
|
||||
`UNUSED_VAR (dirty_data_st1)
|
||||
`UNUSED_VAR (dirty_byteen_st1)
|
||||
end
|
||||
end else begin : g_mreq_queue_ro
|
||||
assign mreq_queue_rw = 0;
|
||||
assign mreq_queue_data = 0;
|
||||
assign mreq_queue_byteen = 0;
|
||||
assign mreq_queue_data = '0;
|
||||
assign mreq_queue_byteen = '1;
|
||||
`UNUSED_VAR (dirty_data_st1)
|
||||
`UNUSED_VAR (dirty_byteen_st1)
|
||||
end
|
||||
|
||||
if (UUID_WIDTH != 0) begin : g_mreq_queue_tag_uuid
|
||||
assign mreq_queue_tag = {req_uuid_st1, mshr_id_st1};
|
||||
end else begin : g_mreq_queue_tag
|
||||
assign mreq_queue_tag = mshr_id_st1;
|
||||
end
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (1 + `CS_LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + 1),
|
||||
.DATAW (1 + `CS_LINE_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1),
|
||||
.DEPTH (MREQ_SIZE),
|
||||
.ALM_FULL (MREQ_SIZE-PIPELINE_STAGES),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
.OUT_REG (MEM_OUT_REG)
|
||||
) mem_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (mreq_queue_push),
|
||||
.pop (mreq_queue_pop),
|
||||
.data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_id, mreq_queue_byteen, mreq_queue_data, mreq_queue_flush}),
|
||||
.data_out ({mem_req_rw, mem_req_addr, mem_req_id, mem_req_byteen, mem_req_data, mem_req_flush}),
|
||||
.data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_byteen, mreq_queue_data, mreq_queue_tag, mreq_queue_flush}),
|
||||
.data_out ({mem_req_rw, mem_req_addr, mem_req_byteen, mem_req_data, mem_req_tag, mem_req_flush}),
|
||||
.empty (mreq_queue_empty),
|
||||
.alm_full (mreq_queue_alm_full),
|
||||
`UNUSED_PIN (full),
|
||||
|
@ -663,30 +701,32 @@ module VX_cache_bank #(
|
|||
&& ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire);
|
||||
always @(posedge clk) begin
|
||||
if (input_stall || pipe_stall) begin
|
||||
`TRACE(3, ("%d: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1));
|
||||
`TRACE(3, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1))
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(2, ("%d: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data));
|
||||
`TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data, req_uuid_sel))
|
||||
end
|
||||
if (replay_fire) begin
|
||||
`TRACE(2, ("%d: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel));
|
||||
`TRACE(2, ("%t: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel))
|
||||
end
|
||||
if (core_req_fire) begin
|
||||
if (core_req_rw)
|
||||
`TRACE(2, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel));
|
||||
else
|
||||
`TRACE(2, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel));
|
||||
if (core_req_rw) begin
|
||||
`TRACE(2, ("%t: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel))
|
||||
end else begin
|
||||
`TRACE(2, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel))
|
||||
end
|
||||
end
|
||||
if (crsp_queue_fire) begin
|
||||
`TRACE(2, ("%d: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1));
|
||||
`TRACE(2, ("%t: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1))
|
||||
end
|
||||
if (mreq_queue_push) begin
|
||||
if (do_creq_wr_st1 && !WRITEBACK)
|
||||
`TRACE(2, ("%d: %s writethrough: addr=0x%0h, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1));
|
||||
else if (do_writeback_st1)
|
||||
`TRACE(2, ("%d: %s writeback: addr=0x%0h, byteen=%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data));
|
||||
else
|
||||
`TRACE(2, ("%d: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1));
|
||||
if (do_creq_wr_st1 && !WRITEBACK) begin
|
||||
`TRACE(2, ("%t: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1))
|
||||
end else if (do_writeback_st1) begin
|
||||
`TRACE(2, ("%t: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1))
|
||||
end else begin
|
||||
`TRACE(2, ("%t: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mshr_id_st1, req_uuid_st1))
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
164
hw/rtl/cache/VX_cache_bypass.sv
vendored
164
hw/rtl/cache/VX_cache_bypass.sv
vendored
|
@ -56,7 +56,8 @@ module VX_cache_bypass #(
|
|||
localparam DIRECT_PASSTHRU = PASSTHRU && (`CS_WORD_SEL_BITS == 0) && (NUM_REQS == 1);
|
||||
|
||||
localparam REQ_SEL_BITS = `CLOG2(NUM_REQS);
|
||||
localparam MUX_DATAW = 1 + WORD_SIZE + CORE_ADDR_WIDTH + `ADDR_TYPE_WIDTH + CORE_DATA_WIDTH + CORE_TAG_WIDTH;
|
||||
localparam REQ_SEL_WIDTH = `UP(REQ_SEL_BITS);
|
||||
localparam MUX_DATAW = 1 + WORD_SIZE + CORE_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + CORE_DATA_WIDTH + CORE_TAG_WIDTH;
|
||||
|
||||
localparam WORDS_PER_LINE = LINE_SIZE / WORD_SIZE;
|
||||
localparam WSEL_BITS = `CLOG2(WORDS_PER_LINE);
|
||||
|
@ -72,16 +73,16 @@ module VX_cache_bypass #(
|
|||
wire core_req_nc_valid;
|
||||
wire [NUM_REQS-1:0] core_req_nc_valids;
|
||||
wire [NUM_REQS-1:0] core_req_nc_idxs;
|
||||
wire [`UP(REQ_SEL_BITS)-1:0] core_req_nc_idx;
|
||||
wire [REQ_SEL_WIDTH-1:0] core_req_nc_idx;
|
||||
wire [NUM_REQS-1:0] core_req_nc_sel;
|
||||
wire core_req_nc_ready;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
if (PASSTHRU != 0) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_nc
|
||||
if (PASSTHRU != 0) begin : g_passthru
|
||||
assign core_req_nc_idxs[i] = 1'b1;
|
||||
end else if (NC_ENABLE) begin
|
||||
assign core_req_nc_idxs[i] = core_bus_in_if[i].req_data.atype[`ADDR_TYPE_IO];
|
||||
end else begin
|
||||
end else if (NC_ENABLE) begin : g_nc
|
||||
assign core_req_nc_idxs[i] = core_bus_in_if[i].req_data.flags[`MEM_REQ_FLAG_IO];
|
||||
end else begin : g_no_nc
|
||||
assign core_req_nc_idxs[i] = 1'b0;
|
||||
end
|
||||
assign core_req_nc_valids[i] = core_bus_in_if[i].req_valid && core_req_nc_idxs[i];
|
||||
|
@ -100,7 +101,7 @@ module VX_cache_bypass #(
|
|||
.grant_ready (core_req_nc_ready)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_out_if
|
||||
assign core_bus_out_if[i].req_valid = core_bus_in_if[i].req_valid && ~core_req_nc_idxs[i];
|
||||
assign core_bus_out_if[i].req_data = core_bus_in_if[i].req_data;
|
||||
assign core_bus_in_if[i].req_ready = core_req_nc_valids[i] ? (core_req_nc_ready && core_req_nc_sel[i])
|
||||
|
@ -113,7 +114,7 @@ module VX_cache_bypass #(
|
|||
wire mem_req_out_rw;
|
||||
wire [LINE_SIZE-1:0] mem_req_out_byteen;
|
||||
wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_out_addr;
|
||||
wire [`ADDR_TYPE_WIDTH-1:0] mem_req_out_atype;
|
||||
wire [`MEM_REQ_FLAGS_WIDTH-1:0] mem_req_out_flags;
|
||||
wire [`CS_LINE_WIDTH-1:0] mem_req_out_data;
|
||||
wire [MEM_TAG_OUT_WIDTH-1:0] mem_req_out_tag;
|
||||
wire mem_req_out_ready;
|
||||
|
@ -121,28 +122,28 @@ module VX_cache_bypass #(
|
|||
wire core_req_nc_sel_rw;
|
||||
wire [WORD_SIZE-1:0] core_req_nc_sel_byteen;
|
||||
wire [CORE_ADDR_WIDTH-1:0] core_req_nc_sel_addr;
|
||||
wire [`ADDR_TYPE_WIDTH-1:0] core_req_nc_sel_atype;
|
||||
wire [`MEM_REQ_FLAGS_WIDTH-1:0] core_req_nc_sel_flags;
|
||||
wire [CORE_DATA_WIDTH-1:0] core_req_nc_sel_data;
|
||||
wire [CORE_TAG_WIDTH-1:0] core_req_nc_sel_tag;
|
||||
|
||||
wire [NUM_REQS-1:0][MUX_DATAW-1:0] core_req_nc_mux_in;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_nc_mux_in
|
||||
assign core_req_nc_mux_in[i] = {
|
||||
core_bus_in_if[i].req_data.rw,
|
||||
core_bus_in_if[i].req_data.byteen,
|
||||
core_bus_in_if[i].req_data.addr,
|
||||
core_bus_in_if[i].req_data.atype,
|
||||
core_bus_in_if[i].req_data.data,
|
||||
core_bus_in_if[i].req_data.byteen,
|
||||
core_bus_in_if[i].req_data.flags,
|
||||
core_bus_in_if[i].req_data.tag
|
||||
};
|
||||
end
|
||||
|
||||
assign {
|
||||
core_req_nc_sel_rw,
|
||||
core_req_nc_sel_byteen,
|
||||
core_req_nc_sel_addr,
|
||||
core_req_nc_sel_atype,
|
||||
core_req_nc_sel_data,
|
||||
core_req_nc_sel_byteen,
|
||||
core_req_nc_sel_flags,
|
||||
core_req_nc_sel_tag
|
||||
} = core_req_nc_mux_in[core_req_nc_idx];
|
||||
|
||||
|
@ -151,83 +152,81 @@ module VX_cache_bypass #(
|
|||
assign mem_req_out_valid = mem_bus_in_if.req_valid || core_req_nc_valid;
|
||||
assign mem_req_out_rw = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.rw : core_req_nc_sel_rw;
|
||||
assign mem_req_out_addr = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.addr : core_req_nc_sel_addr[WSEL_BITS +: MEM_ADDR_WIDTH];
|
||||
assign mem_req_out_atype = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.atype : core_req_nc_sel_atype;
|
||||
assign mem_req_out_flags = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.flags : core_req_nc_sel_flags;
|
||||
|
||||
wire [MEM_TAG_ID_BITS-1:0] mem_req_tag_id_bypass;
|
||||
|
||||
wire [CORE_TAG_ID_BITS-1:0] core_req_in_id = core_req_nc_sel_tag[CORE_TAG_ID_BITS-1:0];
|
||||
|
||||
if (WORDS_PER_LINE > 1) begin
|
||||
reg [WORDS_PER_LINE-1:0][WORD_SIZE-1:0] mem_req_byteen_in_r;
|
||||
reg [WORDS_PER_LINE-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_r;
|
||||
if (WORDS_PER_LINE > 1) begin : g_mem_req_multi_word_line
|
||||
reg [WORDS_PER_LINE-1:0][WORD_SIZE-1:0] mem_req_byteen_in_w;
|
||||
reg [WORDS_PER_LINE-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_w;
|
||||
|
||||
wire [WSEL_BITS-1:0] req_wsel = core_req_nc_sel_addr[WSEL_BITS-1:0];
|
||||
|
||||
always @(*) begin
|
||||
mem_req_byteen_in_r = '0;
|
||||
mem_req_byteen_in_r[req_wsel] = core_req_nc_sel_byteen;
|
||||
mem_req_byteen_in_w = '0;
|
||||
mem_req_byteen_in_w[req_wsel] = core_req_nc_sel_byteen;
|
||||
|
||||
mem_req_data_in_r = 'x;
|
||||
mem_req_data_in_r[req_wsel] = core_req_nc_sel_data;
|
||||
mem_req_data_in_w = 'x;
|
||||
mem_req_data_in_w[req_wsel] = core_req_nc_sel_data;
|
||||
end
|
||||
|
||||
assign mem_req_out_byteen = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.byteen : mem_req_byteen_in_r;
|
||||
assign mem_req_out_data = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.data : mem_req_data_in_r;
|
||||
if (NUM_REQS > 1) begin
|
||||
assign mem_req_out_byteen = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.byteen : mem_req_byteen_in_w;
|
||||
assign mem_req_out_data = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.data : mem_req_data_in_w;
|
||||
if (NUM_REQS > 1) begin : g_multiple_requests
|
||||
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, req_wsel, core_req_in_id});
|
||||
end else begin
|
||||
end else begin : g_single_request
|
||||
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({req_wsel, core_req_in_id});
|
||||
end
|
||||
end else begin
|
||||
end else begin : g_mem_req_single_word_line
|
||||
assign mem_req_out_byteen = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.byteen : core_req_nc_sel_byteen;
|
||||
assign mem_req_out_data = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.data : core_req_nc_sel_data;
|
||||
if (NUM_REQS > 1) begin
|
||||
if (NUM_REQS > 1) begin : g_multiple_requests
|
||||
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, core_req_in_id});
|
||||
end else begin
|
||||
end else begin : g_single_request
|
||||
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_in_id});
|
||||
end
|
||||
end
|
||||
|
||||
wire [MEM_TAG_BYPASS_BITS-1:0] mem_req_tag_bypass;
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
if (UUID_WIDTH != 0) begin : g_mem_req_tag_bypass_with_uuid
|
||||
assign mem_req_tag_bypass = {core_req_nc_sel_tag[CORE_TAG_ID_BITS +: UUID_WIDTH], mem_req_tag_id_bypass};
|
||||
end else begin
|
||||
end else begin : g_mem_req_tag_bypass
|
||||
assign mem_req_tag_bypass = mem_req_tag_id_bypass;
|
||||
end
|
||||
|
||||
if (PASSTHRU != 0) begin
|
||||
if (PASSTHRU != 0) begin : g_mem_req_out_tag_passthru
|
||||
assign mem_req_out_tag = mem_req_tag_bypass;
|
||||
`UNUSED_VAR (mem_bus_in_if.req_data.tag)
|
||||
end else begin
|
||||
if (NC_ENABLE) begin
|
||||
VX_bits_insert #(
|
||||
.N (MEM_TAG_OUT_WIDTH-1),
|
||||
.S (1),
|
||||
.POS (TAG_SEL_IDX)
|
||||
) mem_req_tag_in_nc_insert (
|
||||
.data_in (mem_bus_in_if.req_valid ? (MEM_TAG_OUT_WIDTH-1)'(mem_bus_in_if.req_data.tag) : (MEM_TAG_OUT_WIDTH-1)'(mem_req_tag_bypass)),
|
||||
.ins_in (~mem_bus_in_if.req_valid),
|
||||
.data_out (mem_req_out_tag)
|
||||
);
|
||||
end else begin
|
||||
assign mem_req_out_tag = mem_bus_in_if.req_data.tag;
|
||||
end
|
||||
end else if (NC_ENABLE) begin : g_mem_req_out_tag_nc
|
||||
VX_bits_insert #(
|
||||
.N (MEM_TAG_OUT_WIDTH-1),
|
||||
.S (1),
|
||||
.POS (TAG_SEL_IDX)
|
||||
) mem_req_tag_in_nc_insert (
|
||||
.data_in (mem_bus_in_if.req_valid ? (MEM_TAG_OUT_WIDTH-1)'(mem_bus_in_if.req_data.tag) : (MEM_TAG_OUT_WIDTH-1)'(mem_req_tag_bypass)),
|
||||
.ins_in (~mem_bus_in_if.req_valid),
|
||||
.data_out (mem_req_out_tag)
|
||||
);
|
||||
end else begin : g_mem_req_out_tag
|
||||
assign mem_req_out_tag = mem_bus_in_if.req_data.tag;
|
||||
end
|
||||
|
||||
assign mem_bus_in_if.req_ready = mem_req_out_ready;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `ADDR_TYPE_WIDTH + `CS_LINE_WIDTH + MEM_TAG_OUT_WIDTH),
|
||||
.SIZE ((!DIRECT_PASSTHRU) ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
|
||||
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + `CS_LINE_WIDTH + MEM_TAG_OUT_WIDTH),
|
||||
.SIZE (DIRECT_PASSTHRU ? 0 : `TO_OUT_BUF_SIZE(MEM_OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
) mem_req_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mem_req_out_valid),
|
||||
.ready_in (mem_req_out_ready),
|
||||
.data_in ({mem_req_out_rw, mem_req_out_byteen, mem_req_out_addr, mem_req_out_atype, mem_req_out_data, mem_req_out_tag}),
|
||||
.data_out ({mem_bus_out_if.req_data.rw, mem_bus_out_if.req_data.byteen, mem_bus_out_if.req_data.addr, mem_bus_out_if.req_data.atype, mem_bus_out_if.req_data.data, mem_bus_out_if.req_data.tag}),
|
||||
.data_in ({mem_req_out_rw, mem_req_out_byteen, mem_req_out_addr, mem_req_out_flags, mem_req_out_data, mem_req_out_tag}),
|
||||
.data_out ({mem_bus_out_if.req_data.rw, mem_bus_out_if.req_data.byteen, mem_bus_out_if.req_data.addr, mem_bus_out_if.req_data.flags, mem_bus_out_if.req_data.data, mem_bus_out_if.req_data.tag}),
|
||||
.valid_out (mem_bus_out_if.req_valid),
|
||||
.ready_out (mem_bus_out_if.req_ready)
|
||||
);
|
||||
|
@ -240,14 +239,12 @@ module VX_cache_bypass #(
|
|||
wire [NUM_REQS-1:0] core_rsp_in_ready;
|
||||
|
||||
wire is_mem_rsp_nc;
|
||||
if (PASSTHRU != 0) begin
|
||||
if (PASSTHRU != 0) begin : g_is_mem_rsp_nc_passthru
|
||||
assign is_mem_rsp_nc = mem_bus_out_if.rsp_valid;
|
||||
end else begin
|
||||
if (NC_ENABLE) begin
|
||||
assign is_mem_rsp_nc = mem_bus_out_if.rsp_valid && mem_bus_out_if.rsp_data.tag[TAG_SEL_IDX];
|
||||
end else begin
|
||||
assign is_mem_rsp_nc = 1'b0;
|
||||
end
|
||||
end else if (NC_ENABLE) begin : g_is_mem_rsp_nc
|
||||
assign is_mem_rsp_nc = mem_bus_out_if.rsp_valid && mem_bus_out_if.rsp_data.tag[TAG_SEL_IDX];
|
||||
end else begin : g_is_no_mem_rsp_nc
|
||||
assign is_mem_rsp_nc = 1'b0;
|
||||
end
|
||||
|
||||
wire [(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1:0] mem_rsp_tag_id_nc;
|
||||
|
@ -261,57 +258,52 @@ module VX_cache_bypass #(
|
|||
.data_out (mem_rsp_tag_id_nc)
|
||||
);
|
||||
|
||||
wire [`UP(REQ_SEL_BITS)-1:0] rsp_idx;
|
||||
if (NUM_REQS > 1) begin
|
||||
wire [REQ_SEL_WIDTH-1:0] rsp_idx;
|
||||
if (NUM_REQS > 1) begin : g_rsp_idx
|
||||
assign rsp_idx = mem_rsp_tag_id_nc[(CORE_TAG_ID_BITS + WSEL_BITS) +: REQ_SEL_BITS];
|
||||
end else begin
|
||||
end else begin : g_rsp_idx_0
|
||||
assign rsp_idx = 1'b0;
|
||||
end
|
||||
|
||||
reg [NUM_REQS-1:0] rsp_nc_valid_r;
|
||||
always @(*) begin
|
||||
rsp_nc_valid_r = '0;
|
||||
rsp_nc_valid_r[rsp_idx] = is_mem_rsp_nc;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_valid
|
||||
assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || (is_mem_rsp_nc && rsp_idx == REQ_SEL_WIDTH'(i));
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || rsp_nc_valid_r[i];
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_ready
|
||||
assign core_bus_out_if[i].rsp_ready = core_rsp_in_ready[i];
|
||||
end
|
||||
|
||||
if (WORDS_PER_LINE > 1) begin
|
||||
wire [WSEL_BITS-1:0] rsp_wsel = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS +: WSEL_BITS];
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_data
|
||||
if (WORDS_PER_LINE > 1) begin : g_wsel
|
||||
wire [WSEL_BITS-1:0] rsp_wsel = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS +: WSEL_BITS];
|
||||
assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ?
|
||||
core_bus_out_if[i].rsp_data.data : mem_bus_out_if.rsp_data.data[rsp_wsel * CORE_DATA_WIDTH +: CORE_DATA_WIDTH];
|
||||
end
|
||||
end else begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
end else begin : g_no_wsel
|
||||
assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ? core_bus_out_if[i].rsp_data.data : mem_bus_out_if.rsp_data.data;
|
||||
end
|
||||
end
|
||||
|
||||
wire [(CORE_TAG_ID_BITS + UUID_WIDTH)-1:0] mem_rsp_tag_in_nc2;
|
||||
if (UUID_WIDTH != 0) begin
|
||||
if (UUID_WIDTH != 0) begin : g_mem_rsp_tag_in_nc2_uuid
|
||||
assign mem_rsp_tag_in_nc2 = {mem_rsp_tag_id_nc[(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1 -: UUID_WIDTH], mem_rsp_tag_id_nc[CORE_TAG_ID_BITS-1:0]};
|
||||
end else begin
|
||||
end else begin : g_mem_rsp_tag_in_nc2
|
||||
assign mem_rsp_tag_in_nc2 = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS-1:0];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
if (PASSTHRU) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_tag
|
||||
if (PASSTHRU) begin : g_passthru
|
||||
assign core_rsp_in_tag[i] = mem_rsp_tag_in_nc2;
|
||||
end else if (NC_ENABLE) begin
|
||||
end else if (NC_ENABLE) begin : g_nc
|
||||
assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_valid ? core_bus_out_if[i].rsp_data.tag : mem_rsp_tag_in_nc2;
|
||||
end else begin
|
||||
end else begin : g_no_nc
|
||||
assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_data.tag;
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_buf
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`CS_WORD_WIDTH + CORE_TAG_WIDTH),
|
||||
.SIZE ((!DIRECT_PASSTHRU) ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0),
|
||||
.SIZE (DIRECT_PASSTHRU ? 0 : `TO_OUT_BUF_SIZE(CORE_OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF))
|
||||
) core_rsp_buf (
|
||||
.clk (clk),
|
||||
|
@ -327,22 +319,22 @@ module VX_cache_bypass #(
|
|||
|
||||
// handle memory responses ////////////////////////////////////////////////
|
||||
|
||||
if (PASSTHRU != 0) begin
|
||||
if (PASSTHRU != 0) begin : g_mem_bus_in_if_passthru
|
||||
assign mem_bus_in_if.rsp_valid = 1'b0;
|
||||
assign mem_bus_in_if.rsp_data.data = '0;
|
||||
assign mem_bus_in_if.rsp_data.tag = '0;
|
||||
end else if (NC_ENABLE) begin
|
||||
end else if (NC_ENABLE) begin : g_mem_bus_in_if_nc
|
||||
assign mem_bus_in_if.rsp_valid = mem_bus_out_if.rsp_valid && ~mem_bus_out_if.rsp_data.tag[TAG_SEL_IDX];
|
||||
assign mem_bus_in_if.rsp_data.data = mem_bus_out_if.rsp_data.data;
|
||||
assign mem_bus_in_if.rsp_data.tag = mem_rsp_tag_id_nc[MEM_TAG_IN_WIDTH-1:0];
|
||||
end else begin
|
||||
end else begin : g_mem_bus_in_if
|
||||
assign mem_bus_in_if.rsp_valid = mem_bus_out_if.rsp_valid;
|
||||
assign mem_bus_in_if.rsp_data.data = mem_bus_out_if.rsp_data.data;
|
||||
assign mem_bus_in_if.rsp_data.tag = mem_rsp_tag_id_nc;
|
||||
end
|
||||
|
||||
wire [NUM_REQS-1:0] core_rsp_out_valid;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_out_valid
|
||||
assign core_rsp_out_valid[i] = core_bus_out_if[i].rsp_valid;
|
||||
end
|
||||
|
||||
|
|
33
hw/rtl/cache/VX_cache_cluster.sv
vendored
33
hw/rtl/cache/VX_cache_cluster.sv
vendored
|
@ -82,8 +82,8 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
localparam PASSTHRU = (NUM_UNITS == 0);
|
||||
localparam ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES);
|
||||
localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
|
||||
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
|
||||
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS));
|
||||
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH, UUID_WIDTH) :
|
||||
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, UUID_WIDTH));
|
||||
|
||||
`STATIC_ASSERT(NUM_INPUTS >= NUM_CACHES, ("invalid parameter"))
|
||||
|
||||
|
@ -102,9 +102,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH (ARB_TAG_WIDTH)
|
||||
) arb_core_bus_if[NUM_CACHES * NUM_REQS]();
|
||||
|
||||
`RESET_RELAY_EX (cache_arb_reset, reset, NUM_REQS, `MAX_FANOUT);
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_arb
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (WORD_SIZE),
|
||||
.TAG_WIDTH (TAG_WIDTH)
|
||||
|
@ -115,7 +113,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH (ARB_TAG_WIDTH)
|
||||
) arb_core_bus_tmp_if[NUM_CACHES]();
|
||||
|
||||
for (genvar j = 0; j < NUM_INPUTS; ++j) begin
|
||||
for (genvar j = 0; j < NUM_INPUTS; ++j) begin : g_core_bus_tmp_if
|
||||
`ASSIGN_VX_MEM_BUS_IF (core_bus_tmp_if[j], core_bus_if[j * NUM_REQS + i]);
|
||||
end
|
||||
|
||||
|
@ -127,23 +125,20 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
.TAG_SEL_IDX (TAG_SEL_IDX),
|
||||
.ARBITER ("R"),
|
||||
.REQ_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? 2 : 0),
|
||||
.RSP_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? 2 : 0)
|
||||
) cache_arb (
|
||||
.RSP_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? CORE_OUT_BUF : 0)
|
||||
) core_arb (
|
||||
.clk (clk),
|
||||
.reset (cache_arb_reset[i]),
|
||||
.reset (reset),
|
||||
.bus_in_if (core_bus_tmp_if),
|
||||
.bus_out_if (arb_core_bus_tmp_if)
|
||||
);
|
||||
|
||||
for (genvar k = 0; k < NUM_CACHES; ++k) begin
|
||||
for (genvar k = 0; k < NUM_CACHES; ++k) begin : g_arb_core_bus_if
|
||||
`ASSIGN_VX_MEM_BUS_IF (arb_core_bus_if[k * NUM_REQS + i], arb_core_bus_tmp_if[k]);
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_CACHES; ++i) begin : caches
|
||||
|
||||
`RESET_RELAY (cache_reset, reset);
|
||||
|
||||
for (genvar i = 0; i < NUM_CACHES; ++i) begin : g_cache_wrap
|
||||
VX_cache_wrap #(
|
||||
.INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, i)),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
|
@ -171,7 +166,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
.cache_perf (perf_cache_unit[i]),
|
||||
`endif
|
||||
.clk (clk),
|
||||
.reset (cache_reset),
|
||||
.reset (reset),
|
||||
.core_bus_if (arb_core_bus_if[i * NUM_REQS +: NUM_REQS]),
|
||||
.mem_bus_if (cache_mem_bus_if[i])
|
||||
);
|
||||
|
@ -188,7 +183,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH (MEM_TAG_WIDTH),
|
||||
.TAG_SEL_IDX (TAG_SEL_IDX),
|
||||
.ARBITER ("R"),
|
||||
.REQ_OUT_BUF ((NUM_CACHES > 1) ? 2 : 0),
|
||||
.REQ_OUT_BUF ((NUM_CACHES > 1) ? MEM_OUT_BUF : 0),
|
||||
.RSP_OUT_BUF ((NUM_CACHES > 1) ? 2 : 0)
|
||||
) mem_arb (
|
||||
.clk (clk),
|
||||
|
@ -197,6 +192,10 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
.bus_out_if (mem_bus_tmp_if)
|
||||
);
|
||||
|
||||
`ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if[0]);
|
||||
if (WRITE_ENABLE) begin : g_mem_bus_if
|
||||
`ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if[0]);
|
||||
end else begin : g_mem_bus_if_ro
|
||||
`ASSIGN_VX_MEM_BUS_RO_IF (mem_bus_if, mem_bus_tmp_if[0]);
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
100
hw/rtl/cache/VX_cache_data.sv
vendored
100
hw/rtl/cache/VX_cache_data.sv
vendored
|
@ -75,64 +75,66 @@ module VX_cache_data #(
|
|||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_rdata;
|
||||
wire [`LOG2UP(NUM_WAYS)-1:0] way_idx;
|
||||
|
||||
if (WRITEBACK) begin
|
||||
if (DIRTY_BYTES) begin
|
||||
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_rdata;
|
||||
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_wdata;
|
||||
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin
|
||||
wire [LINE_SIZE-1:0] wdata = write ? (bs_rdata[i] | write_byteen) : ((fill || flush) ? '0 : bs_rdata[i]);
|
||||
assign bs_wdata[i] = init ? '0 : (way_sel[i] ? wdata : bs_rdata[i]);
|
||||
end
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (LINE_SIZE * NUM_WAYS),
|
||||
.SIZE (`CS_LINES_PER_BANK)
|
||||
) byteen_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (write || fill || flush),
|
||||
.write (init || write || fill || flush),
|
||||
.wren (1'b1),
|
||||
.addr (line_sel),
|
||||
.wdata (bs_wdata),
|
||||
.rdata (bs_rdata)
|
||||
);
|
||||
|
||||
assign dirty_byteen = bs_rdata[way_idx];
|
||||
end else begin
|
||||
assign dirty_byteen = {LINE_SIZE{1'b1}};
|
||||
end
|
||||
|
||||
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] flipped_rdata;
|
||||
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
|
||||
for (genvar j = 0; j < NUM_WAYS; ++j) begin
|
||||
assign flipped_rdata[j][i] = line_rdata[i][j];
|
||||
end
|
||||
end
|
||||
assign dirty_data = flipped_rdata[way_idx];
|
||||
end else begin
|
||||
assign dirty_byteen = '0;
|
||||
if (WRITEBACK) begin : g_dirty_data
|
||||
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] transposed_rdata;
|
||||
VX_transpose #(
|
||||
.DATAW (`CS_WORD_WIDTH),
|
||||
.N (`CS_WORDS_PER_LINE),
|
||||
.M (NUM_WAYS)
|
||||
) transpose (
|
||||
.data_in (line_rdata),
|
||||
.data_out (transposed_rdata)
|
||||
);
|
||||
assign dirty_data = transposed_rdata[way_idx];
|
||||
end else begin : g_dirty_data_0
|
||||
assign dirty_data = '0;
|
||||
end
|
||||
|
||||
if (DIRTY_BYTES) begin : g_dirty_byteen
|
||||
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_rdata;
|
||||
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_wdata;
|
||||
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_bs_wdata
|
||||
wire [LINE_SIZE-1:0] wdata = write ? (bs_rdata[i] | write_byteen) : ((fill || flush) ? '0 : bs_rdata[i]);
|
||||
assign bs_wdata[i] = init ? '0 : (way_sel[i] ? wdata : bs_rdata[i]);
|
||||
end
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (LINE_SIZE * NUM_WAYS),
|
||||
.SIZE (`CS_LINES_PER_BANK)
|
||||
) byteen_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (write || fill || flush),
|
||||
.write (init || write || fill || flush),
|
||||
.wren (1'b1),
|
||||
.addr (line_sel),
|
||||
.wdata (bs_wdata),
|
||||
.rdata (bs_rdata)
|
||||
);
|
||||
|
||||
assign dirty_byteen = bs_rdata[way_idx];
|
||||
end else begin : g_dirty_byteen_0
|
||||
assign dirty_byteen = '1;
|
||||
end
|
||||
|
||||
// order the data layout to perform ways multiplexing last.
|
||||
// this allows converting way index to binary in parallel with BRAM readaccess and way selection.
|
||||
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
|
||||
wire [BYTEENW-1:0] line_wren;
|
||||
|
||||
if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin
|
||||
if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin : g_line_wdata
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w;
|
||||
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
|
||||
for (genvar j = 0; j < NUM_WAYS; ++j) begin
|
||||
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin : g_i
|
||||
for (genvar j = 0; j < NUM_WAYS; ++j) begin : g_j
|
||||
assign line_wdata[i][j] = (fill || !WRITE_ENABLE) ? fill_data[i] : write_data[i];
|
||||
assign wren_w[i][j] = ((fill || !WRITE_ENABLE) ? {WORD_SIZE{1'b1}} : write_byteen[i])
|
||||
& {WORD_SIZE{(way_sel[j] || (NUM_WAYS == 1))}};
|
||||
end
|
||||
end
|
||||
assign line_wren = wren_w;
|
||||
end else begin
|
||||
end else begin : g_line_wdata_ro
|
||||
`UNUSED_VAR (write)
|
||||
`UNUSED_VAR (write_byteen)
|
||||
`UNUSED_VAR (write_data)
|
||||
|
@ -140,7 +142,7 @@ module VX_cache_data #(
|
|||
assign line_wren = fill;
|
||||
end
|
||||
|
||||
VX_onehot_encoder #(
|
||||
VX_encoder #(
|
||||
.N (NUM_WAYS)
|
||||
) way_enc (
|
||||
.data_in (way_sel),
|
||||
|
@ -171,9 +173,9 @@ module VX_cache_data #(
|
|||
);
|
||||
|
||||
wire [NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata;
|
||||
if (`CS_WORDS_PER_LINE > 1) begin
|
||||
if (`CS_WORDS_PER_LINE > 1) begin : g_per_way_rdata_wsel
|
||||
assign per_way_rdata = line_rdata[wsel];
|
||||
end else begin
|
||||
end else begin : g_per_way_rdata
|
||||
`UNUSED_VAR (wsel)
|
||||
assign per_way_rdata = line_rdata;
|
||||
end
|
||||
|
@ -182,16 +184,16 @@ module VX_cache_data #(
|
|||
`ifdef DBG_TRACE_CACHE
|
||||
always @(posedge clk) begin
|
||||
if (fill && ~stall) begin
|
||||
`TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data));
|
||||
`TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data))
|
||||
end
|
||||
if (flush && ~stall) begin
|
||||
`TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_byteen, dirty_data));
|
||||
`TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_byteen, dirty_data))
|
||||
end
|
||||
if (read && ~stall) begin
|
||||
`TRACE(3, ("%d: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid));
|
||||
`TRACE(3, ("%t: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid))
|
||||
end
|
||||
if (write && ~stall) begin
|
||||
`TRACE(3, ("%d: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid));
|
||||
`TRACE(3, ("%t: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
1
hw/rtl/cache/VX_cache_define.vh
vendored
1
hw/rtl/cache/VX_cache_define.vh
vendored
|
@ -57,7 +57,6 @@
|
|||
`define CS_LINE_TO_MEM_ADDR(x, i) {x, `CS_BANK_SEL_BITS'(i)}
|
||||
`define CS_MEM_ADDR_TO_BANK_ID(x) x[0 +: `CS_BANK_SEL_BITS]
|
||||
`define CS_MEM_TAG_TO_REQ_ID(x) x[MSHR_ADDR_WIDTH-1:0]
|
||||
`define CS_MEM_TAG_TO_BANK_ID(x) x[MSHR_ADDR_WIDTH +: `CS_BANK_SEL_BITS]
|
||||
|
||||
`define CS_LINE_TO_FULL_ADDR(x, i) {x, (`XLEN-$bits(x))'(i << (`XLEN-$bits(x)-`CS_BANK_SEL_BITS))}
|
||||
`define CS_MEM_TO_FULL_ADDR(x) {x, (`XLEN-$bits(x))'(0)}
|
||||
|
|
39
hw/rtl/cache/VX_cache_flush.sv
vendored
39
hw/rtl/cache/VX_cache_flush.sv
vendored
|
@ -18,6 +18,10 @@ module VX_cache_flush #(
|
|||
parameter NUM_REQS = 4,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
// core request tag size
|
||||
parameter TAG_WIDTH = UUID_WIDTH + 1,
|
||||
// Bank select latency
|
||||
parameter BANK_SEL_LATENCY = 1
|
||||
) (
|
||||
|
@ -27,6 +31,7 @@ module VX_cache_flush #(
|
|||
VX_mem_bus_if.master core_bus_out_if [NUM_REQS],
|
||||
input wire [NUM_BANKS-1:0] bank_req_fire,
|
||||
output wire [NUM_BANKS-1:0] flush_begin,
|
||||
output wire [`UP(UUID_WIDTH)-1:0] flush_uuid,
|
||||
input wire [NUM_BANKS-1:0] flush_end
|
||||
);
|
||||
localparam STATE_IDLE = 0;
|
||||
|
@ -41,13 +46,13 @@ module VX_cache_flush #(
|
|||
|
||||
wire no_inflight_reqs;
|
||||
|
||||
if (BANK_SEL_LATENCY != 0) begin
|
||||
if (BANK_SEL_LATENCY != 0) begin : g_bank_sel_latency
|
||||
|
||||
localparam NUM_REQS_W = `CLOG2(NUM_REQS+1);
|
||||
localparam NUM_BANKS_W = `CLOG2(NUM_BANKS+1);
|
||||
|
||||
wire [NUM_REQS-1:0] core_bus_out_fire;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_out_fire
|
||||
assign core_bus_out_fire[i] = core_bus_out_if[i].req_valid && core_bus_out_if[i].req_ready;
|
||||
end
|
||||
|
||||
|
@ -74,7 +79,7 @@ module VX_cache_flush #(
|
|||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
end else begin
|
||||
end else begin : g_no_bank_sel_latency
|
||||
assign no_inflight_reqs = 0;
|
||||
`UNUSED_VAR (bank_req_fire)
|
||||
end
|
||||
|
@ -82,28 +87,38 @@ module VX_cache_flush #(
|
|||
reg [NUM_BANKS-1:0] flush_done, flush_done_n;
|
||||
|
||||
wire [NUM_REQS-1:0] flush_req_mask;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign flush_req_mask[i] = core_bus_in_if[i].req_valid && core_bus_in_if[i].req_data.atype[`ADDR_TYPE_FLUSH];
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_flush_req_mask
|
||||
assign flush_req_mask[i] = core_bus_in_if[i].req_valid && core_bus_in_if[i].req_data.flags[`MEM_REQ_FLAG_FLUSH];
|
||||
end
|
||||
wire flush_req_enable = (| flush_req_mask);
|
||||
|
||||
reg [NUM_REQS-1:0] lock_released, lock_released_n;
|
||||
reg [`UP(UUID_WIDTH)-1:0] flush_uuid_r, flush_uuid_n;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_out_req
|
||||
wire input_enable = ~flush_req_enable || lock_released[i];
|
||||
assign core_bus_out_if[i].req_valid = core_bus_in_if[i].req_valid && input_enable;
|
||||
assign core_bus_out_if[i].req_data = core_bus_in_if[i].req_data;
|
||||
assign core_bus_in_if[i].req_ready = core_bus_out_if[i].req_ready && input_enable;
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_in_rsp
|
||||
assign core_bus_in_if[i].rsp_valid = core_bus_out_if[i].rsp_valid;
|
||||
assign core_bus_in_if[i].rsp_data = core_bus_out_if[i].rsp_data;
|
||||
assign core_bus_out_if[i].rsp_ready = core_bus_in_if[i].rsp_ready;
|
||||
end
|
||||
|
||||
reg [NUM_REQS-1:0][`UP(UUID_WIDTH)-1:0] core_bus_out_uuid;
|
||||
wire [NUM_REQS-1:0] core_bus_out_ready;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_out_uuid
|
||||
if (UUID_WIDTH != 0) begin : g_uuid
|
||||
assign core_bus_out_uuid[i] = core_bus_in_if[i].req_data.tag[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
end else begin : g_no_uuid
|
||||
assign core_bus_out_uuid[i] = 0;
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_out_ready
|
||||
assign core_bus_out_ready[i] = core_bus_out_if[i].req_ready;
|
||||
end
|
||||
|
||||
|
@ -111,10 +126,16 @@ module VX_cache_flush #(
|
|||
state_n = state;
|
||||
flush_done_n = flush_done;
|
||||
lock_released_n = lock_released;
|
||||
flush_uuid_n = flush_uuid_r;
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
if (flush_req_enable) begin
|
||||
state_n = (BANK_SEL_LATENCY != 0) ? STATE_WAIT1 : STATE_FLUSH;
|
||||
for (integer i = NUM_REQS-1; i >= 0; --i) begin
|
||||
if (flush_req_mask[i]) begin
|
||||
flush_uuid_n = core_bus_out_uuid[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
STATE_WAIT1: begin
|
||||
|
@ -158,8 +179,10 @@ module VX_cache_flush #(
|
|||
flush_done <= flush_done_n;
|
||||
lock_released <= lock_released_n;
|
||||
end
|
||||
flush_uuid_r <= flush_uuid_n;
|
||||
end
|
||||
|
||||
assign flush_begin = {NUM_BANKS{state == STATE_FLUSH}};
|
||||
assign flush_uuid = flush_uuid_r;
|
||||
|
||||
endmodule
|
||||
|
|
59
hw/rtl/cache/VX_cache_mshr.sv
vendored
59
hw/rtl/cache/VX_cache_mshr.sv
vendored
|
@ -135,7 +135,7 @@ module VX_cache_mshr #(
|
|||
wire dequeue_fire = dequeue_valid && dequeue_ready;
|
||||
|
||||
wire [MSHR_SIZE-1:0] addr_matches;
|
||||
for (genvar i = 0; i < MSHR_SIZE; ++i) begin
|
||||
for (genvar i = 0; i < MSHR_SIZE; ++i) begin : g_addr_matches
|
||||
assign addr_matches[i] = valid_table[i] && (addr_table[i] == lookup_addr);
|
||||
end
|
||||
|
||||
|
@ -148,7 +148,7 @@ module VX_cache_mshr #(
|
|||
.valid_out (allocate_rdy_n)
|
||||
);
|
||||
|
||||
VX_onehot_encoder #(
|
||||
VX_encoder #(
|
||||
.N (MSHR_SIZE)
|
||||
) prev_sel (
|
||||
.data_in (addr_matches & ~next_table_x),
|
||||
|
@ -267,35 +267,42 @@ module VX_cache_mshr #(
|
|||
end else begin
|
||||
show_table <= allocate_fire || lookup_valid || finalize_valid || fill_valid || dequeue_fire;
|
||||
end
|
||||
if (allocate_fire)
|
||||
`TRACE(3, ("%d: %s allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_prev, allocate_id, lkp_req_uuid));
|
||||
if (lookup_valid)
|
||||
`TRACE(3, ("%d: %s lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_pending, lkp_req_uuid));
|
||||
if (finalize_valid)
|
||||
`TRACE(3, ("%d: %s finalize release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
finalize_release, finalize_pending, finalize_prev, finalize_id, fin_req_uuid));
|
||||
if (fill_valid)
|
||||
`TRACE(3, ("%d: %s fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), `CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id));
|
||||
if (dequeue_fire)
|
||||
`TRACE(3, ("%d: %s dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_uuid));
|
||||
if (allocate_fire) begin
|
||||
`TRACE(3, ("%t: %s allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_prev, allocate_id, lkp_req_uuid))
|
||||
end
|
||||
if (lookup_valid) begin
|
||||
`TRACE(3, ("%t: %s lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_pending, lkp_req_uuid))
|
||||
end
|
||||
if (finalize_valid) begin
|
||||
`TRACE(3, ("%t: %s finalize release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
finalize_release, finalize_pending, finalize_prev, finalize_id, fin_req_uuid))
|
||||
end
|
||||
if (fill_valid) begin
|
||||
`TRACE(3, ("%t: %s fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), `CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id))
|
||||
end
|
||||
if (dequeue_fire) begin
|
||||
`TRACE(3, ("%t: %s dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_uuid))
|
||||
end
|
||||
if (show_table) begin
|
||||
`TRACE(3, ("%d: %s table", $time, INSTANCE_ID));
|
||||
`TRACE(3, ("%t: %s table", $time, INSTANCE_ID))
|
||||
for (integer i = 0; i < MSHR_SIZE; ++i) begin
|
||||
if (valid_table[i]) begin
|
||||
`TRACE(3, (" %0d=0x%0h", i, `CS_LINE_TO_FULL_ADDR(addr_table[i], BANK_ID)));
|
||||
if (write_table[i])
|
||||
`TRACE(3, ("(w)"));
|
||||
else
|
||||
`TRACE(3, ("(r)"));
|
||||
if (next_table[i])
|
||||
`TRACE(3, ("->%0d", next_index[i]));
|
||||
`TRACE(3, (" %0d=0x%0h", i, `CS_LINE_TO_FULL_ADDR(addr_table[i], BANK_ID)))
|
||||
if (write_table[i]) begin
|
||||
`TRACE(3, ("(w)"))
|
||||
end else begin
|
||||
`TRACE(3, ("(r)"))
|
||||
end
|
||||
if (next_table[i]) begin
|
||||
`TRACE(3, ("->%0d", next_index[i]))
|
||||
end
|
||||
end
|
||||
end
|
||||
`TRACE(3, ("\n"));
|
||||
`TRACE(3, ("\n"))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
36
hw/rtl/cache/VX_cache_tags.sv
vendored
36
hw/rtl/cache/VX_cache_tags.sv
vendored
|
@ -69,7 +69,7 @@ module VX_cache_tags #(
|
|||
wire [NUM_WAYS-1:0] read_valid;
|
||||
wire [NUM_WAYS-1:0] read_dirty;
|
||||
|
||||
if (NUM_WAYS > 1) begin
|
||||
if (NUM_WAYS > 1) begin : g_evict_way
|
||||
reg [NUM_WAYS-1:0] evict_way_r;
|
||||
// cyclic assignment of replacement way
|
||||
always @(posedge clk) begin
|
||||
|
@ -90,7 +90,7 @@ module VX_cache_tags #(
|
|||
.sel_in (evict_way),
|
||||
.data_out (evict_tag)
|
||||
);
|
||||
end else begin
|
||||
end else begin : g_evict_way_0
|
||||
`UNUSED_VAR (stall)
|
||||
assign evict_way = 1'b1;
|
||||
assign evict_tag = read_tag;
|
||||
|
@ -100,7 +100,7 @@ module VX_cache_tags #(
|
|||
wire fill_s = fill && (!WRITEBACK || ~stall);
|
||||
wire flush_s = flush && (!WRITEBACK || ~stall);
|
||||
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_store
|
||||
|
||||
wire do_fill = fill_s && evict_way[i];
|
||||
wire do_flush = flush_s && (!WRITEBACK || way_sel[i]); // flush the whole line in writethrough mode
|
||||
|
@ -113,10 +113,10 @@ module VX_cache_tags #(
|
|||
wire [TAG_WIDTH-1:0] line_wdata;
|
||||
wire [TAG_WIDTH-1:0] line_rdata;
|
||||
|
||||
if (WRITEBACK) begin
|
||||
if (WRITEBACK) begin : g_writeback
|
||||
assign line_wdata = {line_valid, write, line_tag};
|
||||
assign {read_valid[i], read_dirty[i], read_tag[i]} = line_rdata;
|
||||
end else begin
|
||||
end else begin : g_writethrough
|
||||
assign line_wdata = {line_valid, line_tag};
|
||||
assign {read_valid[i], read_tag[i]} = line_rdata;
|
||||
assign read_dirty[i] = 1'b0;
|
||||
|
@ -139,7 +139,7 @@ module VX_cache_tags #(
|
|||
);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_matches
|
||||
assign tag_matches[i] = read_valid[i] && (line_tag == read_tag[i]);
|
||||
end
|
||||
|
||||
|
@ -149,25 +149,27 @@ module VX_cache_tags #(
|
|||
wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_sel};
|
||||
always @(posedge clk) begin
|
||||
if (fill && ~stall) begin
|
||||
`TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_sel, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID)));
|
||||
`TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_sel, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID)))
|
||||
end
|
||||
if (init) begin
|
||||
`TRACE(3, ("%d: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel));
|
||||
`TRACE(3, ("%t: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel))
|
||||
end
|
||||
if (flush && ~stall) begin
|
||||
`TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_sel, line_sel, evict_dirty));
|
||||
`TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_sel, line_sel, evict_dirty))
|
||||
end
|
||||
if (lookup && ~stall) begin
|
||||
if (tag_matches != 0) begin
|
||||
if (write)
|
||||
`TRACE(3, ("%d: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid));
|
||||
else
|
||||
`TRACE(3, ("%d: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid));
|
||||
if (write) begin
|
||||
`TRACE(3, ("%t: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid))
|
||||
end else begin
|
||||
`TRACE(3, ("%t: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid))
|
||||
end
|
||||
end else begin
|
||||
if (write)
|
||||
`TRACE(3, ("%d: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid));
|
||||
else
|
||||
`TRACE(3, ("%d: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid));
|
||||
if (write) begin
|
||||
`TRACE(3, ("%t: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid))
|
||||
end else begin
|
||||
`TRACE(3, ("%t: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid))
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
6
hw/rtl/cache/VX_cache_top.sv
vendored
6
hw/rtl/cache/VX_cache_top.sv
vendored
|
@ -75,7 +75,7 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
input wire [NUM_REQS-1:0] core_req_rw,
|
||||
input wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen,
|
||||
input wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr,
|
||||
input wire [NUM_REQS-1:0][`ADDR_TYPE_WIDTH-1:0] core_req_atype,
|
||||
input wire [NUM_REQS-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] core_req_flags,
|
||||
input wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data,
|
||||
input wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag,
|
||||
output wire [NUM_REQS-1:0] core_req_ready,
|
||||
|
@ -117,7 +117,7 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
assign core_bus_if[i].req_data.rw = core_req_rw[i];
|
||||
assign core_bus_if[i].req_data.byteen = core_req_byteen[i];
|
||||
assign core_bus_if[i].req_data.addr = core_req_addr[i];
|
||||
assign core_bus_if[i].req_data.atype = core_req_atype[i];
|
||||
assign core_bus_if[i].req_data.flags = core_req_flags[i];
|
||||
assign core_bus_if[i].req_data.data = core_req_data[i];
|
||||
assign core_bus_if[i].req_data.tag = core_req_tag[i];
|
||||
assign core_req_ready[i] = core_bus_if[i].req_ready;
|
||||
|
@ -139,7 +139,7 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
assign mem_req_data = mem_bus_if.req_data.data;
|
||||
assign mem_req_tag = mem_bus_if.req_data.tag;
|
||||
assign mem_bus_if.req_ready = mem_req_ready;
|
||||
`UNUSED_VAR (mem_bus_if.req_data.atype)
|
||||
`UNUSED_VAR (mem_bus_if.req_data.flags)
|
||||
|
||||
// Memory response
|
||||
assign mem_bus_if.rsp_valid = mem_rsp_valid;
|
||||
|
|
123
hw/rtl/cache/VX_cache_wrap.sv
vendored
123
hw/rtl/cache/VX_cache_wrap.sv
vendored
|
@ -84,12 +84,11 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
|
||||
`STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter"))
|
||||
|
||||
localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE);
|
||||
localparam CACHE_MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS;
|
||||
localparam CACHE_MEM_TAG_WIDTH = `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, UUID_WIDTH);
|
||||
|
||||
localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
|
||||
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
|
||||
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS));
|
||||
localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
|
||||
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH, UUID_WIDTH) :
|
||||
CACHE_MEM_TAG_WIDTH);
|
||||
|
||||
localparam NC_OR_BYPASS = (NC_ENABLE || PASSTHRU);
|
||||
|
||||
|
@ -103,9 +102,12 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH (CACHE_MEM_TAG_WIDTH)
|
||||
) mem_bus_cache_if();
|
||||
|
||||
if (NC_OR_BYPASS) begin
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (LINE_SIZE),
|
||||
.TAG_WIDTH (MEM_TAG_WIDTH)
|
||||
) mem_bus_tmp_if();
|
||||
|
||||
`RESET_RELAY (nc_bypass_reset, reset);
|
||||
if (NC_OR_BYPASS) begin : g_bypass
|
||||
|
||||
VX_cache_bypass #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
|
@ -130,51 +132,31 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
.MEM_OUT_BUF (MEM_OUT_BUF)
|
||||
) cache_bypass (
|
||||
.clk (clk),
|
||||
.reset (nc_bypass_reset),
|
||||
.reset (reset),
|
||||
|
||||
.core_bus_in_if (core_bus_if),
|
||||
.core_bus_out_if(core_bus_cache_if),
|
||||
|
||||
.mem_bus_in_if (mem_bus_cache_if),
|
||||
.mem_bus_out_if (mem_bus_if)
|
||||
.mem_bus_out_if (mem_bus_tmp_if)
|
||||
);
|
||||
|
||||
end else begin
|
||||
end else begin : g_no_bypass
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_cache_if
|
||||
`ASSIGN_VX_MEM_BUS_IF (core_bus_cache_if[i], core_bus_if[i]);
|
||||
end
|
||||
|
||||
`ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_cache_if);
|
||||
`ASSIGN_VX_MEM_BUS_IF (mem_bus_tmp_if, mem_bus_cache_if);
|
||||
end
|
||||
|
||||
if (PASSTHRU != 0) begin
|
||||
if (WRITE_ENABLE) begin : g_mem_bus_if
|
||||
`ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if);
|
||||
end else begin : g_mem_bus_if_ro
|
||||
`ASSIGN_VX_MEM_BUS_RO_IF (mem_bus_if, mem_bus_tmp_if);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
`UNUSED_VAR (core_bus_cache_if[i].req_valid)
|
||||
`UNUSED_VAR (core_bus_cache_if[i].req_data)
|
||||
assign core_bus_cache_if[i].req_ready = 0;
|
||||
|
||||
assign core_bus_cache_if[i].rsp_valid = 0;
|
||||
assign core_bus_cache_if[i].rsp_data = '0;
|
||||
`UNUSED_VAR (core_bus_cache_if[i].rsp_ready)
|
||||
end
|
||||
|
||||
assign mem_bus_cache_if.req_valid = 0;
|
||||
assign mem_bus_cache_if.req_data = '0;
|
||||
`UNUSED_VAR (mem_bus_cache_if.req_ready)
|
||||
|
||||
`UNUSED_VAR (mem_bus_cache_if.rsp_valid)
|
||||
`UNUSED_VAR (mem_bus_cache_if.rsp_data)
|
||||
assign mem_bus_cache_if.rsp_ready = 0;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
assign cache_perf = '0;
|
||||
`endif
|
||||
|
||||
end else begin
|
||||
|
||||
`RESET_RELAY (cache_reset, reset);
|
||||
if (PASSTHRU == 0) begin : g_cache
|
||||
|
||||
VX_cache #(
|
||||
.INSTANCE_ID (INSTANCE_ID),
|
||||
|
@ -197,7 +179,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
.MEM_OUT_BUF (NC_OR_BYPASS ? 1 : MEM_OUT_BUF)
|
||||
) cache (
|
||||
.clk (clk),
|
||||
.reset (cache_reset),
|
||||
.reset (reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.cache_perf (cache_perf),
|
||||
`endif
|
||||
|
@ -205,18 +187,41 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
.mem_bus_if (mem_bus_cache_if)
|
||||
);
|
||||
|
||||
end else begin : g_passthru
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_cache_if
|
||||
`UNUSED_VAR (core_bus_cache_if[i].req_valid)
|
||||
`UNUSED_VAR (core_bus_cache_if[i].req_data)
|
||||
assign core_bus_cache_if[i].req_ready = 0;
|
||||
|
||||
assign core_bus_cache_if[i].rsp_valid = 0;
|
||||
assign core_bus_cache_if[i].rsp_data = '0;
|
||||
`UNUSED_VAR (core_bus_cache_if[i].rsp_ready)
|
||||
end
|
||||
|
||||
assign mem_bus_cache_if.req_valid = 0;
|
||||
assign mem_bus_cache_if.req_data = '0;
|
||||
`UNUSED_VAR (mem_bus_cache_if.req_ready)
|
||||
|
||||
`UNUSED_VAR (mem_bus_cache_if.rsp_valid)
|
||||
`UNUSED_VAR (mem_bus_cache_if.rsp_data)
|
||||
assign mem_bus_cache_if.rsp_ready = 0;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
assign cache_perf = '0;
|
||||
`endif
|
||||
|
||||
end
|
||||
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_trace
|
||||
wire [`UP(UUID_WIDTH)-1:0] core_req_uuid;
|
||||
wire [`UP(UUID_WIDTH)-1:0] core_rsp_uuid;
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
if (UUID_WIDTH != 0) begin : g_core_rsp_uuid
|
||||
assign core_req_uuid = core_bus_if[i].req_data.tag[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
assign core_rsp_uuid = core_bus_if[i].rsp_data.tag[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
end else begin
|
||||
end else begin : g_no_core_rsp_uuid
|
||||
assign core_req_uuid = 0;
|
||||
assign core_rsp_uuid = 0;
|
||||
end
|
||||
|
@ -226,13 +231,14 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
|
||||
always @(posedge clk) begin
|
||||
if (core_req_fire) begin
|
||||
if (core_bus_if[i].req_data.rw)
|
||||
`TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid));
|
||||
else
|
||||
`TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid));
|
||||
if (core_bus_if[i].req_data.rw) begin
|
||||
`TRACE(1, ("%t: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid))
|
||||
end else begin
|
||||
`TRACE(1, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid))
|
||||
end
|
||||
end
|
||||
if (core_rsp_fire) begin
|
||||
`TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid));
|
||||
`TRACE(1, ("%t: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid))
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -240,10 +246,10 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
wire [`UP(UUID_WIDTH)-1:0] mem_req_uuid;
|
||||
wire [`UP(UUID_WIDTH)-1:0] mem_rsp_uuid;
|
||||
|
||||
if ((UUID_WIDTH != 0) && (NC_OR_BYPASS != 0)) begin
|
||||
if ((UUID_WIDTH != 0) && (NC_OR_BYPASS != 0)) begin : g_mem_req_uuid
|
||||
assign mem_req_uuid = mem_bus_if.req_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
assign mem_rsp_uuid = mem_bus_if.rsp_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
end else begin
|
||||
end else begin : g_no_mem_req_uuid
|
||||
assign mem_req_uuid = 0;
|
||||
assign mem_rsp_uuid = 0;
|
||||
end
|
||||
|
@ -253,16 +259,17 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
|
||||
always @(posedge clk) begin
|
||||
if (mem_req_fire) begin
|
||||
if (mem_bus_if.req_data.rw)
|
||||
`TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%h, data=0x%h (#%0d)\n",
|
||||
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid));
|
||||
else
|
||||
`TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid));
|
||||
if (mem_bus_if.req_data.rw) begin
|
||||
`TRACE(1, ("%t: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n",
|
||||
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid))
|
||||
end else begin
|
||||
`TRACE(1, ("%t: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid))
|
||||
end
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n",
|
||||
$time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid));
|
||||
`TRACE(1, ("%t: %s mem-rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n",
|
||||
$time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -71,19 +71,19 @@ module VX_alu_int #(
|
|||
wire [NUM_LANES-1:0][`XLEN-1:0] alu_in2_imm = execute_if.data.op_args.alu.use_imm ? {NUM_LANES{`SEXT(`XLEN, execute_if.data.op_args.alu.imm)}} : alu_in2;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] alu_in2_br = (execute_if.data.op_args.alu.use_imm && ~is_br_op) ? {NUM_LANES{`SEXT(`XLEN, execute_if.data.op_args.alu.imm)}} : alu_in2;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_add_result
|
||||
assign add_result[i] = alu_in1_PC[i] + alu_in2_imm[i];
|
||||
assign add_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] + alu_in2_imm[i][31:0]));
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_sub_result
|
||||
wire [`XLEN:0] sub_in1 = {is_signed & alu_in1[i][`XLEN-1], alu_in1[i]};
|
||||
wire [`XLEN:0] sub_in2 = {is_signed & alu_in2_br[i][`XLEN-1], alu_in2_br[i]};
|
||||
assign sub_result[i] = sub_in1 - sub_in2;
|
||||
assign sub_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] - alu_in2_imm[i][31:0]));
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_shr_result
|
||||
wire [`XLEN:0] shr_in1 = {is_signed && alu_in1[i][`XLEN-1], alu_in1[i]};
|
||||
always @(*) begin
|
||||
case (alu_op[1:0])
|
||||
|
@ -102,7 +102,7 @@ module VX_alu_int #(
|
|||
assign shr_result_w[i] = `XLEN'($signed(shr_res_w));
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_msc_result
|
||||
always @(*) begin
|
||||
case (alu_op[1:0])
|
||||
2'b00: msc_result[i] = alu_in1[i] & alu_in2_imm[i]; // AND
|
||||
|
@ -114,7 +114,7 @@ module VX_alu_int #(
|
|||
assign msc_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] << alu_in2_imm[i][4:0])); // SLLW
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_alu_result
|
||||
wire [`XLEN-1:0] slt_br_result = `XLEN'({is_br_op && ~(| sub_result[i][`XLEN-1:0]), sub_result[i][`XLEN]});
|
||||
wire [`XLEN-1:0] sub_slt_br_result = (is_sub_op && ~is_br_op) ? sub_result[i][`XLEN-1:0] : slt_br_result;
|
||||
always @(*) begin
|
||||
|
@ -141,9 +141,9 @@ module VX_alu_int #(
|
|||
|
||||
assign cbr_dest = add_result[0][1 +: `PC_BITS];
|
||||
|
||||
if (LANE_BITS != 0) begin
|
||||
if (LANE_BITS != 0) begin : g_tid
|
||||
assign tid = execute_if.data.tid[0 +: LANE_BITS];
|
||||
end else begin
|
||||
end else begin : g_tid_0
|
||||
assign tid = 0;
|
||||
end
|
||||
|
||||
|
@ -185,7 +185,7 @@ module VX_alu_int #(
|
|||
.data_out ({branch_ctl_if.valid, branch_ctl_if.wid, branch_ctl_if.taken, branch_ctl_if.dest})
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_commit
|
||||
assign commit_if.data.data[i] = (is_br_op_r && is_br_static) ? {(PC_r + `PC_BITS'(2)), 1'd0} : alu_result_r[i];
|
||||
end
|
||||
|
||||
|
@ -194,8 +194,8 @@ module VX_alu_int #(
|
|||
`ifdef DBG_TRACE_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (br_enable) begin
|
||||
`TRACE(1, ("%d: %s-branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, br_wid, {commit_if.data.PC, 1'b0}, br_taken, {br_dest, 1'b0}, commit_if.data.uuid));
|
||||
`TRACE(1, ("%t: %s branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, br_wid, {commit_if.data.PC, 1'b0}, br_taken, {br_dest, 1'b0}, commit_if.data.uuid))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -68,7 +68,7 @@ module VX_alu_muldiv #(
|
|||
|
||||
wire mul_fire_in = mul_valid_in && mul_ready_in;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mul_result_tmp
|
||||
reg [`XLEN-1:0] mul_resultl, mul_resulth;
|
||||
wire [`XLEN-1:0] mul_in1 = is_alu_w ? (execute_if.data.rs1_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs1_data[i];
|
||||
wire [`XLEN-1:0] mul_in2 = is_alu_w ? (execute_if.data.rs2_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs2_data[i];
|
||||
|
@ -103,7 +103,7 @@ module VX_alu_muldiv #(
|
|||
wire [NUM_LANES-1:0][`XLEN:0] mul_in1;
|
||||
wire [NUM_LANES-1:0][`XLEN:0] mul_in2;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mul_in
|
||||
assign mul_in1[i] = is_alu_w ? {{(`XLEN-31){execute_if.data.rs1_data[i][31]}}, execute_if.data.rs1_data[i][31:0]} : {is_signed_mul_a && execute_if.data.rs1_data[i][`XLEN-1], execute_if.data.rs1_data[i]};
|
||||
assign mul_in2[i] = is_alu_w ? {{(`XLEN-31){execute_if.data.rs2_data[i][31]}}, execute_if.data.rs2_data[i][31:0]} : {is_signed_mul_b && execute_if.data.rs2_data[i][`XLEN-1], execute_if.data.rs2_data[i]};
|
||||
end
|
||||
|
@ -149,7 +149,7 @@ module VX_alu_muldiv #(
|
|||
|
||||
`else
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_multiplier
|
||||
wire [`XLEN:0] mul_in1 = {is_signed_mul_a && execute_if.data.rs1_data[i][`XLEN-1], execute_if.data.rs1_data[i]};
|
||||
wire [`XLEN:0] mul_in2 = {is_signed_mul_b && execute_if.data.rs2_data[i][`XLEN-1], execute_if.data.rs2_data[i]};
|
||||
|
||||
|
@ -184,7 +184,7 @@ module VX_alu_muldiv #(
|
|||
|
||||
`endif
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mul_result_out
|
||||
`ifdef XLEN_64
|
||||
assign mul_result_out[i] = is_mulh_out ? mul_result_tmp[i][2*(`XLEN)-1:`XLEN] :
|
||||
(is_mul_w_out ? `XLEN'($signed(mul_result_tmp[i][31:0])) :
|
||||
|
@ -219,7 +219,7 @@ module VX_alu_muldiv #(
|
|||
wire [NUM_LANES-1:0][`XLEN-1:0] div_in1;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] div_in2;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_div_in
|
||||
`ifdef XLEN_64
|
||||
assign div_in1[i] = is_alu_w ? {{(`XLEN-32){is_signed_op && execute_if.data.rs1_data[i][31]}}, execute_if.data.rs1_data[i][31:0]}: execute_if.data.rs1_data[i];
|
||||
assign div_in2[i] = is_alu_w ? {{(`XLEN-32){is_signed_op && execute_if.data.rs2_data[i][31]}}, execute_if.data.rs2_data[i][31:0]}: execute_if.data.rs2_data[i];
|
||||
|
@ -234,7 +234,7 @@ module VX_alu_muldiv #(
|
|||
wire [NUM_LANES-1:0][`XLEN-1:0] div_result_in;
|
||||
wire div_fire_in = div_valid_in && div_ready_in;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_div_result_in
|
||||
reg [`XLEN-1:0] div_quotient, div_remainder;
|
||||
always @(*) begin
|
||||
dpi_idiv (div_fire_in, is_signed_op, div_in1[i], div_in2[i], div_quotient, div_remainder);
|
||||
|
@ -306,7 +306,7 @@ module VX_alu_muldiv #(
|
|||
|
||||
assign {div_uuid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, is_rem_op_out, is_div_w_out, div_pid_out, div_sop_out, div_eop_out} = div_tag_r;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_div_result_out
|
||||
`ifdef XLEN_64
|
||||
assign div_result_out[i] = is_rem_op_out ? (is_div_w_out ? `XLEN'($signed(div_remainder[i][31:0])) : div_remainder[i]) :
|
||||
(is_div_w_out ? `XLEN'($signed(div_quotient[i][31:0])) : div_quotient[i]);
|
||||
|
@ -324,8 +324,8 @@ module VX_alu_muldiv #(
|
|||
VX_stream_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATAW (TAG_WIDTH + (NUM_LANES * `XLEN)),
|
||||
.ARBITER ("F"),
|
||||
.OUT_BUF (1)
|
||||
.ARBITER ("P"),
|
||||
.OUT_BUF (2)
|
||||
) rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -30,20 +30,24 @@ module VX_alu_unit #(
|
|||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam BLOCK_SIZE = `NUM_ALU_BLOCKS;
|
||||
localparam NUM_LANES = `NUM_ALU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam RSP_ARB_SIZE = 1 + `EXT_M_ENABLED;
|
||||
localparam PARTIAL_BW = (BLOCK_SIZE != `ISSUE_WIDTH) || (NUM_LANES != `NUM_THREADS);
|
||||
localparam PE_COUNT = 1 + `EXT_M_ENABLED;
|
||||
localparam PE_SEL_BITS = `CLOG2(PE_COUNT);
|
||||
localparam PE_IDX_INT = 0;
|
||||
localparam PE_IDX_MDV = PE_IDX_INT + `EXT_M_ENABLED;
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) per_block_execute_if[BLOCK_SIZE]();
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) per_block_commit_if[BLOCK_SIZE]();
|
||||
|
||||
VX_dispatch_unit #(
|
||||
.BLOCK_SIZE (BLOCK_SIZE),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.OUT_BUF (PARTIAL_BW ? 1 : 0)
|
||||
.OUT_BUF (PARTIAL_BW ? 3 : 0)
|
||||
) dispatch_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -51,26 +55,38 @@ module VX_alu_unit #(
|
|||
.execute_if (per_block_execute_if)
|
||||
);
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) per_block_commit_if[BLOCK_SIZE]();
|
||||
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin
|
||||
|
||||
`RESET_RELAY_EN (block_reset, reset,(BLOCK_SIZE > 1));
|
||||
|
||||
wire is_muldiv_op = `EXT_M_ENABLED && (per_block_execute_if[block_idx].data.op_args.alu.xtype == `ALU_TYPE_MULDIV);
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_alus
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) int_execute_if();
|
||||
) pe_execute_if[PE_COUNT]();
|
||||
|
||||
VX_commit_if #(
|
||||
VX_commit_if#(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) int_commit_if();
|
||||
) pe_commit_if[PE_COUNT]();
|
||||
|
||||
assign int_execute_if.valid = per_block_execute_if[block_idx].valid && ~is_muldiv_op;
|
||||
assign int_execute_if.data = per_block_execute_if[block_idx].data;
|
||||
reg [`UP(PE_SEL_BITS)-1:0] pe_select;
|
||||
always @(*) begin
|
||||
pe_select = PE_IDX_INT;
|
||||
if (`EXT_M_ENABLED && (per_block_execute_if[block_idx].data.op_args.alu.xtype == `ALU_TYPE_MULDIV))
|
||||
pe_select = PE_IDX_MDV;
|
||||
end
|
||||
|
||||
VX_pe_switch #(
|
||||
.PE_COUNT (PE_COUNT),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.ARBITER ("R"),
|
||||
.REQ_OUT_BUF (0),
|
||||
.RSP_OUT_BUF (PARTIAL_BW ? 1 : 3)
|
||||
) pe_switch (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.pe_sel (pe_select),
|
||||
.execute_in_if (per_block_execute_if[block_idx]),
|
||||
.commit_out_if (per_block_commit_if[block_idx]),
|
||||
.execute_out_if (pe_execute_if),
|
||||
.commit_in_if (pe_commit_if)
|
||||
);
|
||||
|
||||
VX_alu_int #(
|
||||
.INSTANCE_ID ($sformatf("%s-int%0d", INSTANCE_ID, block_idx)),
|
||||
|
@ -78,76 +94,23 @@ module VX_alu_unit #(
|
|||
.NUM_LANES (NUM_LANES)
|
||||
) alu_int (
|
||||
.clk (clk),
|
||||
.reset (block_reset),
|
||||
.execute_if (int_execute_if),
|
||||
.reset (reset),
|
||||
.execute_if (pe_execute_if[PE_IDX_INT]),
|
||||
.branch_ctl_if (branch_ctl_if[block_idx]),
|
||||
.commit_if (int_commit_if)
|
||||
.commit_if (pe_commit_if[PE_IDX_INT])
|
||||
);
|
||||
|
||||
`ifdef EXT_M_ENABLE
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) muldiv_execute_if();
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) muldiv_commit_if();
|
||||
|
||||
assign muldiv_execute_if.valid = per_block_execute_if[block_idx].valid && is_muldiv_op;
|
||||
assign muldiv_execute_if.data = per_block_execute_if[block_idx].data;
|
||||
|
||||
VX_alu_muldiv #(
|
||||
.INSTANCE_ID ($sformatf("%s-muldiv%0d", INSTANCE_ID, block_idx)),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) muldiv_unit (
|
||||
.clk (clk),
|
||||
.reset (block_reset),
|
||||
.execute_if (muldiv_execute_if),
|
||||
.commit_if (muldiv_commit_if)
|
||||
.reset (reset),
|
||||
.execute_if (pe_execute_if[PE_IDX_MDV]),
|
||||
.commit_if (pe_commit_if[PE_IDX_MDV])
|
||||
);
|
||||
|
||||
`endif
|
||||
|
||||
assign per_block_execute_if[block_idx].ready =
|
||||
`ifdef EXT_M_ENABLE
|
||||
is_muldiv_op ? muldiv_execute_if.ready :
|
||||
`endif
|
||||
int_execute_if.ready;
|
||||
|
||||
// send response
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (RSP_ARB_SIZE),
|
||||
.DATAW (RSP_ARB_DATAW),
|
||||
.OUT_BUF (PARTIAL_BW ? 1 : 3),
|
||||
.ARBITER ("F")
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (block_reset),
|
||||
.valid_in ({
|
||||
`ifdef EXT_M_ENABLE
|
||||
muldiv_commit_if.valid,
|
||||
`endif
|
||||
int_commit_if.valid
|
||||
}),
|
||||
.ready_in ({
|
||||
`ifdef EXT_M_ENABLE
|
||||
muldiv_commit_if.ready,
|
||||
`endif
|
||||
int_commit_if.ready
|
||||
}),
|
||||
.data_in ({
|
||||
`ifdef EXT_M_ENABLE
|
||||
muldiv_commit_if.data,
|
||||
`endif
|
||||
int_commit_if.data
|
||||
}),
|
||||
.data_out (per_block_commit_if[block_idx].data),
|
||||
.valid_out (per_block_commit_if[block_idx].valid),
|
||||
.ready_out (per_block_commit_if[block_idx].ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
end
|
||||
|
||||
VX_gather_unit #(
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_commit import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
||||
module VX_commit import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -41,28 +41,26 @@ module VX_commit import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
wire [`ISSUE_WIDTH-1:0][`NUM_THREADS-1:0] per_issue_commit_tmask;
|
||||
wire [`ISSUE_WIDTH-1:0] per_issue_commit_eop;
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_commit_arbs
|
||||
|
||||
wire [`NUM_EX_UNITS-1:0] valid_in;
|
||||
wire [`NUM_EX_UNITS-1:0][DATAW-1:0] data_in;
|
||||
wire [`NUM_EX_UNITS-1:0] ready_in;
|
||||
|
||||
for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin
|
||||
for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin : g_data_in
|
||||
assign valid_in[j] = commit_if[j * `ISSUE_WIDTH + i].valid;
|
||||
assign data_in[j] = commit_if[j * `ISSUE_WIDTH + i].data;
|
||||
assign commit_if[j * `ISSUE_WIDTH + i].ready = ready_in[j];
|
||||
end
|
||||
|
||||
`RESET_RELAY (arb_reset, reset);
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (`NUM_EX_UNITS),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER ("R"),
|
||||
.ARBITER ("P"),
|
||||
.OUT_BUF (1)
|
||||
) commit_arb (
|
||||
.clk (clk),
|
||||
.reset (arb_reset),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in),
|
||||
.ready_in (ready_in),
|
||||
.data_in (data_in),
|
||||
|
@ -86,7 +84,7 @@ module VX_commit import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
|
||||
assign commit_fire_any = (| per_issue_commit_fire);
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_commit_size
|
||||
wire [COMMIT_SIZEW-1:0] count;
|
||||
`POP_COUNT(count, per_issue_commit_tmask[i]);
|
||||
assign commit_size[i] = count;
|
||||
|
@ -162,7 +160,7 @@ module VX_commit import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
|
||||
// Writeback
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_writeback
|
||||
assign writeback_if[i].valid = commit_arb_if[i].valid && commit_arb_if[i].data.wb;
|
||||
assign writeback_if[i].data.uuid = commit_arb_if[i].data.uuid;
|
||||
assign writeback_if[i].data.wis = wid_to_wis(commit_arb_if[i].data.wid);
|
||||
|
@ -176,15 +174,15 @@ module VX_commit import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
end
|
||||
|
||||
`ifdef DBG_TRACE_PIPELINE
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_trace
|
||||
for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin : g_j
|
||||
always @(posedge clk) begin
|
||||
if (commit_if[j * `ISSUE_WIDTH + i].valid && commit_if[j * `ISSUE_WIDTH + i].ready) begin
|
||||
`TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0}));
|
||||
`TRACE(1, ("%t: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0}))
|
||||
trace_ex_type(1, j);
|
||||
`TRACE(1, (", tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", commit_if[j * `ISSUE_WIDTH + i].data.tmask, commit_if[j * `ISSUE_WIDTH + i].data.wb, commit_if[j * `ISSUE_WIDTH + i].data.rd, commit_if[j * `ISSUE_WIDTH + i].data.sop, commit_if[j * `ISSUE_WIDTH + i].data.eop));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", commit_if[j * `ISSUE_WIDTH + i].data.data, `NUM_THREADS);
|
||||
`TRACE(1, (" (#%0d)\n", commit_if[j * `ISSUE_WIDTH + i].data.uuid));
|
||||
`TRACE(1, (", tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", commit_if[j * `ISSUE_WIDTH + i].data.tmask, commit_if[j * `ISSUE_WIDTH + i].data.wb, commit_if[j * `ISSUE_WIDTH + i].data.rd, commit_if[j * `ISSUE_WIDTH + i].data.sop, commit_if[j * `ISSUE_WIDTH + i].data.eop))
|
||||
`TRACE_ARRAY1D(1, "0x%0h", commit_if[j * `ISSUE_WIDTH + i].data.data, `NUM_THREADS)
|
||||
`TRACE(1, (" (#%0d)\n", commit_if[j * `ISSUE_WIDTH + i].data.uuid))
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -75,31 +75,23 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
assign mem_perf_tmp_if.mem = mem_perf_if.mem;
|
||||
`endif
|
||||
|
||||
`RESET_RELAY (dcr_data_reset, reset);
|
||||
`RESET_RELAY (schedule_reset, reset);
|
||||
`RESET_RELAY (fetch_reset, reset);
|
||||
`RESET_RELAY (decode_reset, reset);
|
||||
`RESET_RELAY (issue_reset, reset);
|
||||
`RESET_RELAY (execute_reset, reset);
|
||||
`RESET_RELAY (commit_reset, reset);
|
||||
|
||||
base_dcrs_t base_dcrs;
|
||||
|
||||
VX_dcr_data dcr_data (
|
||||
.clk (clk),
|
||||
.reset (dcr_data_reset),
|
||||
.reset (reset),
|
||||
.dcr_bus_if (dcr_bus_if),
|
||||
.base_dcrs (base_dcrs)
|
||||
);
|
||||
|
||||
`SCOPE_IO_SWITCH (3)
|
||||
`SCOPE_IO_SWITCH (3);
|
||||
|
||||
VX_schedule #(
|
||||
.INSTANCE_ID ($sformatf("%s-schedule", INSTANCE_ID)),
|
||||
.CORE_ID (CORE_ID)
|
||||
) schedule (
|
||||
.clk (clk),
|
||||
.reset (schedule_reset),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.sched_perf (pipeline_perf_if.sched),
|
||||
|
@ -127,7 +119,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
) fetch (
|
||||
`SCOPE_IO_BIND (0)
|
||||
.clk (clk),
|
||||
.reset (fetch_reset),
|
||||
.reset (reset),
|
||||
.icache_bus_if (icache_bus_if),
|
||||
.schedule_if (schedule_if),
|
||||
.fetch_if (fetch_if)
|
||||
|
@ -137,7 +129,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
.INSTANCE_ID ($sformatf("%s-decode", INSTANCE_ID))
|
||||
) decode (
|
||||
.clk (clk),
|
||||
.reset (decode_reset),
|
||||
.reset (reset),
|
||||
.fetch_if (fetch_if),
|
||||
.decode_if (decode_if),
|
||||
.decode_sched_if(decode_sched_if)
|
||||
|
@ -149,7 +141,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
`SCOPE_IO_BIND (1)
|
||||
|
||||
.clk (clk),
|
||||
.reset (issue_reset),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.issue_perf (pipeline_perf_if.issue),
|
||||
|
@ -167,7 +159,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
`SCOPE_IO_BIND (2)
|
||||
|
||||
.clk (clk),
|
||||
.reset (execute_reset),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.mem_perf_if (mem_perf_tmp_if),
|
||||
|
@ -192,7 +184,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
.INSTANCE_ID ($sformatf("%s-commit", INSTANCE_ID))
|
||||
) commit (
|
||||
.clk (clk),
|
||||
.reset (commit_reset),
|
||||
.reset (reset),
|
||||
|
||||
.commit_if (commit_if),
|
||||
|
||||
|
@ -202,134 +194,18 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
.commit_sched_if(commit_sched_if)
|
||||
);
|
||||
|
||||
VX_lsu_mem_if #(
|
||||
.NUM_LANES (`NUM_LSU_LANES),
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||
) lsu_dcache_if[`NUM_LSU_BLOCKS]();
|
||||
|
||||
`ifdef LMEM_ENABLE
|
||||
|
||||
`RESET_RELAY (lmem_unit_reset, reset);
|
||||
|
||||
VX_lmem_unit #(
|
||||
VX_mem_unit #(
|
||||
.INSTANCE_ID (INSTANCE_ID)
|
||||
) lmem_unit (
|
||||
.clk (clk),
|
||||
.reset (lmem_unit_reset),
|
||||
) mem_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.cache_perf (mem_perf_tmp_if.lmem),
|
||||
.lmem_perf (mem_perf_tmp_if.lmem),
|
||||
`endif
|
||||
.lsu_mem_in_if (lsu_mem_if),
|
||||
.lsu_mem_out_if (lsu_dcache_if)
|
||||
.lsu_mem_if (lsu_mem_if),
|
||||
.dcache_bus_if (dcache_bus_if)
|
||||
);
|
||||
|
||||
`else
|
||||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||
`ASSIGN_VX_LSU_MEM_IF (lsu_dcache_if[i], lsu_mem_if[i]);
|
||||
end
|
||||
|
||||
`endif
|
||||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||
|
||||
VX_lsu_mem_if #(
|
||||
.NUM_LANES (DCACHE_CHANNELS),
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH)
|
||||
) dcache_coalesced_if();
|
||||
|
||||
if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin
|
||||
|
||||
`RESET_RELAY (mem_coalescer_reset, reset);
|
||||
|
||||
VX_mem_coalescer #(
|
||||
.INSTANCE_ID ($sformatf("%s-coalescer%0d", INSTANCE_ID, i)),
|
||||
.NUM_REQS (`NUM_LSU_LANES),
|
||||
.DATA_IN_SIZE (LSU_WORD_SIZE),
|
||||
.DATA_OUT_SIZE (DCACHE_WORD_SIZE),
|
||||
.ADDR_WIDTH (LSU_ADDR_WIDTH),
|
||||
.ATYPE_WIDTH (`ADDR_TYPE_WIDTH),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.QUEUE_SIZE (`LSUQ_OUT_SIZE)
|
||||
) mem_coalescer (
|
||||
.clk (clk),
|
||||
.reset (mem_coalescer_reset),
|
||||
|
||||
// Input request
|
||||
.in_req_valid (lsu_dcache_if[i].req_valid),
|
||||
.in_req_mask (lsu_dcache_if[i].req_data.mask),
|
||||
.in_req_rw (lsu_dcache_if[i].req_data.rw),
|
||||
.in_req_byteen (lsu_dcache_if[i].req_data.byteen),
|
||||
.in_req_addr (lsu_dcache_if[i].req_data.addr),
|
||||
.in_req_atype (lsu_dcache_if[i].req_data.atype),
|
||||
.in_req_data (lsu_dcache_if[i].req_data.data),
|
||||
.in_req_tag (lsu_dcache_if[i].req_data.tag),
|
||||
.in_req_ready (lsu_dcache_if[i].req_ready),
|
||||
|
||||
// Input response
|
||||
.in_rsp_valid (lsu_dcache_if[i].rsp_valid),
|
||||
.in_rsp_mask (lsu_dcache_if[i].rsp_data.mask),
|
||||
.in_rsp_data (lsu_dcache_if[i].rsp_data.data),
|
||||
.in_rsp_tag (lsu_dcache_if[i].rsp_data.tag),
|
||||
.in_rsp_ready (lsu_dcache_if[i].rsp_ready),
|
||||
|
||||
// Output request
|
||||
.out_req_valid (dcache_coalesced_if.req_valid),
|
||||
.out_req_mask (dcache_coalesced_if.req_data.mask),
|
||||
.out_req_rw (dcache_coalesced_if.req_data.rw),
|
||||
.out_req_byteen (dcache_coalesced_if.req_data.byteen),
|
||||
.out_req_addr (dcache_coalesced_if.req_data.addr),
|
||||
.out_req_atype (dcache_coalesced_if.req_data.atype),
|
||||
.out_req_data (dcache_coalesced_if.req_data.data),
|
||||
.out_req_tag (dcache_coalesced_if.req_data.tag),
|
||||
.out_req_ready (dcache_coalesced_if.req_ready),
|
||||
|
||||
// Output response
|
||||
.out_rsp_valid (dcache_coalesced_if.rsp_valid),
|
||||
.out_rsp_mask (dcache_coalesced_if.rsp_data.mask),
|
||||
.out_rsp_data (dcache_coalesced_if.rsp_data.data),
|
||||
.out_rsp_tag (dcache_coalesced_if.rsp_data.tag),
|
||||
.out_rsp_ready (dcache_coalesced_if.rsp_ready)
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
||||
`ASSIGN_VX_LSU_MEM_IF (dcache_coalesced_if, lsu_dcache_if[i]);
|
||||
|
||||
end
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH)
|
||||
) dcache_bus_tmp_if[DCACHE_CHANNELS]();
|
||||
|
||||
`RESET_RELAY (lsu_adapter_reset, reset);
|
||||
|
||||
VX_lsu_adapter #(
|
||||
.NUM_LANES (DCACHE_CHANNELS),
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH),
|
||||
.TAG_SEL_BITS (DCACHE_TAG_WIDTH - `UUID_WIDTH),
|
||||
.ARBITER ("P"),
|
||||
.REQ_OUT_BUF (0),
|
||||
.RSP_OUT_BUF (0)
|
||||
) lsu_adapter (
|
||||
.clk (clk),
|
||||
.reset (lsu_adapter_reset),
|
||||
.lsu_mem_if (dcache_coalesced_if),
|
||||
.mem_bus_if (dcache_bus_tmp_if)
|
||||
);
|
||||
|
||||
for (genvar j = 0; j < DCACHE_CHANNELS; ++j) begin
|
||||
`ASSIGN_VX_MEM_BUS_IF (dcache_bus_if[i * DCACHE_CHANNELS + j], dcache_bus_tmp_if[j]);
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
|
||||
wire [`CLOG2(LSU_NUM_REQS+1)-1:0] perf_dcache_rd_req_per_cycle;
|
||||
|
@ -353,8 +229,8 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
wire [LSU_NUM_REQS-1:0] perf_dcache_wr_req_fire, perf_dcache_wr_req_fire_r;
|
||||
wire [LSU_NUM_REQS-1:0] perf_dcache_rsp_fire;
|
||||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||
for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_perf_dcache
|
||||
for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin : g_j
|
||||
assign perf_dcache_rd_req_fire[i * `NUM_LSU_LANES + j] = lsu_mem_if[i].req_valid && lsu_mem_if[i].req_data.mask[j] && lsu_mem_if[i].req_ready && ~lsu_mem_if[i].req_data.rw;
|
||||
assign perf_dcache_wr_req_fire[i * `NUM_LSU_LANES + j] = lsu_mem_if[i].req_valid && lsu_mem_if[i].req_data.mask[j] && lsu_mem_if[i].req_ready && lsu_mem_if[i].req_data.rw;
|
||||
assign perf_dcache_rsp_fire[i * `NUM_LSU_LANES + j] = lsu_mem_if[i].rsp_valid && lsu_mem_if[i].rsp_data.mask[j] && lsu_mem_if[i].rsp_ready;
|
||||
|
|
|
@ -32,7 +32,7 @@ module VX_core_top import VX_gpu_pkg::*; #(
|
|||
output wire [DCACHE_NUM_REQS-1:0] dcache_req_rw,
|
||||
output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE-1:0] dcache_req_byteen,
|
||||
output wire [DCACHE_NUM_REQS-1:0][DCACHE_ADDR_WIDTH-1:0] dcache_req_addr,
|
||||
output wire [DCACHE_NUM_REQS-1:0][`ADDR_TYPE_WIDTH-1:0] dcache_req_atype,
|
||||
output wire [DCACHE_NUM_REQS-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] dcache_req_flags,
|
||||
output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] dcache_req_data,
|
||||
output wire [DCACHE_NUM_REQS-1:0][DCACHE_TAG_WIDTH-1:0] dcache_req_tag,
|
||||
input wire [DCACHE_NUM_REQS-1:0] dcache_req_ready,
|
||||
|
@ -96,7 +96,7 @@ module VX_core_top import VX_gpu_pkg::*; #(
|
|||
assign dcache_req_rw[i] = dcache_bus_if[i].req_data.rw;
|
||||
assign dcache_req_byteen[i] = dcache_bus_if[i].req_data.byteen;
|
||||
assign dcache_req_addr[i] = dcache_bus_if[i].req_data.addr;
|
||||
assign dcache_req_atype[i] = dcache_bus_if[i].req_data.atype;
|
||||
assign dcache_req_flags[i] = dcache_bus_if[i].req_data.flags;
|
||||
assign dcache_req_data[i] = dcache_bus_if[i].req_data.data;
|
||||
assign dcache_req_tag[i] = dcache_bus_if[i].req_data.tag;
|
||||
assign dcache_bus_if[i].req_ready = dcache_req_ready[i];
|
||||
|
@ -119,7 +119,7 @@ module VX_core_top import VX_gpu_pkg::*; #(
|
|||
assign icache_req_data = icache_bus_if.req_data.data;
|
||||
assign icache_req_tag = icache_bus_if.req_data.tag;
|
||||
assign icache_bus_if.req_ready = icache_req_ready;
|
||||
`UNUSED_VAR (icache_bus_if.req_data.atype)
|
||||
`UNUSED_VAR (icache_bus_if.req_data.flags)
|
||||
|
||||
assign icache_bus_if.rsp_valid = icache_rsp_valid;
|
||||
assign icache_bus_if.rsp_data.tag = icache_rsp_tag;
|
||||
|
|
|
@ -83,7 +83,7 @@ import VX_fpu_pkg::*;
|
|||
wire [`NUM_FPU_BLOCKS-1:0][`NW_WIDTH-1:0] fpu_write_wid;
|
||||
fflags_t [`NUM_FPU_BLOCKS-1:0] fpu_write_fflags;
|
||||
|
||||
for (genvar i = 0; i < `NUM_FPU_BLOCKS; ++i) begin
|
||||
for (genvar i = 0; i < `NUM_FPU_BLOCKS; ++i) begin : g_fpu_write
|
||||
assign fpu_write_enable[i] = fpu_csr_if[i].write_enable;
|
||||
assign fpu_write_wid[i] = fpu_csr_if[i].write_wid;
|
||||
assign fpu_write_fflags[i] = fpu_csr_if[i].write_fflags;
|
||||
|
@ -107,7 +107,7 @@ import VX_fpu_pkg::*;
|
|||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_FPU_BLOCKS; ++i) begin
|
||||
for (genvar i = 0; i < `NUM_FPU_BLOCKS; ++i) begin : g_fpu_csr_read_frm
|
||||
assign fpu_csr_if[i].read_frm = fcsr[fpu_csr_if[i].read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS];
|
||||
end
|
||||
|
||||
|
@ -155,41 +155,41 @@ import VX_fpu_pkg::*;
|
|||
|
||||
// CSRs read //////////////////////////////////////////////////////////////
|
||||
|
||||
reg [`XLEN-1:0] read_data_ro_r;
|
||||
reg [`XLEN-1:0] read_data_rw_r;
|
||||
reg read_addr_valid_r;
|
||||
reg [`XLEN-1:0] read_data_ro_w;
|
||||
reg [`XLEN-1:0] read_data_rw_w;
|
||||
reg read_addr_valid_w;
|
||||
|
||||
always @(*) begin
|
||||
read_data_ro_r = '0;
|
||||
read_data_rw_r = '0;
|
||||
read_addr_valid_r = 1;
|
||||
read_data_ro_w = '0;
|
||||
read_data_rw_w = '0;
|
||||
read_addr_valid_w = 1;
|
||||
case (read_addr)
|
||||
`VX_CSR_MVENDORID : read_data_ro_r = `XLEN'(`VENDOR_ID);
|
||||
`VX_CSR_MARCHID : read_data_ro_r = `XLEN'(`ARCHITECTURE_ID);
|
||||
`VX_CSR_MIMPID : read_data_ro_r = `XLEN'(`IMPLEMENTATION_ID);
|
||||
`VX_CSR_MISA : read_data_ro_r = `XLEN'({2'(`CLOG2(`XLEN/16)), 30'(`MISA_STD)});
|
||||
`VX_CSR_MVENDORID : read_data_ro_w = `XLEN'(`VENDOR_ID);
|
||||
`VX_CSR_MARCHID : read_data_ro_w = `XLEN'(`ARCHITECTURE_ID);
|
||||
`VX_CSR_MIMPID : read_data_ro_w = `XLEN'(`IMPLEMENTATION_ID);
|
||||
`VX_CSR_MISA : read_data_ro_w = `XLEN'({2'(`CLOG2(`XLEN/16)), 30'(`MISA_STD)});
|
||||
`ifdef EXT_F_ENABLE
|
||||
`VX_CSR_FFLAGS : read_data_rw_r = `XLEN'(fcsr[read_wid][`FP_FLAGS_BITS-1:0]);
|
||||
`VX_CSR_FRM : read_data_rw_r = `XLEN'(fcsr[read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS]);
|
||||
`VX_CSR_FCSR : read_data_rw_r = `XLEN'(fcsr[read_wid]);
|
||||
`VX_CSR_FFLAGS : read_data_rw_w = `XLEN'(fcsr[read_wid][`FP_FLAGS_BITS-1:0]);
|
||||
`VX_CSR_FRM : read_data_rw_w = `XLEN'(fcsr[read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS]);
|
||||
`VX_CSR_FCSR : read_data_rw_w = `XLEN'(fcsr[read_wid]);
|
||||
`endif
|
||||
`VX_CSR_MSCRATCH : read_data_rw_r = mscratch;
|
||||
`VX_CSR_MSCRATCH : read_data_rw_w = mscratch;
|
||||
|
||||
`VX_CSR_WARP_ID : read_data_ro_r = `XLEN'(read_wid);
|
||||
`VX_CSR_CORE_ID : read_data_ro_r = `XLEN'(CORE_ID);
|
||||
`VX_CSR_ACTIVE_THREADS: read_data_ro_r = `XLEN'(thread_masks[read_wid]);
|
||||
`VX_CSR_ACTIVE_WARPS: read_data_ro_r = `XLEN'(active_warps);
|
||||
`VX_CSR_NUM_THREADS: read_data_ro_r = `XLEN'(`NUM_THREADS);
|
||||
`VX_CSR_NUM_WARPS : read_data_ro_r = `XLEN'(`NUM_WARPS);
|
||||
`VX_CSR_NUM_CORES : read_data_ro_r = `XLEN'(`NUM_CORES * `NUM_CLUSTERS);
|
||||
`VX_CSR_LOCAL_MEM_BASE: read_data_ro_r = `XLEN'(`LMEM_BASE_ADDR);
|
||||
`VX_CSR_WARP_ID : read_data_ro_w = `XLEN'(read_wid);
|
||||
`VX_CSR_CORE_ID : read_data_ro_w = `XLEN'(CORE_ID);
|
||||
`VX_CSR_ACTIVE_THREADS: read_data_ro_w = `XLEN'(thread_masks[read_wid]);
|
||||
`VX_CSR_ACTIVE_WARPS: read_data_ro_w = `XLEN'(active_warps);
|
||||
`VX_CSR_NUM_THREADS: read_data_ro_w = `XLEN'(`NUM_THREADS);
|
||||
`VX_CSR_NUM_WARPS : read_data_ro_w = `XLEN'(`NUM_WARPS);
|
||||
`VX_CSR_NUM_CORES : read_data_ro_w = `XLEN'(`NUM_CORES * `NUM_CLUSTERS);
|
||||
`VX_CSR_LOCAL_MEM_BASE: read_data_ro_w = `XLEN'(`LMEM_BASE_ADDR);
|
||||
|
||||
`CSR_READ_64(`VX_CSR_MCYCLE, read_data_ro_r, cycles);
|
||||
`CSR_READ_64(`VX_CSR_MCYCLE, read_data_ro_w, cycles);
|
||||
|
||||
`VX_CSR_MPM_RESERVED : read_data_ro_r = 'x;
|
||||
`VX_CSR_MPM_RESERVED_H : read_data_ro_r = 'x;
|
||||
`VX_CSR_MPM_RESERVED : read_data_ro_w = 'x;
|
||||
`VX_CSR_MPM_RESERVED_H : read_data_ro_w = 'x;
|
||||
|
||||
`CSR_READ_64(`VX_CSR_MINSTRET, read_data_ro_r, commit_csr_if.instret);
|
||||
`CSR_READ_64(`VX_CSR_MINSTRET, read_data_ro_w, commit_csr_if.instret);
|
||||
|
||||
`VX_CSR_SATP,
|
||||
`VX_CSR_MSTATUS,
|
||||
|
@ -200,77 +200,77 @@ import VX_fpu_pkg::*;
|
|||
`VX_CSR_MTVEC,
|
||||
`VX_CSR_MEPC,
|
||||
`VX_CSR_PMPCFG0,
|
||||
`VX_CSR_PMPADDR0 : read_data_ro_r = `XLEN'(0);
|
||||
`VX_CSR_PMPADDR0 : read_data_ro_w = `XLEN'(0);
|
||||
|
||||
default: begin
|
||||
read_addr_valid_r = 0;
|
||||
read_addr_valid_w = 0;
|
||||
if ((read_addr >= `VX_CSR_MPM_USER && read_addr < (`VX_CSR_MPM_USER + 32))
|
||||
|| (read_addr >= `VX_CSR_MPM_USER_H && read_addr < (`VX_CSR_MPM_USER_H + 32))) begin
|
||||
read_addr_valid_r = 1;
|
||||
read_addr_valid_w = 1;
|
||||
`ifdef PERF_ENABLE
|
||||
case (base_dcrs.mpm_class)
|
||||
`VX_DCR_MPM_CLASS_CORE: begin
|
||||
case (read_addr)
|
||||
// PERF: pipeline
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCHED_ID, read_data_ro_r, pipeline_perf_if.sched.idles);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCHED_ST, read_data_ro_r, pipeline_perf_if.sched.stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_IBUF_ST, read_data_ro_r, pipeline_perf_if.issue.ibf_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_ST, read_data_ro_r, pipeline_perf_if.issue.scb_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_OPDS_ST, read_data_ro_r, pipeline_perf_if.issue.opd_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_ALU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_ALU]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCHED_ID, read_data_ro_w, pipeline_perf_if.sched.idles);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCHED_ST, read_data_ro_w, pipeline_perf_if.sched.stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_IBUF_ST, read_data_ro_w, pipeline_perf_if.issue.ibf_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_ST, read_data_ro_w, pipeline_perf_if.issue.scb_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_OPDS_ST, read_data_ro_w, pipeline_perf_if.issue.opd_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_ALU, read_data_ro_w, pipeline_perf_if.issue.units_uses[`EX_ALU]);
|
||||
`ifdef EXT_F_ENABLE
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_FPU]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_w, pipeline_perf_if.issue.units_uses[`EX_FPU]);
|
||||
`else
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_r, `PERF_CTR_BITS'(0));
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_w, `PERF_CTR_BITS'(0));
|
||||
`endif
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_LSU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_LSU]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_SFU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_SFU]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_CSRS, read_data_ro_r, pipeline_perf_if.issue.sfu_uses[`SFU_CSRS]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_WCTL, read_data_ro_r, pipeline_perf_if.issue.sfu_uses[`SFU_WCTL]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_LSU, read_data_ro_w, pipeline_perf_if.issue.units_uses[`EX_LSU]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_SFU, read_data_ro_w, pipeline_perf_if.issue.units_uses[`EX_SFU]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_CSRS, read_data_ro_w, pipeline_perf_if.issue.sfu_uses[`SFU_CSRS]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_WCTL, read_data_ro_w, pipeline_perf_if.issue.sfu_uses[`SFU_WCTL]);
|
||||
// PERF: memory
|
||||
`CSR_READ_64(`VX_CSR_MPM_IFETCHES, read_data_ro_r, pipeline_perf_if.ifetches);
|
||||
`CSR_READ_64(`VX_CSR_MPM_LOADS, read_data_ro_r, pipeline_perf_if.loads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_STORES, read_data_ro_r, pipeline_perf_if.stores);
|
||||
`CSR_READ_64(`VX_CSR_MPM_IFETCH_LT, read_data_ro_r, pipeline_perf_if.ifetch_latency);
|
||||
`CSR_READ_64(`VX_CSR_MPM_LOAD_LT, read_data_ro_r, pipeline_perf_if.load_latency);
|
||||
`CSR_READ_64(`VX_CSR_MPM_IFETCHES, read_data_ro_w, pipeline_perf_if.ifetches);
|
||||
`CSR_READ_64(`VX_CSR_MPM_LOADS, read_data_ro_w, pipeline_perf_if.loads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_STORES, read_data_ro_w, pipeline_perf_if.stores);
|
||||
`CSR_READ_64(`VX_CSR_MPM_IFETCH_LT, read_data_ro_w, pipeline_perf_if.ifetch_latency);
|
||||
`CSR_READ_64(`VX_CSR_MPM_LOAD_LT, read_data_ro_w, pipeline_perf_if.load_latency);
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
`VX_DCR_MPM_CLASS_MEM: begin
|
||||
case (read_addr)
|
||||
// PERF: icache
|
||||
`CSR_READ_64(`VX_CSR_MPM_ICACHE_READS, read_data_ro_r, mem_perf_if.icache.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_ICACHE_MISS_R, read_data_ro_r, mem_perf_if.icache.read_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_ICACHE_MSHR_ST, read_data_ro_r, mem_perf_if.icache.mshr_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_ICACHE_READS, read_data_ro_w, mem_perf_if.icache.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_ICACHE_MISS_R, read_data_ro_w, mem_perf_if.icache.read_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_ICACHE_MSHR_ST, read_data_ro_w, mem_perf_if.icache.mshr_stalls);
|
||||
// PERF: dcache
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_READS, read_data_ro_r, mem_perf_if.dcache.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_WRITES, read_data_ro_r, mem_perf_if.dcache.writes);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_MISS_R, read_data_ro_r, mem_perf_if.dcache.read_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_MISS_W, read_data_ro_r, mem_perf_if.dcache.write_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_BANK_ST, read_data_ro_r, mem_perf_if.dcache.bank_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_MSHR_ST, read_data_ro_r, mem_perf_if.dcache.mshr_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_READS, read_data_ro_w, mem_perf_if.dcache.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_WRITES, read_data_ro_w, mem_perf_if.dcache.writes);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_MISS_R, read_data_ro_w, mem_perf_if.dcache.read_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_MISS_W, read_data_ro_w, mem_perf_if.dcache.write_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_BANK_ST, read_data_ro_w, mem_perf_if.dcache.bank_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_MSHR_ST, read_data_ro_w, mem_perf_if.dcache.mshr_stalls);
|
||||
// PERF: lmem
|
||||
`CSR_READ_64(`VX_CSR_MPM_LMEM_READS, read_data_ro_r, mem_perf_if.lmem.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_LMEM_WRITES, read_data_ro_r, mem_perf_if.lmem.writes);
|
||||
`CSR_READ_64(`VX_CSR_MPM_LMEM_BANK_ST, read_data_ro_r, mem_perf_if.lmem.bank_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_LMEM_READS, read_data_ro_w, mem_perf_if.lmem.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_LMEM_WRITES, read_data_ro_w, mem_perf_if.lmem.writes);
|
||||
`CSR_READ_64(`VX_CSR_MPM_LMEM_BANK_ST, read_data_ro_w, mem_perf_if.lmem.bank_stalls);
|
||||
// PERF: l2cache
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_READS, read_data_ro_r, mem_perf_if.l2cache.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_WRITES, read_data_ro_r, mem_perf_if.l2cache.writes);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_MISS_R, read_data_ro_r, mem_perf_if.l2cache.read_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_MISS_W, read_data_ro_r, mem_perf_if.l2cache.write_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_BANK_ST, read_data_ro_r, mem_perf_if.l2cache.bank_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_MSHR_ST, read_data_ro_r, mem_perf_if.l2cache.mshr_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_READS, read_data_ro_w, mem_perf_if.l2cache.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_WRITES, read_data_ro_w, mem_perf_if.l2cache.writes);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_MISS_R, read_data_ro_w, mem_perf_if.l2cache.read_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_MISS_W, read_data_ro_w, mem_perf_if.l2cache.write_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_BANK_ST, read_data_ro_w, mem_perf_if.l2cache.bank_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_MSHR_ST, read_data_ro_w, mem_perf_if.l2cache.mshr_stalls);
|
||||
// PERF: l3cache
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_READS, read_data_ro_r, mem_perf_if.l3cache.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_WRITES, read_data_ro_r, mem_perf_if.l3cache.writes);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_MISS_R, read_data_ro_r, mem_perf_if.l3cache.read_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_MISS_W, read_data_ro_r, mem_perf_if.l3cache.write_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_BANK_ST, read_data_ro_r, mem_perf_if.l3cache.bank_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_MSHR_ST, read_data_ro_r, mem_perf_if.l3cache.mshr_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_READS, read_data_ro_w, mem_perf_if.l3cache.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_WRITES, read_data_ro_w, mem_perf_if.l3cache.writes);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_MISS_R, read_data_ro_w, mem_perf_if.l3cache.read_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_MISS_W, read_data_ro_w, mem_perf_if.l3cache.write_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_BANK_ST, read_data_ro_w, mem_perf_if.l3cache.bank_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_MSHR_ST, read_data_ro_w, mem_perf_if.l3cache.mshr_stalls);
|
||||
// PERF: memory
|
||||
`CSR_READ_64(`VX_CSR_MPM_MEM_READS, read_data_ro_r, mem_perf_if.mem.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_MEM_WRITES, read_data_ro_r, mem_perf_if.mem.writes);
|
||||
`CSR_READ_64(`VX_CSR_MPM_MEM_LT, read_data_ro_r, mem_perf_if.mem.latency);
|
||||
`CSR_READ_64(`VX_CSR_MPM_MEM_READS, read_data_ro_w, mem_perf_if.mem.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_MEM_WRITES, read_data_ro_w, mem_perf_if.mem.writes);
|
||||
`CSR_READ_64(`VX_CSR_MPM_MEM_LT, read_data_ro_w, mem_perf_if.mem.latency);
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
@ -282,12 +282,12 @@ import VX_fpu_pkg::*;
|
|||
endcase
|
||||
end
|
||||
|
||||
assign read_data_ro = read_data_ro_r;
|
||||
assign read_data_rw = read_data_rw_r;
|
||||
assign read_data_ro = read_data_ro_w;
|
||||
assign read_data_rw = read_data_rw_w;
|
||||
|
||||
`UNUSED_VAR (base_dcrs)
|
||||
|
||||
`RUNTIME_ASSERT(~read_enable || read_addr_valid_r, ("%t: *** invalid CSR read address: 0x%0h (#%0d)", $time, read_addr, read_uuid))
|
||||
`RUNTIME_ASSERT(~read_enable || read_addr_valid_w, ("%t: *** invalid CSR read address: 0x%0h (#%0d)", $time, read_addr, read_uuid))
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
`UNUSED_VAR (mem_perf_if.icache);
|
||||
|
|
|
@ -66,7 +66,7 @@ module VX_csr_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] rs1_data;
|
||||
`UNUSED_VAR (rs1_data)
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_rs1_data
|
||||
assign rs1_data[i] = execute_if.data.rs1_data[i];
|
||||
end
|
||||
|
||||
|
@ -113,12 +113,15 @@ module VX_csr_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] wtid, gtid;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
if (PID_BITS != 0) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_wtid
|
||||
if (PID_BITS != 0) begin : g_pid
|
||||
assign wtid[i] = `XLEN'(execute_if.data.pid * NUM_LANES + i);
|
||||
end else begin
|
||||
end else begin : g_no_pid
|
||||
assign wtid[i] = `XLEN'(i);
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_gtid
|
||||
assign gtid[i] = (`XLEN'(CORE_ID) << (`NW_BITS + `NT_BITS)) + (`XLEN'(execute_if.data.wid) << `NT_BITS) + wtid[i];
|
||||
end
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_dcr_data import VX_gpu_pkg::*, VX_trace_pkg::*; (
|
||||
module VX_dcr_data import VX_gpu_pkg::*; (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
@ -50,9 +50,9 @@ module VX_dcr_data import VX_gpu_pkg::*, VX_trace_pkg::*; (
|
|||
`ifdef DBG_TRACE_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (dcr_bus_if.write_valid) begin
|
||||
`TRACE(1, ("%d: base-dcr: state=", $time));
|
||||
`TRACE(1, ("%t: base-dcr: state=", $time))
|
||||
trace_base_dcr(1, dcr_bus_if.write_addr);
|
||||
`TRACE(1, (", data=0x%h\n", dcr_bus_if.write_data));
|
||||
`TRACE(1, (", data=0x%h\n", dcr_bus_if.write_data))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -15,19 +15,19 @@
|
|||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`define USED_IREG(x) \
|
||||
x``_r = {1'b0, ``x}; \
|
||||
x``_v = {1'b0, ``x}; \
|
||||
use_``x = 1
|
||||
|
||||
`define USED_FREG(x) \
|
||||
x``_r = {1'b1, ``x}; \
|
||||
x``_v = {1'b1, ``x}; \
|
||||
use_``x = 1
|
||||
`else
|
||||
`define USED_IREG(x) \
|
||||
x``_r = ``x; \
|
||||
x``_v = ``x; \
|
||||
use_``x = 1
|
||||
`endif
|
||||
|
||||
module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
||||
module VX_decode import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -50,7 +50,7 @@ module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
reg [`EX_BITS-1:0] ex_type;
|
||||
reg [`INST_OP_BITS-1:0] op_type;
|
||||
op_args_t op_args;
|
||||
reg [`NR_BITS-1:0] rd_r, rs1_r, rs2_r, rs3_r;
|
||||
reg [`NR_BITS-1:0] rd_v, rs1_v, rs2_v, rs3_v;
|
||||
reg use_rd, use_rs1, use_rs2, use_rs3;
|
||||
reg is_wstall;
|
||||
|
||||
|
@ -152,13 +152,13 @@ module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
|
||||
always @(*) begin
|
||||
|
||||
ex_type = '0;
|
||||
ex_type = 'x;
|
||||
op_type = 'x;
|
||||
op_args = 'x;
|
||||
rd_r = '0;
|
||||
rs1_r = '0;
|
||||
rs2_r = '0;
|
||||
rs3_r = '0;
|
||||
rd_v = '0;
|
||||
rs1_v = '0;
|
||||
rs2_v = '0;
|
||||
rs3_v = '0;
|
||||
use_rd = 0;
|
||||
use_rs1 = 0;
|
||||
use_rs2 = 0;
|
||||
|
@ -376,14 +376,16 @@ module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
`USED_IREG (rs2);
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
`INST_FMADD,
|
||||
`INST_FMSUB,
|
||||
`INST_FNMSUB,
|
||||
`INST_FNMADD: begin
|
||||
`INST_FMADD, // 7'b1000011
|
||||
`INST_FMSUB, // 7'b1000111
|
||||
`INST_FNMSUB, // 7'b1001011
|
||||
`INST_FNMADD: // 7'b1001111
|
||||
begin
|
||||
ex_type = `EX_FPU;
|
||||
op_type = `INST_OP_BITS'({2'b11, opcode[3:2]});
|
||||
op_type = `INST_OP_BITS'({2'b00, 1'b1, opcode[3]});
|
||||
op_args.fpu.frm = func3;
|
||||
op_args.fpu.fmt[0] = func2[0]; // float / double
|
||||
op_args.fpu.fmt[1] = opcode[3] ^ opcode[2]; // SUB
|
||||
use_rd = 1;
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
|
@ -399,9 +401,10 @@ module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
case (func5)
|
||||
5'b00000, // FADD
|
||||
5'b00001, // FSUB
|
||||
5'b00010, // FMUL
|
||||
5'b00011: begin // FDIV
|
||||
op_type = `INST_OP_BITS'(func5[1:0]);
|
||||
5'b00010: // FMUL
|
||||
begin
|
||||
op_type = `INST_OP_BITS'({2'b00, 1'b0, func5[1]});
|
||||
op_args.fpu.fmt[1] = func5[0]; // SUB
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
`USED_FREG (rs2);
|
||||
|
@ -430,6 +433,13 @@ module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
`USED_FREG (rs1);
|
||||
end
|
||||
`endif
|
||||
5'b00011: begin
|
||||
// FDIV
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_DIV);
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
`USED_FREG (rs2);
|
||||
end
|
||||
5'b01011: begin
|
||||
// FSQRT
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_SQRT);
|
||||
|
@ -527,7 +537,7 @@ module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
end
|
||||
|
||||
// disable write to integer register r0
|
||||
wire wb = use_rd && (rd_r != 0);
|
||||
wire wb = use_rd && (rd_v != 0);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
|
@ -537,7 +547,7 @@ module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
.reset (reset),
|
||||
.valid_in (fetch_if.valid),
|
||||
.ready_in (fetch_if.ready),
|
||||
.data_in ({fetch_if.data.uuid, fetch_if.data.wid, fetch_if.data.tmask, fetch_if.data.PC, ex_type, op_type, op_args, wb, rd_r, rs1_r, rs2_r, rs3_r}),
|
||||
.data_in ({fetch_if.data.uuid, fetch_if.data.wid, fetch_if.data.tmask, fetch_if.data.PC, ex_type, op_type, op_args, wb, rd_v, rs1_v, rs2_v, rs3_v}),
|
||||
.data_out ({decode_if.data.uuid, decode_if.data.wid, decode_if.data.tmask, decode_if.data.PC, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3}),
|
||||
.valid_out (decode_if.valid),
|
||||
.ready_out (decode_if.ready)
|
||||
|
@ -547,9 +557,10 @@ module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
|
||||
wire fetch_fire = fetch_if.valid && fetch_if.ready;
|
||||
|
||||
assign decode_sched_if.valid = fetch_fire;
|
||||
assign decode_sched_if.wid = fetch_if.data.wid;
|
||||
assign decode_sched_if.is_wstall = is_wstall;
|
||||
assign decode_sched_if.valid = fetch_fire;
|
||||
assign decode_sched_if.wid = fetch_if.data.wid;
|
||||
assign decode_sched_if.unlock = ~is_wstall;
|
||||
|
||||
`ifndef L1_ENABLE
|
||||
assign fetch_if.ibuf_pop = decode_if.ibuf_pop;
|
||||
`endif
|
||||
|
@ -557,14 +568,14 @@ module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
`ifdef DBG_TRACE_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (decode_if.valid && decode_if.ready) begin
|
||||
`TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, INSTANCE_ID, decode_if.data.wid, {decode_if.data.PC, 1'd0}, instr));
|
||||
`TRACE(1, ("%t: %s: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, INSTANCE_ID, decode_if.data.wid, {decode_if.data.PC, 1'd0}, instr))
|
||||
trace_ex_type(1, decode_if.data.ex_type);
|
||||
`TRACE(1, (", op="));
|
||||
`TRACE(1, (", op="))
|
||||
trace_ex_op(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args);
|
||||
`TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, opds=%b%b%b%b",
|
||||
decode_if.data.tmask, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3, use_rd, use_rs1, use_rs2, use_rs3));
|
||||
decode_if.data.tmask, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3, use_rd, use_rs1, use_rs2, use_rs3))
|
||||
trace_op_args(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args);
|
||||
`TRACE(1, (" (#%0d)\n", decode_if.data.uuid));
|
||||
`TRACE(1, (" (#%0d)\n", decode_if.data.uuid))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -33,7 +33,7 @@ module VX_dispatch import VX_gpu_pkg::*; #(
|
|||
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + `INST_OP_BITS + `INST_ARGS_BITS + 1 + `NR_BITS + (3 * `NUM_THREADS * `XLEN) + `NT_WIDTH;
|
||||
|
||||
wire [`NUM_THREADS-1:0][`NT_WIDTH-1:0] tids;
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin : g_tids
|
||||
assign tids[i] = `NT_WIDTH'(i);
|
||||
end
|
||||
|
||||
|
@ -50,23 +50,19 @@ module VX_dispatch import VX_gpu_pkg::*; #(
|
|||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
wire [`NUM_EX_UNITS-1:0] operands_reset;
|
||||
assign operands_if.ready = operands_reset[operands_if.data.ex_type];
|
||||
|
||||
for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin
|
||||
|
||||
`RESET_RELAY (buffer_reset, reset);
|
||||
wire [`NUM_EX_UNITS-1:0] operands_ready_in;
|
||||
assign operands_if.ready = operands_ready_in[operands_if.data.ex_type];
|
||||
|
||||
for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin : g_buffers
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2),
|
||||
.OUT_REG (2), // 2-cycle EB for area reduction
|
||||
.LUTRAM (1)
|
||||
.OUT_REG (1)
|
||||
) buffer (
|
||||
.clk (clk),
|
||||
.reset (buffer_reset),
|
||||
.reset (reset),
|
||||
.valid_in (operands_if.valid && (operands_if.data.ex_type == `EX_BITS'(i))),
|
||||
.ready_in (operands_reset[i]),
|
||||
.ready_in (operands_ready_in[i]),
|
||||
.data_in ({
|
||||
operands_if.data.uuid,
|
||||
operands_if.data.wis,
|
||||
|
@ -92,7 +88,7 @@ module VX_dispatch import VX_gpu_pkg::*; #(
|
|||
|
||||
wire operands_if_stall = operands_if.valid && ~operands_if.ready;
|
||||
|
||||
for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin
|
||||
for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin : g_perf_stalls
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_stalls_r[i] <= '0;
|
||||
|
|
|
@ -49,13 +49,12 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
wire [`ISSUE_WIDTH-1:0][IN_DATAW-1:0] dispatch_data;
|
||||
wire [`ISSUE_WIDTH-1:0] dispatch_ready;
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_dispatch_data
|
||||
assign dispatch_valid[i] = dispatch_if[i].valid;
|
||||
assign dispatch_data[i] = dispatch_if[i].data;
|
||||
assign dispatch_if[i].ready = dispatch_ready[i];
|
||||
end
|
||||
|
||||
wire [BLOCK_SIZE-1:0][ISSUE_W-1:0] issue_indices;
|
||||
wire [BLOCK_SIZE-1:0] block_ready;
|
||||
wire [BLOCK_SIZE-1:0][NUM_LANES-1:0] block_tmask;
|
||||
wire [BLOCK_SIZE-1:0][2:0][NUM_LANES-1:0][`XLEN-1:0] block_regs;
|
||||
|
@ -66,30 +65,53 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
wire batch_done = (& block_done);
|
||||
|
||||
// batch select logic
|
||||
|
||||
logic [BATCH_COUNT_W-1:0] batch_idx;
|
||||
if (BATCH_COUNT != 1) begin
|
||||
|
||||
if (BATCH_COUNT != 1) begin : g_batch_idx
|
||||
wire [BATCH_COUNT_W-1:0] batch_idx_n;
|
||||
wire [BATCH_COUNT-1:0] valid_batches;
|
||||
for (genvar i = 0; i < BATCH_COUNT; ++i) begin : g_valid_batches
|
||||
assign valid_batches[i] = | dispatch_valid[i * BLOCK_SIZE +: BLOCK_SIZE];
|
||||
end
|
||||
|
||||
VX_generic_arbiter #(
|
||||
.NUM_REQS (BATCH_COUNT),
|
||||
.TYPE ("P")
|
||||
) batch_sel (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (valid_batches),
|
||||
.grant_index (batch_idx_n),
|
||||
`UNUSED_PIN (grant_onehot),
|
||||
`UNUSED_PIN (grant_valid),
|
||||
.grant_ready (batch_done)
|
||||
);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
batch_idx <= '0;
|
||||
end else begin
|
||||
batch_idx <= batch_idx + BATCH_COUNT_W'(batch_done);
|
||||
end else if (batch_done) begin
|
||||
batch_idx <= batch_idx_n;
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
end else begin : g_batch_idx_0
|
||||
assign batch_idx = 0;
|
||||
`UNUSED_VAR (batch_done)
|
||||
end
|
||||
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin
|
||||
wire [BLOCK_SIZE-1:0][ISSUE_W-1:0] issue_indices;
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_issue_indices
|
||||
assign issue_indices[block_idx] = ISSUE_W'(batch_idx * BLOCK_SIZE) + ISSUE_W'(block_idx);
|
||||
end
|
||||
|
||||
wire [ISSUE_W-1:0] issue_idx = ISSUE_W'(batch_idx * BLOCK_SIZE) + ISSUE_W'(block_idx);
|
||||
assign issue_indices[block_idx] = issue_idx;
|
||||
|
||||
`RESET_RELAY_EN (block_reset, reset, (BLOCK_SIZE > 1));
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_blocks
|
||||
|
||||
wire [ISSUE_W-1:0] issue_idx = issue_indices[block_idx];
|
||||
wire valid_p, ready_p;
|
||||
|
||||
if (`NUM_THREADS != NUM_LANES) begin
|
||||
if (`NUM_THREADS > NUM_LANES) begin : g_partial_threads
|
||||
reg [NUM_PACKETS-1:0] sent_mask_p;
|
||||
wire [PID_WIDTH-1:0] start_p_n, start_p, end_p;
|
||||
wire dispatch_valid_r;
|
||||
|
@ -102,7 +124,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
wire fire_eop = fire_p && is_last_p;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (block_reset) begin
|
||||
if (reset) begin
|
||||
sent_mask_p <= '0;
|
||||
is_first_p <= 1;
|
||||
end else begin
|
||||
|
@ -124,8 +146,8 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs2_data = dispatch_data[issue_idx][DATA_REGS_OFF + 1 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN];
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs3_data = dispatch_data[issue_idx][DATA_REGS_OFF + 0 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN];
|
||||
|
||||
for (genvar i = 0; i < NUM_PACKETS; ++i) begin
|
||||
for (genvar j = 0; j < NUM_LANES; ++j) begin
|
||||
for (genvar i = 0; i < NUM_PACKETS; ++i) begin : g_per_packet_data
|
||||
for (genvar j = 0; j < NUM_LANES; ++j) begin : g_j
|
||||
localparam k = i * NUM_LANES + j;
|
||||
assign per_packet_tmask[i][j] = dispatch_tmask[k];
|
||||
assign per_packet_regs[i][0][j] = dispatch_rs1_data[k];
|
||||
|
@ -135,10 +157,12 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
end
|
||||
|
||||
wire [NUM_PACKETS-1:0] packet_valids;
|
||||
wire [NUM_PACKETS-1:0][PID_WIDTH-1:0] packet_ids;
|
||||
|
||||
for (genvar i = 0; i < NUM_PACKETS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_PACKETS; ++i) begin : g_packet_valids
|
||||
assign packet_valids[i] = (| per_packet_tmask[i]);
|
||||
end
|
||||
|
||||
wire [NUM_PACKETS-1:0][PID_WIDTH-1:0] packet_ids;
|
||||
for (genvar i = 0; i < NUM_PACKETS; ++i) begin : g_packet_ids
|
||||
assign packet_ids[i] = PID_WIDTH'(i);
|
||||
end
|
||||
|
||||
|
@ -187,13 +211,13 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
assign block_pid[block_idx] = start_p;
|
||||
assign block_sop[block_idx] = is_first_p;
|
||||
assign block_eop[block_idx] = is_last_p;
|
||||
if (FANOUT_ENABLE) begin
|
||||
if (FANOUT_ENABLE) begin : g_block_ready_fanout
|
||||
assign block_ready[block_idx] = dispatch_valid_r && ready_p && block_enable;
|
||||
end else begin
|
||||
end else begin : g_block_ready
|
||||
assign block_ready[block_idx] = ready_p && block_enable;
|
||||
end
|
||||
assign block_done[block_idx] = ~dispatch_valid[issue_idx] || fire_eop;
|
||||
end else begin
|
||||
assign block_done[block_idx] = fire_eop || ~dispatch_valid[issue_idx];
|
||||
end else begin : g_full_threads
|
||||
assign valid_p = dispatch_valid[issue_idx];
|
||||
assign block_tmask[block_idx] = dispatch_data[issue_idx][DATA_TMASK_OFF +: `NUM_THREADS];
|
||||
assign block_regs[block_idx][0] = dispatch_data[issue_idx][DATA_REGS_OFF + 2 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN];
|
||||
|
@ -203,29 +227,31 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
assign block_sop[block_idx] = 1'b1;
|
||||
assign block_eop[block_idx] = 1'b1;
|
||||
assign block_ready[block_idx] = ready_p;
|
||||
assign block_done[block_idx] = ~valid_p || ready_p;
|
||||
assign block_done[block_idx] = ready_p || ~valid_p;
|
||||
end
|
||||
|
||||
wire [ISSUE_ISW_W-1:0] isw;
|
||||
if (BATCH_COUNT != 1) begin
|
||||
if (BLOCK_SIZE != 1) begin
|
||||
if (BATCH_COUNT != 1) begin : g_isw_batch
|
||||
if (BLOCK_SIZE != 1) begin : g_block
|
||||
assign isw = {batch_idx, BLOCK_SIZE_W'(block_idx)};
|
||||
end else begin
|
||||
end else begin : g_no_block
|
||||
assign isw = batch_idx;
|
||||
end
|
||||
end else begin
|
||||
end else begin : g_isw
|
||||
assign isw = block_idx;
|
||||
end
|
||||
|
||||
wire [`NW_WIDTH-1:0] block_wid = wis_to_wid(dispatch_data[issue_idx][DATA_TMASK_OFF+`NUM_THREADS +: ISSUE_WIS_W], isw);
|
||||
|
||||
logic [OUT_DATAW-1:0] execute_data, execute_data_w;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (OUT_DATAW),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
||||
) buf_out (
|
||||
.clk (clk),
|
||||
.reset (block_reset),
|
||||
.reset (reset),
|
||||
.valid_in (valid_p),
|
||||
.ready_in (ready_p),
|
||||
.data_in ({
|
||||
|
@ -239,17 +265,27 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
block_pid[block_idx],
|
||||
block_sop[block_idx],
|
||||
block_eop[block_idx]}),
|
||||
.data_out (execute_if[block_idx].data),
|
||||
.data_out (execute_data),
|
||||
.valid_out (execute_if[block_idx].valid),
|
||||
.ready_out (execute_if[block_idx].ready)
|
||||
);
|
||||
|
||||
if (`NUM_THREADS != NUM_LANES) begin : g_execute_data_w_partial
|
||||
assign execute_data_w = execute_data;
|
||||
end else begin : g_execute_data_w_full
|
||||
always @(*) begin
|
||||
execute_data_w = execute_data;
|
||||
execute_data_w[2:0] = {1'b0, 1'b1, 1'b1}; // default pid, sop, and eop
|
||||
end
|
||||
end
|
||||
assign execute_if[block_idx].data = execute_data_w;
|
||||
end
|
||||
|
||||
reg [`ISSUE_WIDTH-1:0] ready_in;
|
||||
always @(*) begin
|
||||
ready_in = 0;
|
||||
for (integer i = 0; i < BLOCK_SIZE; ++i) begin
|
||||
ready_in[issue_indices[i]] = block_ready[i] && block_eop[i];
|
||||
for (integer block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin
|
||||
ready_in[issue_indices[block_idx]] = block_ready[block_idx] && block_eop[block_idx];
|
||||
end
|
||||
end
|
||||
assign dispatch_ready = ready_in;
|
||||
|
|
|
@ -51,41 +51,35 @@ module VX_execute import VX_gpu_pkg::*; #(
|
|||
VX_fpu_csr_if fpu_csr_if[`NUM_FPU_BLOCKS]();
|
||||
`endif
|
||||
|
||||
`RESET_RELAY (alu_reset, reset);
|
||||
`RESET_RELAY (lsu_reset, reset);
|
||||
`RESET_RELAY (sfu_reset, reset);
|
||||
|
||||
VX_alu_unit #(
|
||||
.INSTANCE_ID ($sformatf("%s-alu", INSTANCE_ID))
|
||||
) alu_unit (
|
||||
.clk (clk),
|
||||
.reset (alu_reset),
|
||||
.reset (reset),
|
||||
.dispatch_if (dispatch_if[`EX_ALU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.commit_if (commit_if[`EX_ALU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.branch_ctl_if (branch_ctl_if)
|
||||
);
|
||||
|
||||
`SCOPE_IO_SWITCH (1)
|
||||
`SCOPE_IO_SWITCH (1);
|
||||
|
||||
VX_lsu_unit #(
|
||||
.INSTANCE_ID ($sformatf("%s-lsu", INSTANCE_ID))
|
||||
) lsu_unit (
|
||||
`SCOPE_IO_BIND (0)
|
||||
.clk (clk),
|
||||
.reset (lsu_reset),
|
||||
.reset (reset),
|
||||
.dispatch_if (dispatch_if[`EX_LSU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.commit_if (commit_if[`EX_LSU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.lsu_mem_if (lsu_mem_if)
|
||||
);
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`RESET_RELAY (fpu_reset, reset);
|
||||
|
||||
VX_fpu_unit #(
|
||||
.INSTANCE_ID ($sformatf("%s-fpu", INSTANCE_ID))
|
||||
) fpu_unit (
|
||||
.clk (clk),
|
||||
.reset (fpu_reset),
|
||||
.reset (reset),
|
||||
.dispatch_if (dispatch_if[`EX_FPU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.commit_if (commit_if[`EX_FPU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.fpu_csr_if (fpu_csr_if)
|
||||
|
@ -97,7 +91,7 @@ module VX_execute import VX_gpu_pkg::*; #(
|
|||
.CORE_ID (CORE_ID)
|
||||
) sfu_unit (
|
||||
.clk (clk),
|
||||
.reset (sfu_reset),
|
||||
.reset (reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.mem_perf_if (mem_perf_if),
|
||||
.pipeline_perf_if (pipeline_perf_if),
|
||||
|
|
|
@ -71,7 +71,7 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
// This resolves potential deadlock if ibuffer fills and the LSU stalls the execute stage due to pending dcache requests.
|
||||
// This issue is particularly prevalent when the icache and dcache are disabled and both requests share the same bus.
|
||||
wire [`NUM_WARPS-1:0] pending_ibuf_full;
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_pending_reads
|
||||
VX_pending_size #(
|
||||
.SIZE (`IBUF_SIZE)
|
||||
) pending_reads (
|
||||
|
@ -116,9 +116,9 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
.ready_out (icache_bus_if.req_ready)
|
||||
);
|
||||
|
||||
assign icache_bus_if.req_data.atype = '0;
|
||||
assign icache_bus_if.req_data.flags = '0;
|
||||
assign icache_bus_if.req_data.rw = 0;
|
||||
assign icache_bus_if.req_data.byteen = 4'b1111;
|
||||
assign icache_bus_if.req_data.byteen = '1;
|
||||
assign icache_bus_if.req_data.data = '0;
|
||||
|
||||
// Icache Response
|
||||
|
@ -131,47 +131,57 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
assign fetch_if.data.uuid = rsp_uuid;
|
||||
assign icache_bus_if.rsp_ready = fetch_if.ready;
|
||||
|
||||
`ifdef SCOPE
|
||||
`ifdef DBG_SCOPE_FETCH
|
||||
`SCOPE_IO_SWITCH (1);
|
||||
wire schedule_fire = schedule_if.valid && schedule_if.ready;
|
||||
wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready;
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (1),
|
||||
.TRIGGERW (4),
|
||||
.PROBEW (`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS +
|
||||
ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH +
|
||||
(ICACHE_WORD_SIZE*8) + ICACHE_TAG_WIDTH)
|
||||
) scope_tap (
|
||||
.clk (clk),
|
||||
.reset (scope_reset),
|
||||
.start (1'b0),
|
||||
.stop (1'b0),
|
||||
.triggers ({
|
||||
reset,
|
||||
wire icache_bus_req_fire = icache_bus_if.req_valid && icache_bus_if.req_ready;
|
||||
wire icache_bus_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready;
|
||||
wire [`UUID_WIDTH-1:0] icache_bus_req_uuid = icache_bus_if.req_data.tag[ICACHE_TAG_WIDTH-1 -: `UUID_WIDTH];
|
||||
wire [`UUID_WIDTH-1:0] icache_bus_rsp_uuid = icache_bus_if.rsp_data.tag[ICACHE_TAG_WIDTH-1 -: `UUID_WIDTH];
|
||||
`NEG_EDGE (reset_negedge, reset);
|
||||
`SCOPE_TAP_EX (0, 1, 6, 3, (
|
||||
`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS +
|
||||
`UUID_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH +
|
||||
`UUID_WIDTH + (ICACHE_WORD_SIZE * 8)
|
||||
), {
|
||||
schedule_if.valid,
|
||||
schedule_if.ready,
|
||||
icache_bus_if.req_valid,
|
||||
icache_bus_if.req_ready,
|
||||
icache_bus_if.rsp_valid,
|
||||
icache_bus_if.rsp_ready
|
||||
}, {
|
||||
schedule_fire,
|
||||
icache_req_fire,
|
||||
icache_rsp_fire
|
||||
}),
|
||||
.probes ({
|
||||
icache_bus_req_fire,
|
||||
icache_bus_rsp_fire
|
||||
},{
|
||||
schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC,
|
||||
icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr,
|
||||
icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag
|
||||
}),
|
||||
.bus_in (scope_bus_in),
|
||||
.bus_out (scope_bus_out)
|
||||
icache_bus_req_uuid, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr,
|
||||
icache_bus_rsp_uuid, icache_bus_if.rsp_data.data
|
||||
},
|
||||
reset_negedge, 1'b0, 4096
|
||||
);
|
||||
`else
|
||||
`SCOPE_IO_UNUSED()
|
||||
`SCOPE_IO_UNUSED(0)
|
||||
`endif
|
||||
`endif
|
||||
`ifdef CHIPSCOPE
|
||||
ila_fetch ila_fetch_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({schedule_if.valid, schedule_if.data, schedule_if.ready}),
|
||||
.probe1 ({icache_bus_if.req_valid, icache_bus_if.req_data, icache_bus_if.req_ready}),
|
||||
.probe2 ({icache_bus_if.rsp_valid, icache_bus_if.rsp_data, icache_bus_if.rsp_ready})
|
||||
);
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_MEM
|
||||
wire schedule_fire = schedule_if.valid && schedule_if.ready;
|
||||
wire fetch_fire = fetch_if.valid && fetch_if.ready;
|
||||
always @(posedge clk) begin
|
||||
if (schedule_fire) begin
|
||||
`TRACE(1, ("%d: %s req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, INSTANCE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid));
|
||||
if (schedule_if.valid && schedule_if.ready) begin
|
||||
`TRACE(1, ("%t: %s req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, INSTANCE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid))
|
||||
end
|
||||
if (fetch_fire) begin
|
||||
`TRACE(1, ("%d: %s rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, INSTANCE_ID, fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid));
|
||||
if (fetch_if.valid && fetch_if.ready) begin
|
||||
`TRACE(1, ("%t: %s rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, INSTANCE_ID, fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -41,7 +41,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
VX_dispatch_unit #(
|
||||
.BLOCK_SIZE (BLOCK_SIZE),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.OUT_BUF (PARTIAL_BW ? 1 : 0)
|
||||
.OUT_BUF (PARTIAL_BW ? 3 : 0)
|
||||
) dispatch_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -53,12 +53,10 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
.NUM_LANES (NUM_LANES)
|
||||
) per_block_commit_if[BLOCK_SIZE]();
|
||||
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_fpus
|
||||
`UNUSED_VAR (per_block_execute_if[block_idx].data.tid)
|
||||
`UNUSED_VAR (per_block_execute_if[block_idx].data.wb)
|
||||
|
||||
`RESET_RELAY_EN (block_reset, reset, (BLOCK_SIZE > 1));
|
||||
|
||||
// Store request info
|
||||
wire fpu_req_valid, fpu_req_ready;
|
||||
wire fpu_rsp_valid, fpu_rsp_ready;
|
||||
|
@ -71,9 +69,9 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
wire [NUM_LANES-1:0] fpu_rsp_tmask;
|
||||
wire [`PC_BITS-1:0] fpu_rsp_PC;
|
||||
wire [`NR_BITS-1:0] fpu_rsp_rd;
|
||||
wire [PID_WIDTH-1:0] fpu_rsp_pid;
|
||||
wire fpu_rsp_sop;
|
||||
wire fpu_rsp_eop;
|
||||
wire [PID_WIDTH-1:0] fpu_rsp_pid, fpu_rsp_pid_u;
|
||||
wire fpu_rsp_sop, fpu_rsp_sop_u;
|
||||
wire fpu_rsp_eop, fpu_rsp_eop_u;
|
||||
|
||||
wire [TAG_WIDTH-1:0] fpu_req_tag, fpu_rsp_tag;
|
||||
wire mdata_full;
|
||||
|
@ -89,17 +87,30 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
.SIZE (`FPUQ_SIZE)
|
||||
) tag_store (
|
||||
.clk (clk),
|
||||
.reset (block_reset),
|
||||
.reset (reset),
|
||||
.acquire_en (execute_fire),
|
||||
.write_addr (fpu_req_tag),
|
||||
.write_data ({per_block_execute_if[block_idx].data.uuid, per_block_execute_if[block_idx].data.wid, per_block_execute_if[block_idx].data.tmask, per_block_execute_if[block_idx].data.PC, per_block_execute_if[block_idx].data.rd, per_block_execute_if[block_idx].data.pid, per_block_execute_if[block_idx].data.sop, per_block_execute_if[block_idx].data.eop}),
|
||||
.read_data ({fpu_rsp_uuid, fpu_rsp_wid, fpu_rsp_tmask, fpu_rsp_PC, fpu_rsp_rd, fpu_rsp_pid, fpu_rsp_sop, fpu_rsp_eop}),
|
||||
.read_data ({fpu_rsp_uuid, fpu_rsp_wid, fpu_rsp_tmask, fpu_rsp_PC, fpu_rsp_rd, fpu_rsp_pid_u, fpu_rsp_sop_u, fpu_rsp_eop_u}),
|
||||
.read_addr (fpu_rsp_tag),
|
||||
.release_en (fpu_rsp_fire),
|
||||
.full (mdata_full),
|
||||
`UNUSED_PIN (empty)
|
||||
);
|
||||
|
||||
if (PID_BITS != 0) begin : g_fpu_rsp_pid
|
||||
assign fpu_rsp_pid = fpu_rsp_pid_u;
|
||||
assign fpu_rsp_sop = fpu_rsp_sop_u;
|
||||
assign fpu_rsp_eop = fpu_rsp_eop_u;
|
||||
end else begin : g_no_fpu_rsp_pid
|
||||
`UNUSED_VAR (fpu_rsp_pid_u)
|
||||
`UNUSED_VAR (fpu_rsp_sop_u)
|
||||
`UNUSED_VAR (fpu_rsp_eop_u)
|
||||
assign fpu_rsp_pid = 0;
|
||||
assign fpu_rsp_sop = 1;
|
||||
assign fpu_rsp_eop = 1;
|
||||
end
|
||||
|
||||
// resolve dynamic FRM from CSR
|
||||
wire [`INST_FRM_BITS-1:0] fpu_req_frm;
|
||||
`ASSIGN_BLOCKED_WID (fpu_csr_if[block_idx].read_wid, per_block_execute_if[block_idx].data.wid, block_idx, `NUM_FPU_BLOCKS)
|
||||
|
@ -119,7 +130,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
.OUT_BUF (PARTIAL_BW ? 1 : 3)
|
||||
) fpu_dpi (
|
||||
.clk (clk),
|
||||
.reset (block_reset),
|
||||
.reset (reset),
|
||||
|
||||
.valid_in (fpu_req_valid),
|
||||
.mask_in (per_block_execute_if[block_idx].data.tmask),
|
||||
|
@ -148,7 +159,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
.OUT_BUF (PARTIAL_BW ? 1 : 3)
|
||||
) fpu_fpnew (
|
||||
.clk (clk),
|
||||
.reset (block_reset),
|
||||
.reset (reset),
|
||||
|
||||
.valid_in (fpu_req_valid),
|
||||
.mask_in (per_block_execute_if[block_idx].data.tmask),
|
||||
|
@ -177,7 +188,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
.OUT_BUF (PARTIAL_BW ? 1 : 3)
|
||||
) fpu_dsp (
|
||||
.clk (clk),
|
||||
.reset (block_reset),
|
||||
.reset (reset),
|
||||
|
||||
.valid_in (fpu_req_valid),
|
||||
.mask_in (per_block_execute_if[block_idx].data.tmask),
|
||||
|
@ -200,27 +211,38 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
|
||||
`endif
|
||||
|
||||
// handle FPU response
|
||||
|
||||
// handle CSR update
|
||||
fflags_t fpu_rsp_fflags_q;
|
||||
|
||||
if (PID_BITS != 0) begin
|
||||
if (PID_BITS != 0) begin : g_pid
|
||||
fflags_t fpu_rsp_fflags_r;
|
||||
always @(posedge clk) begin
|
||||
if (block_reset) begin
|
||||
if (reset) begin
|
||||
fpu_rsp_fflags_r <= '0;
|
||||
end else if (fpu_rsp_fire) begin
|
||||
fpu_rsp_fflags_r <= fpu_rsp_eop ? '0 : (fpu_rsp_fflags_r | fpu_rsp_fflags);
|
||||
end
|
||||
end
|
||||
assign fpu_rsp_fflags_q = fpu_rsp_fflags_r | fpu_rsp_fflags;
|
||||
end else begin
|
||||
end else begin : g_no_pid
|
||||
assign fpu_rsp_fflags_q = fpu_rsp_fflags;
|
||||
end
|
||||
|
||||
assign fpu_csr_if[block_idx].write_enable = fpu_rsp_fire && fpu_rsp_eop && fpu_rsp_has_fflags;
|
||||
`ASSIGN_BLOCKED_WID (fpu_csr_if[block_idx].write_wid, fpu_rsp_wid, block_idx, `NUM_FPU_BLOCKS)
|
||||
assign fpu_csr_if[block_idx].write_fflags = fpu_rsp_fflags_q;
|
||||
VX_fpu_csr_if fpu_csr_tmp_if();
|
||||
assign fpu_csr_tmp_if.write_enable = fpu_rsp_fire && fpu_rsp_eop && fpu_rsp_has_fflags;
|
||||
`ASSIGN_BLOCKED_WID (fpu_csr_tmp_if.write_wid, fpu_rsp_wid, block_idx, `NUM_FPU_BLOCKS)
|
||||
assign fpu_csr_tmp_if.write_fflags = fpu_rsp_fflags_q;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_WIDTH + $bits(fflags_t)),
|
||||
.RESETW (1)
|
||||
) fpu_csr_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in ({fpu_csr_tmp_if.write_enable, fpu_csr_tmp_if.write_wid, fpu_csr_tmp_if.write_fflags}),
|
||||
.data_out ({fpu_csr_if[block_idx].write_enable, fpu_csr_if[block_idx].write_wid, fpu_csr_if[block_idx].write_fflags})
|
||||
);
|
||||
|
||||
// send response
|
||||
|
||||
|
@ -229,7 +251,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
.SIZE (0)
|
||||
) rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (block_reset),
|
||||
.reset (reset),
|
||||
.valid_in (fpu_rsp_valid),
|
||||
.ready_in (fpu_rsp_ready),
|
||||
.data_in ({fpu_rsp_uuid, fpu_rsp_wid, fpu_rsp_tmask, fpu_rsp_PC, fpu_rsp_rd, fpu_rsp_result, fpu_rsp_pid, fpu_rsp_sop, fpu_rsp_eop}),
|
||||
|
|
|
@ -41,17 +41,17 @@ module VX_gather_unit import VX_gpu_pkg::*; #(
|
|||
wire [BLOCK_SIZE-1:0] commit_in_ready;
|
||||
wire [BLOCK_SIZE-1:0][ISSUE_ISW_W-1:0] commit_in_isw;
|
||||
|
||||
for (genvar i = 0; i < BLOCK_SIZE; ++i) begin
|
||||
for (genvar i = 0; i < BLOCK_SIZE; ++i) begin : g_commit_in
|
||||
assign commit_in_valid[i] = commit_in_if[i].valid;
|
||||
assign commit_in_data[i] = commit_in_if[i].data;
|
||||
assign commit_in_if[i].ready = commit_in_ready[i];
|
||||
if (BLOCK_SIZE != `ISSUE_WIDTH) begin
|
||||
if (BLOCK_SIZE != 1) begin
|
||||
if (BLOCK_SIZE != `ISSUE_WIDTH) begin : g_commit_in_isw_partial
|
||||
if (BLOCK_SIZE != 1) begin : g_block
|
||||
assign commit_in_isw[i] = {commit_in_data[i][DATA_WIS_OFF+BLOCK_SIZE_W +: (ISSUE_ISW_W-BLOCK_SIZE_W)], BLOCK_SIZE_W'(i)};
|
||||
end else begin
|
||||
end else begin : g_no_block
|
||||
assign commit_in_isw[i] = commit_in_data[i][DATA_WIS_OFF +: ISSUE_ISW_W];
|
||||
end
|
||||
end else begin
|
||||
end else begin : g_commit_in_isw_full
|
||||
assign commit_in_isw[i] = BLOCK_SIZE_W'(i);
|
||||
end
|
||||
end
|
||||
|
@ -70,11 +70,12 @@ module VX_gather_unit import VX_gpu_pkg::*; #(
|
|||
commit_out_data[commit_in_isw[i]] = commit_in_data[i];
|
||||
end
|
||||
end
|
||||
for (genvar i = 0; i < BLOCK_SIZE; ++i) begin
|
||||
|
||||
for (genvar i = 0; i < BLOCK_SIZE; ++i) begin : g_commit_in_ready
|
||||
assign commit_in_ready[i] = commit_out_ready[commit_in_isw[i]];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin: g_out_bufs
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) commit_tmp_if();
|
||||
|
@ -94,31 +95,31 @@ module VX_gather_unit import VX_gpu_pkg::*; #(
|
|||
.ready_out (commit_tmp_if.ready)
|
||||
);
|
||||
|
||||
logic [`NUM_THREADS-1:0] commit_tmask_r;
|
||||
logic [`NUM_THREADS-1:0][`XLEN-1:0] commit_data_r;
|
||||
if (PID_BITS != 0) begin
|
||||
logic [`NUM_THREADS-1:0] commit_tmask_w;
|
||||
logic [`NUM_THREADS-1:0][`XLEN-1:0] commit_data_w;
|
||||
if (PID_BITS != 0) begin : g_commit_data_with_pid
|
||||
always @(*) begin
|
||||
commit_tmask_r = '0;
|
||||
commit_data_r = 'x;
|
||||
commit_tmask_w = '0;
|
||||
commit_data_w = 'x;
|
||||
for (integer j = 0; j < NUM_LANES; ++j) begin
|
||||
commit_tmask_r[commit_tmp_if.data.pid * NUM_LANES + j] = commit_tmp_if.data.tmask[j];
|
||||
commit_data_r[commit_tmp_if.data.pid * NUM_LANES + j] = commit_tmp_if.data.data[j];
|
||||
commit_tmask_w[commit_tmp_if.data.pid * NUM_LANES + j] = commit_tmp_if.data.tmask[j];
|
||||
commit_data_w[commit_tmp_if.data.pid * NUM_LANES + j] = commit_tmp_if.data.data[j];
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
assign commit_tmask_r = commit_tmp_if.data.tmask;
|
||||
assign commit_data_r = commit_tmp_if.data.data;
|
||||
end else begin : g_commit_data_no_pid
|
||||
assign commit_tmask_w = commit_tmp_if.data.tmask;
|
||||
assign commit_data_w = commit_tmp_if.data.data;
|
||||
end
|
||||
|
||||
assign commit_out_if[i].valid = commit_tmp_if.valid;
|
||||
assign commit_out_if[i].data = {
|
||||
commit_tmp_if.data.uuid,
|
||||
commit_tmp_if.data.wid,
|
||||
commit_tmask_r,
|
||||
commit_tmask_w,
|
||||
commit_tmp_if.data.PC,
|
||||
commit_tmp_if.data.wb,
|
||||
commit_tmp_if.data.rd,
|
||||
commit_data_r,
|
||||
commit_data_w,
|
||||
1'b0, // PID
|
||||
commit_tmp_if.data.sop,
|
||||
commit_tmp_if.data.eop
|
||||
|
|
|
@ -35,7 +35,7 @@ module VX_ibuffer import VX_gpu_pkg::*; #(
|
|||
wire [PER_ISSUE_WARPS-1:0] ibuf_ready_in;
|
||||
assign decode_if.ready = ibuf_ready_in[decode_if.data.wid];
|
||||
|
||||
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin
|
||||
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_instr_bufs
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (`IBUF_SIZE),
|
||||
|
|
|
@ -48,9 +48,9 @@ module VX_ipdom_stack #(
|
|||
empty_r <= 1;
|
||||
full_r <= 0;
|
||||
end else begin
|
||||
`ASSERT(~push || ~full, ("runtime error: writing to a full stack!"));
|
||||
`ASSERT(~pop || ~empty, ("runtime error: reading an empty stack!"));
|
||||
`ASSERT(~push || ~pop, ("runtime error: push and pop in same cycle not supported!"));
|
||||
`ASSERT(~push || ~full, ("%t: runtime error: writing to a full stack!", $time));
|
||||
`ASSERT(~pop || ~empty, ("%t: runtime error: reading an empty stack!", $time));
|
||||
`ASSERT(~push || ~pop, ("%t: runtime error: push and pop in same cycle not supported!", $time));
|
||||
if (push) begin
|
||||
rd_ptr <= wr_ptr;
|
||||
wr_ptr <= wr_ptr + ADDRW'(1);
|
||||
|
|
|
@ -29,16 +29,17 @@ module VX_issue import VX_gpu_pkg::*; #(
|
|||
VX_writeback_if.slave writeback_if [`ISSUE_WIDTH],
|
||||
VX_dispatch_if.master dispatch_if [`NUM_EX_UNITS * `ISSUE_WIDTH]
|
||||
);
|
||||
`STATIC_ASSERT ((`ISSUE_WIDTH <= `NUM_WARPS), ("invalid parameter"))
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
issue_perf_t per_issue_perf [`ISSUE_WIDTH];
|
||||
`PERF_COUNTER_ADD (issue_perf, per_issue_perf, ibf_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
|
||||
`PERF_COUNTER_ADD (issue_perf, per_issue_perf, scb_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
|
||||
`PERF_COUNTER_ADD (issue_perf, per_issue_perf, opd_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
|
||||
for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin
|
||||
for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin : g_issue_perf_units_uses
|
||||
`PERF_COUNTER_ADD (issue_perf, per_issue_perf, units_uses[i], `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
|
||||
end
|
||||
for (genvar i = 0; i < `NUM_SFU_UNITS; ++i) begin
|
||||
for (genvar i = 0; i < `NUM_SFU_UNITS; ++i) begin : g_issue_perf_sfu_uses
|
||||
`PERF_COUNTER_ADD (issue_perf, per_issue_perf, sfu_uses[i], `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
|
||||
end
|
||||
`endif
|
||||
|
@ -49,9 +50,9 @@ module VX_issue import VX_gpu_pkg::*; #(
|
|||
wire [`ISSUE_WIDTH-1:0] decode_ready_in;
|
||||
assign decode_if.ready = decode_ready_in[decode_isw];
|
||||
|
||||
`SCOPE_IO_SWITCH (`ISSUE_WIDTH)
|
||||
`SCOPE_IO_SWITCH (`ISSUE_WIDTH);
|
||||
|
||||
for (genvar issue_id = 0; issue_id < `ISSUE_WIDTH; ++issue_id) begin : issue_slices
|
||||
for (genvar issue_id = 0; issue_id < `ISSUE_WIDTH; ++issue_id) begin : g_issue_slices
|
||||
VX_decode_if #(
|
||||
.NUM_WARPS (PER_ISSUE_WARPS)
|
||||
) per_issue_decode_if();
|
||||
|
@ -76,15 +77,13 @@ module VX_issue import VX_gpu_pkg::*; #(
|
|||
assign decode_if.ibuf_pop[issue_id * PER_ISSUE_WARPS +: PER_ISSUE_WARPS] = per_issue_decode_if.ibuf_pop;
|
||||
`endif
|
||||
|
||||
`RESET_RELAY (slice_reset, reset);
|
||||
|
||||
VX_issue_slice #(
|
||||
.INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, issue_id)),
|
||||
.ISSUE_ID (issue_id)
|
||||
) issue_slice (
|
||||
`SCOPE_IO_BIND(issue_id)
|
||||
.clk (clk),
|
||||
.reset (slice_reset),
|
||||
.reset (reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.issue_perf (per_issue_perf[issue_id]),
|
||||
`endif
|
||||
|
@ -94,7 +93,7 @@ module VX_issue import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
// Assign transposed dispatch_if
|
||||
for (genvar ex_id = 0; ex_id < `NUM_EX_UNITS; ++ex_id) begin
|
||||
for (genvar ex_id = 0; ex_id < `NUM_EX_UNITS; ++ex_id) begin : g_dispatch_if
|
||||
`ASSIGN_VX_IF(dispatch_if[ex_id * `ISSUE_WIDTH + issue_id], per_issue_dispatch_if[ex_id]);
|
||||
end
|
||||
end
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_issue_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
||||
module VX_issue_slice import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter ISSUE_ID = 0
|
||||
) (
|
||||
|
@ -36,16 +36,11 @@ module VX_issue_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
VX_scoreboard_if scoreboard_if();
|
||||
VX_operands_if operands_if();
|
||||
|
||||
`RESET_RELAY (ibuf_reset, reset);
|
||||
`RESET_RELAY (scoreboard_reset, reset);
|
||||
`RESET_RELAY (operands_reset, reset);
|
||||
`RESET_RELAY (dispatch_reset, reset);
|
||||
|
||||
VX_ibuffer #(
|
||||
.INSTANCE_ID ($sformatf("%s-ibuffer", INSTANCE_ID))
|
||||
) ibuffer (
|
||||
.clk (clk),
|
||||
.reset (ibuf_reset),
|
||||
.reset (reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_stalls (issue_perf.ibf_stalls),
|
||||
`endif
|
||||
|
@ -57,7 +52,7 @@ module VX_issue_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
.INSTANCE_ID ($sformatf("%s-scoreboard", INSTANCE_ID))
|
||||
) scoreboard (
|
||||
.clk (clk),
|
||||
.reset (scoreboard_reset),
|
||||
.reset (reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_stalls (issue_perf.scb_stalls),
|
||||
.perf_units_uses(issue_perf.units_uses),
|
||||
|
@ -72,7 +67,7 @@ module VX_issue_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
.INSTANCE_ID ($sformatf("%s-operands", INSTANCE_ID))
|
||||
) operands (
|
||||
.clk (clk),
|
||||
.reset (operands_reset),
|
||||
.reset (reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_stalls (issue_perf.opd_stalls),
|
||||
`endif
|
||||
|
@ -85,7 +80,7 @@ module VX_issue_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
.INSTANCE_ID ($sformatf("%s-dispatch", INSTANCE_ID))
|
||||
) dispatch (
|
||||
.clk (clk),
|
||||
.reset (dispatch_reset),
|
||||
.reset (reset),
|
||||
`ifdef PERF_ENABLE
|
||||
`UNUSED_PIN (perf_stalls),
|
||||
`endif
|
||||
|
@ -93,65 +88,86 @@ module VX_issue_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
.dispatch_if (dispatch_if)
|
||||
);
|
||||
|
||||
`ifdef SCOPE
|
||||
`ifdef DBG_SCOPE_ISSUE
|
||||
wire operands_if_fire = operands_if.valid && operands_if.ready;
|
||||
wire operands_if_not_ready = ~operands_if.ready;
|
||||
wire writeback_if_valid = writeback_if.valid;
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (2),
|
||||
.TRIGGERW (4),
|
||||
.PROBEW (`UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS +
|
||||
1 + `NR_BITS + (`NUM_THREADS * 3 * `XLEN) +
|
||||
`UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1)
|
||||
) scope_tap (
|
||||
.clk (clk),
|
||||
.reset (scope_reset),
|
||||
.start (1'b0),
|
||||
.stop (1'b0),
|
||||
.triggers ({
|
||||
reset,
|
||||
operands_if_fire,
|
||||
operands_if_not_ready,
|
||||
writeback_if_valid
|
||||
}),
|
||||
.probes ({
|
||||
`SCOPE_IO_SWITCH (1);
|
||||
wire decode_fire = decode_if.valid && decode_if.ready;
|
||||
wire operands_fire = operands_if.valid && operands_if.ready;
|
||||
`NEG_EDGE (reset_negedge, reset);
|
||||
`SCOPE_TAP_EX (0, 2, 4, 3, (
|
||||
`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + 1 + `NR_BITS * 4 +
|
||||
`UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + 1 + `NR_BITS + (3 * `XLEN) +
|
||||
`UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * `XLEN) + 1
|
||||
), {
|
||||
decode_if.valid,
|
||||
decode_if.ready,
|
||||
operands_if.valid,
|
||||
operands_if.ready
|
||||
}, {
|
||||
decode_fire,
|
||||
operands_fire,
|
||||
writeback_if.valid // ack-free
|
||||
}, {
|
||||
decode_if.data.uuid,
|
||||
decode_if.data.wid,
|
||||
decode_if.data.tmask,
|
||||
decode_if.data.PC,
|
||||
decode_if.data.ex_type,
|
||||
decode_if.data.op_type,
|
||||
decode_if.data.wb,
|
||||
decode_if.data.rd,
|
||||
decode_if.data.rs1,
|
||||
decode_if.data.rs2,
|
||||
decode_if.data.rs3,
|
||||
operands_if.data.uuid,
|
||||
operands_if.data.wis,
|
||||
operands_if.data.tmask,
|
||||
operands_if.data.PC,
|
||||
operands_if.data.ex_type,
|
||||
operands_if.data.op_type,
|
||||
operands_if.data.wb,
|
||||
operands_if.data.rd,
|
||||
operands_if.data.rs1_data,
|
||||
operands_if.data.rs2_data,
|
||||
operands_if.data.rs3_data,
|
||||
operands_if.data.rs1_data[0],
|
||||
operands_if.data.rs2_data[0],
|
||||
operands_if.data.rs3_data[0],
|
||||
writeback_if.data.uuid,
|
||||
writeback_if.data.wis,
|
||||
writeback_if.data.tmask,
|
||||
writeback_if.data.rd,
|
||||
writeback_if.data.data,
|
||||
writeback_if.data.eop
|
||||
}),
|
||||
.bus_in (scope_bus_in),
|
||||
.bus_out (scope_bus_out)
|
||||
},
|
||||
reset_negedge, 1'b0, 4096
|
||||
);
|
||||
`else
|
||||
`SCOPE_IO_UNUSED()
|
||||
`SCOPE_IO_UNUSED(0)
|
||||
`endif
|
||||
`endif
|
||||
`ifdef CHIPSCOPE
|
||||
ila_issue ila_issue_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({decode_if.valid, decode_if.data, decode_if.ready}),
|
||||
.probe1 ({scoreboard_if.valid, scoreboard_if.data, scoreboard_if.ready}),
|
||||
.probe2 ({operands_if.valid, operands_if.data, operands_if.ready}),
|
||||
.probe3 ({writeback_if.valid, writeback_if.data})
|
||||
);
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (operands_if.valid && operands_if.ready) begin
|
||||
`TRACE(1, ("%d: %s wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), {operands_if.data.PC, 1'b0}));
|
||||
`TRACE(1, ("%t: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), {operands_if.data.PC, 1'b0}))
|
||||
trace_ex_type(1, operands_if.data.ex_type);
|
||||
`TRACE(1, (", op="));
|
||||
`TRACE(1, (", op="))
|
||||
trace_ex_op(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
|
||||
`TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if.data.tmask, operands_if.data.wb, operands_if.data.rd));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs1_data, `NUM_THREADS);
|
||||
`TRACE(1, (", rs2_data="));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs2_data, `NUM_THREADS);
|
||||
`TRACE(1, (", rs3_data="));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs3_data, `NUM_THREADS);
|
||||
`TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if.data.tmask, operands_if.data.wb, operands_if.data.rd))
|
||||
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs1_data, `NUM_THREADS)
|
||||
`TRACE(1, (", rs2_data="))
|
||||
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs2_data, `NUM_THREADS)
|
||||
`TRACE(1, (", rs3_data="))
|
||||
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs3_data, `NUM_THREADS)
|
||||
trace_op_args(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
|
||||
`TRACE(1, (" (#%0d)\n", operands_if.data.uuid));
|
||||
`TRACE(1, (" (#%0d)\n", operands_if.data.uuid))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -80,7 +80,7 @@ module VX_issue_top import VX_gpu_pkg::*; #(
|
|||
assign decode_if.data.rs3 = decode_rs3;
|
||||
assign decode_ready = decode_if.ready;
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_writeback_if
|
||||
assign writeback_if[i].valid = writeback_valid[i];
|
||||
assign writeback_if[i].data.uuid = writeback_uuid[i];
|
||||
assign writeback_if[i].data.wis = writeback_wis[i];
|
||||
|
@ -92,7 +92,7 @@ module VX_issue_top import VX_gpu_pkg::*; #(
|
|||
assign writeback_if[i].data.eop = writeback_eop[i];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_EX_UNITS * `ISSUE_WIDTH; ++i) begin
|
||||
for (genvar i = 0; i < `NUM_EX_UNITS * `ISSUE_WIDTH; ++i) begin : g_dispatch_if
|
||||
assign dispatch_valid[i] = dispatch_if[i].valid;
|
||||
assign dispatch_uuid[i] = dispatch_if[i].data.uuid;
|
||||
assign dispatch_wis[i] = dispatch_if[i].data.wis;
|
||||
|
@ -113,6 +113,13 @@ module VX_issue_top import VX_gpu_pkg::*; #(
|
|||
issue_perf_t issue_perf = '0;
|
||||
`endif
|
||||
|
||||
`ifdef SCOPE
|
||||
wire [0:0] scope_reset_w = 1'b0;
|
||||
wire [0:0] scope_bus_in_w = 1'b0;
|
||||
wire [0:0] scope_bus_out_w;
|
||||
`UNUSED_VAR (scope_bus_out_w)
|
||||
`endif
|
||||
|
||||
VX_issue #(
|
||||
.INSTANCE_ID (INSTANCE_ID)
|
||||
) issue (
|
||||
|
|
|
@ -1,201 +0,0 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_lmem_unit import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
output cache_perf_t cache_perf,
|
||||
`endif
|
||||
|
||||
VX_lsu_mem_if.slave lsu_mem_in_if [`NUM_LSU_BLOCKS],
|
||||
VX_lsu_mem_if.master lsu_mem_out_if [`NUM_LSU_BLOCKS]
|
||||
);
|
||||
`STATIC_ASSERT(`IS_DIVISBLE((1 << `LMEM_LOG_SIZE), `MEM_BLOCK_SIZE), ("invalid parameter"))
|
||||
`STATIC_ASSERT(0 == (`LMEM_BASE_ADDR % (1 << `LMEM_LOG_SIZE)), ("invalid parameter"))
|
||||
|
||||
localparam REQ_DATAW = `NUM_LSU_LANES + 1 + `NUM_LSU_LANES * (LSU_WORD_SIZE + LSU_ADDR_WIDTH + `ADDR_TYPE_WIDTH + LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH;
|
||||
localparam RSP_DATAW = `NUM_LSU_LANES + `NUM_LSU_LANES * (LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH;
|
||||
localparam LMEM_ADDR_WIDTH = `LMEM_LOG_SIZE - `CLOG2(LSU_WORD_SIZE);
|
||||
|
||||
VX_lsu_mem_if #(
|
||||
.NUM_LANES (`NUM_LSU_LANES),
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||
) lsu_switch_if[`NUM_LSU_BLOCKS]();
|
||||
|
||||
`RESET_RELAY_EX (block_reset, reset, `NUM_LSU_BLOCKS, 1);
|
||||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||
|
||||
wire [`NUM_LSU_LANES-1:0] is_addr_local_mask;
|
||||
for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin
|
||||
assign is_addr_local_mask[j] = lsu_mem_in_if[i].req_data.atype[j][`ADDR_TYPE_LOCAL];
|
||||
end
|
||||
|
||||
wire is_addr_global = | (lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask);
|
||||
wire is_addr_local = | (lsu_mem_in_if[i].req_data.mask & is_addr_local_mask);
|
||||
|
||||
wire req_global_ready;
|
||||
wire req_local_ready;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (REQ_DATAW),
|
||||
.SIZE (2),
|
||||
.OUT_REG (1)
|
||||
) req_global_buf (
|
||||
.clk (clk),
|
||||
.reset (block_reset[i]),
|
||||
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_global),
|
||||
.data_in ({
|
||||
lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask,
|
||||
lsu_mem_in_if[i].req_data.rw,
|
||||
lsu_mem_in_if[i].req_data.byteen,
|
||||
lsu_mem_in_if[i].req_data.addr,
|
||||
lsu_mem_in_if[i].req_data.atype,
|
||||
lsu_mem_in_if[i].req_data.data,
|
||||
lsu_mem_in_if[i].req_data.tag
|
||||
}),
|
||||
.ready_in (req_global_ready),
|
||||
.valid_out (lsu_mem_out_if[i].req_valid),
|
||||
.data_out ({
|
||||
lsu_mem_out_if[i].req_data.mask,
|
||||
lsu_mem_out_if[i].req_data.rw,
|
||||
lsu_mem_out_if[i].req_data.byteen,
|
||||
lsu_mem_out_if[i].req_data.addr,
|
||||
lsu_mem_out_if[i].req_data.atype,
|
||||
lsu_mem_out_if[i].req_data.data,
|
||||
lsu_mem_out_if[i].req_data.tag
|
||||
}),
|
||||
.ready_out (lsu_mem_out_if[i].req_ready)
|
||||
);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (REQ_DATAW),
|
||||
.SIZE (0),
|
||||
.OUT_REG (0)
|
||||
) req_local_buf (
|
||||
.clk (clk),
|
||||
.reset (block_reset[i]),
|
||||
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_local),
|
||||
.data_in ({
|
||||
lsu_mem_in_if[i].req_data.mask & is_addr_local_mask,
|
||||
lsu_mem_in_if[i].req_data.rw,
|
||||
lsu_mem_in_if[i].req_data.byteen,
|
||||
lsu_mem_in_if[i].req_data.addr,
|
||||
lsu_mem_in_if[i].req_data.atype,
|
||||
lsu_mem_in_if[i].req_data.data,
|
||||
lsu_mem_in_if[i].req_data.tag
|
||||
}),
|
||||
.ready_in (req_local_ready),
|
||||
.valid_out (lsu_switch_if[i].req_valid),
|
||||
.data_out ({
|
||||
lsu_switch_if[i].req_data.mask,
|
||||
lsu_switch_if[i].req_data.rw,
|
||||
lsu_switch_if[i].req_data.byteen,
|
||||
lsu_switch_if[i].req_data.addr,
|
||||
lsu_switch_if[i].req_data.atype,
|
||||
lsu_switch_if[i].req_data.data,
|
||||
lsu_switch_if[i].req_data.tag
|
||||
}),
|
||||
.ready_out (lsu_switch_if[i].req_ready)
|
||||
);
|
||||
|
||||
assign lsu_mem_in_if[i].req_ready = (req_global_ready && is_addr_global)
|
||||
|| (req_local_ready && is_addr_local);
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATAW (RSP_DATAW),
|
||||
.ARBITER ("R"),
|
||||
.OUT_BUF (1)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (block_reset[i]),
|
||||
.valid_in ({
|
||||
lsu_switch_if[i].rsp_valid,
|
||||
lsu_mem_out_if[i].rsp_valid
|
||||
}),
|
||||
.ready_in ({
|
||||
lsu_switch_if[i].rsp_ready,
|
||||
lsu_mem_out_if[i].rsp_ready
|
||||
}),
|
||||
.data_in ({
|
||||
lsu_switch_if[i].rsp_data,
|
||||
lsu_mem_out_if[i].rsp_data
|
||||
}),
|
||||
.data_out (lsu_mem_in_if[i].rsp_data),
|
||||
.valid_out (lsu_mem_in_if[i].rsp_valid),
|
||||
.ready_out (lsu_mem_in_if[i].rsp_ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
end
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||
) lmem_bus_if[LSU_NUM_REQS]();
|
||||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||
) lmem_bus_tmp_if[`NUM_LSU_LANES]();
|
||||
|
||||
VX_lsu_adapter #(
|
||||
.NUM_LANES (`NUM_LSU_LANES),
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH),
|
||||
.TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH),
|
||||
.ARBITER ("P"),
|
||||
.REQ_OUT_BUF (3),
|
||||
.RSP_OUT_BUF (0)
|
||||
) lsu_adapter (
|
||||
.clk (clk),
|
||||
.reset (block_reset[i]),
|
||||
.lsu_mem_if (lsu_switch_if[i]),
|
||||
.mem_bus_if (lmem_bus_tmp_if)
|
||||
);
|
||||
|
||||
for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin
|
||||
`ASSIGN_VX_MEM_BUS_IF (lmem_bus_if[i * `NUM_LSU_LANES + j], lmem_bus_tmp_if[j]);
|
||||
end
|
||||
end
|
||||
|
||||
`RESET_RELAY (lmem_reset, reset);
|
||||
|
||||
VX_local_mem #(
|
||||
.INSTANCE_ID($sformatf("%s-lmem", INSTANCE_ID)),
|
||||
.SIZE (1 << `LMEM_LOG_SIZE),
|
||||
.NUM_REQS (LSU_NUM_REQS),
|
||||
.NUM_BANKS (`LMEM_NUM_BANKS),
|
||||
.WORD_SIZE (LSU_WORD_SIZE),
|
||||
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH),
|
||||
.OUT_BUF (3)
|
||||
) local_mem (
|
||||
.clk (clk),
|
||||
.reset (lmem_reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.cache_perf (cache_perf),
|
||||
`endif
|
||||
.mem_bus_if (lmem_bus_if)
|
||||
);
|
||||
|
||||
endmodule
|
|
@ -13,7 +13,7 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
||||
module VX_lsu_slice import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
@ -59,25 +59,25 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
wire req_is_fence, rsp_is_fence;
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] full_addr;
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_full_addr
|
||||
assign full_addr[i] = execute_if.data.rs1_data[i] + `SEXT(`XLEN, execute_if.data.op_args.lsu.offset);
|
||||
end
|
||||
|
||||
// address type calculation
|
||||
|
||||
wire [NUM_LANES-1:0][`ADDR_TYPE_WIDTH-1:0] mem_req_atype;
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [NUM_LANES-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] mem_req_flags;
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mem_req_flags
|
||||
wire [MEM_ADDRW-1:0] block_addr = full_addr[i][MEM_ASHIFT +: MEM_ADDRW];
|
||||
// is I/O address
|
||||
wire [MEM_ADDRW-1:0] io_addr_start = MEM_ADDRW'(`XLEN'(`IO_BASE_ADDR) >> MEM_ASHIFT);
|
||||
wire [MEM_ADDRW-1:0] io_addr_end = MEM_ADDRW'(`XLEN'(`IO_END_ADDR) >> MEM_ASHIFT);
|
||||
assign mem_req_atype[i][`ADDR_TYPE_FLUSH] = req_is_fence;
|
||||
assign mem_req_atype[i][`ADDR_TYPE_IO] = (block_addr >= io_addr_start) && (block_addr < io_addr_end);
|
||||
assign mem_req_flags[i][`MEM_REQ_FLAG_FLUSH] = req_is_fence;
|
||||
assign mem_req_flags[i][`MEM_REQ_FLAG_IO] = (block_addr >= io_addr_start) && (block_addr < io_addr_end);
|
||||
`ifdef LMEM_ENABLE
|
||||
// is local memory address
|
||||
wire [MEM_ADDRW-1:0] lmem_addr_start = MEM_ADDRW'(`XLEN'(`LMEM_BASE_ADDR) >> MEM_ASHIFT);
|
||||
wire [MEM_ADDRW-1:0] lmem_addr_end = MEM_ADDRW'((`XLEN'(`LMEM_BASE_ADDR) + `XLEN'(1 << `LMEM_LOG_SIZE)) >> MEM_ASHIFT);
|
||||
assign mem_req_atype[i][`ADDR_TYPE_LOCAL] = (block_addr >= lmem_addr_start) && (block_addr < lmem_addr_end);
|
||||
assign mem_req_flags[i][`MEM_REQ_FLAG_LOCAL] = (block_addr >= lmem_addr_start) && (block_addr < lmem_addr_end);
|
||||
`endif
|
||||
end
|
||||
|
||||
|
@ -102,8 +102,6 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
|
||||
wire mem_req_fire = mem_req_valid && mem_req_ready;
|
||||
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
|
||||
`UNUSED_VAR (mem_req_fire)
|
||||
`UNUSED_VAR (mem_rsp_fire)
|
||||
|
||||
wire mem_rsp_sop_pkt, mem_rsp_eop_pkt;
|
||||
wire no_rsp_buf_valid, no_rsp_buf_ready;
|
||||
|
@ -151,49 +149,49 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
|
||||
wire [NUM_LANES-1:0][REQ_ASHIFT-1:0] req_align;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mem_req_addr
|
||||
assign req_align[i] = full_addr[i][REQ_ASHIFT-1:0];
|
||||
assign mem_req_addr[i] = full_addr[i][`MEM_ADDR_WIDTH-1:REQ_ASHIFT];
|
||||
end
|
||||
|
||||
// byte enable formatting
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
reg [LSU_WORD_SIZE-1:0] mem_req_byteen_r;
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mem_req_byteen_w
|
||||
reg [LSU_WORD_SIZE-1:0] mem_req_byteen_w;
|
||||
always @(*) begin
|
||||
mem_req_byteen_r = '0;
|
||||
mem_req_byteen_w = '0;
|
||||
case (`INST_LSU_WSIZE(execute_if.data.op_type))
|
||||
0: begin // 8-bit
|
||||
mem_req_byteen_r[req_align[i]] = 1'b1;
|
||||
mem_req_byteen_w[req_align[i]] = 1'b1;
|
||||
end
|
||||
1: begin // 16 bit
|
||||
mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:1], 1'b0}] = 1'b1;
|
||||
mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:1], 1'b1}] = 1'b1;
|
||||
mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:1], 1'b0}] = 1'b1;
|
||||
mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:1], 1'b1}] = 1'b1;
|
||||
end
|
||||
`ifdef XLEN_64
|
||||
2: begin // 32 bit
|
||||
mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b00}] = 1'b1;
|
||||
mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b01}] = 1'b1;
|
||||
mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b10}] = 1'b1;
|
||||
mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b11}] = 1'b1;
|
||||
mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:2], 2'b00}] = 1'b1;
|
||||
mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:2], 2'b01}] = 1'b1;
|
||||
mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:2], 2'b10}] = 1'b1;
|
||||
mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:2], 2'b11}] = 1'b1;
|
||||
end
|
||||
`endif
|
||||
// 3: 64 bit
|
||||
default : mem_req_byteen_r = {LSU_WORD_SIZE{1'b1}};
|
||||
default : mem_req_byteen_w = {LSU_WORD_SIZE{1'b1}};
|
||||
endcase
|
||||
end
|
||||
assign mem_req_byteen[i] = mem_req_byteen_r;
|
||||
assign mem_req_byteen[i] = mem_req_byteen_w;
|
||||
end
|
||||
|
||||
// memory misalignment not supported!
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_missalign
|
||||
wire lsu_req_fire = execute_if.valid && execute_if.ready;
|
||||
`RUNTIME_ASSERT((~lsu_req_fire || ~execute_if.data.tmask[i] || req_is_fence || (full_addr[i] % (1 << `INST_LSU_WSIZE(execute_if.data.op_type))) == 0),
|
||||
("misaligned memory access, wid=%0d, PC=0x%0h, addr=0x%0h, wsize=%0d! (#%0d)",
|
||||
execute_if.data.wid, {execute_if.data.PC, 1'b0}, full_addr[i], `INST_LSU_WSIZE(execute_if.data.op_type), execute_if.data.uuid));
|
||||
("%t: misaligned memory access, wid=%0d, PC=0x%0h, addr=0x%0h, wsize=%0d! (#%0d)",
|
||||
$time, execute_if.data.wid, {execute_if.data.PC, 1'b0}, full_addr[i], `INST_LSU_WSIZE(execute_if.data.op_type), execute_if.data.uuid))
|
||||
end
|
||||
|
||||
// store data formatting
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mem_req_data
|
||||
always @(*) begin
|
||||
mem_req_data[i] = execute_if.data.rs2_data[i];
|
||||
case (req_align[i])
|
||||
|
@ -215,7 +213,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
|
||||
wire [LSUQ_SIZEW-1:0] pkt_waddr, pkt_raddr;
|
||||
|
||||
if (PID_BITS != 0) begin
|
||||
if (PID_BITS != 0) begin : g_pids
|
||||
reg [`LSUQ_IN_SIZE-1:0][PID_BITS:0] pkt_ctr;
|
||||
reg [`LSUQ_IN_SIZE-1:0] pkt_sop, pkt_eop;
|
||||
|
||||
|
@ -271,10 +269,10 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
|
||||
assign mem_rsp_sop_pkt = pkt_sop[pkt_raddr];
|
||||
assign mem_rsp_eop_pkt = mem_rsp_eop_fire && pkt_eop[pkt_raddr] && (pkt_ctr[pkt_raddr] == 1);
|
||||
`RUNTIME_ASSERT(~(mem_req_rd_fire && full), ("allocator full!"))
|
||||
`RUNTIME_ASSERT(~mem_req_rd_sop_fire || 0 == pkt_ctr[pkt_waddr], ("Oops!"))
|
||||
`RUNTIME_ASSERT(~(mem_req_rd_fire && full), ("%t: allocator full!", $time))
|
||||
`RUNTIME_ASSERT(~mem_req_rd_sop_fire || 0 == pkt_ctr[pkt_waddr], ("%t: oops! broken sop request!", $time))
|
||||
`UNUSED_VAR (mem_rsp_sop)
|
||||
end else begin
|
||||
end else begin : g_no_pids
|
||||
assign pkt_waddr = 0;
|
||||
assign mem_rsp_sop_pkt = mem_rsp_sop;
|
||||
assign mem_rsp_eop_pkt = mem_rsp_eop;
|
||||
|
@ -300,7 +298,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
wire [NUM_LANES-1:0] lsu_mem_req_mask;
|
||||
wire [NUM_LANES-1:0][LSU_WORD_SIZE-1:0] lsu_mem_req_byteen;
|
||||
wire [NUM_LANES-1:0][LSU_ADDR_WIDTH-1:0] lsu_mem_req_addr;
|
||||
wire [NUM_LANES-1:0][`ADDR_TYPE_WIDTH-1:0] lsu_mem_req_atype;
|
||||
wire [NUM_LANES-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] lsu_mem_req_flags;
|
||||
wire [NUM_LANES-1:0][(LSU_WORD_SIZE*8)-1:0] lsu_mem_req_data;
|
||||
wire [LSU_TAG_WIDTH-1:0] lsu_mem_req_tag;
|
||||
wire lsu_mem_req_ready;
|
||||
|
@ -311,16 +309,14 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
wire [LSU_TAG_WIDTH-1:0] lsu_mem_rsp_tag;
|
||||
wire lsu_mem_rsp_ready;
|
||||
|
||||
`RESET_RELAY (mem_scheduler_reset, reset);
|
||||
|
||||
VX_mem_scheduler #(
|
||||
.INSTANCE_ID ($sformatf("%s-scheduler", INSTANCE_ID)),
|
||||
.INSTANCE_ID ($sformatf("%s-memsched", INSTANCE_ID)),
|
||||
.CORE_REQS (NUM_LANES),
|
||||
.MEM_CHANNELS(NUM_LANES),
|
||||
.WORD_SIZE (LSU_WORD_SIZE),
|
||||
.LINE_SIZE (LSU_WORD_SIZE),
|
||||
.ADDR_WIDTH (LSU_ADDR_WIDTH),
|
||||
.ATYPE_WIDTH (`ADDR_TYPE_WIDTH),
|
||||
.FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.CORE_QUEUE_SIZE (`LSUQ_IN_SIZE),
|
||||
.MEM_QUEUE_SIZE (`LSUQ_OUT_SIZE),
|
||||
|
@ -330,7 +326,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
.CORE_OUT_BUF(0)
|
||||
) mem_scheduler (
|
||||
.clk (clk),
|
||||
.reset (mem_scheduler_reset),
|
||||
.reset (reset),
|
||||
|
||||
// Input request
|
||||
.core_req_valid (mem_req_valid),
|
||||
|
@ -338,12 +334,12 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
.core_req_mask (mem_req_mask),
|
||||
.core_req_byteen(mem_req_byteen),
|
||||
.core_req_addr (mem_req_addr),
|
||||
.core_req_atype (mem_req_atype),
|
||||
.core_req_flags (mem_req_flags),
|
||||
.core_req_data (mem_req_data),
|
||||
.core_req_tag (mem_req_tag),
|
||||
.core_req_ready (mem_req_ready),
|
||||
`UNUSED_PIN (core_req_empty),
|
||||
`UNUSED_PIN (core_req_sent),
|
||||
`UNUSED_PIN (core_req_wr_notify),
|
||||
|
||||
// Output response
|
||||
.core_rsp_valid (mem_rsp_valid),
|
||||
|
@ -360,7 +356,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
.mem_req_mask (lsu_mem_req_mask),
|
||||
.mem_req_byteen (lsu_mem_req_byteen),
|
||||
.mem_req_addr (lsu_mem_req_addr),
|
||||
.mem_req_atype (lsu_mem_req_atype),
|
||||
.mem_req_flags (lsu_mem_req_flags),
|
||||
.mem_req_data (lsu_mem_req_data),
|
||||
.mem_req_tag (lsu_mem_req_tag),
|
||||
.mem_req_ready (lsu_mem_req_ready),
|
||||
|
@ -378,7 +374,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
assign lsu_mem_if.req_data.rw = lsu_mem_req_rw;
|
||||
assign lsu_mem_if.req_data.byteen = lsu_mem_req_byteen;
|
||||
assign lsu_mem_if.req_data.addr = lsu_mem_req_addr;
|
||||
assign lsu_mem_if.req_data.atype = lsu_mem_req_atype;
|
||||
assign lsu_mem_if.req_data.flags = lsu_mem_req_flags;
|
||||
assign lsu_mem_if.req_data.data = lsu_mem_req_data;
|
||||
assign lsu_mem_if.req_data.tag = lsu_mem_req_tag;
|
||||
assign lsu_mem_req_ready = lsu_mem_if.req_ready;
|
||||
|
@ -426,7 +422,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
`endif
|
||||
`endif
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; i++) begin
|
||||
for (genvar i = 0; i < NUM_LANES; i++) begin : g_rsp_data
|
||||
`ifdef XLEN_64
|
||||
wire [63:0] rsp_data64 = mem_rsp_data[i];
|
||||
wire [31:0] rsp_data32 = (rsp_align[i][2] ? mem_rsp_data[i][63:32] : mem_rsp_data[i][31:0]);
|
||||
|
@ -483,6 +479,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
.valid_out (commit_no_rsp_if.valid),
|
||||
.ready_out (commit_no_rsp_if.ready)
|
||||
);
|
||||
|
||||
assign commit_no_rsp_if.data.rd = '0;
|
||||
assign commit_no_rsp_if.data.wb = 1'b0;
|
||||
assign commit_no_rsp_if.data.data = commit_rsp_if.data.data; // arbiter MUX optimization
|
||||
|
@ -507,51 +504,70 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
`ifdef DBG_TRACE_MEM
|
||||
always @(posedge clk) begin
|
||||
if (execute_if.valid && fence_lock) begin
|
||||
`TRACE(1, ("%d: *** %s fence wait\n", $time, INSTANCE_ID));
|
||||
`TRACE(1, ("%t: *** %s fence wait\n", $time, INSTANCE_ID))
|
||||
end
|
||||
if (mem_req_fire) begin
|
||||
if (mem_req_rw) begin
|
||||
`TRACE(1, ("%d: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask));
|
||||
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES);
|
||||
`TRACE(1, (", atype="));
|
||||
`TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES);
|
||||
`TRACE(1, (", byteen=0x%0h, data=", mem_req_byteen));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES);
|
||||
`TRACE(1, (", tag=0x%0h (#%0d)\n", mem_req_tag, execute_if.data.uuid));
|
||||
`TRACE(1, ("%t: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask))
|
||||
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES)
|
||||
`TRACE(1, (", flags="))
|
||||
`TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES)
|
||||
`TRACE(1, (", byteen=0x%0h, data=", mem_req_byteen))
|
||||
`TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES)
|
||||
`TRACE(1, (", sop=%b, eop=%b, tag=0x%0h (#%0d)\n", execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid))
|
||||
end else begin
|
||||
`TRACE(1, ("%d: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask));
|
||||
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES);
|
||||
`TRACE(1, (", atype="));
|
||||
`TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES);
|
||||
`TRACE(1, (", byteen=0x%0h, rd=%0d, tag=0x%0h (#%0d)\n", mem_req_byteen, execute_if.data.rd, mem_req_tag, execute_if.data.uuid));
|
||||
`TRACE(1, ("%t: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask))
|
||||
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES)
|
||||
`TRACE(1, (", flags="))
|
||||
`TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES)
|
||||
`TRACE(1, (", byteen=0x%0h, rd=%0d, sop=%b, eop=%b, tag=0x%0h (#%0d)\n", mem_req_byteen, execute_if.data.rd, execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid))
|
||||
end
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(1, ("%d: %s Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=",
|
||||
$time, INSTANCE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data, NUM_LANES);
|
||||
`TRACE(1, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid));
|
||||
`TRACE(1, ("%t: %s Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=",
|
||||
$time, INSTANCE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop))
|
||||
`TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data, NUM_LANES)
|
||||
`TRACE(1, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
`ifdef SCOPE
|
||||
`ifdef DBG_SCOPE_LSU
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (3),
|
||||
.TRIGGERW (3),
|
||||
.PROBEW (1 + NUM_LANES*(`XLEN + LSU_WORD_SIZE + LSU_WORD_SIZE*8) + `UUID_WIDTH + NUM_LANES*LSU_WORD_SIZE*8 + `UUID_WIDTH)
|
||||
) scope_tap (
|
||||
.clk (clk),
|
||||
.reset (scope_reset),
|
||||
.start (1'b0),
|
||||
.stop (1'b0),
|
||||
.triggers({reset, mem_req_fire, mem_rsp_fire}),
|
||||
.probes ({mem_req_rw, full_addr, mem_req_byteen, mem_req_data, execute_if.data.uuid, rsp_data, rsp_uuid}),
|
||||
.bus_in (scope_bus_in),
|
||||
.bus_out(scope_bus_out)
|
||||
`SCOPE_IO_SWITCH (1);
|
||||
`NEG_EDGE (reset_negedge, reset);
|
||||
`SCOPE_TAP_EX (0, 3, 4, 2, (
|
||||
1 + NUM_LANES * (`XLEN + LSU_WORD_SIZE + LSU_WORD_SIZE * 8) + `UUID_WIDTH + NUM_LANES * LSU_WORD_SIZE * 8 + `UUID_WIDTH
|
||||
), {
|
||||
mem_req_valid,
|
||||
mem_req_ready,
|
||||
mem_rsp_valid,
|
||||
mem_rsp_ready
|
||||
}, {
|
||||
mem_req_fire,
|
||||
mem_rsp_fire
|
||||
}, {
|
||||
mem_req_rw,
|
||||
full_addr,
|
||||
mem_req_byteen,
|
||||
mem_req_data,
|
||||
execute_if.data.uuid,
|
||||
rsp_data,
|
||||
rsp_uuid
|
||||
},
|
||||
reset_negedge, 1'b0, 4096
|
||||
);
|
||||
`else
|
||||
`SCOPE_IO_UNUSED()
|
||||
`SCOPE_IO_UNUSED(0)
|
||||
`endif
|
||||
`endif
|
||||
`ifdef CHIPSCOPE
|
||||
ila_lsu ila_lsu_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({execute_if.valid, execute_if.data, execute_if.ready}),
|
||||
.probe1 ({lsu_mem_if.req_valid, lsu_mem_if.req_data, lsu_mem_if.req_ready}),
|
||||
.probe2 ({lsu_mem_if.rsp_valid, lsu_mem_if.rsp_data, lsu_mem_if.rsp_ready})
|
||||
);
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -31,9 +31,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
localparam BLOCK_SIZE = `NUM_LSU_BLOCKS;
|
||||
localparam NUM_LANES = `NUM_LSU_LANES;
|
||||
|
||||
`ifdef SCOPE
|
||||
`SCOPE_IO_SWITCH (BLOCK_SIZE);
|
||||
`endif
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
|
@ -42,7 +40,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
VX_dispatch_unit #(
|
||||
.BLOCK_SIZE (BLOCK_SIZE),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.OUT_BUF (1)
|
||||
.OUT_BUF (3)
|
||||
) dispatch_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -54,16 +52,13 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
.NUM_LANES (NUM_LANES)
|
||||
) per_block_commit_if[BLOCK_SIZE]();
|
||||
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : lsu_slices
|
||||
|
||||
`RESET_RELAY (slice_reset, reset);
|
||||
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_lsus
|
||||
VX_lsu_slice #(
|
||||
.INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, block_idx))
|
||||
) lsu_slice(
|
||||
`SCOPE_IO_BIND (block_idx)
|
||||
.clk (clk),
|
||||
.reset (slice_reset),
|
||||
.reset (reset),
|
||||
.execute_if (per_block_execute_if[block_idx]),
|
||||
.commit_if (per_block_commit_if[block_idx]),
|
||||
.lsu_mem_if (lsu_mem_if[block_idx])
|
||||
|
|
221
hw/rtl/core/VX_mem_unit.sv
Normal file
221
hw/rtl/core/VX_mem_unit.sv
Normal file
|
@ -0,0 +1,221 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_mem_unit import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
output cache_perf_t lmem_perf,
|
||||
`endif
|
||||
|
||||
VX_lsu_mem_if.slave lsu_mem_if [`NUM_LSU_BLOCKS],
|
||||
VX_mem_bus_if.master dcache_bus_if [DCACHE_NUM_REQS]
|
||||
);
|
||||
VX_lsu_mem_if #(
|
||||
.NUM_LANES (`NUM_LSU_LANES),
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||
) lsu_dcache_if[`NUM_LSU_BLOCKS]();
|
||||
|
||||
`ifdef LMEM_ENABLE
|
||||
|
||||
`STATIC_ASSERT(`IS_DIVISBLE((1 << `LMEM_LOG_SIZE), `MEM_BLOCK_SIZE), ("invalid parameter"))
|
||||
`STATIC_ASSERT(0 == (`LMEM_BASE_ADDR % (1 << `LMEM_LOG_SIZE)), ("invalid parameter"))
|
||||
|
||||
localparam LMEM_ADDR_WIDTH = `LMEM_LOG_SIZE - `CLOG2(LSU_WORD_SIZE);
|
||||
|
||||
VX_lsu_mem_if #(
|
||||
.NUM_LANES (`NUM_LSU_LANES),
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||
) lsu_lmem_if[`NUM_LSU_BLOCKS]();
|
||||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lmem_switches
|
||||
VX_lmem_switch #(
|
||||
.REQ0_OUT_BUF (3),
|
||||
.REQ1_OUT_BUF (0),
|
||||
.RSP_OUT_BUF (1),
|
||||
.ARBITER ("P")
|
||||
) lmem_switch (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.lsu_in_if (lsu_mem_if[i]),
|
||||
.global_out_if(lsu_dcache_if[i]),
|
||||
.local_out_if (lsu_lmem_if[i])
|
||||
);
|
||||
end
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||
) lmem_bus_if[LSU_NUM_REQS]();
|
||||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lmem_adapters
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||
) lmem_bus_tmp_if[`NUM_LSU_LANES]();
|
||||
|
||||
VX_lsu_adapter #(
|
||||
.NUM_LANES (`NUM_LSU_LANES),
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH),
|
||||
.TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH),
|
||||
.ARBITER ("P"),
|
||||
.REQ_OUT_BUF (3),
|
||||
.RSP_OUT_BUF (0)
|
||||
) lmem_adapter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.lsu_mem_if (lsu_lmem_if[i]),
|
||||
.mem_bus_if (lmem_bus_tmp_if)
|
||||
);
|
||||
|
||||
for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin : g_lmem_bus_if
|
||||
`ASSIGN_VX_MEM_BUS_IF (lmem_bus_if[i * `NUM_LSU_LANES + j], lmem_bus_tmp_if[j]);
|
||||
end
|
||||
end
|
||||
|
||||
VX_local_mem #(
|
||||
.INSTANCE_ID($sformatf("%s-lmem", INSTANCE_ID)),
|
||||
.SIZE (1 << `LMEM_LOG_SIZE),
|
||||
.NUM_REQS (LSU_NUM_REQS),
|
||||
.NUM_BANKS (`LMEM_NUM_BANKS),
|
||||
.WORD_SIZE (LSU_WORD_SIZE),
|
||||
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH),
|
||||
.OUT_BUF (3)
|
||||
) local_mem (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.lmem_perf (lmem_perf),
|
||||
`endif
|
||||
.mem_bus_if (lmem_bus_if)
|
||||
);
|
||||
|
||||
`else
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
assign lmem_perf = '0;
|
||||
`endif
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lsu_dcache_if
|
||||
`ASSIGN_VX_MEM_BUS_IF (lsu_dcache_if[i], lsu_mem_if[i]);
|
||||
end
|
||||
|
||||
`endif
|
||||
|
||||
VX_lsu_mem_if #(
|
||||
.NUM_LANES (DCACHE_CHANNELS),
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH)
|
||||
) dcache_coalesced_if[`NUM_LSU_BLOCKS]();
|
||||
|
||||
if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin : g_enabled
|
||||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_coalescers
|
||||
VX_mem_coalescer #(
|
||||
.INSTANCE_ID ($sformatf("%s-coalescer%0d", INSTANCE_ID, i)),
|
||||
.NUM_REQS (`NUM_LSU_LANES),
|
||||
.DATA_IN_SIZE (LSU_WORD_SIZE),
|
||||
.DATA_OUT_SIZE (DCACHE_WORD_SIZE),
|
||||
.ADDR_WIDTH (LSU_ADDR_WIDTH),
|
||||
.FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.QUEUE_SIZE (`LSUQ_OUT_SIZE)
|
||||
) mem_coalescer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Input request
|
||||
.in_req_valid (lsu_dcache_if[i].req_valid),
|
||||
.in_req_mask (lsu_dcache_if[i].req_data.mask),
|
||||
.in_req_rw (lsu_dcache_if[i].req_data.rw),
|
||||
.in_req_byteen (lsu_dcache_if[i].req_data.byteen),
|
||||
.in_req_addr (lsu_dcache_if[i].req_data.addr),
|
||||
.in_req_flags (lsu_dcache_if[i].req_data.flags),
|
||||
.in_req_data (lsu_dcache_if[i].req_data.data),
|
||||
.in_req_tag (lsu_dcache_if[i].req_data.tag),
|
||||
.in_req_ready (lsu_dcache_if[i].req_ready),
|
||||
|
||||
// Input response
|
||||
.in_rsp_valid (lsu_dcache_if[i].rsp_valid),
|
||||
.in_rsp_mask (lsu_dcache_if[i].rsp_data.mask),
|
||||
.in_rsp_data (lsu_dcache_if[i].rsp_data.data),
|
||||
.in_rsp_tag (lsu_dcache_if[i].rsp_data.tag),
|
||||
.in_rsp_ready (lsu_dcache_if[i].rsp_ready),
|
||||
|
||||
// Output request
|
||||
.out_req_valid (dcache_coalesced_if[i].req_valid),
|
||||
.out_req_mask (dcache_coalesced_if[i].req_data.mask),
|
||||
.out_req_rw (dcache_coalesced_if[i].req_data.rw),
|
||||
.out_req_byteen (dcache_coalesced_if[i].req_data.byteen),
|
||||
.out_req_addr (dcache_coalesced_if[i].req_data.addr),
|
||||
.out_req_flags (dcache_coalesced_if[i].req_data.flags),
|
||||
.out_req_data (dcache_coalesced_if[i].req_data.data),
|
||||
.out_req_tag (dcache_coalesced_if[i].req_data.tag),
|
||||
.out_req_ready (dcache_coalesced_if[i].req_ready),
|
||||
|
||||
// Output response
|
||||
.out_rsp_valid (dcache_coalesced_if[i].rsp_valid),
|
||||
.out_rsp_mask (dcache_coalesced_if[i].rsp_data.mask),
|
||||
.out_rsp_data (dcache_coalesced_if[i].rsp_data.data),
|
||||
.out_rsp_tag (dcache_coalesced_if[i].rsp_data.tag),
|
||||
.out_rsp_ready (dcache_coalesced_if[i].rsp_ready)
|
||||
);
|
||||
end
|
||||
|
||||
end else begin : g_passthru
|
||||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_dcache_coalesced_if
|
||||
`ASSIGN_VX_MEM_BUS_IF (dcache_coalesced_if[i], lsu_dcache_if[i]);
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_dcache_adapters
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH)
|
||||
) dcache_bus_tmp_if[DCACHE_CHANNELS]();
|
||||
|
||||
VX_lsu_adapter #(
|
||||
.NUM_LANES (DCACHE_CHANNELS),
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH),
|
||||
.TAG_SEL_BITS (DCACHE_TAG_WIDTH - `UUID_WIDTH),
|
||||
.ARBITER ("P"),
|
||||
.REQ_OUT_BUF (0),
|
||||
.RSP_OUT_BUF (0)
|
||||
) dcache_adapter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.lsu_mem_if (dcache_coalesced_if[i]),
|
||||
.mem_bus_if (dcache_bus_tmp_if)
|
||||
);
|
||||
|
||||
for (genvar j = 0; j < DCACHE_CHANNELS; ++j) begin : g_dcache_bus_if
|
||||
`ASSIGN_VX_MEM_BUS_IF (dcache_bus_if[i * DCACHE_CHANNELS + j], dcache_bus_tmp_if[j]);
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
endmodule
|
127
hw/rtl/core/VX_mem_unit_top.sv
Normal file
127
hw/rtl/core/VX_mem_unit_top.sv
Normal file
|
@ -0,0 +1,127 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_mem_unit_top import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter LSU_WORD_WIDTH = LSU_WORD_SIZE * 8
|
||||
) (
|
||||
// Clock
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// LSU memory request
|
||||
input wire [`NUM_LSU_BLOCKS-1:0] lsu_req_valid,
|
||||
input wire [`NUM_LSU_BLOCKS-1:0] lsu_req_rw,
|
||||
input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0] lsu_req_mask,
|
||||
input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][LSU_WORD_SIZE-1:0] lsu_req_byteen,
|
||||
input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][LSU_ADDR_WIDTH-1:0] lsu_req_addr,
|
||||
input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] lsu_req_flags,
|
||||
input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][LSU_WORD_WIDTH-1:0] lsu_req_data,
|
||||
input wire [`NUM_LSU_BLOCKS-1:0][LSU_TAG_WIDTH-1:0] lsu_req_tag,
|
||||
output wire [`NUM_LSU_BLOCKS-1:0] lsu_req_ready,
|
||||
|
||||
// LSU memory response
|
||||
output wire [`NUM_LSU_BLOCKS-1:0] lsu_rsp_valid,
|
||||
output wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0] lsu_rsp_mask,
|
||||
output wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][LSU_WORD_WIDTH-1:0] lsu_rsp_data,
|
||||
output wire [`NUM_LSU_BLOCKS-1:0][LSU_TAG_WIDTH-1:0] lsu_rsp_tag,
|
||||
input wire [`NUM_LSU_BLOCKS-1:0] lsu_rsp_ready,
|
||||
|
||||
// Memory request
|
||||
output wire [DCACHE_NUM_REQS-1:0] mem_req_valid,
|
||||
output wire [DCACHE_NUM_REQS-1:0] mem_req_rw,
|
||||
output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE-1:0] mem_req_byteen,
|
||||
output wire [DCACHE_NUM_REQS-1:0][DCACHE_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire [DCACHE_NUM_REQS-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] mem_req_flags,
|
||||
output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] mem_req_data,
|
||||
output wire [DCACHE_NUM_REQS-1:0][DCACHE_TAG_WIDTH-1:0] mem_req_tag,
|
||||
input wire [DCACHE_NUM_REQS-1:0] mem_req_ready,
|
||||
|
||||
// Memory response
|
||||
input wire [DCACHE_NUM_REQS-1:0] mem_rsp_valid,
|
||||
input wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] mem_rsp_data,
|
||||
input wire [DCACHE_NUM_REQS-1:0][DCACHE_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
output wire [DCACHE_NUM_REQS-1:0] mem_rsp_ready
|
||||
);
|
||||
VX_lsu_mem_if #(
|
||||
.NUM_LANES (`NUM_LSU_LANES),
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||
) lsu_mem_if[`NUM_LSU_BLOCKS]();
|
||||
|
||||
// LSU memory request
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lsu_mem_req
|
||||
assign lsu_mem_if[i].req_valid = lsu_req_valid[i];
|
||||
assign lsu_mem_if[i].req_data.rw = lsu_req_rw[i];
|
||||
assign lsu_mem_if[i].req_data.mask = lsu_req_mask[i];
|
||||
assign lsu_mem_if[i].req_data.byteen = lsu_req_byteen[i];
|
||||
assign lsu_mem_if[i].req_data.addr = lsu_req_addr[i];
|
||||
assign lsu_mem_if[i].req_data.flags = lsu_req_flags[i];
|
||||
assign lsu_mem_if[i].req_data.data = lsu_req_data[i];
|
||||
assign lsu_mem_if[i].req_data.tag = lsu_req_tag[i];
|
||||
assign lsu_req_ready[i] = lsu_mem_if[i].req_ready;
|
||||
end
|
||||
|
||||
// LSU memory response
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lsu_rsp
|
||||
assign lsu_rsp_valid[i] = lsu_mem_if[i].rsp_valid;
|
||||
assign lsu_rsp_mask[i] = lsu_mem_if[i].rsp_data.mask;
|
||||
assign lsu_rsp_data[i] = lsu_mem_if[i].rsp_data.data;
|
||||
assign lsu_rsp_tag[i] = lsu_mem_if[i].rsp_data.tag;
|
||||
assign lsu_mem_if[i].rsp_ready = lsu_rsp_ready[i];
|
||||
end
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH)
|
||||
) mem_bus_if[DCACHE_NUM_REQS]();
|
||||
|
||||
// memory request
|
||||
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin : g_mem_req
|
||||
assign mem_req_valid[i] = mem_bus_if[i].req_valid;
|
||||
assign mem_req_rw[i] = mem_bus_if[i].req_data.rw;
|
||||
assign mem_req_byteen[i] = mem_bus_if[i].req_data.byteen;
|
||||
assign mem_req_addr[i] = mem_bus_if[i].req_data.addr;
|
||||
assign mem_req_flags[i] = mem_bus_if[i].req_data.flags;
|
||||
assign mem_req_data[i] = mem_bus_if[i].req_data.data;
|
||||
assign mem_req_tag[i] = mem_bus_if[i].req_data.tag;
|
||||
assign mem_bus_if[i].req_ready = mem_req_ready[i];
|
||||
end
|
||||
|
||||
// memory response
|
||||
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin : g_mem_bus_rsp
|
||||
assign mem_bus_if[i].rsp_valid = mem_rsp_valid[i];
|
||||
assign mem_bus_if[i].rsp_data.tag = mem_rsp_tag[i];
|
||||
assign mem_bus_if[i].rsp_data.data = mem_rsp_data[i];
|
||||
assign mem_rsp_ready[i] = mem_bus_if[i].rsp_ready;
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
cache_perf_t lmem_perf = '0;
|
||||
`endif
|
||||
|
||||
VX_mem_unit #(
|
||||
.INSTANCE_ID (INSTANCE_ID)
|
||||
) mem_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.lmem_perf (lmem_perf),
|
||||
`endif
|
||||
.lsu_mem_if (lsu_mem_if),
|
||||
.dcache_bus_if (mem_bus_if)
|
||||
);
|
||||
|
||||
endmodule
|
|
@ -23,7 +23,7 @@
|
|||
module VX_operands import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter NUM_BANKS = 4,
|
||||
parameter OUT_BUF = 4 // using 2-cycle EB for area reduction
|
||||
parameter OUT_BUF = 3
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -37,15 +37,15 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
VX_operands_if.master operands_if
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam NUM_SRC_REGS = 3;
|
||||
localparam REQ_SEL_BITS = `CLOG2(NUM_SRC_REGS);
|
||||
localparam NUM_SRC_OPDS = 3;
|
||||
localparam REQ_SEL_BITS = `CLOG2(NUM_SRC_OPDS);
|
||||
localparam REQ_SEL_WIDTH = `UP(REQ_SEL_BITS);
|
||||
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
|
||||
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
|
||||
localparam PER_BANK_REGS = `NUM_REGS / NUM_BANKS;
|
||||
localparam META_DATAW = ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + `NR_BITS + `UUID_WIDTH;
|
||||
localparam REGS_DATAW = `XLEN * `NUM_THREADS;
|
||||
localparam DATAW = META_DATAW + NUM_SRC_REGS * REGS_DATAW;
|
||||
localparam DATAW = META_DATAW + NUM_SRC_OPDS * REGS_DATAW;
|
||||
localparam RAM_ADDRW = `LOG2UP(`NUM_REGS * PER_ISSUE_WARPS);
|
||||
localparam PER_BANK_ADDRW = RAM_ADDRW - BANK_SEL_BITS;
|
||||
localparam XLEN_SIZE = `XLEN / 8;
|
||||
|
@ -53,55 +53,56 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
|
||||
`UNUSED_VAR (writeback_if.data.sop)
|
||||
|
||||
wire [NUM_SRC_REGS-1:0] src_valid;
|
||||
wire [NUM_SRC_REGS-1:0] req_in_valid, req_in_ready;
|
||||
wire [NUM_SRC_REGS-1:0][PER_BANK_ADDRW-1:0] req_in_data;
|
||||
wire [NUM_SRC_REGS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx;
|
||||
wire [NUM_SRC_OPDS-1:0] src_valid;
|
||||
wire [NUM_SRC_OPDS-1:0] req_valid_in, req_ready_in;
|
||||
wire [NUM_SRC_OPDS-1:0][PER_BANK_ADDRW-1:0] req_data_in;
|
||||
wire [NUM_SRC_OPDS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx;
|
||||
|
||||
wire [NUM_BANKS-1:0] gpr_rd_valid, gpr_rd_ready;
|
||||
wire [NUM_BANKS-1:0] gpr_rd_valid_st1, gpr_rd_valid_st2;
|
||||
wire [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr, gpr_rd_addr_st1;
|
||||
wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st1, gpr_rd_data_st2;
|
||||
wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st2;
|
||||
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx, gpr_rd_req_idx_st1, gpr_rd_req_idx_st2;
|
||||
|
||||
wire pipe_ready_in;
|
||||
wire pipe_valid_st1, pipe_ready_st1;
|
||||
wire pipe_valid_st2, pipe_ready_st2;
|
||||
wire [META_DATAW-1:0] pipe_data, pipe_data_st1, pipe_data_st2;
|
||||
|
||||
reg [NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_n;
|
||||
wire [NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_st1, src_data_st2;
|
||||
reg [NUM_SRC_OPDS-1:0][(`NUM_THREADS * `XLEN)-1:0] src_data_st2, src_data_m_st2;
|
||||
|
||||
reg [NUM_SRC_REGS-1:0] data_fetched_n;
|
||||
wire [NUM_SRC_REGS-1:0] data_fetched_st1;
|
||||
reg [NUM_SRC_OPDS-1:0] data_fetched_st1;
|
||||
|
||||
reg has_collision_n;
|
||||
wire has_collision_st1;
|
||||
|
||||
wire [NUM_SRC_REGS-1:0][`NR_BITS-1:0] src_regs = {scoreboard_if.data.rs3,
|
||||
scoreboard_if.data.rs2,
|
||||
scoreboard_if.data.rs1};
|
||||
wire [NUM_SRC_OPDS-1:0][`NR_BITS-1:0] src_opds;
|
||||
assign src_opds = {scoreboard_if.data.rs3, scoreboard_if.data.rs2, scoreboard_if.data.rs1};
|
||||
|
||||
for (genvar i = 0; i < NUM_SRC_REGS; ++i) begin
|
||||
if (ISSUE_WIS != 0) begin
|
||||
assign req_in_data[i] = {src_regs[i][`NR_BITS-1:BANK_SEL_BITS], scoreboard_if.data.wis};
|
||||
end else begin
|
||||
assign req_in_data[i] = src_regs[i][`NR_BITS-1:BANK_SEL_BITS];
|
||||
for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin : g_req_data_in
|
||||
if (ISSUE_WIS != 0) begin : g_wis
|
||||
assign req_data_in[i] = {src_opds[i][`NR_BITS-1:BANK_SEL_BITS], scoreboard_if.data.wis};
|
||||
end else begin : g_no_wis
|
||||
assign req_data_in[i] = src_opds[i][`NR_BITS-1:BANK_SEL_BITS];
|
||||
end
|
||||
if (NUM_BANKS != 1) begin
|
||||
assign req_bank_idx[i] = src_regs[i][BANK_SEL_BITS-1:0];
|
||||
end else begin
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin : g_req_bank_idx
|
||||
if (NUM_BANKS != 1) begin : g_multibanks
|
||||
assign req_bank_idx[i] = src_opds[i][BANK_SEL_BITS-1:0];
|
||||
end else begin : g_singlebank
|
||||
assign req_bank_idx[i] = '0;
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_SRC_REGS; ++i) begin
|
||||
assign src_valid[i] = (src_regs[i] != 0) && ~data_fetched_st1[i];
|
||||
for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin : g_src_valid
|
||||
assign src_valid[i] = (src_opds[i] != 0) && ~data_fetched_st1[i];
|
||||
end
|
||||
|
||||
assign req_in_valid = {NUM_SRC_REGS{scoreboard_if.valid}} & src_valid;
|
||||
assign req_valid_in = {NUM_SRC_OPDS{scoreboard_if.valid}} & src_valid;
|
||||
|
||||
VX_stream_xbar #(
|
||||
.NUM_INPUTS (NUM_SRC_REGS),
|
||||
.NUM_INPUTS (NUM_SRC_OPDS),
|
||||
.NUM_OUTPUTS (NUM_BANKS),
|
||||
.DATAW (PER_BANK_ADDRW),
|
||||
.ARBITER ("P"), // use priority arbiter
|
||||
|
@ -111,29 +112,22 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
`UNUSED_PIN(collisions),
|
||||
.valid_in (req_in_valid),
|
||||
.data_in (req_in_data),
|
||||
.valid_in (req_valid_in),
|
||||
.data_in (req_data_in),
|
||||
.sel_in (req_bank_idx),
|
||||
.ready_in (req_in_ready),
|
||||
.ready_in (req_ready_in),
|
||||
.valid_out (gpr_rd_valid),
|
||||
.data_out (gpr_rd_addr),
|
||||
.sel_out (gpr_rd_req_idx),
|
||||
.ready_out (gpr_rd_ready)
|
||||
);
|
||||
|
||||
wire pipe_in_ready = pipe_ready_st1 || ~pipe_valid_st1;
|
||||
|
||||
assign gpr_rd_ready = {NUM_BANKS{pipe_in_ready}};
|
||||
|
||||
assign scoreboard_if.ready = pipe_in_ready && ~has_collision_n;
|
||||
|
||||
wire pipe_fire_st1 = pipe_valid_st1 && pipe_ready_st1;
|
||||
wire pipe_fire_st2 = pipe_valid_st2 && pipe_ready_st2;
|
||||
assign gpr_rd_ready = {NUM_BANKS{pipe_ready_in}};
|
||||
|
||||
always @(*) begin
|
||||
has_collision_n = 0;
|
||||
for (integer i = 0; i < NUM_SRC_REGS; ++i) begin
|
||||
for (integer j = 1; j < (NUM_SRC_REGS-i); ++j) begin
|
||||
for (integer i = 0; i < NUM_SRC_OPDS; ++i) begin
|
||||
for (integer j = 1; j < (NUM_SRC_OPDS-i); ++j) begin
|
||||
has_collision_n |= src_valid[i]
|
||||
&& src_valid[j+i]
|
||||
&& (req_bank_idx[i] == req_bank_idx[j+i]);
|
||||
|
@ -141,14 +135,7 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
data_fetched_n = data_fetched_st1;
|
||||
if (scoreboard_if.ready) begin
|
||||
data_fetched_n = '0;
|
||||
end else begin
|
||||
data_fetched_n = data_fetched_st1 | req_in_ready;
|
||||
end
|
||||
end
|
||||
wire [NUM_SRC_OPDS-1:0] req_fire_in = req_valid_in & req_ready_in;
|
||||
|
||||
assign pipe_data = {
|
||||
scoreboard_if.data.wis,
|
||||
|
@ -162,61 +149,74 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
scoreboard_if.data.uuid
|
||||
};
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + NUM_SRC_REGS + NUM_BANKS + META_DATAW + 1 + NUM_BANKS * (PER_BANK_ADDRW + REQ_SEL_WIDTH)),
|
||||
.RESETW (1 + NUM_SRC_REGS)
|
||||
assign scoreboard_if.ready = pipe_ready_in && ~has_collision_n;
|
||||
|
||||
wire pipe_fire_st1 = pipe_valid_st1 && pipe_ready_st1;
|
||||
wire pipe_fire_st2 = pipe_valid_st2 && pipe_ready_st2;
|
||||
|
||||
VX_pipe_buffer #(
|
||||
.DATAW (NUM_BANKS + META_DATAW + 1 + NUM_BANKS * (PER_BANK_ADDRW + REQ_SEL_WIDTH))
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (pipe_in_ready),
|
||||
.data_in ({scoreboard_if.valid, data_fetched_n, gpr_rd_valid, pipe_data, has_collision_n, gpr_rd_addr, gpr_rd_req_idx}),
|
||||
.data_out ({pipe_valid_st1, data_fetched_st1, gpr_rd_valid_st1, pipe_data_st1, has_collision_st1, gpr_rd_addr_st1, gpr_rd_req_idx_st1})
|
||||
.valid_in (scoreboard_if.valid),
|
||||
.ready_in (pipe_ready_in),
|
||||
.data_in ({gpr_rd_valid, pipe_data, has_collision_n, gpr_rd_addr, gpr_rd_req_idx}),
|
||||
.data_out ({gpr_rd_valid_st1, pipe_data_st1, has_collision_st1, gpr_rd_addr_st1, gpr_rd_req_idx_st1}),
|
||||
.valid_out(pipe_valid_st1),
|
||||
.ready_out(pipe_ready_st1)
|
||||
);
|
||||
|
||||
assign pipe_ready_st1 = pipe_ready_st2 || ~pipe_valid_st2;
|
||||
|
||||
assign src_data_st1 = pipe_fire_st2 ? '0 : src_data_n;
|
||||
always @(posedge clk) begin
|
||||
if (reset || scoreboard_if.ready) begin
|
||||
data_fetched_st1 <= 0;
|
||||
end else begin
|
||||
data_fetched_st1 <= data_fetched_st1 | req_fire_in;
|
||||
end
|
||||
end
|
||||
|
||||
wire pipe_valid2_st1 = pipe_valid_st1 && ~has_collision_st1;
|
||||
|
||||
`RESET_RELAY (pipe2_reset, reset); // needed for pipe_reg2's wide RESETW
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + NUM_SRC_REGS * REGS_DATAW + NUM_BANKS + NUM_BANKS * REGS_DATAW + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH),
|
||||
.RESETW (1 + NUM_SRC_REGS * REGS_DATAW)
|
||||
VX_pipe_buffer #(
|
||||
.DATAW (NUM_BANKS + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH)
|
||||
) pipe_reg2 (
|
||||
.clk (clk),
|
||||
.reset (pipe2_reset),
|
||||
.enable (pipe_ready_st1),
|
||||
.data_in ({pipe_valid2_st1, src_data_st1, gpr_rd_valid_st1, gpr_rd_data_st1, pipe_data_st1, gpr_rd_req_idx_st1}),
|
||||
.data_out ({pipe_valid_st2, src_data_st2, gpr_rd_valid_st2, gpr_rd_data_st2, pipe_data_st2, gpr_rd_req_idx_st2})
|
||||
.reset (reset),
|
||||
.valid_in (pipe_valid2_st1),
|
||||
.ready_in (pipe_ready_st1),
|
||||
.data_in ({gpr_rd_valid_st1, pipe_data_st1, gpr_rd_req_idx_st1}),
|
||||
.data_out ({gpr_rd_valid_st2, pipe_data_st2, gpr_rd_req_idx_st2}),
|
||||
.valid_out(pipe_valid_st2),
|
||||
.ready_out(pipe_ready_st2)
|
||||
);
|
||||
|
||||
always @(*) begin
|
||||
src_data_n = src_data_st2;
|
||||
src_data_m_st2 = src_data_st2;
|
||||
for (integer b = 0; b < NUM_BANKS; ++b) begin
|
||||
if (gpr_rd_valid_st2[b]) begin
|
||||
src_data_n[gpr_rd_req_idx_st2[b]] = gpr_rd_data_st2[b];
|
||||
src_data_m_st2[gpr_rd_req_idx_st2[b]] = gpr_rd_data_st2[b];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset || pipe_fire_st2) begin
|
||||
src_data_st2 <= 0;
|
||||
end else begin
|
||||
src_data_st2 <= src_data_m_st2;
|
||||
end
|
||||
end
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)),
|
||||
.LUTRAM (1)
|
||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (pipe_valid_st2),
|
||||
.ready_in (pipe_ready_st2),
|
||||
.data_in ({
|
||||
pipe_data_st2,
|
||||
src_data_n[0],
|
||||
src_data_n[1],
|
||||
src_data_n[2]
|
||||
}),
|
||||
.data_in ({pipe_data_st2, src_data_m_st2}),
|
||||
.data_out ({
|
||||
operands_if.data.wis,
|
||||
operands_if.data.tmask,
|
||||
|
@ -227,57 +227,47 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
operands_if.data.op_args,
|
||||
operands_if.data.rd,
|
||||
operands_if.data.uuid,
|
||||
operands_if.data.rs1_data,
|
||||
operands_if.data.rs3_data,
|
||||
operands_if.data.rs2_data,
|
||||
operands_if.data.rs3_data
|
||||
operands_if.data.rs1_data
|
||||
}),
|
||||
.valid_out (operands_if.valid),
|
||||
.ready_out (operands_if.ready)
|
||||
);
|
||||
|
||||
wire [PER_BANK_ADDRW-1:0] gpr_wr_addr;
|
||||
if (ISSUE_WIS != 0) begin
|
||||
if (ISSUE_WIS != 0) begin : g_gpr_wr_addr
|
||||
assign gpr_wr_addr = {writeback_if.data.rd[`NR_BITS-1:BANK_SEL_BITS], writeback_if.data.wis};
|
||||
end else begin
|
||||
end else begin : g_gpr_wr_addr_no_wis
|
||||
assign gpr_wr_addr = writeback_if.data.rd[`NR_BITS-1:BANK_SEL_BITS];
|
||||
end
|
||||
|
||||
wire [BANK_SEL_WIDTH-1:0] gpr_wr_bank_idx;
|
||||
if (NUM_BANKS != 1) begin
|
||||
if (NUM_BANKS != 1) begin : g_gpr_wr_bank_idx
|
||||
assign gpr_wr_bank_idx = writeback_if.data.rd[BANK_SEL_BITS-1:0];
|
||||
end else begin
|
||||
end else begin : g_gpr_wr_bank_idx_0
|
||||
assign gpr_wr_bank_idx = '0;
|
||||
end
|
||||
|
||||
`ifdef GPR_RESET
|
||||
reg wr_enabled = 0;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
wr_enabled <= 1;
|
||||
end
|
||||
end
|
||||
`else
|
||||
wire wr_enabled = 1;
|
||||
`endif
|
||||
|
||||
for (genvar b = 0; b < NUM_BANKS; ++b) begin
|
||||
for (genvar b = 0; b < NUM_BANKS; ++b) begin : g_gpr_rams
|
||||
wire gpr_wr_enabled;
|
||||
if (BANK_SEL_BITS != 0) begin
|
||||
assign gpr_wr_enabled = wr_enabled
|
||||
&& writeback_if.valid
|
||||
if (BANK_SEL_BITS != 0) begin : g_gpr_wr_enabled_multibanks
|
||||
assign gpr_wr_enabled = writeback_if.valid
|
||||
&& (gpr_wr_bank_idx == BANK_SEL_BITS'(b));
|
||||
end else begin
|
||||
assign gpr_wr_enabled = wr_enabled && writeback_if.valid;
|
||||
end else begin : g_gpr_wr_enabled
|
||||
assign gpr_wr_enabled = writeback_if.valid;
|
||||
end
|
||||
|
||||
wire [BYTEENW-1:0] wren;
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin : g_wren
|
||||
assign wren[i*XLEN_SIZE+:XLEN_SIZE] = {XLEN_SIZE{writeback_if.data.tmask[i]}};
|
||||
end
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (REGS_DATAW),
|
||||
.SIZE (PER_BANK_REGS * PER_ISSUE_WARPS),
|
||||
.OUT_REG (1),
|
||||
.READ_ENABLE (1),
|
||||
.WRENW (BYTEENW),
|
||||
`ifdef GPR_RESET
|
||||
.RESET_RAM (1),
|
||||
|
@ -292,7 +282,7 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
.waddr (gpr_wr_addr),
|
||||
.wdata (writeback_if.data.data),
|
||||
.raddr (gpr_rd_addr_st1[b]),
|
||||
.rdata (gpr_rd_data_st1[b])
|
||||
.rdata (gpr_rd_data_st2[b])
|
||||
);
|
||||
end
|
||||
|
||||
|
@ -302,7 +292,7 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
if (reset) begin
|
||||
collisions_r <= '0;
|
||||
end else begin
|
||||
collisions_r <= collisions_r + `PERF_CTR_BITS'(scoreboard_if.valid && pipe_in_ready && has_collision_n);
|
||||
collisions_r <= collisions_r + `PERF_CTR_BITS'(scoreboard_if.valid && pipe_ready_in && has_collision_n);
|
||||
end
|
||||
end
|
||||
assign perf_stalls = collisions_r;
|
||||
|
|
92
hw/rtl/core/VX_pe_switch.sv
Normal file
92
hw/rtl/core/VX_pe_switch.sv
Normal file
|
@ -0,0 +1,92 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_pe_switch import VX_gpu_pkg::*; #(
|
||||
parameter PE_COUNT = 0,
|
||||
parameter NUM_LANES = 0,
|
||||
parameter REQ_OUT_BUF = 0,
|
||||
parameter RSP_OUT_BUF = 0,
|
||||
parameter `STRING ARBITER = "R",
|
||||
parameter PE_SEL_BITS = `CLOG2(PE_COUNT)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire [`UP(PE_SEL_BITS)-1:0] pe_sel,
|
||||
VX_execute_if.slave execute_in_if,
|
||||
VX_commit_if.master commit_out_if,
|
||||
VX_execute_if.master execute_out_if[PE_COUNT],
|
||||
VX_commit_if .slave commit_in_if[PE_COUNT]
|
||||
);
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam REQ_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `INST_ALU_BITS + $bits(op_args_t) + 1 + `NR_BITS + `NT_WIDTH + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1;
|
||||
localparam RSP_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
|
||||
wire [PE_COUNT-1:0] pe_req_valid;
|
||||
wire [PE_COUNT-1:0][REQ_DATAW-1:0] pe_req_data;
|
||||
wire [PE_COUNT-1:0] pe_req_ready;
|
||||
|
||||
VX_stream_switch #(
|
||||
.DATAW (REQ_DATAW),
|
||||
.NUM_OUTPUTS (PE_COUNT),
|
||||
.OUT_BUF (REQ_OUT_BUF)
|
||||
) req_switch (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.sel_in (pe_sel),
|
||||
.valid_in (execute_in_if.valid),
|
||||
.ready_in (execute_in_if.ready),
|
||||
.data_in (execute_in_if.data),
|
||||
.data_out (pe_req_data),
|
||||
.valid_out (pe_req_valid),
|
||||
.ready_out (pe_req_ready)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < PE_COUNT; ++i) begin : g_execute_out_if
|
||||
assign execute_out_if[i].valid = pe_req_valid[i];
|
||||
assign execute_out_if[i].data = pe_req_data[i];
|
||||
assign pe_req_ready[i] = execute_out_if[i].ready;
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [PE_COUNT-1:0] pe_rsp_valid;
|
||||
wire [PE_COUNT-1:0][RSP_DATAW-1:0] pe_rsp_data;
|
||||
wire [PE_COUNT-1:0] pe_rsp_ready;
|
||||
|
||||
for (genvar i = 0; i < PE_COUNT; ++i) begin : g_commit_in_if
|
||||
assign pe_rsp_valid[i] = commit_in_if[i].valid;
|
||||
assign pe_rsp_data[i] = commit_in_if[i].data;
|
||||
assign commit_in_if[i].ready = pe_rsp_ready[i];
|
||||
end
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (PE_COUNT),
|
||||
.DATAW (RSP_DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.OUT_BUF (RSP_OUT_BUF)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (pe_rsp_valid),
|
||||
.ready_in (pe_rsp_ready),
|
||||
.data_in (pe_rsp_data),
|
||||
.data_out (commit_out_if.data),
|
||||
.valid_out (commit_out_if.valid),
|
||||
.ready_out (commit_out_if.ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
endmodule
|
|
@ -78,7 +78,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
wire [`NUM_ALU_BLOCKS-1:0][`NW_WIDTH-1:0] branch_wid;
|
||||
wire [`NUM_ALU_BLOCKS-1:0] branch_taken;
|
||||
wire [`NUM_ALU_BLOCKS-1:0][`PC_BITS-1:0] branch_dest;
|
||||
for (genvar i = 0; i < `NUM_ALU_BLOCKS; ++i) begin
|
||||
for (genvar i = 0; i < `NUM_ALU_BLOCKS; ++i) begin : g_branch_init
|
||||
assign branch_valid[i] = branch_ctl_if[i].valid;
|
||||
assign branch_wid[i] = branch_ctl_if[i].wid;
|
||||
assign branch_taken[i] = branch_ctl_if[i].taken;
|
||||
|
@ -189,7 +189,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
end
|
||||
|
||||
// decode unlock
|
||||
if (decode_sched_if.valid && ~decode_sched_if.is_wstall) begin
|
||||
if (decode_sched_if.valid && decode_sched_if.unlock) begin
|
||||
stalled_warps_n[decode_sched_if.wid] = 0;
|
||||
end
|
||||
|
||||
|
@ -289,13 +289,11 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
|
||||
// split/join handling
|
||||
|
||||
`RESET_RELAY (split_join_reset, reset);
|
||||
|
||||
VX_split_join #(
|
||||
.INSTANCE_ID ($sformatf("%s-splitjoin", INSTANCE_ID))
|
||||
) split_join (
|
||||
.clk (clk),
|
||||
.reset (split_join_reset),
|
||||
.reset (reset),
|
||||
.valid (warp_ctl_if.valid),
|
||||
.wid (warp_ctl_if.wid),
|
||||
.split (warp_ctl_if.split),
|
||||
|
@ -324,7 +322,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
wire [`NUM_WARPS-1:0][(`NUM_THREADS + `PC_BITS)-1:0] schedule_data;
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_schedule_data
|
||||
assign schedule_data[i] = {thread_masks[i], warp_pcs[i]};
|
||||
end
|
||||
|
||||
|
@ -333,67 +331,50 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
schedule_data[schedule_wid][(`NUM_THREADS + `PC_BITS)-5:0]
|
||||
};
|
||||
|
||||
`ifndef NDEBUG
|
||||
localparam GNW_WIDTH = `LOG2UP(`NUM_CLUSTERS * `NUM_CORES * `NUM_WARPS);
|
||||
reg [`UUID_WIDTH-1:0] instr_uuid;
|
||||
wire [GNW_WIDTH-1:0] g_wid = (GNW_WIDTH'(CORE_ID) << `NW_BITS) + GNW_WIDTH'(schedule_wid);
|
||||
`ifdef SV_DPI
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
instr_uuid <= `UUID_WIDTH'(dpi_uuid_gen(1, 32'd0));
|
||||
end else if (schedule_fire) begin
|
||||
instr_uuid <= `UUID_WIDTH'(dpi_uuid_gen(0, 32'(g_wid)));
|
||||
end
|
||||
end
|
||||
wire [`UUID_WIDTH-1:0] instr_uuid;
|
||||
`ifdef UUID_ENABLE
|
||||
VX_uuid_gen #(
|
||||
.CORE_ID (CORE_ID),
|
||||
.UUID_WIDTH (`UUID_WIDTH)
|
||||
) uuid_gen (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.incr (schedule_fire),
|
||||
.wid (schedule_wid),
|
||||
.uuid (instr_uuid)
|
||||
);
|
||||
`else
|
||||
wire [GNW_WIDTH+16-1:0] w_uuid = {g_wid, 16'(schedule_pc)};
|
||||
always @(*) begin
|
||||
instr_uuid = `UUID_WIDTH'(w_uuid);
|
||||
end
|
||||
`endif
|
||||
`else
|
||||
wire [`UUID_WIDTH-1:0] instr_uuid = '0;
|
||||
assign instr_uuid = '0;
|
||||
`endif
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`NUM_THREADS + `PC_BITS + `NW_WIDTH)
|
||||
.DATAW (`NUM_THREADS + `PC_BITS + `NW_WIDTH + `UUID_WIDTH),
|
||||
.SIZE (2), // need to buffer out ready_in
|
||||
.OUT_REG (1) // should be registered for BRAM acces in fetch unit
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (schedule_valid),
|
||||
.ready_in (schedule_ready),
|
||||
.data_in ({schedule_tmask, schedule_pc, schedule_wid}),
|
||||
.data_out ({schedule_if.data.tmask, schedule_if.data.PC, schedule_if.data.wid}),
|
||||
.data_in ({schedule_tmask, schedule_pc, schedule_wid, instr_uuid}),
|
||||
.data_out ({schedule_if.data.tmask, schedule_if.data.PC, schedule_if.data.wid, schedule_if.data.uuid}),
|
||||
.valid_out (schedule_if.valid),
|
||||
.ready_out (schedule_if.ready)
|
||||
);
|
||||
|
||||
assign schedule_if.data.uuid = instr_uuid;
|
||||
|
||||
// Track pending instructions per warp
|
||||
|
||||
reg [`NUM_WARPS-1:0] per_warp_incr;
|
||||
always @(*) begin
|
||||
per_warp_incr = 0;
|
||||
if (schedule_if_fire) begin
|
||||
per_warp_incr[schedule_if.data.wid] = 1;
|
||||
end
|
||||
end
|
||||
|
||||
wire [`NUM_WARPS-1:0] pending_warp_empty;
|
||||
wire [`NUM_WARPS-1:0] pending_warp_alm_empty;
|
||||
|
||||
`RESET_RELAY_EX (pending_instr_reset, reset, `NUM_WARPS, `MAX_FANOUT);
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_pending_sizes
|
||||
VX_pending_size #(
|
||||
.SIZE (4096),
|
||||
.ALM_EMPTY (1)
|
||||
) counter (
|
||||
.clk (clk),
|
||||
.reset (pending_instr_reset[i]),
|
||||
.incr (per_warp_incr[i]),
|
||||
.reset (reset),
|
||||
.incr (schedule_if_fire && (schedule_if.data.wid == `NW_WIDTH'(i))),
|
||||
.decr (commit_sched_if.committed_warps[i]),
|
||||
.empty (pending_warp_empty[i]),
|
||||
.alm_empty (pending_warp_alm_empty[i]),
|
||||
|
@ -422,7 +403,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
timeout_ctr <= '0;
|
||||
timeout_enable <= 0;
|
||||
end else begin
|
||||
if (decode_sched_if.valid && ~decode_sched_if.is_wstall) begin
|
||||
if (decode_sched_if.valid && decode_sched_if.unlock) begin
|
||||
timeout_enable <= 1;
|
||||
end
|
||||
if (timeout_enable && active_warps !=0 && active_warps == stalled_warps) begin
|
||||
|
|
|
@ -30,6 +30,8 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
VX_scoreboard_if.master scoreboard_if
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam NUM_SRC_OPDS = 3;
|
||||
localparam NUM_OPDS = NUM_SRC_OPDS + 1;
|
||||
localparam DATAW = `UUID_WIDTH + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + (`NR_BITS * 4) + 1;
|
||||
|
||||
VX_ibuffer_if staging_if [PER_ISSUE_WARPS]();
|
||||
|
@ -64,13 +66,13 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
`BUFFER_EX(perf_sfu_per_cycle_r, perf_sfu_per_cycle, 1'b1, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT));
|
||||
|
||||
wire [PER_ISSUE_WARPS-1:0] stg_valid_in;
|
||||
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin
|
||||
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_stg_valid_in
|
||||
assign stg_valid_in[w] = staging_if[w].valid;
|
||||
end
|
||||
|
||||
wire perf_stall_per_cycle = (|stg_valid_in) && ~(|(stg_valid_in & operands_ready));
|
||||
|
||||
always @(posedge clk) begin
|
||||
always @(posedge clk) begin : g_perf_stalls
|
||||
if (reset) begin
|
||||
perf_stalls <= '0;
|
||||
end else begin
|
||||
|
@ -78,7 +80,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin
|
||||
for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin : g_perf_units_uses
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_units_uses[i] <= '0;
|
||||
|
@ -88,7 +90,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_SFU_UNITS; ++i) begin
|
||||
for (genvar i = 0; i < `NUM_SFU_UNITS; ++i) begin : g_perf_sfu_uses
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_sfu_uses[i] <= '0;
|
||||
|
@ -99,10 +101,9 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
end
|
||||
`endif
|
||||
|
||||
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (1)
|
||||
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_stanging_bufs
|
||||
VX_pipe_buffer #(
|
||||
.DATAW (DATAW)
|
||||
) stanging_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -115,10 +116,10 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
);
|
||||
end
|
||||
|
||||
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin
|
||||
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_scoreboard
|
||||
reg [`NUM_REGS-1:0] inuse_regs;
|
||||
|
||||
reg [3:0] operands_busy, operands_busy_n;
|
||||
reg [NUM_OPDS-1:0] operands_busy, operands_busy_n;
|
||||
|
||||
wire ibuffer_fire = ibuffer_if[w].valid && ibuffer_if[w].ready;
|
||||
|
||||
|
@ -128,6 +129,10 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
&& (writeback_if.data.wis == ISSUE_WIS_W'(w))
|
||||
&& writeback_if.data.eop;
|
||||
|
||||
wire [NUM_OPDS-1:0][`NR_BITS-1:0] ibuf_opds, stg_opds;
|
||||
assign ibuf_opds = {ibuffer_if[w].data.rs3, ibuffer_if[w].data.rs2, ibuffer_if[w].data.rs1, ibuffer_if[w].data.rd};
|
||||
assign stg_opds = {staging_if[w].data.rs3, staging_if[w].data.rs2, staging_if[w].data.rs1, staging_if[w].data.rd};
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`NUM_REGS-1:0][`EX_WIDTH-1:0] inuse_units;
|
||||
reg [`NUM_REGS-1:0][`SFU_WIDTH-1:0] inuse_sfu;
|
||||
|
@ -135,29 +140,11 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
always @(*) begin
|
||||
perf_inuse_units_per_cycle[w] = '0;
|
||||
perf_inuse_sfu_per_cycle[w] = '0;
|
||||
if (staging_if[w].valid) begin
|
||||
if (operands_busy[0]) begin
|
||||
perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rd]] = 1;
|
||||
if (inuse_units[staging_if[w].data.rd] == `EX_SFU) begin
|
||||
perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rd]] = 1;
|
||||
end
|
||||
end
|
||||
if (operands_busy[1]) begin
|
||||
perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rs1]] = 1;
|
||||
if (inuse_units[staging_if[w].data.rs1] == `EX_SFU) begin
|
||||
perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rs1]] = 1;
|
||||
end
|
||||
end
|
||||
if (operands_busy[2]) begin
|
||||
perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rs2]] = 1;
|
||||
if (inuse_units[staging_if[w].data.rs2] == `EX_SFU) begin
|
||||
perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rs2]] = 1;
|
||||
end
|
||||
end
|
||||
if (operands_busy[3]) begin
|
||||
perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rs3]] = 1;
|
||||
if (inuse_units[staging_if[w].data.rs3] == `EX_SFU) begin
|
||||
perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rs3]] = 1;
|
||||
for (integer i = 0; i < NUM_OPDS; ++i) begin
|
||||
if (staging_if[w].valid && operands_busy[i]) begin
|
||||
perf_inuse_units_per_cycle[w][inuse_units[stg_opds[i]]] = 1;
|
||||
if (inuse_units[stg_opds[i]] == `EX_SFU) begin
|
||||
perf_inuse_sfu_per_cycle[w][inuse_sfu[stg_opds[i]]] = 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -165,56 +152,24 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
`endif
|
||||
|
||||
always @(*) begin
|
||||
operands_busy_n = operands_busy;
|
||||
if (ibuffer_fire) begin
|
||||
operands_busy_n = {
|
||||
inuse_regs[ibuffer_if[w].data.rs3],
|
||||
inuse_regs[ibuffer_if[w].data.rs2],
|
||||
inuse_regs[ibuffer_if[w].data.rs1],
|
||||
inuse_regs[ibuffer_if[w].data.rd]
|
||||
};
|
||||
end
|
||||
if (writeback_fire) begin
|
||||
for (integer i = 0; i < NUM_OPDS; ++i) begin
|
||||
operands_busy_n[i] = operands_busy[i];
|
||||
if (ibuffer_fire) begin
|
||||
if (writeback_if.data.rd == ibuffer_if[w].data.rd) begin
|
||||
operands_busy_n[0] = 0;
|
||||
end
|
||||
if (writeback_if.data.rd == ibuffer_if[w].data.rs1) begin
|
||||
operands_busy_n[1] = 0;
|
||||
end
|
||||
if (writeback_if.data.rd == ibuffer_if[w].data.rs2) begin
|
||||
operands_busy_n[2] = 0;
|
||||
end
|
||||
if (writeback_if.data.rd == ibuffer_if[w].data.rs3) begin
|
||||
operands_busy_n[3] = 0;
|
||||
end
|
||||
end else begin
|
||||
if (writeback_if.data.rd == staging_if[w].data.rd) begin
|
||||
operands_busy_n[0] = 0;
|
||||
end
|
||||
if (writeback_if.data.rd == staging_if[w].data.rs1) begin
|
||||
operands_busy_n[1] = 0;
|
||||
end
|
||||
if (writeback_if.data.rd == staging_if[w].data.rs2) begin
|
||||
operands_busy_n[2] = 0;
|
||||
end
|
||||
if (writeback_if.data.rd == staging_if[w].data.rs3) begin
|
||||
operands_busy_n[3] = 0;
|
||||
operands_busy_n[i] = inuse_regs[ibuf_opds[i]];
|
||||
end
|
||||
if (writeback_fire) begin
|
||||
if (ibuffer_fire) begin
|
||||
if (writeback_if.data.rd == ibuf_opds[i]) begin
|
||||
operands_busy_n[i] = 0;
|
||||
end
|
||||
end else begin
|
||||
if (writeback_if.data.rd == stg_opds[i]) begin
|
||||
operands_busy_n[i] = 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
if (staging_fire && staging_if[w].data.wb) begin
|
||||
if (staging_if[w].data.rd == ibuffer_if[w].data.rd) begin
|
||||
operands_busy_n[0] = 1;
|
||||
end
|
||||
if (staging_if[w].data.rd == ibuffer_if[w].data.rs1) begin
|
||||
operands_busy_n[1] = 1;
|
||||
end
|
||||
if (staging_if[w].data.rd == ibuffer_if[w].data.rs2) begin
|
||||
operands_busy_n[2] = 1;
|
||||
end
|
||||
if (staging_if[w].data.rd == ibuffer_if[w].data.rs3) begin
|
||||
operands_busy_n[3] = 1;
|
||||
if (staging_fire && staging_if[w].data.wb && staging_if[w].data.rd == ibuf_opds[i]) begin
|
||||
operands_busy_n[i] = 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -251,9 +206,9 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
end else begin
|
||||
if (staging_if[w].valid && ~staging_if[w].ready) begin
|
||||
`ifdef DBG_TRACE_PIPELINE
|
||||
`TRACE(3, ("%d: *** %s-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n",
|
||||
`TRACE(3, ("%t: *** %s-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n",
|
||||
$time, INSTANCE_ID, w, {staging_if[w].data.PC, 1'b0}, staging_if[w].data.tmask, timeout_ctr,
|
||||
operands_busy, staging_if[w].data.uuid));
|
||||
operands_busy, staging_if[w].data.uuid))
|
||||
`endif
|
||||
timeout_ctr <= timeout_ctr + 1;
|
||||
end else if (ibuffer_fire) begin
|
||||
|
@ -265,11 +220,11 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
`RUNTIME_ASSERT((timeout_ctr < `STALL_TIMEOUT),
|
||||
("%t: *** %s timeout: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)",
|
||||
$time, INSTANCE_ID, w, {staging_if[w].data.PC, 1'b0}, staging_if[w].data.tmask, timeout_ctr,
|
||||
operands_busy, staging_if[w].data.uuid));
|
||||
operands_busy, staging_if[w].data.uuid))
|
||||
|
||||
`RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if.data.rd] != 0,
|
||||
("%t: *** %s invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)",
|
||||
$time, INSTANCE_ID, w, {writeback_if.data.PC, 1'b0}, writeback_if.data.tmask, writeback_if.data.rd, writeback_if.data.uuid));
|
||||
$time, INSTANCE_ID, w, {writeback_if.data.PC, 1'b0}, writeback_if.data.tmask, writeback_if.data.rd, writeback_if.data.uuid))
|
||||
`endif
|
||||
|
||||
end
|
||||
|
@ -278,23 +233,20 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
wire [PER_ISSUE_WARPS-1:0][DATAW-1:0] arb_data_in;
|
||||
wire [PER_ISSUE_WARPS-1:0] arb_ready_in;
|
||||
|
||||
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin
|
||||
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_arb_data_in
|
||||
assign arb_valid_in[w] = staging_if[w].valid && operands_ready[w];
|
||||
assign arb_data_in[w] = staging_if[w].data;
|
||||
assign staging_if[w].ready = arb_ready_in[w] && operands_ready[w];
|
||||
end
|
||||
|
||||
`RESET_RELAY (arb_reset, reset);
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (PER_ISSUE_WARPS),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER ("F"),
|
||||
.LUTRAM (1),
|
||||
.OUT_BUF (4) // using 2-cycle EB for area reduction
|
||||
.ARBITER ("C"),
|
||||
.OUT_BUF (3)
|
||||
) out_arb (
|
||||
.clk (clk),
|
||||
.reset (arb_reset),
|
||||
.reset (reset),
|
||||
.valid_in (arb_valid_in),
|
||||
.ready_in (arb_ready_in),
|
||||
.data_in (arb_data_in),
|
||||
|
|
|
@ -41,24 +41,25 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
VX_warp_ctl_if.master warp_ctl_if
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam BLOCK_SIZE = 1;
|
||||
localparam NUM_LANES = `NUM_SFU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
|
||||
localparam RSP_ARB_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + (NUM_LANES * `XLEN) + `NR_BITS + 1 + `PC_BITS + PID_WIDTH + 1 + 1;
|
||||
localparam RSP_ARB_SIZE = 1 + 1;
|
||||
localparam RSP_ARB_IDX_WCTL = 0;
|
||||
localparam RSP_ARB_IDX_CSRS = 1;
|
||||
localparam BLOCK_SIZE = 1;
|
||||
localparam NUM_LANES = `NUM_SFU_LANES;
|
||||
localparam PE_COUNT = 2;
|
||||
localparam PE_SEL_BITS = `CLOG2(PE_COUNT);
|
||||
localparam PE_IDX_WCTL = 0;
|
||||
localparam PE_IDX_CSRS = 1;
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) per_block_execute_if[BLOCK_SIZE]();
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) per_block_commit_if[BLOCK_SIZE]();
|
||||
|
||||
VX_dispatch_unit #(
|
||||
.BLOCK_SIZE (BLOCK_SIZE),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.OUT_BUF (1)
|
||||
.OUT_BUF (3)
|
||||
) dispatch_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -66,61 +67,58 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
.execute_if (per_block_execute_if)
|
||||
);
|
||||
|
||||
wire [RSP_ARB_SIZE-1:0] rsp_arb_valid_in;
|
||||
wire [RSP_ARB_SIZE-1:0] rsp_arb_ready_in;
|
||||
wire [RSP_ARB_SIZE-1:0][RSP_ARB_DATAW-1:0] rsp_arb_data_in;
|
||||
|
||||
// Warp control block
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) wctl_execute_if();
|
||||
) pe_execute_if[PE_COUNT]();
|
||||
|
||||
VX_commit_if#(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) wctl_commit_if();
|
||||
) pe_commit_if[PE_COUNT]();
|
||||
|
||||
assign wctl_execute_if.valid = per_block_execute_if[0].valid && `INST_SFU_IS_WCTL(per_block_execute_if[0].data.op_type);
|
||||
assign wctl_execute_if.data = per_block_execute_if[0].data;
|
||||
reg [PE_SEL_BITS-1:0] pe_select;
|
||||
always @(*) begin
|
||||
pe_select = PE_IDX_WCTL;
|
||||
if (`INST_SFU_IS_CSR(per_block_execute_if[0].data.op_type))
|
||||
pe_select = PE_IDX_CSRS;
|
||||
end
|
||||
|
||||
`RESET_RELAY (wctl_reset, reset);
|
||||
VX_pe_switch #(
|
||||
.PE_COUNT (PE_COUNT),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.ARBITER ("R"),
|
||||
.REQ_OUT_BUF(0),
|
||||
.RSP_OUT_BUF(3)
|
||||
) pe_switch (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.pe_sel (pe_select),
|
||||
.execute_in_if (per_block_execute_if[0]),
|
||||
.commit_out_if (per_block_commit_if[0]),
|
||||
.execute_out_if (pe_execute_if),
|
||||
.commit_in_if (pe_commit_if)
|
||||
);
|
||||
|
||||
VX_wctl_unit #(
|
||||
.INSTANCE_ID ($sformatf("%s-wctl", INSTANCE_ID)),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) wctl_unit (
|
||||
.clk (clk),
|
||||
.reset (wctl_reset),
|
||||
.execute_if (wctl_execute_if),
|
||||
.reset (reset),
|
||||
.execute_if (pe_execute_if[PE_IDX_WCTL]),
|
||||
.warp_ctl_if(warp_ctl_if),
|
||||
.commit_if (wctl_commit_if)
|
||||
.commit_if (pe_commit_if[PE_IDX_WCTL])
|
||||
);
|
||||
|
||||
assign rsp_arb_valid_in[RSP_ARB_IDX_WCTL] = wctl_commit_if.valid;
|
||||
assign rsp_arb_data_in[RSP_ARB_IDX_WCTL] = wctl_commit_if.data;
|
||||
assign wctl_commit_if.ready = rsp_arb_ready_in[RSP_ARB_IDX_WCTL];
|
||||
|
||||
// CSR unit
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) csr_execute_if();
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) csr_commit_if();
|
||||
|
||||
assign csr_execute_if.valid = per_block_execute_if[0].valid && `INST_SFU_IS_CSR(per_block_execute_if[0].data.op_type);
|
||||
assign csr_execute_if.data = per_block_execute_if[0].data;
|
||||
|
||||
`RESET_RELAY (csr_reset, reset);
|
||||
|
||||
VX_csr_unit #(
|
||||
.INSTANCE_ID ($sformatf("%s-csr", INSTANCE_ID)),
|
||||
.CORE_ID (CORE_ID),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) csr_unit (
|
||||
.clk (clk),
|
||||
.reset (csr_reset),
|
||||
.reset (reset),
|
||||
|
||||
.base_dcrs (base_dcrs),
|
||||
.execute_if (csr_execute_if),
|
||||
.execute_if (pe_execute_if[PE_IDX_CSRS]),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.mem_perf_if (mem_perf_if),
|
||||
|
@ -133,47 +131,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
.sched_csr_if (sched_csr_if),
|
||||
.commit_csr_if (commit_csr_if),
|
||||
.commit_if (csr_commit_if)
|
||||
);
|
||||
|
||||
assign rsp_arb_valid_in[RSP_ARB_IDX_CSRS] = csr_commit_if.valid;
|
||||
assign rsp_arb_data_in[RSP_ARB_IDX_CSRS] = csr_commit_if.data;
|
||||
assign csr_commit_if.ready = rsp_arb_ready_in[RSP_ARB_IDX_CSRS];
|
||||
|
||||
// can accept new request?
|
||||
|
||||
reg sfu_req_ready;
|
||||
always @(*) begin
|
||||
case (per_block_execute_if[0].data.op_type)
|
||||
`INST_SFU_CSRRW,
|
||||
`INST_SFU_CSRRS,
|
||||
`INST_SFU_CSRRC: sfu_req_ready = csr_execute_if.ready;
|
||||
default: sfu_req_ready = wctl_execute_if.ready;
|
||||
endcase
|
||||
end
|
||||
assign per_block_execute_if[0].ready = sfu_req_ready;
|
||||
|
||||
// response arbitration
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) arb_commit_if[BLOCK_SIZE]();
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (RSP_ARB_SIZE),
|
||||
.DATAW (RSP_ARB_DATAW),
|
||||
.ARBITER ("R"),
|
||||
.OUT_BUF (3)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (rsp_arb_valid_in),
|
||||
.ready_in (rsp_arb_ready_in),
|
||||
.data_in (rsp_arb_data_in),
|
||||
.data_out (arb_commit_if[0].data),
|
||||
.valid_out (arb_commit_if[0].valid),
|
||||
.ready_out (arb_commit_if[0].ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
.commit_if (pe_commit_if[PE_IDX_CSRS])
|
||||
);
|
||||
|
||||
VX_gather_unit #(
|
||||
|
@ -181,9 +139,9 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
.NUM_LANES (NUM_LANES),
|
||||
.OUT_BUF (3)
|
||||
) gather_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.commit_in_if (arb_commit_if),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.commit_in_if (per_block_commit_if),
|
||||
.commit_out_if (commit_if)
|
||||
);
|
||||
|
||||
|
|
|
@ -45,16 +45,14 @@ module VX_split_join import VX_gpu_pkg::*; #(
|
|||
wire ipdom_push = valid && split.valid && split.is_dvg;
|
||||
wire ipdom_pop = valid && sjoin.valid && sjoin_is_dvg;
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
|
||||
`RESET_RELAY (ipdom_reset, reset);
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_ipdom_stacks
|
||||
VX_ipdom_stack #(
|
||||
.WIDTH (`NUM_THREADS+`PC_BITS),
|
||||
.DEPTH (`DV_STACK_SIZE)
|
||||
.DEPTH (`DV_STACK_SIZE),
|
||||
.OUT_REG (0)
|
||||
) ipdom_stack (
|
||||
.clk (clk),
|
||||
.reset (ipdom_reset),
|
||||
.reset (reset),
|
||||
.q0 (ipdom_q0),
|
||||
.q1 (ipdom_q1),
|
||||
.d (ipdom_data[i]),
|
||||
|
|
|
@ -1,399 +0,0 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`ifndef VX_TRACE_PKG_VH
|
||||
`define VX_TRACE_PKG_VH
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
package VX_trace_pkg;
|
||||
|
||||
`ifdef SIMULATION
|
||||
|
||||
`ifdef SV_DPI
|
||||
import "DPI-C" function void dpi_trace(input int level, input string format /*verilator sformat*/);
|
||||
`endif
|
||||
|
||||
import VX_gpu_pkg::*;
|
||||
|
||||
task trace_ex_type(input int level, input [`EX_BITS-1:0] ex_type);
|
||||
case (ex_type)
|
||||
`EX_ALU: `TRACE(level, ("ALU"));
|
||||
`EX_LSU: `TRACE(level, ("LSU"));
|
||||
`EX_FPU: `TRACE(level, ("FPU"));
|
||||
`EX_SFU: `TRACE(level, ("SFU"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
endtask
|
||||
|
||||
task trace_ex_op(input int level,
|
||||
input [`EX_BITS-1:0] ex_type,
|
||||
input [`INST_OP_BITS-1:0] op_type,
|
||||
input VX_gpu_pkg::op_args_t op_args
|
||||
);
|
||||
case (ex_type)
|
||||
`EX_ALU: begin
|
||||
case (op_args.alu.xtype)
|
||||
`ALU_TYPE_ARITH: begin
|
||||
if (op_args.alu.is_w) begin
|
||||
if (op_args.alu.use_imm) begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADDIW"));
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLLIW"));
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRLIW"));
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRAIW"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADDW"));
|
||||
`INST_ALU_SUB: `TRACE(level, ("SUBW"));
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLLW"));
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRLW"));
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRAW"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end else begin
|
||||
if (op_args.alu.use_imm) begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADDI"));
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLLI"));
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRLI"));
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRAI"));
|
||||
`INST_ALU_SLT: `TRACE(level, ("SLTI"));
|
||||
`INST_ALU_SLTU: `TRACE(level, ("SLTIU"));
|
||||
`INST_ALU_XOR: `TRACE(level, ("XORI"));
|
||||
`INST_ALU_OR: `TRACE(level, ("ORI"));
|
||||
`INST_ALU_AND: `TRACE(level, ("ANDI"));
|
||||
`INST_ALU_LUI: `TRACE(level, ("LUI"));
|
||||
`INST_ALU_AUIPC: `TRACE(level, ("AUIPC"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADD"));
|
||||
`INST_ALU_SUB: `TRACE(level, ("SUB"));
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLL"));
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRL"));
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRA"));
|
||||
`INST_ALU_SLT: `TRACE(level, ("SLT"));
|
||||
`INST_ALU_SLTU: `TRACE(level, ("SLTU"));
|
||||
`INST_ALU_XOR: `TRACE(level, ("XOR"));
|
||||
`INST_ALU_OR: `TRACE(level, ("OR"));
|
||||
`INST_ALU_AND: `TRACE(level, ("AND"));
|
||||
`INST_ALU_CZEQ: `TRACE(level, ("CZERO.EQZ"));
|
||||
`INST_ALU_CZNE: `TRACE(level, ("CZERO.NEZ"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end
|
||||
end
|
||||
`ALU_TYPE_BRANCH: begin
|
||||
case (`INST_BR_BITS'(op_type))
|
||||
`INST_BR_EQ: `TRACE(level, ("BEQ"));
|
||||
`INST_BR_NE: `TRACE(level, ("BNE"));
|
||||
`INST_BR_LT: `TRACE(level, ("BLT"));
|
||||
`INST_BR_GE: `TRACE(level, ("BGE"));
|
||||
`INST_BR_LTU: `TRACE(level, ("BLTU"));
|
||||
`INST_BR_GEU: `TRACE(level, ("BGEU"));
|
||||
`INST_BR_JAL: `TRACE(level, ("JAL"));
|
||||
`INST_BR_JALR: `TRACE(level, ("JALR"));
|
||||
`INST_BR_ECALL: `TRACE(level, ("ECALL"));
|
||||
`INST_BR_EBREAK:`TRACE(level, ("EBREAK"));
|
||||
`INST_BR_URET: `TRACE(level, ("URET"));
|
||||
`INST_BR_SRET: `TRACE(level, ("SRET"));
|
||||
`INST_BR_MRET: `TRACE(level, ("MRET"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
`ALU_TYPE_MULDIV: begin
|
||||
if (op_args.alu.is_w) begin
|
||||
case (`INST_M_BITS'(op_type))
|
||||
`INST_M_MUL: `TRACE(level, ("MULW"));
|
||||
`INST_M_DIV: `TRACE(level, ("DIVW"));
|
||||
`INST_M_DIVU: `TRACE(level, ("DIVUW"));
|
||||
`INST_M_REM: `TRACE(level, ("REMW"));
|
||||
`INST_M_REMU: `TRACE(level, ("REMUW"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_M_BITS'(op_type))
|
||||
`INST_M_MUL: `TRACE(level, ("MUL"));
|
||||
`INST_M_MULH: `TRACE(level, ("MULH"));
|
||||
`INST_M_MULHSU:`TRACE(level, ("MULHSU"));
|
||||
`INST_M_MULHU: `TRACE(level, ("MULHU"));
|
||||
`INST_M_DIV: `TRACE(level, ("DIV"));
|
||||
`INST_M_DIVU: `TRACE(level, ("DIVU"));
|
||||
`INST_M_REM: `TRACE(level, ("REM"));
|
||||
`INST_M_REMU: `TRACE(level, ("REMU"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
`EX_LSU: begin
|
||||
if (op_args.lsu.is_float) begin
|
||||
case (`INST_LSU_BITS'(op_type))
|
||||
`INST_LSU_LW: `TRACE(level, ("FLW"));
|
||||
`INST_LSU_LD: `TRACE(level, ("FLD"));
|
||||
`INST_LSU_SW: `TRACE(level, ("FSW"));
|
||||
`INST_LSU_SD: `TRACE(level, ("FSD"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_LSU_BITS'(op_type))
|
||||
`INST_LSU_LB: `TRACE(level, ("LB"));
|
||||
`INST_LSU_LH: `TRACE(level, ("LH"));
|
||||
`INST_LSU_LW: `TRACE(level, ("LW"));
|
||||
`INST_LSU_LD: `TRACE(level, ("LD"));
|
||||
`INST_LSU_LBU:`TRACE(level, ("LBU"));
|
||||
`INST_LSU_LHU:`TRACE(level, ("LHU"));
|
||||
`INST_LSU_LWU:`TRACE(level, ("LWU"));
|
||||
`INST_LSU_SB: `TRACE(level, ("SB"));
|
||||
`INST_LSU_SH: `TRACE(level, ("SH"));
|
||||
`INST_LSU_SW: `TRACE(level, ("SW"));
|
||||
`INST_LSU_SD: `TRACE(level, ("SD"));
|
||||
`INST_LSU_FENCE:`TRACE(level,("FENCE"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end
|
||||
`EX_FPU: begin
|
||||
case (`INST_FPU_BITS'(op_type))
|
||||
`INST_FPU_ADD: begin
|
||||
if (op_args.fpu.fmt[0])
|
||||
`TRACE(level, ("FADD.D"));
|
||||
else
|
||||
`TRACE(level, ("FADD.S"));
|
||||
end
|
||||
`INST_FPU_SUB: begin
|
||||
if (op_args.fpu.fmt[0])
|
||||
`TRACE(level, ("FSUB.D"));
|
||||
else
|
||||
`TRACE(level, ("FSUB.S"));
|
||||
end
|
||||
`INST_FPU_MUL: begin
|
||||
if (op_args.fpu.fmt[0])
|
||||
`TRACE(level, ("FMUL.D"));
|
||||
else
|
||||
`TRACE(level, ("FMUL.S"));
|
||||
end
|
||||
`INST_FPU_DIV: begin
|
||||
if (op_args.fpu.fmt[0])
|
||||
`TRACE(level, ("FDIV.D"));
|
||||
else
|
||||
`TRACE(level, ("FDIV.S"));
|
||||
end
|
||||
`INST_FPU_SQRT: begin
|
||||
if (op_args.fpu.fmt[0])
|
||||
`TRACE(level, ("FSQRT.D"));
|
||||
else
|
||||
`TRACE(level, ("FSQRT.S"));
|
||||
end
|
||||
`INST_FPU_MADD: begin
|
||||
if (op_args.fpu.fmt[0])
|
||||
`TRACE(level, ("FMADD.D"));
|
||||
else
|
||||
`TRACE(level, ("FMADD.S"));
|
||||
end
|
||||
`INST_FPU_MSUB: begin
|
||||
if (op_args.fpu.fmt[0])
|
||||
`TRACE(level, ("FMSUB.D"));
|
||||
else
|
||||
`TRACE(level, ("FMSUB.S"));
|
||||
end
|
||||
`INST_FPU_NMADD: begin
|
||||
if (op_args.fpu.fmt[0])
|
||||
`TRACE(level, ("FNMADD.D"));
|
||||
else
|
||||
`TRACE(level, ("FNMADD.S"));
|
||||
end
|
||||
`INST_FPU_NMSUB: begin
|
||||
if (op_args.fpu.fmt[0])
|
||||
`TRACE(level, ("FNMSUB.D"));
|
||||
else
|
||||
`TRACE(level, ("FNMSUB.S"));
|
||||
end
|
||||
`INST_FPU_CMP: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
case (op_args.fpu.frm[1:0])
|
||||
0: `TRACE(level, ("FLE.D"));
|
||||
1: `TRACE(level, ("FLT.D"));
|
||||
2: `TRACE(level, ("FEQ.D"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (op_args.fpu.frm[1:0])
|
||||
0: `TRACE(level, ("FLE.S"));
|
||||
1: `TRACE(level, ("FLT.S"));
|
||||
2: `TRACE(level, ("FEQ.S"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end
|
||||
`INST_FPU_F2F: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
`TRACE(level, ("FCVT.D.S"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.S.D"));
|
||||
end
|
||||
end
|
||||
`INST_FPU_F2I: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.L.D"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.W.D"));
|
||||
end
|
||||
end else begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.L.S"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.W.S"));
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_F2U: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.LU.D"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.WU.D"));
|
||||
end
|
||||
end else begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.LU.S"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.WU.S"));
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_I2F: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.D.L"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.D.W"));
|
||||
end
|
||||
end else begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.S.L"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.S.W"));
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_U2F: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.D.LU"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.D.WU"));
|
||||
end
|
||||
end else begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.S.LU"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.S.WU"));
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_MISC: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
case (op_args.fpu.frm)
|
||||
0: `TRACE(level, ("FSGNJ.D"));
|
||||
1: `TRACE(level, ("FSGNJN.D"));
|
||||
2: `TRACE(level, ("FSGNJX.D"));
|
||||
3: `TRACE(level, ("FCLASS.D"));
|
||||
4: `TRACE(level, ("FMV.X.D"));
|
||||
5: `TRACE(level, ("FMV.D.X"));
|
||||
6: `TRACE(level, ("FMIN.D"));
|
||||
7: `TRACE(level, ("FMAX.D"));
|
||||
endcase
|
||||
end else begin
|
||||
case (op_args.fpu.frm)
|
||||
0: `TRACE(level, ("FSGNJ.S"));
|
||||
1: `TRACE(level, ("FSGNJN.S"));
|
||||
2: `TRACE(level, ("FSGNJX.S"));
|
||||
3: `TRACE(level, ("FCLASS.S"));
|
||||
4: `TRACE(level, ("FMV.X.S"));
|
||||
5: `TRACE(level, ("FMV.S.X"));
|
||||
6: `TRACE(level, ("FMIN.S"));
|
||||
7: `TRACE(level, ("FMAX.S"));
|
||||
endcase
|
||||
end
|
||||
end
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
`EX_SFU: begin
|
||||
case (`INST_SFU_BITS'(op_type))
|
||||
`INST_SFU_TMC: `TRACE(level, ("TMC"));
|
||||
`INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN"));
|
||||
`INST_SFU_SPLIT: begin if (op_args.wctl.is_neg) `TRACE(level, ("SPLIT.N")); else `TRACE(level, ("SPLIT")); end
|
||||
`INST_SFU_JOIN: `TRACE(level, ("JOIN"));
|
||||
`INST_SFU_BAR: `TRACE(level, ("BAR"));
|
||||
`INST_SFU_PRED: begin if (op_args.wctl.is_neg) `TRACE(level, ("PRED.N")); else `TRACE(level, ("PRED")); end
|
||||
`INST_SFU_CSRRW: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRWI")); else `TRACE(level, ("CSRRW")); end
|
||||
`INST_SFU_CSRRS: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRSI")); else `TRACE(level, ("CSRRS")); end
|
||||
`INST_SFU_CSRRC: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRCI")); else `TRACE(level, ("CSRRC")); end
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
endtask
|
||||
|
||||
task trace_op_args(input int level,
|
||||
input [`EX_BITS-1:0] ex_type,
|
||||
input [`INST_OP_BITS-1:0] op_type,
|
||||
input VX_gpu_pkg::op_args_t op_args
|
||||
);
|
||||
case (ex_type)
|
||||
`EX_ALU: begin
|
||||
`TRACE(level, (", use_PC=%b, use_imm=%b, imm=0x%0h", op_args.alu.use_PC, op_args.alu.use_imm, op_args.alu.imm));
|
||||
end
|
||||
`EX_LSU: begin
|
||||
`TRACE(level, (", offset=0x%0h", op_args.lsu.offset));
|
||||
end
|
||||
`EX_FPU: begin
|
||||
`TRACE(level, (", fmt=0x%0h, frm=0x%0h", op_args.fpu.fmt, op_args.fpu.frm));
|
||||
end
|
||||
`EX_SFU: begin
|
||||
if (`INST_SFU_IS_CSR(op_type)) begin
|
||||
`TRACE(level, (", addr=0x%0h, use_imm=%b, imm=0x%0h", op_args.csr.addr, op_args.csr.use_imm, op_args.csr.imm));
|
||||
end
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
endtask
|
||||
|
||||
task trace_base_dcr(input int level, input [`VX_DCR_ADDR_WIDTH-1:0] addr);
|
||||
case (addr)
|
||||
`VX_DCR_BASE_STARTUP_ADDR0: `TRACE(level, ("STARTUP_ADDR0"));
|
||||
`VX_DCR_BASE_STARTUP_ADDR1: `TRACE(level, ("STARTUP_ADDR1"));
|
||||
`VX_DCR_BASE_STARTUP_ARG0: `TRACE(level, ("STARTUP_ARG0"));
|
||||
`VX_DCR_BASE_STARTUP_ARG1: `TRACE(level, ("STARTUP_ARG1"));
|
||||
`VX_DCR_BASE_MPM_CLASS: `TRACE(level, ("MPM_CLASS"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
endtask
|
||||
|
||||
`endif
|
||||
|
||||
endpackage
|
||||
|
||||
`endif // VX_TRACE_PKG_VH
|
44
hw/rtl/core/VX_uuid_gen.sv
Normal file
44
hw/rtl/core/VX_uuid_gen.sv
Normal file
|
@ -0,0 +1,44 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_uuid_gen import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0,
|
||||
parameter UUID_WIDTH = 48
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire incr,
|
||||
input wire [`NW_WIDTH-1:0] wid,
|
||||
output wire [UUID_WIDTH-1:0] uuid
|
||||
);
|
||||
localparam GNW_WIDTH = UUID_WIDTH - 32;
|
||||
reg [31:0] uuid_cntrs [0:`NUM_WARPS-1];
|
||||
reg [`NUM_WARPS-1:0] has_uuid_cntrs;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
has_uuid_cntrs <= '0;
|
||||
end else if (incr) begin
|
||||
has_uuid_cntrs[wid] <= 1;
|
||||
end
|
||||
if (incr) begin
|
||||
uuid_cntrs[wid] <= has_uuid_cntrs[wid] ? (uuid_cntrs[wid] + 1) : 1;
|
||||
end
|
||||
end
|
||||
|
||||
wire [GNW_WIDTH-1:0] g_wid = (GNW_WIDTH'(CORE_ID) << `NW_BITS) + GNW_WIDTH'(wid);
|
||||
assign uuid = {g_wid, (has_uuid_cntrs[wid] ? uuid_cntrs[wid] : 0)};
|
||||
|
||||
endmodule
|
|
@ -50,9 +50,9 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
wire is_bar = (execute_if.data.op_type == `INST_SFU_BAR);
|
||||
|
||||
wire [`UP(LANE_BITS)-1:0] tid;
|
||||
if (LANE_BITS != 0) begin
|
||||
if (LANE_BITS != 0) begin : g_tid
|
||||
assign tid = execute_if.data.tid[0 +: LANE_BITS];
|
||||
end else begin
|
||||
end else begin : g_no_tid
|
||||
assign tid = 0;
|
||||
end
|
||||
|
||||
|
@ -63,7 +63,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
wire not_pred = execute_if.data.op_args.wctl.is_neg;
|
||||
|
||||
wire [NUM_LANES-1:0] taken;
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_taken
|
||||
assign taken[i] = (execute_if.data.rs1_data[i][0] ^ not_pred);
|
||||
end
|
||||
|
||||
|
@ -131,7 +131,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
// wspawn
|
||||
|
||||
wire [`NUM_WARPS-1:0] wspawn_wmask;
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_wspawn_wmask
|
||||
assign wspawn_wmask[i] = (i < rs1_data[`NW_BITS:0]) && (i != execute_if.data.wid);
|
||||
end
|
||||
assign wspawn.valid = is_wspawn;
|
||||
|
@ -162,7 +162,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
assign warp_ctl_if.sjoin = sjoin_r;
|
||||
assign warp_ctl_if.barrier = barrier_r;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_commit_if
|
||||
assign commit_if.data.data[i] = `XLEN'(dvstack_ptr);
|
||||
end
|
||||
|
||||
|
|
|
@ -1,17 +1,17 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Modified port of cast module from fpnew Libray
|
||||
// Modified port of cast module from fpnew Libray
|
||||
// reference: https://github.com/pulp-platform/fpnew
|
||||
|
||||
`include "VX_fpu_define.vh"
|
||||
|
@ -22,7 +22,8 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
parameter LATENCY = 1,
|
||||
parameter INT_WIDTH = 32,
|
||||
parameter MAN_BITS = 23,
|
||||
parameter EXP_BITS = 8
|
||||
parameter EXP_BITS = 8,
|
||||
parameter OUT_REG = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -35,10 +36,10 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
input wire is_signed,
|
||||
|
||||
input wire [31:0] dataa,
|
||||
output wire [31:0] result,
|
||||
output wire [31:0] result,
|
||||
|
||||
output wire [`FP_FLAGS_BITS-1:0] fflags
|
||||
);
|
||||
);
|
||||
// Constants
|
||||
localparam EXP_BIAS = 2**(EXP_BITS-1)-1;
|
||||
|
||||
|
@ -55,11 +56,11 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
localparam FMT_SHIFT_COMPENSATION = S_MAN_WIDTH - 1 - MAN_BITS;
|
||||
localparam NUM_FP_STICKY = 2 * S_MAN_WIDTH - MAN_BITS - 1; // removed mantissa, 1. and R
|
||||
localparam NUM_INT_STICKY = 2 * S_MAN_WIDTH - INT_WIDTH; // removed int and R
|
||||
|
||||
|
||||
// Input processing
|
||||
|
||||
fclass_t fclass;
|
||||
VX_fp_classifier #(
|
||||
|
||||
fclass_t fclass;
|
||||
VX_fp_classifier #(
|
||||
.EXP_BITS (EXP_BITS),
|
||||
.MAN_BITS (MAN_BITS)
|
||||
) fp_classifier (
|
||||
|
@ -69,9 +70,9 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
);
|
||||
|
||||
wire [S_MAN_WIDTH-1:0] input_mant;
|
||||
wire [S_EXP_WIDTH-1:0] input_exp;
|
||||
wire [S_EXP_WIDTH-1:0] input_exp;
|
||||
wire input_sign;
|
||||
|
||||
|
||||
wire i2f_sign = dataa[INT_WIDTH-1];
|
||||
wire f2i_sign = dataa[INT_WIDTH-1] && is_signed;
|
||||
wire [S_MAN_WIDTH-1:0] f2i_mantissa = f2i_sign ? (-dataa) : dataa;
|
||||
|
@ -81,7 +82,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
assign input_sign = is_itof ? f2i_sign : i2f_sign;
|
||||
|
||||
// Pipeline stage0
|
||||
|
||||
|
||||
wire is_itof_s0;
|
||||
wire is_signed_s0;
|
||||
wire [2:0] rnd_mode_s0;
|
||||
|
@ -92,7 +93,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `INST_FRM_BITS + 1 + $bits(fclass_t) + 1 + S_EXP_WIDTH + S_MAN_WIDTH),
|
||||
.DEPTH (LATENCY > 2)
|
||||
.DEPTH (LATENCY > 1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -100,7 +101,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
.data_in ({is_itof, is_signed, frm, fclass, input_sign, input_exp, input_mant}),
|
||||
.data_out ({is_itof_s0, is_signed_s0, rnd_mode_s0, fclass_s0, input_sign_s0, fmt_exponent_s0, encoded_mant_s0})
|
||||
);
|
||||
|
||||
|
||||
// Normalization
|
||||
|
||||
wire [LZC_RESULT_WIDTH-1:0] renorm_shamt_s0; // renormalization shift amount
|
||||
|
@ -113,12 +114,12 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
.data_out (renorm_shamt_s0),
|
||||
.valid_out (mant_is_nonzero_s0)
|
||||
);
|
||||
|
||||
|
||||
wire mant_is_zero_s0 = ~mant_is_nonzero_s0;
|
||||
|
||||
wire [S_MAN_WIDTH-1:0] input_mant_n_s0; // normalized input mantissa
|
||||
wire [S_MAN_WIDTH-1:0] input_mant_n_s0; // normalized input mantissa
|
||||
wire [S_EXP_WIDTH-1:0] input_exp_n_s0; // unbiased true exponent
|
||||
|
||||
|
||||
// Realign input mantissa, append zeroes if destination is wider
|
||||
assign input_mant_n_s0 = encoded_mant_s0 << renorm_shamt_s0;
|
||||
|
||||
|
@ -140,7 +141,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `INST_FRM_BITS + 1 + $bits(fclass_t) + 1 + 1 + S_MAN_WIDTH + S_EXP_WIDTH),
|
||||
.DEPTH (LATENCY > 1)
|
||||
.DEPTH (LATENCY > 2)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -169,30 +170,30 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
wire of_before_round_s1 = overflow;
|
||||
|
||||
// Pipeline stage2
|
||||
|
||||
|
||||
wire is_itof_s2;
|
||||
wire is_signed_s2;
|
||||
wire [2:0] rnd_mode_s2;
|
||||
fclass_t fclass_s2;
|
||||
fclass_t fclass_s2;
|
||||
wire mant_is_zero_s2;
|
||||
wire input_sign_s2;
|
||||
wire [2*S_MAN_WIDTH:0] destination_mant_s2;
|
||||
wire [EXP_BITS-1:0] final_exp_s2;
|
||||
wire of_before_round_s2;
|
||||
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + `INST_FRM_BITS + $bits(fclass_t) + 1 + 1 + (2*S_MAN_WIDTH+1) + EXP_BITS + 1),
|
||||
.DEPTH (LATENCY > 3)
|
||||
.DEPTH (LATENCY > 0)
|
||||
) pipe_reg2 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (enable),
|
||||
.data_in ({is_itof_s1, is_signed_s1, rnd_mode_s1, fclass_s1, mant_is_zero_s1, input_sign_s1, destination_mant_s1, final_exp_s1, of_before_round_s1}),
|
||||
.data_out ({is_itof_s2, is_signed_s2, rnd_mode_s2, fclass_s2, mant_is_zero_s2, input_sign_s2, destination_mant_s2, final_exp_s2, of_before_round_s2})
|
||||
);
|
||||
|
||||
);
|
||||
|
||||
// Rouding and classification
|
||||
|
||||
|
||||
wire [MAN_BITS-1:0] final_mant_s2; // mantissa after adjustments
|
||||
wire [INT_WIDTH-1:0] final_int_s2; // integer shifted in position
|
||||
wire [1:0] f2i_round_sticky_bits_s2, i2f_round_sticky_bits_s2;
|
||||
|
@ -237,20 +238,20 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
|
||||
wire is_itof_s3;
|
||||
wire is_signed_s3;
|
||||
fclass_t fclass_s3;
|
||||
fclass_t fclass_s3;
|
||||
wire mant_is_zero_s3;
|
||||
wire input_sign_s3;
|
||||
wire rounded_sign_s3;
|
||||
wire [INT_WIDTH-1:0] rounded_abs_s3;
|
||||
wire of_before_round_s3;
|
||||
wire of_before_round_s3;
|
||||
wire f2i_round_has_sticky_s3;
|
||||
wire i2f_round_has_sticky_s3;
|
||||
|
||||
`UNUSED_VAR (fclass_s3)
|
||||
`UNUSED_VAR (fclass_s3)
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + $bits(fclass_t) + 1 + 1 + 32 + 1 + 1 + 1 + 1),
|
||||
.DEPTH (LATENCY > 4)
|
||||
.DEPTH (LATENCY > 3)
|
||||
) pipe_reg3 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -258,7 +259,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
.data_in ({is_itof_s2, is_signed_s2, fclass_s2, mant_is_zero_s2, input_sign_s2, rounded_abs_s2, rounded_sign_s2, of_before_round_s2, f2i_round_has_sticky_s2, i2f_round_has_sticky_s2}),
|
||||
.data_out ({is_itof_s3, is_signed_s3, fclass_s3, mant_is_zero_s3, input_sign_s3, rounded_abs_s3, rounded_sign_s3, of_before_round_s3, f2i_round_has_sticky_s3, i2f_round_has_sticky_s3})
|
||||
);
|
||||
|
||||
|
||||
// Assemble regular result, nan box short ones. Int zeroes need to be detected
|
||||
wire [INT_WIDTH-1:0] fmt_result_s3 = mant_is_zero_s3 ? 0 : {rounded_sign_s3, rounded_abs_s3[EXP_BITS+MAN_BITS-1:0]};
|
||||
|
||||
|
@ -278,18 +279,18 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
f2i_special_result_s3[INT_WIDTH-2:0] = 2**(INT_WIDTH-1) - 1; // alone yields 2**(31)-1
|
||||
f2i_special_result_s3[INT_WIDTH-1] = ~is_signed_s3; // for unsigned casts yields 2**31
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned)
|
||||
wire f2i_result_is_special_s3 = fclass_s3.is_nan
|
||||
wire f2i_result_is_special_s3 = fclass_s3.is_nan
|
||||
| fclass_s3.is_inf
|
||||
| of_before_round_s3
|
||||
| (input_sign_s3 & ~is_signed_s3 & ~rounded_int_res_zero_s3);
|
||||
|
||||
|
||||
fflags_t f2i_special_status_s3;
|
||||
fflags_t i2f_status_s3, f2i_status_s3;
|
||||
fflags_t tmp_fflags_s3;
|
||||
|
||||
|
||||
// All integer special cases are invalid
|
||||
assign f2i_special_status_s3 = {1'b1, 4'h0};
|
||||
|
||||
|
@ -306,7 +307,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
|
||||
VX_pipe_register #(
|
||||
.DATAW (32 + `FP_FLAGS_BITS),
|
||||
.DEPTH (LATENCY > 0)
|
||||
.DEPTH (OUT_REG)
|
||||
) pipe_reg4 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -1,17 +1,17 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Modified port of noncomp module from fpnew Libray
|
||||
// Modified port of noncomp module from fpnew Libray
|
||||
// reference: https://github.com/pulp-platform/fpnew
|
||||
|
||||
`include "VX_fpu_define.vh"
|
||||
|
@ -19,9 +19,10 @@
|
|||
`ifdef FPU_DSP
|
||||
|
||||
module VX_fncp_unit import VX_fpu_pkg::*; #(
|
||||
parameter LATENCY = 2,
|
||||
parameter LATENCY = 1,
|
||||
parameter EXP_BITS = 8,
|
||||
parameter MAN_BITS = 23
|
||||
parameter MAN_BITS = 23,
|
||||
parameter OUT_REG = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -33,10 +34,10 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
|
||||
input wire [31:0] dataa,
|
||||
input wire [31:0] datab,
|
||||
output wire [31:0] result,
|
||||
output wire [31:0] result,
|
||||
|
||||
output wire [`FP_FLAGS_BITS-1:0] fflags
|
||||
);
|
||||
);
|
||||
localparam NEG_INF = 32'h00000001,
|
||||
NEG_NORM = 32'h00000002,
|
||||
NEG_SUBNORM = 32'h00000004,
|
||||
|
@ -55,15 +56,15 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
wire a_smaller, ab_equal;
|
||||
|
||||
// Setup
|
||||
assign a_sign = dataa[31];
|
||||
assign a_sign = dataa[31];
|
||||
assign a_exponent = dataa[30:23];
|
||||
assign a_mantissa = dataa[22:0];
|
||||
|
||||
assign b_sign = datab[31];
|
||||
assign b_sign = datab[31];
|
||||
assign b_exponent = datab[30:23];
|
||||
assign b_mantissa = datab[22:0];
|
||||
|
||||
VX_fp_classifier #(
|
||||
VX_fp_classifier #(
|
||||
.EXP_BITS (EXP_BITS),
|
||||
.MAN_BITS (MAN_BITS)
|
||||
) fp_class_a (
|
||||
|
@ -72,7 +73,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
.clss_o (a_fclass)
|
||||
);
|
||||
|
||||
VX_fp_classifier #(
|
||||
VX_fp_classifier #(
|
||||
.EXP_BITS (EXP_BITS),
|
||||
.MAN_BITS (MAN_BITS)
|
||||
) fp_class_b (
|
||||
|
@ -82,7 +83,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
);
|
||||
|
||||
assign a_smaller = (dataa < datab) ^ (a_sign || b_sign);
|
||||
assign ab_equal = (dataa == datab)
|
||||
assign ab_equal = (dataa == datab)
|
||||
|| (a_fclass.is_zero && b_fclass.is_zero); // +0 == -0
|
||||
|
||||
// Pipeline stage0
|
||||
|
@ -101,54 +102,54 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
|
||||
VX_pipe_register #(
|
||||
.DATAW (4 + 2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fclass_t) + 1 + 1),
|
||||
.DEPTH (LATENCY > 1)
|
||||
.DEPTH (LATENCY > 0)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (enable),
|
||||
.data_in ({op_mod, dataa, datab, a_sign, b_sign, a_exponent, a_mantissa, a_fclass, b_fclass, a_smaller, ab_equal}),
|
||||
.data_out ({op_mod_s0, dataa_s0, datab_s0, a_sign_s0, b_sign_s0, a_exponent_s0, a_mantissa_s0, a_fclass_s0, b_fclass_s0, a_smaller_s0, ab_equal_s0})
|
||||
);
|
||||
);
|
||||
|
||||
// FCLASS
|
||||
reg [31:0] fclass_mask_s0; // generate a 10-bit mask for integer reg
|
||||
always @(*) begin
|
||||
always @(*) begin
|
||||
if (a_fclass_s0.is_normal) begin
|
||||
fclass_mask_s0 = a_sign_s0 ? NEG_NORM : POS_NORM;
|
||||
end
|
||||
end
|
||||
else if (a_fclass_s0.is_inf) begin
|
||||
fclass_mask_s0 = a_sign_s0 ? NEG_INF : POS_INF;
|
||||
end
|
||||
end
|
||||
else if (a_fclass_s0.is_zero) begin
|
||||
fclass_mask_s0 = a_sign_s0 ? NEG_ZERO : POS_ZERO;
|
||||
end
|
||||
end
|
||||
else if (a_fclass_s0.is_subnormal) begin
|
||||
fclass_mask_s0 = a_sign_s0 ? NEG_SUBNORM : POS_SUBNORM;
|
||||
end
|
||||
end
|
||||
else if (a_fclass_s0.is_nan) begin
|
||||
fclass_mask_s0 = {22'h0, a_fclass_s0.is_quiet, a_fclass_s0.is_signaling, 8'h0};
|
||||
end
|
||||
else begin
|
||||
end
|
||||
else begin
|
||||
fclass_mask_s0 = QUT_NAN;
|
||||
end
|
||||
end
|
||||
|
||||
// Min/Max
|
||||
// Min/Max
|
||||
reg [31:0] fminmax_res_s0;
|
||||
always @(*) begin
|
||||
if (a_fclass_s0.is_nan && b_fclass_s0.is_nan)
|
||||
fminmax_res_s0 = {1'b0, 8'hff, 1'b1, 22'd0}; // canonical qNaN
|
||||
else if (a_fclass_s0.is_nan)
|
||||
else if (a_fclass_s0.is_nan)
|
||||
fminmax_res_s0 = datab_s0;
|
||||
else if (b_fclass_s0.is_nan)
|
||||
else if (b_fclass_s0.is_nan)
|
||||
fminmax_res_s0 = dataa_s0;
|
||||
else begin
|
||||
else begin
|
||||
// FMIN, FMAX
|
||||
fminmax_res_s0 = (op_mod_s0[0] ^ a_smaller_s0) ? dataa_s0 : datab_s0;
|
||||
end
|
||||
end
|
||||
|
||||
// Sign injection
|
||||
// Sign injection
|
||||
reg [31:0] fsgnj_res_s0; // result of sign injection
|
||||
always @(*) begin
|
||||
case (op_mod_s0[1:0])
|
||||
|
@ -158,12 +159,12 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
endcase
|
||||
end
|
||||
|
||||
// Comparison
|
||||
// Comparison
|
||||
reg fcmp_res_s0; // result of comparison
|
||||
reg fcmp_fflags_NV_s0; // comparison fflags
|
||||
always @(*) begin
|
||||
case (op_mod_s0[1:0])
|
||||
0: begin // LE
|
||||
0: begin // LE
|
||||
if (a_fclass_s0.is_nan || b_fclass_s0.is_nan) begin
|
||||
fcmp_res_s0 = 0;
|
||||
fcmp_fflags_NV_s0 = 1;
|
||||
|
@ -179,12 +180,12 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
end else begin
|
||||
fcmp_res_s0 = (a_smaller_s0 & ~ab_equal_s0);
|
||||
fcmp_fflags_NV_s0 = 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
2: begin // EQ
|
||||
if (a_fclass_s0.is_nan || b_fclass_s0.is_nan) begin
|
||||
fcmp_res_s0 = 0;
|
||||
fcmp_fflags_NV_s0 = a_fclass_s0.is_signaling | b_fclass_s0.is_signaling;
|
||||
fcmp_fflags_NV_s0 = a_fclass_s0.is_signaling | b_fclass_s0.is_signaling;
|
||||
end else begin
|
||||
fcmp_res_s0 = ab_equal_s0;
|
||||
fcmp_fflags_NV_s0 = 0;
|
||||
|
@ -192,7 +193,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
end
|
||||
default: begin
|
||||
fcmp_res_s0 = 'x;
|
||||
fcmp_fflags_NV_s0 = 'x;
|
||||
fcmp_fflags_NV_s0 = 'x;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
@ -216,7 +217,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
// FMV
|
||||
result_s0 = dataa_s0;
|
||||
fflags_NV_s0 = 0;
|
||||
end
|
||||
end
|
||||
6,7: begin
|
||||
// MIN/MAX
|
||||
result_s0 = fminmax_res_s0;
|
||||
|
@ -229,7 +230,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
|
||||
VX_pipe_register #(
|
||||
.DATAW (32 + 1),
|
||||
.DEPTH (LATENCY > 0)
|
||||
.DEPTH (OUT_REG)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -46,56 +46,68 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #(
|
|||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
`UNUSED_VAR (frm)
|
||||
localparam DATAW = 32 + `INST_FRM_BITS + 1 + 1;
|
||||
|
||||
wire [NUM_LANES-1:0][DATAW-1:0] data_in;
|
||||
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
|
||||
fflags_t [NUM_LANES-1:0] fflags_out;
|
||||
|
||||
wire pe_enable;
|
||||
wire [NUM_PES-1:0][31:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][DATAW-1:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_data_in
|
||||
assign data_in[i][0 +: 32] = dataa[i];
|
||||
assign data_in[i][32 +: `INST_FRM_BITS] = frm;
|
||||
assign data_in[i][32 + `INST_FRM_BITS +: 1] = is_itof;
|
||||
assign data_in[i][32 + `INST_FRM_BITS + 1 +: 1] = is_signed;
|
||||
end
|
||||
|
||||
VX_pe_serializer #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.LATENCY (`LATENCY_FCVT),
|
||||
.DATA_IN_WIDTH(32),
|
||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.DATA_IN_WIDTH (DATAW),
|
||||
.DATA_OUT_WIDTH (`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG (0),
|
||||
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
|
||||
.OUT_BUF (2)
|
||||
) pe_serializer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in),
|
||||
.data_in (dataa),
|
||||
.data_in (data_in),
|
||||
.tag_in ({mask_in, tag_in}),
|
||||
.ready_in (ready_in),
|
||||
.pe_enable (pe_enable),
|
||||
.pe_data_in (pe_data_in),
|
||||
.pe_data_out(pe_data_out),
|
||||
.pe_data_out(pe_data_in),
|
||||
.pe_data_in (pe_data_out),
|
||||
.valid_out (valid_out),
|
||||
.data_out (data_out),
|
||||
.tag_out ({mask_out, tag_out}),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
`UNUSED_VAR (pe_data_in)
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result
|
||||
assign result[i] = data_out[i][0 +: 32];
|
||||
assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin : g_fcvt_units
|
||||
VX_fcvt_unit #(
|
||||
.LATENCY (`LATENCY_FCVT)
|
||||
.LATENCY (`LATENCY_FCVT),
|
||||
.OUT_REG (1)
|
||||
) fcvt_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (pe_enable),
|
||||
.frm (frm),
|
||||
.is_itof (is_itof),
|
||||
.is_signed (is_signed),
|
||||
.frm (pe_data_in[0][32 +: `INST_FRM_BITS]),
|
||||
.is_itof (pe_data_in[0][32 + `INST_FRM_BITS +: 1]),
|
||||
.is_signed (pe_data_in[0][32 + `INST_FRM_BITS + 1 +: 1]),
|
||||
.dataa (pe_data_in[i][0 +: 32]),
|
||||
.result (pe_data_out[i][0 +: 32]),
|
||||
.fflags (pe_data_out[i][32 +: `FP_FLAGS_BITS])
|
||||
|
|
|
@ -44,31 +44,33 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
|||
output wire valid_out,
|
||||
input wire ready_out
|
||||
);
|
||||
`UNUSED_VAR (frm)
|
||||
localparam DATAW = 2 * 32 + `INST_FRM_BITS;
|
||||
|
||||
wire [NUM_LANES-1:0][DATAW-1:0] data_in;
|
||||
|
||||
wire [NUM_LANES-1:0][2*32-1:0] data_in;
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
|
||||
wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out;
|
||||
|
||||
wire pe_enable;
|
||||
wire [NUM_PES-1:0][2*32-1:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][DATAW-1:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_data_in
|
||||
assign data_in[i][0 +: 32] = dataa[i];
|
||||
assign data_in[i][32 +: 32] = datab[i];
|
||||
assign data_in[i][64 +: `INST_FRM_BITS] = frm;
|
||||
end
|
||||
|
||||
VX_pe_serializer #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.LATENCY (`LATENCY_FDIV),
|
||||
.DATA_IN_WIDTH(2*32),
|
||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.DATA_IN_WIDTH (DATAW),
|
||||
.DATA_OUT_WIDTH (`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG (0),
|
||||
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
|
||||
.OUT_BUF (2)
|
||||
) pe_serializer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -77,15 +79,17 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
|||
.tag_in ({mask_in, tag_in}),
|
||||
.ready_in (ready_in),
|
||||
.pe_enable (pe_enable),
|
||||
.pe_data_in (pe_data_in),
|
||||
.pe_data_out(pe_data_out),
|
||||
.pe_data_out(pe_data_in),
|
||||
.pe_data_in (pe_data_out),
|
||||
.valid_out (valid_out),
|
||||
.data_out (data_out),
|
||||
.tag_out ({mask_out, tag_out}),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
`UNUSED_VAR (pe_data_in)
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result
|
||||
assign result[i] = data_out[i][0 +: 32];
|
||||
assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS];
|
||||
end
|
||||
|
@ -94,7 +98,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
|||
|
||||
`ifdef QUARTUS
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin : g_fdivs
|
||||
acl_fdiv fdiv (
|
||||
.clk (clk),
|
||||
.areset (1'b0),
|
||||
|
@ -112,7 +116,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
|||
|
||||
`elsif VIVADO
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin : g_fdivs
|
||||
wire [3:0] tuser;
|
||||
xil_fdiv fdiv (
|
||||
.aclk (clk),
|
||||
|
@ -134,7 +138,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
|||
|
||||
`else
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin : g_fdivs
|
||||
reg [63:0] r;
|
||||
`UNUSED_VAR (r)
|
||||
fflags_t f;
|
||||
|
@ -143,9 +147,9 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
|||
dpi_fdiv (
|
||||
pe_enable,
|
||||
int'(0),
|
||||
{32'hffffffff, pe_data_in[i][0 +: 32]},
|
||||
{32'hffffffff, pe_data_in[i][32 +: 32]},
|
||||
frm,
|
||||
{32'hffffffff, pe_data_in[i][0 +: 32]}, // a
|
||||
{32'hffffffff, pe_data_in[i][32 +: 32]}, // b
|
||||
pe_data_in[0][64 +: `INST_FRM_BITS], // frm
|
||||
r,
|
||||
f
|
||||
);
|
||||
|
|
|
@ -76,7 +76,6 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
|
||||
reg is_fadd, is_fsub, is_fmul, is_fmadd, is_fmsub, is_fnmadd, is_fnmsub;
|
||||
reg is_div, is_fcmp, is_itof, is_utof, is_ftoi, is_ftou, is_f2f;
|
||||
reg dst_fmt, int_fmt;
|
||||
|
||||
reg [NUM_LANES-1:0][63:0] operands [3];
|
||||
|
||||
|
@ -88,7 +87,8 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
|
||||
`UNUSED_VAR (fmt)
|
||||
wire f_fmt = fmt[0];
|
||||
wire i_fmt = fmt[1];
|
||||
|
||||
always @(*) begin
|
||||
is_fadd = 0;
|
||||
|
@ -106,25 +106,11 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
is_ftou = 0;
|
||||
is_f2f = 0;
|
||||
|
||||
dst_fmt = 0;
|
||||
int_fmt = 0;
|
||||
|
||||
`ifdef FLEN_64
|
||||
dst_fmt = fmt[0];
|
||||
`endif
|
||||
|
||||
`ifdef XLEN_64
|
||||
int_fmt = fmt[1];
|
||||
`endif
|
||||
|
||||
case (op_type)
|
||||
`INST_FPU_ADD: begin core_select = FPU_FMA; is_fadd = 1; end
|
||||
`INST_FPU_SUB: begin core_select = FPU_FMA; is_fsub = 1; end
|
||||
`INST_FPU_ADD: begin core_select = FPU_FMA; is_fadd = ~i_fmt; is_fsub = i_fmt; end
|
||||
`INST_FPU_MADD: begin core_select = FPU_FMA; is_fmadd = ~i_fmt; is_fmsub = i_fmt; end
|
||||
`INST_FPU_NMADD: begin core_select = FPU_FMA; is_fnmadd = ~i_fmt; is_fnmsub = i_fmt; end
|
||||
`INST_FPU_MUL: begin core_select = FPU_FMA; is_fmul = 1; end
|
||||
`INST_FPU_MADD: begin core_select = FPU_FMA; is_fmadd = 1; end
|
||||
`INST_FPU_MSUB: begin core_select = FPU_FMA; is_fmsub = 1; end
|
||||
`INST_FPU_NMADD: begin core_select = FPU_FMA; is_fnmadd = 1; end
|
||||
`INST_FPU_NMSUB: begin core_select = FPU_FMA; is_fnmsub = 1; end
|
||||
`INST_FPU_DIV: begin core_select = FPU_DIVSQRT; is_div = 1; end
|
||||
`INST_FPU_SQRT: begin core_select = FPU_DIVSQRT; end
|
||||
`INST_FPU_CMP: begin core_select = FPU_NCP; is_fcmp = 1; end
|
||||
|
@ -138,7 +124,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
end
|
||||
|
||||
generate
|
||||
begin : fma
|
||||
begin : g_fma
|
||||
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] result_fma;
|
||||
reg [NUM_LANES-1:0][63:0] result_fadd;
|
||||
|
@ -164,13 +150,13 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_LANES; ++i) begin
|
||||
dpi_fadd (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fadd[i], fflags_fadd[i]);
|
||||
dpi_fsub (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fsub[i], fflags_fsub[i]);
|
||||
dpi_fmul (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fmul[i], fflags_fmul[i]);
|
||||
dpi_fmadd (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fmadd[i], fflags_fmadd[i]);
|
||||
dpi_fmsub (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fmsub[i], fflags_fmsub[i]);
|
||||
dpi_fnmadd (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fnmadd[i], fflags_fnmadd[i]);
|
||||
dpi_fnmsub (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fnmsub[i], fflags_fnmsub[i]);
|
||||
dpi_fadd (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], frm, result_fadd[i], fflags_fadd[i]);
|
||||
dpi_fsub (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], frm, result_fsub[i], fflags_fsub[i]);
|
||||
dpi_fmul (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], frm, result_fmul[i], fflags_fmul[i]);
|
||||
dpi_fmadd (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fmadd[i], fflags_fmadd[i]);
|
||||
dpi_fmsub (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fmsub[i], fflags_fmsub[i]);
|
||||
dpi_fnmadd (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fnmadd[i], fflags_fnmadd[i]);
|
||||
dpi_fnmsub (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fnmsub[i], fflags_fnmsub[i]);
|
||||
|
||||
result_fma[i] = is_fadd ? result_fadd[i][`XLEN-1:0] :
|
||||
is_fsub ? result_fsub[i][`XLEN-1:0] :
|
||||
|
@ -214,7 +200,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
endgenerate
|
||||
|
||||
generate
|
||||
begin : fdiv
|
||||
begin : g_fdiv
|
||||
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] result_fdiv_r;
|
||||
reg [NUM_LANES-1:0][63:0] result_fdiv;
|
||||
|
@ -226,7 +212,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_LANES; ++i) begin
|
||||
dpi_fdiv (fdiv_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fdiv[i], fflags_fdiv[i]);
|
||||
dpi_fdiv (fdiv_fire, int'(f_fmt), operands[0][i], operands[1][i], frm, result_fdiv[i], fflags_fdiv[i]);
|
||||
result_fdiv_r[i] = result_fdiv[i][`XLEN-1:0];
|
||||
end
|
||||
end
|
||||
|
@ -253,7 +239,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
endgenerate
|
||||
|
||||
generate
|
||||
begin : fsqrt
|
||||
begin : g_fsqrt
|
||||
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] result_fsqrt_r;
|
||||
reg [NUM_LANES-1:0][63:0] result_fsqrt;
|
||||
|
@ -265,7 +251,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_LANES; ++i) begin
|
||||
dpi_fsqrt (fsqrt_fire, int'(dst_fmt), operands[0][i], frm, result_fsqrt[i], fflags_fsqrt[i]);
|
||||
dpi_fsqrt (fsqrt_fire, int'(f_fmt), operands[0][i], frm, result_fsqrt[i], fflags_fsqrt[i]);
|
||||
result_fsqrt_r[i] = result_fsqrt[i][`XLEN-1:0];
|
||||
end
|
||||
end
|
||||
|
@ -292,7 +278,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
endgenerate
|
||||
|
||||
generate
|
||||
begin : fcvt
|
||||
begin : g_fcvt
|
||||
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] result_fcvt;
|
||||
reg [NUM_LANES-1:0][63:0] result_itof;
|
||||
|
@ -313,11 +299,11 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_LANES; ++i) begin
|
||||
dpi_itof (fcvt_fire, int'(dst_fmt), int'(int_fmt), operands[0][i], frm, result_itof[i], fflags_itof[i]);
|
||||
dpi_utof (fcvt_fire, int'(dst_fmt), int'(int_fmt), operands[0][i], frm, result_utof[i], fflags_utof[i]);
|
||||
dpi_ftoi (fcvt_fire, int'(int_fmt), int'(dst_fmt), operands[0][i], frm, result_ftoi[i], fflags_ftoi[i]);
|
||||
dpi_ftou (fcvt_fire, int'(int_fmt), int'(dst_fmt), operands[0][i], frm, result_ftou[i], fflags_ftou[i]);
|
||||
dpi_f2f (fcvt_fire, int'(dst_fmt), operands[0][i], result_f2f[i]);
|
||||
dpi_itof (fcvt_fire, int'(f_fmt), int'(i_fmt), operands[0][i], frm, result_itof[i], fflags_itof[i]);
|
||||
dpi_utof (fcvt_fire, int'(f_fmt), int'(i_fmt), operands[0][i], frm, result_utof[i], fflags_utof[i]);
|
||||
dpi_ftoi (fcvt_fire, int'(i_fmt), int'(f_fmt), operands[0][i], frm, result_ftoi[i], fflags_ftoi[i]);
|
||||
dpi_ftou (fcvt_fire, int'(i_fmt), int'(f_fmt), operands[0][i], frm, result_ftou[i], fflags_ftou[i]);
|
||||
dpi_f2f (fcvt_fire, int'(f_fmt), operands[0][i], result_f2f[i]);
|
||||
|
||||
result_fcvt[i] = is_itof ? result_itof[i][`XLEN-1:0] :
|
||||
is_utof ? result_utof[i][`XLEN-1:0] :
|
||||
|
@ -356,7 +342,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
endgenerate
|
||||
|
||||
generate
|
||||
begin : fncp
|
||||
begin : g_fncp
|
||||
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] result_fncp;
|
||||
reg [NUM_LANES-1:0][63:0] result_fclss;
|
||||
|
@ -384,17 +370,17 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_LANES; ++i) begin
|
||||
dpi_fclss (fncp_fire, int'(dst_fmt), operands[0][i], result_fclss[i]);
|
||||
dpi_fle (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fle[i], fflags_fle[i]);
|
||||
dpi_flt (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_flt[i], fflags_flt[i]);
|
||||
dpi_feq (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_feq[i], fflags_feq[i]);
|
||||
dpi_fmin (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fmin[i], fflags_fmin[i]);
|
||||
dpi_fmax (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fmax[i], fflags_fmax[i]);
|
||||
dpi_fsgnj (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnj[i]);
|
||||
dpi_fsgnjn (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnjn[i]);
|
||||
dpi_fsgnjx (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnjx[i]);
|
||||
result_fmvx[i] = dst_fmt ? operands[0][i] : 64'($signed(operands[0][i][31:0])); // sign-extension
|
||||
result_fmvf[i] = dst_fmt ? operands[0][i] : (operands[0][i] | 64'hffffffff00000000); // nan-boxing
|
||||
dpi_fclss (fncp_fire, int'(f_fmt), operands[0][i], result_fclss[i]);
|
||||
dpi_fle (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fle[i], fflags_fle[i]);
|
||||
dpi_flt (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_flt[i], fflags_flt[i]);
|
||||
dpi_feq (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_feq[i], fflags_feq[i]);
|
||||
dpi_fmin (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fmin[i], fflags_fmin[i]);
|
||||
dpi_fmax (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fmax[i], fflags_fmax[i]);
|
||||
dpi_fsgnj (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fsgnj[i]);
|
||||
dpi_fsgnjn (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fsgnjn[i]);
|
||||
dpi_fsgnjx (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fsgnjx[i]);
|
||||
result_fmvx[i] = f_fmt ? operands[0][i] : 64'($signed(operands[0][i][31:0])); // sign-extension
|
||||
result_fmvf[i] = f_fmt ? operands[0][i] : (operands[0][i] | 64'hffffffff00000000); // nan-boxing
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -444,7 +430,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
VX_stream_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATAW (RSP_DATAW),
|
||||
.ARBITER ("R"),
|
||||
.ARBITER ("P"),
|
||||
.OUT_BUF (0)
|
||||
) div_sqrt_arb (
|
||||
.clk (clk),
|
||||
|
@ -463,14 +449,14 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
|
||||
wire [NUM_FPC-1:0][RSP_DATAW-1:0] per_core_data_out;
|
||||
|
||||
for (genvar i = 0; i < NUM_FPC; ++i) begin
|
||||
for (genvar i = 0; i < NUM_FPC; ++i) begin : g_per_core_data_out
|
||||
assign per_core_data_out[i] = {per_core_result[i], per_core_has_fflags[i], per_core_fflags[i], per_core_tag_out[i]};
|
||||
end
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (NUM_FPC),
|
||||
.DATAW (RSP_DATAW),
|
||||
.ARBITER ("F"),
|
||||
.ARBITER ("R"),
|
||||
.OUT_BUF (OUT_BUF)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
|
|
|
@ -51,68 +51,39 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
localparam FPU_DIVSQRT = 1;
|
||||
localparam FPU_CVT = 2;
|
||||
localparam FPU_NCP = 3;
|
||||
localparam NUM_FPC = 4;
|
||||
localparam FPC_BITS = `LOG2UP(NUM_FPC);
|
||||
localparam NUM_FPCORES = 4;
|
||||
localparam FPCORES_BITS = `LOG2UP(NUM_FPCORES);
|
||||
|
||||
localparam REQ_DATAW = NUM_LANES + TAG_WIDTH + `INST_FPU_BITS + `INST_FMT_BITS + `INST_FRM_BITS + 3 * (NUM_LANES * 32);
|
||||
localparam RSP_DATAW = (NUM_LANES * 32) + 1 + $bits(fflags_t) + TAG_WIDTH;
|
||||
|
||||
`UNUSED_VAR (fmt)
|
||||
|
||||
wire [NUM_FPC-1:0] per_core_ready_in;
|
||||
wire [NUM_FPC-1:0][NUM_LANES-1:0][31:0] per_core_result;
|
||||
wire [NUM_FPC-1:0][TAG_WIDTH-1:0] per_core_tag_out;
|
||||
wire [NUM_FPC-1:0] per_core_ready_out;
|
||||
wire [NUM_FPC-1:0] per_core_valid_out;
|
||||
wire [NUM_FPC-1:0] per_core_has_fflags;
|
||||
fflags_t [NUM_FPC-1:0] per_core_fflags;
|
||||
wire [NUM_FPCORES-1:0] per_core_valid_in;
|
||||
wire [NUM_FPCORES-1:0][REQ_DATAW-1:0] per_core_data_in;
|
||||
wire [NUM_FPCORES-1:0] per_core_ready_in;
|
||||
|
||||
wire div_ready_in, sqrt_ready_in;
|
||||
wire [NUM_LANES-1:0][31:0] div_result, sqrt_result;
|
||||
wire [TAG_WIDTH-1:0] div_tag_out, sqrt_tag_out;
|
||||
wire div_ready_out, sqrt_ready_out;
|
||||
wire div_valid_out, sqrt_valid_out;
|
||||
wire div_has_fflags, sqrt_has_fflags;
|
||||
fflags_t div_fflags, sqrt_fflags;
|
||||
wire [NUM_FPCORES-1:0][NUM_LANES-1:0] per_core_mask_in;
|
||||
wire [NUM_FPCORES-1:0][TAG_WIDTH-1:0] per_core_tag_in;
|
||||
wire [NUM_FPCORES-1:0][`INST_FPU_BITS-1:0] per_core_op_type;
|
||||
wire [NUM_FPCORES-1:0][`INST_FMT_BITS-1:0] per_core_fmt;
|
||||
wire [NUM_FPCORES-1:0][`INST_FRM_BITS-1:0] per_core_frm;
|
||||
wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_dataa;
|
||||
wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_datab;
|
||||
wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_datac;
|
||||
|
||||
reg [FPC_BITS-1:0] core_select;
|
||||
reg is_madd, is_sub, is_neg, is_div, is_itof, is_signed;
|
||||
|
||||
always @(*) begin
|
||||
is_madd = 0;
|
||||
is_sub = 0;
|
||||
is_neg = 0;
|
||||
is_div = 0;
|
||||
is_itof = 0;
|
||||
is_signed = 0;
|
||||
case (op_type)
|
||||
`INST_FPU_ADD: begin core_select = FPU_FMA; end
|
||||
`INST_FPU_SUB: begin core_select = FPU_FMA; is_sub = 1; end
|
||||
`INST_FPU_MUL: begin core_select = FPU_FMA; is_neg = 1; end
|
||||
`INST_FPU_MADD: begin core_select = FPU_FMA; is_madd = 1; end
|
||||
`INST_FPU_MSUB: begin core_select = FPU_FMA; is_madd = 1; is_sub = 1; end
|
||||
`INST_FPU_NMADD: begin core_select = FPU_FMA; is_madd = 1; is_neg = 1; end
|
||||
`INST_FPU_NMSUB: begin core_select = FPU_FMA; is_madd = 1; is_sub = 1; is_neg = 1; end
|
||||
`INST_FPU_DIV: begin core_select = FPU_DIVSQRT; is_div = 1; end
|
||||
`INST_FPU_SQRT: begin core_select = FPU_DIVSQRT; end
|
||||
`INST_FPU_F2I: begin core_select = FPU_CVT; is_signed = 1; end
|
||||
`INST_FPU_F2U: begin core_select = FPU_CVT; end
|
||||
`INST_FPU_I2F: begin core_select = FPU_CVT; is_itof = 1; is_signed = 1; end
|
||||
`INST_FPU_U2F: begin core_select = FPU_CVT; is_itof = 1; end
|
||||
default: begin core_select = FPU_NCP; end
|
||||
endcase
|
||||
end
|
||||
|
||||
`RESET_RELAY (fma_reset, reset);
|
||||
`RESET_RELAY (div_reset, reset);
|
||||
`RESET_RELAY (sqrt_reset, reset);
|
||||
`RESET_RELAY (cvt_reset, reset);
|
||||
`RESET_RELAY (ncp_reset, reset);
|
||||
wire [NUM_FPCORES-1:0] per_core_valid_out;
|
||||
wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_result;
|
||||
wire [NUM_FPCORES-1:0][TAG_WIDTH-1:0] per_core_tag_out;
|
||||
wire [NUM_FPCORES-1:0] per_core_has_fflags;
|
||||
fflags_t [NUM_FPCORES-1:0] per_core_fflags;
|
||||
wire [NUM_FPCORES-1:0] per_core_ready_out;
|
||||
|
||||
wire [NUM_LANES-1:0][31:0] dataa_s;
|
||||
wire [NUM_LANES-1:0][31:0] datab_s;
|
||||
wire [NUM_LANES-1:0][31:0] datac_s;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_data
|
||||
assign dataa_s[i] = dataa[i][31:0];
|
||||
assign datab_s[i] = datab[i][31:0];
|
||||
assign datac_s[i] = datac[i][31:0];
|
||||
|
@ -122,23 +93,60 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
`UNUSED_VAR (datab)
|
||||
`UNUSED_VAR (datac)
|
||||
|
||||
// Decode fpu core type
|
||||
wire [FPCORES_BITS-1:0] core_select = op_type[3:2];
|
||||
|
||||
VX_stream_switch #(
|
||||
.DATAW (REQ_DATAW),
|
||||
.NUM_OUTPUTS (NUM_FPCORES)
|
||||
) req_switch (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.sel_in (core_select),
|
||||
.valid_in (valid_in),
|
||||
.ready_in (ready_in),
|
||||
.data_in ({mask_in, tag_in, fmt, frm, dataa_s, datab_s, datac_s, op_type}),
|
||||
.data_out (per_core_data_in),
|
||||
.valid_out (per_core_valid_in),
|
||||
.ready_out (per_core_ready_in)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_FPCORES; ++i) begin : g_per_core_data_in
|
||||
assign {
|
||||
per_core_mask_in[i],
|
||||
per_core_tag_in[i],
|
||||
per_core_fmt[i],
|
||||
per_core_frm[i],
|
||||
per_core_dataa[i],
|
||||
per_core_datab[i],
|
||||
per_core_datac[i],
|
||||
per_core_op_type[i]
|
||||
} = per_core_data_in[i];
|
||||
end
|
||||
|
||||
// FMA core ///////////////////////////////////////////////////////////////
|
||||
|
||||
wire is_madd = per_core_op_type[FPU_FMA][1];
|
||||
wire is_neg = per_core_op_type[FPU_FMA][0];
|
||||
wire is_sub = per_core_fmt[FPU_FMA][1];
|
||||
|
||||
VX_fpu_fma #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.TAG_WIDTH (TAG_WIDTH)
|
||||
) fpu_fma (
|
||||
.clk (clk),
|
||||
.reset (fma_reset),
|
||||
.valid_in (valid_in && (core_select == FPU_FMA)),
|
||||
.reset (reset),
|
||||
.valid_in (per_core_valid_in[FPU_FMA]),
|
||||
.ready_in (per_core_ready_in[FPU_FMA]),
|
||||
.mask_in (mask_in),
|
||||
.tag_in (tag_in),
|
||||
.frm (frm),
|
||||
.mask_in (per_core_mask_in[FPU_FMA]),
|
||||
.tag_in (per_core_tag_in[FPU_FMA]),
|
||||
.frm (per_core_frm[FPU_FMA]),
|
||||
.is_madd (is_madd),
|
||||
.is_sub (is_sub),
|
||||
.is_neg (is_neg),
|
||||
.dataa (dataa_s),
|
||||
.datab (datab_s),
|
||||
.datac (datac_s),
|
||||
.dataa (per_core_dataa[FPU_FMA]),
|
||||
.datab (per_core_datab[FPU_FMA]),
|
||||
.datac (per_core_datac[FPU_FMA]),
|
||||
.has_fflags (per_core_has_fflags[FPU_FMA]),
|
||||
.fflags (per_core_fflags[FPU_FMA]),
|
||||
.result (per_core_result[FPU_FMA]),
|
||||
|
@ -147,25 +155,99 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
.valid_out (per_core_valid_out[FPU_FMA])
|
||||
);
|
||||
|
||||
// Div/Sqrt cores /////////////////////////////////////////////////////////
|
||||
|
||||
wire [1:0] div_sqrt_valid_in;
|
||||
wire [1:0][REQ_DATAW-1:0] div_sqrt_data_in;
|
||||
wire [1:0] div_sqrt_ready_in;
|
||||
|
||||
wire [1:0][NUM_LANES-1:0] div_sqrt_mask_in;
|
||||
wire [1:0][TAG_WIDTH-1:0] div_sqrt_tag_in;
|
||||
wire [1:0][`INST_FPU_BITS-1:0] div_sqrt_op_type;
|
||||
wire [1:0][`INST_FMT_BITS-1:0] div_sqrt_fmt;
|
||||
wire [1:0][`INST_FRM_BITS-1:0] div_sqrt_frm;
|
||||
wire [1:0][NUM_LANES-1:0][31:0] div_sqrt_dataa;
|
||||
wire [1:0][NUM_LANES-1:0][31:0] div_sqrt_datab;
|
||||
wire [1:0][NUM_LANES-1:0][31:0] div_sqrt_datac;
|
||||
|
||||
wire [1:0] div_sqrt_valid_out;
|
||||
wire [1:0][NUM_LANES-1:0][31:0] div_sqrt_result;
|
||||
wire [1:0][TAG_WIDTH-1:0] div_sqrt_tag_out;
|
||||
wire [1:0] div_sqrt_has_fflags;
|
||||
fflags_t [1:0] div_sqrt_fflags;
|
||||
wire [1:0] div_sqrt_ready_out;
|
||||
|
||||
wire div_sqrt_valid_tmp_in;
|
||||
wire [REQ_DATAW-1:0] div_sqrt_data_tmp_in;
|
||||
wire div_sqrt_ready_tmp_in;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (REQ_DATAW)
|
||||
) div_sqrt_req_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (per_core_valid_in[FPU_DIVSQRT]),
|
||||
.ready_in (per_core_ready_in[FPU_DIVSQRT]),
|
||||
.data_in (per_core_data_in[FPU_DIVSQRT]),
|
||||
.data_out (div_sqrt_data_tmp_in),
|
||||
.valid_out (div_sqrt_valid_tmp_in),
|
||||
.ready_out (div_sqrt_ready_tmp_in)
|
||||
);
|
||||
|
||||
wire is_sqrt = div_sqrt_data_tmp_in[0]; // op_type[0]
|
||||
|
||||
VX_stream_switch #(
|
||||
.DATAW (REQ_DATAW),
|
||||
.NUM_OUTPUTS (2)
|
||||
) div_sqrt_req_switch (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.sel_in (is_sqrt),
|
||||
.valid_in (div_sqrt_valid_tmp_in),
|
||||
.ready_in (div_sqrt_ready_tmp_in),
|
||||
.data_in (div_sqrt_data_tmp_in),
|
||||
.data_out (div_sqrt_data_in),
|
||||
.valid_out (div_sqrt_valid_in),
|
||||
.ready_out (div_sqrt_ready_in)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < 2; ++i) begin : g_div_sqrt_data_in
|
||||
assign {
|
||||
div_sqrt_mask_in[i],
|
||||
div_sqrt_tag_in[i],
|
||||
div_sqrt_fmt[i],
|
||||
div_sqrt_frm[i],
|
||||
div_sqrt_dataa[i],
|
||||
div_sqrt_datab[i],
|
||||
div_sqrt_datac[i],
|
||||
div_sqrt_op_type[i]
|
||||
} = div_sqrt_data_in[i];
|
||||
end
|
||||
|
||||
`UNUSED_VAR (div_sqrt_op_type)
|
||||
`UNUSED_VAR (div_sqrt_fmt)
|
||||
`UNUSED_VAR (div_sqrt_datab)
|
||||
`UNUSED_VAR (div_sqrt_datac)
|
||||
|
||||
VX_fpu_div #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.TAG_WIDTH (TAG_WIDTH)
|
||||
) fpu_div (
|
||||
.clk (clk),
|
||||
.reset (div_reset),
|
||||
.valid_in (valid_in && (core_select == FPU_DIVSQRT) && is_div),
|
||||
.ready_in (div_ready_in),
|
||||
.mask_in (mask_in),
|
||||
.tag_in (tag_in),
|
||||
.frm (frm),
|
||||
.dataa (dataa_s),
|
||||
.datab (datab_s),
|
||||
.has_fflags (div_has_fflags),
|
||||
.fflags (div_fflags),
|
||||
.result (div_result),
|
||||
.tag_out (div_tag_out),
|
||||
.valid_out (div_valid_out),
|
||||
.ready_out (div_ready_out)
|
||||
.reset (reset),
|
||||
.valid_in (div_sqrt_valid_in[0]),
|
||||
.ready_in (div_sqrt_ready_in[0]),
|
||||
.mask_in (div_sqrt_mask_in[0]),
|
||||
.tag_in (div_sqrt_tag_in[0]),
|
||||
.frm (div_sqrt_frm[0]),
|
||||
.dataa (div_sqrt_dataa[0]),
|
||||
.datab (div_sqrt_datab[0]),
|
||||
.has_fflags (div_sqrt_has_fflags[0]),
|
||||
.fflags (div_sqrt_fflags[0]),
|
||||
.result (div_sqrt_result[0]),
|
||||
.tag_out (div_sqrt_tag_out[0]),
|
||||
.valid_out (div_sqrt_valid_out[0]),
|
||||
.ready_out (div_sqrt_ready_out[0])
|
||||
);
|
||||
|
||||
VX_fpu_sqrt #(
|
||||
|
@ -173,92 +255,42 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
.TAG_WIDTH (TAG_WIDTH)
|
||||
) fpu_sqrt (
|
||||
.clk (clk),
|
||||
.reset (sqrt_reset),
|
||||
.valid_in (valid_in && (core_select == FPU_DIVSQRT) && ~is_div),
|
||||
.ready_in (sqrt_ready_in),
|
||||
.mask_in (mask_in),
|
||||
.tag_in (tag_in),
|
||||
.frm (frm),
|
||||
.dataa (dataa_s),
|
||||
.has_fflags (sqrt_has_fflags),
|
||||
.fflags (sqrt_fflags),
|
||||
.result (sqrt_result),
|
||||
.tag_out (sqrt_tag_out),
|
||||
.valid_out (sqrt_valid_out),
|
||||
.ready_out (sqrt_ready_out)
|
||||
.reset (reset),
|
||||
.valid_in (div_sqrt_valid_in[1]),
|
||||
.ready_in (div_sqrt_ready_in[1]),
|
||||
.mask_in (div_sqrt_mask_in[1]),
|
||||
.tag_in (div_sqrt_tag_in[1]),
|
||||
.frm (div_sqrt_frm[1]),
|
||||
.dataa (div_sqrt_dataa[1]),
|
||||
.has_fflags (div_sqrt_has_fflags[1]),
|
||||
.fflags (div_sqrt_fflags[1]),
|
||||
.result (div_sqrt_result[1]),
|
||||
.tag_out (div_sqrt_tag_out[1]),
|
||||
.valid_out (div_sqrt_valid_out[1]),
|
||||
.ready_out (div_sqrt_ready_out[1])
|
||||
);
|
||||
|
||||
wire cvt_ret_int_in = ~is_itof;
|
||||
wire cvt_ret_int_out;
|
||||
|
||||
VX_fpu_cvt #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.TAG_WIDTH (TAG_WIDTH+1)
|
||||
) fpu_cvt (
|
||||
.clk (clk),
|
||||
.reset (cvt_reset),
|
||||
.valid_in (valid_in && (core_select == FPU_CVT)),
|
||||
.ready_in (per_core_ready_in[FPU_CVT]),
|
||||
.mask_in (mask_in),
|
||||
.tag_in ({cvt_ret_int_in, tag_in}),
|
||||
.frm (frm),
|
||||
.is_itof (is_itof),
|
||||
.is_signed (is_signed),
|
||||
.dataa (dataa_s),
|
||||
.has_fflags (per_core_has_fflags[FPU_CVT]),
|
||||
.fflags (per_core_fflags[FPU_CVT]),
|
||||
.result (per_core_result[FPU_CVT]),
|
||||
.tag_out ({cvt_ret_int_out, per_core_tag_out[FPU_CVT]}),
|
||||
.valid_out (per_core_valid_out[FPU_CVT]),
|
||||
.ready_out (per_core_ready_out[FPU_CVT])
|
||||
);
|
||||
|
||||
wire ncp_ret_int_in = (op_type == `INST_FPU_CMP)
|
||||
|| `INST_FPU_IS_CLASS(op_type, frm)
|
||||
|| `INST_FPU_IS_MVXW(op_type, frm);
|
||||
wire ncp_ret_int_out;
|
||||
|
||||
wire ncp_ret_sext_in = `INST_FPU_IS_MVXW(op_type, frm);
|
||||
wire ncp_ret_sext_out;
|
||||
|
||||
VX_fpu_ncp #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.TAG_WIDTH (TAG_WIDTH+2)
|
||||
) fpu_ncp (
|
||||
.clk (clk),
|
||||
.reset (ncp_reset),
|
||||
.valid_in (valid_in && (core_select == FPU_NCP)),
|
||||
.ready_in (per_core_ready_in[FPU_NCP]),
|
||||
.mask_in (mask_in),
|
||||
.tag_in ({ncp_ret_sext_in, ncp_ret_int_in, tag_in}),
|
||||
.op_type (op_type),
|
||||
.frm (frm),
|
||||
.dataa (dataa_s),
|
||||
.datab (datab_s),
|
||||
.result (per_core_result[FPU_NCP]),
|
||||
.has_fflags (per_core_has_fflags[FPU_NCP]),
|
||||
.fflags (per_core_fflags[FPU_NCP]),
|
||||
.tag_out ({ncp_ret_sext_out, ncp_ret_int_out, per_core_tag_out[FPU_NCP]}),
|
||||
.valid_out (per_core_valid_out[FPU_NCP]),
|
||||
.ready_out (per_core_ready_out[FPU_NCP])
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
assign per_core_ready_in[FPU_DIVSQRT] = is_div ? div_ready_in : sqrt_ready_in;
|
||||
wire [1:0][RSP_DATAW-1:0] div_sqrt_arb_data_in;
|
||||
for (genvar i = 0; i < 2; ++i) begin : g_div_sqrt_arb_data_in
|
||||
assign div_sqrt_arb_data_in[i] = {
|
||||
div_sqrt_result[i],
|
||||
div_sqrt_has_fflags[i],
|
||||
div_sqrt_fflags[i],
|
||||
div_sqrt_tag_out[i]
|
||||
};
|
||||
end
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATAW (RSP_DATAW),
|
||||
.ARBITER ("R"),
|
||||
.ARBITER ("P"),
|
||||
.OUT_BUF (0)
|
||||
) div_sqrt_arb (
|
||||
) div_sqrt_rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in ({sqrt_valid_out, div_valid_out}),
|
||||
.ready_in ({sqrt_ready_out, div_ready_out}),
|
||||
.data_in ({{sqrt_result, sqrt_has_fflags, sqrt_fflags, sqrt_tag_out},
|
||||
{div_result, div_has_fflags, div_fflags, div_tag_out}}),
|
||||
.valid_in (div_sqrt_valid_out),
|
||||
.ready_in (div_sqrt_ready_out),
|
||||
.data_in (div_sqrt_arb_data_in),
|
||||
.data_out ({
|
||||
per_core_result[FPU_DIVSQRT],
|
||||
per_core_has_fflags[FPU_DIVSQRT],
|
||||
|
@ -270,12 +302,73 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
// CVT core ///////////////////////////////////////////////////////////////
|
||||
|
||||
wire is_itof = per_core_op_type[FPU_CVT][1];
|
||||
wire is_signed = ~per_core_op_type[FPU_CVT][0];
|
||||
wire cvt_ret_int_in = ~is_itof;
|
||||
wire cvt_ret_int_out;
|
||||
|
||||
VX_fpu_cvt #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.TAG_WIDTH (1+TAG_WIDTH)
|
||||
) fpu_cvt (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (per_core_valid_in[FPU_CVT]),
|
||||
.ready_in (per_core_ready_in[FPU_CVT]),
|
||||
.mask_in (per_core_mask_in[FPU_CVT]),
|
||||
.tag_in ({cvt_ret_int_in, per_core_tag_in[FPU_CVT]}),
|
||||
.frm (per_core_frm[FPU_CVT]),
|
||||
.is_itof (is_itof),
|
||||
.is_signed (is_signed),
|
||||
.dataa (per_core_dataa[FPU_CVT]),
|
||||
.has_fflags (per_core_has_fflags[FPU_CVT]),
|
||||
.fflags (per_core_fflags[FPU_CVT]),
|
||||
.result (per_core_result[FPU_CVT]),
|
||||
.tag_out ({cvt_ret_int_out, per_core_tag_out[FPU_CVT]}),
|
||||
.valid_out (per_core_valid_out[FPU_CVT]),
|
||||
.ready_out (per_core_ready_out[FPU_CVT])
|
||||
);
|
||||
|
||||
// NCP core ///////////////////////////////////////////////////////////////
|
||||
|
||||
wire ncp_ret_int_in = (per_core_op_type[FPU_NCP] == `INST_FPU_CMP)
|
||||
|| `INST_FPU_IS_CLASS(per_core_op_type[FPU_NCP], per_core_frm[FPU_NCP])
|
||||
|| `INST_FPU_IS_MVXW(per_core_op_type[FPU_NCP], per_core_frm[FPU_NCP]);
|
||||
wire ncp_ret_int_out;
|
||||
|
||||
wire ncp_ret_sext_in = `INST_FPU_IS_MVXW(per_core_op_type[FPU_NCP], per_core_frm[FPU_NCP]);
|
||||
wire ncp_ret_sext_out;
|
||||
|
||||
VX_fpu_ncp #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.TAG_WIDTH (TAG_WIDTH+2)
|
||||
) fpu_ncp (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (per_core_valid_in[FPU_NCP]),
|
||||
.ready_in (per_core_ready_in[FPU_NCP]),
|
||||
.mask_in (per_core_mask_in[FPU_NCP]),
|
||||
.tag_in ({ncp_ret_sext_in, ncp_ret_int_in, per_core_tag_in[FPU_NCP]}),
|
||||
.op_type (per_core_op_type[FPU_NCP]),
|
||||
.frm (per_core_frm[FPU_NCP]),
|
||||
.dataa (per_core_dataa[FPU_NCP]),
|
||||
.datab (per_core_datab[FPU_NCP]),
|
||||
.result (per_core_result[FPU_NCP]),
|
||||
.has_fflags (per_core_has_fflags[FPU_NCP]),
|
||||
.fflags (per_core_fflags[FPU_NCP]),
|
||||
.tag_out ({ncp_ret_sext_out, ncp_ret_int_out, per_core_tag_out[FPU_NCP]}),
|
||||
.valid_out (per_core_valid_out[FPU_NCP]),
|
||||
.ready_out (per_core_ready_out[FPU_NCP])
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
reg [NUM_FPC-1:0][RSP_DATAW+2-1:0] per_core_data_out;
|
||||
reg [NUM_FPCORES-1:0][RSP_DATAW+2-1:0] per_core_data_out;
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_FPC; ++i) begin
|
||||
for (integer i = 0; i < NUM_FPCORES; ++i) begin
|
||||
per_core_data_out[i][RSP_DATAW+1:2] = {
|
||||
per_core_result[i],
|
||||
per_core_has_fflags[i],
|
||||
|
@ -294,9 +387,9 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
`UNUSED_VAR (op_ret_int_out)
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (NUM_FPC),
|
||||
.NUM_INPUTS (NUM_FPCORES),
|
||||
.DATAW (RSP_DATAW + 2),
|
||||
.ARBITER ("F"),
|
||||
.ARBITER ("R"),
|
||||
.OUT_BUF (OUT_BUF)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
|
@ -310,25 +403,22 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result
|
||||
`ifdef FPU_RV64F
|
||||
reg [`XLEN-1:0] result_r;
|
||||
reg [`XLEN-1:0] result_w;
|
||||
always @(*) begin
|
||||
case (op_ret_int_out)
|
||||
2'b11: result_r = `XLEN'($signed(result_s[i]));
|
||||
2'b01: result_r = {32'h00000000, result_s[i]};
|
||||
default: result_r = {32'hffffffff, result_s[i]};
|
||||
2'b11: result_w = `XLEN'($signed(result_s[i]));
|
||||
2'b01: result_w = {32'h00000000, result_s[i]};
|
||||
default: result_w = {32'hffffffff, result_s[i]};
|
||||
endcase
|
||||
end
|
||||
assign result[i] = result_r;
|
||||
assign result[i] = result_w;
|
||||
`else
|
||||
assign result[i] = result_s[i];
|
||||
`endif
|
||||
end
|
||||
|
||||
// can accept new request?
|
||||
assign ready_in = per_core_ready_in[core_select];
|
||||
|
||||
endmodule
|
||||
|
||||
`endif
|
||||
|
|
|
@ -49,26 +49,27 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
`UNUSED_VAR (frm)
|
||||
localparam DATAW = 3 * 32 + `INST_FRM_BITS;
|
||||
|
||||
wire [NUM_LANES-1:0][DATAW-1:0] data_in;
|
||||
|
||||
wire [NUM_LANES-1:0][3*32-1:0] data_in;
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
|
||||
wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out;
|
||||
|
||||
wire pe_enable;
|
||||
wire [NUM_PES-1:0][3*32-1:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][DATAW-1:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
|
||||
|
||||
reg [NUM_LANES-1:0][31:0] a, b, c;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_select
|
||||
always @(*) begin
|
||||
if (is_madd) begin
|
||||
// MADD / MSUB / NMADD / NMSUB
|
||||
a[i] = is_neg ? {~dataa[i][31], dataa[i][30:0]} : dataa[i];
|
||||
a[i] = {is_neg ^ dataa[i][31], dataa[i][30:0]};
|
||||
b[i] = datab[i];
|
||||
c[i] = (is_neg ^ is_sub) ? {~datac[i][31], datac[i][30:0]} : datac[i];
|
||||
c[i] = {is_neg ^ is_sub ^ datac[i][31], datac[i][30:0]};
|
||||
end else begin
|
||||
if (is_neg) begin
|
||||
// MUL
|
||||
|
@ -77,29 +78,30 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
c[i] = '0;
|
||||
end else begin
|
||||
// ADD / SUB
|
||||
a[i] = 32'h3f800000; // 1.0f
|
||||
b[i] = dataa[i];
|
||||
c[i] = is_sub ? {~datab[i][31], datab[i][30:0]} : datab[i];
|
||||
a[i] = dataa[i];
|
||||
b[i] = 32'h3f800000; // 1.0f
|
||||
c[i] = {is_sub ^ datab[i][31], datab[i][30:0]};
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_data_in
|
||||
assign data_in[i][0 +: 32] = a[i];
|
||||
assign data_in[i][32 +: 32] = b[i];
|
||||
assign data_in[i][64 +: 32] = c[i];
|
||||
assign data_in[i][96 +: `INST_FRM_BITS] = frm;
|
||||
end
|
||||
|
||||
VX_pe_serializer #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.LATENCY (`LATENCY_FMA),
|
||||
.DATA_IN_WIDTH(3*32),
|
||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.DATA_IN_WIDTH (DATAW),
|
||||
.DATA_OUT_WIDTH (`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0), // must be registered for DSPs
|
||||
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
|
||||
.PE_REG (0),
|
||||
.OUT_BUF (2)
|
||||
) pe_serializer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -108,15 +110,17 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
.tag_in ({mask_in, tag_in}),
|
||||
.ready_in (ready_in),
|
||||
.pe_enable (pe_enable),
|
||||
.pe_data_in (pe_data_in),
|
||||
.pe_data_out(pe_data_out),
|
||||
.pe_data_out(pe_data_in),
|
||||
.pe_data_in (pe_data_out),
|
||||
.valid_out (valid_out),
|
||||
.data_out (data_out),
|
||||
.tag_out ({mask_out, tag_out}),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
`UNUSED_VAR (pe_data_in)
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result
|
||||
assign result[i] = data_out[i][0 +: 32];
|
||||
assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS];
|
||||
end
|
||||
|
@ -125,7 +129,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
|
||||
`ifdef QUARTUS
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin : g_fmas
|
||||
acl_fmadd fmadd (
|
||||
.clk (clk),
|
||||
.areset (1'b0),
|
||||
|
@ -143,7 +147,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
|
||||
`elsif VIVADO
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin : g_fmas
|
||||
wire [2:0] tuser;
|
||||
|
||||
xil_fma fma (
|
||||
|
@ -168,7 +172,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
|
||||
`else
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin : g_fmas
|
||||
reg [63:0] r;
|
||||
`UNUSED_VAR (r)
|
||||
fflags_t f;
|
||||
|
@ -177,10 +181,10 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
dpi_fmadd (
|
||||
pe_enable,
|
||||
int'(0),
|
||||
{32'hffffffff, pe_data_in[i][0 +: 32]},
|
||||
{32'hffffffff, pe_data_in[i][32 +: 32]},
|
||||
{32'hffffffff, pe_data_in[i][64 +: 32]},
|
||||
frm,
|
||||
{32'hffffffff, pe_data_in[i][0 +: 32]}, // a
|
||||
{32'hffffffff, pe_data_in[i][32 +: 32]}, // b
|
||||
{32'hffffffff, pe_data_in[i][64 +: 32]}, // c
|
||||
pe_data_in[0][96 +: `INST_FRM_BITS], // frm
|
||||
r,
|
||||
f
|
||||
);
|
||||
|
|
|
@ -90,7 +90,7 @@ module VX_fpu_fpnew
|
|||
|
||||
reg [TAG_WIDTH-1:0] fpu_tag_in, fpu_tag_out;
|
||||
|
||||
reg [2:0][NUM_LANES-1:0][`XLEN-1:0] fpu_operands;
|
||||
logic [2:0][NUM_LANES-1:0][`XLEN-1:0] fpu_operands;
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] fpu_result;
|
||||
fpnew_pkg::status_t fpu_status;
|
||||
|
@ -134,20 +134,13 @@ module VX_fpu_fpnew
|
|||
fpu_op = fpnew_pkg::ADD;
|
||||
fpu_operands[1] = dataa;
|
||||
fpu_operands[2] = datab;
|
||||
end
|
||||
`INST_FPU_SUB: begin
|
||||
fpu_op = fpnew_pkg::ADD;
|
||||
fpu_operands[1] = dataa;
|
||||
fpu_operands[2] = datab;
|
||||
fpu_op_mod = 1;
|
||||
fpu_op_mod = fmt[1]; // FADD or FSUB
|
||||
end
|
||||
`INST_FPU_MUL: begin fpu_op = fpnew_pkg::MUL; end
|
||||
`INST_FPU_MADD: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = fmt[1]; end
|
||||
`INST_FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = ~fmt[1]; end
|
||||
`INST_FPU_DIV: begin fpu_op = fpnew_pkg::DIV; end
|
||||
`INST_FPU_SQRT: begin fpu_op = fpnew_pkg::SQRT; end
|
||||
`INST_FPU_MADD: begin fpu_op = fpnew_pkg::FMADD; end
|
||||
`INST_FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
|
||||
`INST_FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
|
||||
`INST_FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end
|
||||
`ifdef FLEN_64
|
||||
`INST_FPU_F2F: begin fpu_op = fpnew_pkg::F2F; fpu_src_fmt = fmt[0] ? fpnew_pkg::FP32 : fpnew_pkg::FP64; end
|
||||
`endif
|
||||
|
@ -169,7 +162,7 @@ module VX_fpu_fpnew
|
|||
end
|
||||
|
||||
`UNUSED_VAR (mask_in)
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_fpnew_coreses
|
||||
wire [(TAG_WIDTH+1)-1:0] fpu_tag;
|
||||
wire fpu_valid_out_uq;
|
||||
wire fpu_ready_in_uq;
|
||||
|
@ -183,8 +176,7 @@ module VX_fpu_fpnew
|
|||
.Features (FPU_FEATURES),
|
||||
.Implementation (FPU_IMPLEMENTATION),
|
||||
.TagType (logic[(TAG_WIDTH+1)-1:0]),
|
||||
.TrueSIMDClass (1),
|
||||
.EnableSIMDMask (1)
|
||||
.DivSqrtSel (fpnew_pkg::PULP)
|
||||
) fpnew_core (
|
||||
.clk_i (clk),
|
||||
.rst_ni (~reset),
|
||||
|
@ -196,11 +188,11 @@ module VX_fpu_fpnew
|
|||
.dst_fmt_i (fpu_dst_fmt),
|
||||
.int_fmt_i (fpu_int_fmt),
|
||||
.vectorial_op_i (1'b0),
|
||||
.simd_mask_i (mask_in[i]),
|
||||
.simd_mask_i (1'b1),
|
||||
.tag_i ({fpu_tag_in, fpu_has_fflags}),
|
||||
.in_valid_i (fpu_valid_in),
|
||||
.in_ready_o (fpu_ready_in_uq),
|
||||
.flush_i (reset),
|
||||
.flush_i (1'b0),
|
||||
.result_o (fpu_result[i]),
|
||||
.status_o (fpu_status_uq),
|
||||
.tag_o (fpu_tag),
|
||||
|
@ -209,7 +201,7 @@ module VX_fpu_fpnew
|
|||
`UNUSED_PIN (busy_o)
|
||||
);
|
||||
|
||||
if (i == 0) begin
|
||||
if (i == 0) begin : g_output_0
|
||||
assign {fpu_tag_out, fpu_has_fflags_out} = fpu_tag;
|
||||
assign fpu_valid_out = fpu_valid_out_uq;
|
||||
assign fpu_ready_in = fpu_ready_in_uq;
|
||||
|
|
|
@ -45,31 +45,34 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #(
|
|||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
`UNUSED_VAR (frm)
|
||||
localparam DATAW = 2 * 32 + `INST_FRM_BITS + `INST_FPU_BITS;
|
||||
|
||||
wire [NUM_LANES-1:0][DATAW-1:0] data_in;
|
||||
|
||||
wire [NUM_LANES-1:0][2*32-1:0] data_in;
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
|
||||
fflags_t [NUM_LANES-1:0] fflags_out;
|
||||
|
||||
wire pe_enable;
|
||||
wire [NUM_PES-1:0][2*32-1:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][DATAW-1:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_data_in
|
||||
assign data_in[i][0 +: 32] = dataa[i];
|
||||
assign data_in[i][32 +: 32] = datab[i];
|
||||
assign data_in[i][64 +: `INST_FRM_BITS] = frm;
|
||||
assign data_in[i][64 + `INST_FRM_BITS +: `INST_FPU_BITS] = op_type;
|
||||
end
|
||||
|
||||
VX_pe_serializer #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.LATENCY (`LATENCY_FNCP),
|
||||
.DATA_IN_WIDTH(2*32),
|
||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.DATA_IN_WIDTH (DATAW),
|
||||
.DATA_OUT_WIDTH (`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG (0),
|
||||
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
|
||||
.OUT_BUF (2)
|
||||
) pe_serializer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -78,28 +81,31 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #(
|
|||
.tag_in ({mask_in, tag_in}),
|
||||
.ready_in (ready_in),
|
||||
.pe_enable (pe_enable),
|
||||
.pe_data_in (pe_data_in),
|
||||
.pe_data_out(pe_data_out),
|
||||
.pe_data_out(pe_data_in),
|
||||
.pe_data_in (pe_data_out),
|
||||
.valid_out (valid_out),
|
||||
.data_out (data_out),
|
||||
.tag_out ({mask_out, tag_out}),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
`UNUSED_VAR (pe_data_in)
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result
|
||||
assign result[i] = data_out[i][0 +: 32];
|
||||
assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin : g_fncp_units
|
||||
VX_fncp_unit #(
|
||||
.LATENCY (`LATENCY_FNCP)
|
||||
.LATENCY (`LATENCY_FNCP),
|
||||
.OUT_REG (1)
|
||||
) fncp_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (pe_enable),
|
||||
.frm (frm),
|
||||
.op_type (op_type),
|
||||
.frm (pe_data_in[0][64 +: `INST_FRM_BITS]),
|
||||
.op_type (pe_data_in[0][64 + `INST_FRM_BITS +: `INST_FPU_BITS]),
|
||||
.dataa (pe_data_in[i][0 +: 32]),
|
||||
.datab (pe_data_in[i][32 +: 32]),
|
||||
.result (pe_data_out[i][0 +: 32]),
|
||||
|
|
|
@ -43,43 +43,51 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #(
|
|||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
localparam DATAW = 32 + `INST_FRM_BITS;
|
||||
|
||||
`UNUSED_VAR (frm)
|
||||
wire [NUM_LANES-1:0][DATAW-1:0] data_in;
|
||||
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
|
||||
wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out;
|
||||
|
||||
wire pe_enable;
|
||||
wire [NUM_PES-1:0][31:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][DATAW-1:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_data_in
|
||||
assign data_in[i][0 +: 32] = dataa[i];
|
||||
assign data_in[i][32 +: `INST_FRM_BITS] = frm;
|
||||
end
|
||||
|
||||
VX_pe_serializer #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.LATENCY (`LATENCY_FSQRT),
|
||||
.DATA_IN_WIDTH(32),
|
||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.DATA_IN_WIDTH (DATAW),
|
||||
.DATA_OUT_WIDTH (`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG (0),
|
||||
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
|
||||
.OUT_BUF (2)
|
||||
) pe_serializer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in),
|
||||
.data_in (dataa),
|
||||
.data_in (data_in),
|
||||
.tag_in ({mask_in, tag_in}),
|
||||
.ready_in (ready_in),
|
||||
.pe_enable (pe_enable),
|
||||
.pe_data_in (pe_data_in),
|
||||
.pe_data_out(pe_data_out),
|
||||
.pe_data_out(pe_data_in),
|
||||
.pe_data_in (pe_data_out),
|
||||
.valid_out (valid_out),
|
||||
.data_out (data_out),
|
||||
.tag_out ({mask_out, tag_out}),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
`UNUSED_VAR (pe_data_in)
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result
|
||||
assign result[i] = data_out[i][0 +: 32];
|
||||
assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS];
|
||||
end
|
||||
|
@ -88,12 +96,12 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #(
|
|||
|
||||
`ifdef QUARTUS
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin : g_fsqrts
|
||||
acl_fsqrt fsqrt (
|
||||
.clk (clk),
|
||||
.areset (1'b0),
|
||||
.en (pe_enable),
|
||||
.a (pe_data_in[i]),
|
||||
.a (pe_data_in[i][0 +: 32]),
|
||||
.q (pe_data_out[i][0 +: 32])
|
||||
);
|
||||
assign pe_data_out[i][32 +: `FP_FLAGS_BITS] = 'x;
|
||||
|
@ -105,14 +113,14 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #(
|
|||
|
||||
`elsif VIVADO
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin : g_fsqrts
|
||||
wire tuser;
|
||||
|
||||
xil_fsqrt fsqrt (
|
||||
.aclk (clk),
|
||||
.aclken (pe_enable),
|
||||
.s_axis_a_tvalid (1'b1),
|
||||
.s_axis_a_tdata (pe_data_in[i]),
|
||||
.s_axis_a_tdata (pe_data_in[i][0 +: 32]),
|
||||
`UNUSED_PIN (m_axis_result_tvalid),
|
||||
.m_axis_result_tdata (pe_data_out[i][0 +: 32]),
|
||||
.m_axis_result_tuser (tuser)
|
||||
|
@ -126,7 +134,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #(
|
|||
|
||||
`else
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin : g_fsqrts
|
||||
reg [63:0] r;
|
||||
`UNUSED_VAR (r)
|
||||
fflags_t f;
|
||||
|
@ -135,8 +143,8 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #(
|
|||
dpi_fsqrt (
|
||||
pe_enable,
|
||||
int'(0),
|
||||
{32'hffffffff, pe_data_in[i]},
|
||||
frm,
|
||||
{32'hffffffff, pe_data_in[i][0 +: 32]}, // a
|
||||
pe_data_in[0][32 +: `INST_FRM_BITS], // frm
|
||||
r,
|
||||
f
|
||||
);
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -16,18 +16,18 @@
|
|||
interface VX_decode_sched_if ();
|
||||
|
||||
wire valid;
|
||||
wire is_wstall;
|
||||
wire unlock;
|
||||
wire [`NW_WIDTH-1:0] wid;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output is_wstall,
|
||||
output unlock,
|
||||
output wid
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input is_wstall,
|
||||
input unlock,
|
||||
input wid
|
||||
);
|
||||
|
||||
|
|
|
@ -16,11 +16,13 @@
|
|||
`TRACING_OFF
|
||||
module VX_avs_adapter #(
|
||||
parameter DATA_WIDTH = 1,
|
||||
parameter ADDR_WIDTH = 1,
|
||||
parameter ADDR_WIDTH_IN = 1,
|
||||
parameter ADDR_WIDTH_OUT= 32,
|
||||
parameter BURST_WIDTH = 1,
|
||||
parameter NUM_BANKS = 1,
|
||||
parameter TAG_WIDTH = 1,
|
||||
parameter RD_QUEUE_SIZE = 1,
|
||||
parameter BANK_INTERLEAVE= 0,
|
||||
parameter REQ_OUT_BUF = 0,
|
||||
parameter RSP_OUT_BUF = 0
|
||||
) (
|
||||
|
@ -31,7 +33,7 @@ module VX_avs_adapter #(
|
|||
input wire mem_req_valid,
|
||||
input wire mem_req_rw,
|
||||
input wire [DATA_WIDTH/8-1:0] mem_req_byteen,
|
||||
input wire [ADDR_WIDTH-1:0] mem_req_addr,
|
||||
input wire [ADDR_WIDTH_IN-1:0] mem_req_addr,
|
||||
input wire [DATA_WIDTH-1:0] mem_req_data,
|
||||
input wire [TAG_WIDTH-1:0] mem_req_tag,
|
||||
output wire mem_req_ready,
|
||||
|
@ -45,7 +47,7 @@ module VX_avs_adapter #(
|
|||
// AVS bus
|
||||
output wire [DATA_WIDTH-1:0] avs_writedata [NUM_BANKS],
|
||||
input wire [DATA_WIDTH-1:0] avs_readdata [NUM_BANKS],
|
||||
output wire [ADDR_WIDTH-1:0] avs_address [NUM_BANKS],
|
||||
output wire [ADDR_WIDTH_OUT-1:0] avs_address [NUM_BANKS],
|
||||
input wire avs_waitrequest [NUM_BANKS],
|
||||
output wire avs_write [NUM_BANKS],
|
||||
output wire avs_read [NUM_BANKS],
|
||||
|
@ -53,59 +55,66 @@ module VX_avs_adapter #(
|
|||
output wire [BURST_WIDTH-1:0] avs_burstcount [NUM_BANKS],
|
||||
input wire avs_readdatavalid [NUM_BANKS]
|
||||
);
|
||||
localparam DATA_SIZE = DATA_WIDTH/8;
|
||||
localparam RD_QUEUE_ADDR_WIDTH = `CLOG2(RD_QUEUE_SIZE+1);
|
||||
localparam BANK_ADDRW = `LOG2UP(NUM_BANKS);
|
||||
localparam LOG2_NUM_BANKS = `CLOG2(NUM_BANKS);
|
||||
localparam BANK_OFFSETW = ADDR_WIDTH - LOG2_NUM_BANKS;
|
||||
localparam DATA_SIZE = DATA_WIDTH/8;
|
||||
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
|
||||
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
|
||||
localparam DST_ADDR_WDITH = ADDR_WIDTH_OUT + BANK_SEL_BITS; // to input space
|
||||
localparam BANK_OFFSETW = DST_ADDR_WDITH - BANK_SEL_BITS;
|
||||
|
||||
`STATIC_ASSERT ((DST_ADDR_WDITH >= ADDR_WIDTH_IN), ("invalid address width: current=%0d, expected=%0d", DST_ADDR_WDITH, ADDR_WIDTH_IN))
|
||||
|
||||
// Requests handling //////////////////////////////////////////////////////
|
||||
|
||||
wire [NUM_BANKS-1:0] req_queue_push, req_queue_pop;
|
||||
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] req_queue_tag_out;
|
||||
wire [NUM_BANKS-1:0] req_queue_going_full;
|
||||
wire [NUM_BANKS-1:0][RD_QUEUE_ADDR_WIDTH-1:0] req_queue_size;
|
||||
wire [BANK_ADDRW-1:0] req_bank_sel;
|
||||
wire [BANK_OFFSETW-1:0] req_bank_off;
|
||||
wire [NUM_BANKS-1:0] bank_req_ready;
|
||||
|
||||
if (NUM_BANKS > 1) begin
|
||||
assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0];
|
||||
end else begin
|
||||
wire [BANK_OFFSETW-1:0] req_bank_off;
|
||||
wire [BANK_SEL_WIDTH-1:0] req_bank_sel;
|
||||
|
||||
wire [DST_ADDR_WDITH-1:0] mem_req_addr_out = DST_ADDR_WDITH'(mem_req_addr);
|
||||
|
||||
if (NUM_BANKS > 1) begin : g_bank_sel
|
||||
if (BANK_INTERLEAVE) begin : g_interleave
|
||||
assign req_bank_sel = mem_req_addr_out[BANK_SEL_BITS-1:0];
|
||||
assign req_bank_off = mem_req_addr_out[BANK_SEL_BITS +: BANK_OFFSETW];
|
||||
end else begin : g_no_interleave
|
||||
assign req_bank_sel = mem_req_addr_out[BANK_OFFSETW +: BANK_SEL_BITS];
|
||||
assign req_bank_off = mem_req_addr_out[BANK_OFFSETW-1:0];
|
||||
end
|
||||
end else begin : g_no_bank_sel
|
||||
assign req_bank_sel = '0;
|
||||
assign req_bank_off = mem_req_addr_out;
|
||||
end
|
||||
|
||||
assign req_bank_off = mem_req_addr[ADDR_WIDTH-1:LOG2_NUM_BANKS];
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_req_queue_push
|
||||
assign req_queue_push[i] = mem_req_valid && ~mem_req_rw && bank_req_ready[i] && (req_bank_sel == i);
|
||||
end
|
||||
|
||||
`RESET_RELAY_EX (bank_reset, reset, NUM_BANKS, 1);
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_pending_sizes
|
||||
VX_pending_size #(
|
||||
.SIZE (RD_QUEUE_SIZE)
|
||||
) pending_size (
|
||||
.clk (clk),
|
||||
.reset (bank_reset[i]),
|
||||
.reset (reset),
|
||||
.incr (req_queue_push[i]),
|
||||
.decr (req_queue_pop[i]),
|
||||
`UNUSED_PIN (empty),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
.full (req_queue_going_full[i]),
|
||||
`UNUSED_PIN (alm_full),
|
||||
.size (req_queue_size[i])
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
`UNUSED_VAR (req_queue_size)
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_rd_req_queues
|
||||
VX_fifo_queue #(
|
||||
.DATAW (TAG_WIDTH),
|
||||
.DEPTH (RD_QUEUE_SIZE)
|
||||
) rd_req_queue (
|
||||
.clk (clk),
|
||||
.reset (bank_reset[i]),
|
||||
.reset (reset),
|
||||
.push (req_queue_push[i]),
|
||||
.pop (req_queue_pop[i]),
|
||||
.data_in (mem_req_tag),
|
||||
|
@ -118,7 +127,7 @@ module VX_avs_adapter #(
|
|||
);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_req_out_bufs
|
||||
wire valid_out;
|
||||
wire rw_out;
|
||||
wire [DATA_SIZE-1:0] byteen_out;
|
||||
|
@ -135,7 +144,7 @@ module VX_avs_adapter #(
|
|||
.OUT_REG (`TO_OUT_BUF_REG(REQ_OUT_BUF))
|
||||
) req_out_buf (
|
||||
.clk (clk),
|
||||
.reset (bank_reset[i]),
|
||||
.reset (reset),
|
||||
.valid_in (valid_out_w),
|
||||
.ready_in (ready_out_w),
|
||||
.data_in ({mem_req_rw, mem_req_byteen, req_bank_off, mem_req_data}),
|
||||
|
@ -146,7 +155,7 @@ module VX_avs_adapter #(
|
|||
|
||||
assign avs_read[i] = valid_out && ~rw_out;
|
||||
assign avs_write[i] = valid_out && rw_out;
|
||||
assign avs_address[i] = ADDR_WIDTH'(addr_out);
|
||||
assign avs_address[i] = ADDR_WIDTH_OUT'(addr_out);
|
||||
assign avs_byteenable[i] = byteen_out;
|
||||
assign avs_writedata[i] = data_out;
|
||||
assign avs_burstcount[i] = BURST_WIDTH'(1);
|
||||
|
@ -155,11 +164,7 @@ module VX_avs_adapter #(
|
|||
assign bank_req_ready[i] = ready_out_w && ~req_queue_going_full[i];
|
||||
end
|
||||
|
||||
if (NUM_BANKS > 1) begin
|
||||
assign mem_req_ready = bank_req_ready[req_bank_sel];
|
||||
end else begin
|
||||
assign mem_req_ready = bank_req_ready;
|
||||
end
|
||||
assign mem_req_ready = bank_req_ready[req_bank_sel];
|
||||
|
||||
// Responses handling /////////////////////////////////////////////////////
|
||||
|
||||
|
@ -170,14 +175,13 @@ module VX_avs_adapter #(
|
|||
wire [NUM_BANKS-1:0][DATA_WIDTH-1:0] rsp_queue_data_out;
|
||||
wire [NUM_BANKS-1:0] rsp_queue_empty;
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_rd_rsp_queues
|
||||
VX_fifo_queue #(
|
||||
.DATAW (DATA_WIDTH),
|
||||
.DEPTH (RD_QUEUE_SIZE)
|
||||
) rd_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (bank_reset[i]),
|
||||
.reset (reset),
|
||||
.push (avs_readdatavalid[i]),
|
||||
.pop (req_queue_pop[i]),
|
||||
.data_in (avs_readdata[i]),
|
||||
|
@ -190,8 +194,8 @@ module VX_avs_adapter #(
|
|||
);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign rsp_arb_valid_in[i] = !rsp_queue_empty[i];
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_rsp_arbs
|
||||
assign rsp_arb_valid_in[i] = ~rsp_queue_empty[i];
|
||||
assign rsp_arb_data_in[i] = {rsp_queue_data_out[i], req_queue_tag_out[i]};
|
||||
assign req_queue_pop[i] = rsp_arb_valid_in[i] && rsp_arb_ready_in[i];
|
||||
end
|
||||
|
@ -199,7 +203,7 @@ module VX_avs_adapter #(
|
|||
VX_stream_arb #(
|
||||
.NUM_INPUTS (NUM_BANKS),
|
||||
.DATAW (DATA_WIDTH + TAG_WIDTH),
|
||||
.ARBITER ("F"),
|
||||
.ARBITER ("R"),
|
||||
.OUT_BUF (RSP_OUT_BUF)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
|
|
|
@ -16,10 +16,13 @@
|
|||
`TRACING_OFF
|
||||
module VX_axi_adapter #(
|
||||
parameter DATA_WIDTH = 512,
|
||||
parameter ADDR_WIDTH = 32,
|
||||
parameter TAG_WIDTH = 8,
|
||||
parameter ADDR_WIDTH_IN = 1,
|
||||
parameter ADDR_WIDTH_OUT = 32,
|
||||
parameter TAG_WIDTH_IN = 8,
|
||||
parameter TAG_WIDTH_OUT = 8,
|
||||
parameter NUM_BANKS = 1,
|
||||
parameter AVS_ADDR_WIDTH = (ADDR_WIDTH - `CLOG2(DATA_WIDTH/8)),
|
||||
parameter BANK_INTERLEAVE= 0,
|
||||
parameter TAG_BUFFER_SIZE= 32,
|
||||
parameter RSP_OUT_BUF = 0
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -29,22 +32,22 @@ module VX_axi_adapter #(
|
|||
input wire mem_req_valid,
|
||||
input wire mem_req_rw,
|
||||
input wire [DATA_WIDTH/8-1:0] mem_req_byteen,
|
||||
input wire [AVS_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
input wire [ADDR_WIDTH_IN-1:0] mem_req_addr,
|
||||
input wire [DATA_WIDTH-1:0] mem_req_data,
|
||||
input wire [TAG_WIDTH-1:0] mem_req_tag,
|
||||
input wire [TAG_WIDTH_IN-1:0] mem_req_tag,
|
||||
output wire mem_req_ready,
|
||||
|
||||
// Vortex response
|
||||
output wire mem_rsp_valid,
|
||||
output wire [DATA_WIDTH-1:0] mem_rsp_data,
|
||||
output wire [TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
output wire [TAG_WIDTH_IN-1:0] mem_rsp_tag,
|
||||
input wire mem_rsp_ready,
|
||||
|
||||
// AXI write request address channel
|
||||
output wire m_axi_awvalid [NUM_BANKS],
|
||||
input wire m_axi_awready [NUM_BANKS],
|
||||
output wire [ADDR_WIDTH-1:0] m_axi_awaddr [NUM_BANKS],
|
||||
output wire [TAG_WIDTH-1:0] m_axi_awid [NUM_BANKS],
|
||||
output wire [ADDR_WIDTH_OUT-1:0] m_axi_awaddr [NUM_BANKS],
|
||||
output wire [TAG_WIDTH_OUT-1:0] m_axi_awid [NUM_BANKS],
|
||||
output wire [7:0] m_axi_awlen [NUM_BANKS],
|
||||
output wire [2:0] m_axi_awsize [NUM_BANKS],
|
||||
output wire [1:0] m_axi_awburst [NUM_BANKS],
|
||||
|
@ -64,14 +67,14 @@ module VX_axi_adapter #(
|
|||
// AXI write response channel
|
||||
input wire m_axi_bvalid [NUM_BANKS],
|
||||
output wire m_axi_bready [NUM_BANKS],
|
||||
input wire [TAG_WIDTH-1:0] m_axi_bid [NUM_BANKS],
|
||||
input wire [TAG_WIDTH_OUT-1:0] m_axi_bid [NUM_BANKS],
|
||||
input wire [1:0] m_axi_bresp [NUM_BANKS],
|
||||
|
||||
// AXI read address channel
|
||||
output wire m_axi_arvalid [NUM_BANKS],
|
||||
input wire m_axi_arready [NUM_BANKS],
|
||||
output wire [ADDR_WIDTH-1:0] m_axi_araddr [NUM_BANKS],
|
||||
output wire [TAG_WIDTH-1:0] m_axi_arid [NUM_BANKS],
|
||||
output wire [ADDR_WIDTH_OUT-1:0] m_axi_araddr [NUM_BANKS],
|
||||
output wire [TAG_WIDTH_OUT-1:0] m_axi_arid [NUM_BANKS],
|
||||
output wire [7:0] m_axi_arlen [NUM_BANKS],
|
||||
output wire [2:0] m_axi_arsize [NUM_BANKS],
|
||||
output wire [1:0] m_axi_arburst [NUM_BANKS],
|
||||
|
@ -86,68 +89,95 @@ module VX_axi_adapter #(
|
|||
output wire m_axi_rready [NUM_BANKS],
|
||||
input wire [DATA_WIDTH-1:0] m_axi_rdata [NUM_BANKS],
|
||||
input wire m_axi_rlast [NUM_BANKS],
|
||||
input wire [TAG_WIDTH-1:0] m_axi_rid [NUM_BANKS],
|
||||
input wire [TAG_WIDTH_OUT-1:0] m_axi_rid [NUM_BANKS],
|
||||
input wire [1:0] m_axi_rresp [NUM_BANKS]
|
||||
);
|
||||
localparam AXSIZE = `CLOG2(DATA_WIDTH/8);
|
||||
localparam BANK_ADDRW = `LOG2UP(NUM_BANKS);
|
||||
localparam LOG2_NUM_BANKS = `CLOG2(NUM_BANKS);
|
||||
localparam DATA_SIZE = `CLOG2(DATA_WIDTH/8);
|
||||
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
|
||||
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
|
||||
localparam DST_ADDR_WDITH = ADDR_WIDTH_OUT + BANK_SEL_BITS - `CLOG2(DATA_WIDTH/8); // to input space
|
||||
localparam BANK_OFFSETW = DST_ADDR_WDITH - BANK_SEL_BITS;
|
||||
|
||||
wire [BANK_ADDRW-1:0] req_bank_sel;
|
||||
`STATIC_ASSERT ((DST_ADDR_WDITH >= ADDR_WIDTH_IN), ("invalid address width: current=%0d, expected=%0d", DST_ADDR_WDITH, ADDR_WIDTH_IN))
|
||||
|
||||
if (NUM_BANKS > 1) begin
|
||||
assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0];
|
||||
end else begin
|
||||
assign req_bank_sel = '0;
|
||||
end
|
||||
wire [BANK_OFFSETW-1:0] req_bank_off;
|
||||
wire [BANK_SEL_WIDTH-1:0] req_bank_sel;
|
||||
|
||||
wire mem_req_fire = mem_req_valid && mem_req_ready;
|
||||
wire [DST_ADDR_WDITH-1:0] mem_req_addr_out = DST_ADDR_WDITH'(mem_req_addr);
|
||||
|
||||
reg [NUM_BANKS-1:0] m_axi_aw_ack;
|
||||
reg [NUM_BANKS-1:0] m_axi_w_ack;
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
wire m_axi_aw_fire = m_axi_awvalid[i] && m_axi_awready[i];
|
||||
wire m_axi_w_fire = m_axi_wvalid[i] && m_axi_wready[i];
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
m_axi_aw_ack[i] <= 0;
|
||||
m_axi_w_ack[i] <= 0;
|
||||
end else begin
|
||||
if (mem_req_fire && (req_bank_sel == i)) begin
|
||||
m_axi_aw_ack[i] <= 0;
|
||||
m_axi_w_ack[i] <= 0;
|
||||
end else begin
|
||||
if (m_axi_aw_fire)
|
||||
m_axi_aw_ack[i] <= 1;
|
||||
if (m_axi_w_fire)
|
||||
m_axi_w_ack[i] <= 1;
|
||||
end
|
||||
end
|
||||
if (NUM_BANKS > 1) begin : g_bank_sel
|
||||
if (BANK_INTERLEAVE) begin : g_interleave
|
||||
assign req_bank_sel = mem_req_addr_out[BANK_SEL_BITS-1:0];
|
||||
assign req_bank_off = mem_req_addr_out[BANK_SEL_BITS +: BANK_OFFSETW];
|
||||
end else begin : g_no_interleave
|
||||
assign req_bank_sel = mem_req_addr_out[BANK_OFFSETW +: BANK_SEL_BITS];
|
||||
assign req_bank_off = mem_req_addr_out[BANK_OFFSETW-1:0];
|
||||
end
|
||||
end else begin : g_no_bank_sel
|
||||
assign req_bank_sel = '0;
|
||||
assign req_bank_off = mem_req_addr_out;
|
||||
end
|
||||
|
||||
wire axi_write_ready [NUM_BANKS];
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign axi_write_ready[i] = (m_axi_awready[i] || m_axi_aw_ack[i])
|
||||
&& (m_axi_wready[i] || m_axi_w_ack[i]);
|
||||
// AXi write request synchronization
|
||||
reg [NUM_BANKS-1:0] m_axi_aw_ack, m_axi_w_ack, axi_write_ready;
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_ready
|
||||
VX_axi_write_ack axi_write_ack (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.awvalid(m_axi_awvalid[i]),
|
||||
.awready(m_axi_awready[i]),
|
||||
.wvalid (m_axi_wvalid[i]),
|
||||
.wready (m_axi_wready[i]),
|
||||
.aw_ack (m_axi_aw_ack[i]),
|
||||
.w_ack (m_axi_w_ack[i]),
|
||||
.tx_rdy (axi_write_ready[i]),
|
||||
`UNUSED_PIN (tx_ack)
|
||||
);
|
||||
end
|
||||
|
||||
// Vortex request ack
|
||||
if (NUM_BANKS > 1) begin
|
||||
assign mem_req_ready = mem_req_rw ? axi_write_ready[req_bank_sel] : m_axi_arready[req_bank_sel];
|
||||
end else begin
|
||||
assign mem_req_ready = mem_req_rw ? axi_write_ready[0] : m_axi_arready[0];
|
||||
wire tbuf_full;
|
||||
wire [TAG_WIDTH_OUT-1:0] mem_req_tag_out;
|
||||
wire [TAG_WIDTH_OUT-1:0] mem_rsp_tag_out;
|
||||
|
||||
// handle tag width mismatch
|
||||
if (TAG_WIDTH_IN > TAG_WIDTH_OUT) begin : g_tag_buf
|
||||
localparam TBUF_ADDRW = `CLOG2(TAG_BUFFER_SIZE);
|
||||
wire [TBUF_ADDRW-1:0] tbuf_waddr, tbuf_raddr;
|
||||
VX_index_buffer #(
|
||||
.DATAW (TAG_WIDTH_IN),
|
||||
.SIZE (TAG_BUFFER_SIZE)
|
||||
) tag_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.acquire_en (mem_req_valid && !mem_req_rw && mem_req_ready),
|
||||
.write_addr (tbuf_waddr),
|
||||
.write_data (mem_req_tag),
|
||||
.read_data (mem_rsp_tag),
|
||||
.read_addr (tbuf_raddr),
|
||||
.release_en (mem_rsp_valid && mem_rsp_ready),
|
||||
.full (tbuf_full),
|
||||
`UNUSED_PIN (empty)
|
||||
);
|
||||
assign mem_req_tag_out = TAG_WIDTH_OUT'(tbuf_waddr);
|
||||
assign tbuf_raddr = mem_rsp_tag_out[TBUF_ADDRW-1:0];
|
||||
`UNUSED_VAR (mem_rsp_tag_out)
|
||||
end else begin : g_no_tag_buf
|
||||
assign tbuf_full = 0;
|
||||
assign mem_req_tag_out = TAG_WIDTH_OUT'(mem_req_tag);
|
||||
assign mem_rsp_tag = mem_rsp_tag_out[TAG_WIDTH_IN-1:0];
|
||||
`UNUSED_VAR (mem_rsp_tag_out)
|
||||
end
|
||||
|
||||
// request ack
|
||||
assign mem_req_ready = (mem_req_rw ? axi_write_ready[req_bank_sel] : m_axi_arready[req_bank_sel]) && ~tbuf_full;
|
||||
|
||||
// AXI write request address channel
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i];
|
||||
assign m_axi_awaddr[i] = (ADDR_WIDTH'(mem_req_addr) >> LOG2_NUM_BANKS) << AXSIZE;
|
||||
assign m_axi_awid[i] = mem_req_tag;
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_addr
|
||||
assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~tbuf_full && ~m_axi_aw_ack[i];
|
||||
assign m_axi_awaddr[i] = ADDR_WIDTH_OUT'(req_bank_off) << `CLOG2(DATA_WIDTH/8);
|
||||
assign m_axi_awid[i] = mem_req_tag_out;
|
||||
assign m_axi_awlen[i] = 8'b00000000;
|
||||
assign m_axi_awsize[i] = 3'(AXSIZE);
|
||||
assign m_axi_awsize[i] = 3'(DATA_SIZE);
|
||||
assign m_axi_awburst[i] = 2'b00;
|
||||
assign m_axi_awlock[i] = 2'b00;
|
||||
assign m_axi_awcache[i] = 4'b0000;
|
||||
|
@ -157,29 +187,29 @@ module VX_axi_adapter #(
|
|||
end
|
||||
|
||||
// AXI write request data channel
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_w_ack[i];
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_data
|
||||
assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~tbuf_full && ~m_axi_w_ack[i];
|
||||
assign m_axi_wdata[i] = mem_req_data;
|
||||
assign m_axi_wstrb[i] = mem_req_byteen;
|
||||
assign m_axi_wlast[i] = 1'b1;
|
||||
end
|
||||
|
||||
// AXI write response channel (ignore)
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_rsp
|
||||
`UNUSED_VAR (m_axi_bvalid[i])
|
||||
`UNUSED_VAR (m_axi_bid[i])
|
||||
`UNUSED_VAR (m_axi_bresp[i])
|
||||
assign m_axi_bready[i] = 1'b1;
|
||||
`RUNTIME_ASSERT(~m_axi_bvalid[i] || m_axi_bresp[i] == 0, ("%t: *** AXI response error", $time));
|
||||
`RUNTIME_ASSERT(~m_axi_bvalid[i] || m_axi_bresp[i] == 0, ("%t: *** AXI response error", $time))
|
||||
end
|
||||
|
||||
// AXI read request channel
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i);
|
||||
assign m_axi_araddr[i] = (ADDR_WIDTH'(mem_req_addr) >> LOG2_NUM_BANKS) << AXSIZE;
|
||||
assign m_axi_arid[i] = mem_req_tag;
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_req
|
||||
assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i) && ~tbuf_full;
|
||||
assign m_axi_araddr[i] = ADDR_WIDTH_OUT'(req_bank_off) << `CLOG2(DATA_WIDTH/8);
|
||||
assign m_axi_arid[i] = mem_req_tag_out;
|
||||
assign m_axi_arlen[i] = 8'b00000000;
|
||||
assign m_axi_arsize[i] = 3'(AXSIZE);
|
||||
assign m_axi_arsize[i] = 3'(DATA_SIZE);
|
||||
assign m_axi_arburst[i] = 2'b00;
|
||||
assign m_axi_arlock[i] = 2'b00;
|
||||
assign m_axi_arcache[i] = 4'b0000;
|
||||
|
@ -191,23 +221,22 @@ module VX_axi_adapter #(
|
|||
// AXI read response channel
|
||||
|
||||
wire [NUM_BANKS-1:0] rsp_arb_valid_in;
|
||||
wire [NUM_BANKS-1:0][DATA_WIDTH+TAG_WIDTH-1:0] rsp_arb_data_in;
|
||||
wire [NUM_BANKS-1:0][DATA_WIDTH+TAG_WIDTH_OUT-1:0] rsp_arb_data_in;
|
||||
wire [NUM_BANKS-1:0] rsp_arb_ready_in;
|
||||
|
||||
`UNUSED_VAR (m_axi_rlast)
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_rsp
|
||||
assign rsp_arb_valid_in[i] = m_axi_rvalid[i];
|
||||
assign rsp_arb_data_in[i] = {m_axi_rdata[i], m_axi_rid[i]};
|
||||
assign m_axi_rready[i] = rsp_arb_ready_in[i];
|
||||
`RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rlast[i] == 1, ("%t: *** AXI response error", $time));
|
||||
`RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rresp[i] == 0, ("%t: *** AXI response error", $time));
|
||||
`RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rlast[i] == 1, ("%t: *** AXI response error", $time))
|
||||
`RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rresp[i] == 0, ("%t: *** AXI response error", $time))
|
||||
`UNUSED_VAR (m_axi_rlast[i])
|
||||
end
|
||||
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (NUM_BANKS),
|
||||
.DATAW (DATA_WIDTH + TAG_WIDTH),
|
||||
.ARBITER ("F"),
|
||||
.DATAW (DATA_WIDTH + TAG_WIDTH_OUT),
|
||||
.ARBITER ("R"),
|
||||
.OUT_BUF (RSP_OUT_BUF)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
|
@ -215,7 +244,7 @@ module VX_axi_adapter #(
|
|||
.valid_in (rsp_arb_valid_in),
|
||||
.data_in (rsp_arb_data_in),
|
||||
.ready_in (rsp_arb_ready_in),
|
||||
.data_out ({mem_rsp_data, mem_rsp_tag}),
|
||||
.data_out ({mem_rsp_data, mem_rsp_tag_out}),
|
||||
.valid_out (mem_rsp_valid),
|
||||
.ready_out (mem_rsp_ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
|
|
60
hw/rtl/libs/VX_axi_write_ack.sv
Normal file
60
hw/rtl/libs/VX_axi_write_ack.sv
Normal file
|
@ -0,0 +1,60 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
`TRACING_OFF
|
||||
module VX_axi_write_ack (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire awvalid,
|
||||
input wire awready,
|
||||
input wire wvalid,
|
||||
input wire wready,
|
||||
output wire aw_ack,
|
||||
output wire w_ack,
|
||||
output wire tx_ack,
|
||||
output wire tx_rdy
|
||||
);
|
||||
reg awfired;
|
||||
reg wfired;
|
||||
|
||||
wire awfire = awvalid && awready;
|
||||
wire wfire = wvalid && wready;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
awfired <= 0;
|
||||
wfired <= 0;
|
||||
end else begin
|
||||
if (awfire) begin
|
||||
awfired <= 1;
|
||||
end
|
||||
if (wfire) begin
|
||||
wfired <= 1;
|
||||
end
|
||||
if (tx_ack) begin
|
||||
awfired <= 0;
|
||||
wfired <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign aw_ack = awfired;
|
||||
assign w_ack = wfired;
|
||||
|
||||
assign tx_ack = (awfire || awfired) && (wfire || wfired);
|
||||
assign tx_rdy = (awready || awfired) && (wready || wfired);
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -19,19 +19,19 @@ module VX_bits_insert #(
|
|||
parameter S = 1,
|
||||
parameter POS = 0
|
||||
) (
|
||||
input wire [N-1:0] data_in,
|
||||
input wire [`UP(S)-1:0] ins_in,
|
||||
input wire [N-1:0] data_in,
|
||||
input wire [`UP(S)-1:0] ins_in,
|
||||
output wire [N+S-1:0] data_out
|
||||
);
|
||||
if (S == 0) begin
|
||||
);
|
||||
if (S == 0) begin : g_passthru
|
||||
`UNUSED_VAR (ins_in)
|
||||
assign data_out = data_in;
|
||||
end else begin
|
||||
if (POS == 0) begin
|
||||
end else begin : g_insert
|
||||
if (POS == 0) begin : g_pos_0
|
||||
assign data_out = {data_in, ins_in};
|
||||
end else if (POS == N) begin
|
||||
end else if (POS == N) begin : g_pos_N
|
||||
assign data_out = {ins_in, data_in};
|
||||
end else begin
|
||||
end else begin : g_pos
|
||||
assign data_out = {data_in[N-1:POS], ins_in, data_in[POS-1:0]};
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -19,17 +19,19 @@ module VX_bits_remove #(
|
|||
parameter S = 1,
|
||||
parameter POS = 0
|
||||
) (
|
||||
input wire [N-1:0] data_in,
|
||||
input wire [N-1:0] data_in,
|
||||
output wire [N-S-1:0] data_out
|
||||
);
|
||||
`STATIC_ASSERT (((0 == S) || ((POS + S) <= N)), ("invalid parameter"))
|
||||
|
||||
if (POS == 0 || S == 0) begin
|
||||
|
||||
if (S == 0) begin : g_passthru
|
||||
assign data_out = data_in;
|
||||
end else if (POS == 0) begin : g_pos_0
|
||||
assign data_out = data_in[N-1:S];
|
||||
end else if ((POS + S) < N) begin
|
||||
assign data_out = {data_in[N-1:(POS+S)], data_in[POS-1:0]};
|
||||
end else begin
|
||||
end else if ((POS + S) == N) begin : g_pos_N
|
||||
assign data_out = data_in[POS-1:0];
|
||||
end else begin : g_pos
|
||||
assign data_out = {data_in[N-1:(POS+S)], data_in[POS-1:0]};
|
||||
end
|
||||
|
||||
`UNUSED_VAR (data_in)
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -25,30 +25,33 @@
|
|||
module VX_bypass_buffer #(
|
||||
parameter DATAW = 1,
|
||||
parameter PASSTHRU = 0
|
||||
) (
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire valid_in,
|
||||
output wire ready_in,
|
||||
output wire ready_in,
|
||||
input wire [DATAW-1:0] data_in,
|
||||
output wire [DATAW-1:0] data_out,
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
if (PASSTHRU != 0) begin
|
||||
);
|
||||
if (PASSTHRU != 0) begin : g_passthru
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
assign ready_in = ready_out;
|
||||
assign valid_out = valid_in;
|
||||
assign valid_out = valid_in;
|
||||
assign data_out = data_in;
|
||||
end else begin
|
||||
|
||||
end else begin : g_buffer
|
||||
|
||||
reg [DATAW-1:0] buffer;
|
||||
reg has_data;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
has_data <= 0;
|
||||
end else begin
|
||||
end else begin
|
||||
if (ready_out) begin
|
||||
has_data <= 0;
|
||||
end else if (~has_data) begin
|
||||
|
@ -63,6 +66,7 @@ module VX_bypass_buffer #(
|
|||
assign ready_in = ready_out || ~has_data;
|
||||
assign data_out = has_data ? buffer : data_in;
|
||||
assign valid_out = valid_in || has_data;
|
||||
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -26,42 +26,58 @@ module VX_cyclic_arbiter #(
|
|||
output wire grant_valid,
|
||||
input wire grant_ready
|
||||
);
|
||||
if (NUM_REQS == 1) begin
|
||||
if (NUM_REQS == 1) begin : g_passthru
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (grant_ready)
|
||||
|
||||
assign grant_index = '0;
|
||||
assign grant_onehot = requests;
|
||||
assign grant_valid = requests[0];
|
||||
|
||||
end else begin
|
||||
end else begin : g_arbiter
|
||||
|
||||
localparam IS_POW2 = (1 << LOG_NUM_REQS) == NUM_REQS;
|
||||
|
||||
wire [LOG_NUM_REQS-1:0] grant_index_um;
|
||||
wire [NUM_REQS-1:0] grant_onehot_w, grant_onehot_um;
|
||||
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
grant_index_r <= '0;
|
||||
end else begin
|
||||
if (!IS_POW2 && grant_index_r == LOG_NUM_REQS'(NUM_REQS-1)) begin
|
||||
end else if (grant_valid && grant_ready) begin
|
||||
if (!IS_POW2 && grant_index == LOG_NUM_REQS'(NUM_REQS-1)) begin
|
||||
grant_index_r <= '0;
|
||||
end else if (~grant_valid || grant_ready) begin
|
||||
grant_index_r <= grant_index_r + LOG_NUM_REQS'(1);
|
||||
end else begin
|
||||
grant_index_r <= grant_index + LOG_NUM_REQS'(1);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||
always @(*) begin
|
||||
grant_onehot_r = '0;
|
||||
grant_onehot_r[grant_index_r] = 1'b1;
|
||||
end
|
||||
VX_priority_encoder #(
|
||||
.N (NUM_REQS)
|
||||
) priority_encoder (
|
||||
.data_in (requests),
|
||||
.onehot_out (grant_onehot_um),
|
||||
.index_out (grant_index_um),
|
||||
.valid_out (grant_valid)
|
||||
);
|
||||
|
||||
assign grant_index = grant_index_r;
|
||||
assign grant_onehot = grant_onehot_r;
|
||||
assign grant_valid = requests[grant_index_r];
|
||||
VX_decoder #(
|
||||
.N (LOG_NUM_REQS),
|
||||
.D (NUM_REQS)
|
||||
) grant_decoder (
|
||||
.data_in (grant_index),
|
||||
.valid_in (1'b1),
|
||||
.data_out (grant_onehot_w)
|
||||
);
|
||||
|
||||
wire is_hit = requests[grant_index_r];
|
||||
|
||||
assign grant_index = is_hit ? grant_index_r : grant_index_um;
|
||||
assign grant_onehot = is_hit ? grant_onehot_w : grant_onehot_um;
|
||||
|
||||
end
|
||||
|
||||
|
|
42
hw/rtl/libs/VX_decoder.sv
Normal file
42
hw/rtl/libs/VX_decoder.sv
Normal file
|
@ -0,0 +1,42 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
// Fast encoder using parallel prefix computation
|
||||
// Adapted from BaseJump STL: http://bjump.org/data_out.html
|
||||
|
||||
`TRACING_OFF
|
||||
module VX_decoder #(
|
||||
parameter N = 1,
|
||||
parameter M = 1,
|
||||
parameter MODEL = 0,
|
||||
parameter D = 1 << N
|
||||
) (
|
||||
input wire [N-1:0] data_in,
|
||||
input wire [M-1:0] valid_in,
|
||||
output wire [D-1:0][M-1:0] data_out
|
||||
);
|
||||
logic [D-1:0][M-1:0] shift;
|
||||
if (MODEL == 1) begin : g_model1
|
||||
always @(*) begin
|
||||
shift = '0;
|
||||
shift[data_in] = {M{1'b1}};
|
||||
end
|
||||
end else begin : g_model0
|
||||
assign shift = ((D*M)'({M{1'b1}})) << (data_in * M);
|
||||
end
|
||||
assign data_out = {D{valid_in}} & shift;
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -24,7 +24,7 @@ module VX_divider #(
|
|||
parameter LATENCY = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire enable,
|
||||
input wire enable,
|
||||
input wire [N_WIDTH-1:0] numer,
|
||||
input wire [D_WIDTH-1:0] denom,
|
||||
output wire [Q_WIDTH-1:0] quotient,
|
||||
|
@ -37,7 +37,7 @@ module VX_divider #(
|
|||
wire [D_WIDTH-1:0] remainder_unqual;
|
||||
|
||||
lpm_divide divide (
|
||||
.clock (clk),
|
||||
.clock (clk),
|
||||
.clken (enable),
|
||||
.numer (numer),
|
||||
.denom (denom),
|
||||
|
@ -47,7 +47,7 @@ module VX_divider #(
|
|||
|
||||
defparam
|
||||
divide.lpm_type = "LPM_DIVIDE",
|
||||
divide.lpm_widthn = N_WIDTH,
|
||||
divide.lpm_widthn = N_WIDTH,
|
||||
divide.lpm_widthd = D_WIDTH,
|
||||
divide.lpm_nrepresentation = N_SIGNED ? "SIGNED" : "UNSIGNED",
|
||||
divide.lpm_drepresentation = D_SIGNED ? "SIGNED" : "UNSIGNED",
|
||||
|
@ -62,36 +62,36 @@ module VX_divider #(
|
|||
reg [N_WIDTH-1:0] quotient_unqual;
|
||||
reg [D_WIDTH-1:0] remainder_unqual;
|
||||
|
||||
always @(*) begin
|
||||
always @(*) begin
|
||||
begin
|
||||
if (N_SIGNED && D_SIGNED) begin
|
||||
quotient_unqual = $signed(numer) / $signed(denom);
|
||||
remainder_unqual = $signed(numer) % $signed(denom);
|
||||
end
|
||||
end
|
||||
else if (N_SIGNED && !D_SIGNED) begin
|
||||
quotient_unqual = $signed(numer) / denom;
|
||||
remainder_unqual = $signed(numer) % denom;
|
||||
end
|
||||
end
|
||||
else if (!N_SIGNED && D_SIGNED) begin
|
||||
quotient_unqual = numer / $signed(denom);
|
||||
remainder_unqual = numer % $signed(denom);
|
||||
end
|
||||
end
|
||||
else begin
|
||||
quotient_unqual = numer / denom;
|
||||
remainder_unqual = numer % denom;
|
||||
remainder_unqual = numer % denom;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
if (LATENCY == 0) begin
|
||||
if (LATENCY == 0) begin : g_comb
|
||||
assign quotient = quotient_unqual [Q_WIDTH-1:0];
|
||||
assign remainder = remainder_unqual [R_WIDTH-1:0];
|
||||
end else begin
|
||||
end else begin : g_pipe
|
||||
reg [N_WIDTH-1:0] quotient_pipe [LATENCY-1:0];
|
||||
reg [D_WIDTH-1:0] remainder_pipe [LATENCY-1:0];
|
||||
|
||||
for (genvar i = 0; i < LATENCY; ++i) begin
|
||||
always @(posedge clk) begin
|
||||
for (genvar i = 0; i < LATENCY; ++i) begin : g_reg
|
||||
always @(posedge clk) begin
|
||||
if (enable) begin
|
||||
quotient_pipe[i] <= (0 == i) ? quotient_unqual : quotient_pipe[i-1];
|
||||
remainder_pipe[i] <= (0 == i) ? remainder_unqual : remainder_pipe[i-1];
|
||||
|
@ -101,7 +101,7 @@ module VX_divider #(
|
|||
|
||||
assign quotient = quotient_pipe[LATENCY-1][Q_WIDTH-1:0];
|
||||
assign remainder = remainder_pipe[LATENCY-1][R_WIDTH-1:0];
|
||||
end
|
||||
end
|
||||
|
||||
`endif
|
||||
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue