mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
Merge branch 'develop' into tensor-core
This commit is contained in:
commit
4a606061d2
374 changed files with 16486 additions and 29573 deletions
175
.github/workflows/ci.yml
vendored
Normal file
175
.github/workflows/ci.yml
vendored
Normal file
|
@ -0,0 +1,175 @@
|
|||
# Copyright © 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
name: CI
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
jobs:
|
||||
setup:
|
||||
runs-on: ubuntu-20.04
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
- name: Cache Toolchain Directory
|
||||
id: cache-toolchain
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: tools
|
||||
key: ${{ runner.os }}-toolchain-v0.1
|
||||
restore-keys: |
|
||||
${{ runner.os }}-toolchain-
|
||||
|
||||
- name: Cache Third Party Directory
|
||||
id: cache-thirdparty
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: third_party
|
||||
key: ${{ runner.os }}-thirdparty-v0.1
|
||||
restore-keys: |
|
||||
${{ runner.os }}-thirdparty-
|
||||
|
||||
- name: Install Dependencies
|
||||
if: steps.cache-toolchain.outputs.cache-hit != 'true' || steps.cache-thirdparty.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
sudo bash ./ci/install_dependencies.sh
|
||||
|
||||
- name: Setup Toolchain
|
||||
if: steps.cache-toolchain.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
TOOLDIR=$PWD/tools
|
||||
mkdir -p build
|
||||
cd build
|
||||
../configure --tooldir=$TOOLDIR
|
||||
ci/toolchain_install.sh --all
|
||||
|
||||
- name: Setup Third Party
|
||||
if: steps.cache-thirdparty.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
make -C third_party > /dev/null
|
||||
|
||||
build:
|
||||
runs-on: ubuntu-20.04
|
||||
needs: setup
|
||||
strategy:
|
||||
matrix:
|
||||
xlen: [32, 64]
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
sudo bash ./ci/install_dependencies.sh
|
||||
|
||||
- name: Cache Toolchain Directory
|
||||
id: cache-toolchain
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: tools
|
||||
key: ${{ runner.os }}-toolchain-v0.1
|
||||
restore-keys: |
|
||||
${{ runner.os }}-toolchain-
|
||||
|
||||
- name: Cache Third Party Directory
|
||||
id: cache-thirdparty
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: third_party
|
||||
key: ${{ runner.os }}-thirdparty-v0.1
|
||||
restore-keys: |
|
||||
${{ runner.os }}-thirdparty-
|
||||
|
||||
- name: Run Build
|
||||
run: |
|
||||
TOOLDIR=$PWD/tools
|
||||
mkdir -p build${{ matrix.xlen }}
|
||||
cd build${{ matrix.xlen }}
|
||||
../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }}
|
||||
source ci/toolchain_env.sh
|
||||
make software -s > /dev/null
|
||||
make tests -s > /dev/null
|
||||
|
||||
- name: Upload Build Artifact
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: build-${{ matrix.xlen }}
|
||||
path: build${{ matrix.xlen }}
|
||||
|
||||
tests:
|
||||
runs-on: ubuntu-20.04
|
||||
needs: build
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
name: [regression, opencl, cache, config1, config2, debug, scope, stress, synthesis]
|
||||
xlen: [32, 64]
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
sudo bash ./ci/install_dependencies.sh
|
||||
|
||||
- name: Cache Toolchain Directory
|
||||
id: cache-toolchain
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: tools
|
||||
key: ${{ runner.os }}-toolchain-v0.1
|
||||
restore-keys: |
|
||||
${{ runner.os }}-toolchain-
|
||||
|
||||
- name: Cache Third Party Directory
|
||||
id: cache-thirdparty
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: third_party
|
||||
key: ${{ runner.os }}-thirdparty-v0.1
|
||||
restore-keys: |
|
||||
${{ runner.os }}-thirdparty-
|
||||
|
||||
- name: Download Build Artifact
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: build-${{ matrix.xlen }}
|
||||
path: build${{ matrix.xlen }}
|
||||
|
||||
- name: Run tests
|
||||
run: |
|
||||
cd build${{ matrix.xlen }}
|
||||
source ci/toolchain_env.sh
|
||||
chmod -R +x . # Ensure all files have executable permissions
|
||||
if [ "${{ matrix.name }}" == "regression" ]; then
|
||||
./ci/regression.sh --unittest
|
||||
./ci/regression.sh --isa
|
||||
./ci/regression.sh --kernel
|
||||
./ci/regression.sh --regression
|
||||
else
|
||||
./ci/regression.sh --${{ matrix.name }}
|
||||
fi
|
||||
|
||||
complete:
|
||||
runs-on: ubuntu-20.04
|
||||
needs: tests
|
||||
|
||||
steps:
|
||||
- name: Check Completion
|
||||
run: echo "All matrix jobs passed"
|
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -1,2 +1,3 @@
|
|||
/build*
|
||||
/.vscode
|
||||
/.vscode
|
||||
*.cache
|
8
.gitmodules
vendored
8
.gitmodules
vendored
|
@ -1,9 +1,9 @@
|
|||
[submodule "third_party/fpnew"]
|
||||
path = third_party/fpnew
|
||||
url = https://github.com/pulp-platform/fpnew.git
|
||||
[submodule "third_party/softfloat"]
|
||||
path = third_party/softfloat
|
||||
url = https://github.com/ucb-bar/berkeley-softfloat-3.git
|
||||
[submodule "third_party/ramulator"]
|
||||
path = third_party/ramulator
|
||||
url = https://github.com/CMU-SAFARI/ramulator.git
|
||||
url = https://github.com/CMU-SAFARI/ramulator2.git
|
||||
[submodule "third_party/cvfpu"]
|
||||
path = third_party/cvfpu
|
||||
url = https://github.com/openhwgroup/cvfpu.git
|
||||
|
|
102
.travis.yml
102
.travis.yml
|
@ -1,102 +0,0 @@
|
|||
language: cpp
|
||||
dist: focal
|
||||
os: linux
|
||||
compiler: gcc
|
||||
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- build-essential
|
||||
- valgrind
|
||||
- libstdc++6
|
||||
- binutils
|
||||
- python
|
||||
- uuid-dev
|
||||
|
||||
env:
|
||||
global:
|
||||
- TOOLDIR=$HOME/tools
|
||||
|
||||
cache:
|
||||
directories:
|
||||
- $TOOLDIR
|
||||
- $HOME/third_party
|
||||
- $HOME/build32
|
||||
- $HOME/build64
|
||||
|
||||
before_install:
|
||||
- if [ ! -d "$TOOLDIR" ] || [ -z "$(ls -A $TOOLDIR)" ] || [ "$(cat "$TOOLDIR/version.txt")" != "v0.4" ]; then
|
||||
rm -rf $TOOLDIR;
|
||||
mkdir -p $TRAVIS_BUILD_DIR/build && cd $TRAVIS_BUILD_DIR/build;
|
||||
../configure --tooldir=$TOOLDIR;
|
||||
ci/toolchain_install.sh --all;
|
||||
echo "v0.3" > "$TOOLDIR/version.txt";
|
||||
else
|
||||
echo "using existing tooldir build";
|
||||
fi
|
||||
- if [ ! -d "$HOME/third_party" ] || [ -z "$(ls -A $HOME/third_party)" ] || [ "$(cat "$HOME/third_party/version.txt")" != "v0.2" ]; then
|
||||
cd $TRAVIS_BUILD_DIR;
|
||||
make -C third_party > /dev/null;
|
||||
echo "v0.2" > "third_party/version.txt";
|
||||
cp -rf third_party $HOME;
|
||||
else
|
||||
echo "using existing third_party build";
|
||||
cp -rf $HOME/third_party $TRAVIS_BUILD_DIR;
|
||||
fi
|
||||
|
||||
install:
|
||||
- if [ ! -d "$HOME/build$XLEN" ] || [ -z "$(ls -A $HOME/build$XLEN)" ] || [ "$(cat "$HOME/build$XLEN/version.txt")" != "$TRAVIS_COMMIT" ]; then
|
||||
mkdir -p $TRAVIS_BUILD_DIR/build$XLEN && cd $TRAVIS_BUILD_DIR/build$XLEN;
|
||||
../configure --tooldir=$TOOLDIR --xlen=$XLEN;
|
||||
source ci/toolchain_env.sh;
|
||||
make build -s > /dev/null;
|
||||
echo "$TRAVIS_COMMIT" > version.txt;
|
||||
cp -rf $TRAVIS_BUILD_DIR/build$XLEN $HOME;
|
||||
else
|
||||
echo "using existing build for commit $TRAVIS_COMMIT";
|
||||
cp -rf $HOME/build$XLEN $TRAVIS_BUILD_DIR;
|
||||
fi
|
||||
|
||||
before_script:
|
||||
- cd $TRAVIS_BUILD_DIR/build$XLEN
|
||||
- source ci/toolchain_env.sh
|
||||
|
||||
stages:
|
||||
- test
|
||||
|
||||
jobs:
|
||||
include:
|
||||
- stage: test
|
||||
name: regression32
|
||||
env: XLEN=32
|
||||
script:
|
||||
- ./ci/travis_run.py ./ci/regression.sh --unittest
|
||||
- ./ci/travis_run.py ./ci/regression.sh --isa
|
||||
- ./ci/travis_run.py ./ci/regression.sh --kernel
|
||||
- ./ci/travis_run.py ./ci/regression.sh --synthesis
|
||||
- ./ci/travis_run.py ./ci/regression.sh --regression
|
||||
- ./ci/travis_run.py ./ci/regression.sh --opencl
|
||||
|
||||
- stage: test
|
||||
name: regression64
|
||||
env: XLEN=64
|
||||
script:
|
||||
- ./ci/travis_run.py ./ci/regression.sh --isa
|
||||
- ./ci/travis_run.py ./ci/regression.sh --kernel
|
||||
- ./ci/travis_run.py ./ci/regression.sh --synthesis
|
||||
- ./ci/travis_run.py ./ci/regression.sh --regression
|
||||
- ./ci/travis_run.py ./ci/regression.sh --opencl
|
||||
|
||||
- stage: test
|
||||
name: config
|
||||
env: XLEN=32
|
||||
script:
|
||||
- ./ci/travis_run.py ./ci/regression.sh --cluster
|
||||
- ./ci/travis_run.py ./ci/regression.sh --config
|
||||
|
||||
- stage: test
|
||||
name: debug
|
||||
env: XLEN=32
|
||||
script:
|
||||
- ./ci/travis_run.py ./ci/regression.sh --debug
|
||||
- ./ci/travis_run.py ./ci/regression.sh --stress
|
23
Makefile.in
23
Makefile.in
|
@ -1,5 +1,7 @@
|
|||
include config.mk
|
||||
|
||||
.PHONY: build software tests
|
||||
|
||||
all:
|
||||
$(MAKE) -C $(VORTEX_HOME)/third_party
|
||||
$(MAKE) -C hw
|
||||
|
@ -15,18 +17,29 @@ build:
|
|||
$(MAKE) -C runtime
|
||||
$(MAKE) -C tests
|
||||
|
||||
clean:
|
||||
software:
|
||||
$(MAKE) -C hw
|
||||
$(MAKE) -C kernel
|
||||
$(MAKE) -C runtime/stub
|
||||
|
||||
tests:
|
||||
$(MAKE) -C tests
|
||||
|
||||
clean-build:
|
||||
$(MAKE) -C hw clean
|
||||
$(MAKE) -C sim clean
|
||||
$(MAKE) -C kernel clean
|
||||
$(MAKE) -C runtime clean
|
||||
$(MAKE) -C tests clean
|
||||
|
||||
clean: clean-build
|
||||
$(MAKE) -C $(VORTEX_HOME)/third_party clean
|
||||
|
||||
# Install setup
|
||||
KERNEL_INC_DST = $(PREFIX)/kernel/include
|
||||
KERNEL_LIB_DST = $(PREFIX)/kernel/lib$(XLEN)
|
||||
RUNTIME_INC_DST = $(PREFIX)/runtime/include
|
||||
RUNTIME_LIB_DST = $(PREFIX)/runtime/lib
|
||||
KERNEL_INC_DST = $(INSTALLDIR)/kernel/include
|
||||
KERNEL_LIB_DST = $(INSTALLDIR)/kernel/lib$(XLEN)
|
||||
RUNTIME_INC_DST = $(INSTALLDIR)/runtime/include
|
||||
RUNTIME_LIB_DST = $(INSTALLDIR)/runtime/lib
|
||||
|
||||
KERNEL_HEADERS = $(wildcard $(VORTEX_HOME)/kernel/include/*.h)
|
||||
KERNEL_LIBS = $(wildcard kernel/*.a)
|
||||
|
|
92
README.md
92
README.md
|
@ -1,5 +1,3 @@
|
|||
[](https://travis-ci.com/vortexgpgpu/vortex)
|
||||
|
||||
# Vortex GPGPU
|
||||
|
||||
Vortex is a full-stack open-source RISC-V GPGPU.
|
||||
|
@ -35,63 +33,73 @@ Vortex is a full-stack open-source RISC-V GPGPU.
|
|||
## Build Instructions
|
||||
More detailed build instructions can be found [here](docs/install_vortex.md).
|
||||
### Supported OS Platforms
|
||||
- Ubuntu 18.04, 20.04
|
||||
- Ubuntu 18.04, 20.04, 22.04, 24.04
|
||||
- CentOS 7
|
||||
### Toolchain Dependencies
|
||||
- [POCL](http://portablecl.org/)
|
||||
- [LLVM](https://llvm.org/)
|
||||
- [RISCV-GNU-TOOLCHAIN](https://github.com/riscv-collab/riscv-gnu-toolchain)
|
||||
- [Verilator](https://www.veripool.org/verilator)
|
||||
- [FpNew](https://github.com/pulp-platform/fpnew.git)
|
||||
- [cvfpu](https://github.com/openhwgroup/cvfpu.git)
|
||||
- [SoftFloat](https://github.com/ucb-bar/berkeley-softfloat-3.git)
|
||||
- [Ramulator](https://github.com/CMU-SAFARI/ramulator.git)
|
||||
- [Yosys](https://github.com/YosysHQ/yosys)
|
||||
- [Sv2v](https://github.com/zachjs/sv2v)
|
||||
### Install development tools
|
||||
$ sudo apt-get install build-essential
|
||||
$ sudo apt-get install binutils
|
||||
$ sudo apt-get install python
|
||||
$ sudo apt-get install uuid-dev
|
||||
$ sudo apt-get install git
|
||||
### Install Vortex codebase
|
||||
$ git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
|
||||
$ cd Vortex
|
||||
```sh
|
||||
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
|
||||
cd vortex
|
||||
```
|
||||
### Install system dependencies
|
||||
```sh
|
||||
# ensure dependent libraries are present
|
||||
sudo ./ci/install_dependencies.sh
|
||||
```
|
||||
### Configure your build folder
|
||||
# By default, the toolchain default install location is the /opt folder and can be overridden by setting --tooldir.
|
||||
$ mkdir build
|
||||
$ cd build
|
||||
$ ../configure --xlen=32 --tooldir=$HOME/tools
|
||||
```sh
|
||||
mkdir build
|
||||
cd build
|
||||
../configure --xlen=32 --tooldir=$HOME/tools
|
||||
```
|
||||
### Install prebuilt toolchain
|
||||
$ ./ci/toolchain_install.sh --all
|
||||
### set environment variables
|
||||
# should always run before using the toolchain!
|
||||
$ source ./ci/toolchain_env.sh
|
||||
```sh
|
||||
./ci/toolchain_install.sh --all
|
||||
```
|
||||
### Set environment variables
|
||||
```sh
|
||||
# should always run before using the toolchain!
|
||||
source ./ci/toolchain_env.sh
|
||||
```
|
||||
### Building Vortex
|
||||
$ make -s
|
||||
```sh
|
||||
make -s
|
||||
```
|
||||
### Quick demo running vecadd OpenCL kernel on 2 cores
|
||||
$ ./ci/blackbox.sh --cores=2 --app=vecadd
|
||||
```sh
|
||||
./ci/blackbox.sh --cores=2 --app=vecadd
|
||||
```
|
||||
|
||||
### Common Developer Tips
|
||||
- Installing Vortex kernel and runtime libraries to use with external tools requires passing --prefix=<install-path> to the configure script.
|
||||
```sh
|
||||
$ ../configure --xlen=32 --tooldir=$HOME/tools --prefix=<install-path>
|
||||
$ make -s
|
||||
$ make install
|
||||
```
|
||||
- Building Vortex 64-bit simply requires using --xlen=64 configure option.
|
||||
```sh
|
||||
$ ../configure --xlen=32 --tooldir=$HOME/tools
|
||||
```
|
||||
```sh
|
||||
../configure --xlen=32 --tooldir=$HOME/tools --prefix=<install-path>
|
||||
make -s
|
||||
make install
|
||||
```
|
||||
- Building Vortex 64-bit requires setting the --xlen=64 configure option.
|
||||
```sh
|
||||
../configure --xlen=64 --tooldir=$HOME/tools
|
||||
```
|
||||
- Sourcing "./ci/toolchain_env.sh" is required every time you start a new terminal. We recommend adding "source <build-path>/ci/toolchain_env.sh" to your ~/.bashrc file to automate the process at login.
|
||||
```sh
|
||||
$ echo "source <build-path>/ci/toolchain_env.sh" >> ~/.bashrc
|
||||
```
|
||||
- Making changes to Makefiles in your source tree or adding new folders will require executing the "configure" script again to get it propagated into your build folder.
|
||||
```sh
|
||||
$ ../configure
|
||||
```
|
||||
- To debug the GPU, you can generate a "run.log" trace. see /docs/debugging.md for more information.
|
||||
```sh
|
||||
$ ./ci/blackbox.sh --app=demo --debug=3
|
||||
```
|
||||
```sh
|
||||
echo "source <build-path>/ci/toolchain_env.sh" >> ~/.bashrc
|
||||
```
|
||||
- Making changes to Makefiles in your source tree or adding new folders will require executing the "configure" script again without any options to get changes propagated to your build folder.
|
||||
```sh
|
||||
../configure
|
||||
```
|
||||
- To debug the GPU, the simulation can generate a runtime trace for analysis. See /docs/debugging.md for more information.
|
||||
```sh
|
||||
./ci/blackbox.sh --app=demo --debug=3
|
||||
```
|
||||
- For additional information, check out the /docs.
|
||||
|
|
462
ci/blackbox.sh
462
ci/blackbox.sh
|
@ -13,6 +13,9 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
SCRIPT_DIR=$(dirname "$0")
|
||||
ROOT_DIR=$SCRIPT_DIR/..
|
||||
|
||||
show_usage()
|
||||
{
|
||||
echo "Vortex BlackBox Test Driver v1.0"
|
||||
|
@ -29,301 +32,174 @@ show_help()
|
|||
echo "--rebuild: 0=disable, 1=force, 2=auto, 3=temp"
|
||||
}
|
||||
|
||||
SCRIPT_DIR=$(dirname "$0")
|
||||
ROOT_DIR=$SCRIPT_DIR/..
|
||||
|
||||
DRIVER=simx
|
||||
APP=sgemm
|
||||
CLUSTERS=1
|
||||
CORES=1
|
||||
WARPS=4
|
||||
THREADS=4
|
||||
L2=
|
||||
L3=
|
||||
DEBUG=0
|
||||
DEBUG_LEVEL=0
|
||||
SCOPE=0
|
||||
HAS_ARGS=0
|
||||
PERF_CLASS=0
|
||||
REBUILD=2
|
||||
TEMPBUILD=0
|
||||
LOGFILE=run.log
|
||||
|
||||
for i in "$@"
|
||||
do
|
||||
case $i in
|
||||
--driver=*)
|
||||
DRIVER=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--app=*)
|
||||
APP=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--clusters=*)
|
||||
CLUSTERS=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--cores=*)
|
||||
CORES=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--warps=*)
|
||||
WARPS=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--threads=*)
|
||||
THREADS=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--l2cache)
|
||||
L2=-DL2_ENABLE
|
||||
shift
|
||||
;;
|
||||
--l3cache)
|
||||
L3=-DL3_ENABLE
|
||||
shift
|
||||
;;
|
||||
--debug=*)
|
||||
DEBUG_LEVEL=${i#*=}
|
||||
DEBUG=1
|
||||
shift
|
||||
;;
|
||||
--scope)
|
||||
SCOPE=1
|
||||
CORES=1
|
||||
shift
|
||||
;;
|
||||
--perf=*)
|
||||
PERF_FLAG=-DPERF_ENABLE
|
||||
PERF_CLASS=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--args=*)
|
||||
ARGS=${i#*=}
|
||||
HAS_ARGS=1
|
||||
shift
|
||||
;;
|
||||
--rebuild=*)
|
||||
REBUILD=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--log=*)
|
||||
LOGFILE=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--help)
|
||||
show_help
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
show_usage
|
||||
exit -1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [ $REBUILD -eq 3 ];
|
||||
then
|
||||
REBUILD=1
|
||||
TEMPBUILD=1
|
||||
fi
|
||||
|
||||
case $DRIVER in
|
||||
gpu)
|
||||
DRIVER_PATH=
|
||||
;;
|
||||
simx)
|
||||
DRIVER_PATH=$ROOT_DIR/runtime/simx
|
||||
;;
|
||||
rtlsim)
|
||||
DRIVER_PATH=$ROOT_DIR/runtime/rtlsim
|
||||
;;
|
||||
opae)
|
||||
DRIVER_PATH=$ROOT_DIR/runtime/opae
|
||||
;;
|
||||
xrt)
|
||||
DRIVER_PATH=$ROOT_DIR/runtime/xrt
|
||||
;;
|
||||
*)
|
||||
echo "invalid driver: $DRIVER"
|
||||
exit -1
|
||||
;;
|
||||
esac
|
||||
|
||||
if [ -d "$ROOT_DIR/tests/opencl/$APP" ];
|
||||
then
|
||||
APP_PATH=$ROOT_DIR/tests/opencl/$APP
|
||||
elif [ -d "$ROOT_DIR/tests/regression/$APP" ];
|
||||
then
|
||||
APP_PATH=$ROOT_DIR/tests/regression/$APP
|
||||
else
|
||||
echo "Application folder not found: $APP"
|
||||
exit -1
|
||||
fi
|
||||
|
||||
if [ "$DRIVER" = "gpu" ];
|
||||
then
|
||||
# running application
|
||||
if [ $HAS_ARGS -eq 1 ]
|
||||
then
|
||||
echo "running: OPTS=$ARGS make -C $APP_PATH run-$DRIVER"
|
||||
OPTS=$ARGS make -C $APP_PATH run-$DRIVER
|
||||
status=$?
|
||||
add_option() {
|
||||
if [ -n "$1" ]; then
|
||||
echo "$1 $2"
|
||||
else
|
||||
echo "running: make -C $APP_PATH run-$DRIVER"
|
||||
make -C $APP_PATH run-$DRIVER
|
||||
status=$?
|
||||
echo "$2"
|
||||
fi
|
||||
}
|
||||
|
||||
# Reset every script option to its default value; called by parse_args
# before the command line is scanned.
# CONFIGS is assigned to itself, so whatever value it already holds (for
# example one inherited from the environment) is kept as the starting point
# that the option handlers in parse_args append to.
# REBUILD=2 corresponds to "auto" in the --rebuild help text.
DEFAULTS() {
|
||||
DRIVER=simx
|
||||
APP=sgemm
|
||||
DEBUG=0
|
||||
DEBUG_LEVEL=0
|
||||
SCOPE=0
|
||||
HAS_ARGS=0
|
||||
PERF_CLASS=0
|
||||
CONFIGS="$CONFIGS"
|
||||
REBUILD=2
|
||||
TEMPBUILD=0
|
||||
LOGFILE=run.log
|
||||
}
|
||||
|
||||
# Parse the command-line options passed as "$@" into the script's global
# variables.
#   - Starts from the values set by DEFAULTS.
#   - Hardware options (--clusters, --cores, --warps, --threads, --l2cache,
#     --l3cache, --perf) are appended to CONFIGS as -D defines via add_option.
#   - "${i#*=}" strips everything up to and including the first '=' and so
#     yields the option's value.
#   - --help prints the help text and exits 0; any unrecognized argument
#     prints the usage text and exits 1.
parse_args() {
|
||||
DEFAULTS
|
||||
for i in "$@"; do
|
||||
case $i in
|
||||
--driver=*) DRIVER=${i#*=} ;;
|
||||
--app=*) APP=${i#*=} ;;
|
||||
--clusters=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_CLUSTERS=${i#*=}") ;;
|
||||
--cores=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_CORES=${i#*=}") ;;
|
||||
--warps=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_WARPS=${i#*=}") ;;
|
||||
--threads=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_THREADS=${i#*=}") ;;
|
||||
--l2cache) CONFIGS=$(add_option "$CONFIGS" "-DL2_ENABLE") ;;
|
||||
--l3cache) CONFIGS=$(add_option "$CONFIGS" "-DL3_ENABLE") ;;
|
||||
--perf=*) CONFIGS=$(add_option "$CONFIGS" "-DPERF_ENABLE"); PERF_CLASS=${i#*=} ;;
|
||||
--debug=*) DEBUG=1; DEBUG_LEVEL=${i#*=} ;;
|
||||
--scope) SCOPE=1; ;;
|
||||
--args=*) HAS_ARGS=1; ARGS=${i#*=} ;;
|
||||
--rebuild=*) REBUILD=${i#*=} ;;
|
||||
--log=*) LOGFILE=${i#*=} ;;
|
||||
--help) show_help; exit 0 ;;
|
||||
*) show_usage; exit 1 ;;
|
||||
esac
|
||||
done
|
||||
|
||||
# --rebuild=3 ("temp" in the help text) is translated into a forced
# rebuild (REBUILD=1) with the temporary-build flag set.
if [ $REBUILD -eq 3 ];
|
||||
then
|
||||
REBUILD=1
|
||||
TEMPBUILD=1
|
||||
fi
|
||||
}
|
||||
|
||||
# Set DRIVER_PATH from the selected DRIVER.
#   gpu                  -> empty path
#   simx|rtlsim|opae|xrt -> $ROOT_DIR/runtime/<driver>
# Any other driver name prints an error and exits the script with status 1.
set_driver_path() {
|
||||
case $DRIVER in
|
||||
gpu) DRIVER_PATH="" ;;
|
||||
simx|rtlsim|opae|xrt) DRIVER_PATH="$ROOT_DIR/runtime/$DRIVER" ;;
|
||||
*) echo "Invalid driver: $DRIVER"; exit 1 ;;
|
||||
esac
|
||||
}
|
||||
|
||||
# Set APP_PATH to the directory of the application named by APP.
# tests/opencl is searched first, then tests/regression; if neither
# contains a directory called $APP the script prints an error and exits
# with status 1.
set_app_path() {
|
||||
if [ -d "$ROOT_DIR/tests/opencl/$APP" ]; then
|
||||
APP_PATH="$ROOT_DIR/tests/opencl/$APP"
|
||||
elif [ -d "$ROOT_DIR/tests/regression/$APP" ]; then
|
||||
APP_PATH="$ROOT_DIR/tests/regression/$APP"
|
||||
else
|
||||
echo "Application folder not found: $APP"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Build the driver found in DRIVER_PATH with `make`, discarding stdout.
# A list of VAR=value assignments is assembled in cmd_opts from the global
# flags (DEBUG/DEBUG_LEVEL, SCOPE, TEMPBUILD/TEMPDIR, CONFIGS) and placed in
# front of the make command.
# The values are embedded with escaped double quotes, so the command line is
# run through `eval` to have the shell re-parse those quotes; when no option
# applies, make is invoked directly.
# NOTE(review): TEMPDIR is not set in this function; it is expected to have
# been assigned by the caller whenever TEMPBUILD is 1.
build_driver() {
|
||||
local cmd_opts=""
|
||||
[ $DEBUG -ne 0 ] && cmd_opts=$(add_option "$cmd_opts" "DEBUG=$DEBUG_LEVEL")
|
||||
[ $SCOPE -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "SCOPE=1")
|
||||
[ $TEMPBUILD -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "DESTDIR=\"$TEMPDIR\"")
|
||||
[ -n "$CONFIGS" ] && cmd_opts=$(add_option "$cmd_opts" "CONFIGS=\"$CONFIGS\"")
|
||||
|
||||
if [ -n "$cmd_opts" ]; then
|
||||
echo "Running: $cmd_opts make -C $DRIVER_PATH > /dev/null"
|
||||
eval "$cmd_opts make -C $DRIVER_PATH > /dev/null"
|
||||
else
|
||||
echo "Running: make -C $DRIVER_PATH > /dev/null"
|
||||
make -C $DRIVER_PATH > /dev/null
|
||||
fi
|
||||
}
|
||||
|
||||
# Run the application in APP_PATH through its `run-$DRIVER` make target and
# return make's exit status.
# Environment assignments passed to make (built in cmd_opts):
#   DEBUG=1                 when DEBUG is 1
#   VORTEX_RT_PATH=$TEMPDIR when TEMPBUILD is 1
#   OPTS=$ARGS              when --args was given (HAS_ARGS is 1)
# When DEBUG is non-zero, stdout and stderr of the run are redirected to
# $LOGFILE; otherwise output goes to the terminal.
# As in build_driver, `eval` is used so the escaped quotes inside cmd_opts
# are re-parsed by the shell.
run_app() {
|
||||
local cmd_opts=""
|
||||
[ $DEBUG -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "DEBUG=1")
|
||||
[ $TEMPBUILD -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "VORTEX_RT_PATH=\"$TEMPDIR\"")
|
||||
[ $HAS_ARGS -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "OPTS=\"$ARGS\"")
|
||||
|
||||
if [ $DEBUG -ne 0 ]; then
|
||||
if [ -n "$cmd_opts" ]; then
|
||||
echo "Running: $cmd_opts make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1"
|
||||
eval "$cmd_opts make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1"
|
||||
else
|
||||
echo "Running: make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1"
|
||||
make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1
|
||||
fi
|
||||
else
|
||||
if [ -n "$cmd_opts" ]; then
|
||||
echo "Running: $cmd_opts make -C $APP_PATH run-$DRIVER"
|
||||
eval "$cmd_opts make -C $APP_PATH run-$DRIVER"
|
||||
else
|
||||
echo "Running: make -C $APP_PATH run-$DRIVER"
|
||||
make -C $APP_PATH run-$DRIVER
|
||||
fi
|
||||
fi
|
||||
# $? here is the status of the last command executed inside the if/else
# above, i.e. the make (or eval'd make) invocation.
status=$?
|
||||
return $status
|
||||
}
|
||||
|
||||
main() {
|
||||
parse_args "$@"
|
||||
set_driver_path
|
||||
set_app_path
|
||||
|
||||
# execute on default installed GPU
|
||||
if [ "$DRIVER" = "gpu" ]; then
|
||||
run_app
|
||||
exit $?
|
||||
fi
|
||||
|
||||
if [ -n "$CONFIGS" ]; then
|
||||
echo "CONFIGS=$CONFIGS"
|
||||
fi
|
||||
|
||||
if [ $REBUILD -ne 0 ]; then
|
||||
BLACKBOX_CACHE=blackbox.$DRIVER.cache
|
||||
LAST_CONFIGS=$(cat "$BLACKBOX_CACHE" 2>/dev/null || echo "")
|
||||
|
||||
if [ $REBUILD -eq 1 ] || [ "$CONFIGS+$DEBUG+$SCOPE" != "$LAST_CONFIGS" ]; then
|
||||
make -C $DRIVER_PATH clean-driver > /dev/null
|
||||
echo "$CONFIGS+$DEBUG+$SCOPE" > "$BLACKBOX_CACHE"
|
||||
fi
|
||||
fi
|
||||
|
||||
export VORTEX_PROFILING=$PERF_CLASS
|
||||
|
||||
make -C "$ROOT_DIR/hw" config > /dev/null
|
||||
make -C "$ROOT_DIR/runtime/stub" > /dev/null
|
||||
|
||||
if [ $TEMPBUILD -eq 1 ]; then
|
||||
# setup temp directory
|
||||
TEMPDIR=$(mktemp -d)
|
||||
mkdir -p "$TEMPDIR"
|
||||
# build stub driver
|
||||
echo "running: DESTDIR=$TEMPDIR make -C $ROOT_DIR/runtime/stub"
|
||||
DESTDIR="$TEMPDIR" make -C $ROOT_DIR/runtime/stub > /dev/null
|
||||
# register tempdir cleanup on exit
|
||||
trap "rm -rf $TEMPDIR" EXIT
|
||||
fi
|
||||
|
||||
build_driver
|
||||
run_app
|
||||
status=$?
|
||||
|
||||
if [ $DEBUG -eq 1 ] && [ -f "$APP_PATH/trace.vcd" ]; then
|
||||
mv -f $APP_PATH/trace.vcd .
|
||||
fi
|
||||
|
||||
if [ $SCOPE -eq 1 ] && [ -f "$APP_PATH/scope.vcd" ]; then
|
||||
mv -f $APP_PATH/scope.vcd .
|
||||
fi
|
||||
|
||||
exit $status
|
||||
fi
|
||||
}
|
||||
|
||||
CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS $L2 $L3 $PERF_FLAG $CONFIGS"
|
||||
echo "CONFIGS=$CONFIGS"
|
||||
|
||||
if [ $REBUILD -ne 0 ]
|
||||
then
|
||||
BLACKBOX_CACHE=blackbox.$DRIVER.cache
|
||||
if [ -f "$BLACKBOX_CACHE" ]
|
||||
then
|
||||
LAST_CONFIGS=`cat $BLACKBOX_CACHE`
|
||||
fi
|
||||
|
||||
if [ $REBUILD -eq 1 ] || [ "$CONFIGS+$DEBUG+$SCOPE" != "$LAST_CONFIGS" ];
|
||||
then
|
||||
make -C $DRIVER_PATH clean-driver > /dev/null
|
||||
echo "$CONFIGS+$DEBUG+$SCOPE" > $BLACKBOX_CACHE
|
||||
fi
|
||||
fi
|
||||
|
||||
# export performance monitor class identifier
|
||||
export VORTEX_PROFILING=$PERF_CLASS
|
||||
|
||||
status=0
|
||||
|
||||
# ensure config update
|
||||
make -C $ROOT_DIR/hw config > /dev/null
|
||||
|
||||
# ensure the stub driver is present
|
||||
make -C $ROOT_DIR/runtime/stub > /dev/null
|
||||
|
||||
if [ $DEBUG -ne 0 ]
|
||||
then
|
||||
# running application
|
||||
if [ $TEMPBUILD -eq 1 ]
|
||||
then
|
||||
# setup temp directory
|
||||
TEMPDIR=$(mktemp -d)
|
||||
mkdir -p "$TEMPDIR/$DRIVER"
|
||||
|
||||
# driver initialization
|
||||
if [ $SCOPE -eq 1 ]
|
||||
then
|
||||
echo "running: DESTDIR=$TEMPDIR/$DRIVER DEBUG=$DEBUG_LEVEL SCOPE=1 CONFIGS=$CONFIGS make -C $DRIVER_PATH"
|
||||
DESTDIR="$TEMPDIR/$DRIVER" DEBUG=$DEBUG_LEVEL SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null
|
||||
else
|
||||
echo "running: DESTDIR=$TEMPDIR/$DRIVER DEBUG=$DEBUG_LEVEL CONFIGS=$CONFIGS make -C $DRIVER_PATH"
|
||||
DESTDIR="$TEMPDIR/$DRIVER" DEBUG=$DEBUG_LEVEL CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null
|
||||
fi
|
||||
|
||||
# running application
|
||||
if [ $HAS_ARGS -eq 1 ]
|
||||
then
|
||||
echo "running: VORTEX_RT_PATH=$TEMPDIR OPTS=$ARGS make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1"
|
||||
DEBUG=1 VORTEX_RT_PATH=$TEMPDIR OPTS=$ARGS make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1
|
||||
status=$?
|
||||
else
|
||||
echo "running: VORTEX_RT_PATH=$TEMPDIR make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1"
|
||||
DEBUG=1 VORTEX_RT_PATH=$TEMPDIR make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1
|
||||
status=$?
|
||||
fi
|
||||
|
||||
# cleanup temp directory
|
||||
trap "rm -rf $TEMPDIR" EXIT
|
||||
else
|
||||
# driver initialization
|
||||
if [ $SCOPE -eq 1 ]
|
||||
then
|
||||
echo "running: DEBUG=$DEBUG_LEVEL SCOPE=1 CONFIGS=$CONFIGS make -C $DRIVER_PATH"
|
||||
DEBUG=$DEBUG_LEVEL SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null
|
||||
else
|
||||
echo "running: DEBUG=$DEBUG_LEVEL CONFIGS=$CONFIGS make -C $DRIVER_PATH"
|
||||
DEBUG=$DEBUG_LEVEL CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null
|
||||
fi
|
||||
|
||||
# running application
|
||||
if [ $HAS_ARGS -eq 1 ]
|
||||
then
|
||||
echo "running: OPTS=$ARGS make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1"
|
||||
DEBUG=1 OPTS=$ARGS make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1
|
||||
status=$?
|
||||
else
|
||||
echo "running: make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1"
|
||||
DEBUG=1 make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1
|
||||
status=$?
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -f "$APP_PATH/trace.vcd" ]
|
||||
then
|
||||
mv -f $APP_PATH/trace.vcd .
|
||||
fi
|
||||
else
|
||||
if [ $TEMPBUILD -eq 1 ]
|
||||
then
|
||||
# setup temp directory
|
||||
TEMPDIR=$(mktemp -d)
|
||||
mkdir -p "$TEMPDIR/$DRIVER"
|
||||
|
||||
# driver initialization
|
||||
if [ $SCOPE -eq 1 ]
|
||||
then
|
||||
echo "running: DESTDIR=$TEMPDIR/$DRIVER SCOPE=1 CONFIGS=$CONFIGS make -C $DRIVER_PATH"
|
||||
DESTDIR="$TEMPDIR/$DRIVER" SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null
|
||||
else
|
||||
echo "running: DESTDIR=$TEMPDIR/$DRIVER CONFIGS=$CONFIGS make -C $DRIVER_PATH"
|
||||
DESTDIR="$TEMPDIR/$DRIVER" CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null
|
||||
fi
|
||||
|
||||
# running application
|
||||
if [ $HAS_ARGS -eq 1 ]
|
||||
then
|
||||
echo "running: VORTEX_RT_PATH=$TEMPDIR OPTS=$ARGS make -C $APP_PATH run-$DRIVER"
|
||||
VORTEX_RT_PATH=$TEMPDIR OPTS=$ARGS make -C $APP_PATH run-$DRIVER
|
||||
status=$?
|
||||
else
|
||||
echo "running: VORTEX_RT_PATH=$TEMPDIR make -C $APP_PATH run-$DRIVER"
|
||||
VORTEX_RT_PATH=$TEMPDIR make -C $APP_PATH run-$DRIVER
|
||||
status=$?
|
||||
fi
|
||||
|
||||
# cleanup temp directory
|
||||
trap "rm -rf $TEMPDIR" EXIT
|
||||
else
|
||||
|
||||
# driver initialization
|
||||
if [ $SCOPE -eq 1 ]
|
||||
then
|
||||
echo "running: SCOPE=1 CONFIGS=$CONFIGS make -C $DRIVER_PATH"
|
||||
SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null
|
||||
else
|
||||
echo "running: CONFIGS=$CONFIGS make -C $DRIVER_PATH"
|
||||
CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null
|
||||
fi
|
||||
|
||||
# running application
|
||||
if [ $HAS_ARGS -eq 1 ]
|
||||
then
|
||||
echo "running: OPTS=$ARGS make -C $APP_PATH run-$DRIVER"
|
||||
OPTS=$ARGS make -C $APP_PATH run-$DRIVER
|
||||
status=$?
|
||||
else
|
||||
echo "running: make -C $APP_PATH run-$DRIVER"
|
||||
make -C $APP_PATH run-$DRIVER
|
||||
status=$?
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
exit $status
|
||||
main "$@"
|
46
ci/install_dependencies.sh
Executable file
46
ci/install_dependencies.sh
Executable file
|
@ -0,0 +1,46 @@
|
|||
#!/bin/sh
|
||||
|
||||
# Copyright 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
set -e
|
||||
|
||||
# Function to check if GCC version is less than 11
|
||||
# Returns 0 (shell "true") when the version reported by `gcc -dumpversion`
# compares lower than 11 according to `dpkg --compare-versions`, and 1
# otherwise, so it can be used directly as an `if` condition.
# NOTE(review): `local` is not part of POSIX sh while the script's shebang
# is /bin/sh; this relies on the invoking shell supporting it - confirm.
check_gcc_version() {
|
||||
local gcc_version
|
||||
gcc_version=$(gcc -dumpversion)
|
||||
if dpkg --compare-versions "$gcc_version" lt 11; then
|
||||
return 0 # GCC version is less than 11
|
||||
else
|
||||
return 1 # GCC version is 11 or greater
|
||||
fi
|
||||
}
|
||||
|
||||
# Update package list
|
||||
apt-get update -y
|
||||
|
||||
# install system dependencies
|
||||
apt-get install -y build-essential valgrind libstdc++6 binutils python3 uuid-dev ccache
|
||||
|
||||
# Check and install GCC 11 if necessary
|
||||
if check_gcc_version; then
|
||||
echo "GCC version is less than 11. Installing GCC 11..."
|
||||
add-apt-repository -y ppa:ubuntu-toolchain-r/test
|
||||
apt-get update
|
||||
apt-get install -y g++-11 gcc-11
|
||||
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 100
|
||||
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100
|
||||
else
|
||||
echo "GCC version is 11 or greater. No need to install GCC 11."
|
||||
fi
|
|
@ -21,39 +21,10 @@ rm -f blackbox.*.cache
|
|||
|
||||
XLEN=${XLEN:=@XLEN@}
|
||||
|
||||
XSIZE=$((XLEN / 8))
|
||||
|
||||
echo "Vortex Regression Test: XLEN=$XLEN"
|
||||
|
||||
split_file() {
|
||||
if [[ $# -ne 2 ]]; then
|
||||
echo "Usage: $0 <filename> <start_with>"
|
||||
return 1
|
||||
fi
|
||||
input_file="$1"
|
||||
start_with="$2"
|
||||
if [[ ! -r "$input_file" ]]; then
|
||||
echo "Error: File '$input_file' is not readable or does not exist."
|
||||
return 1
|
||||
fi
|
||||
count=0
|
||||
output_file=""
|
||||
while IFS= read -r line; do
|
||||
if [[ $line == $start_with* ]]; then
|
||||
count=$((count + 1))
|
||||
output_file="$input_file.part$count"
|
||||
> "$output_file" # ensure empty
|
||||
fi
|
||||
if [[ -n "$output_file" ]]; then
|
||||
echo "$line" >> "$output_file"
|
||||
fi
|
||||
done < "$input_file"
|
||||
|
||||
if [[ $count -eq 0 ]]; then
|
||||
echo "No lines starting with '$start_with' were found in '$input_file'."
|
||||
fi
|
||||
}
|
||||
|
||||
###############################################################################
|
||||
|
||||
unittest()
|
||||
{
|
||||
make -C tests/unittest run
|
||||
|
@ -64,38 +35,33 @@ isa()
|
|||
{
|
||||
echo "begin isa tests..."
|
||||
|
||||
make -C sim/simx
|
||||
make -C sim/rtlsim
|
||||
|
||||
make -C tests/riscv/isa run-simx
|
||||
make -C tests/riscv/isa run-rtlsim
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-32f
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-32f
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-32f
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32f
|
||||
|
||||
if [ "$XLEN" == "64" ]
|
||||
then
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64d
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64d
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64d
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64d
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64f
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64f
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64fx
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64fx
|
||||
fi
|
||||
|
||||
# restore default prebuilt configuration
|
||||
make -C sim/rtlsim clean && make -C sim/rtlsim > /dev/null
|
||||
# clean build
|
||||
make -C sim/rtlsim clean
|
||||
|
||||
echo "isa tests done!"
|
||||
}
|
||||
|
@ -104,6 +70,9 @@ kernel()
|
|||
{
|
||||
echo "begin kernel tests..."
|
||||
|
||||
make -C sim/simx
|
||||
make -C sim/rtlsim
|
||||
|
||||
make -C tests/kernel run-simx
|
||||
make -C tests/kernel run-rtlsim
|
||||
|
||||
|
@ -114,16 +83,24 @@ regression()
|
|||
{
|
||||
echo "begin regression tests..."
|
||||
|
||||
make -C runtime/simx
|
||||
make -C runtime/rtlsim
|
||||
|
||||
make -C tests/regression run-simx
|
||||
make -C tests/regression run-rtlsim
|
||||
|
||||
# test global barrier
|
||||
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tgbar" --cores=2
|
||||
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-n1 -tgbar" --cores=2
|
||||
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=opae --app=dogfood --args="-n1 -tgbar" --cores=2
|
||||
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=xrt --app=dogfood --args="-n1 -tgbar" --cores=2
|
||||
|
||||
# test local barrier
|
||||
./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar"
|
||||
./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-n1 -tbar"
|
||||
./ci/blackbox.sh --driver=opae --app=dogfood --args="-n1 -tbar"
|
||||
./ci/blackbox.sh --driver=xrt --app=dogfood --args="-n1 -tbar"
|
||||
|
||||
# test temp driver mode
|
||||
./ci/blackbox.sh --driver=simx --app=vecadd --rebuild=3
|
||||
|
||||
# test for matmul
|
||||
CONFIGS="-DTC_NUM=4 -DTC_SIZE=8" ./ci/blackbox.sh --cores=4 --app=matmul --driver=simx --threads=32 --warps=32 --args="-n128 -d1"
|
||||
|
@ -135,6 +112,9 @@ opencl()
|
|||
{
|
||||
echo "begin opencl tests..."
|
||||
|
||||
make -C runtime/simx
|
||||
make -C runtime/rtlsim
|
||||
|
||||
make -C tests/opencl run-simx
|
||||
make -C tests/opencl run-rtlsim
|
||||
|
||||
|
@ -144,78 +124,75 @@ opencl()
|
|||
echo "opencl tests done!"
|
||||
}
|
||||
|
||||
cluster()
|
||||
cache()
|
||||
{
|
||||
echo "begin clustering tests..."
|
||||
echo "begin cache tests..."
|
||||
|
||||
# cores clustering
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=4 --clusters=1 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=1 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --app=diverge --args="-n1"
|
||||
# disable local memory
|
||||
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo --perf=1
|
||||
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=demo --perf=1
|
||||
|
||||
# disable L1 cache
|
||||
CONFIGS="-DL1_DISABLE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DL1_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DDCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DICACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
|
||||
# reduce l1 line size
|
||||
CONFIGS="-DL1_LINE_SIZE=$XSIZE" ./ci/blackbox.sh --driver=rtlsim --app=io_addr
|
||||
CONFIGS="-DL1_LINE_SIZE=$XSIZE" ./ci/blackbox.sh --driver=simx --app=io_addr
|
||||
CONFIGS="-DL1_LINE_SIZE=$XSIZE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DL1_LINE_SIZE=$XSIZE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
|
||||
# test cache ways
|
||||
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
|
||||
# test cache banking
|
||||
CONFIGS="-DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
|
||||
# test writeback
|
||||
CONFIGS="-DDCACHE_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --app=mstress
|
||||
CONFIGS="-DDCACHE_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --app=mstress
|
||||
CONFIGS="-DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
|
||||
CONFIGS="-DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
|
||||
|
||||
# cache clustering
|
||||
CONFIGS="-DSOCKET_SIZE=4 -DNUM_DCACHES=4 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --cores=4 --warps=1 --threads=2
|
||||
|
||||
# L2/L3
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l3cache --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --l2cache --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=4 --l2cache --l3cache --app=diverge --args="-n1"
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=rtlsim --cores=4 --l2cache --app=diverge --args="-n1"
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=4 --l2cache --app=diverge --args="-n1"
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=diverge --args="-n1"
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=diverge --args="-n1"
|
||||
|
||||
echo "clustering tests done!"
|
||||
echo "begin cache tests..."
|
||||
}
|
||||
|
||||
test_csv_trace()
|
||||
config1()
|
||||
{
|
||||
# test CSV trace generation
|
||||
make -C sim/simx clean && DEBUG=3 make -C sim/simx > /dev/null
|
||||
make -C sim/rtlsim clean && DEBUG=3 CONFIGS="-DGPR_RESET" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-simx-32im > run_simx.log
|
||||
make -C tests/riscv/isa run-rtlsim-32im > run_rtlsim.log
|
||||
split_file run_simx.log "Running "
|
||||
split_file run_rtlsim.log "Running "
|
||||
for file in ./run_simx.log.part*; do
|
||||
if [[ -f "$file" ]]; then
|
||||
file2="${file//simx/rtlsim}"
|
||||
if [[ -f "$file2" ]]; then
|
||||
./ci/trace_csv.py -tsimx $file -otrace_simx.csv
|
||||
./ci/trace_csv.py -trtlsim $file2 -otrace_rtlsim.csv
|
||||
diff trace_rtlsim.csv trace_simx.csv
|
||||
else
|
||||
echo "File $file2 not found."
|
||||
fi
|
||||
fi
|
||||
done
|
||||
# restore default prebuilt configuration
|
||||
make -C sim/simx clean && make -C sim/simx > /dev/null
|
||||
make -C sim/rtlsim clean && make -C sim/rtlsim > /dev/null
|
||||
}
|
||||
echo "begin configuration-1 tests..."
|
||||
|
||||
debug()
|
||||
{
|
||||
echo "begin debugging tests..."
|
||||
test_csv_trace
|
||||
./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=opae --cores=1 --scope --app=demo --args="-n1"
|
||||
# warp/threads
|
||||
./ci/blackbox.sh --driver=rtlsim --warps=1 --threads=1 --app=diverge
|
||||
./ci/blackbox.sh --driver=rtlsim --warps=2 --threads=2 --app=diverge
|
||||
./ci/blackbox.sh --driver=rtlsim --warps=2 --threads=8 --app=diverge
|
||||
./ci/blackbox.sh --driver=rtlsim --warps=8 --threads=2 --app=diverge
|
||||
./ci/blackbox.sh --driver=simx --warps=1 --threads=1 --app=diverge
|
||||
./ci/blackbox.sh --driver=simx --warps=8 --threads=16 --app=diverge
|
||||
|
||||
echo "debugging tests done!"
|
||||
}
|
||||
|
||||
config()
|
||||
{
|
||||
echo "begin configuration tests..."
|
||||
|
||||
# warp/threads configurations
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=1 --threads=1 --app=diverge
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=2 --threads=2 --app=diverge
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=2 --threads=8 --app=diverge
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=8 --threads=2 --app=diverge
|
||||
./ci/blackbox.sh --driver=simx --cores=1 --warps=1 --threads=1 --app=diverge
|
||||
./ci/blackbox.sh --driver=simx --cores=1 --warps=8 --threads=16 --app=diverge
|
||||
|
||||
# disable DPI
|
||||
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood
|
||||
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood
|
||||
# cores clustering
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=4 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --app=diverge --args="-n1"
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=diverge --args="-n1"
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --app=diverge --args="-n1"
|
||||
|
||||
# issue width
|
||||
CONFIGS="-DISSUE_WIDTH=2" ./ci/blackbox.sh --driver=rtlsim --app=diverge
|
||||
|
@ -235,66 +212,132 @@ config()
|
|||
CONFIGS="-DISSUE_WIDTH=2 -DNUM_FPU_BLOCK=1 -DNUM_FPU_LANES=2" ./ci/blackbox.sh --driver=simx --app=vecaddx
|
||||
CONFIGS="-DISSUE_WIDTH=4 -DNUM_FPU_BLOCK=4 -DNUM_FPU_LANES=4" ./ci/blackbox.sh --driver=simx --app=vecaddx
|
||||
|
||||
# FPU's PE scaling
|
||||
CONFIGS="-DFMA_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfmadd"
|
||||
CONFIGS="-DFCVT_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tftoi"
|
||||
CONFIGS="-DFDIV_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfdiv"
|
||||
CONFIGS="-DFSQRT_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfsqrt"
|
||||
CONFIGS="-DFNCP_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfclamp"
|
||||
|
||||
# LSU scaling
|
||||
CONFIGS="-DISSUE_WIDTH=2 -DNUM_LSU_BLOCK=1 -DNUM_LSU_LANES=2" ./ci/blackbox.sh --driver=rtlsim --app=vecaddx
|
||||
CONFIGS="-DISSUE_WIDTH=4 -DNUM_LSU_BLOCK=4 -DNUM_LSU_LANES=4" ./ci/blackbox.sh --driver=rtlsim --app=vecaddx
|
||||
CONFIGS="-DISSUE_WIDTH=2 -DNUM_LSU_BLOCK=1 -DNUM_LSU_LANES=2" ./ci/blackbox.sh --driver=simx --app=vecaddx
|
||||
CONFIGS="-DISSUE_WIDTH=4 -DNUM_LSU_BLOCK=4 -DNUM_LSU_LANES=4" ./ci/blackbox.sh --driver=simx --app=vecaddx
|
||||
|
||||
echo "configuration-1 tests done!"
|
||||
}
|
||||
|
||||
config2()
|
||||
{
|
||||
echo "begin configuration-2 tests..."
|
||||
|
||||
# test opaesim
|
||||
./ci/blackbox.sh --driver=opae --app=printf
|
||||
./ci/blackbox.sh --driver=opae --app=diverge
|
||||
./ci/blackbox.sh --driver=xrt --app=diverge
|
||||
|
||||
# disable DPI
|
||||
if [ "$XLEN" == "64" ]; then
|
||||
# need to disable trig on 64-bit due to a bug inside fpnew's sqrt core.
|
||||
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-xtrig -xbar -xgbar"
|
||||
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood --args="-xtrig -xbar -xgbar"
|
||||
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=xrt --app=dogfood --args="-xtrig -xbar -xgbar"
|
||||
else
|
||||
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood
|
||||
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood
|
||||
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=xrt --app=dogfood
|
||||
fi
|
||||
|
||||
# custom program startup address
|
||||
make -C tests/regression/dogfood clean-kernel
|
||||
STARTUP_ADDR=0x40000000 make -C tests/regression/dogfood
|
||||
./ci/blackbox.sh --driver=simx --app=dogfood
|
||||
./ci/blackbox.sh --driver=rtlsim --app=dogfood
|
||||
make -C tests/regression/dogfood clean-kernel
|
||||
|
||||
# disabling M & F extensions
|
||||
make -C sim/rtlsim clean && CONFIGS="-DEXT_M_DISABLE -DEXT_F_DISABLE" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-32i
|
||||
make -C sim/rtlsim clean && make -C sim/rtlsim > /dev/null
|
||||
make -C sim/rtlsim clean && CONFIGS="-DEXT_M_DISABLE -DEXT_F_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32i
|
||||
make -C sim/rtlsim clean
|
||||
|
||||
# disabling ZICOND extension
|
||||
CONFIGS="-DEXT_ZICOND_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo
|
||||
|
||||
# disable local memory
|
||||
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo --perf=1
|
||||
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --cores=1 --app=demo --perf=1
|
||||
# test 128-bit memory block
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=xrt --app=mstress
|
||||
|
||||
# disable L1 cache
|
||||
CONFIGS="-DL1_DISABLE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx
|
||||
CONFIGS="-DL1_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx
|
||||
CONFIGS="-DDCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx
|
||||
CONFIGS="-DICACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx
|
||||
# test XLEN-bit memory block
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=simx --app=mstress
|
||||
|
||||
# multiple L1 caches per socket
|
||||
CONFIGS="-DSOCKET_SIZE=4 -DNUM_DCACHES=2 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --cores=8 --warps=1 --threads=2
|
||||
# test memory coalescing
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=rtlsim --app=mstress --threads=8
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8
|
||||
|
||||
# test AXI bus
|
||||
AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
|
||||
# test single-bank memory
|
||||
if [ "$XLEN" == "64" ]; then
|
||||
CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=48" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=48" ./ci/blackbox.sh --driver=xrt --app=mstress
|
||||
else
|
||||
CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32" ./ci/blackbox.sh --driver=xrt --app=mstress
|
||||
fi
|
||||
|
||||
# reduce l1 line size
|
||||
CONFIGS="-DL1_LINE_SIZE=4" ./ci/blackbox.sh --driver=rtlsim --app=io_addr
|
||||
CONFIGS="-DL1_LINE_SIZE=4" ./ci/blackbox.sh --driver=simx --app=io_addr
|
||||
CONFIGS="-DL1_LINE_SIZE=4 -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DL1_LINE_SIZE=4 -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
# test larger memory address
|
||||
if [ "$XLEN" == "64" ]; then
|
||||
CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=49" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=49" ./ci/blackbox.sh --driver=xrt --app=mstress
|
||||
else
|
||||
CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=xrt --app=mstress
|
||||
fi
|
||||
|
||||
# test cache banking
|
||||
CONFIGS="-DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --cores=1 --app=sgemmx
|
||||
# test memory banks interleaving
|
||||
CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=1" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=0" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
|
||||
# test 128-bit MEM block
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --cores=1 --app=demo
|
||||
echo "configuration-2 tests done!"
|
||||
}
|
||||
|
||||
# test single-bank DRAM
|
||||
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --cores=1 --app=demo
|
||||
# Compare the instruction traces produced by simx and rtlsim.
#
# Rebuilds both simulators with DEBUG=3 (rtlsim additionally with -DGPR_RESET),
# runs the 32im ISA tests on each while capturing the output, converts both
# logs to CSV with ci/trace_csv.py and diffs the two CSV files. Both simulator
# build directories are cleaned afterwards.
test_csv_trace()
{
    local simx_log=run_simx.log
    local rtlsim_log=run_rtlsim.log
    local simx_csv=trace_simx.csv
    local rtlsim_csv=trace_rtlsim.csv

    # debug builds of both simulators (build output is discarded)
    make -C sim/simx clean && DEBUG=3 make -C sim/simx > /dev/null
    make -C sim/rtlsim clean && DEBUG=3 CONFIGS="-DGPR_RESET" make -C sim/rtlsim > /dev/null

    # capture the trace logs
    make -C tests/riscv/isa run-simx-32im > "$simx_log"
    make -C tests/riscv/isa run-rtlsim-32im > "$rtlsim_log"

    # convert each log to CSV and compare
    ./ci/trace_csv.py -tsimx "$simx_log" -o"$simx_csv"
    ./ci/trace_csv.py -trtlsim "$rtlsim_log" -o"$rtlsim_csv"
    diff "$rtlsim_csv" "$simx_csv"

    # clean build
    make -C sim/simx clean
    make -C sim/rtlsim clean
}
|
||||
|
||||
# test 27-bit DRAM address
|
||||
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --cores=1 --app=demo
|
||||
debug()
|
||||
{
|
||||
echo "begin debugging tests..."
|
||||
|
||||
echo "configuration tests done!"
|
||||
test_csv_trace
|
||||
|
||||
CONFIGS="-O0" ./ci/blackbox.sh --driver=opae --app=demo --args="-n1"
|
||||
CONFIGS="-O0" ./ci/blackbox.sh --driver=xrt --app=demo --args="-n1"
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=xrt --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
|
||||
|
||||
echo "debugging tests done!"
|
||||
}
|
||||
|
||||
# Scope test suite: runs the demo app through ci/blackbox.sh with the --scope
# option and SCOPE_DEPTH=256, once with the opae driver and once with xrt.
scope()
{
    echo "begin scope tests..."

    SCOPE_DEPTH=256 ./ci/blackbox.sh --driver=opae --app=demo --args="-n1" --scope
    SCOPE_DEPTH=256 ./ci/blackbox.sh --driver=xrt --app=demo --args="-n1" --scope

    # Completion banner matches the "begin scope tests..." banner above.
    echo "scope tests done!"
}
|
||||
|
||||
stress()
|
||||
|
@ -302,10 +345,8 @@ stress()
|
|||
echo "begin stress tests..."
|
||||
|
||||
# test verilator reset values
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=opae --app=printf
|
||||
./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n128" --l2cache
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1 -DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=xrt --app=sgemmx --args="-n128" --l2cache
|
||||
|
||||
echo "stress tests done!"
|
||||
}
|
||||
|
@ -315,7 +356,7 @@ synthesis()
|
|||
echo "begin synthesis tests..."
|
||||
|
||||
PREFIX=build_base make -C hw/syn/yosys clean
|
||||
PREFIX=build_base CONFIGS="-DDPI_DISABLE -DEXT_F_DISABLE" make -C hw/syn/yosys elaborate
|
||||
PREFIX=build_base CONFIGS="-DDPI_DISABLE -DEXT_F_DISABLE" make -C hw/syn/yosys synthesis
|
||||
|
||||
echo "synthesis tests done!"
|
||||
}
|
||||
|
@ -323,11 +364,9 @@ synthesis()
|
|||
show_usage()
|
||||
{
|
||||
echo "Vortex Regression Test"
|
||||
echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cluster] [--debug] [--config] [--stress] [--synthesis] [--all] [--h|--help]"
|
||||
echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--scope] [--stress] [--synthesis] [--all] [--h|--help]"
|
||||
}
|
||||
|
||||
start=$SECONDS
|
||||
|
||||
declare -a tests=()
|
||||
clean=0
|
||||
|
||||
|
@ -351,14 +390,20 @@ while [ "$1" != "" ]; do
|
|||
--opencl )
|
||||
tests+=("opencl")
|
||||
;;
|
||||
--cluster )
|
||||
tests+=("cluster")
|
||||
--cache )
|
||||
tests+=("cache")
|
||||
;;
|
||||
--config1 )
|
||||
tests+=("config1")
|
||||
;;
|
||||
--config2 )
|
||||
tests+=("config2")
|
||||
;;
|
||||
--debug )
|
||||
tests+=("debug")
|
||||
;;
|
||||
--config )
|
||||
tests+=("config")
|
||||
--scope )
|
||||
tests+=("scope")
|
||||
;;
|
||||
--stress )
|
||||
tests+=("stress")
|
||||
|
@ -373,9 +418,11 @@ while [ "$1" != "" ]; do
|
|||
tests+=("kernel")
|
||||
tests+=("regression")
|
||||
tests+=("opencl")
|
||||
tests+=("cluster")
|
||||
tests+=("cache")
|
||||
tests+=("config1")
|
||||
tests+=("config2")
|
||||
tests+=("debug")
|
||||
tests+=("config")
|
||||
tests+=("scope")
|
||||
tests+=("stress")
|
||||
tests+=("synthesis")
|
||||
;;
|
||||
|
@ -396,6 +443,8 @@ then
|
|||
make -s
|
||||
fi
|
||||
|
||||
start=$SECONDS
|
||||
|
||||
for test in "${tests[@]}"; do
|
||||
$test
|
||||
done
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
#!/bin/sh
|
||||
|
||||
# Copyright 2019-2023
|
||||
#
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -16,8 +16,7 @@
|
|||
|
||||
TOOLDIR=${TOOLDIR:=@TOOLDIR@}
|
||||
|
||||
export VERILATOR_ROOT=$TOOLDIR/verilator
|
||||
export PATH=$VERILATOR_ROOT/bin:$PATH
|
||||
export PATH=$TOOLDIR/verilator/bin:$PATH
|
||||
|
||||
export SV2V_PATH=$TOOLDIR/sv2v
|
||||
export PATH=$SV2V_PATH/bin:$PATH
|
||||
|
|
|
@ -23,9 +23,9 @@ OSVERSION=${OSVERSION:=@OSVERSION@}
|
|||
riscv32()
|
||||
{
|
||||
case $OSVERSION in
|
||||
"centos/7") parts=$(eval echo {a..h}) ;;
|
||||
"ubuntu/focal") parts=$(eval echo {a..k}) ;;
|
||||
*) parts=$(eval echo {a..j}) ;;
|
||||
"centos/7") parts=$(eval echo {a..l}) ;;
|
||||
"ubuntu/bionic") parts=$(eval echo {a..j}) ;;
|
||||
*) parts=$(eval echo {a..k}) ;;
|
||||
esac
|
||||
rm -f riscv32-gnu-toolchain.tar.bz2.parta*
|
||||
for x in $parts
|
||||
|
@ -41,7 +41,7 @@ riscv32()
|
|||
riscv64()
|
||||
{
|
||||
case $OSVERSION in
|
||||
"centos/7") parts=$(eval echo {a..h}) ;;
|
||||
"centos/7") parts=$(eval echo {a..l}) ;;
|
||||
*) parts=$(eval echo {a..j}) ;;
|
||||
esac
|
||||
rm -f riscv64-gnu-toolchain.tar.bz2.parta*
|
||||
|
|
320
ci/trace_csv.py
320
ci/trace_csv.py
|
@ -19,6 +19,8 @@ import csv
|
|||
import re
|
||||
import inspect
|
||||
|
||||
configs = None
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(description='CPU trace log to CSV format converter.')
|
||||
parser.add_argument('-t', '--type', default='simx', help='log type (rtlsim or simx)')
|
||||
|
@ -26,7 +28,26 @@ def parse_args():
|
|||
parser.add_argument('log', help='Input log file')
|
||||
return parser.parse_args()
|
||||
|
||||
def parse_simx(log_filename):
|
||||
def load_config(filename):
    """Read the simulator configuration from the ``CONFIGS:`` header of a log.

    Scans ``filename`` line by line and returns the values of the first line
    that matches the ``CONFIGS: num_threads=..., ...`` header.

    Args:
        filename: Path of the log file to scan.

    Returns:
        A dict with integer values for the keys ``num_threads``,
        ``num_warps``, ``num_cores``, ``num_clusters``, ``socket_size``,
        ``local_mem_base`` (parsed from hexadecimal) and ``num_barriers``.

    Raises:
        SystemExit: with exit status 1 when no line contains the header.
    """
    config_pattern = re.compile(
        r"CONFIGS: num_threads=(\d+), num_warps=(\d+), num_cores=(\d+), num_clusters=(\d+), socket_size=(\d+), local_mem_base=0x([0-9a-fA-F]+), num_barriers=(\d+)"
    )
    with open(filename, 'r') as file:
        for line in file:
            config_match = config_pattern.search(line)
            if config_match:
                config = {
                    'num_threads': int(config_match.group(1)),
                    'num_warps': int(config_match.group(2)),
                    'num_cores': int(config_match.group(3)),
                    'num_clusters': int(config_match.group(4)),
                    'socket_size': int(config_match.group(5)),
                    # local_mem_base is logged in hex without the 0x captured
                    'local_mem_base': int(config_match.group(6), 16),
                    'num_barriers': int(config_match.group(7)),
                }
                return config
    # Report the failure on stderr so it is not mixed with regular output.
    sys.stderr.write("Error: missing CONFIGS: header\n")
    sys.exit(1)
|
||||
|
||||
def parse_simx(log_lines):
|
||||
pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
|
||||
instr_pattern = r"Instr (0x[0-9a-fA-F]+):"
|
||||
opcode_pattern = r"Instr 0x[0-9a-fA-F]+: ([0-9a-zA-Z_\.]+)"
|
||||
|
@ -37,32 +58,32 @@ def parse_simx(log_filename):
|
|||
destination_pattern = r"Dest Reg: (.+)"
|
||||
uuid_pattern = r"#(\d+)"
|
||||
entries = []
|
||||
with open(log_filename, 'r') as log_file:
|
||||
instr_data = None
|
||||
for lineno, line in enumerate(log_file, start=1):
|
||||
try:
|
||||
if line.startswith("DEBUG Fetch:"):
|
||||
if instr_data:
|
||||
entries.append(instr_data)
|
||||
instr_data = {}
|
||||
instr_data["lineno"] = lineno
|
||||
instr_data["PC"] = re.search(pc_pattern, line).group(1)
|
||||
instr_data["core_id"] = re.search(core_id_pattern, line).group(1)
|
||||
instr_data["warp_id"] = re.search(warp_id_pattern, line).group(1)
|
||||
instr_data["tmask"] = re.search(tmask_pattern, line).group(1)
|
||||
instr_data["uuid"] = re.search(uuid_pattern, line).group(1)
|
||||
elif line.startswith("DEBUG Instr"):
|
||||
instr_data["instr"] = re.search(instr_pattern, line).group(1)
|
||||
instr_data["opcode"] = re.search(opcode_pattern, line).group(1)
|
||||
elif line.startswith("DEBUG Src"):
|
||||
src_reg = re.search(operands_pattern, line).group(1)
|
||||
instr_data["operands"] = (instr_data["operands"] + ', ' + src_reg) if 'operands' in instr_data else src_reg
|
||||
elif line.startswith("DEBUG Dest"):
|
||||
instr_data["destination"] = re.search(destination_pattern, line).group(1)
|
||||
except Exception as e:
|
||||
print("Error at line {}: {}".format(lineno, e))
|
||||
if instr_data:
|
||||
entries.append(instr_data)
|
||||
instr_data = None
|
||||
for lineno, line in enumerate(log_lines, start=1):
|
||||
try:
|
||||
if line.startswith("DEBUG Fetch:"):
|
||||
if instr_data:
|
||||
entries.append(instr_data)
|
||||
instr_data = {}
|
||||
instr_data["lineno"] = lineno
|
||||
instr_data["PC"] = re.search(pc_pattern, line).group(1)
|
||||
instr_data["core_id"] = int(re.search(core_id_pattern, line).group(1))
|
||||
instr_data["warp_id"] = int(re.search(warp_id_pattern, line).group(1))
|
||||
instr_data["tmask"] = re.search(tmask_pattern, line).group(1)
|
||||
instr_data["uuid"] = int(re.search(uuid_pattern, line).group(1))
|
||||
elif line.startswith("DEBUG Instr"):
|
||||
instr_data["instr"] = re.search(instr_pattern, line).group(1)
|
||||
instr_data["opcode"] = re.search(opcode_pattern, line).group(1)
|
||||
elif line.startswith("DEBUG Src"):
|
||||
src_reg = re.search(operands_pattern, line).group(1)
|
||||
instr_data["operands"] = (instr_data["operands"] + ', ' + src_reg) if 'operands' in instr_data else src_reg
|
||||
elif line.startswith("DEBUG Dest"):
|
||||
instr_data["destination"] = re.search(destination_pattern, line).group(1)
|
||||
except Exception as e:
|
||||
print("Error at line {}: {}".format(lineno, e))
|
||||
instr_data = None
|
||||
if instr_data:
|
||||
entries.append(instr_data)
|
||||
return entries
|
||||
|
||||
def reverse_binary(bin_str):
|
||||
|
@ -95,8 +116,9 @@ def append_value(text, reg, value, tmask_arr, sep):
|
|||
text += "}"
|
||||
return text, sep
|
||||
|
||||
def parse_rtlsim(log_filename):
|
||||
line_pattern = r"\d+: core(\d+)-(decode|issue|commit)"
|
||||
def parse_rtlsim(log_lines):
|
||||
global configs
|
||||
line_pattern = r"\d+: cluster(\d+)-socket(\d+)-core(\d+)-(decode|issue|commit)"
|
||||
pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
|
||||
instr_pattern = r"instr=(0x[0-9a-fA-F]+)"
|
||||
ex_pattern = r"ex=([a-zA-Z]+)"
|
||||
|
@ -116,124 +138,154 @@ def parse_rtlsim(log_filename):
|
|||
eop_pattern = r"eop=(\d)"
|
||||
uuid_pattern = r"#(\d+)"
|
||||
entries = []
|
||||
with open(log_filename, 'r') as log_file:
|
||||
instr_data = {}
|
||||
for lineno, line in enumerate(log_file, start=1):
|
||||
try:
|
||||
line_match = re.search(line_pattern, line)
|
||||
if line_match:
|
||||
PC = re.search(pc_pattern, line).group(1)
|
||||
warp_id = re.search(warp_id_pattern, line).group(1)
|
||||
tmask = re.search(tmask_pattern, line).group(1)
|
||||
uuid = re.search(uuid_pattern, line).group(1)
|
||||
core_id = line_match.group(1)
|
||||
stage = line_match.group(2)
|
||||
if stage == "decode":
|
||||
trace = {}
|
||||
trace["uuid"] = uuid
|
||||
trace["PC"] = PC
|
||||
trace["core_id"] = core_id
|
||||
trace["warp_id"] = warp_id
|
||||
trace["tmask"] = reverse_binary(tmask)
|
||||
trace["instr"] = re.search(instr_pattern, line).group(1)
|
||||
trace["opcode"] = re.search(op_pattern, line).group(1)
|
||||
trace["opds"] = bin_to_array(re.search(opds_pattern, line).group(1))
|
||||
trace["rd"] = re.search(rd_pattern, line).group(1)
|
||||
trace["rs1"] = re.search(rs1_pattern, line).group(1)
|
||||
trace["rs2"] = re.search(rs2_pattern, line).group(1)
|
||||
trace["rs3"] = re.search(rs3_pattern, line).group(1)
|
||||
instr_data = {}
|
||||
num_cores = configs['num_cores']
|
||||
socket_size = configs['socket_size']
|
||||
num_sockets = (num_cores + socket_size - 1) // socket_size
|
||||
for lineno, line in enumerate(log_lines, start=1):
|
||||
try:
|
||||
line_match = re.search(line_pattern, line)
|
||||
if line_match:
|
||||
PC = re.search(pc_pattern, line).group(1)
|
||||
warp_id = int(re.search(warp_id_pattern, line).group(1))
|
||||
tmask = re.search(tmask_pattern, line).group(1)
|
||||
uuid = int(re.search(uuid_pattern, line).group(1))
|
||||
cluster_id = int(line_match.group(1))
|
||||
socket_id = int(line_match.group(2))
|
||||
core_id = int(line_match.group(3))
|
||||
stage = line_match.group(4)
|
||||
if stage == "decode":
|
||||
trace = {}
|
||||
trace["uuid"] = uuid
|
||||
trace["PC"] = PC
|
||||
trace["core_id"] = ((((cluster_id * num_sockets) + socket_id) * socket_size) + core_id)
|
||||
trace["warp_id"] = warp_id
|
||||
trace["tmask"] = reverse_binary(tmask)
|
||||
trace["instr"] = re.search(instr_pattern, line).group(1)
|
||||
trace["opcode"] = re.search(op_pattern, line).group(1)
|
||||
trace["opds"] = bin_to_array(re.search(opds_pattern, line).group(1))
|
||||
trace["rd"] = re.search(rd_pattern, line).group(1)
|
||||
trace["rs1"] = re.search(rs1_pattern, line).group(1)
|
||||
trace["rs2"] = re.search(rs2_pattern, line).group(1)
|
||||
trace["rs3"] = re.search(rs3_pattern, line).group(1)
|
||||
instr_data[uuid] = trace
|
||||
elif stage == "issue":
|
||||
if uuid in instr_data:
|
||||
trace = instr_data[uuid]
|
||||
trace["lineno"] = lineno
|
||||
opds = trace["opds"]
|
||||
if opds[1]:
|
||||
trace["rs1_data"] = re.search(rs1_data_pattern, line).group(1).split(', ')[::-1]
|
||||
if opds[2]:
|
||||
trace["rs2_data"] = re.search(rs2_data_pattern, line).group(1).split(', ')[::-1]
|
||||
if opds[3]:
|
||||
trace["rs3_data"] = re.search(rs3_data_pattern, line).group(1).split(', ')[::-1]
|
||||
trace["issued"] = True
|
||||
instr_data[uuid] = trace
|
||||
elif stage == "issue":
|
||||
if uuid in instr_data:
|
||||
trace = instr_data[uuid]
|
||||
trace["lineno"] = lineno
|
||||
elif stage == "commit":
|
||||
if uuid in instr_data:
|
||||
trace = instr_data[uuid]
|
||||
if "issued" in trace:
|
||||
opds = trace["opds"]
|
||||
if opds[1]:
|
||||
trace["rs1_data"] = re.search(rs1_data_pattern, line).group(1).split(', ')[::-1]
|
||||
if opds[2]:
|
||||
trace["rs2_data"] = re.search(rs2_data_pattern, line).group(1).split(', ')[::-1]
|
||||
if opds[3]:
|
||||
trace["rs3_data"] = re.search(rs3_data_pattern, line).group(1).split(', ')[::-1]
|
||||
trace["issued"] = True
|
||||
dst_tmask_arr = bin_to_array(tmask)[::-1]
|
||||
wb = re.search(wb_pattern, line).group(1) == "1"
|
||||
if wb:
|
||||
rd_data = re.search(rd_data_pattern, line).group(1).split(', ')[::-1]
|
||||
if 'rd_data' in trace:
|
||||
merged_rd_data = trace['rd_data']
|
||||
for i in range(len(dst_tmask_arr)):
|
||||
if dst_tmask_arr[i] == 1:
|
||||
merged_rd_data[i] = rd_data[i]
|
||||
trace['rd_data'] = merged_rd_data
|
||||
else:
|
||||
trace['rd_data'] = rd_data
|
||||
instr_data[uuid] = trace
|
||||
elif stage == "commit":
|
||||
if uuid in instr_data:
|
||||
trace = instr_data[uuid]
|
||||
if "issued" in trace:
|
||||
opds = trace["opds"]
|
||||
dst_tmask_arr = bin_to_array(tmask)[::-1]
|
||||
wb = re.search(wb_pattern, line).group(1) == "1"
|
||||
eop = re.search(eop_pattern, line).group(1) == "1"
|
||||
if eop:
|
||||
tmask_arr = bin_to_array(trace["tmask"])
|
||||
destination = ''
|
||||
if wb:
|
||||
rd_data = re.search(rd_data_pattern, line).group(1).split(', ')[::-1]
|
||||
if 'rd_data' in trace:
|
||||
merged_rd_data = trace['rd_data']
|
||||
for i in range(len(dst_tmask_arr)):
|
||||
if dst_tmask_arr[i] == 1:
|
||||
merged_rd_data[i] = rd_data[i]
|
||||
trace['rd_data'] = merged_rd_data
|
||||
else:
|
||||
trace['rd_data'] = rd_data
|
||||
instr_data[uuid] = trace
|
||||
eop = re.search(eop_pattern, line).group(1) == "1"
|
||||
if eop:
|
||||
tmask_arr = bin_to_array(trace["tmask"])
|
||||
destination = ''
|
||||
if wb:
|
||||
destination, sep = append_value(destination, trace["rd"], trace['rd_data'], tmask_arr, False)
|
||||
del trace['rd_data']
|
||||
trace["destination"] = destination
|
||||
operands = ''
|
||||
sep = False
|
||||
if opds[1]:
|
||||
operands, sep = append_value(operands, trace["rs1"], trace["rs1_data"], tmask_arr, sep)
|
||||
del trace["rs1_data"]
|
||||
if opds[2]:
|
||||
operands, sep = append_value(operands, trace["rs2"], trace["rs2_data"], tmask_arr, sep)
|
||||
del trace["rs2_data"]
|
||||
if opds[3]:
|
||||
operands, sep = append_value(operands, trace["rs3"], trace["rs3_data"], tmask_arr, sep)
|
||||
del trace["rs3_data"]
|
||||
trace["operands"] = operands
|
||||
del trace["opds"]
|
||||
del trace["rd"]
|
||||
del trace["rs1"]
|
||||
del trace["rs2"]
|
||||
del trace["rs3"]
|
||||
del trace["issued"]
|
||||
del instr_data[uuid]
|
||||
entries.append(trace)
|
||||
except Exception as e:
|
||||
print("Error at line {}: {}".format(lineno, e))
|
||||
destination, sep = append_value(destination, trace["rd"], trace['rd_data'], tmask_arr, False)
|
||||
del trace['rd_data']
|
||||
trace["destination"] = destination
|
||||
operands = ''
|
||||
sep = False
|
||||
if opds[1]:
|
||||
operands, sep = append_value(operands, trace["rs1"], trace["rs1_data"], tmask_arr, sep)
|
||||
del trace["rs1_data"]
|
||||
if opds[2]:
|
||||
operands, sep = append_value(operands, trace["rs2"], trace["rs2_data"], tmask_arr, sep)
|
||||
del trace["rs2_data"]
|
||||
if opds[3]:
|
||||
operands, sep = append_value(operands, trace["rs3"], trace["rs3_data"], tmask_arr, sep)
|
||||
del trace["rs3_data"]
|
||||
trace["operands"] = operands
|
||||
del trace["opds"]
|
||||
del trace["rd"]
|
||||
del trace["rs1"]
|
||||
del trace["rs2"]
|
||||
del trace["rs3"]
|
||||
del trace["issued"]
|
||||
del instr_data[uuid]
|
||||
entries.append(trace)
|
||||
except Exception as e:
|
||||
print("Error at line {}: {}".format(lineno, e))
|
||||
return entries
|
||||
|
||||
def write_csv(log_filename, csv_filename, log_type):
|
||||
entries = None
|
||||
|
||||
# parse log file
|
||||
if log_type == "rtlsim":
|
||||
entries = parse_rtlsim(log_filename)
|
||||
elif log_type == "simx":
|
||||
entries = parse_simx(log_filename)
|
||||
else:
|
||||
print('Error: invalid log type')
|
||||
sys.exit()
|
||||
|
||||
# sort entries by uuid
|
||||
entries.sort(key=lambda x: (int(x['uuid'])))
|
||||
for entry in entries:
|
||||
del entry['lineno']
|
||||
|
||||
# write to CSV
|
||||
def write_csv(sublogs, csv_filename, log_type):
|
||||
with open(csv_filename, 'w', newline='') as csv_file:
|
||||
fieldnames = ["uuid", "PC", "opcode", "instr", "core_id", "warp_id", "tmask", "destination", "operands"]
|
||||
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
|
||||
writer.writeheader()
|
||||
for entry in entries:
|
||||
writer.writerow(entry)
|
||||
|
||||
for sublog in sublogs:
|
||||
entries = None
|
||||
|
||||
# parse sublog
|
||||
if log_type == "rtlsim":
|
||||
entries = parse_rtlsim(sublog)
|
||||
elif log_type == "simx":
|
||||
entries = parse_simx(sublog)
|
||||
else:
|
||||
print('Error: invalid log type')
|
||||
sys.exit()
|
||||
|
||||
# sort entries by uuid
|
||||
entries.sort(key=lambda x: (int(x['uuid'])))
|
||||
for entry in entries:
|
||||
del entry['lineno']
|
||||
|
||||
for entry in entries:
|
||||
writer.writerow(entry)
|
||||
|
||||
# Split a trace log file into a list of "sublogs", each a list of raw lines.
#   log_filename: path of the log file; it is read fully into memory.
#   returns: a list of line lists. Each sublog starts at a line beginning
#            with "[VXDRV] START" and runs up to (not including) the next
#            such line. If the file has no such marker, the result is a
#            single sublog holding every line of the file.
def split_log_file(log_filename):
|
||||
with open(log_filename, 'r') as log_file:
|
||||
log_lines = log_file.readlines()
|
||||
|
||||
sublogs = []
|
||||
# None until the first "[VXDRV] START" marker has been seen
current_sublog = None
|
||||
|
||||
for line in log_lines:
|
||||
# a START marker closes the sublog in progress (if any) and opens a new one
if line.startswith("[VXDRV] START"):
|
||||
if current_sublog is not None:
|
||||
sublogs.append(current_sublog)
|
||||
current_sublog = [line]
|
||||
# lines seen before the first marker are not added to any sublog
elif current_sublog is not None:
|
||||
current_sublog.append(line)
|
||||
|
||||
# flush the last open sublog
if current_sublog is not None:
|
||||
sublogs.append(current_sublog)
|
||||
# no marker found anywhere: treat the whole file as one sublog
else:
|
||||
sublogs.append(log_lines)
|
||||
|
||||
return sublogs
|
||||
|
||||
def main():
|
||||
global configs
|
||||
args = parse_args()
|
||||
write_csv(args.log, args.csv, args.type)
|
||||
configs = load_config(args.log)
|
||||
sublogs = split_log_file(args.log)
|
||||
write_csv(sublogs, args.csv, args.type)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#!/usr/bin/env python
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright 2019-2023
|
||||
#
|
||||
|
|
|
@ -19,7 +19,7 @@ TOOLDIR ?= @TOOLDIR@
|
|||
|
||||
OSVERSION ?= @OSVERSION@
|
||||
|
||||
PREFIX ?= @PREFIX@
|
||||
INSTALLDIR ?= @INSTALLDIR@
|
||||
|
||||
LLVM_VORTEX ?= $(TOOLDIR)/llvm-vortex
|
||||
|
||||
|
@ -31,5 +31,4 @@ RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv$(XLEN)-gnu-toolchain
|
|||
RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf
|
||||
RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX)
|
||||
|
||||
VORTEX_RT_PATH ?= $(VORTEX_HOME)/runtime
|
||||
VORTEX_KN_PATH ?= $(VORTEX_HOME)/kernel
|
||||
THIRD_PARTY_DIR ?= $(VORTEX_HOME)/third_party
|
10
configure
vendored
10
configure
vendored
|
@ -26,6 +26,8 @@ detect_osversion() {
|
|||
case "$VERSION_CODENAME" in
|
||||
bionic) osversion="ubuntu/bionic";;
|
||||
focal) osversion="ubuntu/focal";;
|
||||
jammy) osversion="ubuntu/focal";;
|
||||
noble) osversion="ubuntu/focal";;
|
||||
# Add new versions as needed
|
||||
esac
|
||||
;;
|
||||
|
@ -63,7 +65,7 @@ copy_files() {
|
|||
filename_no_ext="${filename%.in}"
|
||||
dest_file="$dest_dir/$filename_no_ext"
|
||||
mkdir -p "$dest_dir"
|
||||
sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@PREFIX@|$PREFIX|g" "$file" > "$dest_file"
|
||||
sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g; s|@CURRENTDIR@|$CURRENT_DIR|g" "$file" > "$dest_file"
|
||||
# apply permissions to bash scripts
|
||||
read -r firstline < "$dest_file"
|
||||
if [[ "$firstline" =~ ^#!.*bash ]]; then
|
||||
|
@ -111,7 +113,7 @@ copy_files() {
|
|||
|
||||
# default configuration parameters
|
||||
default_xlen=32
|
||||
default_tooldir=/opt
|
||||
default_tooldir=$HOME/tools
|
||||
default_osversion=$(detect_osversion)
|
||||
default_prefix=$CURRENT_DIR
|
||||
|
||||
|
@ -140,8 +142,8 @@ PREFIX=${PREFIX:=$default_prefix}
|
|||
usage() {
|
||||
echo "Usage: $0 [--xlen=<value>] [--tooldir=<path>] [--osversion=<version>]"
|
||||
echo " --xlen=<value> Set the XLEN value (default: 32)"
|
||||
echo " --tooldir=<path> Set the TOOLDIR path (default: /opt)"
|
||||
echo " --osversion=<version> Set the OS Version (default: $(detect_os))"
|
||||
echo " --tooldir=<path> Set the TOOLDIR path (default: $HOME/tools)"
|
||||
echo " --osversion=<version> Set the OS Version (default: $(detect_osversion))"
|
||||
echo " --prefix=<path> Set installation directory"
|
||||
exit 1
|
||||
}
|
||||
|
|
|
@ -34,7 +34,7 @@ The hardware configuration file `/hw/rtl/VX_config.vh` defines all the hardware
|
|||
- `NUM_THREADS`: Number of threads per warps
|
||||
- `PERF_ENABLE`: enable the use of all profile counters
|
||||
|
||||
You configure the syntesis build from the command line:
|
||||
You can configure the synthesis build from the command line:
|
||||
|
||||
$ CONFIGS="-DPERF_ENABLE -DNUM_THREADS=8" make
|
||||
|
||||
|
@ -43,7 +43,7 @@ OPAE Build Progress
|
|||
|
||||
You can check the last 10 lines of the build log for possible errors until the build completes.
|
||||
|
||||
$ tail -n 10 <build_dir>/build.log
|
||||
$ tail -n 10 <build_dir>/synth/build.log
|
||||
|
||||
Check if the build is still running by looking for quartus_sh, quartus_syn, or quartus_fit programs.
|
||||
|
||||
|
@ -53,7 +53,7 @@ If the build fails and you need to restart it, clean up the build folder using t
|
|||
|
||||
$ make clean
|
||||
|
||||
The file `vortex_afu.gbs` should exist when the build is done:
|
||||
The bitstream file `vortex_afu.gbs` should exist when the build is done:
|
||||
|
||||
$ ls -lsa <build_dir>/synth/vortex_afu.gbs
|
||||
|
||||
|
@ -65,10 +65,28 @@ Signing the bitstream and Programming the FPGA
|
|||
$ PACSign PR -t UPDATE -H openssl_manager -i vortex_afu.gbs -o vortex_afu_unsigned_ssl.gbs
|
||||
$ fpgasupdate vortex_afu_unsigned_ssl.gbs
|
||||
|
||||
FPGA sample test running OpenCL sgemm kernel
|
||||
--------------------------------------------
|
||||
Sample FPGA Run Test
|
||||
--------------------
|
||||
|
||||
Run the following from the Vortex root directory
|
||||
Ensure you have the correct opae runtime for the FPGA target
|
||||
|
||||
$ TARGET=FPGA make -C runtime/opae
|
||||
|
||||
Run the following from your Vortex build directory
|
||||
|
||||
$ TARGET=fpga ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n128"
|
||||
|
||||
Testing Vortex using OPAE with Intel ASE Simulation
|
||||
---------------------------------------------------
|
||||
|
||||
Building ASE synthesis
|
||||
|
||||
$ TARGET=asesim make -C runtime/opae
|
||||
|
||||
Building ASE runtime
|
||||
|
||||
$ TARGET=asesim make -C runtime/opae
|
||||
|
||||
Running ASE simulation
|
||||
|
||||
$ ASE_LOG=0 ASE_WORKDIR=<build_dir>/synth/work TARGET=asesim ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n16"
|
|
@ -53,9 +53,9 @@ A waveform trace `trace.vcd` will be generated in the current directory during t
|
|||
## Analyzing Vortex trace log
|
||||
|
||||
When debugging Vortex RTL or SimX Simulator, reading the trace run.log file can be overwhelming when the trace gets really large.
|
||||
We provide a trace sanitizer tool under ./hw/scripts/trace_csv.py that you can use to convert the large trace into a CSV file containing all the instructions that executed with their source and destination operands. To increase compatibility between traces you will need to initialize RTLSIM's GPRs to zero by defining GPR_RESET.
|
||||
We provide a trace sanitizer tool under ./hw/scripts/trace_csv.py that you can use to convert the large trace into a CSV file containing all the instructions that executed with their source and destination operands.
|
||||
|
||||
$ CONFIGS="-DGPR_RESET" ./ci/blackbox.sh --driver=rtlsim --app=demo --debug=3 --log=run_rtlsim.log
|
||||
$ ./ci/blackbox.sh --driver=rtlsim --app=demo --debug=3 --log=run_rtlsim.log
|
||||
$ ./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv
|
||||
|
||||
$ ./ci/blackbox.sh --driver=simx --app=demo --debug=3 --log=run_simx.log
|
||||
|
|
|
@ -7,7 +7,8 @@
|
|||
- [Cache Subsystem](cache_subsystem.md)
|
||||
- [Software](software.md)
|
||||
- [Simulation](simulation.md)
|
||||
- [FPGA Setup Guide](fpga_setup.md)
|
||||
- [Altera FPGA Setup Guide](altera_fpga_guide.md)
|
||||
- [Xilinx FPGA Setup Guide](xilinx_fpga_guide.md)
|
||||
- [Debugging](debugging.md)
|
||||
- [Useful Links](references.md)
|
||||
|
||||
|
@ -27,6 +28,6 @@ Running Vortex simulators with different configurations:
|
|||
|
||||
$ ./ci/blackbox.sh --driver=opae --clusters=1 --cores=4 --warps=4 --threads=2 --app=demo
|
||||
|
||||
- Run dogfood driver test with simx driver and Vortex config of 4 cluster, 4 cores, 8 warps, 6 threads
|
||||
- Run dogfood driver test with simx driver and Vortex config of 4 clusters, 4 cores, 8 warps, 6 threads
|
||||
|
||||
$ ./ci/blackbox.sh --driver=simx --clusters=4 --cores=4 --warps=8 --threads=6 --app=dogfood
|
||||
|
|
52
docs/xilinx_fpga_guide.md
Normal file
52
docs/xilinx_fpga_guide.md
Normal file
|
@ -0,0 +1,52 @@
|
|||
# FPGA Startup and Configuration Guide
|
||||
|
||||
XRT Environment Setup
|
||||
----------------------
|
||||
|
||||
$ source /opt/xilinx/Vitis/2023.1/settings64.sh
|
||||
$ source /opt/xilinx/xrt/setup.sh
|
||||
|
||||
|
||||
Check Installed FPGA Platforms
|
||||
------------------------------
|
||||
|
||||
$ platforminfo -l
|
||||
|
||||
|
||||
Build FPGA image
|
||||
----------------
|
||||
|
||||
$ cd hw/syn/xilinx/xrt
|
||||
$ PREFIX=test1 PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 TARGET=hw NUM_CORES=4 make
|
||||
|
||||
This will run the synthesis under a new build directory: BUILD_DIR := "\<PREFIX>\_\<PLATFORM>\_\<TARGET>"
|
||||
|
||||
The generated bitstream will be located under <BUILD_DIR>/bin/vortex_afu.xclbin
|
||||
|
||||
Sample FPGA Run Test
|
||||
--------------------
|
||||
|
||||
Ensure you have the correct XRT runtime for the FPGA target
|
||||
|
||||
$ make -C runtime/xrt clean
|
||||
$ TARGET=hw make -C runtime/xrt
|
||||
|
||||
Run the following from your Vortex build directory
|
||||
|
||||
$ TARGET=hw FPGA_BIN_DIR=<BUILD_DIR>/bin ./ci/blackbox.sh --driver=xrt --app=sgemm --args="-n128"
|
||||
|
||||
Testing Vortex using XRT Hardware Emulation
|
||||
-------------------------------------------
|
||||
|
||||
Building XRT's hw_emu target
|
||||
|
||||
$ cd hw/syn/xilinx/xrt
|
||||
$ PREFIX=test2 PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 TARGET=hw_emu make
|
||||
|
||||
Building XRT hw_emu runtime
|
||||
|
||||
$ TARGET=hw_emu make -C runtime/xrt
|
||||
|
||||
Running XRT hw_emu simulation
|
||||
|
||||
$ TARGET=hw_emu FPGA_BIN_DIR=<BUILD_DIR>/bin ./ci/blackbox.sh --driver=xrt --app=sgemm
|
|
@ -9,13 +9,14 @@ all: config
|
|||
|
||||
config: VX_config.h VX_types.h
|
||||
|
||||
VX_config.h: $(RTL_DIR)/VX_config.vh
|
||||
VX_config.h: $(RTL_DIR)/VX_config.vh
|
||||
$(SCRIPT_DIR)/gen_config.py -i $(RTL_DIR)/VX_config.vh -o VX_config.h
|
||||
|
||||
VX_types.h: $(RTL_DIR)/VX_types.vh
|
||||
VX_types.h: $(RTL_DIR)/VX_types.vh
|
||||
$(SCRIPT_DIR)/gen_config.py -i $(RTL_DIR)/VX_types.vh -o VX_types.h
|
||||
|
||||
clean:
|
||||
$(MAKE) -C unittest clean
|
||||
rm -f VX_config.h VX_types.h
|
||||
|
||||
.PHONY: VX_config.h VX_types.h
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,8 +14,6 @@
|
|||
`ifndef FLOAT_DPI_VH
|
||||
`define FLOAT_DPI_VH
|
||||
|
||||
`include "VX_config.vh"
|
||||
|
||||
import "DPI-C" function void dpi_fadd(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fsub(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fmul(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
|
|
|
@ -47,8 +47,6 @@ extern "C" {
|
|||
void dpi_trace(int level, const char* format, ...);
|
||||
void dpi_trace_start();
|
||||
void dpi_trace_stop();
|
||||
|
||||
uint64_t dpi_uuid_gen(bool reset, int wid);
|
||||
}
|
||||
|
||||
bool sim_trace_enabled();
|
||||
|
@ -204,17 +202,3 @@ void dpi_trace_start() {
|
|||
void dpi_trace_stop() {
|
||||
sim_trace_enable(false);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
std::unordered_map<uint32_t, uint32_t> g_uuid_gens;
|
||||
|
||||
uint64_t dpi_uuid_gen(bool reset, int wid) {
|
||||
if (reset) {
|
||||
g_uuid_gens.clear();
|
||||
return 0;
|
||||
}
|
||||
uint32_t instr_uuid = g_uuid_gens[wid]++;
|
||||
uint64_t uuid = (uint64_t(wid) << 32) | instr_uuid;
|
||||
return uuid;
|
||||
}
|
|
@ -14,8 +14,6 @@
|
|||
`ifndef UTIL_DPI_VH
|
||||
`define UTIL_DPI_VH
|
||||
|
||||
`include "VX_config.vh"
|
||||
|
||||
`ifdef XLEN_64
|
||||
`define INT_TYPE longint
|
||||
`else
|
||||
|
@ -32,6 +30,4 @@ import "DPI-C" function void dpi_trace(input int level, input string format /*ve
|
|||
import "DPI-C" function void dpi_trace_start();
|
||||
import "DPI-C" function void dpi_trace_stop();
|
||||
|
||||
import "DPI-C" function longint dpi_uuid_gen(input logic reset, input int wid);
|
||||
|
||||
`endif
|
||||
|
|
|
@ -14,7 +14,8 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_cluster import VX_gpu_pkg::*; #(
|
||||
parameter CLUSTER_ID = 0
|
||||
parameter CLUSTER_ID = 0,
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
|
@ -55,14 +56,12 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
VX_gbar_bus_if per_socket_gbar_bus_if[`NUM_SOCKETS]();
|
||||
VX_gbar_bus_if gbar_bus_if();
|
||||
|
||||
`RESET_RELAY (gbar_reset, reset);
|
||||
|
||||
VX_gbar_arb #(
|
||||
.NUM_REQS (`NUM_SOCKETS),
|
||||
.OUT_BUF ((`NUM_SOCKETS > 2) ? 1 : 0) // gbar_unit has no backpressure
|
||||
) gbar_arb (
|
||||
.clk (clk),
|
||||
.reset (gbar_reset),
|
||||
.reset (reset),
|
||||
.bus_in_if (per_socket_gbar_bus_if),
|
||||
.bus_out_if (gbar_bus_if)
|
||||
);
|
||||
|
@ -71,7 +70,7 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
.INSTANCE_ID ($sformatf("gbar%0d", CLUSTER_ID))
|
||||
) gbar_unit (
|
||||
.clk (clk),
|
||||
.reset (gbar_reset),
|
||||
.reset (reset),
|
||||
.gbar_bus_if (gbar_bus_if)
|
||||
);
|
||||
|
||||
|
@ -85,7 +84,7 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (l2_reset, reset);
|
||||
|
||||
VX_cache_wrap #(
|
||||
.INSTANCE_ID ("l2cache"),
|
||||
.INSTANCE_ID ($sformatf("%s-l2cache", INSTANCE_ID)),
|
||||
.CACHE_SIZE (`L2_CACHE_SIZE),
|
||||
.LINE_SIZE (`L2_LINE_SIZE),
|
||||
.NUM_BANKS (`L2_NUM_BANKS),
|
||||
|
@ -95,12 +94,14 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
.CRSQ_SIZE (`L2_CRSQ_SIZE),
|
||||
.MSHR_SIZE (`L2_MSHR_SIZE),
|
||||
.MRSQ_SIZE (`L2_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`L2_MREQ_SIZE),
|
||||
.MREQ_SIZE (`L2_WRITEBACK ? `L2_MSHR_SIZE : `L2_MREQ_SIZE),
|
||||
.TAG_WIDTH (L2_TAG_WIDTH),
|
||||
.WRITE_ENABLE (1),
|
||||
.WRITEBACK (`L2_WRITEBACK),
|
||||
.DIRTY_BYTES (`L2_WRITEBACK),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.CORE_OUT_BUF (2),
|
||||
.MEM_OUT_BUF (2),
|
||||
.CORE_OUT_BUF (3),
|
||||
.MEM_OUT_BUF (3),
|
||||
.NC_ENABLE (1),
|
||||
.PASSTHRU (!`L2_ENABLED)
|
||||
) l2cache (
|
||||
|
@ -115,24 +116,22 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
VX_dcr_bus_if socket_dcr_bus_tmp_if();
|
||||
assign socket_dcr_bus_tmp_if.write_valid = dcr_bus_if.write_valid && (dcr_bus_if.write_addr >= `VX_DCR_BASE_STATE_BEGIN && dcr_bus_if.write_addr < `VX_DCR_BASE_STATE_END);
|
||||
assign socket_dcr_bus_tmp_if.write_addr = dcr_bus_if.write_addr;
|
||||
assign socket_dcr_bus_tmp_if.write_data = dcr_bus_if.write_data;
|
||||
|
||||
wire [`NUM_SOCKETS-1:0] per_socket_busy;
|
||||
|
||||
`BUFFER_DCR_BUS_IF (socket_dcr_bus_if, socket_dcr_bus_tmp_if, (`NUM_SOCKETS > 1));
|
||||
|
||||
// Generate all sockets
|
||||
for (genvar i = 0; i < `NUM_SOCKETS; ++i) begin
|
||||
for (genvar socket_id = 0; socket_id < `NUM_SOCKETS; ++socket_id) begin : g_sockets
|
||||
|
||||
`RESET_RELAY (socket_reset, reset);
|
||||
|
||||
VX_dcr_bus_if socket_dcr_bus_if();
|
||||
wire is_base_dcr_addr = (dcr_bus_if.write_addr >= `VX_DCR_BASE_STATE_BEGIN && dcr_bus_if.write_addr < `VX_DCR_BASE_STATE_END);
|
||||
`BUFFER_DCR_BUS_IF (socket_dcr_bus_if, dcr_bus_if, is_base_dcr_addr, (`NUM_SOCKETS > 1))
|
||||
|
||||
VX_socket #(
|
||||
.SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + i)
|
||||
.SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + socket_id),
|
||||
.INSTANCE_ID ($sformatf("%s-socket%0d", INSTANCE_ID, socket_id))
|
||||
) socket (
|
||||
`SCOPE_IO_BIND (scope_socket+i)
|
||||
`SCOPE_IO_BIND (scope_socket+socket_id)
|
||||
|
||||
.clk (clk),
|
||||
.reset (socket_reset),
|
||||
|
@ -143,13 +142,13 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
|
||||
.dcr_bus_if (socket_dcr_bus_if),
|
||||
|
||||
.mem_bus_if (per_socket_mem_bus_if[i]),
|
||||
.mem_bus_if (per_socket_mem_bus_if[socket_id]),
|
||||
|
||||
`ifdef GBAR_ENABLE
|
||||
.gbar_bus_if (per_socket_gbar_bus_if[i]),
|
||||
.gbar_bus_if (per_socket_gbar_bus_if[socket_id]),
|
||||
`endif
|
||||
|
||||
.busy (per_socket_busy[i])
|
||||
.busy (per_socket_busy[socket_id])
|
||||
);
|
||||
end
|
||||
|
||||
|
|
|
@ -109,7 +109,6 @@
|
|||
`ifndef SOCKET_SIZE
|
||||
`define SOCKET_SIZE `MIN(4, `NUM_CORES)
|
||||
`endif
|
||||
`define NUM_SOCKETS `UP(`NUM_CORES / `SOCKET_SIZE)
|
||||
|
||||
// Size of Tensor Core
|
||||
`ifndef TC_SIZE
|
||||
|
@ -221,7 +220,7 @@
|
|||
`ifndef IO_COUT_ADDR
|
||||
`define IO_COUT_ADDR `IO_BASE_ADDR
|
||||
`endif
|
||||
`define IO_COUT_SIZE `MEM_BLOCK_SIZE
|
||||
`define IO_COUT_SIZE 64
|
||||
|
||||
`ifndef IO_MPM_ADDR
|
||||
`define IO_MPM_ADDR (`IO_COUT_ADDR + `IO_COUT_SIZE)
|
||||
|
@ -233,15 +232,17 @@
|
|||
`endif
|
||||
`define STACK_SIZE (1 << `STACK_LOG2_SIZE)
|
||||
|
||||
`define RESET_DELAY 8
|
||||
`define RESET_DELAY 8
|
||||
|
||||
`ifndef STALL_TIMEOUT
|
||||
`define STALL_TIMEOUT (100000 * (1 ** (`L2_ENABLED + `L3_ENABLED)))
|
||||
`endif
|
||||
|
||||
`ifndef SV_DPI
|
||||
`ifndef DPI_DISABLE
|
||||
`define DPI_DISABLE
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifndef FPU_FPNEW
|
||||
`ifndef FPU_DSP
|
||||
|
@ -303,7 +304,7 @@
|
|||
|
||||
// Number of SFU units
|
||||
`ifndef NUM_SFU_LANES
|
||||
`define NUM_SFU_LANES `MIN(`NUM_THREADS, 4)
|
||||
`define NUM_SFU_LANES `NUM_THREADS
|
||||
`endif
|
||||
`ifndef NUM_SFU_BLOCKS
|
||||
`define NUM_SFU_BLOCKS 1
|
||||
|
@ -427,22 +428,27 @@
|
|||
`define LATENCY_FCVT 5
|
||||
`endif
|
||||
|
||||
// FMA Bandwidth ratio
|
||||
`ifndef FMA_PE_RATIO
|
||||
`define FMA_PE_RATIO 1
|
||||
`endif
|
||||
|
||||
// FDIV Bandwidth ratio
|
||||
`ifndef FDIV_PE_RATIO
|
||||
`define FDIV_PE_RATIO 8
|
||||
`endif
|
||||
|
||||
// FSQRT Bandwidth ratio
|
||||
`ifndef FSQRT_PE_RATIO
|
||||
`define FSQRT_PE_RATIO 8
|
||||
`endif
|
||||
|
||||
// FCVT Bandwidth ratio
|
||||
`ifndef FCVT_PE_RATIO
|
||||
`define FCVT_PE_RATIO 8
|
||||
`endif
|
||||
|
||||
// FNCP Bandwidth ratio
|
||||
`ifndef FNCP_PE_RATIO
|
||||
`define FNCP_PE_RATIO 2
|
||||
`endif
|
||||
|
@ -549,7 +555,12 @@
|
|||
`define DCACHE_NUM_WAYS 1
|
||||
`endif
|
||||
|
||||
// SM Configurable Knobs //////////////////////////////////////////////////////
|
||||
// Enable Cache Writeback
|
||||
`ifndef DCACHE_WRITEBACK
|
||||
`define DCACHE_WRITEBACK 0
|
||||
`endif
|
||||
|
||||
// LMEM Configurable Knobs ////////////////////////////////////////////////////
|
||||
|
||||
`ifndef LMEM_DISABLE
|
||||
`define LMEM_ENABLE
|
||||
|
@ -608,6 +619,11 @@
|
|||
`define L2_NUM_WAYS 2
|
||||
`endif
|
||||
|
||||
// Enable Cache Writeback
|
||||
`ifndef L2_WRITEBACK
|
||||
`define L2_WRITEBACK 0
|
||||
`endif
|
||||
|
||||
// L3cache Configurable Knobs /////////////////////////////////////////////////
|
||||
|
||||
// Cache Size
|
||||
|
@ -621,7 +637,7 @@
|
|||
|
||||
// Number of Banks
|
||||
`ifndef L3_NUM_BANKS
|
||||
`define L3_NUM_BANKS `MIN(4, `NUM_CLUSTERS)
|
||||
`define L3_NUM_BANKS `MIN(8, `NUM_CLUSTERS)
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
|
@ -649,6 +665,20 @@
|
|||
`define L3_NUM_WAYS 4
|
||||
`endif
|
||||
|
||||
// Enable Cache Writeback
|
||||
`ifndef L3_WRITEBACK
|
||||
`define L3_WRITEBACK 0
|
||||
`endif
|
||||
|
||||
`ifndef MEMORY_BANKS
|
||||
`define MEMORY_BANKS 2
|
||||
`endif
|
||||
|
||||
// Number of Memory Ports from LLC
|
||||
`ifndef NUM_MEM_PORTS
|
||||
`define NUM_MEM_PORTS `MIN(`MEMORY_BANKS, `L3_NUM_BANKS)
|
||||
`endif
|
||||
|
||||
// ISA Extensions /////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef EXT_A_ENABLE
|
||||
|
|
|
@ -52,13 +52,19 @@
|
|||
`ifndef NDEBUG
|
||||
`define UUID_WIDTH 44
|
||||
`else
|
||||
`ifdef SCOPE
|
||||
`define UUID_WIDTH 44
|
||||
`else
|
||||
`define UUID_WIDTH 1
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`define PC_BITS (`XLEN-1)
|
||||
`define OFFSET_BITS 12
|
||||
`define IMM_BITS `XLEN
|
||||
|
||||
`define NUM_SOCKETS `UP(`NUM_CORES / `SOCKET_SIZE)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define EX_ALU 0
|
||||
|
@ -225,22 +231,19 @@
|
|||
`define INST_FENCE_D 1'h0
|
||||
`define INST_FENCE_I 1'h1
|
||||
|
||||
`define INST_FPU_ADD 4'b0000
|
||||
`define INST_FPU_SUB 4'b0001
|
||||
`define INST_FPU_MUL 4'b0010
|
||||
`define INST_FPU_DIV 4'b0011
|
||||
`define INST_FPU_SQRT 4'b0100
|
||||
`define INST_FPU_CMP 4'b0101 // frm: LE=0, LT=1, EQ=2
|
||||
`define INST_FPU_F2F 4'b0110
|
||||
`define INST_FPU_MISC 4'b0111 // frm: SGNJ=0, SGNJN=1, SGNJX=2, CLASS=3, MVXW=4, MVWX=5, FMIN=6, FMAX=7
|
||||
`define INST_FPU_F2I 4'b1000
|
||||
`define INST_FPU_F2U 4'b1001
|
||||
`define INST_FPU_I2F 4'b1010
|
||||
`define INST_FPU_U2F 4'b1011
|
||||
`define INST_FPU_MADD 4'b1100
|
||||
`define INST_FPU_MSUB 4'b1101
|
||||
`define INST_FPU_NMSUB 4'b1110
|
||||
`define INST_FPU_NMADD 4'b1111
|
||||
`define INST_FPU_ADD 4'b0000 // SUB=fmt[1]
|
||||
`define INST_FPU_MUL 4'b0001
|
||||
`define INST_FPU_MADD 4'b0010 // SUB=fmt[1]
|
||||
`define INST_FPU_NMADD 4'b0011 // SUB=fmt[1]
|
||||
`define INST_FPU_DIV 4'b0100
|
||||
`define INST_FPU_SQRT 4'b0101
|
||||
`define INST_FPU_F2I 4'b1000 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1
|
||||
`define INST_FPU_F2U 4'b1001 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1
|
||||
`define INST_FPU_I2F 4'b1010 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1
|
||||
`define INST_FPU_U2F 4'b1011 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1
|
||||
`define INST_FPU_CMP 4'b1100 // frm: LE=0, LT=1, EQ=2
|
||||
`define INST_FPU_F2F 4'b1101 // fmt[0]: F32=0, F64=1
|
||||
`define INST_FPU_MISC 4'b1110 // frm: SGNJ=0, SGNJN=1, SGNJX=2, CLASS=3, MVXW=4, MVWX=5, FMIN=6, FMAX=7
|
||||
`define INST_FPU_BITS 4
|
||||
`define INST_FPU_IS_CLASS(op, frm) (op == `INST_FPU_MISC && frm == 3)
|
||||
`define INST_FPU_IS_MVXW(op, frm) (op == `INST_FPU_MISC && frm == 4)
|
||||
|
@ -265,14 +268,14 @@
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define CACHE_MEM_TAG_WIDTH(mshr_size, num_banks) \
|
||||
(`CLOG2(mshr_size) + `CLOG2(num_banks))
|
||||
`define CACHE_MEM_TAG_WIDTH(mshr_size, num_banks, uuid_width) \
|
||||
(uuid_width + `CLOG2(mshr_size) + `CLOG2(num_banks))
|
||||
|
||||
`define CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width) \
|
||||
(`CLOG2(num_reqs) + `CLOG2(line_size / word_size) + tag_width)
|
||||
|
||||
`define CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, tag_width) \
|
||||
(`MAX(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks), `CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width)) + 1)
|
||||
`define CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, tag_width, uuid_width) \
|
||||
(`MAX(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks, uuid_width), `CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width)) + 1)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
@ -282,28 +285,29 @@
|
|||
`define CACHE_CLUSTER_MEM_ARB_TAG(tag_width, num_caches) \
|
||||
(tag_width + `ARB_SEL_BITS(`UP(num_caches), 1))
|
||||
|
||||
`define CACHE_CLUSTER_MEM_TAG_WIDTH(mshr_size, num_banks, num_caches) \
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks), num_caches)
|
||||
`define CACHE_CLUSTER_MEM_TAG_WIDTH(mshr_size, num_banks, num_caches, uuid_width) \
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks, uuid_width), num_caches)
|
||||
|
||||
`define CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(num_reqs, line_size, word_size, tag_width, num_inputs, num_caches) \
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches)), num_caches)
|
||||
|
||||
`define CACHE_CLUSTER_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, tag_width, num_inputs, num_caches) \
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches)), num_caches)
|
||||
`define CACHE_CLUSTER_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, tag_width, num_inputs, num_caches, uuid_width) \
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches), uuid_width), num_caches)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef ICACHE_ENABLE
|
||||
`define L1_ENABLE
|
||||
`endif
|
||||
|
||||
`ifdef DCACHE_ENABLE
|
||||
`define L1_ENABLE
|
||||
`endif
|
||||
|
||||
`define ADDR_TYPE_FLUSH 0
|
||||
`define ADDR_TYPE_IO 1
|
||||
`define ADDR_TYPE_LOCAL 2 // should be last since it is optional (width below grows by `LMEM_ENABLED)
|
||||
`define ADDR_TYPE_WIDTH (`ADDR_TYPE_LOCAL + `LMEM_ENABLED)
|
||||
`define MEM_REQ_FLAG_FLUSH 0
|
||||
`define MEM_REQ_FLAG_IO 1
|
||||
`define MEM_REQ_FLAG_LOCAL 2 // should be last since it is optional (width below grows by `LMEM_ENABLED)
|
||||
`define MEM_REQ_FLAGS_WIDTH (`MEM_REQ_FLAG_LOCAL + `LMEM_ENABLED)
|
||||
|
||||
`define VX_MEM_BYTEEN_WIDTH `L3_LINE_SIZE
|
||||
`define VX_MEM_ADDR_WIDTH (`MEM_ADDR_WIDTH - `CLOG2(`L3_LINE_SIZE))
|
||||
|
@ -317,12 +321,24 @@
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// NEG_EDGE(dst, src): declares a wire named `dst` and instantiates a
// VX_edge_trigger (POS=0, INIT=0) called __<dst>__ that is fed by `src` and
// drives `dst`. The instance uses the `clk` visible at the expansion site and
// ties its reset input to 1'b0. The expansion does not end with a semicolon,
// so the caller supplies it.
// NOTE(review): POS=0 presumably selects falling-edge detection - confirm
// against VX_edge_trigger.
`define NEG_EDGE(dst, src) \
|
||||
wire dst; \
|
||||
VX_edge_trigger #( \
|
||||
.POS (0), \
|
||||
.INIT (0) \
|
||||
) __``dst``__ ( \
|
||||
.clk (clk), \
|
||||
.reset (1'b0), \
|
||||
.data_in (src), \
|
||||
.data_out (dst) \
|
||||
)
|
||||
|
||||
`define BUFFER_EX(dst, src, ena, latency) \
|
||||
VX_pipe_register #( \
|
||||
.DATAW ($bits(dst)), \
|
||||
.RESETW ($bits(dst)), \
|
||||
.DEPTH (latency) \
|
||||
) __``dst ( \
|
||||
) __``dst``__ ( \
|
||||
.clk (clk), \
|
||||
.reset (reset), \
|
||||
.enable (ena), \
|
||||
|
@ -336,13 +352,18 @@
|
|||
VX_popcount #( \
|
||||
.N ($bits(in)), \
|
||||
.MODEL (model) \
|
||||
) __``out ( \
|
||||
) __``out``__ ( \
|
||||
.data_in (in), \
|
||||
.data_out (out) \
|
||||
)
|
||||
|
||||
`define POP_COUNT(out, in) `POP_COUNT_EX(out, in, 1)
|
||||
|
||||
// ASSIGN_VX_IF(dst, src): connects two valid/data/ready style interfaces.
// `valid` and `data` flow from src to dst; `ready` flows back from dst to src.
// The last assign has no trailing semicolon, so the caller supplies it.
`define ASSIGN_VX_IF(dst, src) \
|
||||
assign dst.valid = src.valid; \
|
||||
assign dst.data = src.data; \
|
||||
assign src.ready = dst.ready
|
||||
|
||||
`define ASSIGN_VX_MEM_BUS_IF(dst, src) \
|
||||
assign dst.req_valid = src.req_valid; \
|
||||
assign dst.req_data = src.req_data; \
|
||||
|
@ -351,71 +372,90 @@
|
|||
assign src.rsp_data = dst.rsp_data; \
|
||||
assign dst.rsp_ready = src.rsp_ready
|
||||
|
||||
// ASSIGN_VX_MEM_BUS_RO_IF(dst, src): connects a memory bus `src` to `dst`
// as a read-only requester.
//   Request  (src -> dst): req_valid, addr, flags and tag are forwarded;
//                          rw is forced to 0, data to all zeros and byteen
//                          to all ones.
//   Response (dst -> src): rsp_valid, rsp_data.data and rsp_data.tag.
//   Handshake: req_ready flows dst -> src, rsp_ready flows src -> dst.
// Only the request fields listed above are read from `src`. The last assign
// has no trailing semicolon, so the caller supplies it.
`define ASSIGN_VX_MEM_BUS_RO_IF(dst, src) \
|
||||
assign dst.req_valid = src.req_valid; \
|
||||
assign dst.req_data.rw = 0; \
|
||||
assign dst.req_data.addr = src.req_data.addr; \
|
||||
assign dst.req_data.data = '0; \
|
||||
assign dst.req_data.byteen = '1; \
|
||||
assign dst.req_data.flags = src.req_data.flags; \
|
||||
assign dst.req_data.tag = src.req_data.tag; \
|
||||
assign src.req_ready = dst.req_ready; \
|
||||
assign src.rsp_valid = dst.rsp_valid; \
|
||||
assign src.rsp_data.data = dst.rsp_data.data; \
|
||||
assign src.rsp_data.tag = dst.rsp_data.tag; \
|
||||
assign dst.rsp_ready = src.rsp_ready
|
||||
|
||||
`define ASSIGN_VX_MEM_BUS_IF_X(dst, src, TD, TS) \
|
||||
assign dst.req_valid = src.req_valid; \
|
||||
assign dst.req_data.rw = src.req_data.rw; \
|
||||
assign dst.req_data.byteen = src.req_data.byteen; \
|
||||
assign dst.req_data.addr = src.req_data.addr; \
|
||||
assign dst.req_data.atype = src.req_data.atype; \
|
||||
assign dst.req_data.data = src.req_data.data; \
|
||||
if (TD != TS) \
|
||||
assign dst.req_data.byteen = src.req_data.byteen; \
|
||||
assign dst.req_data.flags = src.req_data.flags; \
|
||||
/* verilator lint_off GENUNNAMED */ \
|
||||
if (TD != TS) begin \
|
||||
assign dst.req_data.tag = {src.req_data.tag, {(TD-TS){1'b0}}}; \
|
||||
else \
|
||||
end else begin \
|
||||
assign dst.req_data.tag = src.req_data.tag; \
|
||||
end \
|
||||
/* verilator lint_on GENUNNAMED */ \
|
||||
assign src.req_ready = dst.req_ready; \
|
||||
assign src.rsp_valid = dst.rsp_valid; \
|
||||
assign src.rsp_data.data = dst.rsp_data.data; \
|
||||
assign src.rsp_data.tag = dst.rsp_data.tag[TD-1 -: TS]; \
|
||||
assign dst.rsp_ready = src.rsp_ready
|
||||
|
||||
// ASSIGN_VX_LSU_MEM_IF(dst, src): pass-through connection between two LSU
// memory interfaces. The request channel (req_valid, req_data) flows from
// src to dst with req_ready returned; the response channel (rsp_valid,
// rsp_data) flows from dst to src with rsp_ready returned. req_data and
// rsp_data are copied whole, so both sides must use the same data layout.
// The last assign has no trailing semicolon, so the caller supplies it.
`define ASSIGN_VX_LSU_MEM_IF(dst, src) \
|
||||
assign dst.req_valid = src.req_valid; \
|
||||
assign dst.req_data = src.req_data; \
|
||||
assign src.req_ready = dst.req_ready; \
|
||||
assign src.rsp_valid = dst.rsp_valid; \
|
||||
assign src.rsp_data = dst.rsp_data; \
|
||||
assign dst.rsp_ready = src.rsp_ready
|
||||
|
||||
`define BUFFER_DCR_BUS_IF(dst, src, enable) \
|
||||
logic [(1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH)-1:0] __``dst; \
|
||||
if (enable) begin \
|
||||
always @(posedge clk) begin \
|
||||
__``dst <= {src.write_valid, src.write_addr, src.write_data}; \
|
||||
end \
|
||||
`define BUFFER_DCR_BUS_IF(dst, src, ena, latency) \
|
||||
/* verilator lint_off GENUNNAMED */ \
|
||||
if (latency != 0) begin \
|
||||
VX_pipe_register #( \
|
||||
.DATAW (1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH), \
|
||||
.DEPTH (latency) \
|
||||
) pipe_reg ( \
|
||||
.clk (clk), \
|
||||
.reset (1'b0), \
|
||||
.enable (1'b1), \
|
||||
.data_in ({src.write_valid && ena, src.write_addr, src.write_data}), \
|
||||
.data_out ({dst.write_valid, dst.write_addr, dst.write_data}) \
|
||||
); \
|
||||
end else begin \
|
||||
assign __``dst = {src.write_valid, src.write_addr, src.write_data}; \
|
||||
assign {dst.write_valid, dst.write_addr, dst.write_data} = {src.write_valid && ena, src.write_addr, src.write_data}; \
|
||||
end \
|
||||
VX_dcr_bus_if dst(); \
|
||||
assign {dst.write_valid, dst.write_addr, dst.write_data} = __``dst
|
||||
/* verilator lint_on GENUNNAMED */
|
||||
|
||||
`define PERF_COUNTER_ADD(dst, src, field, width, dst_count, src_count, reg_enable) \
|
||||
for (genvar __d = 0; __d < dst_count; ++__d) begin \
|
||||
localparam __count = ((src_count > dst_count) ? `CDIV(src_count, dst_count) : 1); \
|
||||
wire [__count-1:0][width-1:0] __reduce_add_i_``src``field; \
|
||||
wire [width-1:0] __reduce_add_o_``dst``field; \
|
||||
for (genvar __i = 0; __i < __count; ++__i) begin \
|
||||
assign __reduce_add_i_``src``field[__i] = ``src[__d * __count + __i].``field; \
|
||||
`define PERF_COUNTER_ADD(dst, src, field, width, count, reg_enable) \
|
||||
/* verilator lint_off GENUNNAMED */ \
|
||||
if (count > 1) begin \
|
||||
wire [count-1:0][width-1:0] __reduce_add_i_field; \
|
||||
wire [width-1:0] __reduce_add_o_field; \
|
||||
for (genvar __i = 0; __i < count; ++__i) begin \
|
||||
assign __reduce_add_i_field[__i] = src[__i].``field; \
|
||||
end \
|
||||
VX_reduce #(.DATAW_IN(width), .N(__count), .OP("+")) __reduce_add_``dst``field ( \
|
||||
__reduce_add_i_``src``field, \
|
||||
__reduce_add_o_``dst``field \
|
||||
VX_reduce #(.DATAW_IN(width), .N(count), .OP("+")) __reduce_add_field ( \
|
||||
__reduce_add_i_field, \
|
||||
__reduce_add_o_field \
|
||||
); \
|
||||
if (reg_enable) begin \
|
||||
reg [width-1:0] __reduce_add_r_``dst``field; \
|
||||
reg [width-1:0] __reduce_add_r_field; \
|
||||
always @(posedge clk) begin \
|
||||
if (reset) begin \
|
||||
__reduce_add_r_``dst``field <= '0; \
|
||||
__reduce_add_r_field <= '0; \
|
||||
end else begin \
|
||||
__reduce_add_r_``dst``field <= __reduce_add_o_``dst``field; \
|
||||
__reduce_add_r_field <= __reduce_add_o_field; \
|
||||
end \
|
||||
end \
|
||||
assign ``dst[__d].``field = __reduce_add_r_``dst``field; \
|
||||
assign dst.``field = __reduce_add_r_field; \
|
||||
end else begin \
|
||||
assign ``dst[__d].``field = __reduce_add_o_``dst``field; \
|
||||
assign dst.``field = __reduce_add_o_field; \
|
||||
end \
|
||||
end
|
||||
end else begin \
|
||||
assign dst.``field = src[0].``field; \
|
||||
end \
|
||||
/* verilator lint_on GENUNNAMED */
|
||||
|
||||
`define ASSIGN_BLOCKED_WID(dst, src, block_idx, block_size) \
|
||||
/* verilator lint_off GENUNNAMED */ \
|
||||
if (block_size != 1) begin \
|
||||
if (block_size != `NUM_WARPS) begin \
|
||||
assign dst = {src[`NW_WIDTH-1:`CLOG2(block_size)], `CLOG2(block_size)'(block_idx)}; \
|
||||
|
@ -424,22 +464,7 @@
|
|||
end \
|
||||
end else begin \
|
||||
assign dst = src; \
|
||||
end
|
||||
|
||||
`define TO_DISPATCH_DATA(data, tid) { \
|
||||
data.uuid, \
|
||||
data.wis, \
|
||||
data.tmask, \
|
||||
data.PC, \
|
||||
data.op_type, \
|
||||
data.op_args, \
|
||||
data.wb, \
|
||||
data.rd, \
|
||||
tid, \
|
||||
data.rs1_data, \
|
||||
data.rs2_data, \
|
||||
data.rs3_data}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
end \
|
||||
/* verilator lint_on GENUNNAMED */
|
||||
|
||||
`endif // VX_DEFINE_VH
|
||||
|
|
|
@ -60,6 +60,8 @@ package VX_gpu_pkg;
|
|||
logic [7:0] mpm_class;
|
||||
} base_dcrs_t;
|
||||
|
||||
//////////////////////////// Perf counter types ///////////////////////////
|
||||
|
||||
typedef struct packed {
|
||||
logic [`PERF_CTR_BITS-1:0] reads;
|
||||
logic [`PERF_CTR_BITS-1:0] writes;
|
||||
|
@ -77,48 +79,63 @@ package VX_gpu_pkg;
|
|||
logic [`PERF_CTR_BITS-1:0] latency;
|
||||
} mem_perf_t;
|
||||
|
||||
// Scheduler performance counters; each field is `PERF_CTR_BITS wide.
// NOTE(review): presumably idle cycles and stall cycles of the warp
// scheduler - confirm against the module that drives these fields.
typedef struct packed {
|
||||
logic [`PERF_CTR_BITS-1:0] idles;
|
||||
logic [`PERF_CTR_BITS-1:0] stalls;
|
||||
} sched_perf_t;
|
||||
|
||||
// Issue-stage performance counters; every counter is `PERF_CTR_BITS wide.
// units_uses holds one counter per execution unit (`NUM_EX_UNITS entries)
// and sfu_uses one counter per SFU sub-unit (`NUM_SFU_UNITS entries).
// NOTE(review): ibf/scb/opd presumably stand for instruction buffer,
// scoreboard and operand stalls - confirm against the issue stage.
typedef struct packed {
|
||||
logic [`PERF_CTR_BITS-1:0] ibf_stalls;
|
||||
logic [`PERF_CTR_BITS-1:0] scb_stalls;
|
||||
logic [`PERF_CTR_BITS-1:0] opd_stalls;
|
||||
logic [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] units_uses;
|
||||
logic [`NUM_SFU_UNITS-1:0][`PERF_CTR_BITS-1:0] sfu_uses;
|
||||
} issue_perf_t;
|
||||
|
||||
//////////////////////// instruction arguments ////////////////////////////
|
||||
|
||||
typedef struct packed {
|
||||
logic use_PC;
|
||||
logic use_imm;
|
||||
logic is_w;
|
||||
logic [`ALU_TYPE_BITS-1:0] xtype;
|
||||
logic [`IMM_BITS-1:0] imm;
|
||||
} alu_mod_t;
|
||||
} alu_args_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [($bits(alu_mod_t)-`INST_FRM_BITS-`INST_FMT_BITS)-1:0] __padding;
|
||||
logic [($bits(alu_args_t)-`INST_FRM_BITS-`INST_FMT_BITS)-1:0] __padding;
|
||||
logic [`INST_FRM_BITS-1:0] frm;
|
||||
logic [`INST_FMT_BITS-1:0] fmt;
|
||||
} fpu_mod_t;
|
||||
} fpu_args_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [($bits(alu_mod_t)-1-1-`OFFSET_BITS)-1:0] __padding;
|
||||
logic [($bits(alu_args_t)-1-1-`OFFSET_BITS)-1:0] __padding;
|
||||
logic is_store;
|
||||
logic is_float;
|
||||
logic [`OFFSET_BITS-1:0] offset;
|
||||
} lsu_mod_t;
|
||||
} lsu_args_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [($bits(alu_mod_t)-1-`VX_CSR_ADDR_BITS-5)-1:0] __padding;
|
||||
logic [($bits(alu_args_t)-1-`VX_CSR_ADDR_BITS-5)-1:0] __padding;
|
||||
logic use_imm;
|
||||
logic [`VX_CSR_ADDR_BITS-1:0] addr;
|
||||
logic [4:0] imm;
|
||||
} csr_mod_t;
|
||||
} csr_args_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [($bits(alu_mod_t)-1)-1:0] __padding;
|
||||
logic [($bits(alu_args_t)-1)-1:0] __padding;
|
||||
logic is_neg;
|
||||
} wctl_mod_t;
|
||||
} wctl_args_t;
|
||||
|
||||
typedef union packed {
|
||||
alu_mod_t alu;
|
||||
fpu_mod_t fpu;
|
||||
lsu_mod_t lsu;
|
||||
csr_mod_t csr;
|
||||
wctl_mod_t wctl;
|
||||
alu_args_t alu;
|
||||
fpu_args_t fpu;
|
||||
lsu_args_t lsu;
|
||||
csr_args_t csr;
|
||||
wctl_args_t wctl;
|
||||
} op_args_t;
|
||||
|
||||
/* verilator lint_off UNUSED */
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
|
||||
///////////////////////// LSU memory Parameters ///////////////////////////
|
||||
|
||||
|
@ -129,6 +146,31 @@ package VX_gpu_pkg;
|
|||
localparam LSU_TAG_WIDTH = (`UUID_WIDTH + LSU_TAG_ID_BITS);
|
||||
localparam LSU_NUM_REQS = `NUM_LSU_BLOCKS * `NUM_LSU_LANES;
|
||||
|
||||
////////////////////////// Icache Parameters //////////////////////////////
|
||||
|
||||
// Word size in bytes
|
||||
localparam ICACHE_WORD_SIZE = 4;
|
||||
localparam ICACHE_ADDR_WIDTH = (`MEM_ADDR_WIDTH - `CLOG2(ICACHE_WORD_SIZE));
|
||||
|
||||
// Block size in bytes
|
||||
localparam ICACHE_LINE_SIZE = `L1_LINE_SIZE;
|
||||
|
||||
// Core request tag Id bits
|
||||
localparam ICACHE_TAG_ID_BITS = `NW_WIDTH;
|
||||
|
||||
// Core request tag bits
|
||||
localparam ICACHE_TAG_WIDTH = (`UUID_WIDTH + ICACHE_TAG_ID_BITS);
|
||||
|
||||
// Memory request data bits
|
||||
localparam ICACHE_MEM_DATA_WIDTH = (ICACHE_LINE_SIZE * 8);
|
||||
|
||||
// Memory request tag bits
|
||||
`ifdef ICACHE_ENABLE
|
||||
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_MEM_TAG_WIDTH(`ICACHE_MSHR_SIZE, 1, `NUM_ICACHES, `UUID_WIDTH);
|
||||
`else
|
||||
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(1, ICACHE_LINE_SIZE, ICACHE_WORD_SIZE, ICACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_ICACHES);
|
||||
`endif
|
||||
|
||||
////////////////////////// Dcache Parameters //////////////////////////////
|
||||
|
||||
// Word size in bytes
|
||||
|
@ -154,36 +196,11 @@ package VX_gpu_pkg;
|
|||
localparam DCACHE_MEM_DATA_WIDTH = (DCACHE_LINE_SIZE * 8);
|
||||
|
||||
// Memory request tag bits
|
||||
`ifdef DCACHE_ENABLE
|
||||
localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_NC_MEM_TAG_WIDTH(`DCACHE_MSHR_SIZE, `DCACHE_NUM_BANKS, DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES);
|
||||
`else
|
||||
`ifdef DCACHE_ENABLE
|
||||
localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_NC_MEM_TAG_WIDTH(`DCACHE_MSHR_SIZE, `DCACHE_NUM_BANKS, DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES, `UUID_WIDTH);
|
||||
`else
|
||||
localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES);
|
||||
`endif
|
||||
|
||||
////////////////////////// Icache Parameters //////////////////////////////
|
||||
|
||||
// Word size in bytes
|
||||
localparam ICACHE_WORD_SIZE = 4;
|
||||
localparam ICACHE_ADDR_WIDTH = (`MEM_ADDR_WIDTH - `CLOG2(ICACHE_WORD_SIZE));
|
||||
|
||||
// Block size in bytes
|
||||
localparam ICACHE_LINE_SIZE = `L1_LINE_SIZE;
|
||||
|
||||
// Core request tag Id bits
|
||||
localparam ICACHE_TAG_ID_BITS = `NW_WIDTH;
|
||||
|
||||
// Core request tag bits
|
||||
localparam ICACHE_TAG_WIDTH = (`UUID_WIDTH + ICACHE_TAG_ID_BITS);
|
||||
|
||||
// Memory request data bits
|
||||
localparam ICACHE_MEM_DATA_WIDTH = (ICACHE_LINE_SIZE * 8);
|
||||
|
||||
// Memory request tag bits
|
||||
`ifdef ICACHE_ENABLE
|
||||
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_MEM_TAG_WIDTH(`ICACHE_MSHR_SIZE, 1, `NUM_ICACHES);
|
||||
`else
|
||||
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(1, ICACHE_LINE_SIZE, ICACHE_WORD_SIZE, ICACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_ICACHES);
|
||||
`endif
|
||||
`endif
|
||||
|
||||
/////////////////////////////// L1 Parameters /////////////////////////////
|
||||
|
||||
|
@ -208,11 +225,11 @@ package VX_gpu_pkg;
|
|||
localparam L2_MEM_DATA_WIDTH = (`L2_LINE_SIZE * 8);
|
||||
|
||||
// Memory request tag bits
|
||||
`ifdef L2_ENABLE
|
||||
localparam L2_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L2_MSHR_SIZE, `L2_NUM_BANKS, L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH);
|
||||
`else
|
||||
`ifdef L2_ENABLE
|
||||
localparam L2_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L2_MSHR_SIZE, `L2_NUM_BANKS, L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH, `UUID_WIDTH);
|
||||
`else
|
||||
localparam L2_MEM_TAG_WIDTH = `CACHE_BYPASS_TAG_WIDTH(L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH);
|
||||
`endif
|
||||
`endif
|
||||
|
||||
/////////////////////////////// L3 Parameters /////////////////////////////
|
||||
|
||||
|
@ -229,23 +246,20 @@ package VX_gpu_pkg;
|
|||
localparam L3_MEM_DATA_WIDTH = (`L3_LINE_SIZE * 8);
|
||||
|
||||
// Memory request tag bits
|
||||
`ifdef L3_ENABLE
|
||||
localparam L3_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L3_MSHR_SIZE, `L3_NUM_BANKS, L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH);
|
||||
`else
|
||||
`ifdef L3_ENABLE
|
||||
localparam L3_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L3_MSHR_SIZE, `L3_NUM_BANKS, L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH, `UUID_WIDTH);
|
||||
`else
|
||||
localparam L3_MEM_TAG_WIDTH = `CACHE_BYPASS_TAG_WIDTH(L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH);
|
||||
`endif
|
||||
|
||||
/* verilator lint_on UNUSED */
|
||||
`endif
|
||||
|
||||
/////////////////////////////// Issue parameters //////////////////////////
|
||||
|
||||
localparam ISSUE_ISW = `CLOG2(`ISSUE_WIDTH);
|
||||
localparam ISSUE_ISW_W = `UP(ISSUE_ISW);
|
||||
localparam ISSUE_RATIO = `NUM_WARPS / `ISSUE_WIDTH;
|
||||
localparam ISSUE_WIS = `CLOG2(ISSUE_RATIO);
|
||||
localparam PER_ISSUE_WARPS = `NUM_WARPS / `ISSUE_WIDTH;
|
||||
localparam ISSUE_WIS = `CLOG2(PER_ISSUE_WARPS);
|
||||
localparam ISSUE_WIS_W = `UP(ISSUE_WIS);
|
||||
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
function logic [`NW_WIDTH-1:0] wis_to_wid(
|
||||
input logic [ISSUE_WIS_W-1:0] wis,
|
||||
input logic [ISSUE_ISW_W-1:0] isw
|
||||
|
@ -278,8 +292,446 @@ package VX_gpu_pkg;
|
|||
wid_to_wis = 0;
|
||||
end
|
||||
endfunction
|
||||
|
||||
///////////////////////// Miscellaneous functions ////////////////////////
|
||||
|
||||
// Maps an instruction opcode to the SFU sub-unit selector.
//   op_type : instruction opcode (`INST_OP_BITS wide).
//   returns : `SFU_CSRS for the CSR opcodes (CSRRW, CSRRS, CSRRC) and
//             `SFU_WCTL for every other opcode.
function logic [`SFU_WIDTH-1:0] op_to_sfu_type(
|
||||
input logic [`INST_OP_BITS-1:0] op_type
|
||||
);
|
||||
case (op_type)
|
||||
`INST_SFU_CSRRW,
|
||||
`INST_SFU_CSRRS,
|
||||
`INST_SFU_CSRRC: op_to_sfu_type = `SFU_CSRS;
|
||||
default: op_to_sfu_type = `SFU_WCTL;
|
||||
endcase
|
||||
endfunction
|
||||
|
||||
`IGNORE_UNUSED_END
|
||||
|
||||
////////////////////////////////// Tracing ////////////////////////////////////
|
||||
|
||||
`ifdef SIMULATION
|
||||
|
||||
`ifdef SV_DPI
|
||||
import "DPI-C" function void dpi_trace(input int level, input string format /*verilator sformat*/);
|
||||
`endif
|
||||
|
||||
// Emits the name of an execution unit through the `TRACE macro.
//   level   : trace level forwarded unchanged to `TRACE.
//   ex_type : execution unit selector (`EX_BITS wide).
// Prints "ALU", "LSU" or "SFU"; "FPU" is recognized only when EXT_F_ENABLE
// is defined. Any other value prints "?".
task trace_ex_type(input int level, input [`EX_BITS-1:0] ex_type);
|
||||
case (ex_type)
|
||||
`EX_ALU: `TRACE(level, ("ALU"))
|
||||
`EX_LSU: `TRACE(level, ("LSU"))
|
||||
`EX_SFU: `TRACE(level, ("SFU"))
|
||||
`ifdef EXT_F_ENABLE
|
||||
`EX_FPU: `TRACE(level, ("FPU"))
|
||||
`endif
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
endtask
|
||||
|
||||
task trace_ex_op(input int level,
|
||||
input [`EX_BITS-1:0] ex_type,
|
||||
input [`INST_OP_BITS-1:0] op_type,
|
||||
input VX_gpu_pkg::op_args_t op_args
|
||||
);
|
||||
case (ex_type)
|
||||
`EX_ALU: begin
|
||||
case (op_args.alu.xtype)
|
||||
`ALU_TYPE_ARITH: begin
|
||||
if (op_args.alu.is_w) begin
|
||||
if (op_args.alu.use_imm) begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADDIW"))
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLLIW"))
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRLIW"))
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRAIW"))
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADDW"))
|
||||
`INST_ALU_SUB: `TRACE(level, ("SUBW"))
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLLW"))
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRLW"))
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRAW"))
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
end
|
||||
end else begin
|
||||
if (op_args.alu.use_imm) begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADDI"))
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLLI"))
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRLI"))
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRAI"))
|
||||
`INST_ALU_SLT: `TRACE(level, ("SLTI"))
|
||||
`INST_ALU_SLTU: `TRACE(level, ("SLTIU"))
|
||||
`INST_ALU_XOR: `TRACE(level, ("XORI"))
|
||||
`INST_ALU_OR: `TRACE(level, ("ORI"))
|
||||
`INST_ALU_AND: `TRACE(level, ("ANDI"))
|
||||
`INST_ALU_LUI: `TRACE(level, ("LUI"))
|
||||
`INST_ALU_AUIPC: `TRACE(level, ("AUIPC"))
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADD"))
|
||||
`INST_ALU_SUB: `TRACE(level, ("SUB"))
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLL"))
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRL"))
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRA"))
|
||||
`INST_ALU_SLT: `TRACE(level, ("SLT"))
|
||||
`INST_ALU_SLTU: `TRACE(level, ("SLTU"))
|
||||
`INST_ALU_XOR: `TRACE(level, ("XOR"))
|
||||
`INST_ALU_OR: `TRACE(level, ("OR"))
|
||||
`INST_ALU_AND: `TRACE(level, ("AND"))
|
||||
`INST_ALU_CZEQ: `TRACE(level, ("CZERO.EQZ"))
|
||||
`INST_ALU_CZNE: `TRACE(level, ("CZERO.NEZ"))
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
end
|
||||
end
|
||||
end
|
||||
`ALU_TYPE_BRANCH: begin
|
||||
case (`INST_BR_BITS'(op_type))
|
||||
`INST_BR_EQ: `TRACE(level, ("BEQ"))
|
||||
`INST_BR_NE: `TRACE(level, ("BNE"))
|
||||
`INST_BR_LT: `TRACE(level, ("BLT"))
|
||||
`INST_BR_GE: `TRACE(level, ("BGE"))
|
||||
`INST_BR_LTU: `TRACE(level, ("BLTU"))
|
||||
`INST_BR_GEU: `TRACE(level, ("BGEU"))
|
||||
`INST_BR_JAL: `TRACE(level, ("JAL"))
|
||||
`INST_BR_JALR: `TRACE(level, ("JALR"))
|
||||
`INST_BR_ECALL: `TRACE(level, ("ECALL"))
|
||||
`INST_BR_EBREAK:`TRACE(level, ("EBREAK"))
|
||||
`INST_BR_URET: `TRACE(level, ("URET"))
|
||||
`INST_BR_SRET: `TRACE(level, ("SRET"))
|
||||
`INST_BR_MRET: `TRACE(level, ("MRET"))
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
end
|
||||
`ALU_TYPE_MULDIV: begin
|
||||
if (op_args.alu.is_w) begin
|
||||
case (`INST_M_BITS'(op_type))
|
||||
`INST_M_MUL: `TRACE(level, ("MULW"))
|
||||
`INST_M_DIV: `TRACE(level, ("DIVW"))
|
||||
`INST_M_DIVU: `TRACE(level, ("DIVUW"))
|
||||
`INST_M_REM: `TRACE(level, ("REMW"))
|
||||
`INST_M_REMU: `TRACE(level, ("REMUW"))
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_M_BITS'(op_type))
|
||||
`INST_M_MUL: `TRACE(level, ("MUL"))
|
||||
`INST_M_MULH: `TRACE(level, ("MULH"))
|
||||
`INST_M_MULHSU:`TRACE(level, ("MULHSU"))
|
||||
`INST_M_MULHU: `TRACE(level, ("MULHU"))
|
||||
`INST_M_DIV: `TRACE(level, ("DIV"))
|
||||
`INST_M_DIVU: `TRACE(level, ("DIVU"))
|
||||
`INST_M_REM: `TRACE(level, ("REM"))
|
||||
`INST_M_REMU: `TRACE(level, ("REMU"))
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
end
|
||||
end
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
end
|
||||
`EX_LSU: begin
|
||||
if (op_args.lsu.is_float) begin
|
||||
case (`INST_LSU_BITS'(op_type))
|
||||
`INST_LSU_LW: `TRACE(level, ("FLW"))
|
||||
`INST_LSU_LD: `TRACE(level, ("FLD"))
|
||||
`INST_LSU_SW: `TRACE(level, ("FSW"))
|
||||
`INST_LSU_SD: `TRACE(level, ("FSD"))
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_LSU_BITS'(op_type))
|
||||
`INST_LSU_LB: `TRACE(level, ("LB"))
|
||||
`INST_LSU_LH: `TRACE(level, ("LH"))
|
||||
`INST_LSU_LW: `TRACE(level, ("LW"))
|
||||
`INST_LSU_LD: `TRACE(level, ("LD"))
|
||||
`INST_LSU_LBU:`TRACE(level, ("LBU"))
|
||||
`INST_LSU_LHU:`TRACE(level, ("LHU"))
|
||||
`INST_LSU_LWU:`TRACE(level, ("LWU"))
|
||||
`INST_LSU_SB: `TRACE(level, ("SB"))
|
||||
`INST_LSU_SH: `TRACE(level, ("SH"))
|
||||
`INST_LSU_SW: `TRACE(level, ("SW"))
|
||||
`INST_LSU_SD: `TRACE(level, ("SD"))
|
||||
`INST_LSU_FENCE:`TRACE(level,("FENCE"))
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
end
|
||||
end
|
||||
`EX_SFU: begin
|
||||
case (`INST_SFU_BITS'(op_type))
|
||||
`INST_SFU_TMC: `TRACE(level, ("TMC"))
|
||||
`INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN"))
|
||||
`INST_SFU_SPLIT: begin
|
||||
if (op_args.wctl.is_neg) begin
|
||||
`TRACE(level, ("SPLIT.N"))
|
||||
end else begin
|
||||
`TRACE(level, ("SPLIT"))
|
||||
end
|
||||
end
|
||||
`INST_SFU_JOIN: `TRACE(level, ("JOIN"))
|
||||
`INST_SFU_BAR: `TRACE(level, ("BAR"))
|
||||
`INST_SFU_PRED: begin
|
||||
if (op_args.wctl.is_neg) begin
|
||||
`TRACE(level, ("PRED.N"))
|
||||
end else begin
|
||||
`TRACE(level, ("PRED"))
|
||||
end
|
||||
end
|
||||
`INST_SFU_CSRRW: begin
|
||||
if (op_args.csr.use_imm) begin
|
||||
`TRACE(level, ("CSRRWI"))
|
||||
end else begin
|
||||
`TRACE(level, ("CSRRW"))
|
||||
end
|
||||
end
|
||||
`INST_SFU_CSRRS: begin
|
||||
if (op_args.csr.use_imm) begin
|
||||
`TRACE(level, ("CSRRSI"))
|
||||
end else begin
|
||||
`TRACE(level, ("CSRRS"))
|
||||
end
|
||||
end
|
||||
`INST_SFU_CSRRC: begin
|
||||
if (op_args.csr.use_imm) begin
|
||||
`TRACE(level, ("CSRRCI"))
|
||||
end else begin
|
||||
`TRACE(level, ("CSRRC"))
|
||||
end
|
||||
end
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
`EX_FPU: begin
|
||||
case (`INST_FPU_BITS'(op_type))
|
||||
`INST_FPU_ADD: begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
`TRACE(level, ("FSUB.D"))
|
||||
end else begin
|
||||
`TRACE(level, ("FSUB.S"))
|
||||
end
|
||||
end else begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
`TRACE(level, ("FADD.D"))
|
||||
end else begin
|
||||
`TRACE(level, ("FADD.S"))
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_MADD: begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
`TRACE(level, ("FMSUB.D"))
|
||||
end else begin
|
||||
`TRACE(level, ("FMSUB.S"))
|
||||
end
|
||||
end else begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
`TRACE(level, ("FMADD.D"))
|
||||
end else begin
|
||||
`TRACE(level, ("FMADD.S"))
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_NMADD: begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
`TRACE(level, ("FNMSUB.D"))
|
||||
end else begin
|
||||
`TRACE(level, ("FNMSUB.S"))
|
||||
end
|
||||
end else begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
`TRACE(level, ("FNMADD.D"))
|
||||
end else begin
|
||||
`TRACE(level, ("FNMADD.S"))
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_MUL: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
`TRACE(level, ("FMUL.D"))
|
||||
end else begin
|
||||
`TRACE(level, ("FMUL.S"))
|
||||
end
|
||||
end
|
||||
`INST_FPU_DIV: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
`TRACE(level, ("FDIV.D"))
|
||||
end else begin
|
||||
`TRACE(level, ("FDIV.S"))
|
||||
end
|
||||
end
|
||||
`INST_FPU_SQRT: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
`TRACE(level, ("FSQRT.D"))
|
||||
end else begin
|
||||
`TRACE(level, ("FSQRT.S"))
|
||||
end
|
||||
end
|
||||
`INST_FPU_CMP: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
case (op_args.fpu.frm[1:0])
|
||||
0: `TRACE(level, ("FLE.D"))
|
||||
1: `TRACE(level, ("FLT.D"))
|
||||
2: `TRACE(level, ("FEQ.D"))
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
end else begin
|
||||
case (op_args.fpu.frm[1:0])
|
||||
0: `TRACE(level, ("FLE.S"))
|
||||
1: `TRACE(level, ("FLT.S"))
|
||||
2: `TRACE(level, ("FEQ.S"))
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
end
|
||||
end
|
||||
`INST_FPU_F2F: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
`TRACE(level, ("FCVT.D.S"))
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.S.D"))
|
||||
end
|
||||
end
|
||||
`INST_FPU_F2I: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.L.D"))
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.W.D"))
|
||||
end
|
||||
end else begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.L.S"))
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.W.S"))
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_F2U: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.LU.D"))
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.WU.D"))
|
||||
end
|
||||
end else begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.LU.S"))
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.WU.S"))
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_I2F: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.D.L"))
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.D.W"))
|
||||
end
|
||||
end else begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.S.L"))
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.S.W"))
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_U2F: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.D.LU"))
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.D.WU"))
|
||||
end
|
||||
end else begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.S.LU"))
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.S.WU"))
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_MISC: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
case (op_args.fpu.frm)
|
||||
0: `TRACE(level, ("FSGNJ.D"))
|
||||
1: `TRACE(level, ("FSGNJN.D"))
|
||||
2: `TRACE(level, ("FSGNJX.D"))
|
||||
3: `TRACE(level, ("FCLASS.D"))
|
||||
4: `TRACE(level, ("FMV.X.D"))
|
||||
5: `TRACE(level, ("FMV.D.X"))
|
||||
6: `TRACE(level, ("FMIN.D"))
|
||||
7: `TRACE(level, ("FMAX.D"))
|
||||
endcase
|
||||
end else begin
|
||||
case (op_args.fpu.frm)
|
||||
0: `TRACE(level, ("FSGNJ.S"))
|
||||
1: `TRACE(level, ("FSGNJN.S"))
|
||||
2: `TRACE(level, ("FSGNJX.S"))
|
||||
3: `TRACE(level, ("FCLASS.S"))
|
||||
4: `TRACE(level, ("FMV.X.S"))
|
||||
5: `TRACE(level, ("FMV.S.X"))
|
||||
6: `TRACE(level, ("FMIN.S"))
|
||||
7: `TRACE(level, ("FMAX.S"))
|
||||
endcase
|
||||
end
|
||||
end
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
end
|
||||
`endif
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
endtask
|
||||
|
||||
task trace_op_args(input int level,
|
||||
input [`EX_BITS-1:0] ex_type,
|
||||
input [`INST_OP_BITS-1:0] op_type,
|
||||
input VX_gpu_pkg::op_args_t op_args
|
||||
);
|
||||
case (ex_type)
|
||||
`EX_ALU: begin
|
||||
`TRACE(level, (", use_PC=%b, use_imm=%b, imm=0x%0h", op_args.alu.use_PC, op_args.alu.use_imm, op_args.alu.imm))
|
||||
end
|
||||
`EX_LSU: begin
|
||||
`TRACE(level, (", offset=0x%0h", op_args.lsu.offset))
|
||||
end
|
||||
`EX_SFU: begin
|
||||
if (`INST_SFU_IS_CSR(op_type)) begin
|
||||
`TRACE(level, (", addr=0x%0h, use_imm=%b, imm=0x%0h", op_args.csr.addr, op_args.csr.use_imm, op_args.csr.imm))
|
||||
end
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
`EX_FPU: begin
|
||||
`TRACE(level, (", fmt=0x%0h, frm=0x%0h", op_args.fpu.fmt, op_args.fpu.frm))
|
||||
end
|
||||
`endif
|
||||
default:;
|
||||
endcase
|
||||
endtask
|
||||
|
||||
task trace_base_dcr(input int level, input [`VX_DCR_ADDR_WIDTH-1:0] addr);
|
||||
case (addr)
|
||||
`VX_DCR_BASE_STARTUP_ADDR0: `TRACE(level, ("STARTUP_ADDR0"))
|
||||
`VX_DCR_BASE_STARTUP_ADDR1: `TRACE(level, ("STARTUP_ADDR1"))
|
||||
`VX_DCR_BASE_STARTUP_ARG0: `TRACE(level, ("STARTUP_ARG0"))
|
||||
`VX_DCR_BASE_STARTUP_ARG1: `TRACE(level, ("STARTUP_ARG1"))
|
||||
`VX_DCR_BASE_MPM_CLASS: `TRACE(level, ("MPM_CLASS"))
|
||||
default: `TRACE(level, ("?"))
|
||||
endcase
|
||||
endtask
|
||||
|
||||
`endif
|
||||
|
||||
endpackage
|
||||
|
||||
`endif // VX_GPU_PKG_VH
|
||||
|
|
|
@ -22,36 +22,39 @@
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef VIVADO
|
||||
`define STRING
|
||||
`else
|
||||
`define STRING string
|
||||
`endif
|
||||
`ifdef SIMULATION
|
||||
|
||||
`ifdef SYNTHESIS
|
||||
`define TRACING_ON
|
||||
`define TRACING_OFF
|
||||
`ifndef NDEBUG
|
||||
`define DEBUG_BLOCK(x) x
|
||||
`else
|
||||
`define DEBUG_BLOCK(x)
|
||||
`endif
|
||||
`define IGNORE_UNOPTFLAT_BEGIN
|
||||
`define IGNORE_UNOPTFLAT_END
|
||||
`define IGNORE_UNUSED_BEGIN
|
||||
`define IGNORE_UNUSED_END
|
||||
`define IGNORE_WARNINGS_BEGIN
|
||||
`define IGNORE_WARNINGS_END
|
||||
`define UNUSED_PARAM(x)
|
||||
`define UNUSED_SPARAM(x)
|
||||
`define UNUSED_VAR(x)
|
||||
`define UNUSED_PIN(x) . x ()
|
||||
`define UNUSED_ARG(x) x
|
||||
`define TRACE(level, args) $write args
|
||||
`else
|
||||
`ifdef VERILATOR
|
||||
`define STATIC_ASSERT(cond, msg) \
|
||||
generate \
|
||||
/* verilator lint_off GENUNNAMED */ \
|
||||
if (!(cond)) $error msg; \
|
||||
/* verilator lint_on GENUNNAMED */ \
|
||||
endgenerate
|
||||
|
||||
`define ERROR(msg) \
|
||||
$error msg
|
||||
|
||||
`define ASSERT(cond, msg) \
|
||||
assert(cond) else $error msg
|
||||
|
||||
`define RUNTIME_ASSERT(cond, msg) \
|
||||
always @(posedge clk) begin \
|
||||
assert(cond) else $error msg; \
|
||||
end
|
||||
|
||||
`define __SCOPE
|
||||
`define __SCOPE_X
|
||||
`define __SCOPE_ON
|
||||
`define __SCOPE_OFF
|
||||
|
||||
`ifndef TRACING_ALL
|
||||
`define TRACING_ON /* verilator tracing_on */
|
||||
`define TRACING_OFF /* verilator tracing_off */
|
||||
`else
|
||||
`define TRACING_ON
|
||||
`define TRACING_OFF
|
||||
`endif
|
||||
|
||||
`ifndef NDEBUG
|
||||
`define DEBUG_BLOCK(x) /* verilator lint_off UNUSED */ \
|
||||
x \
|
||||
|
@ -100,43 +103,68 @@
|
|||
localparam `STRING __``x = x; \
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
`define UNUSED_VAR(x) if (1) begin \
|
||||
`define UNUSED_VAR(x) /* verilator lint_off GENUNNAMED */ \
|
||||
if (1) begin \
|
||||
/* verilator lint_off UNUSED */ \
|
||||
wire [$bits(x)-1:0] __x = x; \
|
||||
/* verilator lint_on UNUSED */ \
|
||||
end
|
||||
end \
|
||||
/* verilator lint_on GENUNNAMED */
|
||||
|
||||
`define UNUSED_PIN(x) /* verilator lint_off PINCONNECTEMPTY */ \
|
||||
. x () \
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
|
||||
`define UNUSED_ARG(x) /* verilator lint_off UNUSED */ \
|
||||
x \
|
||||
/* verilator lint_on UNUSED */
|
||||
`define TRACE(level, args) dpi_trace(level, $sformatf args)
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifdef SIMULATION
|
||||
`define STATIC_ASSERT(cond, msg) \
|
||||
generate \
|
||||
if (!(cond)) $error msg; \
|
||||
endgenerate
|
||||
|
||||
`define ERROR(msg) \
|
||||
$error msg
|
||||
|
||||
`define ASSERT(cond, msg) \
|
||||
assert(cond) else $error msg
|
||||
|
||||
`define RUNTIME_ASSERT(cond, msg) \
|
||||
always @(posedge clk) begin \
|
||||
assert(cond) else $error msg; \
|
||||
end
|
||||
`ifdef SV_DPI
|
||||
`define TRACE(level, args) dpi_trace(level, $sformatf args);
|
||||
`else
|
||||
`define STATIC_ASSERT(cond, msg)
|
||||
`define ERROR(msg) //
|
||||
`define ASSERT(cond, msg) //
|
||||
`define RUNTIME_ASSERT(cond, msg)
|
||||
`define TRACE(level, args) \
|
||||
if (level <= `DEBUG_LEVEL) begin \
|
||||
$write args; \
|
||||
end
|
||||
`endif
|
||||
|
||||
`else // SYNTHESIS
|
||||
|
||||
`define STATIC_ASSERT(cond, msg)
|
||||
`define ERROR(msg) //
|
||||
`define ASSERT(cond, msg) //
|
||||
`define RUNTIME_ASSERT(cond, msg)
|
||||
|
||||
`define DEBUG_BLOCK(x)
|
||||
`define TRACE(level, args)
|
||||
|
||||
`define TRACING_ON
|
||||
`define TRACING_OFF
|
||||
|
||||
`define IGNORE_UNOPTFLAT_BEGIN
|
||||
`define IGNORE_UNOPTFLAT_END
|
||||
`define IGNORE_UNUSED_BEGIN
|
||||
`define IGNORE_UNUSED_END
|
||||
`define IGNORE_WARNINGS_BEGIN
|
||||
`define IGNORE_WARNINGS_END
|
||||
`define UNUSED_PARAM(x)
|
||||
`define UNUSED_SPARAM(x)
|
||||
`define UNUSED_VAR(x)
|
||||
`define UNUSED_PIN(x) . x ()
|
||||
`define UNUSED_ARG(x) x
|
||||
|
||||
`define __SCOPE (* mark_debug="true" *)
|
||||
|
||||
`define __SCOPE_X
|
||||
|
||||
`define __SCOPE_ON \
|
||||
`undef __SCOPE_X \
|
||||
`define __SCOPE_X `__SCOPE
|
||||
|
||||
`define __SCOPE_OFF \
|
||||
`undef __SCOPE_X \
|
||||
`define __SCOPE_X
|
||||
|
||||
`endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -148,6 +176,7 @@
|
|||
`define NO_RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams off" *)
|
||||
`define DISABLE_BRAM (* ramstyle = "logic" *)
|
||||
`define PRESERVE_NET (* preserve *)
|
||||
`define STRING string
|
||||
`elsif VIVADO
|
||||
`define MAX_FANOUT 8
|
||||
`define IF_DATA_SIZE(x) $bits(x.data)
|
||||
|
@ -155,6 +184,7 @@
|
|||
`define NO_RW_RAM_CHECK (* rw_addr_collision = "no" *)
|
||||
`define DISABLE_BRAM (* ram_style = "registers" *)
|
||||
`define PRESERVE_NET (* keep = "true" *)
|
||||
`define STRING
|
||||
`else
|
||||
`define MAX_FANOUT 8
|
||||
`define IF_DATA_SIZE(x) x.DATA_WIDTH
|
||||
|
@ -162,6 +192,7 @@
|
|||
`define NO_RW_RAM_CHECK
|
||||
`define DISABLE_BRAM
|
||||
`define PRESERVE_NET
|
||||
`define STRING string
|
||||
`endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -198,23 +229,23 @@
|
|||
`define SEXT(len, x) {{(len-$bits(x)+1){x[$bits(x)-1]}}, x[$bits(x)-2:0]}
|
||||
|
||||
`define TRACE_ARRAY1D(lvl, fmt, arr, n) \
|
||||
`TRACE(lvl, ("{")); \
|
||||
`TRACE(lvl, ("{")) \
|
||||
for (integer __i = (n-1); __i >= 0; --__i) begin \
|
||||
if (__i != (n-1)) `TRACE(lvl, (", ")); \
|
||||
`TRACE(lvl, (fmt, arr[__i])); \
|
||||
if (__i != (n-1)) `TRACE(lvl, (", ")) \
|
||||
`TRACE(lvl, (fmt, arr[__i])) \
|
||||
end \
|
||||
`TRACE(lvl, ("}"));
|
||||
`TRACE(lvl, ("}"))
|
||||
|
||||
`define TRACE_ARRAY2D(lvl, fmt, arr, m, n) \
|
||||
`TRACE(lvl, ("{")); \
|
||||
`TRACE(lvl, ("{")) \
|
||||
for (integer __i = n-1; __i >= 0; --__i) begin \
|
||||
if (__i != (n-1)) `TRACE(lvl, (", ")); \
|
||||
`TRACE(lvl, ("{")); \
|
||||
if (__i != (n-1)) `TRACE(lvl, (", ")) \
|
||||
`TRACE(lvl, ("{")) \
|
||||
for (integer __j = (m-1); __j >= 0; --__j) begin \
|
||||
if (__j != (m-1)) `TRACE(lvl, (", "));\
|
||||
`TRACE(lvl, (fmt, arr[__i][__j])); \
|
||||
if (__j != (m-1)) `TRACE(lvl, (", "))\
|
||||
`TRACE(lvl, (fmt, arr[__i][__j])) \
|
||||
end \
|
||||
`TRACE(lvl, ("}")); \
|
||||
`TRACE(lvl, ("}")) \
|
||||
end \
|
||||
`TRACE(lvl, ("}"))
|
||||
|
||||
|
@ -232,11 +263,14 @@
|
|||
`define RESET_RELAY(dst, src) \
|
||||
`RESET_RELAY_EX (dst, src, 1, 0)
|
||||
|
||||
// size(x): 0 -> 0, 1 -> 1, 2 -> 2, 3 -> 2, 4-> 2
|
||||
`define TO_OUT_BUF_SIZE(out_reg) `MIN(out_reg, 2)
|
||||
// size(x): 0 -> 0, 1 -> 1, 2 -> 2, 3 -> 2, 4 -> 2, 5 -> 2
|
||||
`define TO_OUT_BUF_SIZE(s) `MIN(s & 7, 2)
|
||||
|
||||
// reg(x): 0 -> 0, 1 -> 1, 2 -> 0, 3 -> 1, 4 -> 2
|
||||
`define TO_OUT_BUF_REG(out_reg) ((out_reg & 1) + ((out_reg >> 2) << 1))
|
||||
// reg(x): 0 -> 0, 1 -> 1, 2 -> 0, 3 -> 1, 4 -> 2, 5 -> 3
|
||||
`define TO_OUT_BUF_REG(s) (((s & 7) < 2) ? (s & 7) : ((s & 7) - 2))
|
||||
|
||||
// lut(x): (x & 8) != 0
|
||||
`define TO_OUT_BUF_LUTRAM(s) ((s & 8) != 0)
|
||||
|
||||
`define REPEAT(n,f,s) `_REPEAT_``n(f,s)
|
||||
`define _REPEAT_0(f,s)
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -21,48 +21,67 @@
|
|||
input wire scope_bus_in, \
|
||||
output wire scope_bus_out,
|
||||
|
||||
`define SCOPE_IO_SWITCH(__count) \
|
||||
wire scope_bus_in_w [__count]; \
|
||||
wire scope_bus_out_w [__count]; \
|
||||
`RESET_RELAY_EX(scope_reset_w, scope_reset, __count, 4); \
|
||||
VX_scope_switch #( \
|
||||
.N (__count) \
|
||||
) scope_switch ( \
|
||||
.clk (clk), \
|
||||
.reset (scope_reset), \
|
||||
.req_in (scope_bus_in), \
|
||||
.rsp_out (scope_bus_out), \
|
||||
.req_out (scope_bus_in_w), \
|
||||
.rsp_in (scope_bus_out_w) \
|
||||
);
|
||||
|
||||
`define SCOPE_IO_BIND(__i) \
|
||||
.scope_reset (scope_reset_w[__i]), \
|
||||
.scope_bus_in (scope_bus_in_w[__i]), \
|
||||
.scope_bus_out (scope_bus_out_w[__i]),
|
||||
|
||||
`define SCOPE_IO_UNUSED() \
|
||||
`UNUSED_VAR (scope_reset); \
|
||||
`UNUSED_VAR (scope_bus_in); \
|
||||
assign scope_bus_out = 0;
|
||||
|
||||
`define SCOPE_IO_UNUSED_W(__i) \
|
||||
`define SCOPE_IO_UNUSED(__i) \
|
||||
`UNUSED_VAR (scope_reset_w[__i]); \
|
||||
`UNUSED_VAR (scope_bus_in_w[__i]); \
|
||||
assign scope_bus_out_w[__i] = 0;
|
||||
|
||||
`define SCOPE_IO_SWITCH(__count) \
|
||||
wire [__count-1:0] scope_bus_in_w; \
|
||||
wire [__count-1:0] scope_bus_out_w; \
|
||||
wire [__count-1:0] scope_reset_w = {__count{scope_reset}}; \
|
||||
VX_scope_switch #( \
|
||||
.N (__count) \
|
||||
) scope_switch ( \
|
||||
.clk (clk), \
|
||||
.reset (scope_reset), \
|
||||
.req_in (scope_bus_in), \
|
||||
.rsp_out (scope_bus_out), \
|
||||
.req_out (scope_bus_in_w), \
|
||||
.rsp_in (scope_bus_out_w) \
|
||||
)
|
||||
|
||||
// Instantiate one VX_scope_tap and attach it to slot __idx of the scope switch
// wires (scope_reset_w / scope_bus_in_w / scope_bus_out_w).
//   __idx         - tap slot index; also pasted into the instance name
//   __id          - value passed as SCOPE_ID
//   __xtriggers_w - value passed as XTRIGGERW
//   __htriggers_w - value passed as HTRIGGERW
//   __probes_w    - value passed as PROBEW
//   __xtriggers, __htriggers, __probes - signals bound to the matching ports
//   __start, __stop - signals bound to the start / stop ports
//   __depth       - value passed as DEPTH
// The instance name must paste the formal argument __idx. Pasting the bare
// token idx (not a formal) named every tap "scope_tap_idx", so two taps in the
// same scope collided.
`define SCOPE_TAP_EX(__idx, __id, __xtriggers_w, __htriggers_w, __probes_w, __xtriggers, __htriggers, __probes, __start, __stop, __depth) \
    VX_scope_tap #( \
        .SCOPE_ID (__id), \
        .XTRIGGERW(__xtriggers_w), \
        .HTRIGGERW(__htriggers_w), \
        .PROBEW (__probes_w), \
        .DEPTH (__depth) \
    ) scope_tap_``__idx ( \
        .clk (clk), \
        .reset (scope_reset_w[__idx]), \
        .start (__start), \
        .stop (__stop), \
        .xtriggers(__xtriggers), \
        .htriggers(__htriggers), \
        .probes (__probes), \
        .bus_in (scope_bus_in_w[__idx]), \
        .bus_out(scope_bus_out_w[__idx]) \
    )
|
||||
|
||||
`define SCOPE_TAP(__idx, __id, __xtriggers, __htriggers, __probes, __start, __stop, __depth) \
|
||||
`SCOPE_TAP_EX(__idx, __id, $bits(__xtriggers), $bits(__htriggers), $bits(__probes), __xtriggers, __htriggers, __probes, __start, __stop, __depth)
|
||||
|
||||
`else
|
||||
|
||||
`define SCOPE_IO_DECL
|
||||
|
||||
`define SCOPE_IO_SWITCH(__count)
|
||||
|
||||
`define SCOPE_IO_BIND(__i)
|
||||
|
||||
`define SCOPE_IO_UNUSED_W(__i)
|
||||
|
||||
`define SCOPE_IO_UNUSED(__i)
|
||||
|
||||
`define SCOPE_IO_SWITCH(__count)
|
||||
|
||||
// No-op stubs used when scope support is compiled out. Their argument lists
// mirror the scope-enabled SCOPE_TAP (8 arguments) and SCOPE_TAP_EX
// (11 arguments) definitions, so a call site expands to nothing instead of
// failing with a macro argument-count error.
`define SCOPE_TAP(__idx, __id, __xtriggers, __htriggers, __probes, __start, __stop, __depth)

`define SCOPE_TAP_EX(__idx, __id, __xtriggers_w, __htriggers_w, __probes_w, __xtriggers, __htriggers, __probes, __start, __stop, __depth)
|
||||
|
||||
`endif
|
||||
|
||||
`endif // VX_SCOPE_VH
|
||||
|
|
|
@ -14,7 +14,8 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_socket import VX_gpu_pkg::*; #(
|
||||
parameter SOCKET_ID = 0
|
||||
parameter SOCKET_ID = 0,
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
|
@ -40,17 +41,20 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
output wire busy
|
||||
);
|
||||
|
||||
`ifdef SCOPE
|
||||
localparam scope_core = 0;
|
||||
`SCOPE_IO_SWITCH (`SOCKET_SIZE);
|
||||
`endif
|
||||
|
||||
`ifdef GBAR_ENABLE
|
||||
VX_gbar_bus_if per_core_gbar_bus_if[`SOCKET_SIZE]();
|
||||
|
||||
`RESET_RELAY (gbar_arb_reset, reset);
|
||||
|
||||
VX_gbar_arb #(
|
||||
.NUM_REQS (`SOCKET_SIZE),
|
||||
.OUT_BUF ((`SOCKET_SIZE > 1) ? 2 : 0)
|
||||
) gbar_arb (
|
||||
.clk (clk),
|
||||
.reset (gbar_arb_reset),
|
||||
.reset (reset),
|
||||
.bus_in_if (per_core_gbar_bus_if),
|
||||
.bus_out_if (gbar_bus_if)
|
||||
);
|
||||
|
@ -81,7 +85,7 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (icache_reset, reset);
|
||||
|
||||
VX_cache_cluster #(
|
||||
.INSTANCE_ID ($sformatf("socket%0d-icache", SOCKET_ID)),
|
||||
.INSTANCE_ID ($sformatf("%s-icache", INSTANCE_ID)),
|
||||
.NUM_UNITS (`NUM_ICACHES),
|
||||
.NUM_INPUTS (`SOCKET_SIZE),
|
||||
.TAG_SEL_IDX (0),
|
||||
|
@ -99,7 +103,7 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.WRITE_ENABLE (0),
|
||||
.NC_ENABLE (0),
|
||||
.CORE_OUT_BUF (2),
|
||||
.CORE_OUT_BUF (3),
|
||||
.MEM_OUT_BUF (2)
|
||||
) icache (
|
||||
`ifdef PERF_ENABLE
|
||||
|
@ -126,7 +130,7 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (dcache_reset, reset);
|
||||
|
||||
VX_cache_cluster #(
|
||||
.INSTANCE_ID ($sformatf("socket%0d-dcache", SOCKET_ID)),
|
||||
.INSTANCE_ID ($sformatf("%s-dcache", INSTANCE_ID)),
|
||||
.NUM_UNITS (`NUM_DCACHES),
|
||||
.NUM_INPUTS (`SOCKET_SIZE),
|
||||
.TAG_SEL_IDX (0),
|
||||
|
@ -139,12 +143,14 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
.CRSQ_SIZE (`DCACHE_CRSQ_SIZE),
|
||||
.MSHR_SIZE (`DCACHE_MSHR_SIZE),
|
||||
.MRSQ_SIZE (`DCACHE_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`DCACHE_MREQ_SIZE),
|
||||
.MREQ_SIZE (`DCACHE_WRITEBACK ? `DCACHE_MSHR_SIZE : `DCACHE_MREQ_SIZE),
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.WRITE_ENABLE (1),
|
||||
.WRITEBACK (`DCACHE_WRITEBACK),
|
||||
.DIRTY_BYTES (`DCACHE_WRITEBACK),
|
||||
.NC_ENABLE (1),
|
||||
.CORE_OUT_BUF (`LMEM_ENABLED ? 2 : 1),
|
||||
.CORE_OUT_BUF (3),
|
||||
.MEM_OUT_BUF (2)
|
||||
) dcache (
|
||||
`ifdef PERF_ENABLE
|
||||
|
@ -171,19 +177,17 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
`ASSIGN_VX_MEM_BUS_IF_X (l1_mem_bus_if[0], icache_mem_bus_if, L1_MEM_TAG_WIDTH, ICACHE_MEM_TAG_WIDTH);
|
||||
`ASSIGN_VX_MEM_BUS_IF_X (l1_mem_bus_if[1], dcache_mem_bus_if, L1_MEM_TAG_WIDTH, DCACHE_MEM_TAG_WIDTH);
|
||||
|
||||
`RESET_RELAY (mem_arb_reset, reset);
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATA_SIZE (`L1_LINE_SIZE),
|
||||
.TAG_WIDTH (L1_MEM_TAG_WIDTH),
|
||||
.TAG_SEL_IDX (0),
|
||||
.ARBITER ("R"),
|
||||
.REQ_OUT_BUF (2),
|
||||
.RSP_OUT_BUF (2)
|
||||
.NUM_INPUTS (2),
|
||||
.DATA_SIZE (`L1_LINE_SIZE),
|
||||
.TAG_WIDTH (L1_MEM_TAG_WIDTH),
|
||||
.TAG_SEL_IDX(0),
|
||||
.ARBITER ("P"), // prioritize the icache
|
||||
.REQ_OUT_BUF(3),
|
||||
.RSP_OUT_BUF(3)
|
||||
) mem_arb (
|
||||
.clk (clk),
|
||||
.reset (mem_arb_reset),
|
||||
.reset (reset),
|
||||
.bus_in_if (l1_mem_bus_if),
|
||||
.bus_out_if (l1_mem_arb_bus_if)
|
||||
);
|
||||
|
@ -194,19 +198,19 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
|
||||
wire [`SOCKET_SIZE-1:0] per_core_busy;
|
||||
|
||||
`BUFFER_DCR_BUS_IF (core_dcr_bus_if, dcr_bus_if, (`SOCKET_SIZE > 1));
|
||||
|
||||
`SCOPE_IO_SWITCH (`SOCKET_SIZE)
|
||||
|
||||
// Generate all cores
|
||||
for (genvar i = 0; i < `SOCKET_SIZE; ++i) begin
|
||||
for (genvar core_id = 0; core_id < `SOCKET_SIZE; ++core_id) begin : g_cores
|
||||
|
||||
`RESET_RELAY (core_reset, reset);
|
||||
|
||||
VX_dcr_bus_if core_dcr_bus_if();
|
||||
`BUFFER_DCR_BUS_IF (core_dcr_bus_if, dcr_bus_if, 1'b1, (`SOCKET_SIZE > 1))
|
||||
|
||||
VX_core #(
|
||||
.CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + i)
|
||||
.CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + core_id),
|
||||
.INSTANCE_ID ($sformatf("%s-core%0d", INSTANCE_ID, core_id))
|
||||
) core (
|
||||
`SCOPE_IO_BIND (i)
|
||||
`SCOPE_IO_BIND (scope_core + core_id)
|
||||
|
||||
.clk (clk),
|
||||
.reset (core_reset),
|
||||
|
@ -217,15 +221,15 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
|
||||
.dcr_bus_if (core_dcr_bus_if),
|
||||
|
||||
.dcache_bus_if (per_core_dcache_bus_if[i * DCACHE_NUM_REQS +: DCACHE_NUM_REQS]),
|
||||
.dcache_bus_if (per_core_dcache_bus_if[core_id * DCACHE_NUM_REQS +: DCACHE_NUM_REQS]),
|
||||
|
||||
.icache_bus_if (per_core_icache_bus_if[i]),
|
||||
.icache_bus_if (per_core_icache_bus_if[core_id]),
|
||||
|
||||
`ifdef GBAR_ENABLE
|
||||
.gbar_bus_if (per_core_gbar_bus_if[i]),
|
||||
.gbar_bus_if (per_core_gbar_bus_if[core_id]),
|
||||
`endif
|
||||
|
||||
.busy (per_core_busy[i])
|
||||
.busy (per_core_busy[core_id])
|
||||
);
|
||||
end
|
||||
|
||||
|
|
|
@ -85,30 +85,31 @@
|
|||
`define VX_CSR_MPM_IBUF_ST_H 12'hB85
|
||||
`define VX_CSR_MPM_SCRB_ST 12'hB06
|
||||
`define VX_CSR_MPM_SCRB_ST_H 12'hB86
|
||||
`define VX_CSR_MPM_SCRB_ALU 12'hB07
|
||||
`define VX_CSR_MPM_SCRB_ALU_H 12'hB87
|
||||
`define VX_CSR_MPM_SCRB_FPU 12'hB08
|
||||
`define VX_CSR_MPM_SCRB_FPU_H 12'hB88
|
||||
`define VX_CSR_MPM_SCRB_LSU 12'hB09
|
||||
`define VX_CSR_MPM_SCRB_LSU_H 12'hB89
|
||||
`define VX_CSR_MPM_SCRB_SFU 12'hB0A
|
||||
`define VX_CSR_MPM_SCRB_SFU_H 12'hB8A
|
||||
`define VX_CSR_MPM_OPDS_ST 12'hB07
|
||||
`define VX_CSR_MPM_OPDS_ST_H 12'hB87
|
||||
`define VX_CSR_MPM_SCRB_ALU 12'hB08
|
||||
`define VX_CSR_MPM_SCRB_ALU_H 12'hB88
|
||||
`define VX_CSR_MPM_SCRB_FPU 12'hB09
|
||||
`define VX_CSR_MPM_SCRB_FPU_H 12'hB89
|
||||
`define VX_CSR_MPM_SCRB_LSU 12'hB0A
|
||||
`define VX_CSR_MPM_SCRB_LSU_H 12'hB8A
|
||||
`define VX_CSR_MPM_SCRB_SFU 12'hB0B
|
||||
`define VX_CSR_MPM_SCRB_SFU_H 12'hB8B
|
||||
`define VX_CSR_MPM_SCRB_CSRS 12'hB0C
|
||||
`define VX_CSR_MPM_SCRB_CSRS_H 12'hB8C
|
||||
`define VX_CSR_MPM_SCRB_WCTL 12'hB0D
|
||||
`define VX_CSR_MPM_SCRB_WCTL_H 12'hB8D
|
||||
// PERF: memory
|
||||
`define VX_CSR_MPM_IFETCHES 12'hB0B
|
||||
`define VX_CSR_MPM_IFETCHES_H 12'hB8B
|
||||
`define VX_CSR_MPM_LOADS 12'hB0C
|
||||
`define VX_CSR_MPM_LOADS_H 12'hB8C
|
||||
`define VX_CSR_MPM_STORES 12'hB0D
|
||||
`define VX_CSR_MPM_STORES_H 12'hB8D
|
||||
`define VX_CSR_MPM_IFETCH_LT 12'hB0E
|
||||
`define VX_CSR_MPM_IFETCH_LT_H 12'hB8E
|
||||
`define VX_CSR_MPM_LOAD_LT 12'hB0F
|
||||
`define VX_CSR_MPM_LOAD_LT_H 12'hB8F
|
||||
// SFU: scoreboard
|
||||
`define VX_CSR_MPM_SCRB_WCTL 12'hB10
|
||||
`define VX_CSR_MPM_SCRB_WCTL_H 12'hB90
|
||||
`define VX_CSR_MPM_SCRB_CSRS 12'hB11
|
||||
`define VX_CSR_MPM_SCRB_CSRS_H 12'hB91
|
||||
`define VX_CSR_MPM_IFETCHES 12'hB0E
|
||||
`define VX_CSR_MPM_IFETCHES_H 12'hB8E
|
||||
`define VX_CSR_MPM_LOADS 12'hB0F
|
||||
`define VX_CSR_MPM_LOADS_H 12'hB8F
|
||||
`define VX_CSR_MPM_STORES 12'hB10
|
||||
`define VX_CSR_MPM_STORES_H 12'hB90
|
||||
`define VX_CSR_MPM_IFETCH_LT 12'hB11
|
||||
`define VX_CSR_MPM_IFETCH_LT_H 12'hB91
|
||||
`define VX_CSR_MPM_LOAD_LT 12'hB12
|
||||
`define VX_CSR_MPM_LOAD_LT_H 12'hB92
|
||||
|
||||
// Machine Performance-monitoring memory counters (class 2) ///////////////////
|
||||
|
||||
|
@ -165,6 +166,10 @@
|
|||
`define VX_CSR_MPM_MEM_WRITES_H 12'hB99
|
||||
`define VX_CSR_MPM_MEM_LT 12'hB1A // memory latency
|
||||
`define VX_CSR_MPM_MEM_LT_H 12'hB9A
|
||||
`define VX_CSR_MPM_MEM_BANK_CNTR 12'hB1E // memory bank requests
|
||||
`define VX_CSR_MPM_MEM_BANK_CNTR_H 12'hB9E
|
||||
`define VX_CSR_MPM_MEM_BANK_TICK 12'hB1F // memory ticks
|
||||
`define VX_CSR_MPM_MEM_BANK_TICK_H 12'hB9F
|
||||
// PERF: lmem
|
||||
`define VX_CSR_MPM_LMEM_READS 12'hB1B // memory reads
|
||||
`define VX_CSR_MPM_LMEM_READS_H 12'hB9B
|
||||
|
|
|
@ -44,6 +44,11 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
output wire busy
|
||||
);
|
||||
|
||||
`ifdef SCOPE
|
||||
localparam scope_cluster = 0;
|
||||
`SCOPE_IO_SWITCH (`NUM_CLUSTERS);
|
||||
`endif
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_mem_perf_if mem_perf_if();
|
||||
assign mem_perf_if.icache = 'x;
|
||||
|
@ -75,12 +80,14 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
.CRSQ_SIZE (`L3_CRSQ_SIZE),
|
||||
.MSHR_SIZE (`L3_MSHR_SIZE),
|
||||
.MRSQ_SIZE (`L3_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`L3_MREQ_SIZE),
|
||||
.MREQ_SIZE (`L3_WRITEBACK ? `L3_MSHR_SIZE : `L3_MREQ_SIZE),
|
||||
.TAG_WIDTH (L2_MEM_TAG_WIDTH),
|
||||
.WRITE_ENABLE (1),
|
||||
.WRITEBACK (`L3_WRITEBACK),
|
||||
.DIRTY_BYTES (`L3_WRITEBACK),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.CORE_OUT_BUF (2),
|
||||
.MEM_OUT_BUF (2),
|
||||
.CORE_OUT_BUF (3),
|
||||
.MEM_OUT_BUF (3),
|
||||
.NC_ENABLE (1),
|
||||
.PASSTHRU (!`L3_ENABLED)
|
||||
) l3cache (
|
||||
|
@ -102,7 +109,7 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
assign mem_req_data = mem_bus_if.req_data.data;
|
||||
assign mem_req_tag = mem_bus_if.req_data.tag;
|
||||
assign mem_bus_if.req_ready = mem_req_ready;
|
||||
`UNUSED_VAR (mem_bus_if.req_data.atype)
|
||||
`UNUSED_VAR (mem_bus_if.req_data.flags)
|
||||
|
||||
assign mem_bus_if.rsp_valid = mem_rsp_valid;
|
||||
assign mem_bus_if.rsp_data.data = mem_rsp_data;
|
||||
|
@ -121,19 +128,19 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
|
||||
|
||||
`SCOPE_IO_SWITCH (`NUM_CLUSTERS)
|
||||
|
||||
// Generate all clusters
|
||||
for (genvar i = 0; i < `NUM_CLUSTERS; ++i) begin
|
||||
for (genvar cluster_id = 0; cluster_id < `NUM_CLUSTERS; ++cluster_id) begin : g_clusters
|
||||
|
||||
`RESET_RELAY (cluster_reset, reset);
|
||||
|
||||
`BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, (`NUM_CLUSTERS > 1));
|
||||
VX_dcr_bus_if cluster_dcr_bus_if();
|
||||
`BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, 1'b1, (`NUM_CLUSTERS > 1))
|
||||
|
||||
VX_cluster #(
|
||||
.CLUSTER_ID (i)
|
||||
.CLUSTER_ID (cluster_id),
|
||||
.INSTANCE_ID ($sformatf("cluster%0d", cluster_id))
|
||||
) cluster (
|
||||
`SCOPE_IO_BIND (i)
|
||||
`SCOPE_IO_BIND (scope_cluster + cluster_id)
|
||||
|
||||
.clk (clk),
|
||||
.reset (cluster_reset),
|
||||
|
@ -144,9 +151,9 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
|
||||
.dcr_bus_if (cluster_dcr_bus_if),
|
||||
|
||||
.mem_bus_if (per_cluster_mem_bus_if[i]),
|
||||
.mem_bus_if (per_cluster_mem_bus_if[cluster_id]),
|
||||
|
||||
.busy (per_cluster_busy[i])
|
||||
.busy (per_cluster_busy[cluster_id])
|
||||
);
|
||||
end
|
||||
|
||||
|
@ -182,16 +189,26 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
|
||||
`endif
|
||||
|
||||
// dump device configuration
|
||||
initial begin
|
||||
`TRACE(0, ("CONFIGS: num_threads=%0d, num_warps=%0d, num_cores=%0d, num_clusters=%0d, socket_size=%0d, local_mem_base=0x%0h, num_barriers=%0d\n",
|
||||
`NUM_THREADS, `NUM_WARPS, `NUM_CORES, `NUM_CLUSTERS, `SOCKET_SIZE, `LMEM_BASE_ADDR, `NUM_BARRIERS))
|
||||
end
|
||||
|
||||
`ifdef DBG_TRACE_MEM
|
||||
wire [`UUID_WIDTH-1:0] mem_req_uuid = mem_req_tag[`VX_MEM_TAG_WIDTH-1 -: `UUID_WIDTH];
|
||||
wire [`UUID_WIDTH-1:0] mem_rsp_uuid = mem_rsp_tag[`VX_MEM_TAG_WIDTH-1 -: `UUID_WIDTH];
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (mem_req_fire) begin
|
||||
if (mem_req_rw)
|
||||
`TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h data=0x%0h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data));
|
||||
else
|
||||
`TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen));
|
||||
if (mem_req_rw) begin
|
||||
`TRACE(1, ("%t: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h (#%0d)\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data, mem_req_uuid))
|
||||
end else begin
|
||||
`TRACE(1, ("%t: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h (#%0d)\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_uuid))
|
||||
end
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(1, ("%d: MEM Rsp: tag=0x%0h, data=0x%0h\n", $time, mem_rsp_tag, mem_rsp_data));
|
||||
`TRACE(1, ("%t: MEM Rd Rsp: tag=0x%0h, data=0x%h (#%0d)\n", $time, mem_rsp_tag, mem_rsp_data, mem_rsp_uuid))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
|
||||
module Vortex_axi import VX_gpu_pkg::*; #(
|
||||
parameter AXI_DATA_WIDTH = `VX_MEM_DATA_WIDTH,
|
||||
parameter AXI_ADDR_WIDTH = `MEM_ADDR_WIDTH,
|
||||
parameter AXI_ADDR_WIDTH = `MEM_ADDR_WIDTH + (`VX_MEM_DATA_WIDTH/8),
|
||||
parameter AXI_TID_WIDTH = `VX_MEM_TAG_WIDTH,
|
||||
parameter AXI_NUM_BANKS = 1
|
||||
)(
|
||||
|
@ -82,9 +82,11 @@ module Vortex_axi import VX_gpu_pkg::*; #(
|
|||
// Status
|
||||
output wire busy
|
||||
);
|
||||
`STATIC_ASSERT((AXI_DATA_WIDTH == `VX_MEM_DATA_WIDTH), ("invalid memory data size: current=%0d, expected=%0d", AXI_DATA_WIDTH, `VX_MEM_DATA_WIDTH))
|
||||
// Report the same macro the condition compares against, so the "expected"
// value in the message matches the bound that was actually checked.
`STATIC_ASSERT((AXI_ADDR_WIDTH >= `MEM_ADDR_WIDTH), ("invalid memory address size: current=%0d, expected=%0d", AXI_ADDR_WIDTH, `MEM_ADDR_WIDTH))
|
||||
//`STATIC_ASSERT((AXI_TID_WIDTH >= `VX_MEM_TAG_WIDTH), ("invalid memory tag size: current=%0d, expected=%0d", AXI_TID_WIDTH, `VX_MEM_TAG_WIDTH))
|
||||
localparam DST_LDATAW = `CLOG2(AXI_DATA_WIDTH);
|
||||
localparam SRC_LDATAW = `CLOG2(`VX_MEM_DATA_WIDTH);
|
||||
localparam SUB_LDATAW = DST_LDATAW - SRC_LDATAW;
|
||||
localparam VX_MEM_TAG_A_WIDTH = `VX_MEM_TAG_WIDTH + `MAX(SUB_LDATAW, 0);
|
||||
localparam VX_MEM_ADDR_A_WIDTH = `VX_MEM_ADDR_WIDTH - SUB_LDATAW;
|
||||
|
||||
wire mem_req_valid;
|
||||
wire mem_req_rw;
|
||||
|
@ -99,95 +101,7 @@ module Vortex_axi import VX_gpu_pkg::*; #(
|
|||
wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag;
|
||||
wire mem_rsp_ready;
|
||||
|
||||
wire [`MEM_ADDR_WIDTH-1:0] m_axi_awaddr_unqual [AXI_NUM_BANKS];
|
||||
wire [`MEM_ADDR_WIDTH-1:0] m_axi_araddr_unqual [AXI_NUM_BANKS];
|
||||
|
||||
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_awid_unqual [AXI_NUM_BANKS];
|
||||
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_arid_unqual [AXI_NUM_BANKS];
|
||||
|
||||
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_bid_unqual [AXI_NUM_BANKS];
|
||||
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_rid_unqual [AXI_NUM_BANKS];
|
||||
|
||||
for (genvar i = 0; i < AXI_NUM_BANKS; ++i) begin
|
||||
assign m_axi_awaddr[i] = `MEM_ADDR_WIDTH'(m_axi_awaddr_unqual[i]);
|
||||
assign m_axi_araddr[i] = `MEM_ADDR_WIDTH'(m_axi_araddr_unqual[i]);
|
||||
|
||||
assign m_axi_awid[i] = AXI_TID_WIDTH'(m_axi_awid_unqual[i]);
|
||||
assign m_axi_arid[i] = AXI_TID_WIDTH'(m_axi_arid_unqual[i]);
|
||||
|
||||
assign m_axi_rid_unqual[i] = `VX_MEM_TAG_WIDTH'(m_axi_rid[i]);
|
||||
assign m_axi_bid_unqual[i] = `VX_MEM_TAG_WIDTH'(m_axi_bid[i]);
|
||||
end
|
||||
|
||||
VX_axi_adapter #(
|
||||
.DATA_WIDTH (`VX_MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`MEM_ADDR_WIDTH),
|
||||
.TAG_WIDTH (`VX_MEM_TAG_WIDTH),
|
||||
.NUM_BANKS (AXI_NUM_BANKS),
|
||||
.RSP_OUT_BUF((AXI_NUM_BANKS > 1) ? 2 : 0)
|
||||
) axi_adapter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.mem_req_valid (mem_req_valid),
|
||||
.mem_req_rw (mem_req_rw),
|
||||
.mem_req_byteen (mem_req_byteen),
|
||||
.mem_req_addr (mem_req_addr),
|
||||
.mem_req_data (mem_req_data),
|
||||
.mem_req_tag (mem_req_tag),
|
||||
.mem_req_ready (mem_req_ready),
|
||||
|
||||
.mem_rsp_valid (mem_rsp_valid),
|
||||
.mem_rsp_data (mem_rsp_data),
|
||||
.mem_rsp_tag (mem_rsp_tag),
|
||||
.mem_rsp_ready (mem_rsp_ready),
|
||||
|
||||
.m_axi_awvalid (m_axi_awvalid),
|
||||
.m_axi_awready (m_axi_awready),
|
||||
.m_axi_awaddr (m_axi_awaddr_unqual),
|
||||
.m_axi_awid (m_axi_awid_unqual),
|
||||
.m_axi_awlen (m_axi_awlen),
|
||||
.m_axi_awsize (m_axi_awsize),
|
||||
.m_axi_awburst (m_axi_awburst),
|
||||
.m_axi_awlock (m_axi_awlock),
|
||||
.m_axi_awcache (m_axi_awcache),
|
||||
.m_axi_awprot (m_axi_awprot),
|
||||
.m_axi_awqos (m_axi_awqos),
|
||||
.m_axi_awregion (m_axi_awregion),
|
||||
|
||||
.m_axi_wvalid (m_axi_wvalid),
|
||||
.m_axi_wready (m_axi_wready),
|
||||
.m_axi_wdata (m_axi_wdata),
|
||||
.m_axi_wstrb (m_axi_wstrb),
|
||||
.m_axi_wlast (m_axi_wlast),
|
||||
|
||||
.m_axi_bvalid (m_axi_bvalid),
|
||||
.m_axi_bready (m_axi_bready),
|
||||
.m_axi_bid (m_axi_bid_unqual),
|
||||
.m_axi_bresp (m_axi_bresp),
|
||||
|
||||
.m_axi_arvalid (m_axi_arvalid),
|
||||
.m_axi_arready (m_axi_arready),
|
||||
.m_axi_araddr (m_axi_araddr_unqual),
|
||||
.m_axi_arid (m_axi_arid_unqual),
|
||||
.m_axi_arlen (m_axi_arlen),
|
||||
.m_axi_arsize (m_axi_arsize),
|
||||
.m_axi_arburst (m_axi_arburst),
|
||||
.m_axi_arlock (m_axi_arlock),
|
||||
.m_axi_arcache (m_axi_arcache),
|
||||
.m_axi_arprot (m_axi_arprot),
|
||||
.m_axi_arqos (m_axi_arqos),
|
||||
.m_axi_arregion (m_axi_arregion),
|
||||
|
||||
.m_axi_rvalid (m_axi_rvalid),
|
||||
.m_axi_rready (m_axi_rready),
|
||||
.m_axi_rdata (m_axi_rdata),
|
||||
.m_axi_rlast (m_axi_rlast) ,
|
||||
.m_axi_rid (m_axi_rid_unqual),
|
||||
.m_axi_rresp (m_axi_rresp)
|
||||
);
|
||||
|
||||
`SCOPE_IO_SWITCH (1)
|
||||
`SCOPE_IO_SWITCH (1);
|
||||
|
||||
Vortex vortex (
|
||||
`SCOPE_IO_BIND (0)
|
||||
|
@ -215,4 +129,128 @@ module Vortex_axi import VX_gpu_pkg::*; #(
|
|||
.busy (busy)
|
||||
);
|
||||
|
||||
wire mem_req_valid_a;
|
||||
wire mem_req_rw_a;
|
||||
wire [(AXI_DATA_WIDTH/8)-1:0] mem_req_byteen_a;
|
||||
wire [VX_MEM_ADDR_A_WIDTH-1:0] mem_req_addr_a;
|
||||
wire [AXI_DATA_WIDTH-1:0] mem_req_data_a;
|
||||
wire [VX_MEM_TAG_A_WIDTH-1:0] mem_req_tag_a;
|
||||
wire mem_req_ready_a;
|
||||
|
||||
wire mem_rsp_valid_a;
|
||||
wire [AXI_DATA_WIDTH-1:0] mem_rsp_data_a;
|
||||
wire [VX_MEM_TAG_A_WIDTH-1:0] mem_rsp_tag_a;
|
||||
wire mem_rsp_ready_a;
|
||||
|
||||
VX_mem_adapter #(
|
||||
.SRC_DATA_WIDTH (`VX_MEM_DATA_WIDTH),
|
||||
.DST_DATA_WIDTH (AXI_DATA_WIDTH),
|
||||
.SRC_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH),
|
||||
.DST_ADDR_WIDTH (VX_MEM_ADDR_A_WIDTH),
|
||||
.SRC_TAG_WIDTH (`VX_MEM_TAG_WIDTH),
|
||||
.DST_TAG_WIDTH (VX_MEM_TAG_A_WIDTH),
|
||||
.REQ_OUT_BUF (0),
|
||||
.RSP_OUT_BUF (0)
|
||||
) mem_adapter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.mem_req_valid_in (mem_req_valid),
|
||||
.mem_req_addr_in (mem_req_addr),
|
||||
.mem_req_rw_in (mem_req_rw),
|
||||
.mem_req_byteen_in (mem_req_byteen),
|
||||
.mem_req_data_in (mem_req_data),
|
||||
.mem_req_tag_in (mem_req_tag),
|
||||
.mem_req_ready_in (mem_req_ready),
|
||||
|
||||
.mem_rsp_valid_in (mem_rsp_valid),
|
||||
.mem_rsp_data_in (mem_rsp_data),
|
||||
.mem_rsp_tag_in (mem_rsp_tag),
|
||||
.mem_rsp_ready_in (mem_rsp_ready),
|
||||
|
||||
.mem_req_valid_out (mem_req_valid_a),
|
||||
.mem_req_addr_out (mem_req_addr_a),
|
||||
.mem_req_rw_out (mem_req_rw_a),
|
||||
.mem_req_byteen_out (mem_req_byteen_a),
|
||||
.mem_req_data_out (mem_req_data_a),
|
||||
.mem_req_tag_out (mem_req_tag_a),
|
||||
.mem_req_ready_out (mem_req_ready_a),
|
||||
|
||||
.mem_rsp_valid_out (mem_rsp_valid_a),
|
||||
.mem_rsp_data_out (mem_rsp_data_a),
|
||||
.mem_rsp_tag_out (mem_rsp_tag_a),
|
||||
.mem_rsp_ready_out (mem_rsp_ready_a)
|
||||
);
|
||||
|
||||
VX_axi_adapter #(
|
||||
.DATA_WIDTH (AXI_DATA_WIDTH),
|
||||
.ADDR_WIDTH_IN (VX_MEM_ADDR_A_WIDTH),
|
||||
.ADDR_WIDTH_OUT (AXI_ADDR_WIDTH),
|
||||
.TAG_WIDTH_IN (VX_MEM_TAG_A_WIDTH),
|
||||
.TAG_WIDTH_OUT (AXI_TID_WIDTH),
|
||||
.NUM_BANKS (AXI_NUM_BANKS),
|
||||
.BANK_INTERLEAVE(0),
|
||||
.RSP_OUT_BUF ((AXI_NUM_BANKS > 1) ? 2 : 0)
|
||||
) axi_adapter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.mem_req_valid (mem_req_valid_a),
|
||||
.mem_req_rw (mem_req_rw_a),
|
||||
.mem_req_byteen (mem_req_byteen_a),
|
||||
.mem_req_addr (mem_req_addr_a),
|
||||
.mem_req_data (mem_req_data_a),
|
||||
.mem_req_tag (mem_req_tag_a),
|
||||
.mem_req_ready (mem_req_ready_a),
|
||||
|
||||
.mem_rsp_valid (mem_rsp_valid_a),
|
||||
.mem_rsp_data (mem_rsp_data_a),
|
||||
.mem_rsp_tag (mem_rsp_tag_a),
|
||||
.mem_rsp_ready (mem_rsp_ready_a),
|
||||
|
||||
.m_axi_awvalid (m_axi_awvalid),
|
||||
.m_axi_awready (m_axi_awready),
|
||||
.m_axi_awaddr (m_axi_awaddr),
|
||||
.m_axi_awid (m_axi_awid),
|
||||
.m_axi_awlen (m_axi_awlen),
|
||||
.m_axi_awsize (m_axi_awsize),
|
||||
.m_axi_awburst (m_axi_awburst),
|
||||
.m_axi_awlock (m_axi_awlock),
|
||||
.m_axi_awcache (m_axi_awcache),
|
||||
.m_axi_awprot (m_axi_awprot),
|
||||
.m_axi_awqos (m_axi_awqos),
|
||||
.m_axi_awregion (m_axi_awregion),
|
||||
|
||||
.m_axi_wvalid (m_axi_wvalid),
|
||||
.m_axi_wready (m_axi_wready),
|
||||
.m_axi_wdata (m_axi_wdata),
|
||||
.m_axi_wstrb (m_axi_wstrb),
|
||||
.m_axi_wlast (m_axi_wlast),
|
||||
|
||||
.m_axi_bvalid (m_axi_bvalid),
|
||||
.m_axi_bready (m_axi_bready),
|
||||
.m_axi_bid (m_axi_bid),
|
||||
.m_axi_bresp (m_axi_bresp),
|
||||
|
||||
.m_axi_arvalid (m_axi_arvalid),
|
||||
.m_axi_arready (m_axi_arready),
|
||||
.m_axi_araddr (m_axi_araddr),
|
||||
.m_axi_arid (m_axi_arid),
|
||||
.m_axi_arlen (m_axi_arlen),
|
||||
.m_axi_arsize (m_axi_arsize),
|
||||
.m_axi_arburst (m_axi_arburst),
|
||||
.m_axi_arlock (m_axi_arlock),
|
||||
.m_axi_arcache (m_axi_arcache),
|
||||
.m_axi_arprot (m_axi_arprot),
|
||||
.m_axi_arqos (m_axi_arqos),
|
||||
.m_axi_arregion (m_axi_arregion),
|
||||
|
||||
.m_axi_rvalid (m_axi_rvalid),
|
||||
.m_axi_rready (m_axi_rready),
|
||||
.m_axi_rdata (m_axi_rdata),
|
||||
.m_axi_rlast (m_axi_rlast),
|
||||
.m_axi_rid (m_axi_rid),
|
||||
.m_axi_rresp (m_axi_rresp)
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
// To be done:
|
||||
// Check how to run this with OPAE. Looks like a setup issue.
|
||||
|
||||
`ifndef NOPAE
|
||||
|
||||
`include "platform_if.vh"
|
||||
|
||||
|
@ -85,7 +86,7 @@ module ccip_std_afu #(
|
|||
t_local_mem_data avs_writedata [NUM_LOCAL_MEM_BANKS];
|
||||
t_local_mem_addr avs_address [NUM_LOCAL_MEM_BANKS];
|
||||
logic avs_write [NUM_LOCAL_MEM_BANKS];
|
||||
logic avs_read [NUM_LOCAL_MEM_BANKS];
|
||||
logic avs_read [NUM_LOCAL_MEM_BANKS];
|
||||
|
||||
for (genvar b = 0; b < NUM_LOCAL_MEM_BANKS; b++) begin
|
||||
assign local_mem[b].burstcount = avs_burstcount[b];
|
||||
|
@ -94,7 +95,7 @@ module ccip_std_afu #(
|
|||
assign local_mem[b].byteenable = avs_byteenable[b];
|
||||
assign local_mem[b].write = avs_write[b];
|
||||
assign local_mem[b].read = avs_read[b];
|
||||
|
||||
|
||||
assign avs_waitrequest[b] = local_mem[b].waitrequest;
|
||||
assign avs_readdata[b] = local_mem[b].readdata;
|
||||
assign avs_readdatavalid[b] = local_mem[b].readdatavalid;
|
||||
|
@ -107,7 +108,7 @@ module ccip_std_afu #(
|
|||
.reset (reset_T1),
|
||||
|
||||
.cp2af_sRxPort (cp2af_sRx_T1),
|
||||
.af2cp_sTxPort (af2cp_sTx_T0),
|
||||
.af2cp_sTxPort (af2cp_sTx_T0),
|
||||
|
||||
.avs_writedata (avs_writedata),
|
||||
.avs_readdata (avs_readdata),
|
||||
|
@ -121,3 +122,5 @@ module ccip_std_afu #(
|
|||
);
|
||||
|
||||
endmodule
|
||||
|
||||
`endif
|
||||
|
|
|
@ -30,7 +30,17 @@
|
|||
|
||||
//`include "platform_afu_top_config.vh"
|
||||
|
||||
`ifdef PLATFORM_PROVIDES_LOCAL_MEMORY
|
||||
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH
|
||||
`define PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH (`PLATFORM_MEMORY_ADDR_WIDTH - $clog2(`PLATFORM_MEMORY_DATA_WIDTH/8))
|
||||
`endif
|
||||
|
||||
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH
|
||||
`define PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH `PLATFORM_MEMORY_DATA_WIDTH
|
||||
`endif
|
||||
|
||||
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH
|
||||
`define PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH `PLATFORM_MEMORY_BURST_CNT_WIDTH
|
||||
`endif
|
||||
|
||||
package local_mem_cfg_pkg;
|
||||
|
||||
|
@ -57,5 +67,3 @@ package local_mem_cfg_pkg;
|
|||
typedef logic [LOCAL_MEM_DATA_N_BYTES-1:0] t_local_mem_byte_mask;
|
||||
|
||||
endpackage // local_mem_cfg_pkg
|
||||
|
||||
`endif // PLATFORM_PROVIDES_LOCAL_MEMORY
|
||||
|
|
|
@ -18,6 +18,10 @@
|
|||
`endif
|
||||
`include "VX_define.vh"
|
||||
|
||||
`ifndef PLATFORM_MEMORY_INTERLEAVE
|
||||
`define PLATFORM_MEMORY_INTERLEAVE 1
|
||||
`endif
|
||||
|
||||
module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_gpu_pkg::*; #(
|
||||
parameter NUM_LOCAL_MEM_BANKS = 2
|
||||
) (
|
||||
|
@ -40,16 +44,17 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
output t_local_mem_burst_cnt avs_burstcount [NUM_LOCAL_MEM_BANKS],
|
||||
input wire avs_readdatavalid [NUM_LOCAL_MEM_BANKS]
|
||||
);
|
||||
|
||||
localparam LMEM_DATA_WIDTH = $bits(t_local_mem_data);
|
||||
localparam LMEM_DATA_SIZE = LMEM_DATA_WIDTH / 8;
|
||||
localparam LMEM_ADDR_WIDTH = $bits(t_local_mem_addr);
|
||||
localparam LMEM_ADDR_WIDTH = `VX_MEM_ADDR_WIDTH + ($clog2(`VX_MEM_DATA_WIDTH) - $clog2(LMEM_DATA_WIDTH));
|
||||
localparam LMEM_BURST_CTRW = $bits(t_local_mem_burst_cnt);
|
||||
|
||||
localparam CCI_DATA_WIDTH = $bits(t_ccip_clData);
|
||||
localparam CCI_DATA_SIZE = CCI_DATA_WIDTH / 8;
|
||||
localparam CCI_ADDR_WIDTH = $bits(t_ccip_clAddr);
|
||||
|
||||
localparam RESET_CTR_WIDTH = `CLOG2(`RESET_DELAY+1);
|
||||
|
||||
localparam AVS_RD_QUEUE_SIZE = 32;
|
||||
localparam _VX_MEM_TAG_WIDTH = `VX_MEM_TAG_WIDTH;
|
||||
localparam _AVS_REQ_TAGW_VX = _VX_MEM_TAG_WIDTH + `CLOG2(LMEM_DATA_WIDTH) - `CLOG2(`VX_MEM_DATA_WIDTH);
|
||||
|
@ -64,6 +69,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
localparam AFU_ID_L = 16'h0002; // AFU ID Lower
|
||||
localparam AFU_ID_H = 16'h0004; // AFU ID Higher
|
||||
|
||||
localparam CMD_IDLE = 0;
|
||||
localparam CMD_MEM_READ = `AFU_IMAGE_CMD_MEM_READ;
|
||||
localparam CMD_MEM_WRITE = `AFU_IMAGE_CMD_MEM_WRITE;
|
||||
localparam CMD_DCR_WRITE = `AFU_IMAGE_CMD_DCR_WRITE;
|
||||
|
@ -78,7 +84,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
|
||||
localparam COUT_TID_WIDTH = `CLOG2(`VX_MEM_BYTEEN_WIDTH);
|
||||
localparam COUT_QUEUE_DATAW = COUT_TID_WIDTH + 8;
|
||||
localparam COUT_QUEUE_SIZE = 64;
|
||||
localparam COUT_QUEUE_SIZE = 1024;
|
||||
|
||||
localparam MMIO_DEV_CAPS = `AFU_IMAGE_MMIO_DEV_CAPS;
|
||||
localparam MMIO_ISA_CAPS = `AFU_IMAGE_MMIO_ISA_CAPS;
|
||||
|
@ -96,7 +102,9 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
|
||||
wire [127:0] afu_id = `AFU_ACCEL_UUID;
|
||||
|
||||
wire [63:0] dev_caps = {16'b0,
|
||||
wire [63:0] dev_caps = {8'b0,
|
||||
5'(`PLATFORM_MEMORY_ADDR_WIDTH-20),
|
||||
3'(`CLOG2(`PLATFORM_MEMORY_BANKS)),
|
||||
8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0),
|
||||
16'(`NUM_CORES * `NUM_CLUSTERS),
|
||||
8'(`NUM_WARPS),
|
||||
|
@ -139,14 +147,12 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
|
||||
// MMIO controller ////////////////////////////////////////////////////////////
|
||||
|
||||
t_ccip_c0_ReqMmioHdr mmio_hdr;
|
||||
assign mmio_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr);
|
||||
`UNUSED_VAR (mmio_hdr)
|
||||
t_ccip_c0_ReqMmioHdr mmio_req_hdr;
|
||||
assign mmio_req_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr[$bits(t_ccip_c0_ReqMmioHdr)-1:0]);
|
||||
`UNUSED_VAR (mmio_req_hdr)
|
||||
|
||||
`STATIC_ASSERT(($bits(t_ccip_c0_ReqMmioHdr)-$bits(mmio_hdr.address)) == 12, ("Oops!"))
|
||||
|
||||
t_if_ccip_c2_Tx mmio_tx;
|
||||
assign af2cp_sTxPort.c2 = mmio_tx;
|
||||
t_if_ccip_c2_Tx mmio_rsp;
|
||||
assign af2cp_sTxPort.c2 = mmio_rsp;
|
||||
|
||||
`ifdef SCOPE
|
||||
|
||||
|
@ -170,33 +176,35 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
if (reset) begin
|
||||
cmd_scope_reading <= 0;
|
||||
cmd_scope_writing <= 0;
|
||||
scope_bus_in <= 0;
|
||||
scope_bus_in <= 0;
|
||||
end else begin
|
||||
scope_bus_in <= 0;
|
||||
if (scope_bus_out) begin
|
||||
cmd_scope_reading <= 1;
|
||||
scope_bus_ctr <= 63;
|
||||
end
|
||||
scope_bus_in <= 0;
|
||||
if (cp2af_sRxPort.c0.mmioWrValid
|
||||
&& (MMIO_SCOPE_WRITE == mmio_hdr.address)) begin
|
||||
&& (MMIO_SCOPE_WRITE == mmio_req_hdr.address)) begin
|
||||
cmd_scope_wdata <= 64'(cp2af_sRxPort.c0.data);
|
||||
cmd_scope_writing <= 1;
|
||||
scope_bus_ctr <= 63;
|
||||
scope_bus_in <= 1;
|
||||
end
|
||||
end
|
||||
if (cmd_scope_writing) begin
|
||||
scope_bus_in <= 1'(cmd_scope_wdata >> scope_bus_ctr);
|
||||
scope_bus_ctr <= scope_bus_ctr - 1;
|
||||
if (scope_bus_ctr == 0) begin
|
||||
cmd_scope_writing <= 0;
|
||||
if (cmd_scope_writing) begin
|
||||
scope_bus_in <= cmd_scope_wdata[scope_bus_ctr];
|
||||
scope_bus_ctr <= scope_bus_ctr - 6'd1;
|
||||
if (scope_bus_ctr == 0) begin
|
||||
cmd_scope_writing <= 0;
|
||||
scope_bus_ctr <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
if (cmd_scope_reading) begin
|
||||
cmd_scope_rdata <= {cmd_scope_rdata[62:0], scope_bus_out};
|
||||
scope_bus_ctr <= scope_bus_ctr - 1;
|
||||
if (scope_bus_ctr == 0) begin
|
||||
cmd_scope_reading <= 0;
|
||||
if (cmd_scope_reading) begin
|
||||
cmd_scope_rdata <= {cmd_scope_rdata[62:0], scope_bus_out};
|
||||
scope_bus_ctr <= scope_bus_ctr - 6'd1;
|
||||
if (scope_bus_ctr == 0) begin
|
||||
cmd_scope_reading <= 0;
|
||||
scope_bus_ctr <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -206,6 +214,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
wire [COUT_QUEUE_DATAW-1:0] cout_q_dout;
|
||||
wire cout_q_full, cout_q_empty;
|
||||
|
||||
wire [COUT_QUEUE_DATAW-1:0] cout_q_dout_s = cout_q_dout & {COUT_QUEUE_DATAW{!cout_q_empty}};
|
||||
|
||||
`ifdef SIMULATION
|
||||
`ifndef VERILATOR
|
||||
// disable assertions until full reset
|
||||
|
@ -226,60 +236,22 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
`endif
|
||||
`endif
|
||||
|
||||
// MMIO controller ////////////////////////////////////////////////////////////
|
||||
|
||||
// Handle MMIO read requests
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
mmio_tx.mmioRdValid <= 0;
|
||||
mmio_tx.hdr <= '0;
|
||||
mmio_rsp.mmioRdValid <= 0;
|
||||
end else begin
|
||||
mmio_tx.mmioRdValid <= cp2af_sRxPort.c0.mmioRdValid;
|
||||
mmio_tx.hdr.tid <= mmio_hdr.tid;
|
||||
end
|
||||
// serve MMIO write request
|
||||
if (cp2af_sRxPort.c0.mmioWrValid) begin
|
||||
case (mmio_hdr.address)
|
||||
MMIO_CMD_ARG0: begin
|
||||
cmd_args[0] <= 64'(cp2af_sRxPort.c0.data);
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: MMIO_CMD_ARG0: data=0x%0h\n", $time, 64'(cp2af_sRxPort.c0.data)));
|
||||
`endif
|
||||
end
|
||||
MMIO_CMD_ARG1: begin
|
||||
cmd_args[1] <= 64'(cp2af_sRxPort.c0.data);
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: MMIO_CMD_ARG1: data=0x%0h\n", $time, 64'(cp2af_sRxPort.c0.data)));
|
||||
`endif
|
||||
end
|
||||
MMIO_CMD_ARG2: begin
|
||||
cmd_args[2] <= 64'(cp2af_sRxPort.c0.data);
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: MMIO_CMD_ARG2: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data)));
|
||||
`endif
|
||||
end
|
||||
MMIO_CMD_TYPE: begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: MMIO_CMD_TYPE: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data)));
|
||||
`endif
|
||||
end
|
||||
`ifdef SCOPE
|
||||
MMIO_SCOPE_WRITE: begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: MMIO_SCOPE_WRITE: data=0x%0h\n", $time, cmd_scope_wdata));
|
||||
`endif
|
||||
end
|
||||
`endif
|
||||
default: begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: Unknown MMIO Wr: addr=0x%0h, data=0x%0h\n", $time, mmio_hdr.address, 64'(cp2af_sRxPort.c0.data)));
|
||||
`endif
|
||||
end
|
||||
endcase
|
||||
mmio_rsp.mmioRdValid <= cp2af_sRxPort.c0.mmioRdValid;
|
||||
end
|
||||
|
||||
// serve MMIO read requests
|
||||
mmio_rsp.hdr.tid <= mmio_req_hdr.tid;
|
||||
|
||||
if (cp2af_sRxPort.c0.mmioRdValid) begin
|
||||
case (mmio_hdr.address)
|
||||
case (mmio_req_hdr.address)
|
||||
// AFU header
|
||||
16'h0000: mmio_tx.data <= {
|
||||
16'h0000: mmio_rsp.data <= {
|
||||
4'b0001, // Feature type = AFU
|
||||
8'b0, // reserved
|
||||
4'b0, // afu minor revision = 0
|
||||
|
@ -289,105 +261,140 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
4'b0, // afu major revision = 0
|
||||
12'b0 // feature ID = 0
|
||||
};
|
||||
AFU_ID_L: mmio_tx.data <= afu_id[63:0]; // afu id low
|
||||
AFU_ID_H: mmio_tx.data <= afu_id[127:64]; // afu id hi
|
||||
16'h0006: mmio_tx.data <= 64'h0; // next AFU
|
||||
16'h0008: mmio_tx.data <= 64'h0; // reserved
|
||||
AFU_ID_L: mmio_rsp.data <= afu_id[63:0]; // afu id low
|
||||
AFU_ID_H: mmio_rsp.data <= afu_id[127:64]; // afu id hi
|
||||
16'h0006: mmio_rsp.data <= 64'h0; // next AFU
|
||||
16'h0008: mmio_rsp.data <= 64'h0; // reserved
|
||||
MMIO_STATUS: begin
|
||||
mmio_tx.data <= 64'({cout_q_dout, !cout_q_empty, 8'(state)});
|
||||
mmio_rsp.data <= 64'({cout_q_dout_s, !cout_q_empty, 8'(state)});
|
||||
`ifdef DBG_TRACE_AFU
|
||||
if (state != STATE_WIDTH'(mmio_tx.data)) begin
|
||||
`TRACE(2, ("%d: MMIO_STATUS: addr=0x%0h, state=%0d\n", $time, mmio_hdr.address, state));
|
||||
if (state != STATE_WIDTH'(mmio_rsp.data)) begin
|
||||
`TRACE(2, ("%t: AFU: MMIO_STATUS: addr=0x%0h, state=%0d\n", $time, mmio_req_hdr.address, state))
|
||||
end
|
||||
`endif
|
||||
end
|
||||
`ifdef SCOPE
|
||||
MMIO_SCOPE_READ: begin
|
||||
mmio_tx.data <= cmd_scope_rdata;
|
||||
mmio_rsp.data <= cmd_scope_rdata;
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: MMIO_SCOPE_READ: data=0x%0h\n", $time, cmd_scope_rdata));
|
||||
`TRACE(2, ("%t: AFU: MMIO_SCOPE_READ: data=0x%h\n", $time, cmd_scope_rdata))
|
||||
`endif
|
||||
end
|
||||
`endif
|
||||
MMIO_DEV_CAPS: begin
|
||||
mmio_tx.data <= dev_caps;
|
||||
mmio_rsp.data <= dev_caps;
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: MMIO_DEV_CAPS: data=0x%0h\n", $time, dev_caps));
|
||||
`TRACE(2, ("%t: AFU: MMIO_DEV_CAPS: data=0x%h\n", $time, dev_caps))
|
||||
`endif
|
||||
end
|
||||
MMIO_ISA_CAPS: begin
|
||||
mmio_tx.data <= isa_caps;
|
||||
mmio_rsp.data <= isa_caps;
|
||||
`ifdef DBG_TRACE_AFU
|
||||
if (state != STATE_WIDTH'(mmio_tx.data)) begin
|
||||
`TRACE(2, ("%d: MMIO_ISA_CAPS: data=%0d\n", $time, isa_caps));
|
||||
if (state != STATE_WIDTH'(mmio_rsp.data)) begin
|
||||
`TRACE(2, ("%t: AFU: MMIO_ISA_CAPS: data=%0d\n", $time, isa_caps))
|
||||
end
|
||||
`endif
|
||||
end
|
||||
default: begin
|
||||
mmio_tx.data <= 64'h0;
|
||||
mmio_rsp.data <= 64'h0;
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: Unknown MMIO Rd: addr=0x%0h\n", $time, mmio_hdr.address));
|
||||
`TRACE(2, ("%t: AFU: Unknown MMIO Rd: addr=0x%0h\n", $time, mmio_req_hdr.address))
|
||||
`endif
|
||||
end
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// Handle MMIO write requests.
// On every rising edge of clk, if the host asserts c0.mmioWrValid, the request
// address (mmio_req_hdr.address) selects which register the write targets.
// Only MMIO_CMD_ARG0..2 store data in this block (into cmd_args[0..2]); every
// other case here only emits a debug trace. The block has no reset branch, so
// it never clears cmd_args itself.
// All `TRACE calls are compiled in only when DBG_TRACE_AFU is defined.
|
||||
always @(posedge clk) begin
|
||||
if (cp2af_sRxPort.c0.mmioWrValid) begin
|
||||
case (mmio_req_hdr.address)
|
||||
// Command argument 0: the write payload is sized to 64 bits and latched.
MMIO_CMD_ARG0: begin
|
||||
cmd_args[0] <= 64'(cp2af_sRxPort.c0.data);
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%t: AFU: MMIO_CMD_ARG0: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data)))
|
||||
`endif
|
||||
end
|
||||
// Command argument 1: same handling as argument 0.
MMIO_CMD_ARG1: begin
|
||||
cmd_args[1] <= 64'(cp2af_sRxPort.c0.data);
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%t: AFU: MMIO_CMD_ARG1: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data)))
|
||||
`endif
|
||||
end
|
||||
// Command argument 2: latched like the others, but traced in decimal (%0d)
// rather than hex.
MMIO_CMD_ARG2: begin
|
||||
cmd_args[2] <= 64'(cp2af_sRxPort.c0.data);
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%t: AFU: MMIO_CMD_ARG2: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data)))
|
||||
`endif
|
||||
end
|
||||
// Command type: nothing is stored in this block; the write is only traced.
// The command itself is decoded outside this block.
MMIO_CMD_TYPE: begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%t: AFU: MMIO_CMD_TYPE: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data)))
|
||||
`endif
|
||||
end
|
||||
// Scope write: this case exists only when SCOPE is defined, and here it only
// traces the 64-bit payload.
`ifdef SCOPE
|
||||
MMIO_SCOPE_WRITE: begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%t: AFU: MMIO_SCOPE_WRITE: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data)))
|
||||
`endif
|
||||
end
|
||||
`endif
|
||||
// Any other address: no state is updated; the write is only reported.
// NOTE(review): this message lacks the "AFU:" tag used by the other traces in
// this block - confirm whether that is intentional.
default: begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%t: Unknown MMIO Wr: addr=0x%0h, data=0x%h\n", $time, mmio_req_hdr.address, 64'(cp2af_sRxPort.c0.data)))
|
||||
`endif
|
||||
end
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// COMMAND FSM ////////////////////////////////////////////////////////////////
|
||||
|
||||
wire cmd_mem_rd_done;
|
||||
reg cmd_mem_wr_done;
|
||||
|
||||
reg [RESET_CTR_WIDTH-1:0] vx_reset_ctr;
|
||||
reg vx_busy_wait;
|
||||
reg vx_running;
|
||||
reg vx_reset = 1; // asserted at initialization
|
||||
wire vx_busy;
|
||||
|
||||
reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr;
|
||||
always @(posedge clk) begin
|
||||
if (state == STATE_RUN) begin
|
||||
vx_reset_ctr <= vx_reset_ctr + $bits(vx_reset_ctr)'(1);
|
||||
end else begin
|
||||
vx_reset_ctr <= '0;
|
||||
end
|
||||
end
|
||||
|
||||
wire is_mmio_wr_cmd = cp2af_sRxPort.c0.mmioWrValid && (MMIO_CMD_TYPE == mmio_hdr.address);
|
||||
wire is_mmio_wr_cmd = cp2af_sRxPort.c0.mmioWrValid && (MMIO_CMD_TYPE == mmio_req_hdr.address);
|
||||
wire [CMD_TYPE_WIDTH-1:0] cmd_type = is_mmio_wr_cmd ?
|
||||
CMD_TYPE_WIDTH'(cp2af_sRxPort.c0.data) : CMD_TYPE_WIDTH'(0);
|
||||
CMD_TYPE_WIDTH'(cp2af_sRxPort.c0.data) : CMD_TYPE_WIDTH'(CMD_IDLE);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= STATE_IDLE;
|
||||
vx_busy_wait <= 0;
|
||||
vx_running <= 0;
|
||||
state <= STATE_IDLE;
|
||||
vx_reset <= 1;
|
||||
end else begin
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
case (cmd_type)
|
||||
CMD_MEM_READ: begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: STATE MEM_READ: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size));
|
||||
`TRACE(2, ("%t: AFU: Goto STATE MEM_READ: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size))
|
||||
`endif
|
||||
state <= STATE_MEM_READ;
|
||||
end
|
||||
CMD_MEM_WRITE: begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: STATE MEM_WRITE: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size));
|
||||
`TRACE(2, ("%t: AFU: Goto STATE MEM_WRITE: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size))
|
||||
`endif
|
||||
state <= STATE_MEM_WRITE;
|
||||
end
|
||||
CMD_DCR_WRITE: begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: STATE DCR_WRITE: addr=0x%0h data=%0d\n", $time, cmd_dcr_addr, cmd_dcr_data));
|
||||
`TRACE(2, ("%t: AFU: Goto STATE DCR_WRITE: addr=0x%0h data=%0d\n", $time, cmd_dcr_addr, cmd_dcr_data))
|
||||
`endif
|
||||
state <= STATE_DCR_WRITE;
|
||||
end
|
||||
CMD_RUN: begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: STATE RUN\n", $time));
|
||||
`TRACE(2, ("%t: AFU: Goto STATE RUN\n", $time))
|
||||
`endif
|
||||
state <= STATE_RUN;
|
||||
vx_running <= 0;
|
||||
vx_reset_ctr <= RESET_CTR_WIDTH'(`RESET_DELAY-1);
|
||||
vx_reset <= 1;
|
||||
end
|
||||
default: begin
|
||||
state <= state;
|
||||
|
@ -398,54 +405,56 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
if (cmd_mem_rd_done) begin
|
||||
state <= STATE_IDLE;
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: STATE IDLE\n", $time));
|
||||
`TRACE(2, ("%t: AFU: Goto STATE IDLE\n", $time))
|
||||
`endif
|
||||
end
|
||||
end
|
||||
STATE_MEM_WRITE: begin
|
||||
if (cmd_mem_wr_done) begin
|
||||
state <= STATE_IDLE;
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: STATE IDLE\n", $time));
|
||||
`endif
|
||||
end
|
||||
end
|
||||
STATE_DCR_WRITE: begin
|
||||
state <= STATE_IDLE;
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: STATE IDLE\n", $time));
|
||||
`TRACE(2, ("%t: AFU: Goto STATE IDLE\n", $time))
|
||||
`endif
|
||||
end
|
||||
STATE_RUN: begin
|
||||
if (vx_running) begin
|
||||
if (vx_busy_wait) begin
|
||||
// wait until the gpu goes busy
|
||||
if (vx_busy) begin
|
||||
vx_busy_wait <= 0;
|
||||
end
|
||||
end else begin
|
||||
// wait until the gpu is not busy
|
||||
if (~vx_busy) begin
|
||||
state <= STATE_IDLE;
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: AFU: End execution\n", $time));
|
||||
`TRACE(2, ("%d: STATE IDLE\n", $time));
|
||||
`endif
|
||||
end
|
||||
end
|
||||
if (vx_reset) begin
|
||||
// wait until the reset network is ready
|
||||
if (vx_reset_ctr == RESET_CTR_WIDTH'(0)) begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%t: AFU: Begin execution\n", $time))
|
||||
`endif
|
||||
vx_busy_wait <= 1;
|
||||
vx_reset <= 0;
|
||||
end
|
||||
end else begin
|
||||
// wait until the reset sequence is complete
|
||||
if (vx_reset_ctr == (`RESET_DELAY-1)) begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: AFU: Begin execution\n", $time));
|
||||
`endif
|
||||
vx_running <= 1;
|
||||
vx_busy_wait <= 1;
|
||||
end
|
||||
if (vx_busy_wait) begin
|
||||
// wait until processor goes busy
|
||||
if (vx_busy) begin
|
||||
vx_busy_wait <= 0;
|
||||
end
|
||||
end else begin
|
||||
// wait until the processor is not busy
|
||||
if (~vx_busy) begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%t: AFU: End execution\n", $time))
|
||||
`TRACE(2, ("%t: AFU: Goto STATE IDLE\n", $time))
|
||||
`endif
|
||||
state <= STATE_IDLE;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
|
||||
// ensure reset network initialization
|
||||
if (vx_reset_ctr != RESET_CTR_WIDTH'(0)) begin
|
||||
vx_reset_ctr <= vx_reset_ctr - RESET_CTR_WIDTH'(1);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -475,8 +484,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.TAG_WIDTH (AVS_REQ_TAGW)
|
||||
) cci_vx_mem_bus_if[2]();
|
||||
|
||||
`RESET_RELAY (cci_adapter_reset, reset);
|
||||
|
||||
VX_mem_adapter #(
|
||||
.SRC_DATA_WIDTH (CCI_DATA_WIDTH),
|
||||
.DST_DATA_WIDTH (LMEM_DATA_WIDTH),
|
||||
|
@ -488,7 +495,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.RSP_OUT_BUF (0)
|
||||
) cci_mem_adapter (
|
||||
.clk (clk),
|
||||
.reset (cci_adapter_reset),
|
||||
.reset (reset),
|
||||
|
||||
.mem_req_valid_in (cci_mem_req_valid),
|
||||
.mem_req_addr_in (cci_mem_req_addr),
|
||||
|
@ -517,8 +524,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.mem_rsp_ready_out (cci_vx_mem_bus_if[1].rsp_ready)
|
||||
);
|
||||
|
||||
assign cci_vx_mem_bus_if[1].req_data.atype = '0;
|
||||
`UNUSED_VAR (cci_vx_mem_bus_if[1].req_data.atype)
|
||||
assign cci_vx_mem_bus_if[1].req_data.flags = '0;
|
||||
|
||||
//--
|
||||
|
||||
|
@ -528,8 +534,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
|
||||
assign vx_mem_req_valid_qual = vx_mem_req_valid && ~vx_mem_is_cout;
|
||||
|
||||
`RESET_RELAY (vx_adapter_reset, reset);
|
||||
|
||||
VX_mem_adapter #(
|
||||
.SRC_DATA_WIDTH (`VX_MEM_DATA_WIDTH),
|
||||
.DST_DATA_WIDTH (LMEM_DATA_WIDTH),
|
||||
|
@ -541,7 +545,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.RSP_OUT_BUF (2)
|
||||
) vx_mem_adapter (
|
||||
.clk (clk),
|
||||
.reset (vx_adapter_reset),
|
||||
.reset (reset),
|
||||
|
||||
.mem_req_valid_in (vx_mem_req_valid_qual),
|
||||
.mem_req_addr_in (vx_mem_req_addr),
|
||||
|
@ -570,8 +574,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.mem_rsp_ready_out (cci_vx_mem_bus_if[0].rsp_ready)
|
||||
);
|
||||
|
||||
assign cci_vx_mem_bus_if[0].req_data.atype = '0;
|
||||
`UNUSED_VAR (cci_vx_mem_bus_if[0].req_data.atype)
|
||||
assign cci_vx_mem_bus_if[0].req_data.flags = '0;
|
||||
|
||||
//--
|
||||
VX_mem_bus_if #(
|
||||
|
@ -580,39 +583,37 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.TAG_WIDTH (AVS_REQ_TAGW+1)
|
||||
) mem_bus_if[1]();
|
||||
|
||||
`RESET_RELAY (mem_arb_reset, reset);
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATA_SIZE (LMEM_DATA_SIZE),
|
||||
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
|
||||
.TAG_WIDTH (AVS_REQ_TAGW),
|
||||
.ARBITER ("P"),
|
||||
.ARBITER ("P"), // prioritize VX requests
|
||||
.REQ_OUT_BUF (0),
|
||||
.RSP_OUT_BUF (0)
|
||||
) mem_arb (
|
||||
.clk (clk),
|
||||
.reset (mem_arb_reset),
|
||||
.reset (reset),
|
||||
.bus_in_if (cci_vx_mem_bus_if),
|
||||
.bus_out_if (mem_bus_if)
|
||||
);
|
||||
|
||||
//--
|
||||
|
||||
`RESET_RELAY (avs_adapter_reset, reset);
|
||||
|
||||
VX_avs_adapter #(
|
||||
.DATA_WIDTH (LMEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
|
||||
.ADDR_WIDTH_IN (LMEM_ADDR_WIDTH),
|
||||
.ADDR_WIDTH_OUT($bits(t_local_mem_addr)),
|
||||
.BURST_WIDTH (LMEM_BURST_CTRW),
|
||||
.NUM_BANKS (NUM_LOCAL_MEM_BANKS),
|
||||
.TAG_WIDTH (AVS_REQ_TAGW + 1),
|
||||
.RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE),
|
||||
.BANK_INTERLEAVE(`PLATFORM_MEMORY_INTERLEAVE),
|
||||
.REQ_OUT_BUF (2),
|
||||
.RSP_OUT_BUF (0)
|
||||
) avs_adapter (
|
||||
.clk (clk),
|
||||
.reset (avs_adapter_reset),
|
||||
.reset (reset),
|
||||
|
||||
// Memory request
|
||||
.mem_req_valid (mem_bus_if[0].req_valid),
|
||||
|
@ -641,8 +642,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.avs_readdatavalid(avs_readdatavalid)
|
||||
);
|
||||
|
||||
assign mem_bus_if[0].req_data.atype = '0;
|
||||
`UNUSED_VAR (mem_bus_if[0].req_data.atype)
|
||||
`UNUSED_VAR (mem_bus_if[0].req_data.flags)
|
||||
|
||||
// CCI-P Read Request ///////////////////////////////////////////////////////////
|
||||
|
||||
|
@ -692,9 +692,11 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.reset (reset),
|
||||
.incr (cci_rd_req_fire),
|
||||
.decr (cci_rdq_pop),
|
||||
`UNUSED_PIN (empty),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
.full (cci_pending_reads_full),
|
||||
.size (cci_pending_reads),
|
||||
`UNUSED_PIN (empty)
|
||||
`UNUSED_PIN (alm_full),
|
||||
.size (cci_pending_reads)
|
||||
);
|
||||
|
||||
`UNUSED_VAR (cci_pending_reads)
|
||||
|
@ -748,7 +750,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
cci_rd_req_addr <= cci_rd_req_addr + 1;
|
||||
cci_rd_req_ctr <= cci_rd_req_ctr + $bits(cci_rd_req_ctr)'(1);
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: CCI Rd Req: addr=0x%0h, tag=0x%0h, rem=%0d, pending=%0d\n", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr - 1), cci_pending_reads));
|
||||
`TRACE(2, ("%t: AFU: CCI Rd Req: addr=0x%0h, tag=0x%0h, rem=%0d, pending=%0d\n", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr - 1), cci_pending_reads))
|
||||
`endif
|
||||
end
|
||||
|
||||
|
@ -758,13 +760,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
cci_mem_wr_req_addr_base <= cci_mem_wr_req_addr_base + CCI_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE);
|
||||
end
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: CCI Rd Rsp: idx=%0d, ctr=%0d, data=0x%0h\n", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data));
|
||||
`TRACE(2, ("%t: AFU: CCI Rd Rsp: idx=%0d, ctr=%0d, data=0x%h\n", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data))
|
||||
`endif
|
||||
end
|
||||
|
||||
if (cci_rdq_pop) begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: CCI Rd Queue Pop: pending=%0d\n", $time, cci_pending_reads));
|
||||
`TRACE(2, ("%t: AFU: CCI Rd Queue Pop: pending=%0d\n", $time, cci_pending_reads))
|
||||
`endif
|
||||
end
|
||||
|
||||
|
@ -776,14 +778,12 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
end
|
||||
end
|
||||
|
||||
`RESET_RELAY (cci_rdq_reset, reset);
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (CCI_RD_QUEUE_DATAW),
|
||||
.DEPTH (CCI_RD_QUEUE_SIZE)
|
||||
) cci_rd_req_queue (
|
||||
.clk (clk),
|
||||
.reset (cci_rdq_reset),
|
||||
.reset (reset),
|
||||
.push (cci_rdq_push),
|
||||
.pop (cci_rdq_pop),
|
||||
.data_in (cci_rdq_din),
|
||||
|
@ -852,7 +852,9 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.incr (cci_mem_rd_rsp_fire),
|
||||
.decr (cci_wr_rsp_fire),
|
||||
.empty (cci_pending_writes_empty),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
.full (cci_pending_writes_full),
|
||||
`UNUSED_PIN (alm_full),
|
||||
.size (cci_pending_writes)
|
||||
);
|
||||
|
||||
|
@ -902,13 +904,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
cci_wr_req_done <= 1;
|
||||
end
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: CCI Wr Req: addr=0x%0h, rem=%0d, pending=%0d, data=0x%0h\n", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data));
|
||||
`TRACE(2, ("%t: AFU: CCI Wr Req: addr=0x%0h, rem=%0d, pending=%0d, data=0x%h\n", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data))
|
||||
`endif
|
||||
end
|
||||
|
||||
if (cci_wr_rsp_fire) begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: CCI Wr Rsp: pending=%0d\n", $time, cci_pending_writes));
|
||||
`TRACE(2, ("%t: AFU: CCI Wr Rsp: pending=%0d\n", $time, cci_pending_writes))
|
||||
`endif
|
||||
end
|
||||
end
|
||||
|
@ -926,17 +928,17 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
|
||||
// Vortex ///////////////////////////////////////////////////////////////////
|
||||
|
||||
wire vx_dcr_wr_valid = (STATE_DCR_WRITE == state);
|
||||
wire vx_dcr_wr_valid = (STATE_DCR_WRITE == state);
|
||||
wire [`VX_DCR_ADDR_WIDTH-1:0] vx_dcr_wr_addr = cmd_dcr_addr;
|
||||
wire [`VX_DCR_DATA_WIDTH-1:0] vx_dcr_wr_data = cmd_dcr_data;
|
||||
|
||||
`SCOPE_IO_SWITCH (2)
|
||||
`SCOPE_IO_SWITCH (2);
|
||||
|
||||
Vortex vortex (
|
||||
`SCOPE_IO_BIND (1)
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset || ~vx_running),
|
||||
.reset (vx_reset),
|
||||
|
||||
// Memory request
|
||||
.mem_req_valid (vx_mem_req_valid),
|
||||
|
@ -966,7 +968,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
|
||||
wire [COUT_TID_WIDTH-1:0] cout_tid;
|
||||
|
||||
VX_onehot_encoder #(
|
||||
VX_encoder #(
|
||||
.N (`VX_MEM_BYTEEN_WIDTH)
|
||||
) cout_tid_enc (
|
||||
.data_in (vx_mem_req_byteen),
|
||||
|
@ -987,7 +989,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
wire cout_q_push = vx_mem_req_valid && vx_mem_is_cout && ~cout_q_full;
|
||||
|
||||
wire cout_q_pop = cp2af_sRxPort.c0.mmioRdValid
|
||||
&& (mmio_hdr.address == MMIO_STATUS)
|
||||
&& (mmio_req_hdr.address == MMIO_STATUS)
|
||||
&& ~cout_q_empty;
|
||||
|
||||
VX_fifo_queue #(
|
||||
|
@ -1010,59 +1012,59 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
// SCOPE //////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef DBG_SCOPE_AFU
|
||||
`ifdef SCOPE
|
||||
wire mem_req_fire = mem_bus_if[0].req_valid && mem_bus_if[0].req_ready;
|
||||
wire mem_rsp_fire = mem_bus_if[0].rsp_valid && mem_bus_if[0].rsp_ready;
|
||||
wire avs_write_fire = avs_write[0] && ~avs_waitrequest[0];
|
||||
wire avs_read_fire = avs_read[0] && ~avs_waitrequest[0];
|
||||
wire [$bits(t_local_mem_addr)-1:0] mem_bus_if_addr = mem_bus_if[0].req_data.addr;
|
||||
|
||||
reg [STATE_WIDTH-1:0] state_prev;
|
||||
// Register the command FSM state on every rising clock edge (no reset term).
// state_prev therefore holds the value `state` had one cycle earlier, which
// state_changed compares against the current `state`.
always @(posedge clk) begin
|
||||
state_prev <= state;
|
||||
end
|
||||
wire state_changed = (state != state_prev);
|
||||
wire state_changed = (state != state_prev);
|
||||
wire vx_mem_req_fire = vx_mem_req_valid && vx_mem_req_ready;
|
||||
wire vx_mem_rsp_fire = vx_mem_rsp_valid && vx_mem_rsp_ready;
|
||||
wire avs_req_fire = (avs_write[0] || avs_read[0]) && ~avs_waitrequest[0];
|
||||
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (0),
|
||||
.TRIGGERW (24),
|
||||
.PROBEW (431)
|
||||
) scope_tap (
|
||||
.clk(clk),
|
||||
.reset(scope_reset_w[0]),
|
||||
.start(1'b0),
|
||||
.stop(1'b0),
|
||||
.triggers({
|
||||
reset,
|
||||
state_changed,
|
||||
mem_req_fire,
|
||||
mem_rsp_fire,
|
||||
avs_write_fire,
|
||||
avs_read_fire,
|
||||
`NEG_EDGE (reset_negedge, reset);
|
||||
`SCOPE_TAP (0, 0, {
|
||||
vx_reset,
|
||||
vx_busy,
|
||||
vx_mem_req_valid,
|
||||
vx_mem_req_ready,
|
||||
vx_mem_rsp_valid,
|
||||
vx_mem_rsp_ready,
|
||||
avs_read[0],
|
||||
avs_write[0],
|
||||
avs_waitrequest[0],
|
||||
avs_readdatavalid[0],
|
||||
cp2af_sRxPort.c0.mmioRdValid,
|
||||
cp2af_sRxPort.c0.mmioWrValid,
|
||||
cp2af_sRxPort.c0.rspValid,
|
||||
cp2af_sRxPort.c1.rspValid,
|
||||
af2cp_sTxPort.c0.valid,
|
||||
af2cp_sTxPort.c1.valid,
|
||||
cp2af_sRxPort.c0TxAlmFull,
|
||||
cp2af_sRxPort.c1TxAlmFull,
|
||||
af2cp_sTxPort.c2.mmioRdValid,
|
||||
cci_wr_req_fire,
|
||||
cci_wr_rsp_fire,
|
||||
cp2af_sRxPort.c1TxAlmFull
|
||||
},{
|
||||
state_changed,
|
||||
vx_dcr_wr_valid, // ack-free
|
||||
avs_readdatavalid[0], // ack-free
|
||||
cp2af_sRxPort.c0.mmioRdValid, // ack-free
|
||||
cp2af_sRxPort.c0.mmioWrValid, // ack-free
|
||||
af2cp_sTxPort.c2.mmioRdValid, // ack-free
|
||||
cp2af_sRxPort.c0.rspValid, // ack-free
|
||||
cp2af_sRxPort.c1.rspValid, // ack-free
|
||||
cci_rd_req_fire,
|
||||
cci_rd_rsp_fire,
|
||||
cci_pending_reads_full,
|
||||
cci_pending_writes_empty,
|
||||
cci_pending_writes_full
|
||||
}),
|
||||
.probes({
|
||||
cci_wr_req_fire,
|
||||
avs_req_fire,
|
||||
vx_mem_req_fire,
|
||||
vx_mem_rsp_fire
|
||||
},{
|
||||
cmd_type,
|
||||
state,
|
||||
mmio_hdr.address,
|
||||
mmio_hdr.length,
|
||||
vx_mem_req_rw,
|
||||
vx_mem_req_byteen,
|
||||
vx_mem_req_addr,
|
||||
vx_mem_req_data,
|
||||
vx_mem_req_tag,
|
||||
vx_mem_rsp_data,
|
||||
vx_mem_rsp_tag,
|
||||
vx_dcr_wr_addr,
|
||||
vx_dcr_wr_data,
|
||||
mmio_req_hdr.address,
|
||||
cp2af_sRxPort.c0.hdr.mdata,
|
||||
af2cp_sTxPort.c0.hdr.address,
|
||||
af2cp_sTxPort.c0.hdr.mdata,
|
||||
|
@ -1074,15 +1076,12 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
cci_mem_wr_req_ctr,
|
||||
cci_rd_req_ctr,
|
||||
cci_rd_rsp_ctr,
|
||||
cci_wr_req_ctr,
|
||||
mem_bus_if_addr
|
||||
}),
|
||||
.bus_in(scope_bus_in_w[0]),
|
||||
.bus_out(scope_bus_out_w[0])
|
||||
);
|
||||
`endif
|
||||
cci_wr_req_ctr
|
||||
},
|
||||
reset_negedge, 1'b0, 4096
|
||||
);
|
||||
`else
|
||||
`SCOPE_IO_UNUSED_W(0)
|
||||
`SCOPE_IO_UNUSED(0)
|
||||
`endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -1091,13 +1090,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < NUM_LOCAL_MEM_BANKS; ++i) begin
|
||||
if (avs_write[i] && ~avs_waitrequest[i]) begin
|
||||
`TRACE(2, ("%d: AVS Wr Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h, data=0x%0h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i], avs_writedata[i]));
|
||||
`TRACE(2, ("%t: AVS Wr Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h, data=0x%h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i], avs_writedata[i]))
|
||||
end
|
||||
if (avs_read[i] && ~avs_waitrequest[i]) begin
|
||||
`TRACE(2, ("%d: AVS Rd Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i]));
|
||||
`TRACE(2, ("%t: AVS Rd Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i]))
|
||||
end
|
||||
if (avs_readdatavalid[i]) begin
|
||||
`TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%0h\n", $time, i, avs_readdata[i]));
|
||||
`TRACE(2, ("%t: AVS Rd Rsp [%0d]: data=0x%h\n", $time, i, avs_readdata[i]))
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -17,9 +17,9 @@
|
|||
`define AFU_ACCEL_NAME "vortex_afu"
|
||||
`define AFU_ACCEL_UUID 128'h35F9452B_25C2_434C_93D5_6F8C60DB361C
|
||||
|
||||
`define AFU_IMAGE_CMD_MEM_READ 1
|
||||
`define AFU_IMAGE_CMD_MEM_READ 1
|
||||
`define AFU_IMAGE_CMD_MEM_WRITE 2
|
||||
`define AFU_IMAGE_CMD_RUN 3
|
||||
`define AFU_IMAGE_CMD_RUN 3
|
||||
`define AFU_IMAGE_CMD_DCR_WRITE 4
|
||||
`define AFU_IMAGE_CMD_MAX_VALUE 4
|
||||
|
||||
|
|
|
@ -14,22 +14,20 @@
|
|||
`include "vortex_afu.vh"
|
||||
|
||||
module VX_afu_ctrl #(
|
||||
parameter AXI_ADDR_WIDTH = 8,
|
||||
parameter AXI_DATA_WIDTH = 32,
|
||||
parameter AXI_NUM_BANKS = 1
|
||||
parameter S_AXI_ADDR_WIDTH = 8,
|
||||
parameter S_AXI_DATA_WIDTH = 32
|
||||
) (
|
||||
// axi4 lite slave signals
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk_en,
|
||||
|
||||
input wire s_axi_awvalid,
|
||||
input wire [AXI_ADDR_WIDTH-1:0] s_axi_awaddr,
|
||||
input wire [S_AXI_ADDR_WIDTH-1:0] s_axi_awaddr,
|
||||
output wire s_axi_awready,
|
||||
|
||||
input wire s_axi_wvalid,
|
||||
input wire [AXI_DATA_WIDTH-1:0] s_axi_wdata,
|
||||
input wire [AXI_DATA_WIDTH/8-1:0] s_axi_wstrb,
|
||||
input wire [S_AXI_DATA_WIDTH-1:0] s_axi_wdata,
|
||||
input wire [S_AXI_DATA_WIDTH/8-1:0]s_axi_wstrb,
|
||||
output wire s_axi_wready,
|
||||
|
||||
output wire s_axi_bvalid,
|
||||
|
@ -37,11 +35,11 @@ module VX_afu_ctrl #(
|
|||
input wire s_axi_bready,
|
||||
|
||||
input wire s_axi_arvalid,
|
||||
input wire [AXI_ADDR_WIDTH-1:0] s_axi_araddr,
|
||||
input wire [S_AXI_ADDR_WIDTH-1:0] s_axi_araddr,
|
||||
output wire s_axi_arready,
|
||||
|
||||
output wire s_axi_rvalid,
|
||||
output wire [AXI_DATA_WIDTH-1:0] s_axi_rdata,
|
||||
output wire [S_AXI_DATA_WIDTH-1:0] s_axi_rdata,
|
||||
output wire [1:0] s_axi_rresp,
|
||||
input wire s_axi_rready,
|
||||
|
||||
|
@ -57,8 +55,6 @@ module VX_afu_ctrl #(
|
|||
output wire scope_bus_out,
|
||||
`endif
|
||||
|
||||
output wire [63:0] mem_base [AXI_NUM_BANKS],
|
||||
|
||||
output wire dcr_wr_valid,
|
||||
output wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr,
|
||||
output wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data
|
||||
|
@ -110,39 +106,36 @@ module VX_afu_ctrl #(
|
|||
|
||||
ADDR_DEV_0 = 8'h10,
|
||||
ADDR_DEV_1 = 8'h14,
|
||||
//ADDR_DEV_CTRL = 8'h18,
|
||||
|
||||
ADDR_ISA_0 = 8'h1C,
|
||||
ADDR_ISA_1 = 8'h20,
|
||||
//ADDR_ISA_CTRL = 8'h24,
|
||||
ADDR_ISA_0 = 8'h18,
|
||||
ADDR_ISA_1 = 8'h1C,
|
||||
|
||||
ADDR_DCR_0 = 8'h28,
|
||||
ADDR_DCR_1 = 8'h2C,
|
||||
//ADDR_DCR_CTRL = 8'h30,
|
||||
ADDR_DCR_0 = 8'h20,
|
||||
ADDR_DCR_1 = 8'h24,
|
||||
|
||||
`ifdef SCOPE
|
||||
ADDR_SCP_0 = 8'h34,
|
||||
ADDR_SCP_1 = 8'h38,
|
||||
//ADDR_SCP_CTRL = 8'h3C,
|
||||
ADDR_SCP_0 = 8'h28,
|
||||
ADDR_SCP_1 = 8'h2C,
|
||||
`endif
|
||||
|
||||
ADDR_MEM_0 = 8'h40,
|
||||
ADDR_MEM_1 = 8'h44,
|
||||
//ADDR_MEM_CTRL = 8'h48,
|
||||
|
||||
ADDR_BITS = 8;
|
||||
|
||||
localparam
|
||||
WSTATE_IDLE = 2'd0,
|
||||
WSTATE_ADDR = 2'd0,
|
||||
WSTATE_DATA = 2'd1,
|
||||
WSTATE_RESP = 2'd2;
|
||||
WSTATE_RESP = 2'd2,
|
||||
WSTATE_WIDTH = 2;
|
||||
|
||||
localparam
|
||||
RSTATE_IDLE = 2'd0,
|
||||
RSTATE_DATA = 2'd1;
|
||||
RSTATE_ADDR = 2'd0,
|
||||
RSTATE_DATA = 2'd1,
|
||||
RSTATE_RESP = 2'd2,
|
||||
RSTATE_WIDTH = 2;
|
||||
|
||||
// device caps
|
||||
wire [63:0] dev_caps = {16'b0,
|
||||
wire [63:0] dev_caps = {8'b0,
|
||||
5'(`PLATFORM_MEMORY_ADDR_WIDTH-20),
|
||||
3'(`CLOG2(`PLATFORM_MEMORY_BANKS)),
|
||||
8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0),
|
||||
16'(`NUM_CORES * `NUM_CLUSTERS),
|
||||
8'(`NUM_WARPS),
|
||||
|
@ -153,16 +146,18 @@ module VX_afu_ctrl #(
|
|||
2'(`CLOG2(`XLEN)-4),
|
||||
30'(`MISA_STD)};
|
||||
|
||||
reg [1:0] wstate;
|
||||
reg [WSTATE_WIDTH-1:0] wstate;
|
||||
reg [ADDR_BITS-1:0] waddr;
|
||||
wire [31:0] wmask;
|
||||
wire s_axi_aw_fire;
|
||||
wire s_axi_w_fire;
|
||||
wire s_axi_b_fire;
|
||||
|
||||
reg [1:0] rstate;
|
||||
logic [RSTATE_WIDTH-1:0] rstate;
|
||||
reg [31:0] rdata;
|
||||
wire [ADDR_BITS-1:0] raddr;
|
||||
reg [ADDR_BITS-1:0] raddr;
|
||||
wire s_axi_ar_fire;
|
||||
wire s_axi_r_fire;
|
||||
|
||||
reg ap_reset_r;
|
||||
reg ap_start_r;
|
||||
|
@ -170,20 +165,23 @@ module VX_afu_ctrl #(
|
|||
reg gie_r;
|
||||
reg [1:0] ier_r;
|
||||
reg [1:0] isr_r;
|
||||
reg [63:0] mem_r [AXI_NUM_BANKS];
|
||||
reg [31:0] dcra_r;
|
||||
reg [31:0] dcrv_r;
|
||||
reg dcr_wr_valid_r;
|
||||
|
||||
logic wready_stall;
|
||||
logic rvalid_stall;
|
||||
|
||||
`ifdef SCOPE
|
||||
|
||||
reg [63:0] scope_bus_wdata;
|
||||
reg [63:0] scope_bus_rdata;
|
||||
reg [63:0] scope_bus_wdata, scope_bus_rdata;
|
||||
reg [5:0] scope_bus_ctr;
|
||||
|
||||
reg cmd_scope_reading;
|
||||
reg cmd_scope_writing;
|
||||
reg cmd_scope_writing, cmd_scope_reading;
|
||||
reg scope_bus_out_r;
|
||||
reg scope_rdata_valid;
|
||||
|
||||
reg is_scope_waddr, is_scope_raddr;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
@ -191,18 +189,33 @@ module VX_afu_ctrl #(
|
|||
cmd_scope_writing <= 0;
|
||||
scope_bus_ctr <= '0;
|
||||
scope_bus_out_r <= 0;
|
||||
end else if (clk_en) begin
|
||||
is_scope_waddr <= 0;
|
||||
is_scope_raddr <= 0;
|
||||
scope_bus_rdata <= '0;
|
||||
scope_rdata_valid <= 0;
|
||||
end else begin
|
||||
scope_bus_out_r <= 0;
|
||||
if (s_axi_aw_fire) begin
|
||||
is_scope_waddr <= (s_axi_awaddr[ADDR_BITS-1:0] == ADDR_SCP_0)
|
||||
|| (s_axi_awaddr[ADDR_BITS-1:0] == ADDR_SCP_1);
|
||||
end
|
||||
if (s_axi_ar_fire) begin
|
||||
is_scope_raddr <= (s_axi_araddr[ADDR_BITS-1:0] == ADDR_SCP_0)
|
||||
|| (s_axi_araddr[ADDR_BITS-1:0] == ADDR_SCP_1);
|
||||
end
|
||||
if (s_axi_w_fire && waddr == ADDR_SCP_0) begin
|
||||
scope_bus_wdata[31:0] <= (s_axi_wdata & wmask) | (scope_bus_wdata[31:0] & ~wmask);
|
||||
end
|
||||
if (s_axi_w_fire && waddr == ADDR_SCP_1) begin
|
||||
scope_bus_wdata[63:32] <= (s_axi_wdata & wmask) | (scope_bus_wdata[63:32] & ~wmask);
|
||||
cmd_scope_writing <= 1;
|
||||
scope_rdata_valid <= 0;
|
||||
scope_bus_out_r <= 1;
|
||||
scope_bus_ctr <= 63;
|
||||
end
|
||||
if (scope_bus_in) begin
|
||||
cmd_scope_reading <= 1;
|
||||
scope_bus_rdata <= '0;
|
||||
scope_bus_ctr <= 63;
|
||||
end
|
||||
if (cmd_scope_reading) begin
|
||||
|
@ -210,13 +223,16 @@ module VX_afu_ctrl #(
|
|||
scope_bus_ctr <= scope_bus_ctr - 1;
|
||||
if (scope_bus_ctr == 0) begin
|
||||
cmd_scope_reading <= 0;
|
||||
scope_rdata_valid <= 1;
|
||||
scope_bus_ctr <= 0;
|
||||
end
|
||||
end
|
||||
if (cmd_scope_writing) begin
|
||||
scope_bus_out_r <= 1'(scope_bus_wdata >> scope_bus_ctr);
|
||||
scope_bus_out_r <= scope_bus_wdata[scope_bus_ctr];
|
||||
scope_bus_ctr <= scope_bus_ctr - 1;
|
||||
if (scope_bus_ctr == 0) begin
|
||||
cmd_scope_writing <= 0;
|
||||
scope_bus_ctr <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -224,41 +240,50 @@ module VX_afu_ctrl #(
|
|||
|
||||
assign scope_bus_out = scope_bus_out_r;
|
||||
|
||||
assign wready_stall = is_scope_waddr && cmd_scope_writing;
|
||||
assign rvalid_stall = is_scope_raddr && ~scope_rdata_valid;
|
||||
|
||||
`else
|
||||
|
||||
assign wready_stall = 0;
|
||||
assign rvalid_stall = 0;
|
||||
|
||||
`endif
|
||||
|
||||
// AXI Write
|
||||
// AXI Write Request
|
||||
assign s_axi_awready = (wstate == WSTATE_ADDR);
|
||||
assign s_axi_wready = (wstate == WSTATE_DATA) && ~wready_stall;
|
||||
|
||||
assign s_axi_awready = (wstate == WSTATE_IDLE);
|
||||
assign s_axi_wready = (wstate == WSTATE_DATA);
|
||||
// AXI Write Response
|
||||
assign s_axi_bvalid = (wstate == WSTATE_RESP);
|
||||
assign s_axi_bresp = 2'b00; // OKAY
|
||||
|
||||
assign s_axi_aw_fire = s_axi_awvalid && s_axi_awready;
|
||||
assign s_axi_w_fire = s_axi_wvalid && s_axi_wready;
|
||||
|
||||
for (genvar i = 0; i < 4; ++i) begin
|
||||
for (genvar i = 0; i < 4; ++i) begin : g_wmask
|
||||
assign wmask[8 * i +: 8] = {8{s_axi_wstrb[i]}};
|
||||
end
|
||||
|
||||
assign s_axi_aw_fire = s_axi_awvalid && s_axi_awready;
|
||||
assign s_axi_w_fire = s_axi_wvalid && s_axi_wready;
|
||||
assign s_axi_b_fire = s_axi_bvalid && s_axi_bready;
|
||||
|
||||
// wstate
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
wstate <= WSTATE_IDLE;
|
||||
end else if (clk_en) begin
|
||||
wstate <= WSTATE_ADDR;
|
||||
end else begin
|
||||
case (wstate)
|
||||
WSTATE_IDLE: wstate <= s_axi_awvalid ? WSTATE_DATA : WSTATE_IDLE;
|
||||
WSTATE_DATA: wstate <= s_axi_wvalid ? WSTATE_RESP : WSTATE_DATA;
|
||||
WSTATE_RESP: wstate <= s_axi_bready ? WSTATE_IDLE : WSTATE_RESP;
|
||||
default: wstate <= WSTATE_IDLE;
|
||||
WSTATE_ADDR: wstate <= s_axi_aw_fire ? WSTATE_DATA : WSTATE_ADDR;
|
||||
WSTATE_DATA: wstate <= s_axi_w_fire ? WSTATE_RESP : WSTATE_DATA;
|
||||
WSTATE_RESP: wstate <= s_axi_b_fire ? WSTATE_ADDR : WSTATE_RESP;
|
||||
default: wstate <= WSTATE_ADDR;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// waddr
|
||||
always @(posedge clk) begin
|
||||
if (clk_en) begin
|
||||
if (s_axi_aw_fire)
|
||||
waddr <= s_axi_awaddr[ADDR_BITS-1:0];
|
||||
if (s_axi_aw_fire) begin
|
||||
waddr <= s_axi_awaddr[ADDR_BITS-1:0];
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -276,16 +301,13 @@ module VX_afu_ctrl #(
|
|||
dcra_r <= '0;
|
||||
dcrv_r <= '0;
|
||||
dcr_wr_valid_r <= 0;
|
||||
end else begin
|
||||
dcr_wr_valid_r <= 0;
|
||||
ap_reset_r <= 0;
|
||||
|
||||
for (integer i = 0; i < AXI_NUM_BANKS; ++i) begin
|
||||
mem_r[i] <= '0;
|
||||
end
|
||||
end else if (clk_en) begin
|
||||
if (ap_ready)
|
||||
ap_start_r <= auto_restart_r;
|
||||
|
||||
dcr_wr_valid_r <= 0;
|
||||
|
||||
if (s_axi_w_fire) begin
|
||||
case (waddr)
|
||||
ADDR_AP_CTRL: begin
|
||||
|
@ -317,16 +339,7 @@ module VX_afu_ctrl #(
|
|||
dcrv_r <= (s_axi_wdata & wmask) | (dcrv_r & ~wmask);
|
||||
dcr_wr_valid_r <= 1;
|
||||
end
|
||||
default: begin
|
||||
for (integer i = 0; i < AXI_NUM_BANKS; ++i) begin
|
||||
if (waddr == (ADDR_MEM_0 + 8'(i) * 8'd12)) begin
|
||||
mem_r[i][31:0] <= (s_axi_wdata & wmask) | (mem_r[i][31:0] & ~wmask);
|
||||
end
|
||||
if (waddr == (ADDR_MEM_1 + 8'(i) * 8'd12)) begin
|
||||
mem_r[i][63:32] <= (s_axi_wdata & wmask) | (mem_r[i][63:32] & ~wmask);
|
||||
end
|
||||
end
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
|
||||
if (ier_r[0] & ap_done)
|
||||
|
@ -337,83 +350,86 @@ module VX_afu_ctrl #(
|
|||
end
|
||||
end
|
||||
|
||||
// AXI Read
|
||||
// AXI Read Request
|
||||
assign s_axi_arready = (rstate == RSTATE_ADDR);
|
||||
|
||||
assign s_axi_arready = (rstate == RSTATE_IDLE);
|
||||
assign s_axi_rvalid = (rstate == RSTATE_DATA);
|
||||
// AXI Read Response
|
||||
assign s_axi_rvalid = (rstate == RSTATE_RESP);
|
||||
assign s_axi_rdata = rdata;
|
||||
assign s_axi_rresp = 2'b00; // OKAY
|
||||
|
||||
assign s_axi_ar_fire = s_axi_arvalid && s_axi_arready;
|
||||
assign raddr = s_axi_araddr[ADDR_BITS-1:0];
|
||||
assign s_axi_r_fire = s_axi_rvalid && s_axi_rready;
|
||||
|
||||
// rstate
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
rstate <= RSTATE_IDLE;
|
||||
end else if (clk_en) begin
|
||||
rstate <= RSTATE_ADDR;
|
||||
end else begin
|
||||
case (rstate)
|
||||
RSTATE_IDLE: rstate <= s_axi_arvalid ? RSTATE_DATA : RSTATE_IDLE;
|
||||
RSTATE_DATA: rstate <= (s_axi_rready & s_axi_rvalid) ? RSTATE_IDLE : RSTATE_DATA;
|
||||
default: rstate <= RSTATE_IDLE;
|
||||
RSTATE_ADDR: rstate <= s_axi_ar_fire ? RSTATE_DATA : RSTATE_ADDR;
|
||||
RSTATE_DATA: rstate <= (~rvalid_stall) ? RSTATE_RESP : RSTATE_DATA;
|
||||
RSTATE_RESP: rstate <= s_axi_r_fire ? RSTATE_ADDR : RSTATE_RESP;
|
||||
default: rstate <= RSTATE_ADDR;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// raddr
|
||||
always @(posedge clk) begin
|
||||
if (s_axi_ar_fire) begin
|
||||
raddr <= s_axi_araddr[ADDR_BITS-1:0];
|
||||
end
|
||||
end
|
||||
|
||||
// rdata
|
||||
always @(posedge clk) begin
|
||||
if (clk_en) begin
|
||||
if (s_axi_ar_fire) begin
|
||||
rdata <= '0;
|
||||
case (raddr)
|
||||
ADDR_AP_CTRL: begin
|
||||
rdata[0] <= ap_start_r;
|
||||
rdata[1] <= ap_done;
|
||||
rdata[2] <= ap_idle;
|
||||
rdata[3] <= ap_ready;
|
||||
rdata[7] <= auto_restart_r;
|
||||
end
|
||||
ADDR_GIE: begin
|
||||
rdata <= 32'(gie_r);
|
||||
end
|
||||
ADDR_IER: begin
|
||||
rdata <= 32'(ier_r);
|
||||
end
|
||||
ADDR_ISR: begin
|
||||
rdata <= 32'(isr_r);
|
||||
end
|
||||
ADDR_DEV_0: begin
|
||||
rdata <= dev_caps[31:0];
|
||||
end
|
||||
ADDR_DEV_1: begin
|
||||
rdata <= dev_caps[63:32];
|
||||
end
|
||||
ADDR_ISA_0: begin
|
||||
rdata <= isa_caps[31:0];
|
||||
end
|
||||
ADDR_ISA_1: begin
|
||||
rdata <= isa_caps[63:32];
|
||||
end
|
||||
`ifdef SCOPE
|
||||
ADDR_SCP_0: begin
|
||||
rdata <= scope_bus_rdata[31:0];
|
||||
end
|
||||
ADDR_SCP_1: begin
|
||||
rdata <= scope_bus_rdata[63:32];
|
||||
end
|
||||
`endif
|
||||
default:;
|
||||
endcase
|
||||
rdata <= '0;
|
||||
case (raddr)
|
||||
ADDR_AP_CTRL: begin
|
||||
rdata[0] <= ap_start_r;
|
||||
rdata[1] <= ap_done;
|
||||
rdata[2] <= ap_idle;
|
||||
rdata[3] <= ap_ready;
|
||||
rdata[7] <= auto_restart_r;
|
||||
end
|
||||
end
|
||||
ADDR_GIE: begin
|
||||
rdata <= 32'(gie_r);
|
||||
end
|
||||
ADDR_IER: begin
|
||||
rdata <= 32'(ier_r);
|
||||
end
|
||||
ADDR_ISR: begin
|
||||
rdata <= 32'(isr_r);
|
||||
end
|
||||
ADDR_DEV_0: begin
|
||||
rdata <= dev_caps[31:0];
|
||||
end
|
||||
ADDR_DEV_1: begin
|
||||
rdata <= dev_caps[63:32];
|
||||
end
|
||||
ADDR_ISA_0: begin
|
||||
rdata <= isa_caps[31:0];
|
||||
end
|
||||
ADDR_ISA_1: begin
|
||||
rdata <= isa_caps[63:32];
|
||||
end
|
||||
`ifdef SCOPE
|
||||
ADDR_SCP_0: begin
|
||||
rdata <= scope_bus_rdata[31:0];
|
||||
end
|
||||
ADDR_SCP_1: begin
|
||||
rdata <= scope_bus_rdata[63:32];
|
||||
end
|
||||
`endif
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
||||
assign ap_reset = ap_reset_r;
|
||||
assign ap_start = ap_start_r;
|
||||
assign interrupt = gie_r & (| isr_r);
|
||||
|
||||
assign mem_base = mem_r;
|
||||
|
||||
assign dcr_wr_valid = dcr_wr_valid_r;
|
||||
assign dcr_wr_addr = `VX_DCR_ADDR_WIDTH'(dcra_r);
|
||||
assign dcr_wr_data = `VX_DCR_DATA_WIDTH'(dcrv_r);
|
||||
|
|
|
@ -16,17 +16,21 @@
|
|||
module VX_afu_wrap #(
|
||||
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
|
||||
parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
|
||||
parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH,
|
||||
parameter C_M_AXI_MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH,
|
||||
parameter C_M_AXI_MEM_DATA_WIDTH = `VX_MEM_DATA_WIDTH
|
||||
parameter C_M_AXI_MEM_ID_WIDTH = 32,
|
||||
parameter C_M_AXI_MEM_DATA_WIDTH = 512,
|
||||
parameter C_M_AXI_MEM_ADDR_WIDTH = 25,
|
||||
parameter C_M_AXI_MEM_NUM_BANKS = 2
|
||||
) (
|
||||
// System signals
|
||||
input wire ap_clk,
|
||||
input wire ap_rst_n,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// AXI4 master interface
|
||||
`REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
|
||||
|
||||
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
||||
`REPEAT (1, GEN_AXI_MEM, REPEAT_COMMA),
|
||||
`else
|
||||
`REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
|
||||
`endif
|
||||
// AXI4-Lite slave interface
|
||||
input wire s_axi_ctrl_awvalid,
|
||||
output wire s_axi_ctrl_awready,
|
||||
|
@ -48,11 +52,18 @@ module VX_afu_wrap #(
|
|||
|
||||
output wire interrupt
|
||||
);
|
||||
localparam C_M_AXI_MEM_NUM_BANKS = `M_AXI_MEM_NUM_BANKS;
|
||||
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
||||
localparam M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + $clog2(`PLATFORM_MEMORY_BANKS);
|
||||
`else
|
||||
localparam M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH;
|
||||
`endif
|
||||
|
||||
localparam STATE_IDLE = 0;
|
||||
localparam STATE_RUN = 1;
|
||||
|
||||
localparam PENDING_SIZEW = 12; // bit width of the vx_pending_writes counter (bounds the outstanding write count it can hold)
|
||||
localparam C_M_AXI_MEM_NUM_BANKS_SW = `CLOG2(C_M_AXI_MEM_NUM_BANKS+1);
|
||||
|
||||
wire m_axi_mem_awvalid_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire m_axi_mem_awready_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [C_M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
|
@ -80,19 +91,18 @@ module VX_afu_wrap #(
|
|||
wire [1:0] m_axi_mem_rresp_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
|
||||
// convert memory interface to array
|
||||
`REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON);
|
||||
|
||||
wire reset = ~ap_rst_n;
|
||||
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
||||
`REPEAT (1, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON);
|
||||
`else
|
||||
`REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON);
|
||||
`endif
|
||||
|
||||
reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr;
|
||||
reg [15:0] vx_pending_writes;
|
||||
reg [PENDING_SIZEW-1:0] vx_pending_writes;
|
||||
reg vx_busy_wait;
|
||||
reg vx_running;
|
||||
|
||||
reg vx_reset = 1; // asserted at initialization
|
||||
wire vx_busy;
|
||||
|
||||
wire [63:0] mem_base [C_M_AXI_MEM_NUM_BANKS];
|
||||
|
||||
wire dcr_wr_valid;
|
||||
wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr;
|
||||
wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data;
|
||||
|
@ -101,8 +111,8 @@ module VX_afu_wrap #(
|
|||
|
||||
wire ap_reset;
|
||||
wire ap_start;
|
||||
wire ap_idle = ~vx_running;
|
||||
wire ap_done = ~(state == STATE_RUN || vx_pending_writes != 0);
|
||||
wire ap_idle = vx_reset;
|
||||
wire ap_done = (state == STATE_IDLE) && (vx_pending_writes == '0);
|
||||
wire ap_ready = 1'b1;
|
||||
|
||||
`ifdef SCOPE
|
||||
|
@ -111,24 +121,33 @@ module VX_afu_wrap #(
|
|||
wire scope_reset = reset;
|
||||
`endif
|
||||
|
||||
always @(posedge ap_clk) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset || ap_reset) begin
|
||||
state <= STATE_IDLE;
|
||||
vx_busy_wait <= 0;
|
||||
vx_running <= 0;
|
||||
state <= STATE_IDLE;
|
||||
vx_reset <= 1;
|
||||
end else begin
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
if (ap_start) begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: STATE RUN\n", $time));
|
||||
`TRACE(2, ("%t: AFU: Goto STATE RUN\n", $time))
|
||||
`endif
|
||||
state <= STATE_RUN;
|
||||
vx_running <= 0;
|
||||
vx_reset_ctr <= (`RESET_DELAY-1);
|
||||
vx_reset <= 1;
|
||||
end
|
||||
end
|
||||
STATE_RUN: begin
|
||||
if (vx_running) begin
|
||||
if (vx_reset) begin
|
||||
// wait until the reset network is ready
|
||||
if (vx_reset_ctr == 0) begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%t: AFU: Begin execution\n", $time))
|
||||
`endif
|
||||
vx_busy_wait <= 1;
|
||||
vx_reset <= 0;
|
||||
end
|
||||
end else begin
|
||||
if (vx_busy_wait) begin
|
||||
// wait until processor goes busy
|
||||
if (vx_busy) begin
|
||||
|
@ -137,67 +156,63 @@ module VX_afu_wrap #(
|
|||
end else begin
|
||||
// wait until the processor is not busy
|
||||
if (~vx_busy) begin
|
||||
state <= STATE_IDLE;
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: AFU: End execution\n", $time));
|
||||
`TRACE(2, ("%d: STATE IDLE\n", $time));
|
||||
`TRACE(2, ("%t: AFU: End execution\n", $time))
|
||||
`TRACE(2, ("%t: AFU: Goto STATE IDLE\n", $time))
|
||||
`endif
|
||||
state <= STATE_IDLE;
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
// wait until the reset sequence is complete
|
||||
if (vx_reset_ctr == (`RESET_DELAY-1)) begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: AFU: Begin execution\n", $time));
|
||||
`endif
|
||||
vx_running <= 1;
|
||||
vx_busy_wait <= 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
endcase
|
||||
|
||||
// ensure reset network initialization
|
||||
if (vx_reset_ctr != '0) begin
|
||||
vx_reset_ctr <= vx_reset_ctr - 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
reg m_axi_mem_wfire;
|
||||
reg m_axi_mem_bfire;
|
||||
wire [C_M_AXI_MEM_NUM_BANKS-1:0] m_axi_wr_req_fire, m_axi_wr_rsp_fire;
|
||||
wire [C_M_AXI_MEM_NUM_BANKS_SW-1:0] cur_wr_reqs, cur_wr_rsps;
|
||||
|
||||
always @(*) begin
|
||||
m_axi_mem_wfire = 0;
|
||||
m_axi_mem_bfire = 0;
|
||||
for (integer i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin
|
||||
m_axi_mem_wfire |= m_axi_mem_wvalid_a[i] && m_axi_mem_wready_a[i];
|
||||
m_axi_mem_bfire |= m_axi_mem_bvalid_a[i] && m_axi_mem_bready_a[i];
|
||||
end
|
||||
for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin : g_awfire
|
||||
VX_axi_write_ack axi_write_ack (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.awvalid(m_axi_mem_awvalid_a[i]),
|
||||
.awready(m_axi_mem_awready_a[i]),
|
||||
.wvalid (m_axi_mem_wvalid_a[i]),
|
||||
.wready (m_axi_mem_wready_a[i]),
|
||||
.tx_ack (m_axi_wr_req_fire[i]),
|
||||
`UNUSED_PIN (aw_ack),
|
||||
`UNUSED_PIN (w_ack),
|
||||
`UNUSED_PIN (tx_rdy)
|
||||
);
|
||||
assign m_axi_wr_rsp_fire[i] = m_axi_mem_bvalid_a[i] & m_axi_mem_bready_a[i];
|
||||
end
|
||||
|
||||
always @(posedge ap_clk) begin
|
||||
if (reset || ap_reset) begin
|
||||
`POP_COUNT(cur_wr_reqs, m_axi_wr_req_fire);
|
||||
`POP_COUNT(cur_wr_rsps, m_axi_wr_rsp_fire);
|
||||
|
||||
wire signed [C_M_AXI_MEM_NUM_BANKS_SW:0] reqs_sub = (C_M_AXI_MEM_NUM_BANKS_SW+1)'(cur_wr_reqs) -
|
||||
(C_M_AXI_MEM_NUM_BANKS_SW+1)'(cur_wr_rsps);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
vx_pending_writes <= '0;
|
||||
end else begin
|
||||
if (m_axi_mem_wfire && ~m_axi_mem_bfire)
|
||||
vx_pending_writes <= vx_pending_writes + 1;
|
||||
if (~m_axi_mem_wfire && m_axi_mem_bfire)
|
||||
vx_pending_writes <= vx_pending_writes - 1;
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge ap_clk) begin
|
||||
if (state == STATE_RUN) begin
|
||||
vx_reset_ctr <= vx_reset_ctr + 1;
|
||||
end else begin
|
||||
vx_reset_ctr <= '0;
|
||||
vx_pending_writes <= vx_pending_writes + PENDING_SIZEW'(reqs_sub);
|
||||
end
|
||||
end
|
||||
|
||||
VX_afu_ctrl #(
|
||||
.AXI_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH),
|
||||
.AXI_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH),
|
||||
.AXI_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS)
|
||||
.S_AXI_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH),
|
||||
.S_AXI_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH)
|
||||
) afu_ctrl (
|
||||
.clk (ap_clk),
|
||||
.reset (reset || ap_reset),
|
||||
.clk_en (1'b1),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.s_axi_awvalid (s_axi_ctrl_awvalid),
|
||||
.s_axi_awready (s_axi_ctrl_awready),
|
||||
|
@ -229,37 +244,36 @@ module VX_afu_wrap #(
|
|||
.scope_bus_out (scope_bus_in),
|
||||
`endif
|
||||
|
||||
.mem_base (mem_base),
|
||||
|
||||
.dcr_wr_valid (dcr_wr_valid),
|
||||
.dcr_wr_addr (dcr_wr_addr),
|
||||
.dcr_wr_data (dcr_wr_data)
|
||||
);
|
||||
|
||||
wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_w [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_w [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_u [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_u [C_M_AXI_MEM_NUM_BANKS];
|
||||
|
||||
for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin
|
||||
assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_w[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]);
|
||||
assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_w[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]);
|
||||
for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin : g_addressing
|
||||
// Base address of bank i: platform memory offset plus i * 2**M_AXI_MEM_ADDR_WIDTH.
// The shift is parenthesized because '+' binds tighter than '<<' in SystemVerilog;
// without it the platform offset would be shifted together with the bank index.
localparam [C_M_AXI_MEM_ADDR_WIDTH-1:0] BANK_OFFSET = C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET) + (C_M_AXI_MEM_ADDR_WIDTH'(i) << M_AXI_MEM_ADDR_WIDTH);
|
||||
assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_u[i]) + BANK_OFFSET;
|
||||
assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_u[i]) + BANK_OFFSET;
|
||||
end
|
||||
|
||||
`SCOPE_IO_SWITCH (2)
|
||||
`SCOPE_IO_SWITCH (2);
|
||||
|
||||
Vortex_axi #(
|
||||
.AXI_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH),
|
||||
.AXI_ADDR_WIDTH (`MEM_ADDR_WIDTH),
|
||||
.AXI_ADDR_WIDTH (M_AXI_MEM_ADDR_WIDTH),
|
||||
.AXI_TID_WIDTH (C_M_AXI_MEM_ID_WIDTH),
|
||||
.AXI_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS)
|
||||
) vortex_axi (
|
||||
`SCOPE_IO_BIND (1)
|
||||
|
||||
.clk (ap_clk),
|
||||
.reset (reset || ap_reset || ~vx_running),
|
||||
.clk (clk),
|
||||
.reset (vx_reset),
|
||||
|
||||
.m_axi_awvalid (m_axi_mem_awvalid_a),
|
||||
.m_axi_awready (m_axi_mem_awready_a),
|
||||
.m_axi_awaddr (m_axi_mem_awaddr_w),
|
||||
.m_axi_awaddr (m_axi_mem_awaddr_u),
|
||||
.m_axi_awid (m_axi_mem_awid_a),
|
||||
.m_axi_awlen (m_axi_mem_awlen_a),
|
||||
`UNUSED_PIN (m_axi_awsize),
|
||||
|
@ -283,7 +297,7 @@ module VX_afu_wrap #(
|
|||
|
||||
.m_axi_arvalid (m_axi_mem_arvalid_a),
|
||||
.m_axi_arready (m_axi_mem_arready_a),
|
||||
.m_axi_araddr (m_axi_mem_araddr_w),
|
||||
.m_axi_araddr (m_axi_mem_araddr_u),
|
||||
.m_axi_arid (m_axi_mem_arid_a),
|
||||
.m_axi_arlen (m_axi_mem_arlen_a),
|
||||
`UNUSED_PIN (m_axi_arsize),
|
||||
|
@ -310,42 +324,60 @@ module VX_afu_wrap #(
|
|||
|
||||
// SCOPE //////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef DBG_SCOPE_AFU
|
||||
`ifdef SCOPE
|
||||
`define TRIGGERS { \
|
||||
reset, \
|
||||
ap_start, \
|
||||
ap_done, \
|
||||
ap_idle, \
|
||||
interrupt, \
|
||||
vx_busy_wait, \
|
||||
vx_busy, \
|
||||
vx_running \
|
||||
}
|
||||
`ifdef DBG_SCOPE_AFU
|
||||
wire m_axi_mem_awfire_0 = m_axi_mem_awvalid_a[0] & m_axi_mem_awready_a[0];
|
||||
wire m_axi_mem_arfire_0 = m_axi_mem_arvalid_a[0] & m_axi_mem_arready_a[0];
|
||||
wire m_axi_mem_wfire_0 = m_axi_mem_wvalid_a[0] & m_axi_mem_wready_a[0];
|
||||
wire m_axi_mem_bfire_0 = m_axi_mem_bvalid_a[0] & m_axi_mem_bready_a[0];
|
||||
|
||||
`define PROBES { \
|
||||
vx_pending_writes \
|
||||
}
|
||||
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (0),
|
||||
.TRIGGERW ($bits(`TRIGGERS)),
|
||||
.PROBEW ($bits(`PROBES))
|
||||
) scope_tap (
|
||||
.clk(clk),
|
||||
.reset(scope_reset_w[0]),
|
||||
.start(1'b0),
|
||||
.stop(1'b0),
|
||||
.triggers(`TRIGGERS),
|
||||
.probes(`PROBES),
|
||||
.bus_in(scope_bus_in_w[0]),
|
||||
.bus_out(scope_bus_out_w[0])
|
||||
);
|
||||
`NEG_EDGE (reset_negedge, reset);
|
||||
`SCOPE_TAP (0, 0, {
|
||||
ap_reset,
|
||||
ap_start,
|
||||
ap_done,
|
||||
ap_idle,
|
||||
interrupt,
|
||||
vx_reset,
|
||||
vx_busy,
|
||||
m_axi_mem_awvalid_a[0],
|
||||
m_axi_mem_awready_a[0],
|
||||
m_axi_mem_wvalid_a[0],
|
||||
m_axi_mem_wready_a[0],
|
||||
m_axi_mem_bvalid_a[0],
|
||||
m_axi_mem_bready_a[0],
|
||||
m_axi_mem_arvalid_a[0],
|
||||
m_axi_mem_arready_a[0],
|
||||
m_axi_mem_rvalid_a[0],
|
||||
m_axi_mem_rready_a[0]
|
||||
}, {
|
||||
dcr_wr_valid,
|
||||
m_axi_mem_awfire_0,
|
||||
m_axi_mem_arfire_0,
|
||||
m_axi_mem_wfire_0,
|
||||
m_axi_mem_bfire_0
|
||||
},{
|
||||
dcr_wr_addr,
|
||||
dcr_wr_data,
|
||||
vx_pending_writes,
|
||||
m_axi_mem_awaddr_u[0],
|
||||
m_axi_mem_awid_a[0],
|
||||
m_axi_mem_bid_a[0],
|
||||
m_axi_mem_araddr_u[0],
|
||||
m_axi_mem_arid_a[0],
|
||||
m_axi_mem_rid_a[0]
|
||||
},
|
||||
reset_negedge, 1'b0, 4096
|
||||
);
|
||||
`else
|
||||
`SCOPE_IO_UNUSED(0)
|
||||
`endif
|
||||
`endif
|
||||
`ifdef CHIPSCOPE
|
||||
ila_afu ila_afu_inst (
|
||||
.clk (ap_clk),
|
||||
.clk (clk),
|
||||
.probe0 ({
|
||||
ap_reset,
|
||||
ap_start,
|
||||
ap_done,
|
||||
ap_idle,
|
||||
|
@ -355,13 +387,13 @@ module VX_afu_wrap #(
|
|||
vx_pending_writes,
|
||||
vx_busy_wait,
|
||||
vx_busy,
|
||||
vx_running
|
||||
vx_reset,
|
||||
dcr_wr_valid,
|
||||
dcr_wr_addr,
|
||||
dcr_wr_data
|
||||
})
|
||||
);
|
||||
`endif
|
||||
`else
|
||||
`SCOPE_IO_UNUSED_W(0)
|
||||
`endif
|
||||
|
||||
`ifdef SIMULATION
|
||||
`ifndef VERILATOR
|
||||
|
@ -371,7 +403,7 @@ module VX_afu_wrap #(
|
|||
initial begin
|
||||
$assertoff(0, vortex_axi);
|
||||
end
|
||||
always @(posedge ap_clk) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
assert_delay_ctr <= '0;
|
||||
assert_enabled <= 0;
|
||||
|
@ -390,19 +422,19 @@ module VX_afu_wrap #(
|
|||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_AFU
|
||||
always @(posedge ap_clk) begin
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin
|
||||
if (m_axi_mem_awvalid_a[i] && m_axi_mem_awready_a[i]) begin
|
||||
`TRACE(2, ("%d: AFU Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i]));
|
||||
`TRACE(2, ("%t: AXI Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i]))
|
||||
end
|
||||
if (m_axi_mem_wvalid_a[i] && m_axi_mem_wready_a[i]) begin
|
||||
`TRACE(2, ("%d: AFU Wr Req [%0d]: data=0x%0h\n", $time, i, m_axi_mem_wdata_a[i]));
|
||||
`TRACE(2, ("%t: AXI Wr Req [%0d]: data=0x%h\n", $time, i, m_axi_mem_wdata_a[i]))
|
||||
end
|
||||
if (m_axi_mem_arvalid_a[i] && m_axi_mem_arready_a[i]) begin
|
||||
`TRACE(2, ("%d: AFU Rd Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_araddr_a[i], m_axi_mem_arid_a[i]));
|
||||
`TRACE(2, ("%t: AXI Rd Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_araddr_a[i], m_axi_mem_arid_a[i]))
|
||||
end
|
||||
if (m_axi_mem_rvalid_a[i] && m_axi_mem_rready_a[i]) begin
|
||||
`TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i]));
|
||||
`TRACE(2, ("%t: AXI Rd Rsp [%0d]: data=0x%h, tag=0x%0h\n", $time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i]))
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -16,16 +16,25 @@
|
|||
module vortex_afu #(
|
||||
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
|
||||
parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
|
||||
parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH,
|
||||
parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH,
|
||||
parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH,
|
||||
parameter C_M_AXI_MEM_ADDR_WIDTH = 64,
|
||||
parameter C_M_AXI_MEM_DATA_WIDTH = `VX_MEM_DATA_WIDTH
|
||||
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
||||
parameter C_M_AXI_MEM_NUM_BANKS = 1
|
||||
`else
|
||||
parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS
|
||||
`endif
|
||||
) (
|
||||
// System signals
|
||||
input wire ap_clk,
|
||||
input wire ap_rst_n,
|
||||
|
||||
|
||||
// AXI4 master interface
|
||||
`REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
|
||||
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
||||
`REPEAT (1, GEN_AXI_MEM, REPEAT_COMMA),
|
||||
`else
|
||||
`REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
|
||||
`endif
|
||||
|
||||
// AXI4-Lite slave interface
|
||||
input wire s_axi_ctrl_awvalid,
|
||||
|
@ -45,8 +54,8 @@ module vortex_afu #(
|
|||
output wire s_axi_ctrl_bvalid,
|
||||
input wire s_axi_ctrl_bready,
|
||||
output wire [1:0] s_axi_ctrl_bresp,
|
||||
|
||||
output wire interrupt
|
||||
|
||||
output wire interrupt
|
||||
);
|
||||
|
||||
VX_afu_wrap #(
|
||||
|
@ -54,16 +63,19 @@ module vortex_afu #(
|
|||
.C_S_AXI_CTRL_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH),
|
||||
.C_M_AXI_MEM_ID_WIDTH (C_M_AXI_MEM_ID_WIDTH),
|
||||
.C_M_AXI_MEM_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH),
|
||||
.C_M_AXI_MEM_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH)
|
||||
.C_M_AXI_MEM_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH),
|
||||
.C_M_AXI_MEM_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS)
|
||||
) afu_wrap (
|
||||
.ap_clk (ap_clk),
|
||||
.ap_rst_n (ap_rst_n),
|
||||
|
||||
`REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_ARGS, REPEAT_COMMA),
|
||||
|
||||
.clk (ap_clk),
|
||||
.reset (~ap_rst_n),
|
||||
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
||||
`REPEAT (1, AXI_MEM_ARGS, REPEAT_COMMA),
|
||||
`else
|
||||
`REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_ARGS, REPEAT_COMMA),
|
||||
`endif
|
||||
.s_axi_ctrl_awvalid (s_axi_ctrl_awvalid),
|
||||
.s_axi_ctrl_awready (s_axi_ctrl_awready),
|
||||
.s_axi_ctrl_awaddr (s_axi_ctrl_awaddr),
|
||||
.s_axi_ctrl_awaddr (s_axi_ctrl_awaddr),
|
||||
.s_axi_ctrl_wvalid (s_axi_ctrl_wvalid),
|
||||
.s_axi_ctrl_wready (s_axi_ctrl_wready),
|
||||
.s_axi_ctrl_wdata (s_axi_ctrl_wdata),
|
||||
|
@ -81,5 +93,5 @@ module vortex_afu #(
|
|||
|
||||
.interrupt (interrupt)
|
||||
);
|
||||
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,12 +14,24 @@
|
|||
`ifndef VORTEX_AFU_VH
|
||||
`define VORTEX_AFU_VH
|
||||
|
||||
`ifndef M_AXI_MEM_NUM_BANKS
|
||||
`define M_AXI_MEM_NUM_BANKS 1
|
||||
`ifndef PLATFORM_MEMORY_BANKS
|
||||
`define PLATFORM_MEMORY_BANKS 2
|
||||
`endif
|
||||
|
||||
`ifndef M_AXI_MEM_ID_WIDTH
|
||||
`define M_AXI_MEM_ID_WIDTH 32
|
||||
`ifndef PLATFORM_MEMORY_ADDR_WIDTH
|
||||
`define PLATFORM_MEMORY_ADDR_WIDTH 31
|
||||
`endif
|
||||
|
||||
`ifndef PLATFORM_MEMORY_DATA_WIDTH
|
||||
`define PLATFORM_MEMORY_DATA_WIDTH 512
|
||||
`endif
|
||||
|
||||
`ifndef PLATFORM_MEMORY_OFFSET
|
||||
`define PLATFORM_MEMORY_OFFSET 0
|
||||
`endif
|
||||
|
||||
`ifndef PLATFORM_MEMORY_ID_WIDTH
|
||||
`define PLATFORM_MEMORY_ID_WIDTH 32
|
||||
`endif
|
||||
|
||||
`define GEN_AXI_MEM(i) \
|
||||
|
|
129
hw/rtl/cache/VX_bank_flush.sv
vendored
Normal file
129
hw/rtl/cache/VX_bank_flush.sv
vendored
Normal file
|
@ -0,0 +1,129 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
// Per-bank flush/initialization sequencer.
//
// A small FSM plus a counter that walks line (and, with WRITEBACK, way)
// indices:
//  - after reset it enters STATE_INIT and steps through every line index
//    once while flush_init is high, then goes idle;
//  - on flush_begin it waits for mshr_empty, then steps through all
//    CTR_WIDTH-bit counter values while flush_valid is high, advancing only
//    on flush_ready;
//  - banks other than bank 0 additionally wait for bank_empty;
//  - finally flush_end is raised for one cycle (STATE_DONE).
// NOTE(review): how the consumer uses flush_init/flush_valid (e.g. tag
// invalidation vs. writeback) is not visible in this module - confirm
// against the bank logic.
module VX_bank_flush #(
|
||||
// Index of this bank; bank 0 skips the STATE_WAIT2 drain step
parameter BANK_ID = 0,
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 64,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 1,
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
// request to start a flush; only sampled while the FSM is idle
input wire flush_begin,
|
||||
// high while the FSM is in STATE_DONE (single-cycle completion pulse)
output wire flush_end,
|
||||
// high while the FSM is in STATE_INIT (post-reset walk over all lines)
output wire flush_init,
|
||||
// high while the FSM is in STATE_FLUSH
output wire flush_valid,
|
||||
// current line index: low CS_LINE_SEL_BITS bits of the counter
output wire [`CS_LINE_SEL_BITS-1:0] flush_line,
|
||||
// current way select: decoded from the upper counter bits when WRITEBACK
// is set and there are way-select bits, otherwise all ways are selected
output wire [NUM_WAYS-1:0] flush_way,
|
||||
// advances the counter while in STATE_FLUSH
input wire flush_ready,
|
||||
// gates STATE_WAIT1 -> STATE_FLUSH
input wire mshr_empty,
|
||||
// gates STATE_WAIT2 -> STATE_DONE
input wire bank_empty
|
||||
);
|
||||
// way iteration is only needed when writeback (eviction) is enabled
|
||||
localparam CTR_WIDTH = `CS_LINE_SEL_BITS + (WRITEBACK ? `CS_WAY_SEL_BITS : 0);
|
||||
|
||||
// FSM state encoding (3 bits; values 6 and 7 are unused)
localparam STATE_IDLE = 0;
|
||||
localparam STATE_INIT = 1;
|
||||
localparam STATE_WAIT1 = 2;
|
||||
localparam STATE_FLUSH = 3;
|
||||
localparam STATE_WAIT2 = 4;
|
||||
localparam STATE_DONE = 5;
|
||||
|
||||
// current and next FSM state
reg [2:0] state_r, state_n;
|
||||
|
||||
// line/way walk counter; flush_line is its low part, the way index its high part
reg [CTR_WIDTH-1:0] counter_r;
|
||||
|
||||
// Next-state logic. state_n defaults to the current state, so unlisted
// encodings and unmet conditions simply hold.
always @(*) begin
|
||||
state_n = state_r;
|
||||
case (state_r)
|
||||
STATE_IDLE: begin
|
||||
if (flush_begin) begin
|
||||
state_n = STATE_WAIT1;
|
||||
end
|
||||
end
|
||||
STATE_INIT: begin
|
||||
// initialization only walks the line indices (not the ways)
if (counter_r == ((2 ** `CS_LINE_SEL_BITS)-1)) begin
|
||||
state_n = STATE_IDLE;
|
||||
end
|
||||
end
|
||||
STATE_WAIT1: begin
|
||||
// wait for pending requests to complete
|
||||
if (mshr_empty) begin
|
||||
state_n = STATE_FLUSH;
|
||||
end
|
||||
end
|
||||
STATE_FLUSH: begin
|
||||
// leave once the last counter value has been accepted
if (counter_r == ((2 ** CTR_WIDTH)-1) && flush_ready) begin
|
||||
state_n = (BANK_ID == 0) ? STATE_DONE : STATE_WAIT2;
|
||||
end
|
||||
end
|
||||
STATE_WAIT2: begin
|
||||
// ensure the bank is empty before notifying the cache flush unit,
|
||||
// because the flush request to lower caches only goes through bank0
|
||||
// and it is important that request gets sent out last.
|
||||
if (bank_empty) begin
|
||||
state_n = STATE_DONE;
|
||||
end
|
||||
end
|
||||
STATE_DONE: begin
|
||||
// generate a completion pulse
|
||||
state_n = STATE_IDLE;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
// State and counter registers. Reset starts the initialization walk.
// The counter increments every cycle in STATE_INIT, on flush_ready in
// STATE_FLUSH, holds in the wait/done states, and is cleared while idle.
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state_r <= STATE_INIT;
|
||||
counter_r <= '0;
|
||||
end else begin
|
||||
state_r <= state_n;
|
||||
if (state_r != STATE_IDLE) begin
|
||||
if ((state_r == STATE_INIT)
|
||||
|| ((state_r == STATE_FLUSH) && flush_ready)) begin
|
||||
counter_r <= counter_r + CTR_WIDTH'(1);
|
||||
end
|
||||
end else begin
|
||||
counter_r <= '0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// Status outputs are direct decodes of the current state
assign flush_end = (state_r == STATE_DONE);
|
||||
assign flush_init = (state_r == STATE_INIT);
|
||||
assign flush_valid = (state_r == STATE_FLUSH);
|
||||
assign flush_line = counter_r[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
// Way selection: with writeback and more than one way, the upper counter
// bits are decoded into flush_way (presumably one-hot; VX_decoder is
// defined elsewhere - confirm). Otherwise every way is selected at once.
if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin : g_flush_way
|
||||
VX_decoder #(
|
||||
.N (`CS_WAY_SEL_BITS),
|
||||
.D (NUM_WAYS)
|
||||
) ctr_decoder (
|
||||
.data_in (counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]),
|
||||
.valid_in (1'b1),
|
||||
.data_out (flush_way)
|
||||
);
|
||||
end else begin : g_flush_way_all
|
||||
assign flush_way = {NUM_WAYS{1'b1}};
|
||||
end
|
||||
|
||||
endmodule
|
510
hw/rtl/cache/VX_cache.sv
vendored
510
hw/rtl/cache/VX_cache.sv
vendored
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,15 +14,15 @@
|
|||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_cache import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
|
||||
// Number of Word requests per cycle
|
||||
parameter NUM_REQS = 4,
|
||||
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 4096,
|
||||
parameter CACHE_SIZE = 4096,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 64,
|
||||
parameter LINE_SIZE = 64,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of associative ways
|
||||
|
@ -33,7 +33,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 2,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MSHR_SIZE = 8,
|
||||
parameter MSHR_SIZE = 8,
|
||||
// Memory Response Queue Size
|
||||
parameter MRSQ_SIZE = 0,
|
||||
// Memory Request Queue Size
|
||||
|
@ -42,6 +42,12 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
|
||||
// Enable dirty bytes on writeback
|
||||
parameter DIRTY_BYTES = 0,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
|
@ -53,12 +59,12 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
|
||||
// Memory request output register
|
||||
parameter MEM_OUT_BUF = 0
|
||||
) (
|
||||
) (
|
||||
// PERF
|
||||
`ifdef PERF_ENABLE
|
||||
output cache_perf_t cache_perf,
|
||||
`endif
|
||||
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
@ -66,23 +72,32 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
VX_mem_bus_if.master mem_bus_if
|
||||
);
|
||||
|
||||
`STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter"))
|
||||
`STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter: number of banks must be power of 2"))
|
||||
`STATIC_ASSERT(WRITE_ENABLE || !WRITEBACK, ("invalid parameter: writeback requires write enable"))
|
||||
`STATIC_ASSERT(WRITEBACK || !DIRTY_BYTES, ("invalid parameter: dirty bytes require writeback"))
|
||||
|
||||
// In writeback mode, memory fill response may issue a new memory request to handle evicted blocks.
|
||||
// We need to ensure that the memory request queue never fills up to avoid deadlock.
|
||||
`STATIC_ASSERT(!WRITEBACK || (MREQ_SIZE >= MSHR_SIZE), ("invalid parameter: writeback requires MREQ_SIZE >= MSHR_SIZE"))
|
||||
|
||||
localparam REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS);
|
||||
localparam WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS);
|
||||
localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE);
|
||||
localparam MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS;
|
||||
localparam MEM_TAG_WIDTH = `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, UUID_WIDTH);
|
||||
localparam WORDS_PER_LINE = LINE_SIZE / WORD_SIZE;
|
||||
localparam WORD_WIDTH = WORD_SIZE * 8;
|
||||
localparam WORD_SEL_BITS = `CLOG2(WORDS_PER_LINE);
|
||||
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
|
||||
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
|
||||
localparam LINE_ADDR_WIDTH = (`CS_WORD_ADDR_WIDTH - BANK_SEL_BITS - WORD_SEL_BITS);
|
||||
localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH;
|
||||
localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + 1;
|
||||
localparam CORE_RSP_DATAW = WORD_WIDTH + TAG_WIDTH;
|
||||
localparam BANK_MEM_TAG_WIDTH = UUID_WIDTH + MSHR_ADDR_WIDTH;
|
||||
|
||||
localparam CORE_REQ_BUF_ENABLE = (NUM_BANKS != 1) || (NUM_REQS != 1);
|
||||
localparam MEM_REQ_BUF_ENABLE = (NUM_BANKS != 1);
|
||||
localparam CORE_RSP_REG_DISABLE = (NUM_BANKS != 1) || (NUM_REQS != 1);
|
||||
localparam MEM_REQ_REG_DISABLE = (NUM_BANKS != 1);
|
||||
|
||||
localparam REQ_XBAR_BUF = (NUM_REQS > 4) ? 2 : 0;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
wire [NUM_BANKS-1:0] perf_read_miss_per_bank;
|
||||
|
@ -90,24 +105,33 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [NUM_BANKS-1:0] perf_mshr_stall_per_bank;
|
||||
`endif
|
||||
|
||||
wire [NUM_REQS-1:0] core_req_valid;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr;
|
||||
wire [NUM_REQS-1:0] core_req_rw;
|
||||
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data;
|
||||
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag;
|
||||
wire [NUM_REQS-1:0] core_req_ready;
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (WORD_SIZE),
|
||||
.TAG_WIDTH (TAG_WIDTH)
|
||||
) core_bus2_if[NUM_REQS]();
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_valid[i] = core_bus_if[i].req_valid;
|
||||
assign core_req_rw[i] = core_bus_if[i].req_data.rw;
|
||||
assign core_req_byteen[i] = core_bus_if[i].req_data.byteen;
|
||||
assign core_req_addr[i] = core_bus_if[i].req_data.addr;
|
||||
assign core_req_data[i] = core_bus_if[i].req_data.data;
|
||||
assign core_req_tag[i] = core_bus_if[i].req_data.tag;
|
||||
assign core_bus_if[i].req_ready = core_req_ready[i];
|
||||
`UNUSED_VAR (core_bus_if[i].req_data.atype)
|
||||
end
|
||||
wire [NUM_BANKS-1:0] per_bank_flush_begin;
|
||||
wire [`UP(UUID_WIDTH)-1:0] flush_uuid;
|
||||
wire [NUM_BANKS-1:0] per_bank_flush_end;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_fire;
|
||||
|
||||
VX_cache_flush #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.UUID_WIDTH(UUID_WIDTH),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.BANK_SEL_LATENCY (`TO_OUT_BUF_REG(REQ_XBAR_BUF)) // bank xbar latency
|
||||
) flush_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.core_bus_in_if (core_bus_if),
|
||||
.core_bus_out_if (core_bus2_if),
|
||||
.bank_req_fire (per_bank_core_req_fire),
|
||||
.flush_begin (per_bank_flush_begin),
|
||||
.flush_uuid (flush_uuid),
|
||||
.flush_end (per_bank_flush_end)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
@ -117,99 +141,101 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s;
|
||||
wire [NUM_REQS-1:0] core_rsp_ready_s;
|
||||
|
||||
`RESET_RELAY (core_rsp_reset, reset);
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_buf
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`CS_WORD_WIDTH + TAG_WIDTH),
|
||||
.SIZE (CORE_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0),
|
||||
.SIZE (CORE_RSP_REG_DISABLE ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF))
|
||||
) core_rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (core_rsp_reset),
|
||||
.reset (reset),
|
||||
.valid_in (core_rsp_valid_s[i]),
|
||||
.ready_in (core_rsp_ready_s[i]),
|
||||
.data_in ({core_rsp_data_s[i], core_rsp_tag_s[i]}),
|
||||
.data_out ({core_bus_if[i].rsp_data.data, core_bus_if[i].rsp_data.tag}),
|
||||
.valid_out (core_bus_if[i].rsp_valid),
|
||||
.ready_out (core_bus_if[i].rsp_ready)
|
||||
.data_out ({core_bus2_if[i].rsp_data.data, core_bus2_if[i].rsp_data.tag}),
|
||||
.valid_out (core_bus2_if[i].rsp_valid),
|
||||
.ready_out (core_bus2_if[i].rsp_ready)
|
||||
);
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Memory request buffering
|
||||
wire mem_req_valid_s;
|
||||
wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_s;
|
||||
wire mem_req_rw_s;
|
||||
wire [LINE_SIZE-1:0] mem_req_byteen_s;
|
||||
wire [`CS_LINE_WIDTH-1:0] mem_req_data_s;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_s;
|
||||
wire mem_req_ready_s;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH),
|
||||
.SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
) mem_req_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mem_req_valid_s),
|
||||
.ready_in (mem_req_ready_s),
|
||||
.data_in ({mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_data_s, mem_req_tag_s}),
|
||||
.data_out ({mem_bus_if.req_data.rw, mem_bus_if.req_data.byteen, mem_bus_if.req_data.addr, mem_bus_if.req_data.data, mem_bus_if.req_data.tag}),
|
||||
.valid_out (mem_bus_if.req_valid),
|
||||
.ready_out (mem_bus_if.req_ready)
|
||||
);
|
||||
|
||||
assign mem_bus_if.req_data.atype = '0;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (LINE_SIZE),
|
||||
.TAG_WIDTH (MEM_TAG_WIDTH)
|
||||
) mem_bus_tmp_if();
|
||||
|
||||
// Memory response buffering
|
||||
|
||||
wire mem_rsp_valid_s;
|
||||
wire [`CS_LINE_WIDTH-1:0] mem_rsp_data_s;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_s;
|
||||
wire mem_rsp_ready_s;
|
||||
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (MEM_TAG_WIDTH + `CS_LINE_WIDTH),
|
||||
.DATAW (MEM_TAG_WIDTH + `CS_LINE_WIDTH),
|
||||
.SIZE (MRSQ_SIZE),
|
||||
.OUT_REG (MRSQ_SIZE > 2)
|
||||
) mem_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mem_bus_if.rsp_valid),
|
||||
.ready_in (mem_bus_if.rsp_ready),
|
||||
.data_in ({mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data}),
|
||||
.data_out ({mem_rsp_tag_s, mem_rsp_data_s}),
|
||||
.valid_in (mem_bus_tmp_if.rsp_valid),
|
||||
.ready_in (mem_bus_tmp_if.rsp_ready),
|
||||
.data_in ({mem_bus_tmp_if.rsp_data.tag, mem_bus_tmp_if.rsp_data.data}),
|
||||
.data_out ({mem_rsp_tag_s, mem_rsp_data_s}),
|
||||
.valid_out (mem_rsp_valid_s),
|
||||
.ready_out (mem_rsp_ready_s)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
wire [BANK_MEM_TAG_WIDTH-1:0] bank_mem_rsp_tag;
|
||||
wire [`UP(`CS_BANK_SEL_BITS)-1:0] mem_rsp_bank_id;
|
||||
|
||||
wire [`CS_LINE_SEL_BITS-1:0] init_line_sel;
|
||||
wire init_enable;
|
||||
if (NUM_BANKS > 1) begin : g_mem_rsp_tag_s_with_banks
|
||||
assign bank_mem_rsp_tag = mem_rsp_tag_s[MEM_TAG_WIDTH-1:`CS_BANK_SEL_BITS];
|
||||
assign mem_rsp_bank_id = mem_rsp_tag_s[`CS_BANK_SEL_BITS-1:0];
|
||||
end else begin : g_mem_rsp_tag_s_no_bank
|
||||
assign bank_mem_rsp_tag = mem_rsp_tag_s;
|
||||
assign mem_rsp_bank_id = 0;
|
||||
end
|
||||
|
||||
// this reset relay is required to sync with bank initialization
|
||||
`RESET_RELAY (init_reset, reset);
|
||||
// Memory request buffering
|
||||
|
||||
VX_cache_init #(
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_WAYS (NUM_WAYS)
|
||||
) cache_init (
|
||||
wire mem_req_valid;
|
||||
wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr;
|
||||
wire mem_req_rw;
|
||||
wire [LINE_SIZE-1:0] mem_req_byteen;
|
||||
wire [`CS_LINE_WIDTH-1:0] mem_req_data;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_req_tag;
|
||||
wire mem_req_flush;
|
||||
wire mem_req_ready;
|
||||
|
||||
wire mem_req_flush_b;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1),
|
||||
.SIZE (MEM_REQ_REG_DISABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
) mem_req_buf (
|
||||
.clk (clk),
|
||||
.reset (init_reset),
|
||||
.addr_out (init_line_sel),
|
||||
.valid_out (init_enable)
|
||||
.reset (reset),
|
||||
.valid_in (mem_req_valid),
|
||||
.ready_in (mem_req_ready),
|
||||
.data_in ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data, mem_req_tag, mem_req_flush}),
|
||||
.data_out ({mem_bus_tmp_if.req_data.rw, mem_bus_tmp_if.req_data.byteen, mem_bus_tmp_if.req_data.addr, mem_bus_tmp_if.req_data.data, mem_bus_tmp_if.req_data.tag, mem_req_flush_b}),
|
||||
.valid_out (mem_bus_tmp_if.req_valid),
|
||||
.ready_out (mem_bus_tmp_if.req_ready)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
assign mem_bus_tmp_if.req_data.flags = mem_req_flush_b ? `MEM_REQ_FLAGS_WIDTH'(1 << `MEM_REQ_FLAG_FLUSH) : '0;
|
||||
|
||||
if (WRITE_ENABLE) begin : g_mem_bus_if
|
||||
`ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if);
|
||||
end else begin : g_mem_bus_if_ro
|
||||
`ASSIGN_VX_MEM_BUS_RO_IF (mem_bus_if, mem_bus_tmp_if);
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_valid;
|
||||
wire [NUM_BANKS-1:0][`CS_LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_rw;
|
||||
|
@ -218,81 +244,105 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_req_data;
|
||||
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_req_tag;
|
||||
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_req_idx;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_flush;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_ready;
|
||||
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_rsp_valid;
|
||||
wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_rsp_data;
|
||||
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_rsp_tag;
|
||||
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_rsp_idx;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_rsp_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_valid;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_valid;
|
||||
wire [NUM_BANKS-1:0][`CS_MEM_ADDR_WIDTH-1:0] per_bank_mem_req_addr;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_rw;
|
||||
wire [NUM_BANKS-1:0][WORD_SEL_WIDTH-1:0] per_bank_mem_req_wsel;
|
||||
wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_mem_req_byteen;
|
||||
wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_mem_req_data;
|
||||
wire [NUM_BANKS-1:0][MSHR_ADDR_WIDTH-1:0] per_bank_mem_req_id;
|
||||
wire [NUM_BANKS-1:0][LINE_SIZE-1:0] per_bank_mem_req_byteen;
|
||||
wire [NUM_BANKS-1:0][`CS_LINE_WIDTH-1:0] per_bank_mem_req_data;
|
||||
wire [NUM_BANKS-1:0][BANK_MEM_TAG_WIDTH-1:0] per_bank_mem_req_tag;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_flush;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready;
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign mem_rsp_ready_s = per_bank_mem_rsp_ready;
|
||||
end else begin
|
||||
assign mem_rsp_ready_s = per_bank_mem_rsp_ready[`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s)];
|
||||
end
|
||||
|
||||
// A per-bank core request fires when valid and ready handshake on the SAME
// channel (crossbar output -> bank core-request input). The memory-request
// ready belongs to a different channel and must not be used here; this
// signal feeds flush_unit.bank_req_fire.
assign per_bank_core_req_fire = per_bank_core_req_valid & per_bank_core_req_ready;
|
||||
|
||||
assign mem_rsp_ready_s = per_bank_mem_rsp_ready[mem_rsp_bank_id];
|
||||
|
||||
// Bank requests dispatch
|
||||
|
||||
wire [NUM_REQS-1:0][CORE_REQ_DATAW-1:0] core_req_data_in;
|
||||
wire [NUM_BANKS-1:0][CORE_REQ_DATAW-1:0] core_req_data_out;
|
||||
wire [NUM_REQS-1:0] core_req_valid;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr;
|
||||
wire [NUM_REQS-1:0] core_req_rw;
|
||||
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data;
|
||||
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag;
|
||||
wire [NUM_REQS-1:0] core_req_flush;
|
||||
wire [NUM_REQS-1:0] core_req_ready;
|
||||
|
||||
wire [NUM_REQS-1:0][LINE_ADDR_WIDTH-1:0] core_req_line_addr;
|
||||
wire [NUM_REQS-1:0][BANK_SEL_WIDTH-1:0] core_req_bid;
|
||||
wire [NUM_REQS-1:0][WORD_SEL_WIDTH-1:0] core_req_wsel;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
if (WORDS_PER_LINE > 1) begin
|
||||
wire [NUM_REQS-1:0][CORE_REQ_DATAW-1:0] core_req_data_in;
|
||||
wire [NUM_BANKS-1:0][CORE_REQ_DATAW-1:0] core_req_data_out;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req
|
||||
assign core_req_valid[i] = core_bus2_if[i].req_valid;
|
||||
assign core_req_rw[i] = core_bus2_if[i].req_data.rw;
|
||||
assign core_req_byteen[i] = core_bus2_if[i].req_data.byteen;
|
||||
assign core_req_addr[i] = core_bus2_if[i].req_data.addr;
|
||||
assign core_req_data[i] = core_bus2_if[i].req_data.data;
|
||||
assign core_req_tag[i] = core_bus2_if[i].req_data.tag;
|
||||
assign core_req_flush[i] = core_bus2_if[i].req_data.flags[`MEM_REQ_FLAG_FLUSH];
|
||||
assign core_bus2_if[i].req_ready = core_req_ready[i];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_wsel
|
||||
if (WORDS_PER_LINE > 1) begin : g_wsel
|
||||
assign core_req_wsel[i] = core_req_addr[i][0 +: WORD_SEL_BITS];
|
||||
end else begin
|
||||
end else begin : g_no_wsel
|
||||
assign core_req_wsel[i] = '0;
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_line_addr
|
||||
assign core_req_line_addr[i] = core_req_addr[i][(BANK_SEL_BITS + WORD_SEL_BITS) +: LINE_ADDR_WIDTH];
|
||||
end
|
||||
|
||||
if (NUM_BANKS > 1) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_bid
|
||||
if (NUM_BANKS > 1) begin : g_multibanks
|
||||
assign core_req_bid[i] = core_req_addr[i][WORD_SEL_BITS +: BANK_SEL_BITS];
|
||||
end else begin : g_singlebank
|
||||
assign core_req_bid[i] = '0;
|
||||
end
|
||||
end else begin
|
||||
assign core_req_bid = '0;
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_data_in
|
||||
assign core_req_data_in[i] = {
|
||||
core_req_line_addr[i],
|
||||
core_req_rw[i],
|
||||
core_req_wsel[i],
|
||||
core_req_byteen[i],
|
||||
core_req_byteen[i],
|
||||
core_req_data[i],
|
||||
core_req_tag[i]};
|
||||
core_req_tag[i],
|
||||
core_req_flush[i]
|
||||
};
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
wire [`PERF_CTR_BITS-1:0] perf_collisions;
|
||||
`endif
|
||||
|
||||
`RESET_RELAY (req_xbar_reset, reset);
|
||||
|
||||
VX_stream_xbar #(
|
||||
VX_stream_xbar #(
|
||||
.NUM_INPUTS (NUM_REQS),
|
||||
.NUM_OUTPUTS (NUM_BANKS),
|
||||
.DATAW (CORE_REQ_DATAW),
|
||||
.PERF_CTR_BITS (`PERF_CTR_BITS),
|
||||
.OUT_BUF ((NUM_REQS > 4) ? 2 : 0)
|
||||
.ARBITER ("R"),
|
||||
.OUT_BUF (REQ_XBAR_BUF)
|
||||
) req_xbar (
|
||||
.clk (clk),
|
||||
.reset (req_xbar_reset),
|
||||
.reset (reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.collisions(perf_collisions),
|
||||
`else
|
||||
|
@ -308,32 +358,27 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.ready_out (per_bank_core_req_ready)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_core_req_data_out
|
||||
assign {
|
||||
per_bank_core_req_addr[i],
|
||||
per_bank_core_req_rw[i],
|
||||
per_bank_core_req_wsel[i],
|
||||
per_bank_core_req_byteen[i],
|
||||
per_bank_core_req_byteen[i],
|
||||
per_bank_core_req_data[i],
|
||||
per_bank_core_req_tag[i]} = core_req_data_out[i];
|
||||
per_bank_core_req_tag[i],
|
||||
per_bank_core_req_flush[i]
|
||||
} = core_req_data_out[i];
|
||||
end
|
||||
|
||||
|
||||
// Banks access
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
for (genvar bank_id = 0; bank_id < NUM_BANKS; ++bank_id) begin : g_banks
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr;
|
||||
wire curr_bank_mem_rsp_valid;
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign curr_bank_mem_rsp_valid = mem_rsp_valid_s;
|
||||
end else begin
|
||||
assign curr_bank_mem_rsp_valid = mem_rsp_valid_s && (`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s) == i);
|
||||
end
|
||||
wire curr_bank_mem_rsp_valid = mem_rsp_valid_s && (mem_rsp_bank_id == bank_id);
|
||||
|
||||
`RESET_RELAY (bank_reset, reset);
|
||||
|
||||
VX_cache_bank #(
|
||||
.BANK_ID (i),
|
||||
.INSTANCE_ID (INSTANCE_ID),
|
||||
VX_cache_bank #(
|
||||
.BANK_ID (bank_id),
|
||||
.INSTANCE_ID ($sformatf("%s-bank%0d", INSTANCE_ID, bank_id)),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
|
@ -344,84 +389,87 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.MSHR_SIZE (MSHR_SIZE),
|
||||
.MREQ_SIZE (MREQ_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.DIRTY_BYTES (DIRTY_BYTES),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.CORE_OUT_BUF (CORE_REQ_BUF_ENABLE ? 0 : CORE_OUT_BUF),
|
||||
.MEM_OUT_BUF (MEM_REQ_BUF_ENABLE ? 0 : MEM_OUT_BUF)
|
||||
) bank (
|
||||
.CORE_OUT_REG (CORE_RSP_REG_DISABLE ? 0 : `TO_OUT_BUF_REG(CORE_OUT_BUF)),
|
||||
.MEM_OUT_REG (MEM_REQ_REG_DISABLE ? 0 : `TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
) bank (
|
||||
.clk (clk),
|
||||
.reset (bank_reset),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_read_misses (perf_read_miss_per_bank[i]),
|
||||
.perf_write_misses (perf_write_miss_per_bank[i]),
|
||||
.perf_mshr_stalls (perf_mshr_stall_per_bank[i]),
|
||||
.perf_read_misses (perf_read_miss_per_bank[bank_id]),
|
||||
.perf_write_misses (perf_write_miss_per_bank[bank_id]),
|
||||
.perf_mshr_stalls (perf_mshr_stall_per_bank[bank_id]),
|
||||
`endif
|
||||
|
||||
// Core request
|
||||
.core_req_valid (per_bank_core_req_valid[i]),
|
||||
.core_req_addr (per_bank_core_req_addr[i]),
|
||||
.core_req_rw (per_bank_core_req_rw[i]),
|
||||
.core_req_wsel (per_bank_core_req_wsel[i]),
|
||||
.core_req_byteen (per_bank_core_req_byteen[i]),
|
||||
.core_req_data (per_bank_core_req_data[i]),
|
||||
.core_req_tag (per_bank_core_req_tag[i]),
|
||||
.core_req_idx (per_bank_core_req_idx[i]),
|
||||
.core_req_ready (per_bank_core_req_ready[i]),
|
||||
|
||||
// Core response
|
||||
.core_rsp_valid (per_bank_core_rsp_valid[i]),
|
||||
.core_rsp_data (per_bank_core_rsp_data[i]),
|
||||
.core_rsp_tag (per_bank_core_rsp_tag[i]),
|
||||
.core_rsp_idx (per_bank_core_rsp_idx[i]),
|
||||
.core_rsp_ready (per_bank_core_rsp_ready[i]),
|
||||
// Core request
|
||||
.core_req_valid (per_bank_core_req_valid[bank_id]),
|
||||
.core_req_addr (per_bank_core_req_addr[bank_id]),
|
||||
.core_req_rw (per_bank_core_req_rw[bank_id]),
|
||||
.core_req_wsel (per_bank_core_req_wsel[bank_id]),
|
||||
.core_req_byteen (per_bank_core_req_byteen[bank_id]),
|
||||
.core_req_data (per_bank_core_req_data[bank_id]),
|
||||
.core_req_tag (per_bank_core_req_tag[bank_id]),
|
||||
.core_req_idx (per_bank_core_req_idx[bank_id]),
|
||||
.core_req_flush (per_bank_core_req_flush[bank_id]),
|
||||
.core_req_ready (per_bank_core_req_ready[bank_id]),
|
||||
|
||||
// Core response
|
||||
.core_rsp_valid (per_bank_core_rsp_valid[bank_id]),
|
||||
.core_rsp_data (per_bank_core_rsp_data[bank_id]),
|
||||
.core_rsp_tag (per_bank_core_rsp_tag[bank_id]),
|
||||
.core_rsp_idx (per_bank_core_rsp_idx[bank_id]),
|
||||
.core_rsp_ready (per_bank_core_rsp_ready[bank_id]),
|
||||
|
||||
// Memory request
|
||||
.mem_req_valid (per_bank_mem_req_valid[i]),
|
||||
.mem_req_valid (per_bank_mem_req_valid[bank_id]),
|
||||
.mem_req_addr (curr_bank_mem_req_addr),
|
||||
.mem_req_rw (per_bank_mem_req_rw[i]),
|
||||
.mem_req_wsel (per_bank_mem_req_wsel[i]),
|
||||
.mem_req_byteen (per_bank_mem_req_byteen[i]),
|
||||
.mem_req_data (per_bank_mem_req_data[i]),
|
||||
.mem_req_id (per_bank_mem_req_id[i]),
|
||||
.mem_req_ready (per_bank_mem_req_ready[i]),
|
||||
.mem_req_rw (per_bank_mem_req_rw[bank_id]),
|
||||
.mem_req_byteen (per_bank_mem_req_byteen[bank_id]),
|
||||
.mem_req_data (per_bank_mem_req_data[bank_id]),
|
||||
.mem_req_tag (per_bank_mem_req_tag[bank_id]),
|
||||
.mem_req_flush (per_bank_mem_req_flush[bank_id]),
|
||||
.mem_req_ready (per_bank_mem_req_ready[bank_id]),
|
||||
|
||||
// Memory response
|
||||
.mem_rsp_valid (curr_bank_mem_rsp_valid),
|
||||
.mem_rsp_data (mem_rsp_data_s),
|
||||
.mem_rsp_id (`CS_MEM_TAG_TO_REQ_ID(mem_rsp_tag_s)),
|
||||
.mem_rsp_ready (per_bank_mem_rsp_ready[i]),
|
||||
.mem_rsp_tag (bank_mem_rsp_tag),
|
||||
.mem_rsp_ready (per_bank_mem_rsp_ready[bank_id]),
|
||||
|
||||
// initialization
|
||||
.init_enable (init_enable),
|
||||
.init_line_sel (init_line_sel)
|
||||
// Flush request
|
||||
.flush_begin (per_bank_flush_begin[bank_id]),
|
||||
.flush_uuid (flush_uuid),
|
||||
.flush_end (per_bank_flush_end[bank_id])
|
||||
);
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign per_bank_mem_req_addr[i] = curr_bank_mem_req_addr;
|
||||
end else begin
|
||||
assign per_bank_mem_req_addr[i] = `CS_LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, i);
|
||||
// Per-bank memory request address.
// With a single bank the bank's address is forwarded unchanged; otherwise it
// is passed through `CS_LINE_TO_MEM_ADDR together with this bank's index.
// NOTE: the generate labels now match the branch they name (the original had
// "multibanks" on the NUM_BANKS == 1 branch and "singlebank" on the else branch).
if (NUM_BANKS == 1) begin : g_per_bank_mem_req_addr_singlebank
    assign per_bank_mem_req_addr[bank_id] = curr_bank_mem_req_addr;
end else begin : g_per_bank_mem_req_addr_multibanks
    assign per_bank_mem_req_addr[bank_id] = `CS_LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, bank_id);
end
|
||||
end
|
||||
end
|
||||
|
||||
// Bank responses gather
|
||||
|
||||
wire [NUM_BANKS-1:0][CORE_RSP_DATAW-1:0] core_rsp_data_in;
|
||||
wire [NUM_REQS-1:0][CORE_RSP_DATAW-1:0] core_rsp_data_out;
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_core_rsp_data_in
|
||||
assign core_rsp_data_in[i] = {per_bank_core_rsp_data[i], per_bank_core_rsp_tag[i]};
|
||||
end
|
||||
|
||||
`RESET_RELAY (rsp_xbar_reset, reset);
|
||||
|
||||
VX_stream_xbar #(
|
||||
.NUM_INPUTS (NUM_BANKS),
|
||||
.NUM_OUTPUTS (NUM_REQS),
|
||||
.DATAW (CORE_RSP_DATAW)
|
||||
.DATAW (CORE_RSP_DATAW),
|
||||
.ARBITER ("R")
|
||||
) rsp_xbar (
|
||||
.clk (clk),
|
||||
.reset (rsp_xbar_reset),
|
||||
.reset (reset),
|
||||
`UNUSED_PIN (collisions),
|
||||
.valid_in (per_bank_core_rsp_valid),
|
||||
.data_in (core_rsp_data_in),
|
||||
|
@ -433,38 +481,30 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_data_s
|
||||
assign {core_rsp_data_s[i], core_rsp_tag_s[i]} = core_rsp_data_out[i];
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire mem_req_valid_p;
|
||||
wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_p;
|
||||
wire mem_req_rw_p;
|
||||
wire [WORD_SEL_WIDTH-1:0] mem_req_wsel_p;
|
||||
wire [WORD_SIZE-1:0] mem_req_byteen_p;
|
||||
wire [`CS_WORD_WIDTH-1:0] mem_req_data_p;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_p;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mem_req_id_p;
|
||||
wire mem_req_ready_p;
|
||||
|
||||
// Memory request arbitration
|
||||
|
||||
wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + `CS_WORD_WIDTH)-1:0] data_in;
|
||||
wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + 1)-1:0] data_in;
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign data_in[i] = {per_bank_mem_req_addr[i],
|
||||
per_bank_mem_req_rw[i],
|
||||
per_bank_mem_req_wsel[i],
|
||||
per_bank_mem_req_byteen[i],
|
||||
per_bank_mem_req_data[i],
|
||||
per_bank_mem_req_id[i]};
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_data_in
|
||||
assign data_in[i] = {
|
||||
per_bank_mem_req_addr[i],
|
||||
per_bank_mem_req_rw[i],
|
||||
per_bank_mem_req_byteen[i],
|
||||
per_bank_mem_req_data[i],
|
||||
per_bank_mem_req_tag[i],
|
||||
per_bank_mem_req_flush[i]
|
||||
};
|
||||
end
|
||||
|
||||
wire [BANK_MEM_TAG_WIDTH-1:0] bank_mem_req_tag;
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (NUM_BANKS),
|
||||
.DATAW (`CS_MEM_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + `CS_WORD_WIDTH + MSHR_ADDR_WIDTH),
|
||||
.DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + 1),
|
||||
.ARBITER ("R")
|
||||
) mem_req_arb (
|
||||
.clk (clk),
|
||||
|
@ -472,65 +512,27 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.valid_in (per_bank_mem_req_valid),
|
||||
.ready_in (per_bank_mem_req_ready),
|
||||
.data_in (data_in),
|
||||
.data_out ({mem_req_addr_p, mem_req_rw_p, mem_req_wsel_p, mem_req_byteen_p, mem_req_data_p, mem_req_id_p}),
|
||||
.valid_out (mem_req_valid_p),
|
||||
.ready_out (mem_req_ready_p),
|
||||
.data_out ({mem_req_addr, mem_req_rw, mem_req_byteen, mem_req_data, bank_mem_req_tag, mem_req_flush}),
|
||||
.valid_out (mem_req_valid),
|
||||
.ready_out (mem_req_ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
if (NUM_BANKS > 1) begin
|
||||
wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr_p);
|
||||
assign mem_req_tag_p = MEM_TAG_WIDTH'({mem_req_bank_id, mem_req_id_p});
|
||||
end else begin
|
||||
assign mem_req_tag_p = MEM_TAG_WIDTH'(mem_req_id_p);
|
||||
end
|
||||
|
||||
// Memory request multi-port handling
|
||||
|
||||
assign mem_req_valid_s = mem_req_valid_p;
|
||||
assign mem_req_addr_s = mem_req_addr_p;
|
||||
assign mem_req_tag_s = mem_req_tag_p;
|
||||
assign mem_req_ready_p = mem_req_ready_s;
|
||||
|
||||
if (WRITE_ENABLE != 0) begin
|
||||
if (`CS_WORDS_PER_LINE > 1) begin
|
||||
reg [LINE_SIZE-1:0] mem_req_byteen_r;
|
||||
reg [`CS_LINE_WIDTH-1:0] mem_req_data_r;
|
||||
|
||||
always @(*) begin
|
||||
mem_req_byteen_r = '0;
|
||||
mem_req_data_r = 'x;
|
||||
mem_req_byteen_r[mem_req_wsel_p * WORD_SIZE +: WORD_SIZE] = mem_req_byteen_p;
|
||||
mem_req_data_r[mem_req_wsel_p * `CS_WORD_WIDTH +: `CS_WORD_WIDTH] = mem_req_data_p;
|
||||
end
|
||||
assign mem_req_rw_s = mem_req_rw_p;
|
||||
assign mem_req_byteen_s = mem_req_byteen_r;
|
||||
assign mem_req_data_s = mem_req_data_r;
|
||||
end else begin
|
||||
`UNUSED_VAR (mem_req_wsel_p)
|
||||
assign mem_req_rw_s = mem_req_rw_p;
|
||||
assign mem_req_byteen_s = mem_req_byteen_p;
|
||||
assign mem_req_data_s = mem_req_data_p;
|
||||
end
|
||||
end else begin
|
||||
`UNUSED_VAR (mem_req_byteen_p)
|
||||
`UNUSED_VAR (mem_req_wsel_p)
|
||||
`UNUSED_VAR (mem_req_data_p)
|
||||
`UNUSED_VAR (mem_req_rw_p)
|
||||
|
||||
assign mem_req_rw_s = 0;
|
||||
assign mem_req_byteen_s = {LINE_SIZE{1'b1}};
|
||||
assign mem_req_data_s = '0;
|
||||
if (NUM_BANKS > 1) begin : g_mem_req_tag_multibanks
|
||||
wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr);
|
||||
assign mem_req_tag = MEM_TAG_WIDTH'({bank_mem_req_tag, mem_req_bank_id});
|
||||
end else begin : g_mem_req_tag
|
||||
assign mem_req_tag = MEM_TAG_WIDTH'(bank_mem_req_tag);
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
// per cycle: core_reads, core_writes
|
||||
wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle;
|
||||
wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle;
|
||||
|
||||
|
||||
wire [NUM_REQS-1:0] perf_core_reads_per_req;
|
||||
wire [NUM_REQS-1:0] perf_core_writes_per_req;
|
||||
|
||||
|
||||
// per cycle: read misses, write misses, msrq stalls, pipeline stalls
|
||||
wire [`CLOG2(NUM_BANKS+1)-1:0] perf_read_miss_per_cycle;
|
||||
wire [`CLOG2(NUM_BANKS+1)-1:0] perf_write_miss_per_cycle;
|
||||
|
@ -539,16 +541,16 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
|
||||
`BUFFER(perf_core_reads_per_req, core_req_valid & core_req_ready & ~core_req_rw);
|
||||
`BUFFER(perf_core_writes_per_req, core_req_valid & core_req_ready & core_req_rw);
|
||||
|
||||
|
||||
`POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_req);
|
||||
`POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_req);
|
||||
`POP_COUNT(perf_read_miss_per_cycle, perf_read_miss_per_bank);
|
||||
`POP_COUNT(perf_write_miss_per_cycle, perf_write_miss_per_bank);
|
||||
`POP_COUNT(perf_mshr_stall_per_cycle, perf_mshr_stall_per_bank);
|
||||
|
||||
|
||||
wire [NUM_REQS-1:0] perf_crsp_stall_per_req;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign perf_crsp_stall_per_req[i] = core_bus_if[i].rsp_valid && ~core_bus_if[i].rsp_ready;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_perf_crsp_stall_per_req
|
||||
assign perf_crsp_stall_per_req[i] = core_bus2_if[i].rsp_valid && ~core_bus2_if[i].rsp_ready;
|
||||
end
|
||||
|
||||
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_req);
|
||||
|
@ -561,7 +563,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
reg [`PERF_CTR_BITS-1:0] perf_write_misses;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mshr_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
|
503
hw/rtl/cache/VX_cache_bank.sv
vendored
503
hw/rtl/cache/VX_cache_bank.sv
vendored
|
@ -41,19 +41,26 @@ module VX_cache_bank #(
|
|||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
|
||||
// Enable dirty bytes on writeback
|
||||
parameter DIRTY_BYTES = 0,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
// core request tag size
|
||||
parameter TAG_WIDTH = UUID_WIDTH + 1,
|
||||
|
||||
// Core response output buffer
|
||||
parameter CORE_OUT_BUF = 0,
|
||||
// Core response output register
|
||||
parameter CORE_OUT_REG = 0,
|
||||
|
||||
// Memory request output buffer
|
||||
parameter MEM_OUT_BUF = 0,
|
||||
// Memory request output register
|
||||
parameter MEM_OUT_REG = 0,
|
||||
|
||||
parameter MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE),
|
||||
parameter MEM_TAG_WIDTH = UUID_WIDTH + MSHR_ADDR_WIDTH,
|
||||
parameter REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS),
|
||||
parameter WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS)
|
||||
) (
|
||||
|
@ -69,12 +76,13 @@ module VX_cache_bank #(
|
|||
// Core Request
|
||||
input wire core_req_valid,
|
||||
input wire [`CS_LINE_ADDR_WIDTH-1:0] core_req_addr,
|
||||
input wire core_req_rw,
|
||||
input wire [WORD_SEL_WIDTH-1:0] core_req_wsel,
|
||||
input wire [WORD_SIZE-1:0] core_req_byteen,
|
||||
input wire [`CS_WORD_WIDTH-1:0] core_req_data,
|
||||
input wire [TAG_WIDTH-1:0] core_req_tag,
|
||||
input wire [REQ_SEL_WIDTH-1:0] core_req_idx,
|
||||
input wire core_req_rw, // write enable
|
||||
input wire [WORD_SEL_WIDTH-1:0] core_req_wsel, // select the word in a cacheline, e.g. word size = 4 bytes, cacheline size = 64 bytes, it should have log(64/4)= 4 bits
|
||||
input wire [WORD_SIZE-1:0] core_req_byteen,// which bytes in data to write
|
||||
input wire [`CS_WORD_WIDTH-1:0] core_req_data, // data to be written
|
||||
input wire [TAG_WIDTH-1:0] core_req_tag, // identifier of the request (request id)
|
||||
input wire [REQ_SEL_WIDTH-1:0] core_req_idx, // index of the request in the core request array
|
||||
input wire core_req_flush, // flush enable
|
||||
output wire core_req_ready,
|
||||
|
||||
// Core Response
|
||||
|
@ -88,21 +96,22 @@ module VX_cache_bank #(
|
|||
output wire mem_req_valid,
|
||||
output wire [`CS_LINE_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire mem_req_rw,
|
||||
output wire [WORD_SEL_WIDTH-1:0] mem_req_wsel,
|
||||
output wire [WORD_SIZE-1:0] mem_req_byteen,
|
||||
output wire [`CS_WORD_WIDTH-1:0] mem_req_data,
|
||||
output wire [MSHR_ADDR_WIDTH-1:0] mem_req_id,
|
||||
output wire [LINE_SIZE-1:0] mem_req_byteen,
|
||||
output wire [`CS_LINE_WIDTH-1:0] mem_req_data,
|
||||
output wire [MEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
output wire mem_req_flush,
|
||||
input wire mem_req_ready,
|
||||
|
||||
// Memory response
|
||||
input wire mem_rsp_valid,
|
||||
input wire [`CS_LINE_WIDTH-1:0] mem_rsp_data,
|
||||
input wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id,
|
||||
input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
output wire mem_rsp_ready,
|
||||
|
||||
// initialization
|
||||
input wire init_enable,
|
||||
input wire [`CS_LINE_SEL_BITS-1:0] init_line_sel
|
||||
// flush
|
||||
input wire flush_begin,
|
||||
input wire [`UP(UUID_WIDTH)-1:0] flush_uuid,
|
||||
output wire flush_end
|
||||
);
|
||||
|
||||
localparam PIPELINE_STAGES = 2;
|
||||
|
@ -113,6 +122,7 @@ module VX_cache_bank #(
|
|||
|
||||
wire crsp_queue_stall;
|
||||
wire mshr_alm_full;
|
||||
wire mreq_queue_empty;
|
||||
wire mreq_queue_alm_full;
|
||||
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] mem_rsp_addr;
|
||||
|
@ -128,173 +138,269 @@ module VX_cache_bank #(
|
|||
wire [MSHR_ADDR_WIDTH-1:0] replay_id;
|
||||
wire replay_ready;
|
||||
|
||||
wire is_init_st0, is_init_st1;
|
||||
wire is_flush_st0, is_flush_st1;
|
||||
wire [NUM_WAYS-1:0] flush_way_st0;
|
||||
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1;
|
||||
wire rw_st0, rw_st1;
|
||||
wire [WORD_SEL_WIDTH-1:0] wsel_st0, wsel_st1;
|
||||
wire [WORD_SIZE-1:0] byteen_st0, byteen_st1;
|
||||
wire [REQ_SEL_WIDTH-1:0] req_idx_st0, req_idx_st1;
|
||||
wire [TAG_WIDTH-1:0] tag_st0, tag_st1;
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_sel_st0, line_sel_st1;
|
||||
wire rw_sel, rw_st0, rw_st1;
|
||||
wire [WORD_SEL_WIDTH-1:0] wsel_sel, wsel_st0, wsel_st1;
|
||||
wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1;
|
||||
wire [REQ_SEL_WIDTH-1:0] req_idx_sel, req_idx_st0, req_idx_st1;
|
||||
wire [TAG_WIDTH-1:0] tag_sel, tag_st0, tag_st1;
|
||||
wire [`CS_WORD_WIDTH-1:0] read_data_st1;
|
||||
wire [`CS_LINE_WIDTH-1:0] data_sel, data_st0, data_st1;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0, mshr_id_st0, mshr_id_st1;
|
||||
wire valid_sel, valid_st0, valid_st1;
|
||||
wire is_init_st0;
|
||||
wire is_creq_st0, is_creq_st1;
|
||||
wire is_fill_st0, is_fill_st1;
|
||||
wire is_replay_st0, is_replay_st1;
|
||||
wire creq_flush_sel, creq_flush_st0, creq_flush_st1;
|
||||
wire evict_dirty_st0, evict_dirty_st1;
|
||||
wire [NUM_WAYS-1:0] way_sel_st0, way_sel_st1;
|
||||
wire [NUM_WAYS-1:0] tag_matches_st0;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_prev_st0, mshr_prev_st1;
|
||||
wire mshr_pending_st0, mshr_pending_st1;
|
||||
wire mshr_empty;
|
||||
|
||||
wire rdw_hazard_st0;
|
||||
reg rdw_hazard_st1;
|
||||
wire flush_valid;
|
||||
wire init_valid;
|
||||
wire [`CS_LINE_SEL_BITS-1:0] flush_sel;
|
||||
wire [NUM_WAYS-1:0] flush_way;
|
||||
wire flush_ready;
|
||||
|
||||
wire pipe_stall = crsp_queue_stall || rdw_hazard_st1;
|
||||
// ensure we have no pending memory request in the bank
|
||||
wire no_pending_req = ~valid_st0 && ~valid_st1 && mreq_queue_empty;
|
||||
|
||||
// flush unit
|
||||
VX_bank_flush #(
|
||||
.BANK_ID (BANK_ID),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_WAYS (NUM_WAYS),
|
||||
.WRITEBACK (WRITEBACK)
|
||||
) flush_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.flush_begin (flush_begin),
|
||||
.flush_end (flush_end),
|
||||
.flush_init (init_valid),
|
||||
.flush_valid (flush_valid),
|
||||
.flush_line (flush_sel),
|
||||
.flush_way (flush_way),
|
||||
.flush_ready (flush_ready),
|
||||
.mshr_empty (mshr_empty),
|
||||
.bank_empty (no_pending_req)
|
||||
);
|
||||
|
||||
wire rdw_hazard1_sel;
|
||||
wire rdw_hazard2_sel;
|
||||
reg rdw_hazard3_st1;
|
||||
|
||||
wire pipe_stall = crsp_queue_stall || rdw_hazard3_st1;
|
||||
|
||||
// inputs arbitration:
|
||||
// mshr replay has highest priority to maximize utilization since there is no miss.
|
||||
// handle memory responses next to prevent deadlock with potential memory request from a miss.
|
||||
wire replay_grant = ~init_enable;
|
||||
// flush has precedence over core requests to ensure that the cache is in a consistent state.
|
||||
wire replay_grant = ~init_valid;
|
||||
wire replay_enable = replay_grant && replay_valid;
|
||||
|
||||
wire fill_grant = ~init_enable && ~replay_enable;
|
||||
wire fill_grant = ~init_valid && ~replay_enable;
|
||||
wire fill_enable = fill_grant && mem_rsp_valid;
|
||||
|
||||
wire creq_grant = ~init_enable && ~replay_enable && ~fill_enable;
|
||||
wire flush_grant = ~init_valid && ~replay_enable && ~fill_enable;
|
||||
wire flush_enable = flush_grant && flush_valid;
|
||||
|
||||
wire creq_grant = ~init_valid && ~replay_enable && ~fill_enable && ~flush_enable;
|
||||
wire creq_enable = creq_grant && core_req_valid;
|
||||
|
||||
assign replay_ready = replay_grant
|
||||
&& ~rdw_hazard_st0
|
||||
&& ~pipe_stall;
|
||||
&& ~rdw_hazard1_sel
|
||||
&& ~pipe_stall;
|
||||
|
||||
assign mem_rsp_ready = fill_grant
|
||||
&& (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions
|
||||
&& ~rdw_hazard2_sel
|
||||
&& ~pipe_stall;
|
||||
|
||||
assign flush_ready = flush_grant
|
||||
&& (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions
|
||||
&& ~rdw_hazard2_sel
|
||||
&& ~pipe_stall;
|
||||
|
||||
assign core_req_ready = creq_grant
|
||||
&& ~mreq_queue_alm_full
|
||||
&& ~mshr_alm_full
|
||||
&& ~pipe_stall;
|
||||
&& ~mreq_queue_alm_full
|
||||
&& ~mshr_alm_full
|
||||
&& ~pipe_stall;
|
||||
|
||||
wire init_fire = init_enable;
|
||||
wire init_fire = init_valid;
|
||||
wire replay_fire = replay_valid && replay_ready;
|
||||
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
|
||||
wire flush_fire = flush_valid && flush_ready;
|
||||
wire core_req_fire = core_req_valid && core_req_ready;
|
||||
|
||||
wire [TAG_WIDTH-1:0] mshr_creq_tag = replay_enable ? replay_tag : core_req_tag;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id = mem_rsp_tag[MSHR_ADDR_WIDTH-1:0];
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
assign req_uuid_sel = mshr_creq_tag[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
end else begin
|
||||
assign req_uuid_sel = 0;
|
||||
wire [TAG_WIDTH-1:0] mem_rsp_tag_s;
|
||||
if (TAG_WIDTH > MEM_TAG_WIDTH) begin : g_mem_rsp_tag_s_pad
|
||||
assign mem_rsp_tag_s = {mem_rsp_tag, (TAG_WIDTH-MEM_TAG_WIDTH)'(1'b0)};
|
||||
end else begin : g_mem_rsp_tag_s_cut
|
||||
assign mem_rsp_tag_s = mem_rsp_tag[MEM_TAG_WIDTH-1 -: TAG_WIDTH];
|
||||
`UNUSED_VAR (mem_rsp_tag)
|
||||
end
|
||||
|
||||
`UNUSED_VAR (mshr_creq_tag)
|
||||
wire [TAG_WIDTH-1:0] flush_tag;
|
||||
if (UUID_WIDTH != 0) begin : g_flush_tag_uuid
|
||||
assign flush_tag = {flush_uuid, (TAG_WIDTH-UUID_WIDTH)'(1'b0)};
|
||||
end else begin : g_flush_tag_0
|
||||
`UNUSED_VAR (flush_uuid)
|
||||
assign flush_tag = '0;
|
||||
end
|
||||
|
||||
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || core_req_fire;
|
||||
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || flush_fire || core_req_fire;
|
||||
assign rw_sel = replay_valid ? replay_rw : core_req_rw;
|
||||
assign byteen_sel = replay_valid ? replay_byteen : core_req_byteen;
|
||||
assign wsel_sel = replay_valid ? replay_wsel : core_req_wsel;
|
||||
assign req_idx_sel = replay_valid ? replay_idx : core_req_idx;
|
||||
assign tag_sel = (init_valid | flush_valid) ? (flush_valid ? flush_tag : '0) :
|
||||
(replay_valid ? replay_tag : (mem_rsp_valid ? mem_rsp_tag_s : core_req_tag));
|
||||
assign creq_flush_sel = core_req_valid && core_req_flush;
|
||||
|
||||
assign addr_sel = init_enable ? `CS_LINE_ADDR_WIDTH'(init_line_sel) :
|
||||
(replay_valid ? replay_addr :
|
||||
(mem_rsp_valid ? mem_rsp_addr : core_req_addr));
|
||||
assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) :
|
||||
(replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr));
|
||||
|
||||
assign data_sel[`CS_WORD_WIDTH-1:0] = (mem_rsp_valid || !WRITE_ENABLE) ? mem_rsp_data[`CS_WORD_WIDTH-1:0] : (replay_valid ? replay_data : core_req_data);
|
||||
for (genvar i = `CS_WORD_WIDTH; i < `CS_LINE_WIDTH; ++i) begin
|
||||
assign data_sel[i] = mem_rsp_data[i];
|
||||
if (WRITE_ENABLE) begin : g_data_sel
|
||||
for (genvar i = 0; i < `CS_LINE_WIDTH; ++i) begin : g_i
|
||||
if (i < `CS_WORD_WIDTH) begin : g_lo
|
||||
assign data_sel[i] = replay_valid ? replay_data[i] : (mem_rsp_valid ? mem_rsp_data[i] : core_req_data[i]);
|
||||
end else begin : g_hi
|
||||
assign data_sel[i] = mem_rsp_data[i]; // only the memory response fills the upper words of data_sel
|
||||
end
|
||||
end
|
||||
end else begin : g_data_sel_ro
|
||||
assign data_sel = mem_rsp_data;
|
||||
`UNUSED_VAR (core_req_data)
|
||||
`UNUSED_VAR (replay_data)
|
||||
end
|
||||
|
||||
if (UUID_WIDTH != 0) begin : g_req_uuid_sel
|
||||
assign req_uuid_sel = tag_sel[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
end else begin : g_req_uuid_sel_0
|
||||
assign req_uuid_sel = '0;
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH),
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + NUM_WAYS + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH),
|
||||
.RESETW (1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~pipe_stall),
|
||||
.data_in ({
|
||||
valid_sel,
|
||||
init_enable,
|
||||
replay_enable,
|
||||
fill_enable,
|
||||
creq_enable,
|
||||
addr_sel,
|
||||
data_sel,
|
||||
replay_valid ? replay_rw : core_req_rw,
|
||||
replay_valid ? replay_byteen : core_req_byteen,
|
||||
replay_valid ? replay_wsel : core_req_wsel,
|
||||
replay_valid ? replay_idx : core_req_idx,
|
||||
replay_valid ? replay_tag : core_req_tag,
|
||||
replay_id
|
||||
}),
|
||||
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_creq_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0})
|
||||
.data_in ({valid_sel, init_valid, replay_enable, fill_enable, flush_enable, creq_enable, creq_flush_sel, flush_way, addr_sel, data_sel, rw_sel, byteen_sel, wsel_sel, req_idx_sel, tag_sel, replay_id}),
|
||||
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, creq_flush_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0})
|
||||
);
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
if (UUID_WIDTH != 0) begin : g_req_uuid_st0
|
||||
assign req_uuid_st0 = tag_st0[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
end else begin
|
||||
assign req_uuid_st0 = 0;
|
||||
end else begin : g_req_uuid_st0_0
|
||||
assign req_uuid_st0 = '0;
|
||||
end
|
||||
|
||||
wire do_creq_rd_st0 = valid_st0 && is_creq_st0 && ~rw_st0;
|
||||
wire do_fill_st0 = valid_st0 && is_fill_st0;
|
||||
wire do_init_st0 = valid_st0 && is_init_st0;
|
||||
wire do_lookup_st0 = valid_st0 && ~(is_fill_st0 || is_init_st0);
|
||||
wire do_flush_st0 = valid_st0 && is_flush_st0;
|
||||
wire do_creq_rd_st0 = valid_st0 && is_creq_st0 && ~rw_st0;
|
||||
wire do_creq_wr_st0 = valid_st0 && is_creq_st0 && rw_st0;
|
||||
wire do_replay_rd_st0 = valid_st0 && is_replay_st0 && ~rw_st0;
|
||||
wire do_replay_wr_st0 = valid_st0 && is_replay_st0 && rw_st0;
|
||||
wire do_fill_st0 = valid_st0 && is_fill_st0;
|
||||
wire do_cache_rd_st0 = do_creq_rd_st0 || do_replay_rd_st0;
|
||||
wire do_cache_wr_st0 = do_creq_wr_st0 || do_replay_wr_st0;
|
||||
wire do_lookup_st0 = do_cache_rd_st0 || do_cache_wr_st0;
|
||||
|
||||
wire [`CS_WORD_WIDTH-1:0] write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0];
|
||||
|
||||
wire [NUM_WAYS-1:0] tag_matches_st0, tag_matches_st1;
|
||||
wire [NUM_WAYS-1:0] way_sel_st0, way_sel_st1;
|
||||
assign line_sel_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
`RESET_RELAY (tag_reset, reset);
|
||||
wire [NUM_WAYS-1:0] evict_way_st0;
|
||||
wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st0;
|
||||
|
||||
VX_cache_tags #(
|
||||
.INSTANCE_ID(INSTANCE_ID),
|
||||
.INSTANCE_ID($sformatf("%s-tags", INSTANCE_ID)),
|
||||
.BANK_ID (BANK_ID),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_WAYS (NUM_WAYS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.UUID_WIDTH (UUID_WIDTH)
|
||||
) cache_tags (
|
||||
.clk (clk),
|
||||
.reset (tag_reset),
|
||||
.reset (reset),
|
||||
|
||||
.req_uuid (req_uuid_st0),
|
||||
|
||||
.stall (pipe_stall),
|
||||
|
||||
// read/Fill
|
||||
// init/flush/fill/write/lookup
|
||||
.init (do_init_st0),
|
||||
.flush (do_flush_st0),
|
||||
.fill (do_fill_st0),
|
||||
.write (do_cache_wr_st0),
|
||||
.lookup (do_lookup_st0),
|
||||
.line_addr (addr_st0),
|
||||
.fill (do_fill_st0),
|
||||
.init (do_init_st0),
|
||||
.way_sel (way_sel_st0),
|
||||
.tag_matches(tag_matches_st0)
|
||||
.way_sel (flush_way_st0),
|
||||
.tag_matches(tag_matches_st0),
|
||||
|
||||
// replacement
|
||||
.evict_dirty(evict_dirty_st0),
|
||||
.evict_way (evict_way_st0),
|
||||
.evict_tag (evict_tag_st0)
|
||||
);
|
||||
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] addr2_st0;
|
||||
|
||||
wire is_flush2_st0 = WRITEBACK && is_flush_st0;
|
||||
|
||||
assign mshr_id_st0 = is_creq_st0 ? mshr_alloc_id_st0 : replay_id_st0;
|
||||
|
||||
assign way_sel_st0 = (is_fill_st0 || is_flush2_st0) ? evict_way_st0 : tag_matches_st0;
|
||||
|
||||
assign addr2_st0 = (is_fill_st0 || is_flush2_st0) ? {evict_tag_st0, line_sel_st0} : addr_st0;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + NUM_WAYS + 1),
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~pipe_stall),
|
||||
.data_in ({valid_st0, is_replay_st0, is_fill_st0, is_creq_st0, rw_st0, addr_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, tag_matches_st0, way_sel_st0, mshr_pending_st0}),
|
||||
.data_out ({valid_st1, is_replay_st1, is_fill_st1, is_creq_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, tag_matches_st1, way_sel_st1, mshr_pending_st1})
|
||||
.data_in ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush2_st0, is_creq_st0, creq_flush_st0, rw_st0, addr2_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, way_sel_st0, evict_dirty_st0, mshr_pending_st0}),
|
||||
.data_out ({valid_st1, is_init_st1, is_replay_st1, is_fill_st1, is_flush_st1, is_creq_st1, creq_flush_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, way_sel_st1, evict_dirty_st1, mshr_pending_st1})
|
||||
);
|
||||
|
||||
// we have a tag hit
|
||||
wire is_hit_st1 = (| tag_matches_st1);
|
||||
wire is_hit_st1 = (| way_sel_st1);
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
if (UUID_WIDTH != 0) begin : g_req_uuid_st1
|
||||
assign req_uuid_st1 = tag_st1[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
end else begin
|
||||
assign req_uuid_st1 = 0;
|
||||
end else begin : g_req_uuid_st1_0
|
||||
assign req_uuid_st1 = '0;
|
||||
end
|
||||
|
||||
wire do_creq_rd_st1 = valid_st1 && is_creq_st1 && ~rw_st1;
|
||||
wire do_creq_wr_st1 = valid_st1 && is_creq_st1 && rw_st1;
|
||||
wire is_read_st1 = is_creq_st1 && ~rw_st1;
|
||||
wire is_write_st1 = is_creq_st1 && rw_st1;
|
||||
|
||||
wire do_init_st1 = valid_st1 && is_init_st1;
|
||||
wire do_fill_st1 = valid_st1 && is_fill_st1;
|
||||
wire do_flush_st1 = valid_st1 && is_flush_st1;
|
||||
|
||||
wire do_creq_rd_st1 = valid_st1 && is_read_st1;
|
||||
wire do_creq_wr_st1 = valid_st1 && is_write_st1;
|
||||
wire do_replay_rd_st1 = valid_st1 && is_replay_st1 && ~rw_st1;
|
||||
wire do_replay_wr_st1 = valid_st1 && is_replay_st1 && rw_st1;
|
||||
|
||||
|
@ -304,25 +410,46 @@ module VX_cache_bank #(
|
|||
wire do_write_hit_st1 = do_creq_wr_st1 && is_hit_st1;
|
||||
wire do_write_miss_st1= do_creq_wr_st1 && ~is_hit_st1;
|
||||
|
||||
wire do_cache_rd_st1 = do_read_hit_st1 || do_replay_rd_st1;
|
||||
wire do_cache_wr_st1 = do_write_hit_st1 || do_replay_wr_st1;
|
||||
|
||||
assign line_sel_st1 = addr_st1[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
`UNUSED_VAR (do_write_miss_st1)
|
||||
|
||||
// ensure mshr replay always get a hit
|
||||
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("runtime error: invalid mshr replay"));
|
||||
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("%t: missed mshr replay", $time))
|
||||
|
||||
// detect BRAM's read-during-write hazard
|
||||
assign rdw_hazard_st0 = do_fill_st0; // after a fill
|
||||
// both tag and data stores use BRAM with no read-during-write protection.
|
||||
// we need to stall the pipeline to prevent read-after-write hazards.
|
||||
assign rdw_hazard1_sel = do_fill_st0; // stall first replay following a fill
|
||||
assign rdw_hazard2_sel = WRITEBACK && do_cache_wr_st0; // a writeback can evict any preceding write
|
||||
always @(posedge clk) begin
|
||||
rdw_hazard_st1 <= (do_creq_rd_st0 && do_write_hit_st1 && (addr_st0 == addr_st1))
|
||||
&& ~rdw_hazard_st1; // after a write to same address
|
||||
// stall reads following writes to same line address
|
||||
rdw_hazard3_st1 <= do_cache_rd_st0 && do_cache_wr_st1 && (line_sel_st0 == line_sel_st1)
|
||||
&& ~rdw_hazard3_st1; // release pipeline stall
|
||||
end
|
||||
|
||||
wire [`CS_WORD_WIDTH-1:0] write_data_st1 = data_st1[`CS_WORD_WIDTH-1:0];
|
||||
wire [`CS_LINE_WIDTH-1:0] write_data_st1 = {`CS_WORDS_PER_LINE{data_st1[`CS_WORD_WIDTH-1:0]}};
|
||||
wire [`CS_LINE_WIDTH-1:0] fill_data_st1 = data_st1;
|
||||
wire [LINE_SIZE-1:0] write_byteen_st1;
|
||||
|
||||
`RESET_RELAY (data_reset, reset);
|
||||
wire [`CS_LINE_WIDTH-1:0] dirty_data_st1;
|
||||
wire [LINE_SIZE-1:0] dirty_byteen_st1;
|
||||
|
||||
if (`CS_WORDS_PER_LINE > 1) begin : g_write_byteen_st1_wsel
|
||||
reg [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen_w;
|
||||
always @(*) begin
|
||||
write_byteen_w = '0;
|
||||
write_byteen_w[wsel_st1] = byteen_st1;
|
||||
end
|
||||
assign write_byteen_st1 = write_byteen_w;
|
||||
end else begin : g_write_byteen_st1
|
||||
assign write_byteen_st1 = byteen_st1;
|
||||
end
|
||||
|
||||
VX_cache_data #(
|
||||
.INSTANCE_ID (INSTANCE_ID),
|
||||
.INSTANCE_ID ($sformatf("%s-data", INSTANCE_ID)),
|
||||
.BANK_ID (BANK_ID),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
|
@ -330,32 +457,49 @@ module VX_cache_bank #(
|
|||
.NUM_WAYS (NUM_WAYS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.DIRTY_BYTES (DIRTY_BYTES),
|
||||
.UUID_WIDTH (UUID_WIDTH)
|
||||
) cache_data (
|
||||
.clk (clk),
|
||||
.reset (data_reset),
|
||||
.reset (reset),
|
||||
|
||||
.req_uuid (req_uuid_st1),
|
||||
|
||||
.stall (pipe_stall),
|
||||
|
||||
.read (do_read_hit_st1 || do_replay_rd_st1),
|
||||
.init (do_init_st1),
|
||||
.read (do_cache_rd_st1),
|
||||
.fill (do_fill_st1),
|
||||
.write (do_write_hit_st1 || do_replay_wr_st1),
|
||||
.way_sel (way_sel_st1 | tag_matches_st1),
|
||||
.flush (do_flush_st1),
|
||||
.write (do_cache_wr_st1),
|
||||
.way_sel (way_sel_st1),
|
||||
.line_addr (addr_st1),
|
||||
.wsel (wsel_st1),
|
||||
.byteen (byteen_st1),
|
||||
.fill_data (fill_data_st1),
|
||||
.write_data (write_data_st1),
|
||||
.read_data (read_data_st1)
|
||||
.write_byteen(write_byteen_st1),
|
||||
.read_data (read_data_st1),
|
||||
.dirty_data (dirty_data_st1),
|
||||
.dirty_byteen(dirty_byteen_st1)
|
||||
);
|
||||
|
||||
wire [MSHR_SIZE-1:0] mshr_matches_st0;
|
||||
wire [MSHR_SIZE-1:0] mshr_lookup_pending_st0;
|
||||
wire [MSHR_SIZE-1:0] mshr_lookup_rw_st0;
|
||||
wire mshr_allocate_st0 = valid_st0 && is_creq_st0 && ~pipe_stall;
|
||||
wire mshr_lookup_st0 = mshr_allocate_st0;
|
||||
wire mshr_finalize_st1 = valid_st1 && is_creq_st1 && ~pipe_stall;
|
||||
wire mshr_release_st1 = is_hit_st1 || (rw_st1 && ~mshr_pending_st1);
|
||||
|
||||
// release allocated mshr entry if we had a hit
|
||||
wire mshr_release_st1;
|
||||
if (WRITEBACK) begin : g_mshr_release_st1
|
||||
assign mshr_release_st1 = is_hit_st1;
|
||||
end else begin : g_mshr_release_st1_ro
|
||||
// we need to keep missed write requests in MSHR if there is already a pending entry to the same address
|
||||
// this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content
|
||||
// this can happen when writes are sent late, when the fill was already in flight.
|
||||
assign mshr_release_st1 = is_hit_st1 || (rw_st1 && ~mshr_pending_st1);
|
||||
end
|
||||
|
||||
VX_pending_size #(
|
||||
.SIZE (MSHR_SIZE)
|
||||
|
@ -364,15 +508,15 @@ module VX_cache_bank #(
|
|||
.reset (reset),
|
||||
.incr (core_req_fire),
|
||||
.decr (replay_fire || (mshr_finalize_st1 && mshr_release_st1)),
|
||||
.empty (mshr_empty),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
.full (mshr_alm_full),
|
||||
`UNUSED_PIN (size),
|
||||
`UNUSED_PIN (empty)
|
||||
`UNUSED_PIN (alm_full),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
`RESET_RELAY (mshr_reset, reset);
|
||||
|
||||
VX_cache_mshr #(
|
||||
.INSTANCE_ID (INSTANCE_ID),
|
||||
.INSTANCE_ID ($sformatf("%s-mshr", INSTANCE_ID)),
|
||||
.BANK_ID (BANK_ID),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
|
@ -381,7 +525,7 @@ module VX_cache_bank #(
|
|||
.DATA_WIDTH (WORD_SEL_WIDTH + WORD_SIZE + `CS_WORD_WIDTH + TAG_WIDTH + REQ_SEL_WIDTH)
|
||||
) cache_mshr (
|
||||
.clk (clk),
|
||||
.reset (mshr_reset),
|
||||
.reset (reset),
|
||||
|
||||
.deq_req_uuid (req_uuid_sel),
|
||||
.lkp_req_uuid (req_uuid_st0),
|
||||
|
@ -412,7 +556,8 @@ module VX_cache_bank #(
|
|||
// lookup
|
||||
.lookup_valid (mshr_lookup_st0),
|
||||
.lookup_addr (addr_st0),
|
||||
.lookup_matches (mshr_matches_st0),
|
||||
.lookup_pending (mshr_lookup_pending_st0),
|
||||
.lookup_rw (mshr_lookup_rw_st0),
|
||||
|
||||
// finalize
|
||||
.finalize_valid (mshr_finalize_st1),
|
||||
|
@ -422,10 +567,12 @@ module VX_cache_bank #(
|
|||
.finalize_prev (mshr_prev_st1)
|
||||
);
|
||||
|
||||
// ignore allocated id from mshr matches
|
||||
// check if there are pending requests to same line in the MSHR
|
||||
wire [MSHR_SIZE-1:0] lookup_matches;
|
||||
for (genvar i = 0; i < MSHR_SIZE; ++i) begin
|
||||
assign lookup_matches[i] = (i != mshr_alloc_id_st0) && mshr_matches_st0[i];
|
||||
for (genvar i = 0; i < MSHR_SIZE; ++i) begin : g_lookup_matches
|
||||
assign lookup_matches[i] = mshr_lookup_pending_st0[i]
|
||||
&& (i != mshr_alloc_id_st0) // exclude current mshr id
|
||||
&& (WRITEBACK || ~mshr_lookup_rw_st0[i]); // exclude write requests if writethrough
|
||||
end
|
||||
assign mshr_pending_st0 = (| lookup_matches);
|
||||
|
||||
|
@ -436,21 +583,19 @@ module VX_cache_bank #(
|
|||
wire [REQ_SEL_WIDTH-1:0] crsp_queue_idx;
|
||||
wire [TAG_WIDTH-1:0] crsp_queue_tag;
|
||||
|
||||
assign crsp_queue_valid = do_read_hit_st1 || do_replay_rd_st1;
|
||||
assign crsp_queue_valid = do_cache_rd_st1;
|
||||
assign crsp_queue_idx = req_idx_st1;
|
||||
assign crsp_queue_data = read_data_st1;
|
||||
assign crsp_queue_tag = tag_st1;
|
||||
|
||||
`RESET_RELAY (crsp_queue_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (TAG_WIDTH + `CS_WORD_WIDTH + REQ_SEL_WIDTH),
|
||||
.SIZE (CRSQ_SIZE),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF))
|
||||
.OUT_REG (CORE_OUT_REG)
|
||||
) core_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (crsp_queue_reset),
|
||||
.valid_in (crsp_queue_valid && ~rdw_hazard_st1),
|
||||
.reset (reset),
|
||||
.valid_in (crsp_queue_valid && ~rdw_hazard3_st1),
|
||||
.ready_in (crsp_queue_ready),
|
||||
.data_in ({crsp_queue_tag, crsp_queue_data, crsp_queue_idx}),
|
||||
.data_out ({core_rsp_tag, core_rsp_data, core_rsp_idx}),
|
||||
|
@ -462,40 +607,77 @@ module VX_cache_bank #(
|
|||
|
||||
// schedule memory request
|
||||
|
||||
wire mreq_queue_push, mreq_queue_pop, mreq_queue_empty;
|
||||
wire [`CS_WORD_WIDTH-1:0] mreq_queue_data;
|
||||
wire [WORD_SIZE-1:0] mreq_queue_byteen;
|
||||
wire [WORD_SEL_WIDTH-1:0] mreq_queue_wsel;
|
||||
wire mreq_queue_push, mreq_queue_pop;
|
||||
wire [`CS_LINE_WIDTH-1:0] mreq_queue_data;
|
||||
wire [LINE_SIZE-1:0] mreq_queue_byteen;
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] mreq_queue_addr;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mreq_queue_id;
|
||||
wire [MEM_TAG_WIDTH-1:0] mreq_queue_tag;
|
||||
wire mreq_queue_rw;
|
||||
wire mreq_queue_flush;
|
||||
|
||||
assign mreq_queue_push = (do_read_miss_st1 && ~mshr_pending_st1)
|
||||
|| do_creq_wr_st1;
|
||||
wire is_fill_or_flush_st1 = is_fill_st1 || is_flush_st1;
|
||||
wire do_fill_or_flush_st1 = valid_st1 && is_fill_or_flush_st1;
|
||||
wire do_writeback_st1 = do_fill_or_flush_st1 && evict_dirty_st1;
|
||||
|
||||
assign mreq_queue_pop = mem_req_valid && mem_req_ready;
|
||||
if (WRITEBACK) begin : g_mreq_queue_push
|
||||
if (DIRTY_BYTES) begin : g_dirty_bytes
|
||||
// ensure dirty bytes match the tag info
|
||||
wire has_dirty_bytes = (| dirty_byteen_st1);
|
||||
`RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID)))
|
||||
end
|
||||
assign mreq_queue_push = (((do_read_miss_st1 || do_write_miss_st1) && ~mshr_pending_st1)
|
||||
|| do_writeback_st1)
|
||||
&& ~rdw_hazard3_st1;
|
||||
end else begin : g_mreq_queue_push_ro
|
||||
`UNUSED_VAR (do_writeback_st1)
|
||||
assign mreq_queue_push = ((do_read_miss_st1 && ~mshr_pending_st1)
|
||||
|| do_creq_wr_st1)
|
||||
&& ~rdw_hazard3_st1;
|
||||
end
|
||||
|
||||
assign mreq_queue_rw = WRITE_ENABLE && rw_st1;
|
||||
assign mreq_queue_pop = mem_req_valid && mem_req_ready;
|
||||
assign mreq_queue_addr = addr_st1;
|
||||
assign mreq_queue_id = mshr_id_st1;
|
||||
assign mreq_queue_wsel = wsel_st1;
|
||||
assign mreq_queue_byteen = byteen_st1;
|
||||
assign mreq_queue_data = write_data_st1;
|
||||
assign mreq_queue_flush = creq_flush_st1;
|
||||
|
||||
`RESET_RELAY (mreq_queue_reset, reset);
|
||||
if (WRITE_ENABLE) begin : g_mreq_queue
|
||||
if (WRITEBACK) begin : g_writeback
|
||||
assign mreq_queue_rw = is_fill_or_flush_st1;
|
||||
assign mreq_queue_data = dirty_data_st1;
|
||||
assign mreq_queue_byteen = is_fill_or_flush_st1 ? dirty_byteen_st1 : '1;
|
||||
end else begin : g_writethrough
|
||||
assign mreq_queue_rw = rw_st1;
|
||||
assign mreq_queue_data = write_data_st1;
|
||||
assign mreq_queue_byteen = rw_st1 ? write_byteen_st1 : '1;
|
||||
`UNUSED_VAR (is_fill_or_flush_st1)
|
||||
`UNUSED_VAR (dirty_data_st1)
|
||||
`UNUSED_VAR (dirty_byteen_st1)
|
||||
end
|
||||
end else begin : g_mreq_queue_ro
|
||||
assign mreq_queue_rw = 0;
|
||||
assign mreq_queue_data = '0;
|
||||
assign mreq_queue_byteen = '1;
|
||||
`UNUSED_VAR (dirty_data_st1)
|
||||
`UNUSED_VAR (dirty_byteen_st1)
|
||||
end
|
||||
|
||||
if (UUID_WIDTH != 0) begin : g_mreq_queue_tag_uuid
|
||||
assign mreq_queue_tag = {req_uuid_st1, mshr_id_st1};
|
||||
end else begin : g_mreq_queue_tag
|
||||
assign mreq_queue_tag = mshr_id_st1;
|
||||
end
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (1 + `CS_LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + `CS_WORD_WIDTH),
|
||||
.DATAW (1 + `CS_LINE_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1),
|
||||
.DEPTH (MREQ_SIZE),
|
||||
.ALM_FULL (MREQ_SIZE-PIPELINE_STAGES),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
.OUT_REG (MEM_OUT_REG)
|
||||
) mem_req_queue (
|
||||
.clk (clk),
|
||||
.reset (mreq_queue_reset),
|
||||
.reset (reset),
|
||||
.push (mreq_queue_push),
|
||||
.pop (mreq_queue_pop),
|
||||
.data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_id, mreq_queue_byteen, mreq_queue_wsel, mreq_queue_data}),
|
||||
.data_out ({mem_req_rw, mem_req_addr, mem_req_id, mem_req_byteen, mem_req_wsel, mem_req_data}),
|
||||
.data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_byteen, mreq_queue_data, mreq_queue_tag, mreq_queue_flush}),
|
||||
.data_out ({mem_req_rw, mem_req_addr, mem_req_byteen, mem_req_data, mem_req_tag, mem_req_flush}),
|
||||
.empty (mreq_queue_empty),
|
||||
.alm_full (mreq_queue_alm_full),
|
||||
`UNUSED_PIN (full),
|
||||
|
@ -515,35 +697,36 @@ module VX_cache_bank #(
|
|||
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
wire crsp_queue_fire = crsp_queue_valid && crsp_queue_ready;
|
||||
wire pipeline_stall = (replay_valid || mem_rsp_valid || core_req_valid)
|
||||
&& ~(replay_fire || mem_rsp_fire || core_req_fire);
|
||||
wire input_stall = (replay_valid || mem_rsp_valid || core_req_valid || flush_valid)
|
||||
&& ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire);
|
||||
always @(posedge clk) begin
|
||||
if (pipeline_stall) begin
|
||||
`TRACE(3, ("%d: *** %s-bank%0d stall: crsq=%b, mreq=%b, mshr=%b\n", $time, INSTANCE_ID, BANK_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full));
|
||||
end
|
||||
if (init_enable) begin
|
||||
`TRACE(2, ("%d: %s-bank%0d init: addr=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(init_line_sel, BANK_ID)));
|
||||
if (input_stall || pipe_stall) begin
|
||||
`TRACE(3, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1))
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(2, ("%d: %s-bank%0d fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data));
|
||||
`TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data, req_uuid_sel))
|
||||
end
|
||||
if (replay_fire) begin
|
||||
`TRACE(2, ("%d: %s-bank%0d mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel));
|
||||
`TRACE(2, ("%t: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel))
|
||||
end
|
||||
if (core_req_fire) begin
|
||||
if (core_req_rw)
|
||||
`TRACE(2, ("%d: %s-bank%0d core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel));
|
||||
else
|
||||
`TRACE(2, ("%d: %s-bank%0d core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel));
|
||||
if (core_req_rw) begin
|
||||
`TRACE(2, ("%t: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel))
|
||||
end else begin
|
||||
`TRACE(2, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel))
|
||||
end
|
||||
end
|
||||
if (crsp_queue_fire) begin
|
||||
`TRACE(2, ("%d: %s-bank%0d core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1));
|
||||
`TRACE(2, ("%t: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1))
|
||||
end
|
||||
if (mreq_queue_push) begin
|
||||
if (do_creq_wr_st1)
|
||||
`TRACE(2, ("%d: %s-bank%0d writethrough: addr=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1));
|
||||
else
|
||||
`TRACE(2, ("%d: %s-bank%0d fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1));
|
||||
if (do_creq_wr_st1 && !WRITEBACK) begin
|
||||
`TRACE(2, ("%t: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1))
|
||||
end else if (do_writeback_st1) begin
|
||||
`TRACE(2, ("%t: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1))
|
||||
end else begin
|
||||
`TRACE(2, ("%t: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mshr_id_st1, req_uuid_st1))
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
241
hw/rtl/cache/VX_cache_bypass.sv
vendored
241
hw/rtl/cache/VX_cache_bypass.sv
vendored
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -18,16 +18,16 @@ module VX_cache_bypass #(
|
|||
parameter TAG_SEL_IDX = 0,
|
||||
|
||||
parameter PASSTHRU = 0,
|
||||
parameter NC_ENABLE = 0,
|
||||
parameter NC_ENABLE = 0,
|
||||
|
||||
parameter WORD_SIZE = 1,
|
||||
parameter LINE_SIZE = 1,
|
||||
parameter LINE_SIZE = 1,
|
||||
|
||||
parameter CORE_ADDR_WIDTH = 1,
|
||||
|
||||
|
||||
parameter CORE_TAG_WIDTH = 1,
|
||||
|
||||
parameter MEM_ADDR_WIDTH = 1,
|
||||
|
||||
parameter MEM_ADDR_WIDTH = 1,
|
||||
parameter MEM_TAG_IN_WIDTH = 1,
|
||||
parameter MEM_TAG_OUT_WIDTH = 1,
|
||||
|
||||
|
@ -35,9 +35,9 @@ module VX_cache_bypass #(
|
|||
|
||||
parameter CORE_OUT_BUF = 0,
|
||||
parameter MEM_OUT_BUF = 0,
|
||||
|
||||
|
||||
parameter CORE_DATA_WIDTH = WORD_SIZE * 8
|
||||
) (
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
@ -56,7 +56,8 @@ module VX_cache_bypass #(
|
|||
localparam DIRECT_PASSTHRU = PASSTHRU && (`CS_WORD_SEL_BITS == 0) && (NUM_REQS == 1);
|
||||
|
||||
localparam REQ_SEL_BITS = `CLOG2(NUM_REQS);
|
||||
localparam MUX_DATAW = 1 + WORD_SIZE + CORE_ADDR_WIDTH + `ADDR_TYPE_WIDTH + CORE_DATA_WIDTH + CORE_TAG_WIDTH;
|
||||
localparam REQ_SEL_WIDTH = `UP(REQ_SEL_BITS);
|
||||
localparam MUX_DATAW = 1 + WORD_SIZE + CORE_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + CORE_DATA_WIDTH + CORE_TAG_WIDTH;
|
||||
|
||||
localparam WORDS_PER_LINE = LINE_SIZE / WORD_SIZE;
|
||||
localparam WSEL_BITS = `CLOG2(WORDS_PER_LINE);
|
||||
|
@ -71,40 +72,39 @@ module VX_cache_bypass #(
|
|||
|
||||
wire core_req_nc_valid;
|
||||
wire [NUM_REQS-1:0] core_req_nc_valids;
|
||||
wire [NUM_REQS-1:0] core_req_nc_idxs;
|
||||
wire [`UP(REQ_SEL_BITS)-1:0] core_req_nc_idx;
|
||||
wire [NUM_REQS-1:0] core_req_nc_sel;
|
||||
wire [NUM_REQS-1:0] core_req_nc_idxs;
|
||||
wire [REQ_SEL_WIDTH-1:0] core_req_nc_idx;
|
||||
wire [NUM_REQS-1:0] core_req_nc_sel;
|
||||
wire core_req_nc_ready;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
if (PASSTHRU != 0) begin
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_nc
|
||||
if (PASSTHRU != 0) begin : g_passthru
|
||||
assign core_req_nc_idxs[i] = 1'b1;
|
||||
end else if (NC_ENABLE) begin
|
||||
assign core_req_nc_idxs[i] = core_bus_in_if[i].req_data.atype[`ADDR_TYPE_IO];
|
||||
end else begin
|
||||
end else if (NC_ENABLE) begin : g_nc
|
||||
assign core_req_nc_idxs[i] = core_bus_in_if[i].req_data.flags[`MEM_REQ_FLAG_IO];
|
||||
end else begin : g_no_nc
|
||||
assign core_req_nc_idxs[i] = 1'b0;
|
||||
end
|
||||
end
|
||||
assign core_req_nc_valids[i] = core_bus_in_if[i].req_valid && core_req_nc_idxs[i];
|
||||
end
|
||||
end
|
||||
|
||||
VX_generic_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.TYPE (PASSTHRU ? "R" : "P"),
|
||||
.LOCK_ENABLE (1)
|
||||
.TYPE (PASSTHRU ? "R" : "P")
|
||||
) core_req_nc_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (core_req_nc_valids),
|
||||
.reset (reset),
|
||||
.requests (core_req_nc_valids),
|
||||
.grant_index (core_req_nc_idx),
|
||||
.grant_onehot (core_req_nc_sel),
|
||||
.grant_valid (core_req_nc_valid),
|
||||
.grant_unlock (core_req_nc_ready)
|
||||
.grant_ready (core_req_nc_ready)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_out_if
|
||||
assign core_bus_out_if[i].req_valid = core_bus_in_if[i].req_valid && ~core_req_nc_idxs[i];
|
||||
assign core_bus_out_if[i].req_data = core_bus_in_if[i].req_data;
|
||||
assign core_bus_in_if[i].req_ready = core_req_nc_valids[i] ? (core_req_nc_ready && core_req_nc_sel[i])
|
||||
assign core_bus_in_if[i].req_ready = core_req_nc_valids[i] ? (core_req_nc_ready && core_req_nc_sel[i])
|
||||
: core_bus_out_if[i].req_ready;
|
||||
end
|
||||
|
||||
|
@ -114,37 +114,37 @@ module VX_cache_bypass #(
|
|||
wire mem_req_out_rw;
|
||||
wire [LINE_SIZE-1:0] mem_req_out_byteen;
|
||||
wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_out_addr;
|
||||
wire [`ADDR_TYPE_WIDTH-1:0] mem_req_out_atype;
|
||||
wire [`MEM_REQ_FLAGS_WIDTH-1:0] mem_req_out_flags;
|
||||
wire [`CS_LINE_WIDTH-1:0] mem_req_out_data;
|
||||
wire [MEM_TAG_OUT_WIDTH-1:0] mem_req_out_tag;
|
||||
wire mem_req_out_ready;
|
||||
|
||||
|
||||
wire core_req_nc_sel_rw;
|
||||
wire [WORD_SIZE-1:0] core_req_nc_sel_byteen;
|
||||
wire [CORE_ADDR_WIDTH-1:0] core_req_nc_sel_addr;
|
||||
wire [`ADDR_TYPE_WIDTH-1:0] core_req_nc_sel_atype;
|
||||
wire [`MEM_REQ_FLAGS_WIDTH-1:0] core_req_nc_sel_flags;
|
||||
wire [CORE_DATA_WIDTH-1:0] core_req_nc_sel_data;
|
||||
wire [CORE_TAG_WIDTH-1:0] core_req_nc_sel_tag;
|
||||
|
||||
wire [NUM_REQS-1:0][MUX_DATAW-1:0] core_req_nc_mux_in;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_nc_mux_in
|
||||
assign core_req_nc_mux_in[i] = {
|
||||
core_bus_in_if[i].req_data.rw,
|
||||
core_bus_in_if[i].req_data.byteen,
|
||||
core_bus_in_if[i].req_data.rw,
|
||||
core_bus_in_if[i].req_data.addr,
|
||||
core_bus_in_if[i].req_data.atype,
|
||||
core_bus_in_if[i].req_data.data,
|
||||
core_bus_in_if[i].req_data.tag
|
||||
core_bus_in_if[i].req_data.byteen,
|
||||
core_bus_in_if[i].req_data.flags,
|
||||
core_bus_in_if[i].req_data.tag
|
||||
};
|
||||
end
|
||||
|
||||
|
||||
assign {
|
||||
core_req_nc_sel_rw,
|
||||
core_req_nc_sel_byteen,
|
||||
core_req_nc_sel_addr,
|
||||
core_req_nc_sel_atype,
|
||||
core_req_nc_sel_data,
|
||||
core_req_nc_sel_tag
|
||||
core_req_nc_sel_byteen,
|
||||
core_req_nc_sel_flags,
|
||||
core_req_nc_sel_tag
|
||||
} = core_req_nc_mux_in[core_req_nc_idx];
|
||||
|
||||
assign core_req_nc_ready = ~mem_bus_in_if.req_valid && mem_req_out_ready;
|
||||
|
@ -152,84 +152,82 @@ module VX_cache_bypass #(
|
|||
assign mem_req_out_valid = mem_bus_in_if.req_valid || core_req_nc_valid;
|
||||
assign mem_req_out_rw = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.rw : core_req_nc_sel_rw;
|
||||
assign mem_req_out_addr = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.addr : core_req_nc_sel_addr[WSEL_BITS +: MEM_ADDR_WIDTH];
|
||||
assign mem_req_out_atype = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.atype : core_req_nc_sel_atype;
|
||||
assign mem_req_out_flags = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.flags : core_req_nc_sel_flags;
|
||||
|
||||
wire [MEM_TAG_ID_BITS-1:0] mem_req_tag_id_bypass;
|
||||
|
||||
wire [CORE_TAG_ID_BITS-1:0] core_req_in_id = core_req_nc_sel_tag[CORE_TAG_ID_BITS-1:0];
|
||||
|
||||
if (WORDS_PER_LINE > 1) begin
|
||||
reg [WORDS_PER_LINE-1:0][WORD_SIZE-1:0] mem_req_byteen_in_r;
|
||||
reg [WORDS_PER_LINE-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_r;
|
||||
|
||||
|
||||
if (WORDS_PER_LINE > 1) begin : g_mem_req_multi_word_line
|
||||
reg [WORDS_PER_LINE-1:0][WORD_SIZE-1:0] mem_req_byteen_in_w;
|
||||
reg [WORDS_PER_LINE-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_w;
|
||||
|
||||
wire [WSEL_BITS-1:0] req_wsel = core_req_nc_sel_addr[WSEL_BITS-1:0];
|
||||
|
||||
always @(*) begin
|
||||
mem_req_byteen_in_r = '0;
|
||||
mem_req_byteen_in_r[req_wsel] = core_req_nc_sel_byteen;
|
||||
mem_req_byteen_in_w = '0;
|
||||
mem_req_byteen_in_w[req_wsel] = core_req_nc_sel_byteen;
|
||||
|
||||
mem_req_data_in_r = 'x;
|
||||
mem_req_data_in_r[req_wsel] = core_req_nc_sel_data;
|
||||
mem_req_data_in_w = 'x;
|
||||
mem_req_data_in_w[req_wsel] = core_req_nc_sel_data;
|
||||
end
|
||||
|
||||
assign mem_req_out_byteen = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.byteen : mem_req_byteen_in_r;
|
||||
assign mem_req_out_data = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.data : mem_req_data_in_r;
|
||||
if (NUM_REQS > 1) begin
|
||||
assign mem_req_out_byteen = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.byteen : mem_req_byteen_in_w;
|
||||
assign mem_req_out_data = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.data : mem_req_data_in_w;
|
||||
if (NUM_REQS > 1) begin : g_multiple_requests
|
||||
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, req_wsel, core_req_in_id});
|
||||
end else begin
|
||||
end else begin : g_single_request
|
||||
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({req_wsel, core_req_in_id});
|
||||
end
|
||||
end else begin
|
||||
end else begin : g_mem_req_single_word_line
|
||||
assign mem_req_out_byteen = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.byteen : core_req_nc_sel_byteen;
|
||||
assign mem_req_out_data = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.data : core_req_nc_sel_data;
|
||||
if (NUM_REQS > 1) begin
|
||||
if (NUM_REQS > 1) begin : g_multiple_requests
|
||||
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, core_req_in_id});
|
||||
end else begin
|
||||
end else begin : g_single_request
|
||||
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_in_id});
|
||||
end
|
||||
end
|
||||
|
||||
wire [MEM_TAG_BYPASS_BITS-1:0] mem_req_tag_bypass;
|
||||
wire [MEM_TAG_BYPASS_BITS-1:0] mem_req_tag_bypass;
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
if (UUID_WIDTH != 0) begin : g_mem_req_tag_bypass_with_uuid
|
||||
assign mem_req_tag_bypass = {core_req_nc_sel_tag[CORE_TAG_ID_BITS +: UUID_WIDTH], mem_req_tag_id_bypass};
|
||||
end else begin
|
||||
end else begin : g_mem_req_tag_bypass
|
||||
assign mem_req_tag_bypass = mem_req_tag_id_bypass;
|
||||
end
|
||||
|
||||
if (PASSTHRU != 0) begin
|
||||
if (PASSTHRU != 0) begin : g_mem_req_out_tag_passthru
|
||||
assign mem_req_out_tag = mem_req_tag_bypass;
|
||||
`UNUSED_VAR (mem_bus_in_if.req_data.tag)
|
||||
end else begin
|
||||
if (NC_ENABLE) begin
|
||||
VX_bits_insert #(
|
||||
.N (MEM_TAG_OUT_WIDTH-1),
|
||||
.S (1),
|
||||
.POS (TAG_SEL_IDX)
|
||||
) mem_req_tag_in_nc_insert (
|
||||
.data_in (mem_bus_in_if.req_valid ? (MEM_TAG_OUT_WIDTH-1)'(mem_bus_in_if.req_data.tag) : (MEM_TAG_OUT_WIDTH-1)'(mem_req_tag_bypass)),
|
||||
.ins_in (~mem_bus_in_if.req_valid),
|
||||
.data_out (mem_req_out_tag)
|
||||
);
|
||||
end else begin
|
||||
assign mem_req_out_tag = mem_bus_in_if.req_data.tag;
|
||||
end
|
||||
end
|
||||
end else if (NC_ENABLE) begin : g_mem_req_out_tag_nc
|
||||
VX_bits_insert #(
|
||||
.N (MEM_TAG_OUT_WIDTH-1),
|
||||
.S (1),
|
||||
.POS (TAG_SEL_IDX)
|
||||
) mem_req_tag_in_nc_insert (
|
||||
.data_in (mem_bus_in_if.req_valid ? (MEM_TAG_OUT_WIDTH-1)'(mem_bus_in_if.req_data.tag) : (MEM_TAG_OUT_WIDTH-1)'(mem_req_tag_bypass)),
|
||||
.ins_in (~mem_bus_in_if.req_valid),
|
||||
.data_out (mem_req_out_tag)
|
||||
);
|
||||
end else begin : g_mem_req_out_tag
|
||||
assign mem_req_out_tag = mem_bus_in_if.req_data.tag;
|
||||
end
|
||||
|
||||
assign mem_bus_in_if.req_ready = mem_req_out_ready;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `ADDR_TYPE_WIDTH + `CS_LINE_WIDTH + MEM_TAG_OUT_WIDTH),
|
||||
.SIZE ((!DIRECT_PASSTHRU) ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
|
||||
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + `CS_LINE_WIDTH + MEM_TAG_OUT_WIDTH),
|
||||
.SIZE (DIRECT_PASSTHRU ? 0 : `TO_OUT_BUF_SIZE(MEM_OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
) mem_req_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mem_req_out_valid),
|
||||
.ready_in (mem_req_out_ready),
|
||||
.data_in ({mem_req_out_rw, mem_req_out_byteen, mem_req_out_addr, mem_req_out_atype, mem_req_out_data, mem_req_out_tag}),
|
||||
.data_out ({mem_bus_out_if.req_data.rw, mem_bus_out_if.req_data.byteen, mem_bus_out_if.req_data.addr, mem_bus_out_if.req_data.atype, mem_bus_out_if.req_data.data, mem_bus_out_if.req_data.tag}),
|
||||
.valid_out (mem_bus_out_if.req_valid),
|
||||
.valid_in (mem_req_out_valid),
|
||||
.ready_in (mem_req_out_ready),
|
||||
.data_in ({mem_req_out_rw, mem_req_out_byteen, mem_req_out_addr, mem_req_out_flags, mem_req_out_data, mem_req_out_tag}),
|
||||
.data_out ({mem_bus_out_if.req_data.rw, mem_bus_out_if.req_data.byteen, mem_bus_out_if.req_data.addr, mem_bus_out_if.req_data.flags, mem_bus_out_if.req_data.data, mem_bus_out_if.req_data.tag}),
|
||||
.valid_out (mem_bus_out_if.req_valid),
|
||||
.ready_out (mem_bus_out_if.req_ready)
|
||||
);
|
||||
|
||||
|
@ -241,19 +239,17 @@ module VX_cache_bypass #(
|
|||
wire [NUM_REQS-1:0] core_rsp_in_ready;
|
||||
|
||||
wire is_mem_rsp_nc;
|
||||
if (PASSTHRU != 0) begin
|
||||
if (PASSTHRU != 0) begin : g_is_mem_rsp_nc_passthru
|
||||
assign is_mem_rsp_nc = mem_bus_out_if.rsp_valid;
|
||||
end else begin
|
||||
if (NC_ENABLE) begin
|
||||
assign is_mem_rsp_nc = mem_bus_out_if.rsp_valid && mem_bus_out_if.rsp_data.tag[TAG_SEL_IDX];
|
||||
end else begin
|
||||
assign is_mem_rsp_nc = 1'b0;
|
||||
end
|
||||
end else if (NC_ENABLE) begin : g_is_mem_rsp_nc
|
||||
assign is_mem_rsp_nc = mem_bus_out_if.rsp_valid && mem_bus_out_if.rsp_data.tag[TAG_SEL_IDX];
|
||||
end else begin : g_is_no_mem_rsp_nc
|
||||
assign is_mem_rsp_nc = 1'b0;
|
||||
end
|
||||
|
||||
wire [(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1:0] mem_rsp_tag_id_nc;
|
||||
|
||||
VX_bits_remove #(
|
||||
VX_bits_remove #(
|
||||
.N (MEM_TAG_OUT_WIDTH),
|
||||
.S (NC_ENABLE),
|
||||
.POS (TAG_SEL_IDX)
|
||||
|
@ -262,57 +258,52 @@ module VX_cache_bypass #(
|
|||
.data_out (mem_rsp_tag_id_nc)
|
||||
);
|
||||
|
||||
wire [`UP(REQ_SEL_BITS)-1:0] rsp_idx;
|
||||
if (NUM_REQS > 1) begin
|
||||
wire [REQ_SEL_WIDTH-1:0] rsp_idx;
|
||||
if (NUM_REQS > 1) begin : g_rsp_idx
|
||||
assign rsp_idx = mem_rsp_tag_id_nc[(CORE_TAG_ID_BITS + WSEL_BITS) +: REQ_SEL_BITS];
|
||||
end else begin
|
||||
end else begin : g_rsp_idx_0
|
||||
assign rsp_idx = 1'b0;
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_valid
|
||||
assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || (is_mem_rsp_nc && rsp_idx == REQ_SEL_WIDTH'(i));
|
||||
end
|
||||
|
||||
reg [NUM_REQS-1:0] rsp_nc_valid_r;
|
||||
always @(*) begin
|
||||
rsp_nc_valid_r = '0;
|
||||
rsp_nc_valid_r[rsp_idx] = is_mem_rsp_nc;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_ready
|
||||
assign core_bus_out_if[i].rsp_ready = core_rsp_in_ready[i];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || rsp_nc_valid_r[i];
|
||||
assign core_bus_out_if[i].rsp_ready = core_rsp_in_ready[i];
|
||||
end
|
||||
|
||||
if (WORDS_PER_LINE > 1) begin
|
||||
wire [WSEL_BITS-1:0] rsp_wsel = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS +: WSEL_BITS];
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ?
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_data
|
||||
if (WORDS_PER_LINE > 1) begin : g_wsel
|
||||
wire [WSEL_BITS-1:0] rsp_wsel = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS +: WSEL_BITS];
|
||||
assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ?
|
||||
core_bus_out_if[i].rsp_data.data : mem_bus_out_if.rsp_data.data[rsp_wsel * CORE_DATA_WIDTH +: CORE_DATA_WIDTH];
|
||||
end
|
||||
end else begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
end else begin : g_no_wsel
|
||||
assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ? core_bus_out_if[i].rsp_data.data : mem_bus_out_if.rsp_data.data;
|
||||
end
|
||||
end
|
||||
|
||||
wire [(CORE_TAG_ID_BITS + UUID_WIDTH)-1:0] mem_rsp_tag_in_nc2;
|
||||
if (UUID_WIDTH != 0) begin
|
||||
if (UUID_WIDTH != 0) begin : g_mem_rsp_tag_in_nc2_uuid
|
||||
assign mem_rsp_tag_in_nc2 = {mem_rsp_tag_id_nc[(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1 -: UUID_WIDTH], mem_rsp_tag_id_nc[CORE_TAG_ID_BITS-1:0]};
|
||||
end else begin
|
||||
end else begin : g_mem_rsp_tag_in_nc2
|
||||
assign mem_rsp_tag_in_nc2 = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS-1:0];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
if (PASSTHRU) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_tag
|
||||
if (PASSTHRU) begin : g_passthru
|
||||
assign core_rsp_in_tag[i] = mem_rsp_tag_in_nc2;
|
||||
end else if (NC_ENABLE) begin
|
||||
end else if (NC_ENABLE) begin : g_nc
|
||||
assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_valid ? core_bus_out_if[i].rsp_data.tag : mem_rsp_tag_in_nc2;
|
||||
end else begin
|
||||
end else begin : g_no_nc
|
||||
assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_data.tag;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_buf
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`CS_WORD_WIDTH + CORE_TAG_WIDTH),
|
||||
.SIZE ((!DIRECT_PASSTHRU) ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0),
|
||||
.SIZE (DIRECT_PASSTHRU ? 0 : `TO_OUT_BUF_SIZE(CORE_OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF))
|
||||
) core_rsp_buf (
|
||||
.clk (clk),
|
||||
|
@ -320,7 +311,7 @@ module VX_cache_bypass #(
|
|||
.valid_in (core_rsp_in_valid[i]),
|
||||
.ready_in (core_rsp_in_ready[i]),
|
||||
.data_in ({core_rsp_in_data[i], core_rsp_in_tag[i]}),
|
||||
.data_out ({core_bus_in_if[i].rsp_data.data, core_bus_in_if[i].rsp_data.tag}),
|
||||
.data_out ({core_bus_in_if[i].rsp_data.data, core_bus_in_if[i].rsp_data.tag}),
|
||||
.valid_out (core_bus_in_if[i].rsp_valid),
|
||||
.ready_out (core_bus_in_if[i].rsp_ready)
|
||||
);
|
||||
|
@ -328,22 +319,22 @@ module VX_cache_bypass #(
|
|||
|
||||
// handle memory responses ////////////////////////////////////////////////
|
||||
|
||||
if (PASSTHRU != 0) begin
|
||||
if (PASSTHRU != 0) begin : g_mem_bus_in_if_passthru
|
||||
assign mem_bus_in_if.rsp_valid = 1'b0;
|
||||
assign mem_bus_in_if.rsp_data.data = '0;
|
||||
assign mem_bus_in_if.rsp_data.tag = '0;
|
||||
end else if (NC_ENABLE) begin
|
||||
end else if (NC_ENABLE) begin : g_mem_bus_in_if_nc
|
||||
assign mem_bus_in_if.rsp_valid = mem_bus_out_if.rsp_valid && ~mem_bus_out_if.rsp_data.tag[TAG_SEL_IDX];
|
||||
assign mem_bus_in_if.rsp_data.data = mem_bus_out_if.rsp_data.data;
|
||||
assign mem_bus_in_if.rsp_data.tag = mem_rsp_tag_id_nc[MEM_TAG_IN_WIDTH-1:0];
|
||||
end else begin
|
||||
end else begin : g_mem_bus_in_if
|
||||
assign mem_bus_in_if.rsp_valid = mem_bus_out_if.rsp_valid;
|
||||
assign mem_bus_in_if.rsp_data.data = mem_bus_out_if.rsp_data.data;
|
||||
assign mem_bus_in_if.rsp_data.tag = mem_rsp_tag_id_nc;
|
||||
end
|
||||
|
||||
|
||||
wire [NUM_REQS-1:0] core_rsp_out_valid;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_out_valid
|
||||
assign core_rsp_out_valid[i] = core_bus_out_if[i].rsp_valid;
|
||||
end
|
||||
|
||||
|
|
64
hw/rtl/cache/VX_cache_cluster.sv
vendored
64
hw/rtl/cache/VX_cache_cluster.sv
vendored
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -24,20 +24,20 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
parameter NUM_REQS = 4,
|
||||
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 16384,
|
||||
parameter CACHE_SIZE = 16384,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 64,
|
||||
parameter LINE_SIZE = 64,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 4,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
parameter WORD_SIZE = 4,
|
||||
|
||||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 2,
|
||||
// Miss Reservation Queue Knob
|
||||
parameter MSHR_SIZE = 8,
|
||||
parameter MSHR_SIZE = 8,
|
||||
// Memory Response Queue Size
|
||||
parameter MRSQ_SIZE = 0,
|
||||
// Memory Request Queue Size
|
||||
|
@ -46,6 +46,12 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
|
||||
// Enable dirty bytes on writeback
|
||||
parameter DIRTY_BYTES = 0,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
|
@ -60,7 +66,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
|
||||
// Memory request output buffer
|
||||
parameter MEM_OUT_BUF = 0
|
||||
) (
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
@ -74,17 +80,16 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
);
|
||||
localparam NUM_CACHES = `UP(NUM_UNITS);
|
||||
localparam PASSTHRU = (NUM_UNITS == 0);
|
||||
localparam ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES);
|
||||
localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
|
||||
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
|
||||
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS));
|
||||
localparam ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES);
|
||||
localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
|
||||
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH, UUID_WIDTH) :
|
||||
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, UUID_WIDTH));
|
||||
|
||||
`STATIC_ASSERT(NUM_INPUTS >= NUM_CACHES, ("invalid parameter"))
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
cache_perf_t perf_cache_tmp[1], perf_cache_unit[NUM_CACHES];
|
||||
`PERF_CACHE_ADD (perf_cache_tmp, perf_cache_unit, 1, NUM_CACHES)
|
||||
assign cache_perf = perf_cache_tmp[0];
|
||||
cache_perf_t perf_cache_unit[NUM_CACHES];
|
||||
`PERF_CACHE_ADD (cache_perf, perf_cache_unit, NUM_CACHES)
|
||||
`endif
|
||||
|
||||
VX_mem_bus_if #(
|
||||
|
@ -97,9 +102,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH (ARB_TAG_WIDTH)
|
||||
) arb_core_bus_if[NUM_CACHES * NUM_REQS]();
|
||||
|
||||
`RESET_RELAY (arb_reset, reset);
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_arb
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (WORD_SIZE),
|
||||
.TAG_WIDTH (TAG_WIDTH)
|
||||
|
@ -110,7 +113,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH (ARB_TAG_WIDTH)
|
||||
) arb_core_bus_tmp_if[NUM_CACHES]();
|
||||
|
||||
for (genvar j = 0; j < NUM_INPUTS; ++j) begin
|
||||
for (genvar j = 0; j < NUM_INPUTS; ++j) begin : g_core_bus_tmp_if
|
||||
`ASSIGN_VX_MEM_BUS_IF (core_bus_tmp_if[j], core_bus_if[j * NUM_REQS + i]);
|
||||
end
|
||||
|
||||
|
@ -122,23 +125,20 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
.TAG_SEL_IDX (TAG_SEL_IDX),
|
||||
.ARBITER ("R"),
|
||||
.REQ_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? 2 : 0),
|
||||
.RSP_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? 2 : 0)
|
||||
) cache_arb (
|
||||
.RSP_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? CORE_OUT_BUF : 0)
|
||||
) core_arb (
|
||||
.clk (clk),
|
||||
.reset (arb_reset),
|
||||
.reset (reset),
|
||||
.bus_in_if (core_bus_tmp_if),
|
||||
.bus_out_if (arb_core_bus_tmp_if)
|
||||
);
|
||||
|
||||
for (genvar k = 0; k < NUM_CACHES; ++k) begin
|
||||
for (genvar k = 0; k < NUM_CACHES; ++k) begin : g_arb_core_bus_if
|
||||
`ASSIGN_VX_MEM_BUS_IF (arb_core_bus_if[k * NUM_REQS + i], arb_core_bus_tmp_if[k]);
|
||||
end
|
||||
end
|
||||
|
||||
`RESET_RELAY (cache_reset, reset);
|
||||
|
||||
for (genvar i = 0; i < NUM_CACHES; ++i) begin
|
||||
|
||||
for (genvar i = 0; i < NUM_CACHES; ++i) begin : g_cache_wrap
|
||||
VX_cache_wrap #(
|
||||
.INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, i)),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
|
@ -152,6 +152,8 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
.MRSQ_SIZE (MRSQ_SIZE),
|
||||
.MREQ_SIZE (MREQ_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.DIRTY_BYTES (DIRTY_BYTES),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.TAG_WIDTH (ARB_TAG_WIDTH),
|
||||
.TAG_SEL_IDX (TAG_SEL_IDX),
|
||||
|
@ -164,7 +166,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
.cache_perf (perf_cache_unit[i]),
|
||||
`endif
|
||||
.clk (clk),
|
||||
.reset (cache_reset),
|
||||
.reset (reset),
|
||||
.core_bus_if (arb_core_bus_if[i * NUM_REQS +: NUM_REQS]),
|
||||
.mem_bus_if (cache_mem_bus_if[i])
|
||||
);
|
||||
|
@ -181,7 +183,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH (MEM_TAG_WIDTH),
|
||||
.TAG_SEL_IDX (TAG_SEL_IDX),
|
||||
.ARBITER ("R"),
|
||||
.REQ_OUT_BUF ((NUM_CACHES > 1) ? 2 : 0),
|
||||
.REQ_OUT_BUF ((NUM_CACHES > 1) ? MEM_OUT_BUF : 0),
|
||||
.RSP_OUT_BUF ((NUM_CACHES > 1) ? 2 : 0)
|
||||
) mem_arb (
|
||||
.clk (clk),
|
||||
|
@ -190,6 +192,10 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
.bus_out_if (mem_bus_tmp_if)
|
||||
);
|
||||
|
||||
`ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if[0]);
|
||||
if (WRITE_ENABLE) begin : g_mem_bus_if
|
||||
`ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if[0]);
|
||||
end else begin : g_mem_bus_if_ro
|
||||
`ASSIGN_VX_MEM_BUS_RO_IF (mem_bus_if, mem_bus_tmp_if[0]);
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
185
hw/rtl/cache/VX_cache_data.sv
vendored
185
hw/rtl/cache/VX_cache_data.sv
vendored
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -17,17 +17,21 @@ module VX_cache_data #(
|
|||
parameter `STRING INSTANCE_ID= "",
|
||||
parameter BANK_ID = 0,
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
parameter CACHE_SIZE = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 16,
|
||||
parameter LINE_SIZE = 16,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 1,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 1,
|
||||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
// Enable dirty bytes on writeback
|
||||
parameter DIRTY_BYTES = 0,
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0
|
||||
) (
|
||||
|
@ -40,62 +44,105 @@ module VX_cache_data #(
|
|||
|
||||
input wire stall,
|
||||
|
||||
input wire init,
|
||||
input wire read,
|
||||
input wire fill,
|
||||
input wire fill,
|
||||
input wire flush,
|
||||
input wire write,
|
||||
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
|
||||
input wire [`UP(`CS_WORD_SEL_BITS)-1:0] wsel,
|
||||
input wire [WORD_SIZE-1:0] byteen,
|
||||
input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data,
|
||||
input wire [`CS_WORD_WIDTH-1:0] write_data,
|
||||
input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] write_data,
|
||||
input wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen,
|
||||
input wire [NUM_WAYS-1:0] way_sel,
|
||||
|
||||
output wire [`CS_WORD_WIDTH-1:0] read_data
|
||||
output wire [`CS_WORD_WIDTH-1:0] read_data,
|
||||
output wire [`CS_LINE_WIDTH-1:0] dirty_data,
|
||||
output wire [LINE_SIZE-1:0] dirty_byteen
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_PARAM (BANK_ID)
|
||||
`UNUSED_PARAM (WORD_SIZE)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (stall)
|
||||
`UNUSED_VAR (line_addr)
|
||||
`UNUSED_VAR (init)
|
||||
`UNUSED_VAR (read)
|
||||
`UNUSED_VAR (flush)
|
||||
|
||||
localparam BYTEENW = (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) ? (LINE_SIZE * NUM_WAYS) : 1;
|
||||
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] wdata;
|
||||
wire [BYTEENW-1:0] wren;
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin
|
||||
reg [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] wdata_r;
|
||||
reg [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wren_r;
|
||||
|
||||
always @(*) begin
|
||||
wdata_r = {`CS_WORDS_PER_LINE{write_data}};
|
||||
wren_r = '0;
|
||||
wren_r[wsel] = byteen;
|
||||
end
|
||||
|
||||
// order the data layout to perform ways multiplexing last
|
||||
// this allows performing onehot encoding of the way index in parallel with BRAM read.
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w;
|
||||
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
|
||||
assign wdata[i] = fill ? {NUM_WAYS{fill_data[i]}} : {NUM_WAYS{wdata_r[i]}};
|
||||
for (genvar j = 0; j < NUM_WAYS; ++j) begin
|
||||
assign wren_w[i][j] = (fill ? {WORD_SIZE{1'b1}} : wren_r[i])
|
||||
& {WORD_SIZE{((NUM_WAYS == 1) || way_sel[j])}};
|
||||
end
|
||||
end
|
||||
assign wren = wren_w;
|
||||
end else begin
|
||||
`UNUSED_VAR (write)
|
||||
`UNUSED_VAR (byteen)
|
||||
`UNUSED_VAR (write_data)
|
||||
assign wdata = fill_data;
|
||||
assign wren = fill;
|
||||
end
|
||||
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_rdata;
|
||||
wire [`LOG2UP(NUM_WAYS)-1:0] way_idx;
|
||||
|
||||
VX_onehot_encoder #(
|
||||
if (WRITEBACK) begin : g_dirty_data
|
||||
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] transposed_rdata;
|
||||
VX_transpose #(
|
||||
.DATAW (`CS_WORD_WIDTH),
|
||||
.N (`CS_WORDS_PER_LINE),
|
||||
.M (NUM_WAYS)
|
||||
) transpose (
|
||||
.data_in (line_rdata),
|
||||
.data_out (transposed_rdata)
|
||||
);
|
||||
assign dirty_data = transposed_rdata[way_idx];
|
||||
end else begin : g_dirty_data_0
|
||||
assign dirty_data = '0;
|
||||
end
|
||||
|
||||
if (DIRTY_BYTES) begin : g_dirty_byteen
|
||||
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_rdata;
|
||||
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_wdata;
|
||||
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_bs_wdata
|
||||
wire [LINE_SIZE-1:0] wdata = write ? (bs_rdata[i] | write_byteen) : ((fill || flush) ? '0 : bs_rdata[i]);
|
||||
assign bs_wdata[i] = init ? '0 : (way_sel[i] ? wdata : bs_rdata[i]);
|
||||
end
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (LINE_SIZE * NUM_WAYS),
|
||||
.SIZE (`CS_LINES_PER_BANK)
|
||||
) byteen_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (write || fill || flush),
|
||||
.write (init || write || fill || flush),
|
||||
.wren (1'b1),
|
||||
.addr (line_sel),
|
||||
.wdata (bs_wdata),
|
||||
.rdata (bs_rdata)
|
||||
);
|
||||
|
||||
assign dirty_byteen = bs_rdata[way_idx];
|
||||
end else begin : g_dirty_byteen_0
|
||||
assign dirty_byteen = '1;
|
||||
end
|
||||
|
||||
// order the data layout to perform ways multiplexing last.
|
||||
// this allows converting way index to binary in parallel with BRAM read access and way selection.
|
||||
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
|
||||
wire [BYTEENW-1:0] line_wren;
|
||||
|
||||
if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin : g_line_wdata
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w;
|
||||
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin : g_i
|
||||
for (genvar j = 0; j < NUM_WAYS; ++j) begin : g_j
|
||||
assign line_wdata[i][j] = (fill || !WRITE_ENABLE) ? fill_data[i] : write_data[i];
|
||||
assign wren_w[i][j] = ((fill || !WRITE_ENABLE) ? {WORD_SIZE{1'b1}} : write_byteen[i])
|
||||
& {WORD_SIZE{(way_sel[j] || (NUM_WAYS == 1))}};
|
||||
end
|
||||
end
|
||||
assign line_wren = wren_w;
|
||||
end else begin : g_line_wdata_ro
|
||||
`UNUSED_VAR (write)
|
||||
`UNUSED_VAR (write_byteen)
|
||||
`UNUSED_VAR (write_data)
|
||||
assign line_wdata = fill_data;
|
||||
assign line_wren = fill;
|
||||
end
|
||||
|
||||
VX_encoder #(
|
||||
.N (NUM_WAYS)
|
||||
) way_enc (
|
||||
.data_in (way_sel),
|
||||
|
@ -103,50 +150,52 @@ module VX_cache_data #(
|
|||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] rdata;
|
||||
wire line_read = (read && ~stall)
|
||||
|| (WRITEBACK && (fill || flush));
|
||||
|
||||
wire line_write = write || fill;
|
||||
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (`CS_LINE_WIDTH * NUM_WAYS),
|
||||
.SIZE (`CS_LINES_PER_BANK),
|
||||
.WRENW (BYTEENW),
|
||||
.NO_RWCHECK (1)
|
||||
.NO_RWCHECK (1),
|
||||
.RW_ASSERT (1)
|
||||
) data_store (
|
||||
.clk (clk),
|
||||
.read (1'b1),
|
||||
.write (write || fill),
|
||||
.wren (wren),
|
||||
.reset (reset),
|
||||
.read (line_read),
|
||||
.write (line_write),
|
||||
.wren (line_wren),
|
||||
.addr (line_sel),
|
||||
.wdata (wdata),
|
||||
.rdata (rdata)
|
||||
.wdata (line_wdata),
|
||||
.rdata (line_rdata)
|
||||
);
|
||||
|
||||
wire [NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata;
|
||||
|
||||
if (`CS_WORDS_PER_LINE > 1) begin
|
||||
assign per_way_rdata = rdata[wsel];
|
||||
end else begin
|
||||
if (`CS_WORDS_PER_LINE > 1) begin : g_per_way_rdata_wsel
|
||||
assign per_way_rdata = line_rdata[wsel];
|
||||
end else begin : g_per_way_rdata
|
||||
`UNUSED_VAR (wsel)
|
||||
assign per_way_rdata = rdata;
|
||||
end
|
||||
|
||||
assign per_way_rdata = line_rdata;
|
||||
end
|
||||
assign read_data = per_way_rdata[way_idx];
|
||||
|
||||
`UNUSED_VAR (stall)
|
||||
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
always @(posedge clk) begin
|
||||
always @(posedge clk) begin
|
||||
if (fill && ~stall) begin
|
||||
`TRACE(3, ("%d: %s-bank%0d data-fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data));
|
||||
`TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data))
|
||||
end
|
||||
if (flush && ~stall) begin
|
||||
`TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_byteen, dirty_data))
|
||||
end
|
||||
if (read && ~stall) begin
|
||||
`TRACE(3, ("%d: %s-bank%0d data-read: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, read_data, req_uuid));
|
||||
end
|
||||
`TRACE(3, ("%t: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid))
|
||||
end
|
||||
if (write && ~stall) begin
|
||||
`TRACE(3, ("%d: %s-bank%0d data-write: addr=0x%0h, way=%b, blk_addr=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, byteen, write_data, req_uuid));
|
||||
end
|
||||
end
|
||||
`TRACE(3, ("%t: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
27
hw/rtl/cache/VX_cache_define.vh
vendored
27
hw/rtl/cache/VX_cache_define.vh
vendored
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,7 +14,7 @@
|
|||
`ifndef VX_CACHE_DEFINE_VH
|
||||
`define VX_CACHE_DEFINE_VH
|
||||
|
||||
`include "VX_define.vh"
|
||||
`include "VX_define.vh"
|
||||
|
||||
`define CS_REQ_SEL_BITS `CLOG2(NUM_REQS)
|
||||
|
||||
|
@ -50,28 +50,27 @@
|
|||
`define CS_TAG_SEL_ADDR_START (1+`CS_LINE_SEL_ADDR_END)
|
||||
`define CS_TAG_SEL_ADDR_END (`CS_WORD_ADDR_WIDTH-1)
|
||||
|
||||
`define CS_LINE_TAG_ADDR(x) x[`CS_LINE_ADDR_WIDTH-1 : `CS_LINE_SEL_BITS]
|
||||
`define CS_LINE_ADDR_TAG(x) x[`CS_LINE_ADDR_WIDTH-1 : `CS_LINE_SEL_BITS]
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define CS_LINE_TO_MEM_ADDR(x, i) {x, `CS_BANK_SEL_BITS'(i)}
|
||||
`define CS_MEM_ADDR_TO_BANK_ID(x) x[0 +: `CS_BANK_SEL_BITS]
|
||||
`define CS_MEM_TAG_TO_REQ_ID(x) x[MSHR_ADDR_WIDTH-1:0]
|
||||
`define CS_MEM_TAG_TO_BANK_ID(x) x[MSHR_ADDR_WIDTH +: `CS_BANK_SEL_BITS]
|
||||
|
||||
`define CS_LINE_TO_FULL_ADDR(x, i) {x, (`XLEN-$bits(x))'(i << (`XLEN-$bits(x)-`CS_BANK_SEL_BITS))}
|
||||
`define CS_MEM_TO_FULL_ADDR(x) {x, (`XLEN-$bits(x))'(0)}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define PERF_CACHE_ADD(dst, src, dcount, scount) \
|
||||
`PERF_COUNTER_ADD (dst, src, reads, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, writes, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, read_misses, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, write_misses, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, bank_stalls, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, mshr_stalls, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, mem_stalls, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, crsp_stalls, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1))
|
||||
`define PERF_CACHE_ADD(dst, src, count) \
|
||||
`PERF_COUNTER_ADD (dst, src, reads, `PERF_CTR_BITS, count, (count > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, writes, `PERF_CTR_BITS, count, (count > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, read_misses, `PERF_CTR_BITS, count, (count > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, write_misses, `PERF_CTR_BITS, count, (count > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, bank_stalls, `PERF_CTR_BITS, count, (count > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, mshr_stalls, `PERF_CTR_BITS, count, (count > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, mem_stalls, `PERF_CTR_BITS, count, (count > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, crsp_stalls, `PERF_CTR_BITS, count, (count > 1))
|
||||
|
||||
`endif // VX_CACHE_DEFINE_VH
|
||||
|
|
188
hw/rtl/cache/VX_cache_flush.sv
vendored
Normal file
188
hw/rtl/cache/VX_cache_flush.sv
vendored
Normal file
|
@ -0,0 +1,188 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
// Cache flush controller.
//
// Sits between the incoming core request buses (core_bus_in_if) and the
// buses forwarded to the cache (core_bus_out_if). While no flush request is
// present, requests pass straight through. When any input presents a valid
// request with the MEM_REQ_FLAG_FLUSH flag set, all inputs are blocked, a
// one-cycle flush_begin pulse is sent to every bank, and the controller waits
// until every bank has reported flush_end. The flush requests that were
// pending at that moment are then released downstream; all other requests
// stay blocked until the released ones have been accepted.
//
// Responses are never gated: they are forwarded from core_bus_out_if to
// core_bus_in_if unchanged.
module VX_cache_flush #(
    // Number of Word requests per cycle
    parameter NUM_REQS      = 4,
    // Number of banks
    parameter NUM_BANKS     = 1,
    // Request debug identifier
    parameter UUID_WIDTH    = 0,
    // core request tag size
    parameter TAG_WIDTH     = UUID_WIDTH + 1,
    // Bank select latency
    parameter BANK_SEL_LATENCY = 1
) (
    input wire clk,
    input wire reset,
    VX_mem_bus_if.slave  core_bus_in_if [NUM_REQS],
    VX_mem_bus_if.master core_bus_out_if [NUM_REQS],
    // per-bank "request accepted" strobes, used to retire in-flight requests
    input wire [NUM_BANKS-1:0] bank_req_fire,
    // one-cycle pulse (replicated to all banks) that starts a flush
    output wire [NUM_BANKS-1:0] flush_begin,
    // UUID captured from the flush request that triggered the sequence
    output wire [`UP(UUID_WIDTH)-1:0] flush_uuid,
    // per-bank "flush finished" strobes
    input wire [NUM_BANKS-1:0] flush_end
);
    // FSM encoding
    localparam STATE_IDLE  = 0; // pass-through; waiting for a flush request
    localparam STATE_WAIT1 = 1; // inputs blocked; draining in-flight requests
    localparam STATE_FLUSH = 2; // flush_begin asserted for one cycle
    localparam STATE_WAIT2 = 3; // collecting flush_end from every bank
    localparam STATE_DONE  = 4; // releasing the pending flush requests

    reg [2:0] state, state_n;

    // track in-flight core requests

    wire no_inflight_reqs;

    if (BANK_SEL_LATENCY != 0) begin : g_bank_sel_latency

        localparam NUM_REQS_W  = `CLOG2(NUM_REQS+1);
        localparam NUM_BANKS_W = `CLOG2(NUM_BANKS+1);

        // a request leaves this module when valid and ready are both high
        wire [NUM_REQS-1:0] core_bus_out_fire;
        for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_out_fire
            assign core_bus_out_fire[i] = core_bus_out_if[i].req_valid && core_bus_out_if[i].req_ready;
        end

        wire [NUM_REQS_W-1:0]  core_bus_out_cnt;
        wire [NUM_BANKS_W-1:0] bank_req_cnt;

        `POP_COUNT(core_bus_out_cnt, core_bus_out_fire);
        `POP_COUNT(bank_req_cnt, bank_req_fire);
        `UNUSED_VAR (core_bus_out_cnt)

        // NOTE(review): the increment is cast down to NUM_BANKS_W bits, so it
        // truncates if more than NUM_BANKS requests fire in one cycle.
        // Presumably downstream accepts at most NUM_BANKS per cycle - confirm.
        VX_pending_size #(
            .SIZE  (BANK_SEL_LATENCY * NUM_BANKS),
            .INCRW (NUM_BANKS_W),
            .DECRW (NUM_BANKS_W)
        ) pending_size (
            .clk   (clk),
            .reset (reset),
            .incr  (NUM_BANKS_W'(core_bus_out_cnt)),
            .decr  (bank_req_cnt),
            .empty (no_inflight_reqs),
            `UNUSED_PIN (alm_empty),
            `UNUSED_PIN (full),
            `UNUSED_PIN (alm_full),
            `UNUSED_PIN (size)
        );

    end else begin : g_no_bank_sel_latency
        // Not consumed in this configuration: STATE_IDLE jumps directly to
        // STATE_FLUSH, and STATE_WAIT1 is the only reader of this signal.
        assign no_inflight_reqs = 0;
        `UNUSED_VAR (bank_req_fire)
    end

    // banks that have already reported flush_end during STATE_WAIT2
    reg [NUM_BANKS-1:0] flush_done, flush_done_n;

    // inputs currently presenting a valid flush-flagged request
    wire [NUM_REQS-1:0] flush_req_mask;
    for (genvar i = 0; i < NUM_REQS; ++i) begin : g_flush_req_mask
        assign flush_req_mask[i] = core_bus_in_if[i].req_valid && core_bus_in_if[i].req_data.flags[`MEM_REQ_FLAG_FLUSH];
    end
    wire flush_req_enable = (| flush_req_mask);

    // inputs allowed through even though a flush request is pending
    reg [NUM_REQS-1:0] lock_released, lock_released_n;
    reg [`UP(UUID_WIDTH)-1:0] flush_uuid_r, flush_uuid_n;

    // request path: gate valid/ready with the per-input lock
    for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_out_req
        wire input_enable = ~flush_req_enable || lock_released[i];
        assign core_bus_out_if[i].req_valid = core_bus_in_if[i].req_valid && input_enable;
        assign core_bus_out_if[i].req_data  = core_bus_in_if[i].req_data;
        assign core_bus_in_if[i].req_ready  = core_bus_out_if[i].req_ready && input_enable;
    end

    // response path: plain pass-through
    for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_in_rsp
        assign core_bus_in_if[i].rsp_valid  = core_bus_out_if[i].rsp_valid;
        assign core_bus_in_if[i].rsp_data   = core_bus_out_if[i].rsp_data;
        assign core_bus_out_if[i].rsp_ready = core_bus_in_if[i].rsp_ready;
    end

    // Driven only by continuous assignments, hence declared as nets.
    wire [NUM_REQS-1:0][`UP(UUID_WIDTH)-1:0] core_bus_out_uuid;
    wire [NUM_REQS-1:0] core_bus_out_ready;

    // UUID is taken from the top UUID_WIDTH bits of the request tag
    for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_out_uuid
        if (UUID_WIDTH != 0) begin : g_uuid
            assign core_bus_out_uuid[i] = core_bus_in_if[i].req_data.tag[TAG_WIDTH-1 -: UUID_WIDTH];
        end else begin : g_no_uuid
            assign core_bus_out_uuid[i] = 0;
        end
    end

    for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_out_ready
        assign core_bus_out_ready[i] = core_bus_out_if[i].req_ready;
    end

    // next-state logic
    always @(*) begin
        state_n         = state;
        flush_done_n    = flush_done;
        lock_released_n = lock_released;
        flush_uuid_n    = flush_uuid_r;
        case (state)
            STATE_IDLE: begin
                if (flush_req_enable) begin
                    state_n = (BANK_SEL_LATENCY != 0) ? STATE_WAIT1 : STATE_FLUSH;
                    // descending scan: the last assignment wins, so the
                    // lowest-index flush request supplies the UUID
                    for (integer i = NUM_REQS-1; i >= 0; --i) begin
                        if (flush_req_mask[i]) begin
                            flush_uuid_n = core_bus_out_uuid[i];
                        end
                    end
                end
            end
            STATE_WAIT1: begin
                if (no_inflight_reqs) begin
                    state_n = STATE_FLUSH;
                end
            end
            STATE_FLUSH: begin
                // generate a flush request pulse
                state_n = STATE_WAIT2;
            end
            STATE_WAIT2: begin
                // wait for all banks to finish flushing
                flush_done_n = flush_done | flush_end;
                if (flush_done_n == {NUM_BANKS{1'b1}}) begin
                    state_n = STATE_DONE;
                    flush_done_n = '0;
                    // only release current flush requests
                    // and keep normal requests locked
                    lock_released_n = flush_req_mask;
                end
            end
            STATE_DONE: begin
                // wait until released flush requests are issued
                // when returning to IDLE state other requests will unlock
                lock_released_n = lock_released & ~core_bus_out_ready;
                if (lock_released_n == 0) begin
                    state_n = STATE_IDLE;
                end
            end
            default: begin
                // encodings 5..7 are not used by this FSM; fall back to IDLE
                // instead of holding an illegal state forever
                state_n = STATE_IDLE;
            end
        endcase
    end

    always @(posedge clk) begin
        if (reset) begin
            state         <= STATE_IDLE;
            flush_done    <= '0;
            lock_released <= '0;
        end else begin
            state         <= state_n;
            flush_done    <= flush_done_n;
            lock_released <= lock_released_n;
        end
        // updated outside the reset branch, so reset does not clear it
        flush_uuid_r <= flush_uuid_n;
    end

    assign flush_begin = {NUM_BANKS{state == STATE_FLUSH}};
    assign flush_uuid  = flush_uuid_r;

endmodule
|
51
hw/rtl/cache/VX_cache_init.sv
vendored
51
hw/rtl/cache/VX_cache_init.sv
vendored
|
@ -1,51 +0,0 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
// Cache line initialization sequencer.
//
// Starting from reset, valid_out is held high while addr_out steps through
// every line-select index, one per clock, beginning at 0. Once the all-ones
// index has been presented, valid_out drops and stays low until the next reset.
module VX_cache_init #(
    // Size of cache in bytes
    parameter CACHE_SIZE = 1024,
    // Size of line inside a bank in bytes
    parameter LINE_SIZE  = 16,
    // Number of banks
    parameter NUM_BANKS  = 1,
    // Number of associative ways
    parameter NUM_WAYS   = 1
) (
    input  wire clk,
    input  wire reset,
    output wire [`CS_LINE_SEL_BITS-1:0] addr_out,
    output wire valid_out
);
    // high while the sweep over the line indices is in progress
    reg busy;
    // index of the line presented on addr_out this cycle
    reg [`CS_LINE_SEL_BITS-1:0] line_idx;

    // true on the cycle the final (all-ones) index is being presented
    wire last_line = (line_idx == ((2 ** `CS_LINE_SEL_BITS)-1));

    always @(posedge clk) begin
        if (reset) begin
            busy     <= 1;
            line_idx <= '0;
        end else if (busy) begin
            line_idx <= line_idx + `CS_LINE_SEL_BITS'(1);
            if (last_line) begin
                busy <= 0;
            end
        end
    end

    assign valid_out = busy;
    assign addr_out  = line_idx;

endmodule
|
75
hw/rtl/cache/VX_cache_mshr.sv
vendored
75
hw/rtl/cache/VX_cache_mshr.sv
vendored
|
@ -104,7 +104,8 @@ module VX_cache_mshr #(
|
|||
// lookup
|
||||
input wire lookup_valid,
|
||||
input wire [`CS_LINE_ADDR_WIDTH-1:0] lookup_addr,
|
||||
output wire [MSHR_SIZE-1:0] lookup_matches,
|
||||
output wire [MSHR_SIZE-1:0] lookup_pending,
|
||||
output wire [MSHR_SIZE-1:0] lookup_rw,
|
||||
|
||||
// finalize
|
||||
input wire finalize_valid,
|
||||
|
@ -134,7 +135,7 @@ module VX_cache_mshr #(
|
|||
wire dequeue_fire = dequeue_valid && dequeue_ready;
|
||||
|
||||
wire [MSHR_SIZE-1:0] addr_matches;
|
||||
for (genvar i = 0; i < MSHR_SIZE; ++i) begin
|
||||
for (genvar i = 0; i < MSHR_SIZE; ++i) begin : g_addr_matches
|
||||
assign addr_matches[i] = valid_table[i] && (addr_table[i] == lookup_addr);
|
||||
end
|
||||
|
||||
|
@ -147,7 +148,7 @@ module VX_cache_mshr #(
|
|||
.valid_out (allocate_rdy_n)
|
||||
);
|
||||
|
||||
VX_onehot_encoder #(
|
||||
VX_encoder #(
|
||||
.N (MSHR_SIZE)
|
||||
) prev_sel (
|
||||
.data_in (addr_matches & ~next_table_x),
|
||||
|
@ -216,13 +217,13 @@ module VX_cache_mshr #(
|
|||
next_table <= next_table_n;
|
||||
end
|
||||
|
||||
`RUNTIME_ASSERT((~allocate_fire || ~valid_table[allocate_id_r]), ("%t: *** %s-bank%0d inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, BANK_ID,
|
||||
`RUNTIME_ASSERT((~allocate_fire || ~valid_table[allocate_id_r]), ("%t: *** %s inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id_r, lkp_req_uuid))
|
||||
|
||||
`RUNTIME_ASSERT((~finalize_valid || valid_table[finalize_id]), ("%t: *** %s-bank%0d invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, BANK_ID,
|
||||
`RUNTIME_ASSERT((~finalize_valid || valid_table[finalize_id]), ("%t: *** %s invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_table[finalize_id], BANK_ID), finalize_id, fin_req_uuid))
|
||||
|
||||
`RUNTIME_ASSERT((~fill_valid || valid_table[fill_id]), ("%t: *** %s-bank%0d invalid fill: addr=0x%0h, id=%0d", $time, INSTANCE_ID, BANK_ID,
|
||||
`RUNTIME_ASSERT((~fill_valid || valid_table[fill_id]), ("%t: *** %s invalid fill: addr=0x%0h, id=%0d", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), fill_id))
|
||||
|
||||
VX_dp_ram #(
|
||||
|
@ -231,9 +232,10 @@ module VX_cache_mshr #(
|
|||
.LUTRAM (1)
|
||||
) entries (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (1'b1),
|
||||
.write (allocate_valid),
|
||||
`UNUSED_PIN (wren),
|
||||
.wren (1'b1),
|
||||
.waddr (allocate_id_r),
|
||||
.wdata (allocate_data),
|
||||
.raddr (dequeue_id_r),
|
||||
|
@ -251,7 +253,9 @@ module VX_cache_mshr #(
|
|||
assign dequeue_rw = write_table[dequeue_id_r];
|
||||
assign dequeue_id = dequeue_id_r;
|
||||
|
||||
assign lookup_matches = addr_matches & ~write_table;
|
||||
// return pending entries for the given cache line
|
||||
assign lookup_pending = addr_matches;
|
||||
assign lookup_rw = write_table;
|
||||
|
||||
`UNUSED_VAR (lookup_valid)
|
||||
|
||||
|
@ -263,35 +267,42 @@ module VX_cache_mshr #(
|
|||
end else begin
|
||||
show_table <= allocate_fire || lookup_valid || finalize_valid || fill_valid || dequeue_fire;
|
||||
end
|
||||
if (allocate_fire)
|
||||
`TRACE(3, ("%d: %s-bank%0d mshr-allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_prev, allocate_id, lkp_req_uuid));
|
||||
if (lookup_valid)
|
||||
`TRACE(3, ("%d: %s-bank%0d mshr-lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID, BANK_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_matches, lkp_req_uuid));
|
||||
if (finalize_valid)
|
||||
`TRACE(3, ("%d: %s-bank%0d mshr-finalize release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID,
|
||||
finalize_release, finalize_pending, finalize_prev, finalize_id, fin_req_uuid));
|
||||
if (fill_valid)
|
||||
`TRACE(3, ("%d: %s-bank%0d mshr-fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID, BANK_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), `CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id));
|
||||
if (dequeue_fire)
|
||||
`TRACE(3, ("%d: %s-bank%0d mshr-dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_uuid));
|
||||
if (allocate_fire) begin
|
||||
`TRACE(3, ("%t: %s allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_prev, allocate_id, lkp_req_uuid))
|
||||
end
|
||||
if (lookup_valid) begin
|
||||
`TRACE(3, ("%t: %s lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_pending, lkp_req_uuid))
|
||||
end
|
||||
if (finalize_valid) begin
|
||||
`TRACE(3, ("%t: %s finalize release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
finalize_release, finalize_pending, finalize_prev, finalize_id, fin_req_uuid))
|
||||
end
|
||||
if (fill_valid) begin
|
||||
`TRACE(3, ("%t: %s fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), `CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id))
|
||||
end
|
||||
if (dequeue_fire) begin
|
||||
`TRACE(3, ("%t: %s dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_uuid))
|
||||
end
|
||||
if (show_table) begin
|
||||
`TRACE(3, ("%d: %s-bank%0d mshr-table", $time, INSTANCE_ID, BANK_ID));
|
||||
`TRACE(3, ("%t: %s table", $time, INSTANCE_ID))
|
||||
for (integer i = 0; i < MSHR_SIZE; ++i) begin
|
||||
if (valid_table[i]) begin
|
||||
`TRACE(3, (" %0d=0x%0h", i, `CS_LINE_TO_FULL_ADDR(addr_table[i], BANK_ID)));
|
||||
if (write_table[i])
|
||||
`TRACE(3, ("(w)"));
|
||||
else
|
||||
`TRACE(3, ("(r)"));
|
||||
if (next_table[i])
|
||||
`TRACE(3, ("->%0d", next_index[i]));
|
||||
`TRACE(3, (" %0d=0x%0h", i, `CS_LINE_TO_FULL_ADDR(addr_table[i], BANK_ID)))
|
||||
if (write_table[i]) begin
|
||||
`TRACE(3, ("(w)"))
|
||||
end else begin
|
||||
`TRACE(3, ("(r)"))
|
||||
end
|
||||
if (next_table[i]) begin
|
||||
`TRACE(3, ("->%0d", next_index[i]))
|
||||
end
|
||||
end
|
||||
end
|
||||
`TRACE(3, ("\n"));
|
||||
`TRACE(3, ("\n"))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
148
hw/rtl/cache/VX_cache_tags.sv
vendored
148
hw/rtl/cache/VX_cache_tags.sv
vendored
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -17,15 +17,17 @@ module VX_cache_tags #(
|
|||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter BANK_ID = 0,
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
parameter CACHE_SIZE = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 16,
|
||||
parameter LINE_SIZE = 16,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 1,
|
||||
parameter NUM_WAYS = 1,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 1,
|
||||
parameter WORD_SIZE = 1,
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0
|
||||
) (
|
||||
|
@ -38,79 +40,139 @@ module VX_cache_tags #(
|
|||
|
||||
input wire stall,
|
||||
|
||||
// read/fill
|
||||
// init/fill/lookup
|
||||
input wire init,
|
||||
input wire flush,
|
||||
input wire fill,
|
||||
input wire write,
|
||||
input wire lookup,
|
||||
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
|
||||
input wire fill,
|
||||
input wire init,
|
||||
output wire [NUM_WAYS-1:0] way_sel,
|
||||
output wire [NUM_WAYS-1:0] tag_matches
|
||||
input wire [NUM_WAYS-1:0] way_sel,
|
||||
output wire [NUM_WAYS-1:0] tag_matches,
|
||||
|
||||
// eviction
|
||||
output wire evict_dirty,
|
||||
output wire [NUM_WAYS-1:0] evict_way,
|
||||
output wire [`CS_TAG_SEL_BITS-1:0] evict_tag
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_PARAM (BANK_ID)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (lookup)
|
||||
|
||||
localparam TAG_WIDTH = 1 + `CS_TAG_SEL_BITS;
|
||||
// valid, dirty, tag
|
||||
localparam TAG_WIDTH = 1 + WRITEBACK + `CS_TAG_SEL_BITS;
|
||||
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
|
||||
wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_TAG_ADDR(line_addr);
|
||||
wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_ADDR_TAG(line_addr);
|
||||
|
||||
if (NUM_WAYS > 1) begin
|
||||
reg [NUM_WAYS-1:0] repl_way;
|
||||
wire [NUM_WAYS-1:0][`CS_TAG_SEL_BITS-1:0] read_tag;
|
||||
wire [NUM_WAYS-1:0] read_valid;
|
||||
wire [NUM_WAYS-1:0] read_dirty;
|
||||
|
||||
if (NUM_WAYS > 1) begin : g_evict_way
|
||||
reg [NUM_WAYS-1:0] evict_way_r;
|
||||
// cyclic assignment of replacement way
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
repl_way <= 1;
|
||||
end else if (~stall) begin // hold the value on stalls prevent filling different slots twice
|
||||
repl_way <= {repl_way[NUM_WAYS-2:0], repl_way[NUM_WAYS-1]};
|
||||
evict_way_r <= 1;
|
||||
end else if (~stall) begin // holding the value on stalls prevents filling different slots twice
|
||||
evict_way_r <= {evict_way_r[NUM_WAYS-2:0], evict_way_r[NUM_WAYS-1]};
|
||||
end
|
||||
end
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin
|
||||
assign way_sel[i] = fill && repl_way[i];
|
||||
end
|
||||
end else begin
|
||||
|
||||
assign evict_way = fill ? evict_way_r : way_sel;
|
||||
|
||||
VX_onehot_mux #(
|
||||
.DATAW (`CS_TAG_SEL_BITS),
|
||||
.N (NUM_WAYS)
|
||||
) evict_tag_sel (
|
||||
.data_in (read_tag),
|
||||
.sel_in (evict_way),
|
||||
.data_out (evict_tag)
|
||||
);
|
||||
end else begin : g_evict_way_0
|
||||
`UNUSED_VAR (stall)
|
||||
assign way_sel = fill;
|
||||
assign evict_way = 1'b1;
|
||||
assign evict_tag = read_tag;
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin
|
||||
wire [`CS_TAG_SEL_BITS-1:0] read_tag;
|
||||
wire read_valid;
|
||||
// fill and flush need to also read in writeback mode
|
||||
wire fill_s = fill && (!WRITEBACK || ~stall);
|
||||
wire flush_s = flush && (!WRITEBACK || ~stall);
|
||||
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_store
|
||||
|
||||
wire do_fill = fill_s && evict_way[i];
|
||||
wire do_flush = flush_s && (!WRITEBACK || way_sel[i]); // flush the whole line in writethrough mode
|
||||
wire do_write = WRITEBACK && write && tag_matches[i];
|
||||
|
||||
wire line_read = (WRITEBACK && (fill_s || flush_s));
|
||||
wire line_write = init || do_fill || do_flush || do_write;
|
||||
wire line_valid = ~(init || flush);
|
||||
|
||||
wire [TAG_WIDTH-1:0] line_wdata;
|
||||
wire [TAG_WIDTH-1:0] line_rdata;
|
||||
|
||||
if (WRITEBACK) begin : g_writeback
|
||||
assign line_wdata = {line_valid, write, line_tag};
|
||||
assign {read_valid[i], read_dirty[i], read_tag[i]} = line_rdata;
|
||||
end else begin : g_writethrough
|
||||
assign line_wdata = {line_valid, line_tag};
|
||||
assign {read_valid[i], read_tag[i]} = line_rdata;
|
||||
assign read_dirty[i] = 1'b0;
|
||||
end
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (TAG_WIDTH),
|
||||
.SIZE (`CS_LINES_PER_BANK),
|
||||
.NO_RWCHECK (1)
|
||||
.NO_RWCHECK (1),
|
||||
.RW_ASSERT (1)
|
||||
) tag_store (
|
||||
.clk (clk),
|
||||
.read (1'b1),
|
||||
.write (way_sel[i] || init),
|
||||
`UNUSED_PIN (wren),
|
||||
.reset (reset),
|
||||
.read (line_read),
|
||||
.write (line_write),
|
||||
.wren (1'b1),
|
||||
.addr (line_sel),
|
||||
.wdata ({~init, line_tag}),
|
||||
.rdata ({read_valid, read_tag})
|
||||
.wdata (line_wdata),
|
||||
.rdata (line_rdata)
|
||||
);
|
||||
|
||||
assign tag_matches[i] = read_valid && (line_tag == read_tag);
|
||||
end
|
||||
|
||||
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_matches
|
||||
assign tag_matches[i] = read_valid[i] && (line_tag == read_tag[i]);
|
||||
end
|
||||
|
||||
assign evict_dirty = | (read_dirty & evict_way);
|
||||
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_sel};
|
||||
always @(posedge clk) begin
|
||||
if (fill && ~stall) begin
|
||||
`TRACE(3, ("%d: %s-bank%0d tag-fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, line_tag));
|
||||
`TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_sel, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID)))
|
||||
end
|
||||
if (init) begin
|
||||
`TRACE(3, ("%d: %s-bank%0d tag-init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel));
|
||||
`TRACE(3, ("%t: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel))
|
||||
end
|
||||
if (flush && ~stall) begin
|
||||
`TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_sel, line_sel, evict_dirty))
|
||||
end
|
||||
if (lookup && ~stall) begin
|
||||
if (tag_matches != 0) begin
|
||||
`TRACE(3, ("%d: %s-bank%0d tag-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, line_tag, req_uuid));
|
||||
if (write) begin
|
||||
`TRACE(3, ("%t: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid))
|
||||
end else begin
|
||||
`TRACE(3, ("%t: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid))
|
||||
end
|
||||
end else begin
|
||||
`TRACE(3, ("%d: %s-bank%0d tag-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid));
|
||||
if (write) begin
|
||||
`TRACE(3, ("%t: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid))
|
||||
end else begin
|
||||
`TRACE(3, ("%t: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid))
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
52
hw/rtl/cache/VX_cache_top.sv
vendored
52
hw/rtl/cache/VX_cache_top.sv
vendored
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -20,20 +20,20 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
parameter NUM_REQS = 4,
|
||||
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 16384,
|
||||
parameter CACHE_SIZE = 16384,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 64,
|
||||
parameter LINE_SIZE = 64,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 4,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 4,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
parameter WORD_SIZE = 4,
|
||||
|
||||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 2,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MSHR_SIZE = 16,
|
||||
parameter MSHR_SIZE = 16,
|
||||
// Memory Response Queue Size
|
||||
parameter MRSQ_SIZE = 0,
|
||||
// Memory Request Queue Size
|
||||
|
@ -42,6 +42,12 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
|
||||
// Enable dirty bytes on writeback
|
||||
parameter DIRTY_BYTES = 0,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
|
@ -55,7 +61,7 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
parameter MEM_OUT_BUF = 2,
|
||||
|
||||
parameter MEM_TAG_WIDTH = `CLOG2(MSHR_SIZE) + `CLOG2(NUM_BANKS)
|
||||
) (
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
@ -69,7 +75,7 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
input wire [NUM_REQS-1:0] core_req_rw,
|
||||
input wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen,
|
||||
input wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr,
|
||||
input wire [NUM_REQS-1:0][`ADDR_TYPE_WIDTH-1:0] core_req_atype,
|
||||
input wire [NUM_REQS-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] core_req_flags,
|
||||
input wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data,
|
||||
input wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag,
|
||||
output wire [NUM_REQS-1:0] core_req_ready,
|
||||
|
@ -82,17 +88,17 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
|
||||
// Memory request
|
||||
output wire mem_req_valid,
|
||||
output wire mem_req_rw,
|
||||
output wire mem_req_rw,
|
||||
output wire [LINE_SIZE-1:0] mem_req_byteen,
|
||||
output wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire [`CS_LINE_WIDTH-1:0] mem_req_data,
|
||||
output wire [MEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
output wire [`CS_LINE_WIDTH-1:0] mem_req_data,
|
||||
output wire [MEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
input wire mem_req_ready,
|
||||
|
||||
|
||||
// Memory response
|
||||
input wire mem_rsp_valid,
|
||||
input wire mem_rsp_valid,
|
||||
input wire [`CS_LINE_WIDTH-1:0] mem_rsp_data,
|
||||
input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
output wire mem_rsp_ready
|
||||
);
|
||||
VX_mem_bus_if #(
|
||||
|
@ -111,7 +117,7 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
assign core_bus_if[i].req_data.rw = core_req_rw[i];
|
||||
assign core_bus_if[i].req_data.byteen = core_req_byteen[i];
|
||||
assign core_bus_if[i].req_data.addr = core_req_addr[i];
|
||||
assign core_bus_if[i].req_data.atype = core_req_atype[i];
|
||||
assign core_bus_if[i].req_data.flags = core_req_flags[i];
|
||||
assign core_bus_if[i].req_data.data = core_req_data[i];
|
||||
assign core_bus_if[i].req_data.tag = core_req_tag[i];
|
||||
assign core_req_ready[i] = core_bus_if[i].req_ready;
|
||||
|
@ -127,18 +133,18 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
|
||||
// Memory request
|
||||
assign mem_req_valid = mem_bus_if.req_valid;
|
||||
assign mem_req_rw = mem_bus_if.req_data.rw;
|
||||
assign mem_req_rw = mem_bus_if.req_data.rw;
|
||||
assign mem_req_byteen = mem_bus_if.req_data.byteen;
|
||||
assign mem_req_addr = mem_bus_if.req_data.addr;
|
||||
assign mem_req_data = mem_bus_if.req_data.data;
|
||||
assign mem_req_tag = mem_bus_if.req_data.tag;
|
||||
assign mem_req_data = mem_bus_if.req_data.data;
|
||||
assign mem_req_tag = mem_bus_if.req_data.tag;
|
||||
assign mem_bus_if.req_ready = mem_req_ready;
|
||||
`UNUSED_VAR (mem_bus_if.req_data.atype)
|
||||
|
||||
`UNUSED_VAR (mem_bus_if.req_data.flags)
|
||||
|
||||
// Memory response
|
||||
assign mem_bus_if.rsp_valid = mem_rsp_valid;
|
||||
assign mem_bus_if.rsp_valid = mem_rsp_valid;
|
||||
assign mem_bus_if.rsp_data.data = mem_rsp_data;
|
||||
assign mem_bus_if.rsp_data.tag = mem_rsp_tag;
|
||||
assign mem_bus_if.rsp_data.tag = mem_rsp_tag;
|
||||
assign mem_rsp_ready = mem_bus_if.rsp_ready;
|
||||
|
||||
VX_cache #(
|
||||
|
@ -156,6 +162,8 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.DIRTY_BYTES (DIRTY_BYTES),
|
||||
.CORE_OUT_BUF (CORE_OUT_BUF),
|
||||
.MEM_OUT_BUF (MEM_OUT_BUF)
|
||||
) cache (
|
||||
|
|
173
hw/rtl/cache/VX_cache_wrap.sv
vendored
173
hw/rtl/cache/VX_cache_wrap.sv
vendored
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -23,20 +23,20 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
|
||||
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 4096,
|
||||
parameter CACHE_SIZE = 4096,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 64,
|
||||
parameter LINE_SIZE = 64,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 1,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
parameter WORD_SIZE = 4,
|
||||
|
||||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 2,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MSHR_SIZE = 8,
|
||||
parameter MSHR_SIZE = 8,
|
||||
// Memory Response Queue Size
|
||||
parameter MRSQ_SIZE = 0,
|
||||
// Memory Request Queue Size
|
||||
|
@ -45,6 +45,12 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
|
||||
// Enable dirty bytes on writeback
|
||||
parameter DIRTY_BYTES = 0,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
|
@ -63,7 +69,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
// Memory request output buffer
|
||||
parameter MEM_OUT_BUF = 0
|
||||
) (
|
||||
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
@ -78,12 +84,11 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
|
||||
`STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter"))
|
||||
|
||||
localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE);
|
||||
localparam CACHE_MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS;
|
||||
|
||||
localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
|
||||
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
|
||||
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS));
|
||||
localparam CACHE_MEM_TAG_WIDTH = `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, UUID_WIDTH);
|
||||
|
||||
localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
|
||||
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH, UUID_WIDTH) :
|
||||
CACHE_MEM_TAG_WIDTH);
|
||||
|
||||
localparam NC_OR_BYPASS = (NC_ENABLE || PASSTHRU);
|
||||
|
||||
|
@ -97,9 +102,12 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH (CACHE_MEM_TAG_WIDTH)
|
||||
) mem_bus_cache_if();
|
||||
|
||||
if (NC_OR_BYPASS) begin
|
||||
|
||||
`RESET_RELAY (nc_bypass_reset, reset);
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (LINE_SIZE),
|
||||
.TAG_WIDTH (MEM_TAG_WIDTH)
|
||||
) mem_bus_tmp_if();
|
||||
|
||||
if (NC_OR_BYPASS) begin : g_bypass
|
||||
|
||||
VX_cache_bypass #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
|
@ -108,13 +116,13 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
.PASSTHRU (PASSTHRU),
|
||||
.NC_ENABLE (PASSTHRU ? 0 : NC_ENABLE),
|
||||
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
|
||||
.CORE_ADDR_WIDTH (`CS_WORD_ADDR_WIDTH),
|
||||
.CORE_ADDR_WIDTH (`CS_WORD_ADDR_WIDTH),
|
||||
.CORE_TAG_WIDTH (TAG_WIDTH),
|
||||
|
||||
.MEM_ADDR_WIDTH (`CS_MEM_ADDR_WIDTH),
|
||||
|
||||
.MEM_ADDR_WIDTH (`CS_MEM_ADDR_WIDTH),
|
||||
.MEM_TAG_IN_WIDTH (CACHE_MEM_TAG_WIDTH),
|
||||
.MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH),
|
||||
|
||||
|
@ -124,51 +132,31 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
.MEM_OUT_BUF (MEM_OUT_BUF)
|
||||
) cache_bypass (
|
||||
.clk (clk),
|
||||
.reset (nc_bypass_reset),
|
||||
.reset (reset),
|
||||
|
||||
.core_bus_in_if (core_bus_if),
|
||||
.core_bus_out_if(core_bus_cache_if),
|
||||
|
||||
.mem_bus_in_if (mem_bus_cache_if),
|
||||
.mem_bus_out_if (mem_bus_if)
|
||||
.mem_bus_out_if (mem_bus_tmp_if)
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
`ASSIGN_VX_MEM_BUS_IF (core_bus_cache_if[i], core_bus_if[i]);
|
||||
end else begin : g_no_bypass
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_cache_if
|
||||
`ASSIGN_VX_MEM_BUS_IF (core_bus_cache_if[i], core_bus_if[i]);
|
||||
end
|
||||
|
||||
`ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_cache_if);
|
||||
end
|
||||
`ASSIGN_VX_MEM_BUS_IF (mem_bus_tmp_if, mem_bus_cache_if);
|
||||
end
|
||||
|
||||
if (PASSTHRU != 0) begin
|
||||
if (WRITE_ENABLE) begin : g_mem_bus_if
|
||||
`ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if);
|
||||
end else begin : g_mem_bus_if_ro
|
||||
`ASSIGN_VX_MEM_BUS_RO_IF (mem_bus_if, mem_bus_tmp_if);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
`UNUSED_VAR (core_bus_cache_if[i].req_valid)
|
||||
`UNUSED_VAR (core_bus_cache_if[i].req_data)
|
||||
assign core_bus_cache_if[i].req_ready = 0;
|
||||
|
||||
assign core_bus_cache_if[i].rsp_valid = 0;
|
||||
assign core_bus_cache_if[i].rsp_data = '0;
|
||||
`UNUSED_VAR (core_bus_cache_if[i].rsp_ready)
|
||||
end
|
||||
|
||||
assign mem_bus_cache_if.req_valid = 0;
|
||||
assign mem_bus_cache_if.req_data = '0;
|
||||
`UNUSED_VAR (mem_bus_cache_if.req_ready)
|
||||
|
||||
`UNUSED_VAR (mem_bus_cache_if.rsp_valid)
|
||||
`UNUSED_VAR (mem_bus_cache_if.rsp_data)
|
||||
assign mem_bus_cache_if.rsp_ready = 0;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
assign cache_perf = '0;
|
||||
`endif
|
||||
|
||||
end else begin
|
||||
|
||||
`RESET_RELAY (cache_reset, reset);
|
||||
if (PASSTHRU == 0) begin : g_cache
|
||||
|
||||
VX_cache #(
|
||||
.INSTANCE_ID (INSTANCE_ID),
|
||||
|
@ -183,32 +171,57 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
.MRSQ_SIZE (MRSQ_SIZE),
|
||||
.MREQ_SIZE (MREQ_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.DIRTY_BYTES (DIRTY_BYTES),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.CORE_OUT_BUF (NC_OR_BYPASS ? 1 : CORE_OUT_BUF),
|
||||
.MEM_OUT_BUF (NC_OR_BYPASS ? 1 : MEM_OUT_BUF)
|
||||
) cache (
|
||||
.clk (clk),
|
||||
.reset (cache_reset),
|
||||
.reset (reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.cache_perf (cache_perf),
|
||||
`endif
|
||||
.core_bus_if (core_bus_cache_if),
|
||||
.mem_bus_if (mem_bus_cache_if)
|
||||
);
|
||||
|
||||
);
|
||||
|
||||
end else begin : g_passthru
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_cache_if
|
||||
`UNUSED_VAR (core_bus_cache_if[i].req_valid)
|
||||
`UNUSED_VAR (core_bus_cache_if[i].req_data)
|
||||
assign core_bus_cache_if[i].req_ready = 0;
|
||||
|
||||
assign core_bus_cache_if[i].rsp_valid = 0;
|
||||
assign core_bus_cache_if[i].rsp_data = '0;
|
||||
`UNUSED_VAR (core_bus_cache_if[i].rsp_ready)
|
||||
end
|
||||
|
||||
assign mem_bus_cache_if.req_valid = 0;
|
||||
assign mem_bus_cache_if.req_data = '0;
|
||||
`UNUSED_VAR (mem_bus_cache_if.req_ready)
|
||||
|
||||
`UNUSED_VAR (mem_bus_cache_if.rsp_valid)
|
||||
`UNUSED_VAR (mem_bus_cache_if.rsp_data)
|
||||
assign mem_bus_cache_if.rsp_ready = 0;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
assign cache_perf = '0;
|
||||
`endif
|
||||
|
||||
end
|
||||
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_trace
|
||||
wire [`UP(UUID_WIDTH)-1:0] core_req_uuid;
|
||||
wire [`UP(UUID_WIDTH)-1:0] core_rsp_uuid;
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
if (UUID_WIDTH != 0) begin : g_core_rsp_uuid
|
||||
assign core_req_uuid = core_bus_if[i].req_data.tag[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
assign core_rsp_uuid = core_bus_if[i].rsp_data.tag[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
end else begin
|
||||
end else begin : g_no_core_rsp_uuid
|
||||
assign core_req_uuid = 0;
|
||||
assign core_rsp_uuid = 0;
|
||||
end
|
||||
|
@ -218,24 +231,25 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
|
||||
always @(posedge clk) begin
|
||||
if (core_req_fire) begin
|
||||
if (core_bus_if[i].req_data.rw)
|
||||
`TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid));
|
||||
else
|
||||
`TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid));
|
||||
if (core_bus_if[i].req_data.rw) begin
|
||||
`TRACE(1, ("%t: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid))
|
||||
end else begin
|
||||
`TRACE(1, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid))
|
||||
end
|
||||
end
|
||||
if (core_rsp_fire) begin
|
||||
`TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid));
|
||||
end
|
||||
`TRACE(1, ("%t: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid))
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire [`UP(UUID_WIDTH)-1:0] mem_req_uuid;
|
||||
wire [`UP(UUID_WIDTH)-1:0] mem_rsp_uuid;
|
||||
|
||||
if ((UUID_WIDTH != 0) && (NC_OR_BYPASS != 0)) begin
|
||||
if ((UUID_WIDTH != 0) && (NC_OR_BYPASS != 0)) begin : g_mem_req_uuid
|
||||
assign mem_req_uuid = mem_bus_if.req_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
assign mem_rsp_uuid = mem_bus_if.rsp_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
end else begin
|
||||
end else begin : g_no_mem_req_uuid
|
||||
assign mem_req_uuid = 0;
|
||||
assign mem_rsp_uuid = 0;
|
||||
end
|
||||
|
@ -245,18 +259,19 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
|
||||
always @(posedge clk) begin
|
||||
if (mem_req_fire) begin
|
||||
if (mem_bus_if.req_data.rw)
|
||||
`TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid));
|
||||
else
|
||||
`TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid));
|
||||
if (mem_bus_if.req_data.rw) begin
|
||||
`TRACE(1, ("%t: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n",
|
||||
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid))
|
||||
end else begin
|
||||
`TRACE(1, ("%t: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid))
|
||||
end
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid));
|
||||
`TRACE(1, ("%t: %s mem-rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n",
|
||||
$time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid))
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_alu_int #(
|
||||
parameter CORE_ID = 0,
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter BLOCK_IDX = 0,
|
||||
parameter NUM_LANES = 1
|
||||
) (
|
||||
|
@ -29,7 +29,7 @@ module VX_alu_int #(
|
|||
VX_branch_ctl_if.master branch_ctl_if
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam LANE_BITS = `CLOG2(NUM_LANES);
|
||||
localparam LANE_WIDTH = `UP(LANE_BITS);
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
|
@ -71,19 +71,19 @@ module VX_alu_int #(
|
|||
wire [NUM_LANES-1:0][`XLEN-1:0] alu_in2_imm = execute_if.data.op_args.alu.use_imm ? {NUM_LANES{`SEXT(`XLEN, execute_if.data.op_args.alu.imm)}} : alu_in2;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] alu_in2_br = (execute_if.data.op_args.alu.use_imm && ~is_br_op) ? {NUM_LANES{`SEXT(`XLEN, execute_if.data.op_args.alu.imm)}} : alu_in2;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_add_result
|
||||
assign add_result[i] = alu_in1_PC[i] + alu_in2_imm[i];
|
||||
assign add_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] + alu_in2_imm[i][31:0]));
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_sub_result
|
||||
wire [`XLEN:0] sub_in1 = {is_signed & alu_in1[i][`XLEN-1], alu_in1[i]};
|
||||
wire [`XLEN:0] sub_in2 = {is_signed & alu_in2_br[i][`XLEN-1], alu_in2_br[i]};
|
||||
assign sub_result[i] = sub_in1 - sub_in2;
|
||||
assign sub_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] - alu_in2_imm[i][31:0]));
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_shr_result
|
||||
wire [`XLEN:0] shr_in1 = {is_signed && alu_in1[i][`XLEN-1], alu_in1[i]};
|
||||
always @(*) begin
|
||||
case (alu_op[1:0])
|
||||
|
@ -102,7 +102,7 @@ module VX_alu_int #(
|
|||
assign shr_result_w[i] = `XLEN'($signed(shr_res_w));
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_msc_result
|
||||
always @(*) begin
|
||||
case (alu_op[1:0])
|
||||
2'b00: msc_result[i] = alu_in1[i] & alu_in2_imm[i]; // AND
|
||||
|
@ -114,14 +114,14 @@ module VX_alu_int #(
|
|||
assign msc_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] << alu_in2_imm[i][4:0])); // SLLW
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_alu_result
|
||||
wire [`XLEN-1:0] slt_br_result = `XLEN'({is_br_op && ~(| sub_result[i][`XLEN-1:0]), sub_result[i][`XLEN]});
|
||||
wire [`XLEN-1:0] sub_slt_br_result = (is_sub_op && ~is_br_op) ? sub_result[i][`XLEN-1:0] : slt_br_result;
|
||||
always @(*) begin
|
||||
case ({is_alu_w, op_class})
|
||||
3'b000: alu_result[i] = add_result[i]; // ADD, LUI, AUIPC
|
||||
3'b001: alu_result[i] = sub_slt_br_result; // SUB, SLTU, SLTI, BR*
|
||||
3'b010: alu_result[i] = shr_zic_result[i]; // SRL, SRA, SRLI, SRAI, CZERO*
|
||||
3'b010: alu_result[i] = shr_zic_result[i]; // SRL, SRA, SRLI, SRAI, CZERO*
|
||||
3'b011: alu_result[i] = msc_result[i]; // AND, OR, XOR, SLL, SLLI
|
||||
3'b100: alu_result[i] = add_result_w[i]; // ADDIW, ADDW
|
||||
3'b101: alu_result[i] = sub_result_w[i]; // SUBW
|
||||
|
@ -141,9 +141,9 @@ module VX_alu_int #(
|
|||
|
||||
assign cbr_dest = add_result[0][1 +: `PC_BITS];
|
||||
|
||||
if (LANE_BITS != 0) begin
|
||||
if (LANE_BITS != 0) begin : g_tid
|
||||
assign tid = execute_if.data.tid[0 +: LANE_BITS];
|
||||
end else begin
|
||||
end else begin : g_tid_0
|
||||
assign tid = 0;
|
||||
end
|
||||
|
||||
|
@ -181,11 +181,11 @@ module VX_alu_int #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in ({br_enable, br_wid, br_taken, br_dest}),
|
||||
.data_in ({br_enable, br_wid, br_taken, br_dest}),
|
||||
.data_out ({branch_ctl_if.valid, branch_ctl_if.wid, branch_ctl_if.taken, branch_ctl_if.dest})
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_commit
|
||||
assign commit_if.data.data[i] = (is_br_op_r && is_br_static) ? {(PC_r + `PC_BITS'(2)), 1'd0} : alu_result_r[i];
|
||||
end
|
||||
|
||||
|
@ -193,9 +193,9 @@ module VX_alu_int #(
|
|||
|
||||
`ifdef DBG_TRACE_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (branch_ctl_if.valid) begin
|
||||
`TRACE(1, ("%d: core%0d-branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n",
|
||||
$time, CORE_ID, branch_ctl_if.wid, {commit_if.data.PC, 1'b0}, branch_ctl_if.taken, {branch_ctl_if.dest, 1'b0}, commit_if.data.uuid));
|
||||
if (br_enable) begin
|
||||
`TRACE(1, ("%t: %s branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, br_wid, {commit_if.data.PC, 1'b0}, br_taken, {br_dest, 1'b0}, commit_if.data.uuid))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_alu_muldiv #(
|
||||
parameter CORE_ID = 0,
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter NUM_LANES = 1
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -26,7 +26,7 @@ module VX_alu_muldiv #(
|
|||
// Outputs
|
||||
VX_commit_if.master commit_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam TAG_WIDTH = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + PID_WIDTH + 1 + 1;
|
||||
|
@ -68,8 +68,8 @@ module VX_alu_muldiv #(
|
|||
|
||||
wire mul_fire_in = mul_valid_in && mul_ready_in;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [`XLEN-1:0] mul_resultl, mul_resulth;
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mul_result_tmp
|
||||
reg [`XLEN-1:0] mul_resultl, mul_resulth;
|
||||
wire [`XLEN-1:0] mul_in1 = is_alu_w ? (execute_if.data.rs1_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs1_data[i];
|
||||
wire [`XLEN-1:0] mul_in2 = is_alu_w ? (execute_if.data.rs2_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs2_data[i];
|
||||
always @(*) begin
|
||||
|
@ -83,7 +83,7 @@ module VX_alu_muldiv #(
|
|||
.DEPTH (`LATENCY_IMUL),
|
||||
.RESETW (1)
|
||||
) mul_shift_reg (
|
||||
.clk(clk),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (mul_ready_in),
|
||||
.data_in ({mul_valid_in, execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, mul_result_tmp}),
|
||||
|
@ -103,7 +103,7 @@ module VX_alu_muldiv #(
|
|||
wire [NUM_LANES-1:0][`XLEN:0] mul_in1;
|
||||
wire [NUM_LANES-1:0][`XLEN:0] mul_in2;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mul_in
|
||||
assign mul_in1[i] = is_alu_w ? {{(`XLEN-31){execute_if.data.rs1_data[i][31]}}, execute_if.data.rs1_data[i][31:0]} : {is_signed_mul_a && execute_if.data.rs1_data[i][`XLEN-1], execute_if.data.rs1_data[i]};
|
||||
assign mul_in2[i] = is_alu_w ? {{(`XLEN-31){execute_if.data.rs2_data[i][31]}}, execute_if.data.rs2_data[i][31:0]} : {is_signed_mul_b && execute_if.data.rs2_data[i][`XLEN-1], execute_if.data.rs2_data[i]};
|
||||
end
|
||||
|
@ -149,7 +149,7 @@ module VX_alu_muldiv #(
|
|||
|
||||
`else
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_multiplier
|
||||
wire [`XLEN:0] mul_in1 = {is_signed_mul_a && execute_if.data.rs1_data[i][`XLEN-1], execute_if.data.rs1_data[i]};
|
||||
wire [`XLEN:0] mul_in2 = {is_signed_mul_b && execute_if.data.rs2_data[i][`XLEN-1], execute_if.data.rs2_data[i]};
|
||||
|
||||
|
@ -184,7 +184,7 @@ module VX_alu_muldiv #(
|
|||
|
||||
`endif
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mul_result_out
|
||||
`ifdef XLEN_64
|
||||
assign mul_result_out[i] = is_mulh_out ? mul_result_tmp[i][2*(`XLEN)-1:`XLEN] :
|
||||
(is_mul_w_out ? `XLEN'($signed(mul_result_tmp[i][31:0])) :
|
||||
|
@ -219,7 +219,7 @@ module VX_alu_muldiv #(
|
|||
wire [NUM_LANES-1:0][`XLEN-1:0] div_in1;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] div_in2;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_div_in
|
||||
`ifdef XLEN_64
|
||||
assign div_in1[i] = is_alu_w ? {{(`XLEN-32){is_signed_op && execute_if.data.rs1_data[i][31]}}, execute_if.data.rs1_data[i][31:0]}: execute_if.data.rs1_data[i];
|
||||
assign div_in2[i] = is_alu_w ? {{(`XLEN-32){is_signed_op && execute_if.data.rs2_data[i][31]}}, execute_if.data.rs2_data[i][31:0]}: execute_if.data.rs2_data[i];
|
||||
|
@ -234,8 +234,8 @@ module VX_alu_muldiv #(
|
|||
wire [NUM_LANES-1:0][`XLEN-1:0] div_result_in;
|
||||
wire div_fire_in = div_valid_in && div_ready_in;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [`XLEN-1:0] div_quotient, div_remainder;
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_div_result_in
|
||||
reg [`XLEN-1:0] div_quotient, div_remainder;
|
||||
always @(*) begin
|
||||
dpi_idiv (div_fire_in, is_signed_op, div_in1[i], div_in2[i], div_quotient, div_remainder);
|
||||
end
|
||||
|
@ -306,7 +306,7 @@ module VX_alu_muldiv #(
|
|||
|
||||
assign {div_uuid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, is_rem_op_out, is_div_w_out, div_pid_out, div_sop_out, div_eop_out} = div_tag_r;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_div_result_out
|
||||
`ifdef XLEN_64
|
||||
assign div_result_out[i] = is_rem_op_out ? (is_div_w_out ? `XLEN'($signed(div_remainder[i][31:0])) : div_remainder[i]) :
|
||||
(is_div_w_out ? `XLEN'($signed(div_quotient[i][31:0])) : div_quotient[i]);
|
||||
|
@ -324,7 +324,8 @@ module VX_alu_muldiv #(
|
|||
VX_stream_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATAW (TAG_WIDTH + (NUM_LANES * `XLEN)),
|
||||
.OUT_BUF (1)
|
||||
.ARBITER ("P"),
|
||||
.OUT_BUF (2)
|
||||
) rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_alu_unit #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -27,23 +27,27 @@ module VX_alu_unit #(
|
|||
VX_branch_ctl_if.master branch_ctl_if [`NUM_ALU_BLOCKS]
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam BLOCK_SIZE = `NUM_ALU_BLOCKS;
|
||||
localparam NUM_LANES = `NUM_ALU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam RSP_ARB_SIZE = 1 + `EXT_M_ENABLED;
|
||||
localparam PARTIAL_BW = (BLOCK_SIZE != `ISSUE_WIDTH) || (NUM_LANES != `NUM_THREADS);
|
||||
localparam PE_COUNT = 1 + `EXT_M_ENABLED;
|
||||
localparam PE_SEL_BITS = `CLOG2(PE_COUNT);
|
||||
localparam PE_IDX_INT = 0;
|
||||
localparam PE_IDX_MDV = PE_IDX_INT + `EXT_M_ENABLED;
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) per_block_execute_if[BLOCK_SIZE]();
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) per_block_commit_if[BLOCK_SIZE]();
|
||||
|
||||
VX_dispatch_unit #(
|
||||
.BLOCK_SIZE (BLOCK_SIZE),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.OUT_BUF (PARTIAL_BW ? 1 : 0)
|
||||
.OUT_BUF (PARTIAL_BW ? 3 : 0)
|
||||
) dispatch_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -51,106 +55,62 @@ module VX_alu_unit #(
|
|||
.execute_if (per_block_execute_if)
|
||||
);
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) per_block_commit_if[BLOCK_SIZE]();
|
||||
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin
|
||||
|
||||
`RESET_RELAY (block_reset, reset);
|
||||
|
||||
wire is_muldiv_op = `EXT_M_ENABLED && (per_block_execute_if[block_idx].data.op_args.alu.xtype == `ALU_TYPE_MULDIV);
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_alus
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) int_execute_if();
|
||||
) pe_execute_if[PE_COUNT]();
|
||||
|
||||
VX_commit_if #(
|
||||
VX_commit_if#(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) int_commit_if();
|
||||
) pe_commit_if[PE_COUNT]();
|
||||
|
||||
assign int_execute_if.valid = per_block_execute_if[block_idx].valid && ~is_muldiv_op;
|
||||
assign int_execute_if.data = per_block_execute_if[block_idx].data;
|
||||
reg [`UP(PE_SEL_BITS)-1:0] pe_select;
|
||||
always @(*) begin
|
||||
pe_select = PE_IDX_INT;
|
||||
if (`EXT_M_ENABLED && (per_block_execute_if[block_idx].data.op_args.alu.xtype == `ALU_TYPE_MULDIV))
|
||||
pe_select = PE_IDX_MDV;
|
||||
end
|
||||
|
||||
`RESET_RELAY (int_reset, block_reset);
|
||||
VX_pe_switch #(
|
||||
.PE_COUNT (PE_COUNT),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.ARBITER ("R"),
|
||||
.REQ_OUT_BUF (0),
|
||||
.RSP_OUT_BUF (PARTIAL_BW ? 1 : 3)
|
||||
) pe_switch (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.pe_sel (pe_select),
|
||||
.execute_in_if (per_block_execute_if[block_idx]),
|
||||
.commit_out_if (per_block_commit_if[block_idx]),
|
||||
.execute_out_if (pe_execute_if),
|
||||
.commit_in_if (pe_commit_if)
|
||||
);
|
||||
|
||||
VX_alu_int #(
|
||||
.CORE_ID (CORE_ID),
|
||||
.INSTANCE_ID ($sformatf("%s-int%0d", INSTANCE_ID, block_idx)),
|
||||
.BLOCK_IDX (block_idx),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) alu_int (
|
||||
.clk (clk),
|
||||
.reset (int_reset),
|
||||
.execute_if (int_execute_if),
|
||||
.reset (reset),
|
||||
.execute_if (pe_execute_if[PE_IDX_INT]),
|
||||
.branch_ctl_if (branch_ctl_if[block_idx]),
|
||||
.commit_if (int_commit_if)
|
||||
.commit_if (pe_commit_if[PE_IDX_INT])
|
||||
);
|
||||
|
||||
`ifdef EXT_M_ENABLE
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) mdv_execute_if();
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) mdv_commit_if();
|
||||
|
||||
assign mdv_execute_if.valid = per_block_execute_if[block_idx].valid && is_muldiv_op;
|
||||
assign mdv_execute_if.data = per_block_execute_if[block_idx].data;
|
||||
|
||||
`RESET_RELAY (mdv_reset, block_reset);
|
||||
|
||||
VX_alu_muldiv #(
|
||||
.CORE_ID (CORE_ID),
|
||||
.INSTANCE_ID ($sformatf("%s-muldiv%0d", INSTANCE_ID, block_idx)),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) mdv_unit (
|
||||
) muldiv_unit (
|
||||
.clk (clk),
|
||||
.reset (mdv_reset),
|
||||
.execute_if (mdv_execute_if),
|
||||
.commit_if (mdv_commit_if)
|
||||
.reset (reset),
|
||||
.execute_if (pe_execute_if[PE_IDX_MDV]),
|
||||
.commit_if (pe_commit_if[PE_IDX_MDV])
|
||||
);
|
||||
|
||||
`endif
|
||||
|
||||
assign per_block_execute_if[block_idx].ready =
|
||||
`ifdef EXT_M_ENABLE
|
||||
is_muldiv_op ? mdv_execute_if.ready :
|
||||
`endif
|
||||
int_execute_if.ready;
|
||||
|
||||
// send response
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (RSP_ARB_SIZE),
|
||||
.DATAW (RSP_ARB_DATAW),
|
||||
.OUT_BUF (PARTIAL_BW ? 1 : 3)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (block_reset),
|
||||
.valid_in ({
|
||||
`ifdef EXT_M_ENABLE
|
||||
mdv_commit_if.valid,
|
||||
`endif
|
||||
int_commit_if.valid
|
||||
}),
|
||||
.ready_in ({
|
||||
`ifdef EXT_M_ENABLE
|
||||
mdv_commit_if.ready,
|
||||
`endif
|
||||
int_commit_if.ready
|
||||
}),
|
||||
.data_in ({
|
||||
`ifdef EXT_M_ENABLE
|
||||
mdv_commit_if.data,
|
||||
`endif
|
||||
int_commit_if.data
|
||||
}),
|
||||
.data_out (per_block_commit_if[block_idx].data),
|
||||
.valid_out (per_block_commit_if[block_idx].valid),
|
||||
.ready_out (per_block_commit_if[block_idx].ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
end
|
||||
|
||||
VX_gather_unit #(
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_commit import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -27,7 +27,7 @@ module VX_commit import VX_gpu_pkg::*; #(
|
|||
VX_commit_csr_if.master commit_csr_if,
|
||||
VX_commit_sched_if.master commit_sched_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + 1 + `NR_BITS + `NUM_THREADS * `XLEN + 1 + 1 + 1;
|
||||
localparam COMMIT_SIZEW = `CLOG2(`NUM_THREADS + 1);
|
||||
localparam COMMIT_ALL_SIZEW = COMMIT_SIZEW + `ISSUE_WIDTH - 1;
|
||||
|
@ -36,20 +36,18 @@ module VX_commit import VX_gpu_pkg::*; #(
|
|||
|
||||
VX_commit_if commit_arb_if[`ISSUE_WIDTH]();
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0] commit_fire;
|
||||
wire [`ISSUE_WIDTH-1:0][`NW_WIDTH-1:0] commit_wid;
|
||||
wire [`ISSUE_WIDTH-1:0][`NUM_THREADS-1:0] commit_tmask;
|
||||
wire [`ISSUE_WIDTH-1:0] commit_eop;
|
||||
wire [`ISSUE_WIDTH-1:0] per_issue_commit_fire;
|
||||
wire [`ISSUE_WIDTH-1:0][`NW_WIDTH-1:0] per_issue_commit_wid;
|
||||
wire [`ISSUE_WIDTH-1:0][`NUM_THREADS-1:0] per_issue_commit_tmask;
|
||||
wire [`ISSUE_WIDTH-1:0] per_issue_commit_eop;
|
||||
|
||||
`RESET_RELAY (arb_reset, reset);
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_commit_arbs
|
||||
|
||||
wire [`NUM_EX_UNITS-1:0] valid_in;
|
||||
wire [`NUM_EX_UNITS-1:0][DATAW-1:0] data_in;
|
||||
wire [`NUM_EX_UNITS-1:0] ready_in;
|
||||
|
||||
for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin
|
||||
for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin : g_data_in
|
||||
assign valid_in[j] = commit_if[j * `ISSUE_WIDTH + i].valid;
|
||||
assign data_in[j] = commit_if[j * `ISSUE_WIDTH + i].data;
|
||||
assign commit_if[j * `ISSUE_WIDTH + i].ready = ready_in[j];
|
||||
|
@ -58,11 +56,11 @@ module VX_commit import VX_gpu_pkg::*; #(
|
|||
VX_stream_arb #(
|
||||
.NUM_INPUTS (`NUM_EX_UNITS),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER ("R"),
|
||||
.ARBITER ("P"),
|
||||
.OUT_BUF (1)
|
||||
) commit_arb (
|
||||
.clk (clk),
|
||||
.reset (arb_reset),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in),
|
||||
.ready_in (ready_in),
|
||||
.data_in (data_in),
|
||||
|
@ -72,10 +70,10 @@ module VX_commit import VX_gpu_pkg::*; #(
|
|||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
assign commit_fire[i] = commit_arb_if[i].valid && commit_arb_if[i].ready;
|
||||
assign commit_tmask[i]= {`NUM_THREADS{commit_fire[i]}} & commit_arb_if[i].data.tmask;
|
||||
assign commit_wid[i] = commit_arb_if[i].data.wid;
|
||||
assign commit_eop[i] = commit_arb_if[i].data.eop;
|
||||
assign per_issue_commit_fire[i] = commit_arb_if[i].valid && commit_arb_if[i].ready;
|
||||
assign per_issue_commit_tmask[i]= {`NUM_THREADS{per_issue_commit_fire[i]}} & commit_arb_if[i].data.tmask;
|
||||
assign per_issue_commit_wid[i] = commit_arb_if[i].data.wid;
|
||||
assign per_issue_commit_eop[i] = commit_arb_if[i].data.eop;
|
||||
end
|
||||
|
||||
// CSRs update
|
||||
|
@ -84,11 +82,11 @@ module VX_commit import VX_gpu_pkg::*; #(
|
|||
wire [COMMIT_ALL_SIZEW-1:0] commit_size_all_r, commit_size_all_rr;
|
||||
wire commit_fire_any, commit_fire_any_r, commit_fire_any_rr;
|
||||
|
||||
assign commit_fire_any = (| commit_fire);
|
||||
assign commit_fire_any = (| per_issue_commit_fire);
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_commit_size
|
||||
wire [COMMIT_SIZEW-1:0] count;
|
||||
`POP_COUNT(count, commit_tmask[i]);
|
||||
`POP_COUNT(count, per_issue_commit_tmask[i]);
|
||||
assign commit_size[i] = count;
|
||||
end
|
||||
|
||||
|
@ -136,24 +134,33 @@ module VX_commit import VX_gpu_pkg::*; #(
|
|||
end
|
||||
assign commit_csr_if.instret = instret;
|
||||
|
||||
// Committed instructions
|
||||
// Track committed instructions
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0] committed = commit_fire & commit_eop;
|
||||
reg [`NUM_WARPS-1:0] committed_warps;
|
||||
|
||||
always @(*) begin
|
||||
committed_warps = 0;
|
||||
for (integer i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
if (per_issue_commit_fire[i] && per_issue_commit_eop[i]) begin
|
||||
committed_warps[per_issue_commit_wid[i]] = 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (`ISSUE_WIDTH * (1 + `NW_WIDTH)),
|
||||
.RESETW (`ISSUE_WIDTH)
|
||||
.DATAW (`NUM_WARPS),
|
||||
.RESETW (`NUM_WARPS)
|
||||
) committed_pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in ({committed, commit_wid}),
|
||||
.data_out ({commit_sched_if.committed, commit_sched_if.committed_wid})
|
||||
.data_in (committed_warps),
|
||||
.data_out ({commit_sched_if.committed_warps})
|
||||
);
|
||||
|
||||
// Writeback
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_writeback
|
||||
assign writeback_if[i].valid = commit_arb_if[i].valid && commit_arb_if[i].data.wb;
|
||||
assign writeback_if[i].data.uuid = commit_arb_if[i].data.uuid;
|
||||
assign writeback_if[i].data.wis = wid_to_wis(commit_arb_if[i].data.wid);
|
||||
|
@ -167,15 +174,15 @@ module VX_commit import VX_gpu_pkg::*; #(
|
|||
end
|
||||
|
||||
`ifdef DBG_TRACE_PIPELINE
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_trace
|
||||
for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin : g_j
|
||||
always @(posedge clk) begin
|
||||
if (commit_if[j * `ISSUE_WIDTH + i].valid && commit_if[j * `ISSUE_WIDTH + i].ready) begin
|
||||
`TRACE(1, ("%d: core%0d-commit: wid=%0d, PC=0x%0h, ex=", $time, CORE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0}));
|
||||
`TRACE(1, ("%t: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0}))
|
||||
trace_ex_type(1, j);
|
||||
`TRACE(1, (", tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", commit_if[j * `ISSUE_WIDTH + i].data.tmask, commit_if[j * `ISSUE_WIDTH + i].data.wb, commit_if[j * `ISSUE_WIDTH + i].data.rd, commit_if[j * `ISSUE_WIDTH + i].data.sop, commit_if[j * `ISSUE_WIDTH + i].data.eop));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", commit_if[j * `ISSUE_WIDTH + i].data.data, `NUM_THREADS);
|
||||
`TRACE(1, (" (#%0d)\n", commit_if[j * `ISSUE_WIDTH + i].data.uuid));
|
||||
`TRACE(1, (", tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", commit_if[j * `ISSUE_WIDTH + i].data.tmask, commit_if[j * `ISSUE_WIDTH + i].data.wb, commit_if[j * `ISSUE_WIDTH + i].data.rd, commit_if[j * `ISSUE_WIDTH + i].data.sop, commit_if[j * `ISSUE_WIDTH + i].data.eop))
|
||||
`TRACE_ARRAY1D(1, "0x%0h", commit_if[j * `ISSUE_WIDTH + i].data.data, `NUM_THREADS)
|
||||
`TRACE(1, (" (#%0d)\n", commit_if[j * `ISSUE_WIDTH + i].data.uuid))
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -18,7 +18,8 @@
|
|||
`endif
|
||||
|
||||
module VX_core import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter CORE_ID = 0,
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
|
@ -74,33 +75,26 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
assign mem_perf_tmp_if.mem = mem_perf_if.mem;
|
||||
`endif
|
||||
|
||||
`RESET_RELAY (dcr_data_reset, reset);
|
||||
`RESET_RELAY (schedule_reset, reset);
|
||||
`RESET_RELAY (fetch_reset, reset);
|
||||
`RESET_RELAY (decode_reset, reset);
|
||||
`RESET_RELAY (issue_reset, reset);
|
||||
`RESET_RELAY (execute_reset, reset);
|
||||
`RESET_RELAY (commit_reset, reset);
|
||||
|
||||
base_dcrs_t base_dcrs;
|
||||
|
||||
VX_dcr_data dcr_data (
|
||||
.clk (clk),
|
||||
.reset (dcr_data_reset),
|
||||
.reset (reset),
|
||||
.dcr_bus_if (dcr_bus_if),
|
||||
.base_dcrs (base_dcrs)
|
||||
);
|
||||
|
||||
`SCOPE_IO_SWITCH (3)
|
||||
`SCOPE_IO_SWITCH (3);
|
||||
|
||||
VX_schedule #(
|
||||
.INSTANCE_ID ($sformatf("%s-schedule", INSTANCE_ID)),
|
||||
.CORE_ID (CORE_ID)
|
||||
) schedule (
|
||||
.clk (clk),
|
||||
.reset (schedule_reset),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_schedule_if (pipeline_perf_if.schedule),
|
||||
.sched_perf (pipeline_perf_if.sched),
|
||||
`endif
|
||||
|
||||
.base_dcrs (base_dcrs),
|
||||
|
@ -121,36 +115,36 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_fetch #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-fetch", INSTANCE_ID))
|
||||
) fetch (
|
||||
`SCOPE_IO_BIND (0)
|
||||
.clk (clk),
|
||||
.reset (fetch_reset),
|
||||
.reset (reset),
|
||||
.icache_bus_if (icache_bus_if),
|
||||
.schedule_if (schedule_if),
|
||||
.fetch_if (fetch_if)
|
||||
);
|
||||
|
||||
VX_decode #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-decode", INSTANCE_ID))
|
||||
) decode (
|
||||
.clk (clk),
|
||||
.reset (decode_reset),
|
||||
.reset (reset),
|
||||
.fetch_if (fetch_if),
|
||||
.decode_if (decode_if),
|
||||
.decode_sched_if(decode_sched_if)
|
||||
);
|
||||
|
||||
VX_issue #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-issue", INSTANCE_ID))
|
||||
) issue (
|
||||
`SCOPE_IO_BIND (1)
|
||||
|
||||
.clk (clk),
|
||||
.reset (issue_reset),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_issue_if (pipeline_perf_if.issue),
|
||||
.issue_perf (pipeline_perf_if.issue),
|
||||
`endif
|
||||
|
||||
.decode_if (decode_if),
|
||||
|
@ -159,12 +153,13 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_execute #(
|
||||
.INSTANCE_ID ($sformatf("%s-execute", INSTANCE_ID)),
|
||||
.CORE_ID (CORE_ID)
|
||||
) execute (
|
||||
`SCOPE_IO_BIND (2)
|
||||
|
||||
.clk (clk),
|
||||
.reset (execute_reset),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.mem_perf_if (mem_perf_tmp_if),
|
||||
|
@ -186,10 +181,10 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_commit #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-commit", INSTANCE_ID))
|
||||
) commit (
|
||||
.clk (clk),
|
||||
.reset (commit_reset),
|
||||
.reset (reset),
|
||||
|
||||
.commit_if (commit_if),
|
||||
|
||||
|
@ -199,136 +194,18 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
.commit_sched_if(commit_sched_if)
|
||||
);
|
||||
|
||||
VX_lsu_mem_if #(
|
||||
.NUM_LANES (`NUM_LSU_LANES),
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||
) lsu_dcache_if[`NUM_LSU_BLOCKS]();
|
||||
|
||||
`ifdef LMEM_ENABLE
|
||||
|
||||
`RESET_RELAY (lmem_unit_reset, reset);
|
||||
|
||||
VX_lmem_unit #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) lmem_unit (
|
||||
.clk (clk),
|
||||
.reset (lmem_unit_reset),
|
||||
VX_mem_unit #(
|
||||
.INSTANCE_ID (INSTANCE_ID)
|
||||
) mem_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.cache_perf (mem_perf_tmp_if.lmem),
|
||||
.lmem_perf (mem_perf_tmp_if.lmem),
|
||||
`endif
|
||||
.lsu_mem_in_if (lsu_mem_if),
|
||||
.lsu_mem_out_if (lsu_dcache_if)
|
||||
.lsu_mem_if (lsu_mem_if),
|
||||
.dcache_bus_if (dcache_bus_if)
|
||||
);
|
||||
|
||||
`else
|
||||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||
`ASSIGN_VX_LSU_MEM_IF (lsu_dcache_if[i], lsu_mem_if[i]);
|
||||
end
|
||||
|
||||
`endif
|
||||
|
||||
VX_lsu_mem_if #(
|
||||
.NUM_LANES (DCACHE_CHANNELS),
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH)
|
||||
) dcache_coalesced_if[`NUM_LSU_BLOCKS]();
|
||||
|
||||
if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin
|
||||
|
||||
`RESET_RELAY (coalescer_reset, reset);
|
||||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||
|
||||
VX_mem_coalescer #(
|
||||
.INSTANCE_ID ($sformatf("core%0d-coalescer", CORE_ID)),
|
||||
.NUM_REQS (`NUM_LSU_LANES),
|
||||
.DATA_IN_SIZE (LSU_WORD_SIZE),
|
||||
.DATA_OUT_SIZE (DCACHE_WORD_SIZE),
|
||||
.ADDR_WIDTH (LSU_ADDR_WIDTH),
|
||||
.ATYPE_WIDTH (`ADDR_TYPE_WIDTH),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.QUEUE_SIZE (`LSUQ_OUT_SIZE)
|
||||
) coalescer (
|
||||
.clk (clk),
|
||||
.reset (coalescer_reset),
|
||||
|
||||
// Input request
|
||||
.in_req_valid (lsu_dcache_if[i].req_valid),
|
||||
.in_req_mask (lsu_dcache_if[i].req_data.mask),
|
||||
.in_req_rw (lsu_dcache_if[i].req_data.rw),
|
||||
.in_req_byteen (lsu_dcache_if[i].req_data.byteen),
|
||||
.in_req_addr (lsu_dcache_if[i].req_data.addr),
|
||||
.in_req_atype (lsu_dcache_if[i].req_data.atype),
|
||||
.in_req_data (lsu_dcache_if[i].req_data.data),
|
||||
.in_req_tag (lsu_dcache_if[i].req_data.tag),
|
||||
.in_req_ready (lsu_dcache_if[i].req_ready),
|
||||
|
||||
// Input response
|
||||
.in_rsp_valid (lsu_dcache_if[i].rsp_valid),
|
||||
.in_rsp_mask (lsu_dcache_if[i].rsp_data.mask),
|
||||
.in_rsp_data (lsu_dcache_if[i].rsp_data.data),
|
||||
.in_rsp_tag (lsu_dcache_if[i].rsp_data.tag),
|
||||
.in_rsp_ready (lsu_dcache_if[i].rsp_ready),
|
||||
|
||||
// Output request
|
||||
.out_req_valid (dcache_coalesced_if[i].req_valid),
|
||||
.out_req_mask (dcache_coalesced_if[i].req_data.mask),
|
||||
.out_req_rw (dcache_coalesced_if[i].req_data.rw),
|
||||
.out_req_byteen (dcache_coalesced_if[i].req_data.byteen),
|
||||
.out_req_addr (dcache_coalesced_if[i].req_data.addr),
|
||||
.out_req_atype (dcache_coalesced_if[i].req_data.atype),
|
||||
.out_req_data (dcache_coalesced_if[i].req_data.data),
|
||||
.out_req_tag (dcache_coalesced_if[i].req_data.tag),
|
||||
.out_req_ready (dcache_coalesced_if[i].req_ready),
|
||||
|
||||
// Output response
|
||||
.out_rsp_valid (dcache_coalesced_if[i].rsp_valid),
|
||||
.out_rsp_mask (dcache_coalesced_if[i].rsp_data.mask),
|
||||
.out_rsp_data (dcache_coalesced_if[i].rsp_data.data),
|
||||
.out_rsp_tag (dcache_coalesced_if[i].rsp_data.tag),
|
||||
.out_rsp_ready (dcache_coalesced_if[i].rsp_ready)
|
||||
);
|
||||
end
|
||||
|
||||
end else begin
|
||||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||
`ASSIGN_VX_LSU_MEM_IF (dcache_coalesced_if[i], lsu_dcache_if[i]);
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
`RESET_RELAY (lsu_adapter_reset, reset);
|
||||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH)
|
||||
) dcache_bus_tmp_if[DCACHE_CHANNELS]();
|
||||
|
||||
VX_lsu_adapter #(
|
||||
.NUM_LANES (DCACHE_CHANNELS),
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH),
|
||||
.TAG_SEL_BITS (DCACHE_TAG_WIDTH - `UUID_WIDTH),
|
||||
.REQ_OUT_BUF (0),
|
||||
.RSP_OUT_BUF (0)
|
||||
) lsu_adapter (
|
||||
.clk (clk),
|
||||
.reset (lsu_adapter_reset),
|
||||
.lsu_mem_if (dcache_coalesced_if[i]),
|
||||
.mem_bus_if (dcache_bus_tmp_if)
|
||||
);
|
||||
|
||||
for (genvar j = 0; j < DCACHE_CHANNELS; ++j) begin
|
||||
`ASSIGN_VX_MEM_BUS_IF (dcache_bus_if[i * DCACHE_CHANNELS + j], dcache_bus_tmp_if[j]);
|
||||
end
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
|
||||
wire [`CLOG2(LSU_NUM_REQS+1)-1:0] perf_dcache_rd_req_per_cycle;
|
||||
|
@ -352,8 +229,8 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
wire [LSU_NUM_REQS-1:0] perf_dcache_wr_req_fire, perf_dcache_wr_req_fire_r;
|
||||
wire [LSU_NUM_REQS-1:0] perf_dcache_rsp_fire;
|
||||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||
for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_perf_dcache
|
||||
for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin : g_j
|
||||
assign perf_dcache_rd_req_fire[i * `NUM_LSU_LANES + j] = lsu_mem_if[i].req_valid && lsu_mem_if[i].req_data.mask[j] && lsu_mem_if[i].req_ready && ~lsu_mem_if[i].req_data.rw;
|
||||
assign perf_dcache_wr_req_fire[i * `NUM_LSU_LANES + j] = lsu_mem_if[i].req_valid && lsu_mem_if[i].req_data.mask[j] && lsu_mem_if[i].req_ready && lsu_mem_if[i].req_data.rw;
|
||||
assign perf_dcache_rsp_fire[i * `NUM_LSU_LANES + j] = lsu_mem_if[i].rsp_valid && lsu_mem_if[i].rsp_data.mask[j] && lsu_mem_if[i].rsp_ready;
|
||||
|
|
|
@ -32,7 +32,7 @@ module VX_core_top import VX_gpu_pkg::*; #(
|
|||
output wire [DCACHE_NUM_REQS-1:0] dcache_req_rw,
|
||||
output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE-1:0] dcache_req_byteen,
|
||||
output wire [DCACHE_NUM_REQS-1:0][DCACHE_ADDR_WIDTH-1:0] dcache_req_addr,
|
||||
output wire [DCACHE_NUM_REQS-1:0][`ADDR_TYPE_WIDTH-1:0] dcache_req_atype,
|
||||
output wire [DCACHE_NUM_REQS-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] dcache_req_flags,
|
||||
output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] dcache_req_data,
|
||||
output wire [DCACHE_NUM_REQS-1:0][DCACHE_TAG_WIDTH-1:0] dcache_req_tag,
|
||||
input wire [DCACHE_NUM_REQS-1:0] dcache_req_ready,
|
||||
|
@ -96,7 +96,7 @@ module VX_core_top import VX_gpu_pkg::*; #(
|
|||
assign dcache_req_rw[i] = dcache_bus_if[i].req_data.rw;
|
||||
assign dcache_req_byteen[i] = dcache_bus_if[i].req_data.byteen;
|
||||
assign dcache_req_addr[i] = dcache_bus_if[i].req_data.addr;
|
||||
assign dcache_req_atype[i] = dcache_bus_if[i].req_data.atype;
|
||||
assign dcache_req_flags[i] = dcache_bus_if[i].req_data.flags;
|
||||
assign dcache_req_data[i] = dcache_bus_if[i].req_data.data;
|
||||
assign dcache_req_tag[i] = dcache_bus_if[i].req_data.tag;
|
||||
assign dcache_bus_if[i].req_ready = dcache_req_ready[i];
|
||||
|
@ -119,7 +119,7 @@ module VX_core_top import VX_gpu_pkg::*; #(
|
|||
assign icache_req_data = icache_bus_if.req_data.data;
|
||||
assign icache_req_tag = icache_bus_if.req_data.tag;
|
||||
assign icache_bus_if.req_ready = icache_req_ready;
|
||||
`UNUSED_VAR (icache_bus_if.req_data.atype)
|
||||
`UNUSED_VAR (icache_bus_if.req_data.flags)
|
||||
|
||||
assign icache_bus_if.rsp_valid = icache_rsp_valid;
|
||||
assign icache_bus_if.rsp_data.tag = icache_rsp_tag;
|
||||
|
@ -144,6 +144,7 @@ module VX_core_top import VX_gpu_pkg::*; #(
|
|||
`endif
|
||||
|
||||
VX_core #(
|
||||
.INSTANCE_ID ($sformatf("core")),
|
||||
.CORE_ID (CORE_ID)
|
||||
) core (
|
||||
`SCOPE_IO_BIND (0)
|
||||
|
|
|
@ -26,13 +26,13 @@
|
|||
addr+12'h80 : dst = 32'(src[$bits(src)-1:32])
|
||||
`endif
|
||||
|
||||
|
||||
module VX_csr_data
|
||||
import VX_gpu_pkg::*;
|
||||
`ifdef EXT_F_ENABLE
|
||||
import VX_fpu_pkg::*;
|
||||
`endif
|
||||
#(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -83,7 +83,7 @@ import VX_fpu_pkg::*;
|
|||
wire [`NUM_FPU_BLOCKS-1:0][`NW_WIDTH-1:0] fpu_write_wid;
|
||||
fflags_t [`NUM_FPU_BLOCKS-1:0] fpu_write_fflags;
|
||||
|
||||
for (genvar i = 0; i < `NUM_FPU_BLOCKS; ++i) begin
|
||||
for (genvar i = 0; i < `NUM_FPU_BLOCKS; ++i) begin : g_fpu_write
|
||||
assign fpu_write_enable[i] = fpu_csr_if[i].write_enable;
|
||||
assign fpu_write_wid[i] = fpu_csr_if[i].write_wid;
|
||||
assign fpu_write_fflags[i] = fpu_csr_if[i].write_fflags;
|
||||
|
@ -107,7 +107,7 @@ import VX_fpu_pkg::*;
|
|||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_FPU_BLOCKS; ++i) begin
|
||||
for (genvar i = 0; i < `NUM_FPU_BLOCKS; ++i) begin : g_fpu_csr_read_frm
|
||||
assign fpu_csr_if[i].read_frm = fcsr[fpu_csr_if[i].read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS];
|
||||
end
|
||||
|
||||
|
@ -147,7 +147,7 @@ import VX_fpu_pkg::*;
|
|||
mscratch <= write_data;
|
||||
end
|
||||
default: begin
|
||||
`ASSERT(0, ("%t: *** invalid CSR write address: %0h (#%0d)", $time, write_addr, write_uuid));
|
||||
`ASSERT(0, ("%t: *** %s invalid CSR write address: %0h (#%0d)", $time, INSTANCE_ID, write_addr, write_uuid));
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
@ -155,41 +155,41 @@ import VX_fpu_pkg::*;
|
|||
|
||||
// CSRs read //////////////////////////////////////////////////////////////
|
||||
|
||||
reg [`XLEN-1:0] read_data_ro_r;
|
||||
reg [`XLEN-1:0] read_data_rw_r;
|
||||
reg read_addr_valid_r;
|
||||
reg [`XLEN-1:0] read_data_ro_w;
|
||||
reg [`XLEN-1:0] read_data_rw_w;
|
||||
reg read_addr_valid_w;
|
||||
|
||||
always @(*) begin
|
||||
read_data_ro_r = '0;
|
||||
read_data_rw_r = '0;
|
||||
read_addr_valid_r = 1;
|
||||
read_data_ro_w = '0;
|
||||
read_data_rw_w = '0;
|
||||
read_addr_valid_w = 1;
|
||||
case (read_addr)
|
||||
`VX_CSR_MVENDORID : read_data_ro_r = `XLEN'(`VENDOR_ID);
|
||||
`VX_CSR_MARCHID : read_data_ro_r = `XLEN'(`ARCHITECTURE_ID);
|
||||
`VX_CSR_MIMPID : read_data_ro_r = `XLEN'(`IMPLEMENTATION_ID);
|
||||
`VX_CSR_MISA : read_data_ro_r = `XLEN'({2'(`CLOG2(`XLEN/16)), 30'(`MISA_STD)});
|
||||
`VX_CSR_MVENDORID : read_data_ro_w = `XLEN'(`VENDOR_ID);
|
||||
`VX_CSR_MARCHID : read_data_ro_w = `XLEN'(`ARCHITECTURE_ID);
|
||||
`VX_CSR_MIMPID : read_data_ro_w = `XLEN'(`IMPLEMENTATION_ID);
|
||||
`VX_CSR_MISA : read_data_ro_w = `XLEN'({2'(`CLOG2(`XLEN/16)), 30'(`MISA_STD)});
|
||||
`ifdef EXT_F_ENABLE
|
||||
`VX_CSR_FFLAGS : read_data_rw_r = `XLEN'(fcsr[read_wid][`FP_FLAGS_BITS-1:0]);
|
||||
`VX_CSR_FRM : read_data_rw_r = `XLEN'(fcsr[read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS]);
|
||||
`VX_CSR_FCSR : read_data_rw_r = `XLEN'(fcsr[read_wid]);
|
||||
`VX_CSR_FFLAGS : read_data_rw_w = `XLEN'(fcsr[read_wid][`FP_FLAGS_BITS-1:0]);
|
||||
`VX_CSR_FRM : read_data_rw_w = `XLEN'(fcsr[read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS]);
|
||||
`VX_CSR_FCSR : read_data_rw_w = `XLEN'(fcsr[read_wid]);
|
||||
`endif
|
||||
`VX_CSR_MSCRATCH : read_data_rw_r = mscratch;
|
||||
`VX_CSR_MSCRATCH : read_data_rw_w = mscratch;
|
||||
|
||||
`VX_CSR_WARP_ID : read_data_ro_r = `XLEN'(read_wid);
|
||||
`VX_CSR_CORE_ID : read_data_ro_r = `XLEN'(CORE_ID);
|
||||
`VX_CSR_ACTIVE_THREADS: read_data_ro_r = `XLEN'(thread_masks[read_wid]);
|
||||
`VX_CSR_ACTIVE_WARPS: read_data_ro_r = `XLEN'(active_warps);
|
||||
`VX_CSR_NUM_THREADS: read_data_ro_r = `XLEN'(`NUM_THREADS);
|
||||
`VX_CSR_NUM_WARPS : read_data_ro_r = `XLEN'(`NUM_WARPS);
|
||||
`VX_CSR_NUM_CORES : read_data_ro_r = `XLEN'(`NUM_CORES * `NUM_CLUSTERS);
|
||||
`VX_CSR_LOCAL_MEM_BASE: read_data_ro_r = `XLEN'(`LMEM_BASE_ADDR);
|
||||
`VX_CSR_WARP_ID : read_data_ro_w = `XLEN'(read_wid);
|
||||
`VX_CSR_CORE_ID : read_data_ro_w = `XLEN'(CORE_ID);
|
||||
`VX_CSR_ACTIVE_THREADS: read_data_ro_w = `XLEN'(thread_masks[read_wid]);
|
||||
`VX_CSR_ACTIVE_WARPS: read_data_ro_w = `XLEN'(active_warps);
|
||||
`VX_CSR_NUM_THREADS: read_data_ro_w = `XLEN'(`NUM_THREADS);
|
||||
`VX_CSR_NUM_WARPS : read_data_ro_w = `XLEN'(`NUM_WARPS);
|
||||
`VX_CSR_NUM_CORES : read_data_ro_w = `XLEN'(`NUM_CORES * `NUM_CLUSTERS);
|
||||
`VX_CSR_LOCAL_MEM_BASE: read_data_ro_w = `XLEN'(`LMEM_BASE_ADDR);
|
||||
|
||||
`CSR_READ_64(`VX_CSR_MCYCLE, read_data_ro_r, cycles);
|
||||
`CSR_READ_64(`VX_CSR_MCYCLE, read_data_ro_w, cycles);
|
||||
|
||||
`VX_CSR_MPM_RESERVED : read_data_ro_r = 'x;
|
||||
`VX_CSR_MPM_RESERVED_H : read_data_ro_r = 'x;
|
||||
`VX_CSR_MPM_RESERVED : read_data_ro_w = 'x;
|
||||
`VX_CSR_MPM_RESERVED_H : read_data_ro_w = 'x;
|
||||
|
||||
`CSR_READ_64(`VX_CSR_MINSTRET, read_data_ro_r, commit_csr_if.instret);
|
||||
`CSR_READ_64(`VX_CSR_MINSTRET, read_data_ro_w, commit_csr_if.instret);
|
||||
|
||||
`VX_CSR_SATP,
|
||||
`VX_CSR_MSTATUS,
|
||||
|
@ -200,77 +200,77 @@ import VX_fpu_pkg::*;
|
|||
`VX_CSR_MTVEC,
|
||||
`VX_CSR_MEPC,
|
||||
`VX_CSR_PMPCFG0,
|
||||
`VX_CSR_PMPADDR0 : read_data_ro_r = `XLEN'(0);
|
||||
`VX_CSR_PMPADDR0 : read_data_ro_w = `XLEN'(0);
|
||||
|
||||
default: begin
|
||||
read_addr_valid_r = 0;
|
||||
read_addr_valid_w = 0;
|
||||
if ((read_addr >= `VX_CSR_MPM_USER && read_addr < (`VX_CSR_MPM_USER + 32))
|
||||
|| (read_addr >= `VX_CSR_MPM_USER_H && read_addr < (`VX_CSR_MPM_USER_H + 32))) begin
|
||||
read_addr_valid_r = 1;
|
||||
read_addr_valid_w = 1;
|
||||
`ifdef PERF_ENABLE
|
||||
case (base_dcrs.mpm_class)
|
||||
`VX_DCR_MPM_CLASS_CORE: begin
|
||||
case (read_addr)
|
||||
// PERF: pipeline
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCHED_ID, read_data_ro_r, pipeline_perf_if.sched_idles);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCHED_ST, read_data_ro_r, pipeline_perf_if.sched_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_IBUF_ST, read_data_ro_r, pipeline_perf_if.ibf_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_ST, read_data_ro_r, pipeline_perf_if.scb_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_ALU, read_data_ro_r, pipeline_perf_if.units_uses[`EX_ALU]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCHED_ID, read_data_ro_w, pipeline_perf_if.sched.idles);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCHED_ST, read_data_ro_w, pipeline_perf_if.sched.stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_IBUF_ST, read_data_ro_w, pipeline_perf_if.issue.ibf_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_ST, read_data_ro_w, pipeline_perf_if.issue.scb_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_OPDS_ST, read_data_ro_w, pipeline_perf_if.issue.opd_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_ALU, read_data_ro_w, pipeline_perf_if.issue.units_uses[`EX_ALU]);
|
||||
`ifdef EXT_F_ENABLE
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_r, pipeline_perf_if.units_uses[`EX_FPU]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_w, pipeline_perf_if.issue.units_uses[`EX_FPU]);
|
||||
`else
|
||||
`VX_CSR_MPM_SCRB_FPU : read_data_ro_r = '0;
|
||||
`VX_CSR_MPM_SCRB_FPU_H : read_data_ro_r = '0;
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_w, `PERF_CTR_BITS'(0));
|
||||
`endif
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_LSU, read_data_ro_r, pipeline_perf_if.units_uses[`EX_LSU]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_SFU, read_data_ro_r, pipeline_perf_if.units_uses[`EX_SFU]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_CSRS, read_data_ro_r, pipeline_perf_if.sfu_uses[`SFU_CSRS]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_WCTL, read_data_ro_r, pipeline_perf_if.sfu_uses[`SFU_WCTL]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_LSU, read_data_ro_w, pipeline_perf_if.issue.units_uses[`EX_LSU]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_SFU, read_data_ro_w, pipeline_perf_if.issue.units_uses[`EX_SFU]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_CSRS, read_data_ro_w, pipeline_perf_if.issue.sfu_uses[`SFU_CSRS]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_WCTL, read_data_ro_w, pipeline_perf_if.issue.sfu_uses[`SFU_WCTL]);
|
||||
// PERF: memory
|
||||
`CSR_READ_64(`VX_CSR_MPM_IFETCHES, read_data_ro_r, pipeline_perf_if.ifetches);
|
||||
`CSR_READ_64(`VX_CSR_MPM_LOADS, read_data_ro_r, pipeline_perf_if.loads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_STORES, read_data_ro_r, pipeline_perf_if.stores);
|
||||
`CSR_READ_64(`VX_CSR_MPM_IFETCH_LT, read_data_ro_r, pipeline_perf_if.ifetch_latency);
|
||||
`CSR_READ_64(`VX_CSR_MPM_LOAD_LT, read_data_ro_r, pipeline_perf_if.load_latency);
|
||||
`CSR_READ_64(`VX_CSR_MPM_IFETCHES, read_data_ro_w, pipeline_perf_if.ifetches);
|
||||
`CSR_READ_64(`VX_CSR_MPM_LOADS, read_data_ro_w, pipeline_perf_if.loads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_STORES, read_data_ro_w, pipeline_perf_if.stores);
|
||||
`CSR_READ_64(`VX_CSR_MPM_IFETCH_LT, read_data_ro_w, pipeline_perf_if.ifetch_latency);
|
||||
`CSR_READ_64(`VX_CSR_MPM_LOAD_LT, read_data_ro_w, pipeline_perf_if.load_latency);
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
`VX_DCR_MPM_CLASS_MEM: begin
|
||||
case (read_addr)
|
||||
// PERF: icache
|
||||
`CSR_READ_64(`VX_CSR_MPM_ICACHE_READS, read_data_ro_r, mem_perf_if.icache.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_ICACHE_MISS_R, read_data_ro_r, mem_perf_if.icache.read_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_ICACHE_MSHR_ST, read_data_ro_r, mem_perf_if.icache.mshr_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_ICACHE_READS, read_data_ro_w, mem_perf_if.icache.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_ICACHE_MISS_R, read_data_ro_w, mem_perf_if.icache.read_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_ICACHE_MSHR_ST, read_data_ro_w, mem_perf_if.icache.mshr_stalls);
|
||||
// PERF: dcache
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_READS, read_data_ro_r, mem_perf_if.dcache.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_WRITES, read_data_ro_r, mem_perf_if.dcache.writes);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_MISS_R, read_data_ro_r, mem_perf_if.dcache.read_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_MISS_W, read_data_ro_r, mem_perf_if.dcache.write_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_BANK_ST, read_data_ro_r, mem_perf_if.dcache.bank_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_MSHR_ST, read_data_ro_r, mem_perf_if.dcache.mshr_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_READS, read_data_ro_w, mem_perf_if.dcache.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_WRITES, read_data_ro_w, mem_perf_if.dcache.writes);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_MISS_R, read_data_ro_w, mem_perf_if.dcache.read_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_MISS_W, read_data_ro_w, mem_perf_if.dcache.write_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_BANK_ST, read_data_ro_w, mem_perf_if.dcache.bank_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_MSHR_ST, read_data_ro_w, mem_perf_if.dcache.mshr_stalls);
|
||||
// PERF: lmem
|
||||
`CSR_READ_64(`VX_CSR_MPM_LMEM_READS, read_data_ro_r, mem_perf_if.lmem.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_LMEM_WRITES, read_data_ro_r, mem_perf_if.lmem.writes);
|
||||
`CSR_READ_64(`VX_CSR_MPM_LMEM_BANK_ST, read_data_ro_r, mem_perf_if.lmem.bank_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_LMEM_READS, read_data_ro_w, mem_perf_if.lmem.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_LMEM_WRITES, read_data_ro_w, mem_perf_if.lmem.writes);
|
||||
`CSR_READ_64(`VX_CSR_MPM_LMEM_BANK_ST, read_data_ro_w, mem_perf_if.lmem.bank_stalls);
|
||||
// PERF: l2cache
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_READS, read_data_ro_r, mem_perf_if.l2cache.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_WRITES, read_data_ro_r, mem_perf_if.l2cache.writes);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_MISS_R, read_data_ro_r, mem_perf_if.l2cache.read_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_MISS_W, read_data_ro_r, mem_perf_if.l2cache.write_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_BANK_ST, read_data_ro_r, mem_perf_if.l2cache.bank_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_MSHR_ST, read_data_ro_r, mem_perf_if.l2cache.mshr_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_READS, read_data_ro_w, mem_perf_if.l2cache.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_WRITES, read_data_ro_w, mem_perf_if.l2cache.writes);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_MISS_R, read_data_ro_w, mem_perf_if.l2cache.read_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_MISS_W, read_data_ro_w, mem_perf_if.l2cache.write_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_BANK_ST, read_data_ro_w, mem_perf_if.l2cache.bank_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_MSHR_ST, read_data_ro_w, mem_perf_if.l2cache.mshr_stalls);
|
||||
// PERF: l3cache
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_READS, read_data_ro_r, mem_perf_if.l3cache.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_WRITES, read_data_ro_r, mem_perf_if.l3cache.writes);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_MISS_R, read_data_ro_r, mem_perf_if.l3cache.read_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_MISS_W, read_data_ro_r, mem_perf_if.l3cache.write_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_BANK_ST, read_data_ro_r, mem_perf_if.l3cache.bank_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_MSHR_ST, read_data_ro_r, mem_perf_if.l3cache.mshr_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_READS, read_data_ro_w, mem_perf_if.l3cache.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_WRITES, read_data_ro_w, mem_perf_if.l3cache.writes);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_MISS_R, read_data_ro_w, mem_perf_if.l3cache.read_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_MISS_W, read_data_ro_w, mem_perf_if.l3cache.write_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_BANK_ST, read_data_ro_w, mem_perf_if.l3cache.bank_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_MSHR_ST, read_data_ro_w, mem_perf_if.l3cache.mshr_stalls);
|
||||
// PERF: memory
|
||||
`CSR_READ_64(`VX_CSR_MPM_MEM_READS, read_data_ro_r, mem_perf_if.mem.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_MEM_WRITES, read_data_ro_r, mem_perf_if.mem.writes);
|
||||
`CSR_READ_64(`VX_CSR_MPM_MEM_LT, read_data_ro_r, mem_perf_if.mem.latency);
|
||||
`CSR_READ_64(`VX_CSR_MPM_MEM_READS, read_data_ro_w, mem_perf_if.mem.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_MEM_WRITES, read_data_ro_w, mem_perf_if.mem.writes);
|
||||
`CSR_READ_64(`VX_CSR_MPM_MEM_LT, read_data_ro_w, mem_perf_if.mem.latency);
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
@ -282,12 +282,12 @@ import VX_fpu_pkg::*;
|
|||
endcase
|
||||
end
|
||||
|
||||
assign read_data_ro = read_data_ro_r;
|
||||
assign read_data_rw = read_data_rw_r;
|
||||
assign read_data_ro = read_data_ro_w;
|
||||
assign read_data_rw = read_data_rw_w;
|
||||
|
||||
`UNUSED_VAR (base_dcrs)
|
||||
|
||||
`RUNTIME_ASSERT(~read_enable || read_addr_valid_r, ("%t: *** invalid CSR read address: 0x%0h (#%0d)", $time, read_addr, read_uuid))
|
||||
`RUNTIME_ASSERT(~read_enable || read_addr_valid_w, ("%t: *** invalid CSR read address: 0x%0h (#%0d)", $time, read_addr, read_uuid))
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
`UNUSED_VAR (mem_perf_if.icache);
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_csr_unit import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter CORE_ID = 0,
|
||||
parameter NUM_LANES = 1
|
||||
) (
|
||||
|
@ -36,7 +37,7 @@ module VX_csr_unit import VX_gpu_pkg::*; #(
|
|||
VX_execute_if.slave execute_if,
|
||||
VX_commit_if.master commit_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
|
@ -65,14 +66,15 @@ module VX_csr_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] rs1_data;
|
||||
`UNUSED_VAR (rs1_data)
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_rs1_data
|
||||
assign rs1_data[i] = execute_if.data.rs1_data[i];
|
||||
end
|
||||
|
||||
wire csr_write_enable = (execute_if.data.op_type == `INST_SFU_CSRRW);
|
||||
|
||||
VX_csr_data #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID (INSTANCE_ID),
|
||||
.CORE_ID (CORE_ID)
|
||||
) csr_data (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -111,12 +113,15 @@ module VX_csr_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] wtid, gtid;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
if (PID_BITS != 0) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_wtid
|
||||
if (PID_BITS != 0) begin : g_pid
|
||||
assign wtid[i] = `XLEN'(execute_if.data.pid * NUM_LANES + i);
|
||||
end else begin
|
||||
end else begin : g_no_pid
|
||||
assign wtid[i] = `XLEN'(i);
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_gtid
|
||||
assign gtid[i] = (`XLEN'(CORE_ID) << (`NW_BITS + `NT_BITS)) + (`XLEN'(execute_if.data.wid) << `NT_BITS) + wtid[i];
|
||||
end
|
||||
|
||||
|
|
|
@ -12,7 +12,6 @@
|
|||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
`include "VX_trace.vh"
|
||||
|
||||
module VX_dcr_data import VX_gpu_pkg::*; (
|
||||
input wire clk,
|
||||
|
@ -51,9 +50,9 @@ module VX_dcr_data import VX_gpu_pkg::*; (
|
|||
`ifdef DBG_TRACE_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (dcr_bus_if.write_valid) begin
|
||||
`TRACE(1, ("%d: base-dcr: state=", $time));
|
||||
`TRACE(1, ("%t: base-dcr: state=", $time))
|
||||
trace_base_dcr(1, dcr_bus_if.write_addr);
|
||||
`TRACE(1, (", data=0x%0h\n", dcr_bus_if.write_data));
|
||||
`TRACE(1, (", data=0x%h\n", dcr_bus_if.write_data))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -12,24 +12,23 @@
|
|||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
`include "VX_trace.vh"
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`define USED_IREG(x) \
|
||||
x``_r = {1'b0, ``x}; \
|
||||
x``_v = {1'b0, ``x}; \
|
||||
use_``x = 1
|
||||
|
||||
`define USED_FREG(x) \
|
||||
x``_r = {1'b1, ``x}; \
|
||||
x``_v = {1'b1, ``x}; \
|
||||
use_``x = 1
|
||||
`else
|
||||
`define USED_IREG(x) \
|
||||
x``_r = ``x; \
|
||||
x``_v = ``x; \
|
||||
use_``x = 1
|
||||
`endif
|
||||
|
||||
module VX_decode import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -44,14 +43,14 @@ module VX_decode import VX_gpu_pkg::*; #(
|
|||
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + 1 + (`NR_BITS * 4);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
reg [`EX_BITS-1:0] ex_type;
|
||||
reg [`INST_OP_BITS-1:0] op_type;
|
||||
op_args_t op_args;
|
||||
reg [`NR_BITS-1:0] rd_r, rs1_r, rs2_r, rs3_r;
|
||||
reg [`NR_BITS-1:0] rd_v, rs1_v, rs2_v, rs3_v;
|
||||
reg use_rd, use_rs1, use_rs2, use_rs3;
|
||||
reg is_wstall;
|
||||
|
||||
|
@ -145,15 +144,21 @@ module VX_decode import VX_gpu_pkg::*; #(
|
|||
end
|
||||
`endif
|
||||
|
||||
`STATIC_ASSERT($bits(alu_args_t) == $bits(op_args_t), ("alu_args_t size mismatch: current=%0d, expected=%0d", $bits(alu_args_t), $bits(op_args_t)));
|
||||
`STATIC_ASSERT($bits(fpu_args_t) == $bits(op_args_t), ("fpu_args_t size mismatch: current=%0d, expected=%0d", $bits(fpu_args_t), $bits(op_args_t)));
|
||||
`STATIC_ASSERT($bits(lsu_args_t) == $bits(op_args_t), ("lsu_args_t size mismatch: current=%0d, expected=%0d", $bits(lsu_args_t), $bits(op_args_t)));
|
||||
`STATIC_ASSERT($bits(csr_args_t) == $bits(op_args_t), ("csr_args_t size mismatch: current=%0d, expected=%0d", $bits(csr_args_t), $bits(op_args_t)));
|
||||
`STATIC_ASSERT($bits(wctl_args_t) == $bits(op_args_t), ("wctl_args_t size mismatch: current=%0d, expected=%0d", $bits(wctl_args_t), $bits(op_args_t)));
|
||||
|
||||
always @(*) begin
|
||||
|
||||
ex_type = '0;
|
||||
ex_type = 'x;
|
||||
op_type = 'x;
|
||||
op_args = 'x;
|
||||
rd_r = '0;
|
||||
rs1_r = '0;
|
||||
rs2_r = '0;
|
||||
rs3_r = '0;
|
||||
rd_v = '0;
|
||||
rs1_v = '0;
|
||||
rs2_v = '0;
|
||||
rs3_v = '0;
|
||||
use_rd = 0;
|
||||
use_rs1 = 0;
|
||||
use_rs2 = 0;
|
||||
|
@ -371,14 +376,16 @@ module VX_decode import VX_gpu_pkg::*; #(
|
|||
`USED_IREG (rs2);
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
`INST_FMADD,
|
||||
`INST_FMSUB,
|
||||
`INST_FNMSUB,
|
||||
`INST_FNMADD: begin
|
||||
`INST_FMADD, // 7'b1000011
|
||||
`INST_FMSUB, // 7'b1000111
|
||||
`INST_FNMSUB, // 7'b1001011
|
||||
`INST_FNMADD: // 7'b1001111
|
||||
begin
|
||||
ex_type = `EX_FPU;
|
||||
op_type = `INST_OP_BITS'({2'b11, opcode[3:2]});
|
||||
op_type = `INST_OP_BITS'({2'b00, 1'b1, opcode[3]});
|
||||
op_args.fpu.frm = func3;
|
||||
op_args.fpu.fmt[0] = func2[0]; // float / double
|
||||
op_args.fpu.fmt[1] = opcode[3] ^ opcode[2]; // SUB
|
||||
use_rd = 1;
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
|
@ -394,9 +401,10 @@ module VX_decode import VX_gpu_pkg::*; #(
|
|||
case (func5)
|
||||
5'b00000, // FADD
|
||||
5'b00001, // FSUB
|
||||
5'b00010, // FMUL
|
||||
5'b00011: begin // FDIV
|
||||
op_type = `INST_OP_BITS'(func5[1:0]);
|
||||
5'b00010: // FMUL
|
||||
begin
|
||||
op_type = `INST_OP_BITS'({2'b00, 1'b0, func5[1]});
|
||||
op_args.fpu.fmt[1] = func5[0]; // SUB
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
`USED_FREG (rs2);
|
||||
|
@ -425,6 +433,13 @@ module VX_decode import VX_gpu_pkg::*; #(
|
|||
`USED_FREG (rs1);
|
||||
end
|
||||
`endif
|
||||
5'b00011: begin
|
||||
// FDIV
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_DIV);
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
`USED_FREG (rs2);
|
||||
end
|
||||
5'b01011: begin
|
||||
// FSQRT
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_SQRT);
|
||||
|
@ -522,7 +537,7 @@ module VX_decode import VX_gpu_pkg::*; #(
|
|||
end
|
||||
|
||||
// disable write to integer register r0
|
||||
wire wb = use_rd && (rd_r != 0);
|
||||
wire wb = use_rd && (rd_v != 0);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
|
@ -532,7 +547,7 @@ module VX_decode import VX_gpu_pkg::*; #(
|
|||
.reset (reset),
|
||||
.valid_in (fetch_if.valid),
|
||||
.ready_in (fetch_if.ready),
|
||||
.data_in ({fetch_if.data.uuid, fetch_if.data.wid, fetch_if.data.tmask, fetch_if.data.PC, ex_type, op_type, op_args, wb, rd_r, rs1_r, rs2_r, rs3_r}),
|
||||
.data_in ({fetch_if.data.uuid, fetch_if.data.wid, fetch_if.data.tmask, fetch_if.data.PC, ex_type, op_type, op_args, wb, rd_v, rs1_v, rs2_v, rs3_v}),
|
||||
.data_out ({decode_if.data.uuid, decode_if.data.wid, decode_if.data.tmask, decode_if.data.PC, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3}),
|
||||
.valid_out (decode_if.valid),
|
||||
.ready_out (decode_if.ready)
|
||||
|
@ -542,9 +557,10 @@ module VX_decode import VX_gpu_pkg::*; #(
|
|||
|
||||
wire fetch_fire = fetch_if.valid && fetch_if.ready;
|
||||
|
||||
assign decode_sched_if.valid = fetch_fire;
|
||||
assign decode_sched_if.wid = fetch_if.data.wid;
|
||||
assign decode_sched_if.is_wstall = is_wstall;
|
||||
assign decode_sched_if.valid = fetch_fire;
|
||||
assign decode_sched_if.wid = fetch_if.data.wid;
|
||||
assign decode_sched_if.unlock = ~is_wstall;
|
||||
|
||||
`ifndef L1_ENABLE
|
||||
assign fetch_if.ibuf_pop = decode_if.ibuf_pop;
|
||||
`endif
|
||||
|
@ -552,14 +568,14 @@ module VX_decode import VX_gpu_pkg::*; #(
|
|||
`ifdef DBG_TRACE_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (decode_if.valid && decode_if.ready) begin
|
||||
`TRACE(1, ("%d: core%0d-decode: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, CORE_ID, decode_if.data.wid, {decode_if.data.PC, 1'd0}, instr));
|
||||
`TRACE(1, ("%t: %s: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, INSTANCE_ID, decode_if.data.wid, {decode_if.data.PC, 1'd0}, instr))
|
||||
trace_ex_type(1, decode_if.data.ex_type);
|
||||
`TRACE(1, (", op="));
|
||||
`TRACE(1, (", op="))
|
||||
trace_ex_op(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args);
|
||||
`TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, opds=%b%b%b%b",
|
||||
decode_if.data.tmask, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3, use_rd, use_rs1, use_rs2, use_rs3));
|
||||
decode_if.data.tmask, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3, use_rd, use_rs1, use_rs2, use_rs3))
|
||||
trace_op_args(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args);
|
||||
`TRACE(1, (" (#%0d)\n", decode_if.data.uuid));
|
||||
`TRACE(1, (" (#%0d)\n", decode_if.data.uuid))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -12,10 +12,9 @@
|
|||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
`include "VX_trace.vh"
|
||||
|
||||
module VX_dispatch import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -24,118 +23,81 @@ module VX_dispatch import VX_gpu_pkg::*; #(
|
|||
output wire [`PERF_CTR_BITS-1:0] perf_stalls [`NUM_EX_UNITS],
|
||||
`endif
|
||||
// inputs
|
||||
VX_operands_if.slave operands_if [`ISSUE_WIDTH],
|
||||
VX_operands_if.slave operands_if,
|
||||
|
||||
// outputs
|
||||
VX_dispatch_if.master dispatch_if [`NUM_EX_UNITS * `ISSUE_WIDTH]
|
||||
VX_dispatch_if.master dispatch_if [`NUM_EX_UNITS]
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
|
||||
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + `INST_OP_BITS + `INST_ARGS_BITS + 1 + `NR_BITS + (3 * `NUM_THREADS * `XLEN) + `NT_WIDTH;
|
||||
|
||||
wire [`NUM_THREADS-1:0][`NT_WIDTH-1:0] tids;
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin : g_tids
|
||||
assign tids[i] = `NT_WIDTH'(i);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
wire [`NT_WIDTH-1:0] last_active_tid;
|
||||
|
||||
wire [`NT_WIDTH-1:0] last_active_tid;
|
||||
VX_find_first #(
|
||||
.N (`NUM_THREADS),
|
||||
.DATAW (`NT_WIDTH),
|
||||
.REVERSE (1)
|
||||
) last_tid_select (
|
||||
.valid_in (operands_if.data.tmask),
|
||||
.data_in (tids),
|
||||
.data_out (last_active_tid),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
VX_find_first #(
|
||||
.N (`NUM_THREADS),
|
||||
.DATAW (`NT_WIDTH),
|
||||
.REVERSE (1)
|
||||
) last_tid_select (
|
||||
.valid_in (operands_if[i].data.tmask),
|
||||
.data_in (tids),
|
||||
.data_out (last_active_tid),
|
||||
`UNUSED_PIN (valid_out)
|
||||
wire [`NUM_EX_UNITS-1:0] operands_ready_in;
|
||||
assign operands_if.ready = operands_ready_in[operands_if.data.ex_type];
|
||||
|
||||
for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin : g_buffers
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2),
|
||||
.OUT_REG (1)
|
||||
) buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (operands_if.valid && (operands_if.data.ex_type == `EX_BITS'(i))),
|
||||
.ready_in (operands_ready_in[i]),
|
||||
.data_in ({
|
||||
operands_if.data.uuid,
|
||||
operands_if.data.wis,
|
||||
operands_if.data.tmask,
|
||||
operands_if.data.PC,
|
||||
operands_if.data.op_type,
|
||||
operands_if.data.op_args,
|
||||
operands_if.data.wb,
|
||||
operands_if.data.rd,
|
||||
last_active_tid,
|
||||
operands_if.data.rs1_data,
|
||||
operands_if.data.rs2_data,
|
||||
operands_if.data.rs3_data
|
||||
}),
|
||||
.data_out (dispatch_if[i].data),
|
||||
.valid_out (dispatch_if[i].valid),
|
||||
.ready_out (dispatch_if[i].ready)
|
||||
);
|
||||
|
||||
wire [`NUM_EX_UNITS-1:0] operands_reset;
|
||||
|
||||
`RESET_RELAY (buf_reset, reset);
|
||||
|
||||
for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2),
|
||||
.OUT_REG (2)
|
||||
) buffer (
|
||||
.clk (clk),
|
||||
.reset (buf_reset),
|
||||
.valid_in (operands_if[i].valid && (operands_if[i].data.ex_type == j)),
|
||||
.ready_in (operands_reset[j]),
|
||||
.data_in (`TO_DISPATCH_DATA(operands_if[i].data, last_active_tid)),
|
||||
.data_out (dispatch_if[j * `ISSUE_WIDTH + i].data),
|
||||
.valid_out (dispatch_if[j * `ISSUE_WIDTH + i].valid),
|
||||
.ready_out (dispatch_if[j * `ISSUE_WIDTH + i].ready)
|
||||
);
|
||||
end
|
||||
|
||||
assign operands_if[i].ready = operands_reset[operands_if[i].data.ex_type];
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
wire [`NUM_EX_UNITS-1:0] perf_unit_stalls_per_cycle, perf_unit_stalls_per_cycle_r;
|
||||
reg [`ISSUE_WIDTH-1:0][`NUM_EX_UNITS-1:0] perf_issue_unit_stalls_per_cycle;
|
||||
reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_stalls_r;
|
||||
|
||||
for (genvar i=0; i < `ISSUE_WIDTH; ++i) begin
|
||||
always @(*) begin
|
||||
perf_issue_unit_stalls_per_cycle[i] = '0;
|
||||
if (operands_if[i].valid && ~operands_if[i].ready) begin
|
||||
perf_issue_unit_stalls_per_cycle[i][operands_if[i].data.ex_type] = 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
wire operands_if_stall = operands_if.valid && ~operands_if.ready;
|
||||
|
||||
VX_reduce #(
|
||||
.DATAW_IN (`NUM_EX_UNITS),
|
||||
.N (`ISSUE_WIDTH),
|
||||
.OP ("|")
|
||||
) reduce (
|
||||
.data_in (perf_issue_unit_stalls_per_cycle),
|
||||
.data_out (perf_unit_stalls_per_cycle)
|
||||
);
|
||||
|
||||
`BUFFER(perf_unit_stalls_per_cycle_r, perf_unit_stalls_per_cycle);
|
||||
|
||||
for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin
|
||||
for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin : g_perf_stalls
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_stalls_r[i] <= '0;
|
||||
end else begin
|
||||
perf_stalls_r[i] <= perf_stalls_r[i] + `PERF_CTR_BITS'(perf_unit_stalls_per_cycle_r[i]);
|
||||
perf_stalls_r[i] <= perf_stalls_r[i] + `PERF_CTR_BITS'(operands_if_stall && operands_if.data.ex_type == `EX_BITS'(i));
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i=0; i < `NUM_EX_UNITS; ++i) begin
|
||||
assign perf_stalls[i] = perf_stalls_r[i];
|
||||
end
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_PIPELINE
|
||||
for (genvar i=0; i < `ISSUE_WIDTH; ++i) begin
|
||||
always @(posedge clk) begin
|
||||
if (operands_if[i].valid && operands_if[i].ready) begin
|
||||
`TRACE(1, ("%d: core%0d-issue: wid=%0d, PC=0x%0h, ex=", $time, CORE_ID, wis_to_wid(operands_if[i].data.wis, i), {operands_if[i].data.PC, 1'b0}));
|
||||
trace_ex_type(1, operands_if[i].data.ex_type);
|
||||
`TRACE(1, (", op="));
|
||||
trace_ex_op(1, operands_if[i].data.ex_type, operands_if[i].data.op_type, operands_if[i].data.op_args);
|
||||
`TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if[i].data.tmask, operands_if[i].data.wb, operands_if[i].data.rd));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", operands_if[i].data.rs1_data, `NUM_THREADS);
|
||||
`TRACE(1, (", rs2_data="));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", operands_if[i].data.rs2_data, `NUM_THREADS);
|
||||
`TRACE(1, (", rs3_data="));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", operands_if[i].data.rs3_data, `NUM_THREADS);
|
||||
trace_op_args(1, operands_if[i].data.ex_type, operands_if[i].data.op_type, operands_if[i].data.op_args);
|
||||
`TRACE(1, (" (#%0d)\n", operands_if[i].data.uuid));
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -40,7 +40,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
localparam ISSUE_W = `LOG2UP(`ISSUE_WIDTH);
|
||||
localparam IN_DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `INST_OP_BITS + `INST_ARGS_BITS + 1 + `PC_BITS + `NR_BITS + `NT_WIDTH + (3 * `NUM_THREADS * `XLEN);
|
||||
localparam OUT_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `INST_OP_BITS + `INST_ARGS_BITS + 1 + `PC_BITS + `NR_BITS + `NT_WIDTH + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1;
|
||||
localparam FANOUT_ENABLE= (`NUM_THREADS > MAX_FANOUT);
|
||||
localparam FANOUT_ENABLE= (`NUM_THREADS > (MAX_FANOUT + MAX_FANOUT /2));
|
||||
|
||||
localparam DATA_TMASK_OFF = IN_DATAW - (`UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS);
|
||||
localparam DATA_REGS_OFF = 0;
|
||||
|
@ -49,13 +49,12 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
wire [`ISSUE_WIDTH-1:0][IN_DATAW-1:0] dispatch_data;
|
||||
wire [`ISSUE_WIDTH-1:0] dispatch_ready;
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_dispatch_data
|
||||
assign dispatch_valid[i] = dispatch_if[i].valid;
|
||||
assign dispatch_data[i] = dispatch_if[i].data;
|
||||
assign dispatch_if[i].ready = dispatch_ready[i];
|
||||
end
|
||||
|
||||
wire [BLOCK_SIZE-1:0][ISSUE_W-1:0] issue_indices;
|
||||
wire [BLOCK_SIZE-1:0] block_ready;
|
||||
wire [BLOCK_SIZE-1:0][NUM_LANES-1:0] block_tmask;
|
||||
wire [BLOCK_SIZE-1:0][2:0][NUM_LANES-1:0][`XLEN-1:0] block_regs;
|
||||
|
@ -66,28 +65,53 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
wire batch_done = (& block_done);
|
||||
|
||||
// batch select logic
|
||||
|
||||
logic [BATCH_COUNT_W-1:0] batch_idx;
|
||||
if (BATCH_COUNT != 1) begin
|
||||
|
||||
if (BATCH_COUNT != 1) begin : g_batch_idx
|
||||
wire [BATCH_COUNT_W-1:0] batch_idx_n;
|
||||
wire [BATCH_COUNT-1:0] valid_batches;
|
||||
// Per-batch request flags: valid_batches[i] is the OR-reduction of the
// BLOCK_SIZE dispatch_valid bits starting at index i * BLOCK_SIZE, i.e. it is
// set when at least one dispatch input belonging to batch i is valid.
for (genvar i = 0; i < BATCH_COUNT; ++i) begin : g_valid_batches
|
||||
assign valid_batches[i] = | dispatch_valid[i * BLOCK_SIZE +: BLOCK_SIZE];
|
||||
end
|
||||
|
||||
// Arbiter over the BATCH_COUNT valid_batches request bits. Its grant index is
// exposed as batch_idx_n and batch_done drives grant_ready; the one-hot grant
// and grant-valid outputs are left unconnected.
// NOTE(review): TYPE "P" presumably selects a priority policy -- confirm
// against the VX_generic_arbiter definition.
VX_generic_arbiter #(
|
||||
.NUM_REQS (BATCH_COUNT),
|
||||
.TYPE ("P")
|
||||
) batch_sel (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (valid_batches),
|
||||
.grant_index (batch_idx_n),
|
||||
`UNUSED_PIN (grant_onehot),
|
||||
`UNUSED_PIN (grant_valid),
|
||||
.grant_ready (batch_done)
|
||||
);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
batch_idx <= '0;
|
||||
end else begin
|
||||
batch_idx <= batch_idx + BATCH_COUNT_W'(batch_done);
|
||||
end else if (batch_done) begin
|
||||
batch_idx <= batch_idx_n;
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
end else begin : g_batch_idx_0
|
||||
assign batch_idx = 0;
|
||||
`UNUSED_VAR (batch_done)
|
||||
end
|
||||
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin
|
||||
wire [BLOCK_SIZE-1:0][ISSUE_W-1:0] issue_indices;
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_issue_indices
|
||||
assign issue_indices[block_idx] = ISSUE_W'(batch_idx * BLOCK_SIZE) + ISSUE_W'(block_idx);
|
||||
end
|
||||
|
||||
wire [ISSUE_W-1:0] issue_idx = ISSUE_W'(batch_idx * BLOCK_SIZE) + ISSUE_W'(block_idx);
|
||||
assign issue_indices[block_idx] = issue_idx;
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_blocks
|
||||
|
||||
wire [ISSUE_W-1:0] issue_idx = issue_indices[block_idx];
|
||||
wire valid_p, ready_p;
|
||||
|
||||
if (`NUM_THREADS != NUM_LANES) begin
|
||||
if (`NUM_THREADS > NUM_LANES) begin : g_partial_threads
|
||||
reg [NUM_PACKETS-1:0] sent_mask_p;
|
||||
wire [PID_WIDTH-1:0] start_p_n, start_p, end_p;
|
||||
wire dispatch_valid_r;
|
||||
|
@ -122,8 +146,8 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs2_data = dispatch_data[issue_idx][DATA_REGS_OFF + 1 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN];
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs3_data = dispatch_data[issue_idx][DATA_REGS_OFF + 0 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN];
|
||||
|
||||
for (genvar i = 0; i < NUM_PACKETS; ++i) begin
|
||||
for (genvar j = 0; j < NUM_LANES; ++j) begin
|
||||
for (genvar i = 0; i < NUM_PACKETS; ++i) begin : g_per_packet_data
|
||||
for (genvar j = 0; j < NUM_LANES; ++j) begin : g_j
|
||||
localparam k = i * NUM_LANES + j;
|
||||
assign per_packet_tmask[i][j] = dispatch_tmask[k];
|
||||
assign per_packet_regs[i][0][j] = dispatch_rs1_data[k];
|
||||
|
@ -133,10 +157,12 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
end
|
||||
|
||||
wire [NUM_PACKETS-1:0] packet_valids;
|
||||
wire [NUM_PACKETS-1:0][PID_WIDTH-1:0] packet_ids;
|
||||
|
||||
for (genvar i = 0; i < NUM_PACKETS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_PACKETS; ++i) begin : g_packet_valids
|
||||
assign packet_valids[i] = (| per_packet_tmask[i]);
|
||||
end
|
||||
|
||||
wire [NUM_PACKETS-1:0][PID_WIDTH-1:0] packet_ids;
|
||||
for (genvar i = 0; i < NUM_PACKETS; ++i) begin : g_packet_ids
|
||||
assign packet_ids[i] = PID_WIDTH'(i);
|
||||
end
|
||||
|
||||
|
@ -185,13 +211,13 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
assign block_pid[block_idx] = start_p;
|
||||
assign block_sop[block_idx] = is_first_p;
|
||||
assign block_eop[block_idx] = is_last_p;
|
||||
if (FANOUT_ENABLE) begin
|
||||
if (FANOUT_ENABLE) begin : g_block_ready_fanout
|
||||
assign block_ready[block_idx] = dispatch_valid_r && ready_p && block_enable;
|
||||
end else begin
|
||||
end else begin : g_block_ready
|
||||
assign block_ready[block_idx] = ready_p && block_enable;
|
||||
end
|
||||
assign block_done[block_idx] = ~dispatch_valid[issue_idx] || fire_eop;
|
||||
end else begin
|
||||
assign block_done[block_idx] = fire_eop || ~dispatch_valid[issue_idx];
|
||||
end else begin : g_full_threads
|
||||
assign valid_p = dispatch_valid[issue_idx];
|
||||
assign block_tmask[block_idx] = dispatch_data[issue_idx][DATA_TMASK_OFF +: `NUM_THREADS];
|
||||
assign block_regs[block_idx][0] = dispatch_data[issue_idx][DATA_REGS_OFF + 2 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN];
|
||||
|
@ -201,31 +227,31 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
assign block_sop[block_idx] = 1'b1;
|
||||
assign block_eop[block_idx] = 1'b1;
|
||||
assign block_ready[block_idx] = ready_p;
|
||||
assign block_done[block_idx] = ~valid_p || ready_p;
|
||||
assign block_done[block_idx] = ready_p || ~valid_p;
|
||||
end
|
||||
|
||||
wire [ISSUE_ISW_W-1:0] isw;
|
||||
if (BATCH_COUNT != 1) begin
|
||||
if (BLOCK_SIZE != 1) begin
|
||||
if (BATCH_COUNT != 1) begin : g_isw_batch
|
||||
if (BLOCK_SIZE != 1) begin : g_block
|
||||
assign isw = {batch_idx, BLOCK_SIZE_W'(block_idx)};
|
||||
end else begin
|
||||
end else begin : g_no_block
|
||||
assign isw = batch_idx;
|
||||
end
|
||||
end else begin
|
||||
end else begin : g_isw
|
||||
assign isw = block_idx;
|
||||
end
|
||||
|
||||
`RESET_RELAY(buf_out_reset, reset);
|
||||
|
||||
wire [`NW_WIDTH-1:0] block_wid = wis_to_wid(dispatch_data[issue_idx][DATA_TMASK_OFF+`NUM_THREADS +: ISSUE_WIS_W], isw);
|
||||
|
||||
logic [OUT_DATAW-1:0] execute_data, execute_data_w;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (OUT_DATAW),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
||||
) buf_out (
|
||||
.clk (clk),
|
||||
.reset (buf_out_reset),
|
||||
.reset (reset),
|
||||
.valid_in (valid_p),
|
||||
.ready_in (ready_p),
|
||||
.data_in ({
|
||||
|
@ -239,17 +265,27 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
block_pid[block_idx],
|
||||
block_sop[block_idx],
|
||||
block_eop[block_idx]}),
|
||||
.data_out (execute_if[block_idx].data),
|
||||
.data_out (execute_data),
|
||||
.valid_out (execute_if[block_idx].valid),
|
||||
.ready_out (execute_if[block_idx].ready)
|
||||
);
|
||||
|
||||
// Post-process the output of buf_out (execute_data) before it is driven onto
// execute_if[block_idx].data.
// - `NUM_THREADS != NUM_LANES: the buffered word is forwarded unchanged.
// - otherwise: the word is copied and its three least-significant bits are
//   overwritten with the constants {1'b0, 1'b1, 1'b1}, which the inline
//   comment identifies as the default pid, sop and eop values.
// NOTE(review): the [2:0] slice assumes pid occupies a single bit in this
// configuration -- confirm against PID_WIDTH.
if (`NUM_THREADS != NUM_LANES) begin : g_execute_data_w_partial
|
||||
assign execute_data_w = execute_data;
|
||||
end else begin : g_execute_data_w_full
|
||||
always @(*) begin
|
||||
execute_data_w = execute_data;
|
||||
execute_data_w[2:0] = {1'b0, 1'b1, 1'b1}; // default pid, sop, and eop
|
||||
end
|
||||
end
|
||||
assign execute_if[block_idx].data = execute_data_w;
|
||||
end
|
||||
|
||||
reg [`ISSUE_WIDTH-1:0] ready_in;
|
||||
always @(*) begin
|
||||
ready_in = 0;
|
||||
for (integer i = 0; i < BLOCK_SIZE; ++i) begin
|
||||
ready_in[issue_indices[i]] = block_ready[i] && block_eop[i];
|
||||
for (integer block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin
|
||||
ready_in[issue_indices[block_idx]] = block_ready[block_idx] && block_eop[block_idx];
|
||||
end
|
||||
end
|
||||
assign dispatch_ready = ready_in;
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_execute import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
@ -50,41 +51,35 @@ module VX_execute import VX_gpu_pkg::*; #(
|
|||
VX_fpu_csr_if fpu_csr_if[`NUM_FPU_BLOCKS]();
|
||||
`endif
|
||||
|
||||
`RESET_RELAY (alu_reset, reset);
|
||||
`RESET_RELAY (lsu_reset, reset);
|
||||
`RESET_RELAY (sfu_reset, reset);
|
||||
|
||||
VX_alu_unit #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-alu", INSTANCE_ID))
|
||||
) alu_unit (
|
||||
.clk (clk),
|
||||
.reset (alu_reset),
|
||||
.reset (reset),
|
||||
.dispatch_if (dispatch_if[`EX_ALU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.commit_if (commit_if[`EX_ALU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.branch_ctl_if (branch_ctl_if)
|
||||
);
|
||||
|
||||
`SCOPE_IO_SWITCH (1)
|
||||
`SCOPE_IO_SWITCH (1);
|
||||
|
||||
VX_lsu_unit #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-lsu", INSTANCE_ID))
|
||||
) lsu_unit (
|
||||
`SCOPE_IO_BIND (0)
|
||||
.clk (clk),
|
||||
.reset (lsu_reset),
|
||||
.reset (reset),
|
||||
.dispatch_if (dispatch_if[`EX_LSU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.commit_if (commit_if[`EX_LSU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.lsu_mem_if (lsu_mem_if)
|
||||
);
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`RESET_RELAY (fpu_reset, reset);
|
||||
|
||||
VX_fpu_unit #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-fpu", INSTANCE_ID))
|
||||
) fpu_unit (
|
||||
.clk (clk),
|
||||
.reset (fpu_reset),
|
||||
.reset (reset),
|
||||
.dispatch_if (dispatch_if[`EX_FPU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.commit_if (commit_if[`EX_FPU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.fpu_csr_if (fpu_csr_if)
|
||||
|
@ -92,10 +87,11 @@ module VX_execute import VX_gpu_pkg::*; #(
|
|||
`endif
|
||||
|
||||
VX_sfu_unit #(
|
||||
.INSTANCE_ID ($sformatf("%s-sfu", INSTANCE_ID)),
|
||||
.CORE_ID (CORE_ID)
|
||||
) sfu_unit (
|
||||
.clk (clk),
|
||||
.reset (sfu_reset),
|
||||
.reset (reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.mem_perf_if (mem_perf_if),
|
||||
.pipeline_perf_if (pipeline_perf_if),
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_fetch import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
|
@ -30,7 +30,7 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
// outputs
|
||||
VX_fetch_if.master fetch_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
wire icache_req_valid;
|
||||
|
@ -56,9 +56,10 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
.LUTRAM (1)
|
||||
) tag_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (1'b1),
|
||||
.write (icache_req_fire),
|
||||
`UNUSED_PIN (wren),
|
||||
.wren (1'b1),
|
||||
.waddr (req_tag),
|
||||
.wdata ({schedule_if.data.PC, schedule_if.data.tmask}),
|
||||
.raddr (rsp_tag),
|
||||
|
@ -70,7 +71,7 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
// This resolves potential deadlock if ibuffer fills and the LSU stalls the execute stage due to pending dcache requests.
|
||||
// This issue is particularly prevalent when the icache and dcache are disabled and both requests share the same bus.
|
||||
wire [`NUM_WARPS-1:0] pending_ibuf_full;
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_pending_reads
|
||||
VX_pending_size #(
|
||||
.SIZE (`IBUF_SIZE)
|
||||
) pending_reads (
|
||||
|
@ -78,9 +79,11 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
.reset (reset),
|
||||
.incr (icache_req_fire && schedule_if.data.wid == i),
|
||||
.decr (fetch_if.ibuf_pop[i]),
|
||||
`UNUSED_PIN (empty),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
.full (pending_ibuf_full[i]),
|
||||
`UNUSED_PIN (size),
|
||||
`UNUSED_PIN (empty)
|
||||
`UNUSED_PIN (alm_full),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
end
|
||||
wire ibuf_ready = ~pending_ibuf_full[schedule_if.data.wid];
|
||||
|
@ -89,7 +92,7 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
`endif
|
||||
|
||||
`RUNTIME_ASSERT((!schedule_if.valid || schedule_if.data.PC != 0),
|
||||
("%t: *** invalid PC=0x%0h, wid=%0d, tmask=%b (#%0d)", $time, {schedule_if.data.PC, 1'b0}, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.uuid))
|
||||
("%t: *** %s invalid PC=0x%0h, wid=%0d, tmask=%b (#%0d)", $time, INSTANCE_ID, {schedule_if.data.PC, 1'b0}, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.uuid))
|
||||
|
||||
// Icache Request
|
||||
|
||||
|
@ -113,9 +116,9 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
.ready_out (icache_bus_if.req_ready)
|
||||
);
|
||||
|
||||
assign icache_bus_if.req_data.atype = '0;
|
||||
assign icache_bus_if.req_data.flags = '0;
|
||||
assign icache_bus_if.req_data.rw = 0;
|
||||
assign icache_bus_if.req_data.byteen = 4'b1111;
|
||||
assign icache_bus_if.req_data.byteen = '1;
|
||||
assign icache_bus_if.req_data.data = '0;
|
||||
|
||||
// Icache Response
|
||||
|
@ -128,59 +131,57 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
assign fetch_if.data.uuid = rsp_uuid;
|
||||
assign icache_bus_if.rsp_ready = fetch_if.ready;
|
||||
|
||||
`ifdef SCOPE
|
||||
`ifdef DBG_SCOPE_FETCH
|
||||
if (CORE_ID == 0) begin
|
||||
`ifdef SCOPE
|
||||
wire schedule_fire = schedule_if.valid && schedule_if.ready;
|
||||
wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready;
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (1),
|
||||
.TRIGGERW (4),
|
||||
.PROBEW (`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS +
|
||||
ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH +
|
||||
(ICACHE_WORD_SIZE*8) + ICACHE_TAG_WIDTH)
|
||||
) scope_tap (
|
||||
.clk(clk),
|
||||
.reset(scope_reset),
|
||||
.start(1'b0),
|
||||
.stop(1'b0),
|
||||
.triggers({
|
||||
reset,
|
||||
schedule_fire,
|
||||
icache_req_fire,
|
||||
icache_rsp_fire
|
||||
}),
|
||||
.probes({
|
||||
schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC,
|
||||
icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr,
|
||||
icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag
|
||||
}),
|
||||
.bus_in(scope_bus_in),
|
||||
.bus_out(scope_bus_out)
|
||||
);
|
||||
`endif
|
||||
`ifdef CHIPSCOPE
|
||||
ila_fetch ila_fetch_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({reset, schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC, schedule_if.ready, schedule_if.valid}),
|
||||
.probe1 ({icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr, icache_bus_if.req_ready, icache_bus_if.req_valid}),
|
||||
.probe2 ({icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag, icache_bus_if.rsp_ready, icache_bus_if.rsp_valid})
|
||||
);
|
||||
`endif
|
||||
end
|
||||
`SCOPE_IO_SWITCH (1);
|
||||
wire schedule_fire = schedule_if.valid && schedule_if.ready;
|
||||
wire icache_bus_req_fire = icache_bus_if.req_valid && icache_bus_if.req_ready;
|
||||
wire icache_bus_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready;
|
||||
wire [`UUID_WIDTH-1:0] icache_bus_req_uuid = icache_bus_if.req_data.tag[ICACHE_TAG_WIDTH-1 -: `UUID_WIDTH];
|
||||
wire [`UUID_WIDTH-1:0] icache_bus_rsp_uuid = icache_bus_if.rsp_data.tag[ICACHE_TAG_WIDTH-1 -: `UUID_WIDTH];
|
||||
`NEG_EDGE (reset_negedge, reset);
|
||||
`SCOPE_TAP_EX (0, 1, 6, 3, (
|
||||
`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS +
|
||||
`UUID_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH +
|
||||
`UUID_WIDTH + (ICACHE_WORD_SIZE * 8)
|
||||
), {
|
||||
schedule_if.valid,
|
||||
schedule_if.ready,
|
||||
icache_bus_if.req_valid,
|
||||
icache_bus_if.req_ready,
|
||||
icache_bus_if.rsp_valid,
|
||||
icache_bus_if.rsp_ready
|
||||
}, {
|
||||
schedule_fire,
|
||||
icache_bus_req_fire,
|
||||
icache_bus_rsp_fire
|
||||
},{
|
||||
schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC,
|
||||
icache_bus_req_uuid, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr,
|
||||
icache_bus_rsp_uuid, icache_bus_if.rsp_data.data
|
||||
},
|
||||
reset_negedge, 1'b0, 4096
|
||||
);
|
||||
`else
|
||||
`SCOPE_IO_UNUSED()
|
||||
`SCOPE_IO_UNUSED(0)
|
||||
`endif
|
||||
`endif
|
||||
`ifdef CHIPSCOPE
|
||||
ila_fetch ila_fetch_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({schedule_if.valid, schedule_if.data, schedule_if.ready}),
|
||||
.probe1 ({icache_bus_if.req_valid, icache_bus_if.req_data, icache_bus_if.req_ready}),
|
||||
.probe2 ({icache_bus_if.rsp_valid, icache_bus_if.rsp_data, icache_bus_if.rsp_ready})
|
||||
);
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_MEM
|
||||
wire schedule_fire = schedule_if.valid && schedule_if.ready;
|
||||
wire fetch_fire = fetch_if.valid && fetch_if.ready;
|
||||
always @(posedge clk) begin
|
||||
if (schedule_fire) begin
|
||||
`TRACE(1, ("%d: I$%0d req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, CORE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid));
|
||||
if (schedule_if.valid && schedule_if.ready) begin
|
||||
`TRACE(1, ("%t: %s req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, INSTANCE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid))
|
||||
end
|
||||
if (fetch_fire) begin
|
||||
`TRACE(1, ("%d: I$%0d rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, CORE_ID, fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid));
|
||||
if (fetch_if.valid && fetch_if.ready) begin
|
||||
`TRACE(1, ("%t: %s rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, INSTANCE_ID, fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fpu_unit import VX_fpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -26,7 +26,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
VX_commit_if.master commit_if [`ISSUE_WIDTH],
|
||||
VX_fpu_csr_if.master fpu_csr_if[`NUM_FPU_BLOCKS]
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam BLOCK_SIZE = `NUM_FPU_BLOCKS;
|
||||
localparam NUM_LANES = `NUM_FPU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
|
@ -41,7 +41,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
VX_dispatch_unit #(
|
||||
.BLOCK_SIZE (BLOCK_SIZE),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.OUT_BUF (PARTIAL_BW ? 1 : 0)
|
||||
.OUT_BUF (PARTIAL_BW ? 3 : 0)
|
||||
) dispatch_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -53,12 +53,10 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
.NUM_LANES (NUM_LANES)
|
||||
) per_block_commit_if[BLOCK_SIZE]();
|
||||
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_fpus
|
||||
`UNUSED_VAR (per_block_execute_if[block_idx].data.tid)
|
||||
`UNUSED_VAR (per_block_execute_if[block_idx].data.wb)
|
||||
|
||||
`RESET_RELAY (block_reset, reset);
|
||||
|
||||
// Store request info
|
||||
wire fpu_req_valid, fpu_req_ready;
|
||||
wire fpu_rsp_valid, fpu_rsp_ready;
|
||||
|
@ -71,9 +69,9 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
wire [NUM_LANES-1:0] fpu_rsp_tmask;
|
||||
wire [`PC_BITS-1:0] fpu_rsp_PC;
|
||||
wire [`NR_BITS-1:0] fpu_rsp_rd;
|
||||
wire [PID_WIDTH-1:0] fpu_rsp_pid;
|
||||
wire fpu_rsp_sop;
|
||||
wire fpu_rsp_eop;
|
||||
wire [PID_WIDTH-1:0] fpu_rsp_pid, fpu_rsp_pid_u;
|
||||
wire fpu_rsp_sop, fpu_rsp_sop_u;
|
||||
wire fpu_rsp_eop, fpu_rsp_eop_u;
|
||||
|
||||
wire [TAG_WIDTH-1:0] fpu_req_tag, fpu_rsp_tag;
|
||||
wire mdata_full;
|
||||
|
@ -89,17 +87,30 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
.SIZE (`FPUQ_SIZE)
|
||||
) tag_store (
|
||||
.clk (clk),
|
||||
.reset (block_reset),
|
||||
.reset (reset),
|
||||
.acquire_en (execute_fire),
|
||||
.write_addr (fpu_req_tag),
|
||||
.write_data ({per_block_execute_if[block_idx].data.uuid, per_block_execute_if[block_idx].data.wid, per_block_execute_if[block_idx].data.tmask, per_block_execute_if[block_idx].data.PC, per_block_execute_if[block_idx].data.rd, per_block_execute_if[block_idx].data.pid, per_block_execute_if[block_idx].data.sop, per_block_execute_if[block_idx].data.eop}),
|
||||
.read_data ({fpu_rsp_uuid, fpu_rsp_wid, fpu_rsp_tmask, fpu_rsp_PC, fpu_rsp_rd, fpu_rsp_pid, fpu_rsp_sop, fpu_rsp_eop}),
|
||||
.read_data ({fpu_rsp_uuid, fpu_rsp_wid, fpu_rsp_tmask, fpu_rsp_PC, fpu_rsp_rd, fpu_rsp_pid_u, fpu_rsp_sop_u, fpu_rsp_eop_u}),
|
||||
.read_addr (fpu_rsp_tag),
|
||||
.release_en (fpu_rsp_fire),
|
||||
.full (mdata_full),
|
||||
`UNUSED_PIN (empty)
|
||||
);
|
||||
|
||||
// Select the pid/sop/eop values attached to the FPU response.
// - PID_BITS != 0: use the values read back from the tag store
//   (fpu_rsp_pid_u / fpu_rsp_sop_u / fpu_rsp_eop_u) as they are.
// - PID_BITS == 0: the read-back values are marked unused and the response
//   fields are tied to constants (pid = 0, sop = 1, eop = 1).
if (PID_BITS != 0) begin : g_fpu_rsp_pid
|
||||
assign fpu_rsp_pid = fpu_rsp_pid_u;
|
||||
assign fpu_rsp_sop = fpu_rsp_sop_u;
|
||||
assign fpu_rsp_eop = fpu_rsp_eop_u;
|
||||
end else begin : g_no_fpu_rsp_pid
|
||||
`UNUSED_VAR (fpu_rsp_pid_u)
|
||||
`UNUSED_VAR (fpu_rsp_sop_u)
|
||||
`UNUSED_VAR (fpu_rsp_eop_u)
|
||||
assign fpu_rsp_pid = 0;
|
||||
assign fpu_rsp_sop = 1;
|
||||
assign fpu_rsp_eop = 1;
|
||||
end
|
||||
|
||||
// resolve dynamic FRM from CSR
|
||||
wire [`INST_FRM_BITS-1:0] fpu_req_frm;
|
||||
`ASSIGN_BLOCKED_WID (fpu_csr_if[block_idx].read_wid, per_block_execute_if[block_idx].data.wid, block_idx, `NUM_FPU_BLOCKS)
|
||||
|
@ -111,8 +122,6 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
assign fpu_req_valid = per_block_execute_if[block_idx].valid && ~mdata_full;
|
||||
assign per_block_execute_if[block_idx].ready = fpu_req_ready && ~mdata_full;
|
||||
|
||||
`RESET_RELAY (fpu_reset, block_reset);
|
||||
|
||||
`ifdef FPU_DPI
|
||||
|
||||
VX_fpu_dpi #(
|
||||
|
@ -121,7 +130,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
.OUT_BUF (PARTIAL_BW ? 1 : 3)
|
||||
) fpu_dpi (
|
||||
.clk (clk),
|
||||
.reset (fpu_reset),
|
||||
.reset (reset),
|
||||
|
||||
.valid_in (fpu_req_valid),
|
||||
.mask_in (per_block_execute_if[block_idx].data.tmask),
|
||||
|
@ -150,7 +159,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
.OUT_BUF (PARTIAL_BW ? 1 : 3)
|
||||
) fpu_fpnew (
|
||||
.clk (clk),
|
||||
.reset (fpu_reset),
|
||||
.reset (reset),
|
||||
|
||||
.valid_in (fpu_req_valid),
|
||||
.mask_in (per_block_execute_if[block_idx].data.tmask),
|
||||
|
@ -179,7 +188,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
.OUT_BUF (PARTIAL_BW ? 1 : 3)
|
||||
) fpu_dsp (
|
||||
.clk (clk),
|
||||
.reset (fpu_reset),
|
||||
.reset (reset),
|
||||
|
||||
.valid_in (fpu_req_valid),
|
||||
.mask_in (per_block_execute_if[block_idx].data.tmask),
|
||||
|
@ -202,27 +211,38 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
|
||||
`endif
|
||||
|
||||
// handle FPU response
|
||||
|
||||
// handle CSR update
|
||||
fflags_t fpu_rsp_fflags_q;
|
||||
|
||||
if (PID_BITS != 0) begin
|
||||
if (PID_BITS != 0) begin : g_pid
|
||||
fflags_t fpu_rsp_fflags_r;
|
||||
always @(posedge clk) begin
|
||||
if (block_reset) begin
|
||||
if (reset) begin
|
||||
fpu_rsp_fflags_r <= '0;
|
||||
end else if (fpu_rsp_fire) begin
|
||||
fpu_rsp_fflags_r <= fpu_rsp_eop ? '0 : (fpu_rsp_fflags_r | fpu_rsp_fflags);
|
||||
end
|
||||
end
|
||||
assign fpu_rsp_fflags_q = fpu_rsp_fflags_r | fpu_rsp_fflags;
|
||||
end else begin
|
||||
end else begin : g_no_pid
|
||||
assign fpu_rsp_fflags_q = fpu_rsp_fflags;
|
||||
end
|
||||
|
||||
assign fpu_csr_if[block_idx].write_enable = fpu_rsp_fire && fpu_rsp_eop && fpu_rsp_has_fflags;
|
||||
`ASSIGN_BLOCKED_WID (fpu_csr_if[block_idx].write_wid, fpu_rsp_wid, block_idx, `NUM_FPU_BLOCKS)
|
||||
assign fpu_csr_if[block_idx].write_fflags = fpu_rsp_fflags_q;
|
||||
// Stage the fflags CSR update on a local interface instance, then pass it
// through VX_pipe_register before it reaches fpu_csr_if[block_idx].
// The staged write_enable is fpu_rsp_fire && fpu_rsp_eop && fpu_rsp_has_fflags,
// write_wid is derived from fpu_rsp_wid via `ASSIGN_BLOCKED_WID, and
// write_fflags carries fpu_rsp_fflags_q. The register's enable is tied to 1'b1.
// NOTE(review): presumably this adds one cycle of latency to the CSR write and
// RESETW = 1 clears only write_enable on reset -- confirm against
// VX_pipe_register.
VX_fpu_csr_if fpu_csr_tmp_if();
|
||||
assign fpu_csr_tmp_if.write_enable = fpu_rsp_fire && fpu_rsp_eop && fpu_rsp_has_fflags;
|
||||
`ASSIGN_BLOCKED_WID (fpu_csr_tmp_if.write_wid, fpu_rsp_wid, block_idx, `NUM_FPU_BLOCKS)
|
||||
assign fpu_csr_tmp_if.write_fflags = fpu_rsp_fflags_q;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_WIDTH + $bits(fflags_t)),
|
||||
.RESETW (1)
|
||||
) fpu_csr_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in ({fpu_csr_tmp_if.write_enable, fpu_csr_tmp_if.write_wid, fpu_csr_tmp_if.write_fflags}),
|
||||
.data_out ({fpu_csr_if[block_idx].write_enable, fpu_csr_if[block_idx].write_wid, fpu_csr_if[block_idx].write_fflags})
|
||||
);
|
||||
|
||||
// send response
|
||||
|
||||
|
@ -231,7 +251,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
.SIZE (0)
|
||||
) rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (block_reset),
|
||||
.reset (reset),
|
||||
.valid_in (fpu_rsp_valid),
|
||||
.ready_in (fpu_rsp_ready),
|
||||
.data_in ({fpu_rsp_uuid, fpu_rsp_wid, fpu_rsp_tmask, fpu_rsp_PC, fpu_rsp_rd, fpu_rsp_result, fpu_rsp_pid, fpu_rsp_sop, fpu_rsp_eop}),
|
||||
|
|
|
@ -41,17 +41,17 @@ module VX_gather_unit import VX_gpu_pkg::*; #(
|
|||
wire [BLOCK_SIZE-1:0] commit_in_ready;
|
||||
wire [BLOCK_SIZE-1:0][ISSUE_ISW_W-1:0] commit_in_isw;
|
||||
|
||||
for (genvar i = 0; i < BLOCK_SIZE; ++i) begin
|
||||
for (genvar i = 0; i < BLOCK_SIZE; ++i) begin : g_commit_in
|
||||
assign commit_in_valid[i] = commit_in_if[i].valid;
|
||||
assign commit_in_data[i] = commit_in_if[i].data;
|
||||
assign commit_in_if[i].ready = commit_in_ready[i];
|
||||
if (BLOCK_SIZE != `ISSUE_WIDTH) begin
|
||||
if (BLOCK_SIZE != 1) begin
|
||||
if (BLOCK_SIZE != `ISSUE_WIDTH) begin : g_commit_in_isw_partial
|
||||
if (BLOCK_SIZE != 1) begin : g_block
|
||||
assign commit_in_isw[i] = {commit_in_data[i][DATA_WIS_OFF+BLOCK_SIZE_W +: (ISSUE_ISW_W-BLOCK_SIZE_W)], BLOCK_SIZE_W'(i)};
|
||||
end else begin
|
||||
end else begin : g_no_block
|
||||
assign commit_in_isw[i] = commit_in_data[i][DATA_WIS_OFF +: ISSUE_ISW_W];
|
||||
end
|
||||
end else begin
|
||||
end else begin : g_commit_in_isw_full
|
||||
assign commit_in_isw[i] = BLOCK_SIZE_W'(i);
|
||||
end
|
||||
end
|
||||
|
@ -70,24 +70,23 @@ module VX_gather_unit import VX_gpu_pkg::*; #(
|
|||
commit_out_data[commit_in_isw[i]] = commit_in_data[i];
|
||||
end
|
||||
end
|
||||
for (genvar i = 0; i < BLOCK_SIZE; ++i) begin
|
||||
|
||||
for (genvar i = 0; i < BLOCK_SIZE; ++i) begin : g_commit_in_ready
|
||||
assign commit_in_ready[i] = commit_out_ready[commit_in_isw[i]];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin: g_out_bufs
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) commit_tmp_if();
|
||||
|
||||
`RESET_RELAY(commit_out_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (commit_out_reset),
|
||||
.reset (reset),
|
||||
.valid_in (commit_out_valid[i]),
|
||||
.ready_in (commit_out_ready[i]),
|
||||
.data_in (commit_out_data[i]),
|
||||
|
@ -96,31 +95,31 @@ module VX_gather_unit import VX_gpu_pkg::*; #(
|
|||
.ready_out (commit_tmp_if.ready)
|
||||
);
|
||||
|
||||
logic [`NUM_THREADS-1:0] commit_tmask_r;
|
||||
logic [`NUM_THREADS-1:0][`XLEN-1:0] commit_data_r;
|
||||
if (PID_BITS != 0) begin
|
||||
logic [`NUM_THREADS-1:0] commit_tmask_w;
|
||||
logic [`NUM_THREADS-1:0][`XLEN-1:0] commit_data_w;
|
||||
if (PID_BITS != 0) begin : g_commit_data_with_pid
|
||||
always @(*) begin
|
||||
commit_tmask_r = '0;
|
||||
commit_data_r = 'x;
|
||||
commit_tmask_w = '0;
|
||||
commit_data_w = 'x;
|
||||
for (integer j = 0; j < NUM_LANES; ++j) begin
|
||||
commit_tmask_r[commit_tmp_if.data.pid * NUM_LANES + j] = commit_tmp_if.data.tmask[j];
|
||||
commit_data_r[commit_tmp_if.data.pid * NUM_LANES + j] = commit_tmp_if.data.data[j];
|
||||
commit_tmask_w[commit_tmp_if.data.pid * NUM_LANES + j] = commit_tmp_if.data.tmask[j];
|
||||
commit_data_w[commit_tmp_if.data.pid * NUM_LANES + j] = commit_tmp_if.data.data[j];
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
assign commit_tmask_r = commit_tmp_if.data.tmask;
|
||||
assign commit_data_r = commit_tmp_if.data.data;
|
||||
end else begin : g_commit_data_no_pid
|
||||
assign commit_tmask_w = commit_tmp_if.data.tmask;
|
||||
assign commit_data_w = commit_tmp_if.data.data;
|
||||
end
|
||||
|
||||
assign commit_out_if[i].valid = commit_tmp_if.valid;
|
||||
assign commit_out_if[i].data = {
|
||||
commit_tmp_if.data.uuid,
|
||||
commit_tmp_if.data.wid,
|
||||
commit_tmask_r,
|
||||
commit_tmask_w,
|
||||
commit_tmp_if.data.PC,
|
||||
commit_tmp_if.data.wb,
|
||||
commit_tmp_if.data.rd,
|
||||
commit_data_r,
|
||||
commit_data_w,
|
||||
1'b0, // PID
|
||||
commit_tmp_if.data.sop,
|
||||
commit_tmp_if.data.eop
|
||||
|
|
|
@ -1,286 +0,0 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_gpr_slice import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0,
|
||||
parameter CACHE_ENABLE = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_writeback_if.slave writeback_if,
|
||||
VX_scoreboard_if.slave scoreboard_if,
|
||||
VX_operands_if.master operands_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + `NR_BITS;
|
||||
localparam RAM_ADDRW = `LOG2UP(`NUM_REGS * ISSUE_RATIO);
|
||||
|
||||
localparam STATE_IDLE = 2'd0;
|
||||
localparam STATE_FETCH1 = 2'd1;
|
||||
localparam STATE_FETCH2 = 2'd2;
|
||||
localparam STATE_FETCH3 = 2'd3;
|
||||
localparam STATE_BITS = 2;
|
||||
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data;
|
||||
reg [`NR_BITS-1:0] gpr_rd_rid, gpr_rd_rid_n;
|
||||
reg [ISSUE_WIS_W-1:0] gpr_rd_wis, gpr_rd_wis_n;
|
||||
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] cache_data [ISSUE_RATIO-1:0];
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] cache_data_n [ISSUE_RATIO-1:0];
|
||||
reg [`NR_BITS-1:0] cache_reg [ISSUE_RATIO-1:0];
|
||||
reg [`NR_BITS-1:0] cache_reg_n [ISSUE_RATIO-1:0];
|
||||
reg [`NUM_THREADS-1:0] cache_tmask [ISSUE_RATIO-1:0];
|
||||
reg [`NUM_THREADS-1:0] cache_tmask_n [ISSUE_RATIO-1:0];
|
||||
reg [ISSUE_RATIO-1:0] cache_eop, cache_eop_n;
|
||||
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data, rs1_data_n;
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data, rs2_data_n;
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data, rs3_data_n;
|
||||
|
||||
reg [STATE_BITS-1:0] state, state_n;
|
||||
reg [`NR_BITS-1:0] rs2, rs2_n;
|
||||
reg [`NR_BITS-1:0] rs3, rs3_n;
|
||||
reg rs2_ready, rs2_ready_n;
|
||||
reg rs3_ready, rs3_ready_n;
|
||||
reg data_ready, data_ready_n;
|
||||
|
||||
wire stg_valid_in, stg_ready_in;
|
||||
|
||||
wire is_rs1_zero = (scoreboard_if.data.rs1 == 0);
|
||||
wire is_rs2_zero = (scoreboard_if.data.rs2 == 0);
|
||||
wire is_rs3_zero = (scoreboard_if.data.rs3 == 0);
|
||||
|
||||
// Combinational next-state logic for the operand-fetch state machine and
// the (optional) per-warp writeback cache.
//
// Every *_n signal defaults to its registered value and is then selectively
// overridden below; the result is latched by the clocked always block.
// Within STATE_IDLE the rs3/rs2/rs1 checks are evaluated in that order and
// each miss overwrites gpr_rd_rid_n/state_n, so statement order matters:
// the last miss wins, i.e. rs1 is fetched first, then rs2, then rs3.
always @(*) begin
|
||||
state_n = state;
|
||||
rs2_n = rs2;
|
||||
rs3_n = rs3;
|
||||
rs2_ready_n = rs2_ready;
|
||||
rs3_ready_n = rs3_ready;
|
||||
rs1_data_n = rs1_data;
|
||||
rs2_data_n = rs2_data;
|
||||
rs3_data_n = rs3_data;
|
||||
cache_data_n = cache_data;
|
||||
cache_reg_n = cache_reg;
|
||||
cache_tmask_n= cache_tmask;
|
||||
cache_eop_n = cache_eop;
|
||||
gpr_rd_rid_n = gpr_rd_rid;
|
||||
gpr_rd_wis_n = gpr_rd_wis;
|
||||
data_ready_n = data_ready;
|
||||
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
// The previously prepared operands were consumed downstream: drop the
// ready flag so a new scoreboard request can be evaluated this cycle.
if (operands_if.valid && operands_if.ready) begin
|
||||
data_ready_n = 0;
|
||||
end
|
||||
// Start operand collection only when no operands are pending.
// Note this tests data_ready_n (possibly cleared just above), not data_ready.
if (scoreboard_if.valid && data_ready_n == 0) begin
|
||||
// Optimistically assume all operands are available; any miss below clears it.
data_ready_n = 1;
|
||||
// rs3: satisfied immediately if it is register 0, or (with CACHE_ENABLE)
// if the cached register for this warp matches and the cached thread mask
// covers every requested thread. Otherwise schedule a RAM fetch.
if (is_rs3_zero || (CACHE_ENABLE != 0 &&
|
||||
scoreboard_if.data.rs3 == cache_reg[scoreboard_if.data.wis] &&
|
||||
(scoreboard_if.data.tmask & cache_tmask[scoreboard_if.data.wis]) == scoreboard_if.data.tmask)) begin
|
||||
rs3_data_n = (is_rs3_zero || CACHE_ENABLE == 0) ? '0 : cache_data[scoreboard_if.data.wis];
|
||||
rs3_ready_n = 1;
|
||||
end else begin
|
||||
rs3_ready_n = 0;
|
||||
gpr_rd_rid_n = scoreboard_if.data.rs3;
|
||||
data_ready_n = 0;
|
||||
state_n = STATE_FETCH3;
|
||||
end
|
||||
// rs2: same zero/cache-hit test; a miss overrides the rs3 fetch request above.
if (is_rs2_zero || (CACHE_ENABLE != 0 &&
|
||||
scoreboard_if.data.rs2 == cache_reg[scoreboard_if.data.wis] &&
|
||||
(scoreboard_if.data.tmask & cache_tmask[scoreboard_if.data.wis]) == scoreboard_if.data.tmask)) begin
|
||||
rs2_data_n = (is_rs2_zero || CACHE_ENABLE == 0) ? '0 : cache_data[scoreboard_if.data.wis];
|
||||
rs2_ready_n = 1;
|
||||
end else begin
|
||||
rs2_ready_n = 0;
|
||||
gpr_rd_rid_n = scoreboard_if.data.rs2;
|
||||
data_ready_n = 0;
|
||||
state_n = STATE_FETCH2;
|
||||
end
|
||||
// rs1: same test; a miss overrides any rs2/rs3 request, so rs1 is read first.
// No rs1_ready flag is kept: a miss here always goes straight to STATE_FETCH1.
if (is_rs1_zero || (CACHE_ENABLE != 0 &&
|
||||
scoreboard_if.data.rs1 == cache_reg[scoreboard_if.data.wis] &&
|
||||
(scoreboard_if.data.tmask & cache_tmask[scoreboard_if.data.wis]) == scoreboard_if.data.tmask)) begin
|
||||
rs1_data_n = (is_rs1_zero || CACHE_ENABLE == 0) ? '0 : cache_data[scoreboard_if.data.wis];
|
||||
end else begin
|
||||
gpr_rd_rid_n = scoreboard_if.data.rs1;
|
||||
data_ready_n = 0;
|
||||
state_n = STATE_FETCH1;
|
||||
end
|
||||
end
|
||||
// Captured on every IDLE cycle (regardless of scoreboard_if.valid) so the
// FETCH states can issue the follow-up rs2/rs3 reads for the same warp.
gpr_rd_wis_n = scoreboard_if.data.wis;
|
||||
rs2_n = scoreboard_if.data.rs2;
|
||||
rs3_n = scoreboard_if.data.rs3;
|
||||
end
|
||||
// Capture rs1 from the RAM read data, then continue with whichever of
// rs2/rs3 is still outstanding, or finish.
STATE_FETCH1: begin
|
||||
rs1_data_n = gpr_rd_data;
|
||||
if (~rs2_ready) begin
|
||||
gpr_rd_rid_n = rs2;
|
||||
state_n = STATE_FETCH2;
|
||||
end else if (~rs3_ready) begin
|
||||
gpr_rd_rid_n = rs3;
|
||||
state_n = STATE_FETCH3;
|
||||
end else begin
|
||||
data_ready_n = 1;
|
||||
state_n = STATE_IDLE;
|
||||
end
|
||||
end
|
||||
// Capture rs2, then fetch rs3 if it is still outstanding, else finish.
STATE_FETCH2: begin
|
||||
rs2_data_n = gpr_rd_data;
|
||||
if (~rs3_ready) begin
|
||||
gpr_rd_rid_n = rs3;
|
||||
state_n = STATE_FETCH3;
|
||||
end else begin
|
||||
data_ready_n = 1;
|
||||
state_n = STATE_IDLE;
|
||||
end
|
||||
end
|
||||
// Capture rs3; it is always the last operand, so the set is now complete.
STATE_FETCH3: begin
|
||||
rs3_data_n = gpr_rd_data;
|
||||
data_ready_n = 1;
|
||||
state_n = STATE_IDLE;
|
||||
end
|
||||
endcase
|
||||
|
||||
// Writeback cache update (only when CACHE_ENABLE != 0).
// The entry for the writing warp is updated when the writeback targets the
// register already cached, or when the previously cached packet had ended
// (cache_eop set) and this writeback starts a new one (sop).
if (CACHE_ENABLE != 0 && writeback_if.valid) begin
|
||||
if ((cache_reg[writeback_if.data.wis] == writeback_if.data.rd)
|
||||
|| (cache_eop[writeback_if.data.wis] && writeback_if.data.sop)) begin
|
||||
// Only lanes enabled in the writeback thread mask are overwritten.
for (integer j = 0; j < `NUM_THREADS; ++j) begin
|
||||
if (writeback_if.data.tmask[j]) begin
|
||||
cache_data_n[writeback_if.data.wis][j] = writeback_if.data.data[j];
|
||||
end
|
||||
end
|
||||
cache_reg_n[writeback_if.data.wis] = writeback_if.data.rd;
|
||||
cache_eop_n[writeback_if.data.wis] = writeback_if.data.eop;
|
||||
// A start-of-packet replaces the valid-thread mask; later beats accumulate into it.
cache_tmask_n[writeback_if.data.wis] = writeback_if.data.sop ? writeback_if.data.tmask :
|
||||
(cache_tmask_n[writeback_if.data.wis] | writeback_if.data.tmask);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// Register stage for the operand-fetch state machine.
// Only state, cache_eop and data_ready are reset; every other register is
// loaded from its *_n value on each clock edge, including while reset is
// asserted.
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= STATE_IDLE;
|
||||
// All-ones marks every warp's cache entry as "packet ended", so the first
// start-of-packet writeback is allowed to claim the entry.
cache_eop <= {ISSUE_RATIO{1'b1}};
|
||||
data_ready <= 0;
|
||||
end else begin
|
||||
state <= state_n;
|
||||
cache_eop <= cache_eop_n;
|
||||
data_ready <= data_ready_n;
|
||||
end
|
||||
// Non-reset registers: updated unconditionally from the combinational next values.
gpr_rd_rid <= gpr_rd_rid_n;
|
||||
gpr_rd_wis <= gpr_rd_wis_n;
|
||||
rs2_ready <= rs2_ready_n;
|
||||
rs3_ready <= rs3_ready_n;
|
||||
rs2 <= rs2_n;
|
||||
rs3 <= rs3_n;
|
||||
rs1_data <= rs1_data_n;
|
||||
rs2_data <= rs2_data_n;
|
||||
rs3_data <= rs3_data_n;
|
||||
cache_data <= cache_data_n;
|
||||
cache_reg <= cache_reg_n;
|
||||
cache_tmask <= cache_tmask_n;
|
||||
end
|
||||
|
||||
assign stg_valid_in = scoreboard_if.valid && data_ready;
|
||||
assign scoreboard_if.ready = stg_ready_in && data_ready;
|
||||
|
||||
VX_toggle_buffer #(
|
||||
.DATAW (DATAW)
|
||||
) toggle_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (stg_valid_in),
|
||||
.data_in ({
|
||||
scoreboard_if.data.uuid,
|
||||
scoreboard_if.data.wis,
|
||||
scoreboard_if.data.tmask,
|
||||
scoreboard_if.data.PC,
|
||||
scoreboard_if.data.wb,
|
||||
scoreboard_if.data.ex_type,
|
||||
scoreboard_if.data.op_type,
|
||||
scoreboard_if.data.op_args,
|
||||
scoreboard_if.data.rd
|
||||
}),
|
||||
.ready_in (stg_ready_in),
|
||||
.valid_out (operands_if.valid),
|
||||
.data_out ({
|
||||
operands_if.data.uuid,
|
||||
operands_if.data.wis,
|
||||
operands_if.data.tmask,
|
||||
operands_if.data.PC,
|
||||
operands_if.data.wb,
|
||||
operands_if.data.ex_type,
|
||||
operands_if.data.op_type,
|
||||
operands_if.data.op_args,
|
||||
operands_if.data.rd
|
||||
}),
|
||||
.ready_out (operands_if.ready)
|
||||
);
|
||||
|
||||
assign operands_if.data.rs1_data = rs1_data;
|
||||
assign operands_if.data.rs2_data = rs2_data;
|
||||
assign operands_if.data.rs3_data = rs3_data;
|
||||
|
||||
// GPR banks
|
||||
|
||||
reg [RAM_ADDRW-1:0] gpr_rd_addr;
|
||||
wire [RAM_ADDRW-1:0] gpr_wr_addr;
|
||||
// GPR RAM address generation.
// When ISSUE_WIS != 0 the address is {wis, register id}; otherwise the
// register id alone is the address.
// The read address is registered from the *next* read values
// (gpr_rd_wis_n / gpr_rd_rid_n), not from the already-registered
// gpr_rd_wis / gpr_rd_rid.
if (ISSUE_WIS != 0) begin
|
||||
assign gpr_wr_addr = {writeback_if.data.wis, writeback_if.data.rd};
|
||||
always @(posedge clk) begin
|
||||
gpr_rd_addr <= {gpr_rd_wis_n, gpr_rd_rid_n};
|
||||
end
|
||||
end else begin
|
||||
assign gpr_wr_addr = writeback_if.data.rd;
|
||||
always @(posedge clk) begin
|
||||
gpr_rd_addr <= gpr_rd_rid_n;
|
||||
end
|
||||
end
|
||||
|
||||
`ifdef GPR_RESET
|
||||
// Write gate used only when GPR_RESET is defined.
// Initialised to 0 and set to 1 on the first clock edge where reset is
// asserted; it is never cleared afterwards. The gpr_ram write enable below
// is ANDed with this flag, so no RAM write happens before a reset is seen.
reg wr_enabled = 0;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
wr_enabled <= 1;
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
// One dual-port RAM per thread lane j, each `XLEN bits wide with
// `NUM_REGS * ISSUE_RATIO entries. All lanes share the same read and write
// addresses; lane j is written only when the writeback is valid and
// tmask[j] is set (additionally gated by wr_enabled under GPR_RESET).
// The read port is permanently enabled.
// With GPR_RESET defined the RAM is also instantiated with INIT_ENABLE=1 and
// INIT_VALUE=0.
// NOTE(review): NO_RWCHECK=1 presumably disables read-during-write checking
// inside VX_dp_ram; confirm against the VX_dp_ram definition.
for (genvar j = 0; j < `NUM_THREADS; ++j) begin
|
||||
VX_dp_ram #(
|
||||
.DATAW (`XLEN),
|
||||
.SIZE (`NUM_REGS * ISSUE_RATIO),
|
||||
`ifdef GPR_RESET
|
||||
.INIT_ENABLE (1),
|
||||
.INIT_VALUE (0),
|
||||
`endif
|
||||
.NO_RWCHECK (1)
|
||||
) gpr_ram (
|
||||
.clk (clk),
|
||||
.read (1'b1),
|
||||
`UNUSED_PIN (wren),
|
||||
`ifdef GPR_RESET
|
||||
.write (wr_enabled && writeback_if.valid && writeback_if.data.tmask[j]),
|
||||
`else
|
||||
.write (writeback_if.valid && writeback_if.data.tmask[j]),
|
||||
`endif
|
||||
.waddr (gpr_wr_addr),
|
||||
.wdata (writeback_if.data.data[j]),
|
||||
.raddr (gpr_rd_addr),
|
||||
.rdata (gpr_rd_data[j])
|
||||
);
|
||||
end
|
||||
|
||||
endmodule
|
|
@ -14,33 +14,36 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_ibuffer import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
output wire [`PERF_CTR_BITS-1:0] perf_stalls,
|
||||
`endif
|
||||
|
||||
// inputs
|
||||
VX_decode_if.slave decode_if,
|
||||
|
||||
// outputs
|
||||
VX_ibuffer_if.master ibuffer_if [`NUM_WARPS]
|
||||
VX_ibuffer_if.master ibuffer_if [PER_ISSUE_WARPS]
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam DATAW = `UUID_WIDTH + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + (`NR_BITS * 4);
|
||||
|
||||
wire [`NUM_WARPS-1:0] ibuf_ready_in;
|
||||
|
||||
wire [PER_ISSUE_WARPS-1:0] ibuf_ready_in;
|
||||
assign decode_if.ready = ibuf_ready_in[decode_if.data.wid];
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_instr_bufs
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (`IBUF_SIZE),
|
||||
.OUT_REG (2) // use a 2-cycle FIFO
|
||||
.OUT_REG (2) // 2-cycle EB for area reduction
|
||||
) instr_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (decode_if.valid && decode_if.data.wid == i),
|
||||
.valid_in (decode_if.valid && decode_if.data.wid == ISSUE_WIS_W'(w)),
|
||||
.data_in ({
|
||||
decode_if.data.uuid,
|
||||
decode_if.data.tmask,
|
||||
|
@ -52,15 +55,32 @@ module VX_ibuffer import VX_gpu_pkg::*; #(
|
|||
decode_if.data.rd,
|
||||
decode_if.data.rs1,
|
||||
decode_if.data.rs2,
|
||||
decode_if.data.rs3}),
|
||||
.ready_in (ibuf_ready_in[i]),
|
||||
.valid_out(ibuffer_if[i].valid),
|
||||
.data_out (ibuffer_if[i].data),
|
||||
.ready_out(ibuffer_if[i].ready)
|
||||
decode_if.data.rs3
|
||||
}),
|
||||
.ready_in (ibuf_ready_in[w]),
|
||||
.valid_out(ibuffer_if[w].valid),
|
||||
.data_out (ibuffer_if[w].data),
|
||||
.ready_out(ibuffer_if[w].ready)
|
||||
);
|
||||
`ifndef L1_ENABLE
|
||||
assign decode_if.ibuf_pop[i] = ibuffer_if[i].valid && ibuffer_if[i].ready;
|
||||
assign decode_if.ibuf_pop[w] = ibuffer_if[w].valid && ibuffer_if[w].ready;
|
||||
`endif
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
// Instruction-buffer stall counter (compiled only under PERF_ENABLE).
// Counts the clock cycles in which decode presents a valid instruction that
// the selected buffer does not accept.
reg [`PERF_CTR_BITS-1:0] perf_ibf_stalls;
|
||||
|
||||
// High when decode is valid but not ready, i.e. back-pressured this cycle.
wire decode_if_stall = decode_if.valid && ~decode_if.ready;
|
||||
|
||||
// Cleared on reset; otherwise incremented by one for each stalled cycle.
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_ibf_stalls <= '0;
|
||||
end else begin
|
||||
perf_ibf_stalls <= perf_ibf_stalls + `PERF_CTR_BITS'(decode_if_stall);
|
||||
end
|
||||
end
|
||||
|
||||
assign perf_stalls = perf_ibf_stalls;
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -48,9 +48,9 @@ module VX_ipdom_stack #(
|
|||
empty_r <= 1;
|
||||
full_r <= 0;
|
||||
end else begin
|
||||
`ASSERT(~push || ~full, ("runtime error: writing to a full stack!"));
|
||||
`ASSERT(~pop || ~empty, ("runtime error: reading an empty stack!"));
|
||||
`ASSERT(~push || ~pop, ("runtime error: push and pop in same cycle not supported!"));
|
||||
`ASSERT(~push || ~full, ("%t: runtime error: writing to a full stack!", $time));
|
||||
`ASSERT(~pop || ~empty, ("%t: runtime error: reading an empty stack!", $time));
|
||||
`ASSERT(~push || ~pop, ("%t: runtime error: push and pop in same cycle not supported!", $time));
|
||||
if (push) begin
|
||||
rd_ptr <= wr_ptr;
|
||||
wr_ptr <= wr_ptr + ADDRW'(1);
|
||||
|
@ -72,9 +72,10 @@ module VX_ipdom_stack #(
|
|||
.LUTRAM (OUT_REG ? 0 : 1)
|
||||
) store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (1'b1),
|
||||
.write (push),
|
||||
`UNUSED_PIN (wren),
|
||||
.wren (1'b1),
|
||||
.waddr (wr_ptr),
|
||||
.wdata ({q1, q0}),
|
||||
.raddr (rd_ptr),
|
||||
|
|
|
@ -12,10 +12,9 @@
|
|||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
`include "VX_trace.vh"
|
||||
|
||||
module VX_issue #(
|
||||
parameter CORE_ID = 0
|
||||
module VX_issue import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
|
@ -23,137 +22,80 @@ module VX_issue #(
|
|||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_pipeline_perf_if.issue perf_issue_if,
|
||||
output issue_perf_t issue_perf,
|
||||
`endif
|
||||
|
||||
VX_decode_if.slave decode_if,
|
||||
VX_writeback_if.slave writeback_if [`ISSUE_WIDTH],
|
||||
VX_dispatch_if.master dispatch_if [`NUM_EX_UNITS * `ISSUE_WIDTH]
|
||||
);
|
||||
VX_ibuffer_if ibuffer_if [`NUM_WARPS]();
|
||||
VX_scoreboard_if scoreboard_if [`ISSUE_WIDTH]();
|
||||
VX_operands_if operands_if [`ISSUE_WIDTH]();
|
||||
|
||||
`RESET_RELAY (ibuf_reset, reset);
|
||||
`RESET_RELAY (scoreboard_reset, reset);
|
||||
`RESET_RELAY (operands_reset, reset);
|
||||
`RESET_RELAY (dispatch_reset, reset);
|
||||
|
||||
VX_ibuffer #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) ibuffer (
|
||||
.clk (clk),
|
||||
.reset (ibuf_reset),
|
||||
.decode_if (decode_if),
|
||||
.ibuffer_if (ibuffer_if)
|
||||
);
|
||||
|
||||
VX_scoreboard #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) scoreboard (
|
||||
.clk (clk),
|
||||
.reset (scoreboard_reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_scb_stalls(perf_issue_if.scb_stalls),
|
||||
.perf_units_uses(perf_issue_if.units_uses),
|
||||
.perf_sfu_uses (perf_issue_if.sfu_uses),
|
||||
`endif
|
||||
.writeback_if (writeback_if),
|
||||
.ibuffer_if (ibuffer_if),
|
||||
.scoreboard_if (scoreboard_if)
|
||||
);
|
||||
|
||||
VX_operands #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) operands (
|
||||
.clk (clk),
|
||||
.reset (operands_reset),
|
||||
.writeback_if (writeback_if),
|
||||
.scoreboard_if (scoreboard_if),
|
||||
.operands_if (operands_if)
|
||||
);
|
||||
|
||||
VX_dispatch #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) dispatch (
|
||||
.clk (clk),
|
||||
.reset (dispatch_reset),
|
||||
`ifdef PERF_ENABLE
|
||||
`UNUSED_PIN (perf_stalls),
|
||||
`endif
|
||||
.operands_if (operands_if),
|
||||
.dispatch_if (dispatch_if)
|
||||
);
|
||||
|
||||
`ifdef DBG_SCOPE_ISSUE
|
||||
if (CORE_ID == 0) begin
|
||||
`ifdef SCOPE
|
||||
wire operands_if_fire = operands_if[0].valid && operands_if[0].ready;
|
||||
wire operands_if_not_ready = ~operands_if[0].ready;
|
||||
wire writeback_if_valid = writeback_if[0].valid;
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (2),
|
||||
.TRIGGERW (4),
|
||||
.PROBEW (`UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS +
|
||||
1 + `NR_BITS + (`NUM_THREADS * 3 * `XLEN) +
|
||||
`UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1)
|
||||
) scope_tap (
|
||||
.clk(clk),
|
||||
.reset(scope_reset),
|
||||
.start(1'b0),
|
||||
.stop(1'b0),
|
||||
.triggers({
|
||||
reset,
|
||||
operands_if_fire,
|
||||
operands_if_not_ready,
|
||||
writeback_if_valid
|
||||
}),
|
||||
.probes({
|
||||
operands_if[0].data.uuid,
|
||||
operands_if[0].data.tmask,
|
||||
operands_if[0].data.ex_type,
|
||||
operands_if[0].data.op_type,
|
||||
operands_if[0].data.wb,
|
||||
operands_if[0].data.rd,
|
||||
operands_if[0].data.rs1_data,
|
||||
operands_if[0].data.rs2_data,
|
||||
operands_if[0].data.rs3_data,
|
||||
writeback_if[0].data.uuid,
|
||||
writeback_if[0].data.tmask,
|
||||
writeback_if[0].data.rd,
|
||||
writeback_if[0].data.data,
|
||||
writeback_if[0].data.eop
|
||||
}),
|
||||
.bus_in(scope_bus_in),
|
||||
.bus_out(scope_bus_out)
|
||||
);
|
||||
`endif
|
||||
`ifdef CHIPSCOPE
|
||||
ila_issue ila_issue_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({operands_if.uuid, ibuffer.rs3, ibuffer.rs2, ibuffer.rs1, operands_if.PC, operands_if.tmask, operands_if.wid, operands_if.ex_type, operands_if.op_type, operands_if.ready, operands_if.valid}),
|
||||
.probe1 ({writeback_if.uuid, writeback_if.data[0], writeback_if.PC, writeback_if.tmask, writeback_if.wid, writeback_if.eop, writeback_if.valid})
|
||||
);
|
||||
`endif
|
||||
end
|
||||
`else
|
||||
`SCOPE_IO_UNUSED()
|
||||
`endif
|
||||
`STATIC_ASSERT ((`ISSUE_WIDTH <= `NUM_WARPS), ("invalid parameter"))
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`PERF_CTR_BITS-1:0] perf_ibf_stalls;
|
||||
|
||||
wire decode_stall = decode_if.valid && ~decode_if.ready;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_ibf_stalls <= '0;
|
||||
end else begin
|
||||
perf_ibf_stalls <= perf_ibf_stalls + `PERF_CTR_BITS'(decode_stall);
|
||||
end
|
||||
issue_perf_t per_issue_perf [`ISSUE_WIDTH];
|
||||
`PERF_COUNTER_ADD (issue_perf, per_issue_perf, ibf_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
|
||||
`PERF_COUNTER_ADD (issue_perf, per_issue_perf, scb_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
|
||||
`PERF_COUNTER_ADD (issue_perf, per_issue_perf, opd_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
|
||||
for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin : g_issue_perf_units_uses
|
||||
`PERF_COUNTER_ADD (issue_perf, per_issue_perf, units_uses[i], `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
|
||||
end
|
||||
for (genvar i = 0; i < `NUM_SFU_UNITS; ++i) begin : g_issue_perf_sfu_uses
|
||||
`PERF_COUNTER_ADD (issue_perf, per_issue_perf, sfu_uses[i], `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
|
||||
end
|
||||
|
||||
assign perf_issue_if.ibf_stalls = perf_ibf_stalls;
|
||||
`endif
|
||||
|
||||
wire [ISSUE_ISW_W-1:0] decode_isw = wid_to_isw(decode_if.data.wid);
|
||||
wire [ISSUE_WIS_W-1:0] decode_wis = wid_to_wis(decode_if.data.wid);
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0] decode_ready_in;
|
||||
assign decode_if.ready = decode_ready_in[decode_isw];
|
||||
|
||||
`SCOPE_IO_SWITCH (`ISSUE_WIDTH);
|
||||
|
||||
// Instantiate one VX_issue_slice per issue lane.
// The shared decode stream is steered to the slice whose index equals
// decode_isw; inside the slice the warp id is replaced by decode_wis.
// Each slice drives `NUM_EX_UNITS dispatch interfaces, which are written
// into the flat dispatch_if array in transposed (execution-unit-major) order.
for (genvar issue_id = 0; issue_id < `ISSUE_WIDTH; ++issue_id) begin : g_issue_slices
|
||||
// Per-slice decode interface, parameterised with PER_ISSUE_WARPS warps.
VX_decode_if #(
|
||||
.NUM_WARPS (PER_ISSUE_WARPS)
|
||||
) per_issue_decode_if();
|
||||
|
||||
VX_dispatch_if per_issue_dispatch_if[`NUM_EX_UNITS]();
|
||||
|
||||
// Valid only for the slice selected by decode_isw; all payload fields are
// broadcast unchanged except wid, which carries decode_wis.
assign per_issue_decode_if.valid = decode_if.valid && (decode_isw == ISSUE_ISW_W'(issue_id));
|
||||
assign per_issue_decode_if.data.uuid = decode_if.data.uuid;
|
||||
assign per_issue_decode_if.data.wid = decode_wis;
|
||||
assign per_issue_decode_if.data.tmask = decode_if.data.tmask;
|
||||
assign per_issue_decode_if.data.PC = decode_if.data.PC;
|
||||
assign per_issue_decode_if.data.ex_type = decode_if.data.ex_type;
|
||||
assign per_issue_decode_if.data.op_type = decode_if.data.op_type;
|
||||
assign per_issue_decode_if.data.op_args = decode_if.data.op_args;
|
||||
assign per_issue_decode_if.data.wb = decode_if.data.wb;
|
||||
assign per_issue_decode_if.data.rd = decode_if.data.rd;
|
||||
assign per_issue_decode_if.data.rs1 = decode_if.data.rs1;
|
||||
assign per_issue_decode_if.data.rs2 = decode_if.data.rs2;
|
||||
assign per_issue_decode_if.data.rs3 = decode_if.data.rs3;
|
||||
// Collected per slice; decode_if.ready is selected from this vector by decode_isw.
assign decode_ready_in[issue_id] = per_issue_decode_if.ready;
|
||||
`ifndef L1_ENABLE
|
||||
// Each slice owns a contiguous PER_ISSUE_WARPS-wide segment of the global ibuf_pop vector.
assign decode_if.ibuf_pop[issue_id * PER_ISSUE_WARPS +: PER_ISSUE_WARPS] = per_issue_decode_if.ibuf_pop;
|
||||
`endif
|
||||
|
||||
VX_issue_slice #(
|
||||
.INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, issue_id)),
|
||||
.ISSUE_ID (issue_id)
|
||||
) issue_slice (
|
||||
`SCOPE_IO_BIND(issue_id)
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.issue_perf (per_issue_perf[issue_id]),
|
||||
`endif
|
||||
.decode_if (per_issue_decode_if),
|
||||
.writeback_if (writeback_if[issue_id]),
|
||||
.dispatch_if (per_issue_dispatch_if)
|
||||
);
|
||||
|
||||
// Assign transposed dispatch_if
|
||||
// Flat index is ex_id * `ISSUE_WIDTH + issue_id: all slices of one execution unit are adjacent.
for (genvar ex_id = 0; ex_id < `NUM_EX_UNITS; ++ex_id) begin : g_dispatch_if
|
||||
`ASSIGN_VX_IF(dispatch_if[ex_id * `ISSUE_WIDTH + issue_id], per_issue_dispatch_if[ex_id]);
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
157
hw/rtl/core/VX_issue_slice.sv
Normal file
157
hw/rtl/core/VX_issue_slice.sv
Normal file
|
@ -0,0 +1,157 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_issue_slice import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter ISSUE_ID = 0
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
output issue_perf_t issue_perf,
|
||||
`endif
|
||||
|
||||
VX_decode_if.slave decode_if,
|
||||
VX_writeback_if.slave writeback_if,
|
||||
VX_dispatch_if.master dispatch_if [`NUM_EX_UNITS]
|
||||
);
|
||||
`UNUSED_PARAM (ISSUE_ID)
|
||||
|
||||
VX_ibuffer_if ibuffer_if [PER_ISSUE_WARPS]();
|
||||
VX_scoreboard_if scoreboard_if();
|
||||
VX_operands_if operands_if();
|
||||
|
||||
VX_ibuffer #(
|
||||
.INSTANCE_ID ($sformatf("%s-ibuffer", INSTANCE_ID))
|
||||
) ibuffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_stalls (issue_perf.ibf_stalls),
|
||||
`endif
|
||||
.decode_if (decode_if),
|
||||
.ibuffer_if (ibuffer_if)
|
||||
);
|
||||
|
||||
VX_scoreboard #(
|
||||
.INSTANCE_ID ($sformatf("%s-scoreboard", INSTANCE_ID))
|
||||
) scoreboard (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_stalls (issue_perf.scb_stalls),
|
||||
.perf_units_uses(issue_perf.units_uses),
|
||||
.perf_sfu_uses (issue_perf.sfu_uses),
|
||||
`endif
|
||||
.writeback_if (writeback_if),
|
||||
.ibuffer_if (ibuffer_if),
|
||||
.scoreboard_if (scoreboard_if)
|
||||
);
|
||||
|
||||
VX_operands #(
|
||||
.INSTANCE_ID ($sformatf("%s-operands", INSTANCE_ID))
|
||||
) operands (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_stalls (issue_perf.opd_stalls),
|
||||
`endif
|
||||
.writeback_if (writeback_if),
|
||||
.scoreboard_if (scoreboard_if),
|
||||
.operands_if (operands_if)
|
||||
);
|
||||
|
||||
VX_dispatch #(
|
||||
.INSTANCE_ID ($sformatf("%s-dispatch", INSTANCE_ID))
|
||||
) dispatch (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
`ifdef PERF_ENABLE
|
||||
`UNUSED_PIN (perf_stalls),
|
||||
`endif
|
||||
.operands_if (operands_if),
|
||||
.dispatch_if (dispatch_if)
|
||||
);
|
||||
|
||||
`ifdef SCOPE
|
||||
`ifdef DBG_SCOPE_ISSUE
|
||||
`SCOPE_IO_SWITCH (1);
|
||||
wire operands_fire = operands_if.valid && operands_if.ready;
|
||||
`NEG_EDGE (reset_negedge, reset);
|
||||
`SCOPE_TAP_EX (0, 2, 2, 2, (
|
||||
`UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS +
|
||||
1 + `NR_BITS + (`NUM_THREADS * 3 * `XLEN) +
|
||||
`UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1
|
||||
), {
|
||||
operands_if.valid,
|
||||
operands_if.ready
|
||||
}, {
|
||||
operands_fire,
|
||||
writeback_if.valid // ack-free
|
||||
}, {
|
||||
operands_if.data.uuid,
|
||||
operands_if.data.tmask,
|
||||
operands_if.data.ex_type,
|
||||
operands_if.data.op_type,
|
||||
operands_if.data.wb,
|
||||
operands_if.data.rd,
|
||||
operands_if.data.rs1_data,
|
||||
operands_if.data.rs2_data,
|
||||
operands_if.data.rs3_data,
|
||||
writeback_if.data.uuid,
|
||||
writeback_if.data.tmask,
|
||||
writeback_if.data.rd,
|
||||
writeback_if.data.data,
|
||||
writeback_if.data.eop
|
||||
},
|
||||
reset_negedge, 1'b0, 4096
|
||||
);
|
||||
`else
|
||||
`SCOPE_IO_UNUSED(0)
|
||||
`endif
|
||||
`endif
|
||||
`ifdef CHIPSCOPE
|
||||
ila_issue ila_issue_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({decode_if.valid, decode_if.data, decode_if.ready}),
|
||||
.probe1 ({scoreboard_if.valid, scoreboard_if.data, scoreboard_if.ready}),
|
||||
.probe2 ({operands_if.valid, operands_if.data, operands_if.ready}),
|
||||
.probe3 ({writeback_if.valid, writeback_if.data})
|
||||
);
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_PIPELINE
|
||||
// Pipeline trace (compiled only under DBG_TRACE_PIPELINE).
// On every cycle in which the operands interface fires (valid && ready),
// emits one trace record built from several consecutive TRACE calls; the
// final call terminates the record with the instruction uuid and a newline.
always @(posedge clk) begin
|
||||
if (operands_if.valid && operands_if.ready) begin
|
||||
// The slice-local warp index is passed through wis_to_wid with ISSUE_ID;
// the PC is printed with a 1'b0 appended as its least-significant bit.
`TRACE(1, ("%t: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), {operands_if.data.PC, 1'b0}))
|
||||
trace_ex_type(1, operands_if.data.ex_type);
|
||||
`TRACE(1, (", op="))
|
||||
trace_ex_op(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
|
||||
`TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if.data.tmask, operands_if.data.wb, operands_if.data.rd))
|
||||
// Operand values are printed per thread, `NUM_THREADS entries each.
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs1_data, `NUM_THREADS)
|
||||
`TRACE(1, (", rs2_data="))
|
||||
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs2_data, `NUM_THREADS)
|
||||
`TRACE(1, (", rs3_data="))
|
||||
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs3_data, `NUM_THREADS)
|
||||
trace_op_args(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
|
||||
`TRACE(1, (" (#%0d)\n", operands_if.data.uuid))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
139
hw/rtl/core/VX_issue_top.sv
Normal file
139
hw/rtl/core/VX_issue_top.sv
Normal file
|
@ -0,0 +1,139 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_issue_top import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "issue"
|
||||
) (
|
||||
// Clock
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
input wire decode_valid,
|
||||
input wire [`UUID_WIDTH-1:0] decode_uuid,
|
||||
input wire [`NW_WIDTH-1:0] decode_wid,
|
||||
input wire [`NUM_THREADS-1:0] decode_tmask,
|
||||
input wire [`PC_BITS-1:0] decode_PC,
|
||||
input wire [`EX_BITS-1:0] decode_ex_type,
|
||||
input wire [`INST_OP_BITS-1:0] decode_op_type,
|
||||
input op_args_t decode_op_args,
|
||||
input wire decode_wb,
|
||||
input wire [`NR_BITS-1:0] decode_rd,
|
||||
input wire [`NR_BITS-1:0] decode_rs1,
|
||||
input wire [`NR_BITS-1:0] decode_rs2,
|
||||
input wire [`NR_BITS-1:0] decode_rs3,
|
||||
output wire decode_ready,
|
||||
|
||||
input wire writeback_valid[`ISSUE_WIDTH],
|
||||
input wire [`UUID_WIDTH-1:0] writeback_uuid[`ISSUE_WIDTH],
|
||||
input wire [ISSUE_WIS_W-1:0] writeback_wis[`ISSUE_WIDTH],
|
||||
input wire [`NUM_THREADS-1:0] writeback_tmask[`ISSUE_WIDTH],
|
||||
input wire [`PC_BITS-1:0] writeback_PC[`ISSUE_WIDTH],
|
||||
input wire [`NR_BITS-1:0] writeback_rd[`ISSUE_WIDTH],
|
||||
input wire [`NUM_THREADS-1:0][`XLEN-1:0] writeback_data[`ISSUE_WIDTH],
|
||||
input wire writeback_sop[`ISSUE_WIDTH],
|
||||
input wire writeback_eop[`ISSUE_WIDTH],
|
||||
|
||||
output wire dispatch_valid[`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [`UUID_WIDTH-1:0] dispatch_uuid[`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [ISSUE_WIS_W-1:0] dispatch_wis[`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [`NUM_THREADS-1:0] dispatch_tmask[`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [`PC_BITS-1:0] dispatch_PC[`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [`INST_ALU_BITS-1:0] dispatch_op_type[`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output op_args_t dispatch_op_args[`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire dispatch_wb[`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [`NR_BITS-1:0] dispatch_rd[`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [`NT_WIDTH-1:0] dispatch_tid[`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs1_data[`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs2_data[`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs3_data[`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
input wire dispatch_ready[`NUM_EX_UNITS * `ISSUE_WIDTH]
|
||||
);
|
||||
|
||||
VX_decode_if decode_if();
|
||||
VX_dispatch_if dispatch_if[`NUM_EX_UNITS * `ISSUE_WIDTH]();
|
||||
VX_writeback_if writeback_if[`ISSUE_WIDTH]();
|
||||
|
||||
assign decode_if.valid = decode_valid;
|
||||
assign decode_if.data.uuid = decode_uuid;
|
||||
assign decode_if.data.wid = decode_wid;
|
||||
assign decode_if.data.tmask = decode_tmask;
|
||||
assign decode_if.data.PC = decode_PC;
|
||||
assign decode_if.data.ex_type = decode_ex_type;
|
||||
assign decode_if.data.op_type = decode_op_type;
|
||||
assign decode_if.data.op_args = decode_op_args;
|
||||
assign decode_if.data.wb = decode_wb;
|
||||
assign decode_if.data.rd = decode_rd;
|
||||
assign decode_if.data.rs1 = decode_rs1;
|
||||
assign decode_if.data.rs2 = decode_rs2;
|
||||
assign decode_if.data.rs3 = decode_rs3;
|
||||
assign decode_ready = decode_if.ready;
|
||||
|
||||
// Pack the flattened per-issue writeback input ports into the
// writeback_if interface array, one element per issue lane.
// No ready signal is driven here: the port list has no writeback ready port.
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_writeback_if
|
||||
assign writeback_if[i].valid = writeback_valid[i];
|
||||
assign writeback_if[i].data.uuid = writeback_uuid[i];
|
||||
assign writeback_if[i].data.wis = writeback_wis[i];
|
||||
assign writeback_if[i].data.tmask = writeback_tmask[i];
|
||||
assign writeback_if[i].data.PC = writeback_PC[i];
|
||||
assign writeback_if[i].data.rd = writeback_rd[i];
|
||||
assign writeback_if[i].data.data = writeback_data[i];
|
||||
assign writeback_if[i].data.sop = writeback_sop[i];
|
||||
assign writeback_if[i].data.eop = writeback_eop[i];
|
||||
end
|
||||
|
||||
// Unpack the dispatch_if interface array onto the flattened output ports,
// one entry per (execution unit, issue lane) pair, and feed the external
// ready input back into each interface.
// NOTE(review): dispatch_op_type is declared `INST_ALU_BITS wide in the port
// list while decode_op_type uses `INST_OP_BITS; confirm the two widths match.
for (genvar i = 0; i < `NUM_EX_UNITS * `ISSUE_WIDTH; ++i) begin : g_dispatch_if
|
||||
assign dispatch_valid[i] = dispatch_if[i].valid;
|
||||
assign dispatch_uuid[i] = dispatch_if[i].data.uuid;
|
||||
assign dispatch_wis[i] = dispatch_if[i].data.wis;
|
||||
assign dispatch_tmask[i] = dispatch_if[i].data.tmask;
|
||||
assign dispatch_PC[i] = dispatch_if[i].data.PC;
|
||||
assign dispatch_op_type[i] = dispatch_if[i].data.op_type;
|
||||
assign dispatch_op_args[i] = dispatch_if[i].data.op_args;
|
||||
assign dispatch_wb[i] = dispatch_if[i].data.wb;
|
||||
assign dispatch_rd[i] = dispatch_if[i].data.rd;
|
||||
assign dispatch_tid[i] = dispatch_if[i].data.tid;
|
||||
assign dispatch_rs1_data[i] = dispatch_if[i].data.rs1_data;
|
||||
assign dispatch_rs2_data[i] = dispatch_if[i].data.rs2_data;
|
||||
assign dispatch_rs3_data[i] = dispatch_if[i].data.rs3_data;
|
||||
// Back-pressure flows inward: the external consumer's ready drives the interface.
assign dispatch_if[i].ready = dispatch_ready[i];
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
issue_perf_t issue_perf = '0;
|
||||
`endif
|
||||
|
||||
`ifdef SCOPE
|
||||
wire [0:0] scope_reset_w = 1'b0;
|
||||
wire [0:0] scope_bus_in_w = 1'b0;
|
||||
wire [0:0] scope_bus_out_w;
|
||||
`UNUSED_VAR (scope_bus_out_w)
|
||||
`endif
|
||||
|
||||
VX_issue #(
|
||||
.INSTANCE_ID (INSTANCE_ID)
|
||||
) issue (
|
||||
`SCOPE_IO_BIND (0)
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.issue_perf (issue_perf),
|
||||
`endif
|
||||
|
||||
.decode_if (decode_if),
|
||||
.writeback_if (writeback_if),
|
||||
.dispatch_if (dispatch_if)
|
||||
);
|
||||
|
||||
endmodule
|
|
@ -1,227 +0,0 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// VX_lmem_unit: sits between the LSU blocks and the rest of the memory system.
// For each of the NUM_LSU_BLOCKS request streams arriving on lsu_mem_in_if,
// lanes whose address-type LOCAL bit is set are sent to an internal
// VX_local_mem instance, and the remaining lanes are forwarded on
// lsu_mem_out_if. Responses from both paths are arbitrated back onto
// lsu_mem_in_if.
//
// Parameters:
//   CORE_ID - only used to build the INSTANCE_ID string of the local memory.
// Ports:
//   clk, reset     - clock and reset.
//   cache_perf     - perf counters from the local memory (PERF_ENABLE only).
//   lsu_mem_in_if  - slave side, one interface per LSU block.
//   lsu_mem_out_if - master side, one interface per LSU block.
module VX_lmem_unit import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
output cache_perf_t cache_perf,
|
||||
`endif
|
||||
|
||||
VX_lsu_mem_if.slave lsu_mem_in_if [`NUM_LSU_BLOCKS],
|
||||
VX_lsu_mem_if.master lsu_mem_out_if [`NUM_LSU_BLOCKS]
|
||||
);
|
||||
// Elaboration-time checks on the local memory configuration.
// NOTE(review): the first one presumably requires the local memory size
// (1 << LMEM_LOG_SIZE) to be a multiple of MEM_BLOCK_SIZE - confirm against
// the IS_DIVISBLE macro. The second requires LMEM_BASE_ADDR to be aligned to
// the local memory size.
`STATIC_ASSERT(`IS_DIVISBLE((1 << `LMEM_LOG_SIZE), `MEM_BLOCK_SIZE), ("invalid parameter"))
|
||||
`STATIC_ASSERT(0 == (`LMEM_BASE_ADDR % (1 << `LMEM_LOG_SIZE)), ("invalid parameter"))
|
||||
|
||||
// Width of the packed request payload pushed through the request buffers:
// mask + rw + per-lane (byteen, addr, atype, data) + tag.
localparam REQ_DATAW = `NUM_LSU_LANES + 1 + `NUM_LSU_LANES * (LSU_WORD_SIZE + LSU_ADDR_WIDTH + `ADDR_TYPE_WIDTH + LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH;
|
||||
// Width of the packed response payload: mask + per-lane data + tag.
localparam RSP_DATAW = `NUM_LSU_LANES + `NUM_LSU_LANES * (LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH;
|
||||
// Address width handed to the local memory: LMEM_LOG_SIZE minus the
// log2 of the LSU word size.
localparam LMEM_ADDR_WIDTH = `LMEM_LOG_SIZE - `CLOG2(LSU_WORD_SIZE);
|
||||
|
||||
// Internal per-block interface carrying the local-memory share of the traffic.
VX_lsu_mem_if #(
|
||||
.NUM_LANES (`NUM_LSU_LANES),
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||
) lmem_lsu_if[`NUM_LSU_BLOCKS]();
|
||||
|
||||
// NOTE(review): RESET_RELAY presumably derives the local reset req_reset from
// reset - confirm against the macro definition. req_reset feeds both request
// buffers below.
`RESET_RELAY (req_reset, reset);
|
||||
|
||||
// Request path, one iteration per LSU block: split the incoming request into
// a "global" part (forwarded on lsu_mem_out_if[i]) and a "local" part
// (forwarded on lmem_lsu_if[i]) based on each lane's address type.
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||
|
||||
// bit j is set when lane j carries the LOCAL flag in its address type
wire [`NUM_LSU_LANES-1:0] is_addr_local_mask;
|
||||
for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin
|
||||
assign is_addr_local_mask[j] = lsu_mem_in_if[i].req_data.atype[j][`ADDR_TYPE_LOCAL];
|
||||
end
|
||||
|
||||
// at least one active (masked-in) lane is non-local / local respectively
wire is_addr_global = | (lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask);
|
||||
wire is_addr_local = | (lsu_mem_in_if[i].req_data.mask & is_addr_local_mask);
|
||||
|
||||
wire req_global_ready;
|
||||
wire req_local_ready;
|
||||
|
||||
// Global path: request payload is passed through with the mask restricted
// to the non-local lanes; output drives lsu_mem_out_if[i].
VX_elastic_buffer #(
|
||||
.DATAW (REQ_DATAW),
|
||||
.SIZE (2),
|
||||
.OUT_REG (1)
|
||||
) req_global_buf (
|
||||
.clk (clk),
|
||||
.reset (req_reset),
|
||||
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_global),
|
||||
.data_in ({
|
||||
lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask,
|
||||
lsu_mem_in_if[i].req_data.rw,
|
||||
lsu_mem_in_if[i].req_data.byteen,
|
||||
lsu_mem_in_if[i].req_data.addr,
|
||||
lsu_mem_in_if[i].req_data.atype,
|
||||
lsu_mem_in_if[i].req_data.data,
|
||||
lsu_mem_in_if[i].req_data.tag
|
||||
}),
|
||||
.ready_in (req_global_ready),
|
||||
.valid_out (lsu_mem_out_if[i].req_valid),
|
||||
.data_out ({
|
||||
lsu_mem_out_if[i].req_data.mask,
|
||||
lsu_mem_out_if[i].req_data.rw,
|
||||
lsu_mem_out_if[i].req_data.byteen,
|
||||
lsu_mem_out_if[i].req_data.addr,
|
||||
lsu_mem_out_if[i].req_data.atype,
|
||||
lsu_mem_out_if[i].req_data.data,
|
||||
lsu_mem_out_if[i].req_data.tag
|
||||
}),
|
||||
.ready_out (lsu_mem_out_if[i].req_ready)
|
||||
);
|
||||
|
||||
// Local path: same payload with the mask restricted to the local lanes;
// output drives lmem_lsu_if[i].
// NOTE(review): SIZE(0)/OUT_REG(0) presumably makes this buffer a plain
// pass-through - confirm against VX_elastic_buffer.
VX_elastic_buffer #(
|
||||
.DATAW (REQ_DATAW),
|
||||
.SIZE (0),
|
||||
.OUT_REG (0)
|
||||
) req_local_buf (
|
||||
.clk (clk),
|
||||
.reset (req_reset),
|
||||
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_local),
|
||||
.data_in ({
|
||||
lsu_mem_in_if[i].req_data.mask & is_addr_local_mask,
|
||||
lsu_mem_in_if[i].req_data.rw,
|
||||
lsu_mem_in_if[i].req_data.byteen,
|
||||
lsu_mem_in_if[i].req_data.addr,
|
||||
lsu_mem_in_if[i].req_data.atype,
|
||||
lsu_mem_in_if[i].req_data.data,
|
||||
lsu_mem_in_if[i].req_data.tag
|
||||
}),
|
||||
.ready_in (req_local_ready),
|
||||
.valid_out (lmem_lsu_if[i].req_valid),
|
||||
.data_out ({
|
||||
lmem_lsu_if[i].req_data.mask,
|
||||
lmem_lsu_if[i].req_data.rw,
|
||||
lmem_lsu_if[i].req_data.byteen,
|
||||
lmem_lsu_if[i].req_data.addr,
|
||||
lmem_lsu_if[i].req_data.atype,
|
||||
lmem_lsu_if[i].req_data.data,
|
||||
lmem_lsu_if[i].req_data.tag
|
||||
}),
|
||||
.ready_out (lmem_lsu_if[i].req_ready)
|
||||
);
|
||||
|
||||
// The upstream request is acknowledged when a path it targets is ready.
// NOTE(review): this is an OR of the two paths. If a single request has both
// local and global active lanes, both valid_in inputs are raised but the
// request is acknowledged as soon as either buffer is ready, so it looks like
// the lanes destined for the not-ready buffer could be lost. Confirm that
// mixed local/global requests never reach this unit or are handled elsewhere.
assign lsu_mem_in_if[i].req_ready = (req_global_ready && is_addr_global)
|
||||
|| (req_local_ready && is_addr_local);
|
||||
end
|
||||
|
||||
// NOTE(review): RESET_RELAY presumably derives the local reset rsp_reset from
// reset - confirm against the macro definition. rsp_reset feeds the response
// arbiter and response buffer below.
`RESET_RELAY (rsp_reset, reset);
|
||||
|
||||
// Response path, one iteration per LSU block: pick one of the two response
// sources (local memory or lsu_mem_out_if) and forward it to lsu_mem_in_if[i].
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||
|
||||
wire rsp_arb_valid;
|
||||
// single-bit grant index: 1 selects lmem_lsu_if[i], 0 selects lsu_mem_out_if[i]
// (matches the order of the requests concatenation and the muxes below)
wire rsp_arb_index;
|
||||
wire rsp_arb_ready;
|
||||
|
||||
// Two-input arbiter over the response valid signals. The grant is released
// through grant_unlock, which is driven by the response buffer's ready_in.
// NOTE(review): TYPE "R" is presumably round-robin - confirm against
// VX_generic_arbiter.
VX_generic_arbiter #(
|
||||
.NUM_REQS (2),
|
||||
.LOCK_ENABLE (1),
|
||||
.TYPE ("R")
|
||||
) arbiter (
|
||||
.clk (clk),
|
||||
.reset (rsp_reset),
|
||||
.requests ({
|
||||
lmem_lsu_if[i].rsp_valid,
|
||||
lsu_mem_out_if[i].rsp_valid
|
||||
}),
|
||||
.grant_valid (rsp_arb_valid),
|
||||
.grant_index (rsp_arb_index),
|
||||
`UNUSED_PIN (grant_onehot),
|
||||
.grant_unlock(rsp_arb_ready)
|
||||
);
|
||||
|
||||
// Buffer holding the granted response (mask, data, tag) on its way back to
// the LSU; rsp_arb_ready is this buffer's input-side ready.
VX_elastic_buffer #(
|
||||
.DATAW (RSP_DATAW),
|
||||
.SIZE (2),
|
||||
.OUT_REG (0)
|
||||
) rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (rsp_reset),
|
||||
.valid_in (rsp_arb_valid),
|
||||
.data_in ({
|
||||
rsp_arb_index ? lmem_lsu_if[i].rsp_data.mask : lsu_mem_out_if[i].rsp_data.mask,
|
||||
rsp_arb_index ? lmem_lsu_if[i].rsp_data.data : lsu_mem_out_if[i].rsp_data.data,
|
||||
rsp_arb_index ? lmem_lsu_if[i].rsp_data.tag : lsu_mem_out_if[i].rsp_data.tag
|
||||
}),
|
||||
.ready_in (rsp_arb_ready),
|
||||
.valid_out (lsu_mem_in_if[i].rsp_valid),
|
||||
.data_out ({
|
||||
lsu_mem_in_if[i].rsp_data.mask,
|
||||
lsu_mem_in_if[i].rsp_data.data,
|
||||
lsu_mem_in_if[i].rsp_data.tag
|
||||
}),
|
||||
.ready_out (lsu_mem_in_if[i].rsp_ready)
|
||||
);
|
||||
|
||||
// Only the currently granted source sees ready; the other one is stalled.
assign lsu_mem_out_if[i].rsp_ready = rsp_arb_ready && ~rsp_arb_index;
|
||||
assign lmem_lsu_if[i].rsp_ready = rsp_arb_ready && rsp_arb_index;
|
||||
end
|
||||
|
||||
// Flat array of memory bus interfaces handed to the local memory:
// LSU_NUM_REQS entries, indexed below as (block * NUM_LSU_LANES + lane).
VX_mem_bus_if #(
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||
) lmem_bus_if[LSU_NUM_REQS]();
|
||||
|
||||
// NOTE(review): RESET_RELAY presumably derives the local reset adapter_reset
// from reset - confirm against the macro definition.
`RESET_RELAY (adapter_reset, reset);
|
||||
|
||||
// One VX_lsu_adapter per LSU block, connected to lmem_lsu_if[i] on one side
// and to a per-lane array of bus interfaces on the other.
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||
// per-lane bus interfaces local to this block
VX_mem_bus_if #(
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||
) lmem_bus_tmp_if[`NUM_LSU_LANES]();
|
||||
|
||||
VX_lsu_adapter #(
|
||||
.NUM_LANES (`NUM_LSU_LANES),
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH),
|
||||
.TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH),
|
||||
.REQ_OUT_BUF (2),
|
||||
.RSP_OUT_BUF (1)
|
||||
) lsu_adapter (
|
||||
.clk (clk),
|
||||
.reset (adapter_reset),
|
||||
.lsu_mem_if (lmem_lsu_if[i]),
|
||||
.mem_bus_if (lmem_bus_tmp_if)
|
||||
);
|
||||
|
||||
// Place this block's lanes into its slice of the flat lmem_bus_if array.
// NOTE(review): ASSIGN_VX_MEM_BUS_IF presumably connects the two interfaces
// signal by signal - confirm against the macro definition.
for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin
|
||||
`ASSIGN_VX_MEM_BUS_IF (lmem_bus_if[i * `NUM_LSU_LANES + j], lmem_bus_tmp_if[j]);
|
||||
end
|
||||
end
|
||||
|
||||
// NOTE(review): RESET_RELAY presumably derives the local reset lmem_reset from
// reset - confirm against the macro definition.
`RESET_RELAY (lmem_reset, reset);
|
||||
|
||||
// The local memory itself: (1 << LMEM_LOG_SIZE) in size, LMEM_NUM_BANKS banks,
// one request port per entry of the flat lmem_bus_if array (LSU_NUM_REQS).
// The instance name is built from CORE_ID ("core<N>-lmem").
VX_local_mem #(
|
||||
.INSTANCE_ID($sformatf("core%0d-lmem", CORE_ID)),
|
||||
.SIZE (1 << `LMEM_LOG_SIZE),
|
||||
.NUM_REQS (LSU_NUM_REQS),
|
||||
.NUM_BANKS (`LMEM_NUM_BANKS),
|
||||
.WORD_SIZE (LSU_WORD_SIZE),
|
||||
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||
) local_mem (
|
||||
.clk (clk),
|
||||
.reset (lmem_reset),
|
||||
// perf counters are only wired out when PERF_ENABLE is defined
`ifdef PERF_ENABLE
|
||||
.cache_perf (cache_perf),
|
||||
`endif
|
||||
.mem_bus_if (lmem_bus_if)
|
||||
);
|
||||
|
||||
endmodule
|
|
@ -14,8 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_lsu_slice import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0,
|
||||
parameter BLOCK_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
|
@ -60,25 +59,25 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
wire req_is_fence, rsp_is_fence;
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] full_addr;
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_full_addr
|
||||
assign full_addr[i] = execute_if.data.rs1_data[i] + `SEXT(`XLEN, execute_if.data.op_args.lsu.offset);
|
||||
end
|
||||
|
||||
// address type calculation
|
||||
|
||||
wire [NUM_LANES-1:0][`ADDR_TYPE_WIDTH-1:0] mem_req_atype;
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [NUM_LANES-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] mem_req_flags;
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mem_req_flags
|
||||
wire [MEM_ADDRW-1:0] block_addr = full_addr[i][MEM_ASHIFT +: MEM_ADDRW];
|
||||
// is I/O address
|
||||
wire [MEM_ADDRW-1:0] io_addr_start = MEM_ADDRW'(`XLEN'(`IO_BASE_ADDR) >> MEM_ASHIFT);
|
||||
wire [MEM_ADDRW-1:0] io_addr_end = MEM_ADDRW'(`XLEN'(`IO_END_ADDR) >> MEM_ASHIFT);
|
||||
assign mem_req_atype[i][`ADDR_TYPE_FLUSH] = req_is_fence;
|
||||
assign mem_req_atype[i][`ADDR_TYPE_IO] = (block_addr >= io_addr_start) && (block_addr < io_addr_end);
|
||||
assign mem_req_flags[i][`MEM_REQ_FLAG_FLUSH] = req_is_fence;
|
||||
assign mem_req_flags[i][`MEM_REQ_FLAG_IO] = (block_addr >= io_addr_start) && (block_addr < io_addr_end);
|
||||
`ifdef LMEM_ENABLE
|
||||
// is local memory address
|
||||
wire [MEM_ADDRW-1:0] lmem_addr_start = MEM_ADDRW'(`XLEN'(`LMEM_BASE_ADDR) >> MEM_ASHIFT);
|
||||
wire [MEM_ADDRW-1:0] lmem_addr_end = MEM_ADDRW'((`XLEN'(`LMEM_BASE_ADDR) + `XLEN'(1 << `LMEM_LOG_SIZE)) >> MEM_ASHIFT);
|
||||
assign mem_req_atype[i][`ADDR_TYPE_LOCAL] = (block_addr >= lmem_addr_start) && (block_addr < lmem_addr_end);
|
||||
assign mem_req_flags[i][`MEM_REQ_FLAG_LOCAL] = (block_addr >= lmem_addr_start) && (block_addr < lmem_addr_end);
|
||||
`endif
|
||||
end
|
||||
|
||||
|
@ -88,7 +87,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
wire [NUM_LANES-1:0] mem_req_mask;
|
||||
wire mem_req_rw;
|
||||
wire [NUM_LANES-1:0][LSU_ADDR_WIDTH-1:0] mem_req_addr;
|
||||
reg [NUM_LANES-1:0][LSU_WORD_SIZE-1:0] mem_req_byteen;
|
||||
wire [NUM_LANES-1:0][LSU_WORD_SIZE-1:0] mem_req_byteen;
|
||||
reg [NUM_LANES-1:0][LSU_WORD_SIZE*8-1:0] mem_req_data;
|
||||
wire [TAG_WIDTH-1:0] mem_req_tag;
|
||||
wire mem_req_ready;
|
||||
|
@ -103,8 +102,6 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
|
||||
wire mem_req_fire = mem_req_valid && mem_req_ready;
|
||||
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
|
||||
`UNUSED_VAR (mem_req_fire)
|
||||
`UNUSED_VAR (mem_rsp_fire)
|
||||
|
||||
wire mem_rsp_sop_pkt, mem_rsp_eop_pkt;
|
||||
wire no_rsp_buf_valid, no_rsp_buf_ready;
|
||||
|
@ -152,46 +149,49 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
|
||||
wire [NUM_LANES-1:0][REQ_ASHIFT-1:0] req_align;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mem_req_addr
|
||||
assign req_align[i] = full_addr[i][REQ_ASHIFT-1:0];
|
||||
assign mem_req_addr[i] = full_addr[i][`MEM_ADDR_WIDTH-1:REQ_ASHIFT];
|
||||
end
|
||||
|
||||
// byte enable formatting
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mem_req_byteen_w
|
||||
reg [LSU_WORD_SIZE-1:0] mem_req_byteen_w;
|
||||
always @(*) begin
|
||||
mem_req_byteen[i] = '0;
|
||||
mem_req_byteen_w = '0;
|
||||
case (`INST_LSU_WSIZE(execute_if.data.op_type))
|
||||
0: begin // 8-bit
|
||||
mem_req_byteen[i][req_align[i]] = 1'b1;
|
||||
mem_req_byteen_w[req_align[i]] = 1'b1;
|
||||
end
|
||||
1: begin // 16 bit
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:1], 1'b0}] = 1'b1;
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:1], 1'b1}] = 1'b1;
|
||||
mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:1], 1'b0}] = 1'b1;
|
||||
mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:1], 1'b1}] = 1'b1;
|
||||
end
|
||||
`ifdef XLEN_64
|
||||
2: begin // 32 bit
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b00}] = 1'b1;
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b01}] = 1'b1;
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b10}] = 1'b1;
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b11}] = 1'b1;
|
||||
mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:2], 2'b00}] = 1'b1;
|
||||
mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:2], 2'b01}] = 1'b1;
|
||||
mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:2], 2'b10}] = 1'b1;
|
||||
mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:2], 2'b11}] = 1'b1;
|
||||
end
|
||||
`endif
|
||||
default : mem_req_byteen[i] = {LSU_WORD_SIZE{1'b1}};
|
||||
// 3: 64 bit
|
||||
default : mem_req_byteen_w = {LSU_WORD_SIZE{1'b1}};
|
||||
endcase
|
||||
end
|
||||
assign mem_req_byteen[i] = mem_req_byteen_w;
|
||||
end
|
||||
|
||||
// memory misalignment not supported!
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_missalign
|
||||
wire lsu_req_fire = execute_if.valid && execute_if.ready;
|
||||
`RUNTIME_ASSERT((~lsu_req_fire || ~execute_if.data.tmask[i] || req_is_fence || (full_addr[i] % (1 << `INST_LSU_WSIZE(execute_if.data.op_type))) == 0),
|
||||
("misaligned memory access, wid=%0d, PC=0x%0h, addr=0x%0h, wsize=%0d! (#%0d)",
|
||||
execute_if.data.wid, {execute_if.data.PC, 1'b0}, full_addr[i], `INST_LSU_WSIZE(execute_if.data.op_type), execute_if.data.uuid));
|
||||
("%t: misaligned memory access, wid=%0d, PC=0x%0h, addr=0x%0h, wsize=%0d! (#%0d)",
|
||||
$time, execute_if.data.wid, {execute_if.data.PC, 1'b0}, full_addr[i], `INST_LSU_WSIZE(execute_if.data.op_type), execute_if.data.uuid))
|
||||
end
|
||||
|
||||
// store data formatting
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mem_req_data
|
||||
always @(*) begin
|
||||
mem_req_data[i] = execute_if.data.rs2_data[i];
|
||||
case (req_align[i])
|
||||
|
@ -213,7 +213,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
|
||||
wire [LSUQ_SIZEW-1:0] pkt_waddr, pkt_raddr;
|
||||
|
||||
if (PID_BITS != 0) begin
|
||||
if (PID_BITS != 0) begin : g_pids
|
||||
reg [`LSUQ_IN_SIZE-1:0][PID_BITS:0] pkt_ctr;
|
||||
reg [`LSUQ_IN_SIZE-1:0] pkt_sop, pkt_eop;
|
||||
|
||||
|
@ -269,10 +269,10 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
|
||||
assign mem_rsp_sop_pkt = pkt_sop[pkt_raddr];
|
||||
assign mem_rsp_eop_pkt = mem_rsp_eop_fire && pkt_eop[pkt_raddr] && (pkt_ctr[pkt_raddr] == 1);
|
||||
`RUNTIME_ASSERT(~(mem_req_rd_fire && full), ("allocator full!"))
|
||||
`RUNTIME_ASSERT(~mem_req_rd_sop_fire || 0 == pkt_ctr[pkt_waddr], ("Oops!"))
|
||||
`RUNTIME_ASSERT(~(mem_req_rd_fire && full), ("%t: allocator full!", $time))
|
||||
`RUNTIME_ASSERT(~mem_req_rd_sop_fire || 0 == pkt_ctr[pkt_waddr], ("%t: oops! broken sop request!", $time))
|
||||
`UNUSED_VAR (mem_rsp_sop)
|
||||
end else begin
|
||||
end else begin : g_no_pids
|
||||
assign pkt_waddr = 0;
|
||||
assign mem_rsp_sop_pkt = mem_rsp_sop;
|
||||
assign mem_rsp_eop_pkt = mem_rsp_eop;
|
||||
|
@ -298,7 +298,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
wire [NUM_LANES-1:0] lsu_mem_req_mask;
|
||||
wire [NUM_LANES-1:0][LSU_WORD_SIZE-1:0] lsu_mem_req_byteen;
|
||||
wire [NUM_LANES-1:0][LSU_ADDR_WIDTH-1:0] lsu_mem_req_addr;
|
||||
wire [NUM_LANES-1:0][`ADDR_TYPE_WIDTH-1:0] lsu_mem_req_atype;
|
||||
wire [NUM_LANES-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] lsu_mem_req_flags;
|
||||
wire [NUM_LANES-1:0][(LSU_WORD_SIZE*8)-1:0] lsu_mem_req_data;
|
||||
wire [LSU_TAG_WIDTH-1:0] lsu_mem_req_tag;
|
||||
wire lsu_mem_req_ready;
|
||||
|
@ -309,16 +309,14 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
wire [LSU_TAG_WIDTH-1:0] lsu_mem_rsp_tag;
|
||||
wire lsu_mem_rsp_ready;
|
||||
|
||||
`RESET_RELAY (mem_scheduler_reset, reset);
|
||||
|
||||
VX_mem_scheduler #(
|
||||
.INSTANCE_ID ($sformatf("core%0d-lsu-memsched%0d", CORE_ID, BLOCK_ID)),
|
||||
.INSTANCE_ID ($sformatf("%s-scheduler", INSTANCE_ID)),
|
||||
.CORE_REQS (NUM_LANES),
|
||||
.MEM_CHANNELS(NUM_LANES),
|
||||
.WORD_SIZE (LSU_WORD_SIZE),
|
||||
.LINE_SIZE (LSU_WORD_SIZE),
|
||||
.ADDR_WIDTH (LSU_ADDR_WIDTH),
|
||||
.ATYPE_WIDTH (`ADDR_TYPE_WIDTH),
|
||||
.FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.CORE_QUEUE_SIZE (`LSUQ_IN_SIZE),
|
||||
.MEM_QUEUE_SIZE (`LSUQ_OUT_SIZE),
|
||||
|
@ -328,7 +326,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
.CORE_OUT_BUF(0)
|
||||
) mem_scheduler (
|
||||
.clk (clk),
|
||||
.reset (mem_scheduler_reset),
|
||||
.reset (reset),
|
||||
|
||||
// Input request
|
||||
.core_req_valid (mem_req_valid),
|
||||
|
@ -336,12 +334,12 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
.core_req_mask (mem_req_mask),
|
||||
.core_req_byteen(mem_req_byteen),
|
||||
.core_req_addr (mem_req_addr),
|
||||
.core_req_atype (mem_req_atype),
|
||||
.core_req_flags (mem_req_flags),
|
||||
.core_req_data (mem_req_data),
|
||||
.core_req_tag (mem_req_tag),
|
||||
.core_req_ready (mem_req_ready),
|
||||
`UNUSED_PIN (core_req_empty),
|
||||
`UNUSED_PIN (core_req_sent),
|
||||
`UNUSED_PIN (core_req_wr_notify),
|
||||
|
||||
// Output response
|
||||
.core_rsp_valid (mem_rsp_valid),
|
||||
|
@ -358,7 +356,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
.mem_req_mask (lsu_mem_req_mask),
|
||||
.mem_req_byteen (lsu_mem_req_byteen),
|
||||
.mem_req_addr (lsu_mem_req_addr),
|
||||
.mem_req_atype (lsu_mem_req_atype),
|
||||
.mem_req_flags (lsu_mem_req_flags),
|
||||
.mem_req_data (lsu_mem_req_data),
|
||||
.mem_req_tag (lsu_mem_req_tag),
|
||||
.mem_req_ready (lsu_mem_req_ready),
|
||||
|
@ -376,7 +374,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
assign lsu_mem_if.req_data.rw = lsu_mem_req_rw;
|
||||
assign lsu_mem_if.req_data.byteen = lsu_mem_req_byteen;
|
||||
assign lsu_mem_if.req_data.addr = lsu_mem_req_addr;
|
||||
assign lsu_mem_if.req_data.atype = lsu_mem_req_atype;
|
||||
assign lsu_mem_if.req_data.flags = lsu_mem_req_flags;
|
||||
assign lsu_mem_if.req_data.data = lsu_mem_req_data;
|
||||
assign lsu_mem_if.req_data.tag = lsu_mem_req_tag;
|
||||
assign lsu_mem_req_ready = lsu_mem_if.req_ready;
|
||||
|
@ -424,7 +422,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
`endif
|
||||
`endif
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; i++) begin
|
||||
for (genvar i = 0; i < NUM_LANES; i++) begin : g_rsp_data
|
||||
`ifdef XLEN_64
|
||||
wire [63:0] rsp_data64 = mem_rsp_data[i];
|
||||
wire [31:0] rsp_data32 = (rsp_align[i][2] ? mem_rsp_data[i][63:32] : mem_rsp_data[i][31:0]);
|
||||
|
@ -481,6 +479,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
.valid_out (commit_no_rsp_if.valid),
|
||||
.ready_out (commit_no_rsp_if.ready)
|
||||
);
|
||||
|
||||
assign commit_no_rsp_if.data.rd = '0;
|
||||
assign commit_no_rsp_if.data.wb = 1'b0;
|
||||
assign commit_no_rsp_if.data.data = commit_rsp_if.data.data; // arbiter MUX optimization
|
||||
|
@ -488,6 +487,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
VX_stream_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATAW (RSP_ARB_DATAW),
|
||||
.ARBITER ("P"), // prioritize commit_rsp_if
|
||||
.OUT_BUF (3)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
|
@ -504,67 +504,70 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
`ifdef DBG_TRACE_MEM
|
||||
always @(posedge clk) begin
|
||||
if (execute_if.valid && fence_lock) begin
|
||||
`TRACE(1, ("%d: *** D$%0d fence wait\n", $time, CORE_ID));
|
||||
`TRACE(1, ("%t: *** %s fence wait\n", $time, INSTANCE_ID))
|
||||
end
|
||||
if (mem_req_fire) begin
|
||||
if (mem_req_rw) begin
|
||||
`TRACE(1, ("%d: D$%0d Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, CORE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask));
|
||||
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES);
|
||||
`TRACE(1, (", atype="));
|
||||
`TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES);
|
||||
`TRACE(1, (", byteen=0x%0h, data=", mem_req_byteen));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES);
|
||||
`TRACE(1, (", tag=0x%0h (#%0d)\n", mem_req_tag, execute_if.data.uuid));
|
||||
`TRACE(1, ("%t: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask))
|
||||
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES)
|
||||
`TRACE(1, (", flags="))
|
||||
`TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES)
|
||||
`TRACE(1, (", byteen=0x%0h, data=", mem_req_byteen))
|
||||
`TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES)
|
||||
`TRACE(1, (", sop=%b, eop=%b, tag=0x%0h (#%0d)\n", execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid))
|
||||
end else begin
|
||||
`TRACE(1, ("%d: D$%0d Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, CORE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask));
|
||||
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES);
|
||||
`TRACE(1, (", atype="));
|
||||
`TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES);
|
||||
`TRACE(1, (", byteen=0x%0h, rd=%0d, tag=0x%0h (#%0d)\n", mem_req_byteen, execute_if.data.rd, mem_req_tag, execute_if.data.uuid));
|
||||
`TRACE(1, ("%t: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask))
|
||||
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES)
|
||||
`TRACE(1, (", flags="))
|
||||
`TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES)
|
||||
`TRACE(1, (", byteen=0x%0h, rd=%0d, sop=%b, eop=%b, tag=0x%0h (#%0d)\n", mem_req_byteen, execute_if.data.rd, execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid))
|
||||
end
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(1, ("%d: D$%0d Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=",
|
||||
$time, CORE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data, NUM_LANES);
|
||||
`TRACE(1, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid));
|
||||
`TRACE(1, ("%t: %s Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=",
|
||||
$time, INSTANCE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop))
|
||||
`TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data, NUM_LANES)
|
||||
`TRACE(1, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
`ifdef SCOPE
|
||||
`ifdef DBG_SCOPE_LSU
|
||||
if (CORE_ID == 0 && BLOCK_ID == 0) begin
|
||||
`ifdef SCOPE
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (3),
|
||||
.TRIGGERW (3),
|
||||
.PROBEW (`UUID_WIDTH+NUM_LANES*(`XLEN+4+`XLEN)+1+`UUID_WIDTH+NUM_LANES*`XLEN)
|
||||
) scope_tap (
|
||||
.clk(clk),
|
||||
.reset(scope_reset),
|
||||
.start(1'b0),
|
||||
.stop(1'b0),
|
||||
.triggers({reset, mem_req_fire, mem_rsp_fire}),
|
||||
.probes({execute_if.data.uuid, full_addr, mem_req_rw, mem_req_byteen, mem_req_data, rsp_uuid, rsp_data}),
|
||||
.bus_in(scope_bus_in),
|
||||
.bus_out(scope_bus_out)
|
||||
);
|
||||
`endif
|
||||
`ifdef CHIPSCOPE
|
||||
wire [31:0] full_addr_0 = full_addr[0];
|
||||
wire [31:0] mem_req_data_0 = mem_req_data[0];
|
||||
wire [31:0] rsp_data_0 = rsp_data[0];
|
||||
ila_lsu ila_lsu_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({mem_req_data_0, execute_if.data.uuid, execute_if.data.wid, execute_if.data.PC, mem_req_mask, full_addr_0, mem_req_byteen, mem_req_rw, mem_req_ready, mem_req_valid}),
|
||||
.probe1 ({rsp_data_0, rsp_uuid, mem_rsp_eop, rsp_pc, rsp_rd, mem_rsp_mask, rsp_wid, mem_rsp_ready, mem_rsp_valid}),
|
||||
.probe2 ({lsu_mem_if.req_data.data, lsu_mem_if.req_data.tag, lsu_mem_if.req_data.byteen, lsu_mem_if.req_data.addr, lsu_mem_if.req_data.rw, lsu_mem_if.req_ready, lsu_mem_if.req_valid}),
|
||||
.probe3 ({lsu_mem_if.rsp_data.data, lsu_mem_if.rsp_data.tag, lsu_mem_if.rsp_ready, lsu_mem_if.rsp_valid})
|
||||
);
|
||||
`endif
|
||||
end
|
||||
`SCOPE_IO_SWITCH (1);
|
||||
`NEG_EDGE (reset_negedge, reset);
|
||||
`SCOPE_TAP_EX (0, 3, 4, 2, (
|
||||
1 + NUM_LANES * (`XLEN + LSU_WORD_SIZE + LSU_WORD_SIZE * 8) + `UUID_WIDTH + NUM_LANES * LSU_WORD_SIZE * 8 + `UUID_WIDTH
|
||||
), {
|
||||
mem_req_valid,
|
||||
mem_req_ready,
|
||||
mem_rsp_valid,
|
||||
mem_rsp_ready
|
||||
}, {
|
||||
mem_req_fire,
|
||||
mem_rsp_fire
|
||||
}, {
|
||||
mem_req_rw,
|
||||
full_addr,
|
||||
mem_req_byteen,
|
||||
mem_req_data,
|
||||
execute_if.data.uuid,
|
||||
rsp_data,
|
||||
rsp_uuid
|
||||
},
|
||||
reset_negedge, 1'b0, 4096
|
||||
);
|
||||
`else
|
||||
`SCOPE_IO_UNUSED()
|
||||
`SCOPE_IO_UNUSED(0)
|
||||
`endif
|
||||
`endif
|
||||
`ifdef CHIPSCOPE
|
||||
ila_lsu ila_lsu_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({execute_if.valid, execute_if.data, execute_if.ready}),
|
||||
.probe1 ({lsu_mem_if.req_valid, lsu_mem_if.req_data, lsu_mem_if.req_ready}),
|
||||
.probe2 ({lsu_mem_if.rsp_valid, lsu_mem_if.rsp_data, lsu_mem_if.rsp_ready})
|
||||
);
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,8 +14,8 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_lsu_unit import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
input wire clk,
|
||||
|
@ -24,18 +24,15 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
// Inputs
|
||||
VX_dispatch_if.slave dispatch_if [`ISSUE_WIDTH],
|
||||
|
||||
// Outputs
|
||||
// Outputs
|
||||
VX_commit_if.master commit_if [`ISSUE_WIDTH],
|
||||
VX_lsu_mem_if.master lsu_mem_if [`NUM_LSU_BLOCKS]
|
||||
);
|
||||
localparam BLOCK_SIZE = `NUM_LSU_BLOCKS;
|
||||
localparam NUM_LANES = `NUM_LSU_LANES;
|
||||
|
||||
`ifdef SCOPE
|
||||
localparam scope_lsu = 0;
|
||||
`SCOPE_IO_SWITCH (BLOCK_SIZE);
|
||||
`endif
|
||||
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) per_block_execute_if[BLOCK_SIZE]();
|
||||
|
@ -43,7 +40,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
VX_dispatch_unit #(
|
||||
.BLOCK_SIZE (BLOCK_SIZE),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.OUT_BUF (1)
|
||||
.OUT_BUF (3)
|
||||
) dispatch_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -55,17 +52,13 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
.NUM_LANES (NUM_LANES)
|
||||
) per_block_commit_if[BLOCK_SIZE]();
|
||||
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin
|
||||
|
||||
`RESET_RELAY (block_reset, reset);
|
||||
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_lsus
|
||||
VX_lsu_slice #(
|
||||
.CORE_ID (CORE_ID),
|
||||
.BLOCK_ID (block_idx)
|
||||
.INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, block_idx))
|
||||
) lsu_slice(
|
||||
`SCOPE_IO_BIND (scope_lsu+block_idx)
|
||||
`SCOPE_IO_BIND (block_idx)
|
||||
.clk (clk),
|
||||
.reset (block_reset),
|
||||
.reset (reset),
|
||||
.execute_if (per_block_execute_if[block_idx]),
|
||||
.commit_if (per_block_commit_if[block_idx]),
|
||||
.lsu_mem_if (lsu_mem_if[block_idx])
|
||||
|
@ -82,5 +75,5 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
.commit_in_if (per_block_commit_if),
|
||||
.commit_out_if (commit_if)
|
||||
);
|
||||
|
||||
|
||||
endmodule
|
||||
|
|
221
hw/rtl/core/VX_mem_unit.sv
Normal file
221
hw/rtl/core/VX_mem_unit.sv
Normal file
|
@ -0,0 +1,221 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_mem_unit import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
output cache_perf_t lmem_perf,
|
||||
`endif
|
||||
|
||||
VX_lsu_mem_if.slave lsu_mem_if [`NUM_LSU_BLOCKS],
|
||||
VX_mem_bus_if.master dcache_bus_if [DCACHE_NUM_REQS]
|
||||
);
|
||||
VX_lsu_mem_if #(
|
||||
.NUM_LANES (`NUM_LSU_LANES),
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||
) lsu_dcache_if[`NUM_LSU_BLOCKS]();
|
||||
|
||||
`ifdef LMEM_ENABLE
|
||||
|
||||
`STATIC_ASSERT(`IS_DIVISBLE((1 << `LMEM_LOG_SIZE), `MEM_BLOCK_SIZE), ("invalid parameter"))
|
||||
`STATIC_ASSERT(0 == (`LMEM_BASE_ADDR % (1 << `LMEM_LOG_SIZE)), ("invalid parameter"))
|
||||
|
||||
localparam LMEM_ADDR_WIDTH = `LMEM_LOG_SIZE - `CLOG2(LSU_WORD_SIZE);
|
||||
|
||||
VX_lsu_mem_if #(
|
||||
.NUM_LANES (`NUM_LSU_LANES),
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||
) lsu_lmem_if[`NUM_LSU_BLOCKS]();
|
||||
|
||||
// One VX_lmem_switch per LSU block: lsu_mem_if[i] is the input, with
// lsu_dcache_if[i] bound to the switch's global output and lsu_lmem_if[i]
// bound to its local output.
// NOTE(review): ARBITER "P" is presumably a priority arbiter for the merged
// responses - confirm against VX_lmem_switch.
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lmem_switches
|
||||
VX_lmem_switch #(
|
||||
.REQ0_OUT_BUF (3),
|
||||
.REQ1_OUT_BUF (0),
|
||||
.RSP_OUT_BUF (1),
|
||||
.ARBITER ("P")
|
||||
) lmem_switch (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.lsu_in_if (lsu_mem_if[i]),
|
||||
.global_out_if(lsu_dcache_if[i]),
|
||||
.local_out_if (lsu_lmem_if[i])
|
||||
);
|
||||
end
|
||||
|
||||
// Flat array of memory bus interfaces handed to the local memory:
// LSU_NUM_REQS entries, indexed below as (block * NUM_LSU_LANES + lane).
VX_mem_bus_if #(
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||
) lmem_bus_if[LSU_NUM_REQS]();
|
||||
|
||||
// One VX_lsu_adapter per LSU block, connected to lsu_lmem_if[i] on one side
// and to a per-lane array of bus interfaces on the other.
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lmem_adapters
|
||||
// per-lane bus interfaces local to this block
VX_mem_bus_if #(
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||
) lmem_bus_tmp_if[`NUM_LSU_LANES]();
|
||||
|
||||
VX_lsu_adapter #(
|
||||
.NUM_LANES (`NUM_LSU_LANES),
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH),
|
||||
.TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH),
|
||||
.ARBITER ("P"),
|
||||
.REQ_OUT_BUF (3),
|
||||
.RSP_OUT_BUF (0)
|
||||
) lmem_adapter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.lsu_mem_if (lsu_lmem_if[i]),
|
||||
.mem_bus_if (lmem_bus_tmp_if)
|
||||
);
|
||||
|
||||
// Place this block's lanes into its slice of the flat lmem_bus_if array.
// NOTE(review): ASSIGN_VX_MEM_BUS_IF presumably connects the two interfaces
// signal by signal - confirm against the macro definition.
for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin : g_lmem_bus_if
|
||||
`ASSIGN_VX_MEM_BUS_IF (lmem_bus_if[i * `NUM_LSU_LANES + j], lmem_bus_tmp_if[j]);
|
||||
end
|
||||
end
|
||||
|
||||
// The local memory itself: (1 << LMEM_LOG_SIZE) in size, LMEM_NUM_BANKS banks,
// one request port per entry of the flat lmem_bus_if array (LSU_NUM_REQS).
// The instance name is this module's INSTANCE_ID with "-lmem" appended.
VX_local_mem #(
|
||||
.INSTANCE_ID($sformatf("%s-lmem", INSTANCE_ID)),
|
||||
.SIZE (1 << `LMEM_LOG_SIZE),
|
||||
.NUM_REQS (LSU_NUM_REQS),
|
||||
.NUM_BANKS (`LMEM_NUM_BANKS),
|
||||
.WORD_SIZE (LSU_WORD_SIZE),
|
||||
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH),
|
||||
.OUT_BUF (3)
|
||||
) local_mem (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
// perf counters are only wired out when PERF_ENABLE is defined
`ifdef PERF_ENABLE
|
||||
.lmem_perf (lmem_perf),
|
||||
`endif
|
||||
.mem_bus_if (lmem_bus_if)
|
||||
);
|
||||
|
||||
// Fallback when LMEM_ENABLE is not defined: there is no local memory, so the
// perf output is tied to zero and every LSU interface is connected straight
// to its lsu_dcache_if counterpart.
`else
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
assign lmem_perf = '0;
|
||||
`endif
|
||||
// NOTE(review): ASSIGN_VX_MEM_BUS_IF is applied here to VX_lsu_mem_if
// interfaces; presumably the macro only touches signals common to both
// interface types - confirm against the macro definition.
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lsu_dcache_if
|
||||
`ASSIGN_VX_MEM_BUS_IF (lsu_dcache_if[i], lsu_mem_if[i]);
|
||||
end
|
||||
|
||||
`endif
|
||||
|
||||
// Per-block interface on the dcache side of the optional coalescer; sized with
// the DCACHE_* parameters rather than the LSU ones.
VX_lsu_mem_if #(
|
||||
.NUM_LANES (DCACHE_CHANNELS),
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH)
|
||||
) dcache_coalesced_if[`NUM_LSU_BLOCKS]();
|
||||
|
||||
// When the LSU and dcache word sizes differ, a VX_mem_coalescer is inserted
// per LSU block between lsu_dcache_if[i] (input side) and
// dcache_coalesced_if[i] (output side). Otherwise the two interfaces are
// connected directly.
if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin : g_enabled
|
||||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_coalescers
|
||||
VX_mem_coalescer #(
|
||||
.INSTANCE_ID ($sformatf("%s-coalescer%0d", INSTANCE_ID, i)),
|
||||
.NUM_REQS (`NUM_LSU_LANES),
|
||||
.DATA_IN_SIZE (LSU_WORD_SIZE),
|
||||
.DATA_OUT_SIZE (DCACHE_WORD_SIZE),
|
||||
.ADDR_WIDTH (LSU_ADDR_WIDTH),
|
||||
.FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.QUEUE_SIZE (`LSUQ_OUT_SIZE)
|
||||
) mem_coalescer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Input request
|
||||
.in_req_valid (lsu_dcache_if[i].req_valid),
|
||||
.in_req_mask (lsu_dcache_if[i].req_data.mask),
|
||||
.in_req_rw (lsu_dcache_if[i].req_data.rw),
|
||||
.in_req_byteen (lsu_dcache_if[i].req_data.byteen),
|
||||
.in_req_addr (lsu_dcache_if[i].req_data.addr),
|
||||
.in_req_flags (lsu_dcache_if[i].req_data.flags),
|
||||
.in_req_data (lsu_dcache_if[i].req_data.data),
|
||||
.in_req_tag (lsu_dcache_if[i].req_data.tag),
|
||||
.in_req_ready (lsu_dcache_if[i].req_ready),
|
||||
|
||||
// Input response
|
||||
.in_rsp_valid (lsu_dcache_if[i].rsp_valid),
|
||||
.in_rsp_mask (lsu_dcache_if[i].rsp_data.mask),
|
||||
.in_rsp_data (lsu_dcache_if[i].rsp_data.data),
|
||||
.in_rsp_tag (lsu_dcache_if[i].rsp_data.tag),
|
||||
.in_rsp_ready (lsu_dcache_if[i].rsp_ready),
|
||||
|
||||
// Output request
|
||||
.out_req_valid (dcache_coalesced_if[i].req_valid),
|
||||
.out_req_mask (dcache_coalesced_if[i].req_data.mask),
|
||||
.out_req_rw (dcache_coalesced_if[i].req_data.rw),
|
||||
.out_req_byteen (dcache_coalesced_if[i].req_data.byteen),
|
||||
.out_req_addr (dcache_coalesced_if[i].req_data.addr),
|
||||
.out_req_flags (dcache_coalesced_if[i].req_data.flags),
|
||||
.out_req_data (dcache_coalesced_if[i].req_data.data),
|
||||
.out_req_tag (dcache_coalesced_if[i].req_data.tag),
|
||||
.out_req_ready (dcache_coalesced_if[i].req_ready),
|
||||
|
||||
// Output response
|
||||
.out_rsp_valid (dcache_coalesced_if[i].rsp_valid),
|
||||
.out_rsp_mask (dcache_coalesced_if[i].rsp_data.mask),
|
||||
.out_rsp_data (dcache_coalesced_if[i].rsp_data.data),
|
||||
.out_rsp_tag (dcache_coalesced_if[i].rsp_data.tag),
|
||||
.out_rsp_ready (dcache_coalesced_if[i].rsp_ready)
|
||||
);
|
||||
end
|
||||
|
||||
end else begin : g_passthru
|
||||
|
||||
// NOTE(review): the two sides are declared with different parameter sets
// (DCACHE_* vs LSU_*); this branch presumably relies on the lane count and
// tag width also matching whenever the word sizes are equal - confirm.
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_dcache_coalesced_if
|
||||
`ASSIGN_VX_MEM_BUS_IF (dcache_coalesced_if[i], lsu_dcache_if[i]);
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_dcache_adapters
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH)
|
||||
) dcache_bus_tmp_if[DCACHE_CHANNELS]();
|
||||
|
||||
VX_lsu_adapter #(
|
||||
.NUM_LANES (DCACHE_CHANNELS),
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH),
|
||||
.TAG_SEL_BITS (DCACHE_TAG_WIDTH - `UUID_WIDTH),
|
||||
.ARBITER ("P"),
|
||||
.REQ_OUT_BUF (0),
|
||||
.RSP_OUT_BUF (0)
|
||||
) dcache_adapter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.lsu_mem_if (dcache_coalesced_if[i]),
|
||||
.mem_bus_if (dcache_bus_tmp_if)
|
||||
);
|
||||
|
||||
for (genvar j = 0; j < DCACHE_CHANNELS; ++j) begin : g_dcache_bus_if
|
||||
`ASSIGN_VX_MEM_BUS_IF (dcache_bus_if[i * DCACHE_CHANNELS + j], dcache_bus_tmp_if[j]);
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
endmodule
|
127
hw/rtl/core/VX_mem_unit_top.sv
Normal file
127
hw/rtl/core/VX_mem_unit_top.sv
Normal file
|
@ -0,0 +1,127 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Flat-port wrapper around VX_mem_unit.
//
// VX_mem_unit exchanges data through SystemVerilog interface arrays
// (VX_lsu_mem_if on the LSU side, VX_mem_bus_if on the dcache side). This
// module re-exposes both sides as plain packed-array ports: every interface
// field is copied one-to-one to/from a port of the same meaning, with no
// added logic or buffering.
// NOTE(review): presumably intended as a standalone top level for
// synthesis/analysis of the memory unit - confirm against the build scripts.
//
// Parameters:
//   INSTANCE_ID    - instance name string forwarded to VX_mem_unit.
//   LSU_WORD_WIDTH - per-lane LSU data width in bits (LSU_WORD_SIZE * 8).
module VX_mem_unit_top import VX_gpu_pkg::*; #(
    parameter `STRING INSTANCE_ID = "",
    parameter LSU_WORD_WIDTH = LSU_WORD_SIZE * 8
) (
    // Clock
    input wire clk,
    input wire reset,

    // LSU memory request (one entry per LSU block, NUM_LSU_LANES lanes each)
    input wire [`NUM_LSU_BLOCKS-1:0] lsu_req_valid,
    input wire [`NUM_LSU_BLOCKS-1:0] lsu_req_rw,
    input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0] lsu_req_mask,
    input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][LSU_WORD_SIZE-1:0] lsu_req_byteen,
    input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][LSU_ADDR_WIDTH-1:0] lsu_req_addr,
    input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] lsu_req_flags,
    input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][LSU_WORD_WIDTH-1:0] lsu_req_data,
    input wire [`NUM_LSU_BLOCKS-1:0][LSU_TAG_WIDTH-1:0] lsu_req_tag,
    output wire [`NUM_LSU_BLOCKS-1:0] lsu_req_ready,

    // LSU memory response
    output wire [`NUM_LSU_BLOCKS-1:0] lsu_rsp_valid,
    output wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0] lsu_rsp_mask,
    output wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][LSU_WORD_WIDTH-1:0] lsu_rsp_data,
    output wire [`NUM_LSU_BLOCKS-1:0][LSU_TAG_WIDTH-1:0] lsu_rsp_tag,
    input wire [`NUM_LSU_BLOCKS-1:0] lsu_rsp_ready,

    // Memory request (one entry per dcache request port)
    output wire [DCACHE_NUM_REQS-1:0] mem_req_valid,
    output wire [DCACHE_NUM_REQS-1:0] mem_req_rw,
    output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE-1:0] mem_req_byteen,
    output wire [DCACHE_NUM_REQS-1:0][DCACHE_ADDR_WIDTH-1:0] mem_req_addr,
    output wire [DCACHE_NUM_REQS-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] mem_req_flags,
    output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] mem_req_data,
    output wire [DCACHE_NUM_REQS-1:0][DCACHE_TAG_WIDTH-1:0] mem_req_tag,
    input wire [DCACHE_NUM_REQS-1:0] mem_req_ready,

    // Memory response
    input wire [DCACHE_NUM_REQS-1:0] mem_rsp_valid,
    input wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] mem_rsp_data,
    input wire [DCACHE_NUM_REQS-1:0][DCACHE_TAG_WIDTH-1:0] mem_rsp_tag,
    output wire [DCACHE_NUM_REQS-1:0] mem_rsp_ready
);
    // LSU-side interface array handed to VX_mem_unit
    VX_lsu_mem_if #(
        .NUM_LANES (`NUM_LSU_LANES),
        .DATA_SIZE (LSU_WORD_SIZE),
        .TAG_WIDTH (LSU_TAG_WIDTH)
    ) lsu_mem_if[`NUM_LSU_BLOCKS]();

    // LSU memory request: flat input ports -> interface fields
    for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lsu_mem_req
        assign lsu_mem_if[i].req_valid = lsu_req_valid[i];
        assign lsu_mem_if[i].req_data.rw = lsu_req_rw[i];
        assign lsu_mem_if[i].req_data.mask = lsu_req_mask[i];
        assign lsu_mem_if[i].req_data.byteen = lsu_req_byteen[i];
        assign lsu_mem_if[i].req_data.addr = lsu_req_addr[i];
        assign lsu_mem_if[i].req_data.flags = lsu_req_flags[i];
        assign lsu_mem_if[i].req_data.data = lsu_req_data[i];
        assign lsu_mem_if[i].req_data.tag = lsu_req_tag[i];
        assign lsu_req_ready[i] = lsu_mem_if[i].req_ready;
    end

    // LSU memory response: interface fields -> flat output ports
    for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lsu_rsp
        assign lsu_rsp_valid[i] = lsu_mem_if[i].rsp_valid;
        assign lsu_rsp_mask[i] = lsu_mem_if[i].rsp_data.mask;
        assign lsu_rsp_data[i] = lsu_mem_if[i].rsp_data.data;
        assign lsu_rsp_tag[i] = lsu_mem_if[i].rsp_data.tag;
        assign lsu_mem_if[i].rsp_ready = lsu_rsp_ready[i];
    end

    // Dcache-side interface array handed to VX_mem_unit
    VX_mem_bus_if #(
        .DATA_SIZE (DCACHE_WORD_SIZE),
        .TAG_WIDTH (DCACHE_TAG_WIDTH)
    ) mem_bus_if[DCACHE_NUM_REQS]();

    // memory request: interface fields -> flat output ports
    for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin : g_mem_req
        assign mem_req_valid[i] = mem_bus_if[i].req_valid;
        assign mem_req_rw[i] = mem_bus_if[i].req_data.rw;
        assign mem_req_byteen[i] = mem_bus_if[i].req_data.byteen;
        assign mem_req_addr[i] = mem_bus_if[i].req_data.addr;
        assign mem_req_flags[i] = mem_bus_if[i].req_data.flags;
        assign mem_req_data[i] = mem_bus_if[i].req_data.data;
        assign mem_req_tag[i] = mem_bus_if[i].req_data.tag;
        assign mem_bus_if[i].req_ready = mem_req_ready[i];
    end

    // memory response: flat input ports -> interface fields
    for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin : g_mem_bus_rsp
        assign mem_bus_if[i].rsp_valid = mem_rsp_valid[i];
        assign mem_bus_if[i].rsp_data.tag = mem_rsp_tag[i];
        assign mem_bus_if[i].rsp_data.data = mem_rsp_data[i];
        assign mem_rsp_ready[i] = mem_bus_if[i].rsp_ready;
    end

`ifdef PERF_ENABLE
    // Sink for the local-memory perf counters produced by mem_unit.
    // The signal is driven through mem_unit's lmem_perf port, so it must not
    // carry a declaration initializer (a variable driven by an output port
    // may not also be initialized). This wrapper has no port to export the
    // counters on, so the signal is explicitly marked as unused.
    cache_perf_t lmem_perf;
    `UNUSED_VAR (lmem_perf)
`endif

    VX_mem_unit #(
        .INSTANCE_ID (INSTANCE_ID)
    ) mem_unit (
        .clk (clk),
        .reset (reset),
    `ifdef PERF_ENABLE
        .lmem_perf (lmem_perf),
    `endif
        .lsu_mem_if (lsu_mem_if),
        .dcache_bus_if (mem_bus_if)
    );

endmodule
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -13,30 +13,289 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// reset all GPRs in debug mode
|
||||
`ifdef SIMULATION
|
||||
`ifndef NDEBUG
|
||||
`define GPR_RESET
|
||||
`endif
|
||||
`endif
|
||||
|
||||
module VX_operands import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter NUM_BANKS = 4,
|
||||
parameter OUT_BUF = 3
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_writeback_if.slave writeback_if [`ISSUE_WIDTH],
|
||||
VX_scoreboard_if.slave scoreboard_if [`ISSUE_WIDTH],
|
||||
VX_operands_if.master operands_if [`ISSUE_WIDTH]
|
||||
`ifdef PERF_ENABLE
|
||||
output wire [`PERF_CTR_BITS-1:0] perf_stalls,
|
||||
`endif
|
||||
|
||||
VX_writeback_if.slave writeback_if,
|
||||
VX_scoreboard_if.slave scoreboard_if,
|
||||
VX_operands_if.master operands_if
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam NUM_SRC_OPDS = 3;
|
||||
localparam REQ_SEL_BITS = `CLOG2(NUM_SRC_OPDS);
|
||||
localparam REQ_SEL_WIDTH = `UP(REQ_SEL_BITS);
|
||||
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
|
||||
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
|
||||
localparam PER_BANK_REGS = `NUM_REGS / NUM_BANKS;
|
||||
localparam META_DATAW = ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + `NR_BITS + `UUID_WIDTH;
|
||||
localparam REGS_DATAW = `XLEN * `NUM_THREADS;
|
||||
localparam DATAW = META_DATAW + NUM_SRC_OPDS * REGS_DATAW;
|
||||
localparam RAM_ADDRW = `LOG2UP(`NUM_REGS * PER_ISSUE_WARPS);
|
||||
localparam PER_BANK_ADDRW = RAM_ADDRW - BANK_SEL_BITS;
|
||||
localparam XLEN_SIZE = `XLEN / 8;
|
||||
localparam BYTEENW = `NUM_THREADS * XLEN_SIZE;
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
|
||||
`RESET_RELAY (slice_reset, reset);
|
||||
`UNUSED_VAR (writeback_if.data.sop)
|
||||
|
||||
VX_gpr_slice #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) gpr_slice (
|
||||
.clk (clk),
|
||||
.reset (slice_reset),
|
||||
.writeback_if (writeback_if[i]),
|
||||
.scoreboard_if(scoreboard_if[i]),
|
||||
.operands_if (operands_if[i])
|
||||
wire [NUM_SRC_OPDS-1:0] src_valid;
|
||||
wire [NUM_SRC_OPDS-1:0] req_valid_in, req_ready_in;
|
||||
wire [NUM_SRC_OPDS-1:0][PER_BANK_ADDRW-1:0] req_data_in;
|
||||
wire [NUM_SRC_OPDS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx;
|
||||
|
||||
wire [NUM_BANKS-1:0] gpr_rd_valid, gpr_rd_ready;
|
||||
wire [NUM_BANKS-1:0] gpr_rd_valid_st1, gpr_rd_valid_st2;
|
||||
wire [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr, gpr_rd_addr_st1;
|
||||
wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st2;
|
||||
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx, gpr_rd_req_idx_st1, gpr_rd_req_idx_st2;
|
||||
|
||||
wire pipe_ready_in;
|
||||
wire pipe_valid_st1, pipe_ready_st1;
|
||||
wire pipe_valid_st2, pipe_ready_st2;
|
||||
wire [META_DATAW-1:0] pipe_data, pipe_data_st1, pipe_data_st2;
|
||||
|
||||
reg [NUM_SRC_OPDS-1:0][(`NUM_THREADS * `XLEN)-1:0] src_data_st2, src_data_m_st2;
|
||||
|
||||
reg [NUM_SRC_OPDS-1:0] data_fetched_st1;
|
||||
|
||||
reg has_collision_n;
|
||||
wire has_collision_st1;
|
||||
|
||||
wire [NUM_SRC_OPDS-1:0][`NR_BITS-1:0] src_opds;
|
||||
assign src_opds = {scoreboard_if.data.rs3, scoreboard_if.data.rs2, scoreboard_if.data.rs1};
|
||||
|
||||
for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin : g_req_data_in
|
||||
if (ISSUE_WIS != 0) begin : g_wis
|
||||
assign req_data_in[i] = {src_opds[i][`NR_BITS-1:BANK_SEL_BITS], scoreboard_if.data.wis};
|
||||
end else begin : g_no_wis
|
||||
assign req_data_in[i] = src_opds[i][`NR_BITS-1:BANK_SEL_BITS];
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin : g_req_bank_idx
|
||||
if (NUM_BANKS != 1) begin : g_multibanks
|
||||
assign req_bank_idx[i] = src_opds[i][BANK_SEL_BITS-1:0];
|
||||
end else begin : g_singlebank
|
||||
assign req_bank_idx[i] = '0;
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin : g_src_valid
|
||||
assign src_valid[i] = (src_opds[i] != 0) && ~data_fetched_st1[i];
|
||||
end
|
||||
|
||||
assign req_valid_in = {NUM_SRC_OPDS{scoreboard_if.valid}} & src_valid;
|
||||
|
||||
VX_stream_xbar #(
|
||||
.NUM_INPUTS (NUM_SRC_OPDS),
|
||||
.NUM_OUTPUTS (NUM_BANKS),
|
||||
.DATAW (PER_BANK_ADDRW),
|
||||
.ARBITER ("P"), // use priority arbiter
|
||||
.PERF_CTR_BITS(`PERF_CTR_BITS),
|
||||
.OUT_BUF (0) // no output buffering
|
||||
) req_xbar (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
`UNUSED_PIN(collisions),
|
||||
.valid_in (req_valid_in),
|
||||
.data_in (req_data_in),
|
||||
.sel_in (req_bank_idx),
|
||||
.ready_in (req_ready_in),
|
||||
.valid_out (gpr_rd_valid),
|
||||
.data_out (gpr_rd_addr),
|
||||
.sel_out (gpr_rd_req_idx),
|
||||
.ready_out (gpr_rd_ready)
|
||||
);
|
||||
|
||||
assign gpr_rd_ready = {NUM_BANKS{pipe_ready_in}};
|
||||
|
||||
always @(*) begin
|
||||
has_collision_n = 0;
|
||||
for (integer i = 0; i < NUM_SRC_OPDS; ++i) begin
|
||||
for (integer j = 1; j < (NUM_SRC_OPDS-i); ++j) begin
|
||||
has_collision_n |= src_valid[i]
|
||||
&& src_valid[j+i]
|
||||
&& (req_bank_idx[i] == req_bank_idx[j+i]);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire [NUM_SRC_OPDS-1:0] req_fire_in = req_valid_in & req_ready_in;
|
||||
|
||||
assign pipe_data = {
|
||||
scoreboard_if.data.wis,
|
||||
scoreboard_if.data.tmask,
|
||||
scoreboard_if.data.PC,
|
||||
scoreboard_if.data.wb,
|
||||
scoreboard_if.data.ex_type,
|
||||
scoreboard_if.data.op_type,
|
||||
scoreboard_if.data.op_args,
|
||||
scoreboard_if.data.rd,
|
||||
scoreboard_if.data.uuid
|
||||
};
|
||||
|
||||
assign scoreboard_if.ready = pipe_ready_in && ~has_collision_n;
|
||||
|
||||
wire pipe_fire_st1 = pipe_valid_st1 && pipe_ready_st1;
|
||||
wire pipe_fire_st2 = pipe_valid_st2 && pipe_ready_st2;
|
||||
|
||||
VX_pipe_buffer #(
|
||||
.DATAW (NUM_BANKS + META_DATAW + 1 + NUM_BANKS * (PER_BANK_ADDRW + REQ_SEL_WIDTH))
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (scoreboard_if.valid),
|
||||
.ready_in (pipe_ready_in),
|
||||
.data_in ({gpr_rd_valid, pipe_data, has_collision_n, gpr_rd_addr, gpr_rd_req_idx}),
|
||||
.data_out ({gpr_rd_valid_st1, pipe_data_st1, has_collision_st1, gpr_rd_addr_st1, gpr_rd_req_idx_st1}),
|
||||
.valid_out(pipe_valid_st1),
|
||||
.ready_out(pipe_ready_st1)
|
||||
);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset || scoreboard_if.ready) begin
|
||||
data_fetched_st1 <= 0;
|
||||
end else begin
|
||||
data_fetched_st1 <= data_fetched_st1 | req_fire_in;
|
||||
end
|
||||
end
|
||||
|
||||
wire pipe_valid2_st1 = pipe_valid_st1 && ~has_collision_st1;
|
||||
|
||||
VX_pipe_buffer #(
|
||||
.DATAW (NUM_BANKS + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH)
|
||||
) pipe_reg2 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (pipe_valid2_st1),
|
||||
.ready_in (pipe_ready_st1),
|
||||
.data_in ({gpr_rd_valid_st1, pipe_data_st1, gpr_rd_req_idx_st1}),
|
||||
.data_out ({gpr_rd_valid_st2, pipe_data_st2, gpr_rd_req_idx_st2}),
|
||||
.valid_out(pipe_valid_st2),
|
||||
.ready_out(pipe_ready_st2)
|
||||
);
|
||||
|
||||
always @(*) begin
|
||||
src_data_m_st2 = src_data_st2;
|
||||
for (integer b = 0; b < NUM_BANKS; ++b) begin
|
||||
if (gpr_rd_valid_st2[b]) begin
|
||||
src_data_m_st2[gpr_rd_req_idx_st2[b]] = gpr_rd_data_st2[b];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset || pipe_fire_st2) begin
|
||||
src_data_st2 <= 0;
|
||||
end else begin
|
||||
src_data_st2 <= src_data_m_st2;
|
||||
end
|
||||
end
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (pipe_valid_st2),
|
||||
.ready_in (pipe_ready_st2),
|
||||
.data_in ({pipe_data_st2, src_data_m_st2}),
|
||||
.data_out ({
|
||||
operands_if.data.wis,
|
||||
operands_if.data.tmask,
|
||||
operands_if.data.PC,
|
||||
operands_if.data.wb,
|
||||
operands_if.data.ex_type,
|
||||
operands_if.data.op_type,
|
||||
operands_if.data.op_args,
|
||||
operands_if.data.rd,
|
||||
operands_if.data.uuid,
|
||||
operands_if.data.rs3_data,
|
||||
operands_if.data.rs2_data,
|
||||
operands_if.data.rs1_data
|
||||
}),
|
||||
.valid_out (operands_if.valid),
|
||||
.ready_out (operands_if.ready)
|
||||
);
|
||||
|
||||
wire [PER_BANK_ADDRW-1:0] gpr_wr_addr;
|
||||
if (ISSUE_WIS != 0) begin : g_gpr_wr_addr
|
||||
assign gpr_wr_addr = {writeback_if.data.rd[`NR_BITS-1:BANK_SEL_BITS], writeback_if.data.wis};
|
||||
end else begin : g_gpr_wr_addr_no_wis
|
||||
assign gpr_wr_addr = writeback_if.data.rd[`NR_BITS-1:BANK_SEL_BITS];
|
||||
end
|
||||
|
||||
wire [BANK_SEL_WIDTH-1:0] gpr_wr_bank_idx;
|
||||
if (NUM_BANKS != 1) begin : g_gpr_wr_bank_idx
|
||||
assign gpr_wr_bank_idx = writeback_if.data.rd[BANK_SEL_BITS-1:0];
|
||||
end else begin : g_gpr_wr_bank_idx_0
|
||||
assign gpr_wr_bank_idx = '0;
|
||||
end
|
||||
|
||||
for (genvar b = 0; b < NUM_BANKS; ++b) begin : g_gpr_rams
|
||||
wire gpr_wr_enabled;
|
||||
if (BANK_SEL_BITS != 0) begin : g_gpr_wr_enabled_multibanks
|
||||
assign gpr_wr_enabled = writeback_if.valid
|
||||
&& (gpr_wr_bank_idx == BANK_SEL_BITS'(b));
|
||||
end else begin : g_gpr_wr_enabled
|
||||
assign gpr_wr_enabled = writeback_if.valid;
|
||||
end
|
||||
|
||||
wire [BYTEENW-1:0] wren;
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin : g_wren
|
||||
assign wren[i*XLEN_SIZE+:XLEN_SIZE] = {XLEN_SIZE{writeback_if.data.tmask[i]}};
|
||||
end
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (REGS_DATAW),
|
||||
.SIZE (PER_BANK_REGS * PER_ISSUE_WARPS),
|
||||
.OUT_REG (1),
|
||||
.READ_ENABLE (1),
|
||||
.WRENW (BYTEENW),
|
||||
`ifdef GPR_RESET
|
||||
.RESET_RAM (1),
|
||||
`endif
|
||||
.NO_RWCHECK (1)
|
||||
) gpr_ram (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (pipe_fire_st1),
|
||||
.wren (wren),
|
||||
.write (gpr_wr_enabled),
|
||||
.waddr (gpr_wr_addr),
|
||||
.wdata (writeback_if.data.data),
|
||||
.raddr (gpr_rd_addr_st1[b]),
|
||||
.rdata (gpr_rd_data_st2[b])
|
||||
);
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`PERF_CTR_BITS-1:0] collisions_r;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
collisions_r <= '0;
|
||||
end else begin
|
||||
collisions_r <= collisions_r + `PERF_CTR_BITS'(scoreboard_if.valid && pipe_ready_in && has_collision_n);
|
||||
end
|
||||
end
|
||||
assign perf_stalls = collisions_r;
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
92
hw/rtl/core/VX_pe_switch.sv
Normal file
92
hw/rtl/core/VX_pe_switch.sv
Normal file
|
@ -0,0 +1,92 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Processing-element switch.
//
// Request path: the single incoming execute stream is fed to a
// VX_stream_switch (`req_switch`) with `pe_sel` on its select input; the
// PE_COUNT switch outputs are fanned out to `execute_out_if[]`.
// Response path: the PE_COUNT `commit_in_if[]` streams are fed to a
// VX_stream_arb (`rsp_arb`) whose single output drives `commit_out_if`.
//
// Parameters:
//   PE_COUNT    - number of processing elements behind the switch.
//   NUM_LANES   - lanes per request/response.
//   REQ_OUT_BUF - OUT_BUF setting forwarded to the request switch.
//   RSP_OUT_BUF - OUT_BUF setting forwarded to the response arbiter.
//   ARBITER     - ARBITER setting forwarded to the response arbiter.
//   PE_SEL_BITS - width of the PE selector (derived, `CLOG2(PE_COUNT)).
// NOTE(review): PE_COUNT and NUM_LANES default to 0 and are expected to be
// overridden by the instantiating module - confirm at call sites.
module VX_pe_switch import VX_gpu_pkg::*; #(
    parameter PE_COUNT = 0,
    parameter NUM_LANES = 0,
    parameter REQ_OUT_BUF = 0,
    parameter RSP_OUT_BUF = 0,
    parameter `STRING ARBITER = "R",
    parameter PE_SEL_BITS = `CLOG2(PE_COUNT)
) (
    input wire clk,
    input wire reset,
    input wire [`UP(PE_SEL_BITS)-1:0] pe_sel,
    VX_execute_if.slave execute_in_if,
    VX_commit_if.master commit_out_if,
    VX_execute_if.master execute_out_if[PE_COUNT],
    VX_commit_if.slave commit_in_if[PE_COUNT]
);
    // Packet-id field sizing: number of NUM_LANES-wide packets per warp.
    localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
    localparam PID_WIDTH = `UP(PID_BITS);

    // Flattened payload widths of the execute and commit streams.
    // NOTE(review): these sums are assumed to match $bits() of the
    // VX_execute_if / VX_commit_if data structs - confirm against the
    // interface definitions.
    localparam REQ_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `INST_ALU_BITS + $bits(op_args_t) + 1 + `NR_BITS + `NT_WIDTH + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1;
    localparam RSP_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;

    // Per-PE request signals (outputs of the request switch)
    wire [PE_COUNT-1:0]                 sw_req_valid;
    wire [PE_COUNT-1:0][REQ_DATAW-1:0]  sw_req_data;
    wire [PE_COUNT-1:0]                 sw_req_ready;

    // Per-PE response signals (inputs of the response arbiter)
    wire [PE_COUNT-1:0]                 arb_rsp_valid;
    wire [PE_COUNT-1:0][RSP_DATAW-1:0]  arb_rsp_data;
    wire [PE_COUNT-1:0]                 arb_rsp_ready;

    // Fan the switch outputs out to the per-PE execute interfaces.
    for (genvar i = 0; i < PE_COUNT; ++i) begin : g_execute_out_if
        assign execute_out_if[i].valid = sw_req_valid[i];
        assign execute_out_if[i].data  = sw_req_data[i];
        assign sw_req_ready[i]         = execute_out_if[i].ready;
    end

    // Collect the per-PE commit interfaces into packed arbiter inputs.
    for (genvar i = 0; i < PE_COUNT; ++i) begin : g_commit_in_if
        assign arb_rsp_valid[i]      = commit_in_if[i].valid;
        assign arb_rsp_data[i]       = commit_in_if[i].data;
        assign commit_in_if[i].ready = arb_rsp_ready[i];
    end

    ///////////////////////////////////////////////////////////////////////////

    // Request side: one input stream, PE_COUNT output streams.
    VX_stream_switch #(
        .DATAW       (REQ_DATAW),
        .NUM_OUTPUTS (PE_COUNT),
        .OUT_BUF     (REQ_OUT_BUF)
    ) req_switch (
        .clk       (clk),
        .reset     (reset),
        .sel_in    (pe_sel),
        .valid_in  (execute_in_if.valid),
        .ready_in  (execute_in_if.ready),
        .data_in   (execute_in_if.data),
        .data_out  (sw_req_data),
        .valid_out (sw_req_valid),
        .ready_out (sw_req_ready)
    );

    ///////////////////////////////////////////////////////////////////////////

    // Response side: PE_COUNT input streams, one output stream.
    // The arbiter's sel_out (index of the granted input) is not needed here.
    VX_stream_arb #(
        .NUM_INPUTS (PE_COUNT),
        .DATAW      (RSP_DATAW),
        .ARBITER    (ARBITER),
        .OUT_BUF    (RSP_OUT_BUF)
    ) rsp_arb (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (arb_rsp_valid),
        .ready_in  (arb_rsp_ready),
        .data_in   (arb_rsp_data),
        .data_out  (commit_out_if.data),
        .valid_out (commit_out_if.valid),
        .ready_out (commit_out_if.ready),
        `UNUSED_PIN (sel_out)
    );

endmodule
|
|
@ -1,79 +0,0 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Per-warp pending-instruction counter.
//
// Keeps one CTR_WIDTH-bit counter per warp. `incr` adds one to the counter
// of warp `incr_wid`; each asserted bit of `decr` subtracts one from the
// counter of the warp named by the matching `decr_wid` entry. Increment and
// decrement amounts are registered first and applied to the counters on the
// following clock edge.
//
// Outputs:
//   empty     - AND over all warps of the registered "counter == 0" flags.
//   alm_empty - registered "counter == ALM_EMPTY" flag of warp
//               `alm_empty_wid`.
module VX_pending_instr #(
    parameter CTR_WIDTH = 12,
    parameter ALM_EMPTY = 1,
    parameter DECR_COUNT = 1
) (
    input wire clk,
    input wire reset,
    input wire incr,
    input wire [`NW_WIDTH-1:0] incr_wid,
    input wire [DECR_COUNT-1:0] decr,
    input wire [DECR_COUNT-1:0][`NW_WIDTH-1:0] decr_wid,
    input wire [`NW_WIDTH-1:0] alm_empty_wid,
    output wire empty,
    output wire alm_empty
);
    // Bits needed to hold a decrement amount in the range 0..DECR_COUNT
    localparam COUNTW = `CLOG2(DECR_COUNT+1);

    // Counter state and registered status flags, one entry per warp
    reg [`NUM_WARPS-1:0][CTR_WIDTH-1:0] pending_instrs;
    reg [`NUM_WARPS-1:0] alm_empty_r, empty_r;

    // Registered per-warp increment / decrement amounts
    reg [`NUM_WARPS-1:0] incr_cnt;
    reg [`NUM_WARPS-1:0][COUNTW-1:0] decr_cnt;

    // Combinational decode of the current-cycle requests
    reg [`NUM_WARPS-1:0] incr_cnt_n;
    reg [`NUM_WARPS-1:0][DECR_COUNT-1:0] decr_mask;

    // One-hot increment request, indexed by warp
    always @(*) begin
        incr_cnt_n = 0;
        if (incr) begin
            incr_cnt_n[incr_wid] = 1;
        end
    end

    // decr_mask[w][k] is set when decrement slot k targets warp w
    always @(*) begin
        decr_mask = 0;
        for (integer k = 0; k < DECR_COUNT; ++k) begin
            if (decr[k]) begin
                decr_mask[decr_wid[k]][k] = 1;
            end
        end
    end

    for (genvar i = 0; i < `NUM_WARPS; ++i) begin

        // Number of decrement slots hitting this warp in the current cycle
        wire [COUNTW-1:0] decr_cnt_n;
        `POP_COUNT(decr_cnt_n, decr_mask[i]);

        // Next counter value, computed from the *registered* amounts
        wire [CTR_WIDTH-1:0] pending_instrs_n = pending_instrs[i] + CTR_WIDTH'(incr_cnt[i]) - CTR_WIDTH'(decr_cnt[i]);

        always @(posedge clk) begin
            if (reset) begin
                pending_instrs[i] <= '0;
                incr_cnt[i] <= '0;
                decr_cnt[i] <= '0;
                empty_r[i] <= 1;
                alm_empty_r[i] <= 0;
            end else begin
                pending_instrs[i] <= pending_instrs_n;
                incr_cnt[i] <= incr_cnt_n[i];
                decr_cnt[i] <= decr_cnt_n;
                empty_r[i] <= (pending_instrs_n == 0);
                alm_empty_r[i] <= (pending_instrs_n == ALM_EMPTY);
            end
        end
    end

    assign empty = (& empty_r);
    assign alm_empty = alm_empty_r[alm_empty_wid];

endmodule
|
|
@ -14,13 +14,14 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_schedule import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_pipeline_perf_if.schedule perf_schedule_if,
|
||||
output sched_perf_t sched_perf,
|
||||
`endif
|
||||
|
||||
// configuration
|
||||
|
@ -42,6 +43,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
// status
|
||||
output wire busy
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
reg [`NUM_WARPS-1:0] active_warps, active_warps_n; // updated when a warp is activated or disabled
|
||||
|
@ -76,7 +78,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
wire [`NUM_ALU_BLOCKS-1:0][`NW_WIDTH-1:0] branch_wid;
|
||||
wire [`NUM_ALU_BLOCKS-1:0] branch_taken;
|
||||
wire [`NUM_ALU_BLOCKS-1:0][`PC_BITS-1:0] branch_dest;
|
||||
for (genvar i = 0; i < `NUM_ALU_BLOCKS; ++i) begin
|
||||
for (genvar i = 0; i < `NUM_ALU_BLOCKS; ++i) begin : g_branch_init
|
||||
assign branch_valid[i] = branch_ctl_if[i].valid;
|
||||
assign branch_wid[i] = branch_ctl_if[i].wid;
|
||||
assign branch_taken[i] = branch_ctl_if[i].taken;
|
||||
|
@ -187,7 +189,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
end
|
||||
|
||||
// decode unlock
|
||||
if (decode_sched_if.valid && ~decode_sched_if.is_wstall) begin
|
||||
if (decode_sched_if.valid && decode_sched_if.unlock) begin
|
||||
stalled_warps_n[decode_sched_if.wid] = 0;
|
||||
end
|
||||
|
||||
|
@ -287,13 +289,11 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
|
||||
// split/join handling
|
||||
|
||||
`RESET_RELAY (split_join_reset, reset);
|
||||
|
||||
VX_split_join #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-splitjoin", INSTANCE_ID))
|
||||
) split_join (
|
||||
.clk (clk),
|
||||
.reset (split_join_reset),
|
||||
.reset (reset),
|
||||
.valid (warp_ctl_if.valid),
|
||||
.wid (warp_ctl_if.wid),
|
||||
.split (warp_ctl_if.split),
|
||||
|
@ -322,7 +322,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
wire [`NUM_WARPS-1:0][(`NUM_THREADS + `PC_BITS)-1:0] schedule_data;
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_schedule_data
|
||||
assign schedule_data[i] = {thread_masks[i], warp_pcs[i]};
|
||||
end
|
||||
|
||||
|
@ -331,61 +331,62 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
schedule_data[schedule_wid][(`NUM_THREADS + `PC_BITS)-5:0]
|
||||
};
|
||||
|
||||
`ifndef NDEBUG
|
||||
localparam GNW_WIDTH = `LOG2UP(`NUM_CLUSTERS * `NUM_CORES * `NUM_WARPS);
|
||||
reg [`UUID_WIDTH-1:0] instr_uuid;
|
||||
wire [GNW_WIDTH-1:0] g_wid = (GNW_WIDTH'(CORE_ID) << `NW_BITS) + GNW_WIDTH'(schedule_wid);
|
||||
`ifdef SV_DPI
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
instr_uuid <= `UUID_WIDTH'(dpi_uuid_gen(1, 32'd0));
|
||||
end else if (schedule_fire) begin
|
||||
instr_uuid <= `UUID_WIDTH'(dpi_uuid_gen(0, 32'(g_wid)));
|
||||
end
|
||||
end
|
||||
wire [`UUID_WIDTH-1:0] instr_uuid;
|
||||
`ifdef UUID_ENABLE
|
||||
VX_uuid_gen #(
|
||||
.CORE_ID (CORE_ID),
|
||||
.UUID_WIDTH (`UUID_WIDTH)
|
||||
) uuid_gen (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.incr (schedule_fire),
|
||||
.wid (schedule_wid),
|
||||
.uuid (instr_uuid)
|
||||
);
|
||||
`else
|
||||
wire [GNW_WIDTH+16-1:0] w_uuid = {g_wid, 16'(schedule_pc)};
|
||||
always @(*) begin
|
||||
instr_uuid = `UUID_WIDTH'(w_uuid);
|
||||
end
|
||||
`endif
|
||||
`else
|
||||
wire [`UUID_WIDTH-1:0] instr_uuid = '0;
|
||||
assign instr_uuid = '0;
|
||||
`endif
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`NUM_THREADS + `PC_BITS + `NW_WIDTH)
|
||||
.DATAW (`NUM_THREADS + `PC_BITS + `NW_WIDTH + `UUID_WIDTH),
|
||||
.SIZE (2), // need to buffer out ready_in
|
||||
.OUT_REG (1) // should be registered for BRAM access in fetch unit
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (schedule_valid),
|
||||
.ready_in (schedule_ready),
|
||||
.data_in ({schedule_tmask, schedule_pc, schedule_wid}),
|
||||
.data_out ({schedule_if.data.tmask, schedule_if.data.PC, schedule_if.data.wid}),
|
||||
.data_in ({schedule_tmask, schedule_pc, schedule_wid, instr_uuid}),
|
||||
.data_out ({schedule_if.data.tmask, schedule_if.data.PC, schedule_if.data.wid, schedule_if.data.uuid}),
|
||||
.valid_out (schedule_if.valid),
|
||||
.ready_out (schedule_if.ready)
|
||||
);
|
||||
|
||||
assign schedule_if.data.uuid = instr_uuid;
|
||||
// Track pending instructions per warp
|
||||
|
||||
`RESET_RELAY (pending_instr_reset, reset);
|
||||
wire [`NUM_WARPS-1:0] pending_warp_empty;
|
||||
wire [`NUM_WARPS-1:0] pending_warp_alm_empty;
|
||||
|
||||
wire no_pending_instr;
|
||||
VX_pending_instr #(
|
||||
.CTR_WIDTH (12),
|
||||
.DECR_COUNT (`ISSUE_WIDTH),
|
||||
.ALM_EMPTY (1)
|
||||
) pending_instr(
|
||||
.clk (clk),
|
||||
.reset (pending_instr_reset),
|
||||
.incr (schedule_if_fire),
|
||||
.incr_wid (schedule_if.data.wid),
|
||||
.decr (commit_sched_if.committed),
|
||||
.decr_wid (commit_sched_if.committed_wid),
|
||||
.alm_empty_wid (sched_csr_if.alm_empty_wid),
|
||||
.alm_empty (sched_csr_if.alm_empty),
|
||||
.empty (no_pending_instr)
|
||||
);
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_pending_sizes
|
||||
VX_pending_size #(
|
||||
.SIZE (4096),
|
||||
.ALM_EMPTY (1)
|
||||
) counter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.incr (schedule_if_fire && (schedule_if.data.wid == `NW_WIDTH'(i))),
|
||||
.decr (commit_sched_if.committed_warps[i]),
|
||||
.empty (pending_warp_empty[i]),
|
||||
.alm_empty (pending_warp_alm_empty[i]),
|
||||
`UNUSED_PIN (full),
|
||||
`UNUSED_PIN (alm_full),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
end
|
||||
|
||||
assign sched_csr_if.alm_empty = pending_warp_alm_empty[sched_csr_if.alm_empty_wid];
|
||||
|
||||
wire no_pending_instr = (& pending_warp_empty);
|
||||
|
||||
`BUFFER_EX(busy, (active_warps != 0 || ~no_pending_instr), 1'b1, 1);
|
||||
|
||||
|
@ -402,7 +403,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
timeout_ctr <= '0;
|
||||
timeout_enable <= 0;
|
||||
end else begin
|
||||
if (decode_sched_if.valid && ~decode_sched_if.is_wstall) begin
|
||||
if (decode_sched_if.valid && decode_sched_if.unlock) begin
|
||||
timeout_enable <= 1;
|
||||
end
|
||||
if (timeout_enable && active_warps !=0 && active_warps == stalled_warps) begin
|
||||
|
@ -412,7 +413,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
end
|
||||
`RUNTIME_ASSERT(timeout_ctr < `STALL_TIMEOUT, ("%t: *** core%0d-scheduler-timeout: stalled_warps=%b", $time, CORE_ID, stalled_warps))
|
||||
`RUNTIME_ASSERT(timeout_ctr < `STALL_TIMEOUT, ("%t: *** %s timeout: stalled_warps=%b", $time, INSTANCE_ID, stalled_warps))
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`PERF_CTR_BITS-1:0] perf_sched_idles;
|
||||
|
@ -431,8 +432,8 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
|
||||
assign perf_schedule_if.sched_idles = perf_sched_idles;
|
||||
assign perf_schedule_if.sched_stalls = perf_sched_stalls;
|
||||
assign sched_perf.idles = perf_sched_idles;
|
||||
assign sched_perf.stalls = perf_sched_stalls;
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -14,39 +14,39 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_scoreboard import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
output reg [`PERF_CTR_BITS-1:0] perf_scb_stalls,
|
||||
output reg [`PERF_CTR_BITS-1:0] perf_units_uses [`NUM_EX_UNITS],
|
||||
output reg [`PERF_CTR_BITS-1:0] perf_sfu_uses [`NUM_SFU_UNITS],
|
||||
output reg [`PERF_CTR_BITS-1:0] perf_stalls,
|
||||
output reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_units_uses,
|
||||
output reg [`NUM_SFU_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_sfu_uses,
|
||||
`endif
|
||||
|
||||
VX_writeback_if.slave writeback_if [`ISSUE_WIDTH],
|
||||
VX_ibuffer_if.slave ibuffer_if [`NUM_WARPS],
|
||||
VX_scoreboard_if.master scoreboard_if [`ISSUE_WIDTH]
|
||||
VX_writeback_if.slave writeback_if,
|
||||
VX_ibuffer_if.slave ibuffer_if [PER_ISSUE_WARPS],
|
||||
VX_scoreboard_if.master scoreboard_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam NUM_SRC_OPDS = 3;
|
||||
localparam NUM_OPDS = NUM_SRC_OPDS + 1;
|
||||
localparam DATAW = `UUID_WIDTH + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + (`NR_BITS * 4) + 1;
|
||||
|
||||
VX_ibuffer_if staging_if [PER_ISSUE_WARPS]();
|
||||
reg [PER_ISSUE_WARPS-1:0] operands_ready;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`NUM_WARPS-1:0][`NUM_EX_UNITS-1:0] perf_inuse_units_per_cycle;
|
||||
reg [PER_ISSUE_WARPS-1:0][`NUM_EX_UNITS-1:0] perf_inuse_units_per_cycle;
|
||||
wire [`NUM_EX_UNITS-1:0] perf_units_per_cycle, perf_units_per_cycle_r;
|
||||
|
||||
reg [`NUM_WARPS-1:0][`NUM_SFU_UNITS-1:0] perf_inuse_sfu_per_cycle;
|
||||
reg [PER_ISSUE_WARPS-1:0][`NUM_SFU_UNITS-1:0] perf_inuse_sfu_per_cycle;
|
||||
wire [`NUM_SFU_UNITS-1:0] perf_sfu_per_cycle, perf_sfu_per_cycle_r;
|
||||
|
||||
wire [`NUM_WARPS-1:0] perf_issue_stalls_per_cycle;
|
||||
wire [`CLOG2(`NUM_WARPS+1)-1:0] perf_stalls_per_cycle, perf_stalls_per_cycle_r;
|
||||
|
||||
`POP_COUNT(perf_stalls_per_cycle, perf_issue_stalls_per_cycle);
|
||||
|
||||
VX_reduce #(
|
||||
.DATAW_IN (`NUM_EX_UNITS),
|
||||
.N (`NUM_WARPS),
|
||||
.N (PER_ISSUE_WARPS),
|
||||
.OP ("|")
|
||||
) perf_units_reduce (
|
||||
.data_in (perf_inuse_units_per_cycle),
|
||||
|
@ -55,26 +55,32 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
|
||||
VX_reduce #(
|
||||
.DATAW_IN (`NUM_SFU_UNITS),
|
||||
.N (`NUM_WARPS),
|
||||
.N (PER_ISSUE_WARPS),
|
||||
.OP ("|")
|
||||
) perf_sfu_reduce (
|
||||
.data_in (perf_inuse_sfu_per_cycle),
|
||||
.data_out (perf_sfu_per_cycle)
|
||||
);
|
||||
|
||||
`BUFFER(perf_stalls_per_cycle_r, perf_stalls_per_cycle);
|
||||
`BUFFER_EX(perf_units_per_cycle_r, perf_units_per_cycle, 1'b1, `CDIV(`NUM_WARPS, `MAX_FANOUT));
|
||||
`BUFFER_EX(perf_sfu_per_cycle_r, perf_sfu_per_cycle, 1'b1, `CDIV(`NUM_WARPS, `MAX_FANOUT));
|
||||
`BUFFER_EX(perf_units_per_cycle_r, perf_units_per_cycle, 1'b1, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT));
|
||||
`BUFFER_EX(perf_sfu_per_cycle_r, perf_sfu_per_cycle, 1'b1, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT));
|
||||
|
||||
always @(posedge clk) begin
|
||||
wire [PER_ISSUE_WARPS-1:0] stg_valid_in;
|
||||
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_stg_valid_in
|
||||
assign stg_valid_in[w] = staging_if[w].valid;
|
||||
end
|
||||
|
||||
wire perf_stall_per_cycle = (|stg_valid_in) && ~(|(stg_valid_in & operands_ready));
|
||||
|
||||
always @(posedge clk) begin : g_perf_stalls
|
||||
if (reset) begin
|
||||
perf_scb_stalls <= '0;
|
||||
perf_stalls <= '0;
|
||||
end else begin
|
||||
perf_scb_stalls <= perf_scb_stalls + `PERF_CTR_BITS'(perf_stalls_per_cycle_r);
|
||||
perf_stalls <= perf_stalls + `PERF_CTR_BITS'(perf_stall_per_cycle);
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin
|
||||
for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin : g_perf_units_uses
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_units_uses[i] <= '0;
|
||||
|
@ -84,7 +90,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_SFU_UNITS; ++i) begin
|
||||
for (genvar i = 0; i < `NUM_SFU_UNITS; ++i) begin : g_perf_sfu_uses
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_sfu_uses[i] <= '0;
|
||||
|
@ -95,139 +101,75 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
end
|
||||
`endif
|
||||
|
||||
VX_ibuffer_if staging_if [`NUM_WARPS]();
|
||||
wire [`NUM_WARPS-1:0][3:0] staging_opds_busy;
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (1)
|
||||
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_stanging_bufs
|
||||
VX_pipe_buffer #(
|
||||
.DATAW (DATAW)
|
||||
) stanging_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (ibuffer_if[i].valid),
|
||||
.data_in (ibuffer_if[i].data),
|
||||
.ready_in (ibuffer_if[i].ready),
|
||||
.valid_out(staging_if[i].valid),
|
||||
.data_out (staging_if[i].data),
|
||||
.ready_out(staging_if[i].ready)
|
||||
.valid_in (ibuffer_if[w].valid),
|
||||
.data_in (ibuffer_if[w].data),
|
||||
.ready_in (ibuffer_if[w].ready),
|
||||
.valid_out(staging_if[w].valid),
|
||||
.data_out (staging_if[w].data),
|
||||
.ready_out(staging_if[w].ready)
|
||||
);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_scoreboard
|
||||
reg [`NUM_REGS-1:0] inuse_regs;
|
||||
|
||||
reg [3:0] operands_busy_r, operands_busy_n;
|
||||
reg [NUM_OPDS-1:0] operands_busy, operands_busy_n;
|
||||
|
||||
localparam iw = i % `ISSUE_WIDTH;
|
||||
localparam wis = i / `ISSUE_WIDTH;
|
||||
wire ibuffer_fire = ibuffer_if[w].valid && ibuffer_if[w].ready;
|
||||
|
||||
wire ibuffer_fire = ibuffer_if[i].valid && ibuffer_if[i].ready;
|
||||
wire staging_fire = staging_if[w].valid && staging_if[w].ready;
|
||||
|
||||
wire staging_fire = staging_if[i].valid && staging_if[i].ready;
|
||||
wire writeback_fire = writeback_if.valid
|
||||
&& (writeback_if.data.wis == ISSUE_WIS_W'(w))
|
||||
&& writeback_if.data.eop;
|
||||
|
||||
wire writeback_fire = writeback_if[iw].valid
|
||||
&& (writeback_if[iw].data.wis == ISSUE_WIS_W'(wis))
|
||||
&& writeback_if[iw].data.eop;
|
||||
wire [NUM_OPDS-1:0][`NR_BITS-1:0] ibuf_opds, stg_opds;
|
||||
assign ibuf_opds = {ibuffer_if[w].data.rs3, ibuffer_if[w].data.rs2, ibuffer_if[w].data.rs1, ibuffer_if[w].data.rd};
|
||||
assign stg_opds = {staging_if[w].data.rs3, staging_if[w].data.rs2, staging_if[w].data.rs1, staging_if[w].data.rd};
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`NUM_REGS-1:0][`EX_WIDTH-1:0] inuse_units;
|
||||
reg [`NUM_REGS-1:0][`SFU_WIDTH-1:0] inuse_sfu;
|
||||
|
||||
reg [`SFU_WIDTH-1:0] sfu_type;
|
||||
always @(*) begin
|
||||
case (staging_if[i].data.op_type)
|
||||
`INST_SFU_CSRRW,
|
||||
`INST_SFU_CSRRS,
|
||||
`INST_SFU_CSRRC: sfu_type = `SFU_CSRS;
|
||||
default: sfu_type = `SFU_WCTL;
|
||||
endcase
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
perf_inuse_units_per_cycle[i] = '0;
|
||||
perf_inuse_sfu_per_cycle[i] = '0;
|
||||
if (staging_if[i].valid) begin
|
||||
if (operands_busy_r[0]) begin
|
||||
perf_inuse_units_per_cycle[i][inuse_units[staging_if[i].data.rd]] = 1;
|
||||
if (inuse_units[staging_if[i].data.rd] == `EX_SFU) begin
|
||||
perf_inuse_sfu_per_cycle[i][inuse_sfu[staging_if[i].data.rd]] = 1;
|
||||
end
|
||||
end
|
||||
if (operands_busy_r[1]) begin
|
||||
perf_inuse_units_per_cycle[i][inuse_units[staging_if[i].data.rs1]] = 1;
|
||||
if (inuse_units[staging_if[i].data.rs1] == `EX_SFU) begin
|
||||
perf_inuse_sfu_per_cycle[i][inuse_sfu[staging_if[i].data.rs1]] = 1;
|
||||
end
|
||||
end
|
||||
if (operands_busy_r[2]) begin
|
||||
perf_inuse_units_per_cycle[i][inuse_units[staging_if[i].data.rs2]] = 1;
|
||||
if (inuse_units[staging_if[i].data.rs2] == `EX_SFU) begin
|
||||
perf_inuse_sfu_per_cycle[i][inuse_sfu[staging_if[i].data.rs2]] = 1;
|
||||
end
|
||||
end
|
||||
if (operands_busy_r[3]) begin
|
||||
perf_inuse_units_per_cycle[i][inuse_units[staging_if[i].data.rs3]] = 1;
|
||||
if (inuse_units[staging_if[i].data.rs3] == `EX_SFU) begin
|
||||
perf_inuse_sfu_per_cycle[i][inuse_sfu[staging_if[i].data.rs3]] = 1;
|
||||
perf_inuse_units_per_cycle[w] = '0;
|
||||
perf_inuse_sfu_per_cycle[w] = '0;
|
||||
for (integer i = 0; i < NUM_OPDS; ++i) begin
|
||||
if (staging_if[w].valid && operands_busy[i]) begin
|
||||
perf_inuse_units_per_cycle[w][inuse_units[stg_opds[i]]] = 1;
|
||||
if (inuse_units[stg_opds[i]] == `EX_SFU) begin
|
||||
perf_inuse_sfu_per_cycle[w][inuse_sfu[stg_opds[i]]] = 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
assign perf_issue_stalls_per_cycle[i] = staging_if[i].valid && ~staging_if[i].ready;
|
||||
`endif
|
||||
|
||||
always @(*) begin
|
||||
operands_busy_n = operands_busy_r;
|
||||
if (ibuffer_fire) begin
|
||||
operands_busy_n = {
|
||||
inuse_regs[ibuffer_if[i].data.rs3],
|
||||
inuse_regs[ibuffer_if[i].data.rs2],
|
||||
inuse_regs[ibuffer_if[i].data.rs1],
|
||||
inuse_regs[ibuffer_if[i].data.rd]
|
||||
};
|
||||
end
|
||||
if (writeback_fire) begin
|
||||
for (integer i = 0; i < NUM_OPDS; ++i) begin
|
||||
operands_busy_n[i] = operands_busy[i];
|
||||
if (ibuffer_fire) begin
|
||||
if (writeback_if[iw].data.rd == ibuffer_if[i].data.rd) begin
|
||||
operands_busy_n[0] = 0;
|
||||
end
|
||||
if (writeback_if[iw].data.rd == ibuffer_if[i].data.rs1) begin
|
||||
operands_busy_n[1] = 0;
|
||||
end
|
||||
if (writeback_if[iw].data.rd == ibuffer_if[i].data.rs2) begin
|
||||
operands_busy_n[2] = 0;
|
||||
end
|
||||
if (writeback_if[iw].data.rd == ibuffer_if[i].data.rs3) begin
|
||||
operands_busy_n[3] = 0;
|
||||
end
|
||||
end else begin
|
||||
if (writeback_if[iw].data.rd == staging_if[i].data.rd) begin
|
||||
operands_busy_n[0] = 0;
|
||||
end
|
||||
if (writeback_if[iw].data.rd == staging_if[i].data.rs1) begin
|
||||
operands_busy_n[1] = 0;
|
||||
end
|
||||
if (writeback_if[iw].data.rd == staging_if[i].data.rs2) begin
|
||||
operands_busy_n[2] = 0;
|
||||
end
|
||||
if (writeback_if[iw].data.rd == staging_if[i].data.rs3) begin
|
||||
operands_busy_n[3] = 0;
|
||||
operands_busy_n[i] = inuse_regs[ibuf_opds[i]];
|
||||
end
|
||||
if (writeback_fire) begin
|
||||
if (ibuffer_fire) begin
|
||||
if (writeback_if.data.rd == ibuf_opds[i]) begin
|
||||
operands_busy_n[i] = 0;
|
||||
end
|
||||
end else begin
|
||||
if (writeback_if.data.rd == stg_opds[i]) begin
|
||||
operands_busy_n[i] = 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
if (staging_fire && staging_if[i].data.wb) begin
|
||||
if (staging_if[i].data.rd == ibuffer_if[i].data.rd) begin
|
||||
operands_busy_n[0] = 1;
|
||||
end
|
||||
if (staging_if[i].data.rd == ibuffer_if[i].data.rs1) begin
|
||||
operands_busy_n[1] = 1;
|
||||
end
|
||||
if (staging_if[i].data.rd == ibuffer_if[i].data.rs2) begin
|
||||
operands_busy_n[2] = 1;
|
||||
end
|
||||
if (staging_if[i].data.rd == ibuffer_if[i].data.rs3) begin
|
||||
operands_busy_n[3] = 1;
|
||||
if (staging_fire && staging_if[w].data.wb && staging_if[w].data.rd == ibuf_opds[i]) begin
|
||||
operands_busy_n[i] = 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -237,25 +179,24 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
inuse_regs <= '0;
|
||||
end else begin
|
||||
if (writeback_fire) begin
|
||||
inuse_regs[writeback_if[iw].data.rd] <= 0;
|
||||
inuse_regs[writeback_if.data.rd] <= 0;
|
||||
end
|
||||
if (staging_fire && staging_if[i].data.wb) begin
|
||||
inuse_regs[staging_if[i].data.rd] <= 1;
|
||||
if (staging_fire && staging_if[w].data.wb) begin
|
||||
inuse_regs[staging_if[w].data.rd] <= 1;
|
||||
end
|
||||
end
|
||||
operands_busy_r <= operands_busy_n;
|
||||
operands_busy <= operands_busy_n;
|
||||
operands_ready[w] <= ~(| operands_busy_n);
|
||||
`ifdef PERF_ENABLE
|
||||
if (staging_fire && staging_if[i].data.wb) begin
|
||||
inuse_units[staging_if[i].data.rd] <= staging_if[i].data.ex_type;
|
||||
if (staging_if[i].data.ex_type == `EX_SFU) begin
|
||||
inuse_sfu[staging_if[i].data.rd] <= sfu_type;
|
||||
if (staging_fire && staging_if[w].data.wb) begin
|
||||
inuse_units[staging_if[w].data.rd] <= staging_if[w].data.ex_type;
|
||||
if (staging_if[w].data.ex_type == `EX_SFU) begin
|
||||
inuse_sfu[staging_if[w].data.rd] <= op_to_sfu_type(staging_if[w].data.op_type);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
end
|
||||
|
||||
assign staging_opds_busy[i] = operands_busy_r;
|
||||
|
||||
`ifdef SIMULATION
|
||||
reg [31:0] timeout_ctr;
|
||||
|
||||
|
@ -263,11 +204,11 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
if (reset) begin
|
||||
timeout_ctr <= '0;
|
||||
end else begin
|
||||
if (staging_if[i].valid && ~staging_if[i].ready) begin
|
||||
if (staging_if[w].valid && ~staging_if[w].ready) begin
|
||||
`ifdef DBG_TRACE_PIPELINE
|
||||
`TRACE(3, ("%d: *** core%0d-scoreboard-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n",
|
||||
$time, CORE_ID, i, {staging_if[i].data.PC, 1'b0}, staging_if[i].data.tmask, timeout_ctr,
|
||||
operands_busy_r, staging_if[i].data.uuid));
|
||||
`TRACE(3, ("%t: *** %s-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n",
|
||||
$time, INSTANCE_ID, w, {staging_if[w].data.PC, 1'b0}, staging_if[w].data.tmask, timeout_ctr,
|
||||
operands_busy, staging_if[w].data.uuid))
|
||||
`endif
|
||||
timeout_ctr <= timeout_ctr + 1;
|
||||
end else if (ibuffer_fire) begin
|
||||
|
@ -277,59 +218,54 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
end
|
||||
|
||||
`RUNTIME_ASSERT((timeout_ctr < `STALL_TIMEOUT),
|
||||
("%t: *** core%0d-scoreboard-timeout: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)",
|
||||
$time, CORE_ID, i, {staging_if[i].data.PC, 1'b0}, staging_if[i].data.tmask, timeout_ctr,
|
||||
operands_busy_r, staging_if[i].data.uuid));
|
||||
("%t: *** %s timeout: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)",
|
||||
$time, INSTANCE_ID, w, {staging_if[w].data.PC, 1'b0}, staging_if[w].data.tmask, timeout_ctr,
|
||||
operands_busy, staging_if[w].data.uuid))
|
||||
|
||||
`RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if[iw].data.rd] != 0,
|
||||
("%t: *** core%0d: invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)",
|
||||
$time, CORE_ID, i, {writeback_if[iw].data.PC, 1'b0}, writeback_if[iw].data.tmask, writeback_if[iw].data.rd, writeback_if[iw].data.uuid));
|
||||
`RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if.data.rd] != 0,
|
||||
("%t: *** %s invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)",
|
||||
$time, INSTANCE_ID, w, {writeback_if.data.PC, 1'b0}, writeback_if.data.tmask, writeback_if.data.rd, writeback_if.data.uuid))
|
||||
`endif
|
||||
|
||||
end
|
||||
|
||||
`RESET_RELAY (arb_reset, reset);
|
||||
wire [PER_ISSUE_WARPS-1:0] arb_valid_in;
|
||||
wire [PER_ISSUE_WARPS-1:0][DATAW-1:0] arb_data_in;
|
||||
wire [PER_ISSUE_WARPS-1:0] arb_ready_in;
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
wire [ISSUE_RATIO-1:0] valid_in;
|
||||
wire [ISSUE_RATIO-1:0][DATAW-1:0] data_in;
|
||||
wire [ISSUE_RATIO-1:0] ready_in;
|
||||
|
||||
for (genvar j = 0; j < ISSUE_RATIO; ++j) begin
|
||||
wire operands_ready = ~(| staging_opds_busy[j * `ISSUE_WIDTH + i]);
|
||||
assign valid_in[j] = staging_if[j * `ISSUE_WIDTH + i].valid && operands_ready;
|
||||
assign data_in[j] = staging_if[j * `ISSUE_WIDTH + i].data;
|
||||
assign staging_if[j * `ISSUE_WIDTH + i].ready = ready_in[j] && operands_ready;
|
||||
end
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (ISSUE_RATIO),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER ("R"),
|
||||
.OUT_BUF (2)
|
||||
) out_arb (
|
||||
.clk (clk),
|
||||
.reset (arb_reset),
|
||||
.valid_in (valid_in),
|
||||
.ready_in (ready_in),
|
||||
.data_in (data_in),
|
||||
.data_out ({
|
||||
scoreboard_if[i].data.uuid,
|
||||
scoreboard_if[i].data.tmask,
|
||||
scoreboard_if[i].data.PC,
|
||||
scoreboard_if[i].data.ex_type,
|
||||
scoreboard_if[i].data.op_type,
|
||||
scoreboard_if[i].data.op_args,
|
||||
scoreboard_if[i].data.wb,
|
||||
scoreboard_if[i].data.rd,
|
||||
scoreboard_if[i].data.rs1,
|
||||
scoreboard_if[i].data.rs2,
|
||||
scoreboard_if[i].data.rs3
|
||||
}),
|
||||
.valid_out (scoreboard_if[i].valid),
|
||||
.ready_out (scoreboard_if[i].ready),
|
||||
.sel_out (scoreboard_if[i].data.wis)
|
||||
);
|
||||
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_arb_data_in
|
||||
assign arb_valid_in[w] = staging_if[w].valid && operands_ready[w];
|
||||
assign arb_data_in[w] = staging_if[w].data;
|
||||
assign staging_if[w].ready = arb_ready_in[w] && operands_ready[w];
|
||||
end
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (PER_ISSUE_WARPS),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER ("C"),
|
||||
.OUT_BUF (3)
|
||||
) out_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (arb_valid_in),
|
||||
.ready_in (arb_ready_in),
|
||||
.data_in (arb_data_in),
|
||||
.data_out ({
|
||||
scoreboard_if.data.uuid,
|
||||
scoreboard_if.data.tmask,
|
||||
scoreboard_if.data.PC,
|
||||
scoreboard_if.data.ex_type,
|
||||
scoreboard_if.data.op_type,
|
||||
scoreboard_if.data.op_args,
|
||||
scoreboard_if.data.wb,
|
||||
scoreboard_if.data.rd,
|
||||
scoreboard_if.data.rs1,
|
||||
scoreboard_if.data.rs2,
|
||||
scoreboard_if.data.rs3
|
||||
}),
|
||||
.valid_out (scoreboard_if.valid),
|
||||
.ready_out (scoreboard_if.ready),
|
||||
.sel_out (scoreboard_if.data.wis)
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_sfu_unit import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -39,25 +40,26 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
VX_commit_if.master commit_if [`ISSUE_WIDTH],
|
||||
VX_warp_ctl_if.master warp_ctl_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam BLOCK_SIZE = 1;
|
||||
localparam NUM_LANES = `NUM_SFU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
|
||||
localparam RSP_ARB_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + (NUM_LANES * `XLEN) + `NR_BITS + 1 + `PC_BITS + PID_WIDTH + 1 + 1;
|
||||
localparam RSP_ARB_SIZE = 1 + 1;
|
||||
localparam RSP_ARB_IDX_WCTL = 0;
|
||||
localparam RSP_ARB_IDX_CSRS = 1;
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam BLOCK_SIZE = 1;
|
||||
localparam NUM_LANES = `NUM_SFU_LANES;
|
||||
localparam PE_COUNT = 2;
|
||||
localparam PE_SEL_BITS = `CLOG2(PE_COUNT);
|
||||
localparam PE_IDX_WCTL = 0;
|
||||
localparam PE_IDX_CSRS = 1;
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) per_block_execute_if[BLOCK_SIZE]();
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) per_block_commit_if[BLOCK_SIZE]();
|
||||
|
||||
VX_dispatch_unit #(
|
||||
.BLOCK_SIZE (BLOCK_SIZE),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.OUT_BUF (1)
|
||||
.OUT_BUF (3)
|
||||
) dispatch_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -65,60 +67,58 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
.execute_if (per_block_execute_if)
|
||||
);
|
||||
|
||||
wire [RSP_ARB_SIZE-1:0] rsp_arb_valid_in;
|
||||
wire [RSP_ARB_SIZE-1:0] rsp_arb_ready_in;
|
||||
wire [RSP_ARB_SIZE-1:0][RSP_ARB_DATAW-1:0] rsp_arb_data_in;
|
||||
|
||||
// Warp control block
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) wctl_execute_if();
|
||||
) pe_execute_if[PE_COUNT]();
|
||||
|
||||
VX_commit_if#(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) wctl_commit_if();
|
||||
) pe_commit_if[PE_COUNT]();
|
||||
|
||||
assign wctl_execute_if.valid = per_block_execute_if[0].valid && `INST_SFU_IS_WCTL(per_block_execute_if[0].data.op_type);
|
||||
assign wctl_execute_if.data = per_block_execute_if[0].data;
|
||||
reg [PE_SEL_BITS-1:0] pe_select;
|
||||
always @(*) begin
|
||||
pe_select = PE_IDX_WCTL;
|
||||
if (`INST_SFU_IS_CSR(per_block_execute_if[0].data.op_type))
|
||||
pe_select = PE_IDX_CSRS;
|
||||
end
|
||||
|
||||
`RESET_RELAY (wctl_reset, reset);
|
||||
VX_pe_switch #(
|
||||
.PE_COUNT (PE_COUNT),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.ARBITER ("R"),
|
||||
.REQ_OUT_BUF(0),
|
||||
.RSP_OUT_BUF(3)
|
||||
) pe_switch (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.pe_sel (pe_select),
|
||||
.execute_in_if (per_block_execute_if[0]),
|
||||
.commit_out_if (per_block_commit_if[0]),
|
||||
.execute_out_if (pe_execute_if),
|
||||
.commit_in_if (pe_commit_if)
|
||||
);
|
||||
|
||||
VX_wctl_unit #(
|
||||
.CORE_ID (CORE_ID),
|
||||
.INSTANCE_ID ($sformatf("%s-wctl", INSTANCE_ID)),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) wctl_unit (
|
||||
.clk (clk),
|
||||
.reset (wctl_reset),
|
||||
.execute_if (wctl_execute_if),
|
||||
.reset (reset),
|
||||
.execute_if (pe_execute_if[PE_IDX_WCTL]),
|
||||
.warp_ctl_if(warp_ctl_if),
|
||||
.commit_if (wctl_commit_if)
|
||||
.commit_if (pe_commit_if[PE_IDX_WCTL])
|
||||
);
|
||||
|
||||
assign rsp_arb_valid_in[RSP_ARB_IDX_WCTL] = wctl_commit_if.valid;
|
||||
assign rsp_arb_data_in[RSP_ARB_IDX_WCTL] = wctl_commit_if.data;
|
||||
assign wctl_commit_if.ready = rsp_arb_ready_in[RSP_ARB_IDX_WCTL];
|
||||
|
||||
// CSR unit
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) csr_execute_if();
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) csr_commit_if();
|
||||
|
||||
assign csr_execute_if.valid = per_block_execute_if[0].valid && `INST_SFU_IS_CSR(per_block_execute_if[0].data.op_type);
|
||||
assign csr_execute_if.data = per_block_execute_if[0].data;
|
||||
|
||||
`RESET_RELAY (csr_reset, reset);
|
||||
|
||||
VX_csr_unit #(
|
||||
.INSTANCE_ID ($sformatf("%s-csr", INSTANCE_ID)),
|
||||
.CORE_ID (CORE_ID),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) csr_unit (
|
||||
.clk (clk),
|
||||
.reset (csr_reset),
|
||||
.reset (reset),
|
||||
|
||||
.base_dcrs (base_dcrs),
|
||||
.execute_if (csr_execute_if),
|
||||
.execute_if (pe_execute_if[PE_IDX_CSRS]),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.mem_perf_if (mem_perf_if),
|
||||
|
@ -131,57 +131,17 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
.sched_csr_if (sched_csr_if),
|
||||
.commit_csr_if (commit_csr_if),
|
||||
.commit_if (csr_commit_if)
|
||||
);
|
||||
|
||||
assign rsp_arb_valid_in[RSP_ARB_IDX_CSRS] = csr_commit_if.valid;
|
||||
assign rsp_arb_data_in[RSP_ARB_IDX_CSRS] = csr_commit_if.data;
|
||||
assign csr_commit_if.ready = rsp_arb_ready_in[RSP_ARB_IDX_CSRS];
|
||||
|
||||
// can accept new request?
|
||||
|
||||
reg sfu_req_ready;
|
||||
always @(*) begin
|
||||
case (per_block_execute_if[0].data.op_type)
|
||||
`INST_SFU_CSRRW,
|
||||
`INST_SFU_CSRRS,
|
||||
`INST_SFU_CSRRC: sfu_req_ready = csr_execute_if.ready;
|
||||
default: sfu_req_ready = wctl_execute_if.ready;
|
||||
endcase
|
||||
end
|
||||
assign per_block_execute_if[0].ready = sfu_req_ready;
|
||||
|
||||
// response arbitration
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) arb_commit_if[BLOCK_SIZE]();
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (RSP_ARB_SIZE),
|
||||
.DATAW (RSP_ARB_DATAW),
|
||||
.ARBITER ("R"),
|
||||
.OUT_BUF (3)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (rsp_arb_valid_in),
|
||||
.ready_in (rsp_arb_ready_in),
|
||||
.data_in (rsp_arb_data_in),
|
||||
.data_out (arb_commit_if[0].data),
|
||||
.valid_out (arb_commit_if[0].valid),
|
||||
.ready_out (arb_commit_if[0].ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
.commit_if (pe_commit_if[PE_IDX_CSRS])
|
||||
);
|
||||
|
||||
VX_gather_unit #(
|
||||
.BLOCK_SIZE (BLOCK_SIZE),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.OUT_BUF (1)
|
||||
.OUT_BUF (3)
|
||||
) gather_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.commit_in_if (arb_commit_if),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.commit_in_if (per_block_commit_if),
|
||||
.commit_out_if (commit_if)
|
||||
);
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_split_join import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -31,7 +31,7 @@ module VX_split_join import VX_gpu_pkg::*; #(
|
|||
input wire [`NW_WIDTH-1:0] stack_wid,
|
||||
output wire [`DV_STACK_SIZEW-1:0] stack_ptr
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
|
||||
wire [(`NUM_THREADS+`PC_BITS)-1:0] ipdom_data [`NUM_WARPS-1:0];
|
||||
wire [`DV_STACK_SIZEW-1:0] ipdom_q_ptr [`NUM_WARPS-1:0];
|
||||
|
@ -45,16 +45,14 @@ module VX_split_join import VX_gpu_pkg::*; #(
|
|||
wire ipdom_push = valid && split.valid && split.is_dvg;
|
||||
wire ipdom_pop = valid && sjoin.valid && sjoin_is_dvg;
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
|
||||
`RESET_RELAY (ipdom_reset, reset);
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_ipdom_stacks
|
||||
VX_ipdom_stack #(
|
||||
.WIDTH (`NUM_THREADS+`PC_BITS),
|
||||
.DEPTH (`DV_STACK_SIZE)
|
||||
.DEPTH (`DV_STACK_SIZE),
|
||||
.OUT_REG (0)
|
||||
) ipdom_stack (
|
||||
.clk (clk),
|
||||
.reset (ipdom_reset),
|
||||
.reset (reset),
|
||||
.q0 (ipdom_q0),
|
||||
.q1 (ipdom_q1),
|
||||
.d (ipdom_data[i]),
|
||||
|
|
|
@ -1,387 +0,0 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`ifndef VX_TRACE_VH
|
||||
`define VX_TRACE_VH
|
||||
|
||||
`ifdef SIMULATION
|
||||
|
||||
// Emits a short mnemonic for an execution-unit selector through the TRACE macro.
//   level   : trace verbosity level, forwarded unchanged to TRACE
//   ex_type : execution-unit selector, compared against the `EX_* constants
// Any selector that matches none of the four listed constants is traced as "?".
task trace_ex_type(input int level, input [`EX_BITS-1:0] ex_type);
    case (ex_type)
    `EX_ALU: begin
        `TRACE(level, ("ALU"));
    end
    `EX_LSU: begin
        `TRACE(level, ("LSU"));
    end
    `EX_FPU: begin
        `TRACE(level, ("FPU"));
    end
    `EX_SFU: begin
        `TRACE(level, ("SFU"));
    end
    default: begin
        `TRACE(level, ("?"));
    end
    endcase
endtask
|
||||
|
||||
// Emits the assembly mnemonic of a decoded instruction through the TRACE macro.
//   level   : trace verbosity level, forwarded unchanged to TRACE
//   ex_type : execution-unit selector (`EX_ALU / `EX_LSU / `EX_FPU / `EX_SFU)
//   op_type : operation code; it is cast to the per-unit opcode width before
//             being matched against the `INST_* constants
//   op_args : decoded operand/modifier bundle; only the member that corresponds
//             to the selected unit (alu / lsu / fpu / wctl / csr) is read
// Any combination that matches no listed constant is traced as "?".
//
// NOTE(review): TRACE is a macro defined outside this file. Every if/else that
// wraps a TRACE invocation is braced with begin/end so that the else can never
// bind to an `if` introduced by the macro expansion (dangling else).
task trace_ex_op(input int level,
                 input [`EX_BITS-1:0] ex_type,
                 input [`INST_OP_BITS-1:0] op_type,
                 input VX_gpu_pkg::op_args_t op_args
);
    case (ex_type)
    `EX_ALU: begin
        case (op_args.alu.xtype)
        `ALU_TYPE_ARITH: begin
            // is_w selects the "W"-suffixed mnemonics, use_imm the immediate forms
            if (op_args.alu.is_w) begin
                if (op_args.alu.use_imm) begin
                    case (`INST_ALU_BITS'(op_type))
                    `INST_ALU_ADD: `TRACE(level, ("ADDIW"));
                    `INST_ALU_SLL: `TRACE(level, ("SLLIW"));
                    `INST_ALU_SRL: `TRACE(level, ("SRLIW"));
                    `INST_ALU_SRA: `TRACE(level, ("SRAIW"));
                    default:       `TRACE(level, ("?"));
                    endcase
                end else begin
                    case (`INST_ALU_BITS'(op_type))
                    `INST_ALU_ADD: `TRACE(level, ("ADDW"));
                    `INST_ALU_SUB: `TRACE(level, ("SUBW"));
                    `INST_ALU_SLL: `TRACE(level, ("SLLW"));
                    `INST_ALU_SRL: `TRACE(level, ("SRLW"));
                    `INST_ALU_SRA: `TRACE(level, ("SRAW"));
                    default:       `TRACE(level, ("?"));
                    endcase
                end
            end else begin
                if (op_args.alu.use_imm) begin
                    case (`INST_ALU_BITS'(op_type))
                    `INST_ALU_ADD:   `TRACE(level, ("ADDI"));
                    `INST_ALU_SLL:   `TRACE(level, ("SLLI"));
                    `INST_ALU_SRL:   `TRACE(level, ("SRLI"));
                    `INST_ALU_SRA:   `TRACE(level, ("SRAI"));
                    `INST_ALU_SLT:   `TRACE(level, ("SLTI"));
                    `INST_ALU_SLTU:  `TRACE(level, ("SLTIU"));
                    `INST_ALU_XOR:   `TRACE(level, ("XORI"));
                    `INST_ALU_OR:    `TRACE(level, ("ORI"));
                    `INST_ALU_AND:   `TRACE(level, ("ANDI"));
                    `INST_ALU_LUI:   `TRACE(level, ("LUI"));
                    `INST_ALU_AUIPC: `TRACE(level, ("AUIPC"));
                    default:         `TRACE(level, ("?"));
                    endcase
                end else begin
                    case (`INST_ALU_BITS'(op_type))
                    `INST_ALU_ADD:   `TRACE(level, ("ADD"));
                    `INST_ALU_SUB:   `TRACE(level, ("SUB"));
                    `INST_ALU_SLL:   `TRACE(level, ("SLL"));
                    `INST_ALU_SRL:   `TRACE(level, ("SRL"));
                    `INST_ALU_SRA:   `TRACE(level, ("SRA"));
                    `INST_ALU_SLT:   `TRACE(level, ("SLT"));
                    `INST_ALU_SLTU:  `TRACE(level, ("SLTU"));
                    `INST_ALU_XOR:   `TRACE(level, ("XOR"));
                    `INST_ALU_OR:    `TRACE(level, ("OR"));
                    `INST_ALU_AND:   `TRACE(level, ("AND"));
                    `INST_ALU_CZEQ:  `TRACE(level, ("CZERO.EQZ"));
                    `INST_ALU_CZNE:  `TRACE(level, ("CZERO.NEZ"));
                    default:         `TRACE(level, ("?"));
                    endcase
                end
            end
        end
        `ALU_TYPE_BRANCH: begin
            case (`INST_BR_BITS'(op_type))
            `INST_BR_EQ:    `TRACE(level, ("BEQ"));
            `INST_BR_NE:    `TRACE(level, ("BNE"));
            `INST_BR_LT:    `TRACE(level, ("BLT"));
            `INST_BR_GE:    `TRACE(level, ("BGE"));
            `INST_BR_LTU:   `TRACE(level, ("BLTU"));
            `INST_BR_GEU:   `TRACE(level, ("BGEU"));
            `INST_BR_JAL:   `TRACE(level, ("JAL"));
            `INST_BR_JALR:  `TRACE(level, ("JALR"));
            `INST_BR_ECALL: `TRACE(level, ("ECALL"));
            `INST_BR_EBREAK:`TRACE(level, ("EBREAK"));
            `INST_BR_URET:  `TRACE(level, ("URET"));
            `INST_BR_SRET:  `TRACE(level, ("SRET"));
            `INST_BR_MRET:  `TRACE(level, ("MRET"));
            default:        `TRACE(level, ("?"));
            endcase
        end
        `ALU_TYPE_MULDIV: begin
            if (op_args.alu.is_w) begin
                case (`INST_M_BITS'(op_type))
                `INST_M_MUL:  `TRACE(level, ("MULW"));
                `INST_M_DIV:  `TRACE(level, ("DIVW"));
                `INST_M_DIVU: `TRACE(level, ("DIVUW"));
                `INST_M_REM:  `TRACE(level, ("REMW"));
                `INST_M_REMU: `TRACE(level, ("REMUW"));
                default:      `TRACE(level, ("?"));
                endcase
            end else begin
                case (`INST_M_BITS'(op_type))
                `INST_M_MUL:   `TRACE(level, ("MUL"));
                `INST_M_MULH:  `TRACE(level, ("MULH"));
                `INST_M_MULHSU:`TRACE(level, ("MULHSU"));
                `INST_M_MULHU: `TRACE(level, ("MULHU"));
                `INST_M_DIV:   `TRACE(level, ("DIV"));
                `INST_M_DIVU:  `TRACE(level, ("DIVU"));
                `INST_M_REM:   `TRACE(level, ("REM"));
                `INST_M_REMU:  `TRACE(level, ("REMU"));
                default:       `TRACE(level, ("?"));
                endcase
            end
        end
        default: `TRACE(level, ("?"));
        endcase
    end
    `EX_LSU: begin
        // is_float selects the F-prefixed load/store mnemonics
        if (op_args.lsu.is_float) begin
            case (`INST_LSU_BITS'(op_type))
            `INST_LSU_LW: `TRACE(level, ("FLW"));
            `INST_LSU_LD: `TRACE(level, ("FLD"));
            `INST_LSU_SW: `TRACE(level, ("FSW"));
            `INST_LSU_SD: `TRACE(level, ("FSD"));
            default:      `TRACE(level, ("?"));
            endcase
        end else begin
            case (`INST_LSU_BITS'(op_type))
            `INST_LSU_LB: `TRACE(level, ("LB"));
            `INST_LSU_LH: `TRACE(level, ("LH"));
            `INST_LSU_LW: `TRACE(level, ("LW"));
            `INST_LSU_LD: `TRACE(level, ("LD"));
            `INST_LSU_LBU:`TRACE(level, ("LBU"));
            `INST_LSU_LHU:`TRACE(level, ("LHU"));
            `INST_LSU_LWU:`TRACE(level, ("LWU"));
            `INST_LSU_SB: `TRACE(level, ("SB"));
            `INST_LSU_SH: `TRACE(level, ("SH"));
            `INST_LSU_SW: `TRACE(level, ("SW"));
            `INST_LSU_SD: `TRACE(level, ("SD"));
            `INST_LSU_FENCE:`TRACE(level,("FENCE"));
            default:      `TRACE(level, ("?"));
            endcase
        end
    end
    `EX_FPU: begin
        // fmt[0] picks the ".D" mnemonic when set and the ".S" mnemonic otherwise
        case (`INST_FPU_BITS'(op_type))
        `INST_FPU_ADD: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FADD.D"));
            end else begin
                `TRACE(level, ("FADD.S"));
            end
        end
        `INST_FPU_SUB: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FSUB.D"));
            end else begin
                `TRACE(level, ("FSUB.S"));
            end
        end
        `INST_FPU_MUL: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FMUL.D"));
            end else begin
                `TRACE(level, ("FMUL.S"));
            end
        end
        `INST_FPU_DIV: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FDIV.D"));
            end else begin
                `TRACE(level, ("FDIV.S"));
            end
        end
        `INST_FPU_SQRT: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FSQRT.D"));
            end else begin
                `TRACE(level, ("FSQRT.S"));
            end
        end
        `INST_FPU_MADD: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FMADD.D"));
            end else begin
                `TRACE(level, ("FMADD.S"));
            end
        end
        `INST_FPU_MSUB: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FMSUB.D"));
            end else begin
                `TRACE(level, ("FMSUB.S"));
            end
        end
        `INST_FPU_NMADD: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FNMADD.D"));
            end else begin
                `TRACE(level, ("FNMADD.S"));
            end
        end
        `INST_FPU_NMSUB: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FNMSUB.D"));
            end else begin
                `TRACE(level, ("FNMSUB.S"));
            end
        end
        `INST_FPU_CMP: begin
            // the low two bits of frm select the comparison mnemonic
            if (op_args.fpu.fmt[0]) begin
                case (op_args.fpu.frm[1:0])
                0:       `TRACE(level, ("FLE.D"));
                1:       `TRACE(level, ("FLT.D"));
                2:       `TRACE(level, ("FEQ.D"));
                default: `TRACE(level, ("?"));
                endcase
            end else begin
                case (op_args.fpu.frm[1:0])
                0:       `TRACE(level, ("FLE.S"));
                1:       `TRACE(level, ("FLT.S"));
                2:       `TRACE(level, ("FEQ.S"));
                default: `TRACE(level, ("?"));
                endcase
            end
        end
        `INST_FPU_F2F: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FCVT.D.S"));
            end else begin
                `TRACE(level, ("FCVT.S.D"));
            end
        end
        `INST_FPU_F2I: begin
            // fmt[1] picks the "L" integer mnemonic when set and "W" otherwise
            if (op_args.fpu.fmt[0]) begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.L.D"));
                end else begin
                    `TRACE(level, ("FCVT.W.D"));
                end
            end else begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.L.S"));
                end else begin
                    `TRACE(level, ("FCVT.W.S"));
                end
            end
        end
        `INST_FPU_F2U: begin
            if (op_args.fpu.fmt[0]) begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.LU.D"));
                end else begin
                    `TRACE(level, ("FCVT.WU.D"));
                end
            end else begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.LU.S"));
                end else begin
                    `TRACE(level, ("FCVT.WU.S"));
                end
            end
        end
        `INST_FPU_I2F: begin
            if (op_args.fpu.fmt[0]) begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.D.L"));
                end else begin
                    `TRACE(level, ("FCVT.D.W"));
                end
            end else begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.S.L"));
                end else begin
                    `TRACE(level, ("FCVT.S.W"));
                end
            end
        end
        `INST_FPU_U2F: begin
            if (op_args.fpu.fmt[0]) begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.D.LU"));
                end else begin
                    `TRACE(level, ("FCVT.D.WU"));
                end
            end else begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.S.LU"));
                end else begin
                    `TRACE(level, ("FCVT.S.WU"));
                end
            end
        end
        `INST_FPU_MISC: begin
            // for MISC the frm field selects the operation; a default arm is
            // provided so an unmatched (e.g. unknown) value is still reported
            if (op_args.fpu.fmt[0]) begin
                case (op_args.fpu.frm)
                0:       `TRACE(level, ("FSGNJ.D"));
                1:       `TRACE(level, ("FSGNJN.D"));
                2:       `TRACE(level, ("FSGNJX.D"));
                3:       `TRACE(level, ("FCLASS.D"));
                4:       `TRACE(level, ("FMV.X.D"));
                5:       `TRACE(level, ("FMV.D.X"));
                6:       `TRACE(level, ("FMIN.D"));
                7:       `TRACE(level, ("FMAX.D"));
                default: `TRACE(level, ("?"));
                endcase
            end else begin
                case (op_args.fpu.frm)
                0:       `TRACE(level, ("FSGNJ.S"));
                1:       `TRACE(level, ("FSGNJN.S"));
                2:       `TRACE(level, ("FSGNJX.S"));
                3:       `TRACE(level, ("FCLASS.S"));
                4:       `TRACE(level, ("FMV.X.S"));
                5:       `TRACE(level, ("FMV.S.X"));
                6:       `TRACE(level, ("FMIN.S"));
                7:       `TRACE(level, ("FMAX.S"));
                default: `TRACE(level, ("?"));
                endcase
            end
        end
        default: `TRACE(level, ("?"));
        endcase
    end
    `EX_SFU: begin
        case (`INST_SFU_BITS'(op_type))
        `INST_SFU_TMC:   `TRACE(level, ("TMC"));
        `INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN"));
        `INST_SFU_SPLIT: begin
            // wctl.is_neg selects the ".N" variant
            if (op_args.wctl.is_neg) begin
                `TRACE(level, ("SPLIT.N"));
            end else begin
                `TRACE(level, ("SPLIT"));
            end
        end
        `INST_SFU_JOIN:  `TRACE(level, ("JOIN"));
        `INST_SFU_BAR:   `TRACE(level, ("BAR"));
        `INST_SFU_PRED: begin
            if (op_args.wctl.is_neg) begin
                `TRACE(level, ("PRED.N"));
            end else begin
                `TRACE(level, ("PRED"));
            end
        end
        `INST_SFU_CSRRW: begin
            // csr.use_imm selects the immediate ("I"-suffixed) variant
            if (op_args.csr.use_imm) begin
                `TRACE(level, ("CSRRWI"));
            end else begin
                `TRACE(level, ("CSRRW"));
            end
        end
        `INST_SFU_CSRRS: begin
            if (op_args.csr.use_imm) begin
                `TRACE(level, ("CSRRSI"));
            end else begin
                `TRACE(level, ("CSRRS"));
            end
        end
        `INST_SFU_CSRRC: begin
            if (op_args.csr.use_imm) begin
                `TRACE(level, ("CSRRCI"));
            end else begin
                `TRACE(level, ("CSRRC"));
            end
        end
        default:         `TRACE(level, ("?"));
        endcase
    end
    default: `TRACE(level, ("?"));
    endcase
endtask
|
||||
|
||||
// Emits the unit-specific operand fields of a decoded instruction through the
// TRACE macro, as a ", name=value" list.
//   level   : trace verbosity level, forwarded unchanged to TRACE
//   ex_type : execution-unit selector; picks which member of op_args is printed
//   op_type : operation code; only consulted for `EX_SFU, through the
//             `INST_SFU_IS_CSR macro
//   op_args : decoded operand/modifier bundle
// Nothing is emitted for an SFU operation that `INST_SFU_IS_CSR rejects, nor for
// a selector outside the four listed constants.
task trace_op_args(input int level,
                   input [`EX_BITS-1:0] ex_type,
                   input [`INST_OP_BITS-1:0] op_type,
                   input VX_gpu_pkg::op_args_t op_args
);
    case (ex_type)
    `EX_ALU: `TRACE(level, (", use_PC=%b, use_imm=%b, imm=0x%0h", op_args.alu.use_PC, op_args.alu.use_imm, op_args.alu.imm));
    `EX_LSU: `TRACE(level, (", offset=0x%0h", op_args.lsu.offset));
    `EX_FPU: `TRACE(level, (", fmt=0x%0h, frm=0x%0h", op_args.fpu.fmt, op_args.fpu.frm));
    `EX_SFU: begin
        // only operations accepted by INST_SFU_IS_CSR have fields to print
        if (`INST_SFU_IS_CSR(op_type)) begin
            `TRACE(level, (", addr=0x%0h, use_imm=%b, imm=0x%0h", op_args.csr.addr, op_args.csr.use_imm, op_args.csr.imm));
        end
    end
    default: begin
        // no operand fields for other selectors
    end
    endcase
endtask
|
||||
|
||||
// Emits the symbolic name of a base DCR address through the TRACE macro.
//   level : trace verbosity level, forwarded unchanged to TRACE
//   addr  : DCR address, compared against the `VX_DCR_BASE_* constants
// An address that matches none of the listed constants is traced as "?".
task trace_base_dcr(input int level, input [`VX_DCR_ADDR_WIDTH-1:0] addr);
    case (addr)
    `VX_DCR_BASE_STARTUP_ADDR0: begin
        `TRACE(level, ("STARTUP_ADDR0"));
    end
    `VX_DCR_BASE_STARTUP_ADDR1: begin
        `TRACE(level, ("STARTUP_ADDR1"));
    end
    `VX_DCR_BASE_STARTUP_ARG0: begin
        `TRACE(level, ("STARTUP_ARG0"));
    end
    `VX_DCR_BASE_STARTUP_ARG1: begin
        `TRACE(level, ("STARTUP_ARG1"));
    end
    `VX_DCR_BASE_MPM_CLASS: begin
        `TRACE(level, ("MPM_CLASS"));
    end
    default: begin
        `TRACE(level, ("?"));
    end
    endcase
endtask
|
||||
|
||||
`endif
|
||||
|
||||
`endif // VX_TRACE_VH
|
44
hw/rtl/core/VX_uuid_gen.sv
Normal file
44
hw/rtl/core/VX_uuid_gen.sv
Normal file
|
@ -0,0 +1,44 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Per-warp UUID generator.
//
// The output is the concatenation {g_wid, counter}:
//   - counter is a 32-bit value kept per warp; it reads as 0 until the first
//     `incr` for that warp after reset, and each `incr` stores the value that
//     follows the one currently presented (1 on the first increment).
//   - g_wid occupies the remaining UUID_WIDTH-32 upper bits and is computed as
//     (CORE_ID << `NW_BITS) + wid.
//
// Parameters:
//   CORE_ID    : value folded into the upper bits of the UUID
//   UUID_WIDTH : total output width; the lower 32 bits are the counter
//                (NOTE(review): the code assumes UUID_WIDTH > 32 - confirm)
// Ports:
//   clk, reset : clock and synchronous reset (reset clears the valid flags only)
//   incr       : advance the counter of warp `wid`
//   wid        : warp selecting which counter is read and updated
//   uuid       : combinational UUID for the currently selected warp
module VX_uuid_gen import VX_gpu_pkg::*; #(
    parameter CORE_ID    = 0,
    parameter UUID_WIDTH = 48
) (
    input wire                   clk,
    input wire                   reset,
    input wire                   incr,
    input wire [`NW_WIDTH-1:0]   wid,
    output wire [UUID_WIDTH-1:0] uuid
);
    localparam GNW_WIDTH = UUID_WIDTH - 32;

    // counter storage is not reset; a per-warp flag marks which entries hold
    // a value written since the last reset
    reg [31:0] uuid_cntrs [0:`NUM_WARPS-1];
    reg [`NUM_WARPS-1:0] has_uuid_cntrs;

    wire        cntr_valid = has_uuid_cntrs[wid];
    wire [31:0] cntr_value = cntr_valid ? uuid_cntrs[wid] : 0;
    wire [31:0] cntr_next  = cntr_valid ? (uuid_cntrs[wid] + 1) : 1;

    // valid flags: cleared on reset, set on the first increment of a warp
    always @(posedge clk) begin
        if (reset) begin
            has_uuid_cntrs <= '0;
        end else if (incr) begin
            has_uuid_cntrs[wid] <= 1;
        end
    end

    // counters: written on every increment, independently of reset
    always @(posedge clk) begin
        if (incr) begin
            uuid_cntrs[wid] <= cntr_next;
        end
    end

    wire [GNW_WIDTH-1:0] g_wid = (GNW_WIDTH'(CORE_ID) << `NW_BITS) + GNW_WIDTH'(wid);

    assign uuid = {g_wid, cntr_value};

endmodule
|
|
@ -14,7 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_wctl_unit import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0,
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter NUM_LANES = 1
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -27,7 +27,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
VX_warp_ctl_if.master warp_ctl_if,
|
||||
VX_commit_if.master commit_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam LANE_BITS = `CLOG2(NUM_LANES);
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
|
@ -50,9 +50,9 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
wire is_bar = (execute_if.data.op_type == `INST_SFU_BAR);
|
||||
|
||||
wire [`UP(LANE_BITS)-1:0] tid;
|
||||
if (LANE_BITS != 0) begin
|
||||
if (LANE_BITS != 0) begin : g_tid
|
||||
assign tid = execute_if.data.tid[0 +: LANE_BITS];
|
||||
end else begin
|
||||
end else begin : g_no_tid
|
||||
assign tid = 0;
|
||||
end
|
||||
|
||||
|
@ -63,7 +63,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
wire not_pred = execute_if.data.op_args.wctl.is_neg;
|
||||
|
||||
wire [NUM_LANES-1:0] taken;
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_taken
|
||||
assign taken[i] = (execute_if.data.rs1_data[i][0] ^ not_pred);
|
||||
end
|
||||
|
||||
|
@ -131,7 +131,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
// wspawn
|
||||
|
||||
wire [`NUM_WARPS-1:0] wspawn_wmask;
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_wspawn_wmask
|
||||
assign wspawn_wmask[i] = (i < rs1_data[`NW_BITS:0]) && (i != execute_if.data.wid);
|
||||
end
|
||||
assign wspawn.valid = is_wspawn;
|
||||
|
@ -162,7 +162,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
assign warp_ctl_if.sjoin = sjoin_r;
|
||||
assign warp_ctl_if.barrier = barrier_r;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_commit_if
|
||||
assign commit_if.data.data[i] = `XLEN'(dvstack_ptr);
|
||||
end
|
||||
|
||||
|
|
|
@ -1,17 +1,17 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Modified port of cast module from fpnew Library
|
||||
// Modified port of cast module from fpnew Library
|
||||
// reference: https://github.com/pulp-platform/fpnew
|
||||
|
||||
`include "VX_fpu_define.vh"
|
||||
|
@ -22,7 +22,8 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
parameter LATENCY = 1,
|
||||
parameter INT_WIDTH = 32,
|
||||
parameter MAN_BITS = 23,
|
||||
parameter EXP_BITS = 8
|
||||
parameter EXP_BITS = 8,
|
||||
parameter OUT_REG = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -35,10 +36,10 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
input wire is_signed,
|
||||
|
||||
input wire [31:0] dataa,
|
||||
output wire [31:0] result,
|
||||
output wire [31:0] result,
|
||||
|
||||
output wire [`FP_FLAGS_BITS-1:0] fflags
|
||||
);
|
||||
);
|
||||
// Constants
|
||||
localparam EXP_BIAS = 2**(EXP_BITS-1)-1;
|
||||
|
||||
|
@ -55,11 +56,11 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
localparam FMT_SHIFT_COMPENSATION = S_MAN_WIDTH - 1 - MAN_BITS;
|
||||
localparam NUM_FP_STICKY = 2 * S_MAN_WIDTH - MAN_BITS - 1; // removed mantissa, 1. and R
|
||||
localparam NUM_INT_STICKY = 2 * S_MAN_WIDTH - INT_WIDTH; // removed int and R
|
||||
|
||||
|
||||
// Input processing
|
||||
|
||||
fclass_t fclass;
|
||||
VX_fp_classifier #(
|
||||
|
||||
fclass_t fclass;
|
||||
VX_fp_classifier #(
|
||||
.EXP_BITS (EXP_BITS),
|
||||
.MAN_BITS (MAN_BITS)
|
||||
) fp_classifier (
|
||||
|
@ -69,9 +70,9 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
);
|
||||
|
||||
wire [S_MAN_WIDTH-1:0] input_mant;
|
||||
wire [S_EXP_WIDTH-1:0] input_exp;
|
||||
wire [S_EXP_WIDTH-1:0] input_exp;
|
||||
wire input_sign;
|
||||
|
||||
|
||||
wire i2f_sign = dataa[INT_WIDTH-1];
|
||||
wire f2i_sign = dataa[INT_WIDTH-1] && is_signed;
|
||||
wire [S_MAN_WIDTH-1:0] f2i_mantissa = f2i_sign ? (-dataa) : dataa;
|
||||
|
@ -81,7 +82,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
assign input_sign = is_itof ? f2i_sign : i2f_sign;
|
||||
|
||||
// Pipeline stage0
|
||||
|
||||
|
||||
wire is_itof_s0;
|
||||
wire is_signed_s0;
|
||||
wire [2:0] rnd_mode_s0;
|
||||
|
@ -92,7 +93,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `INST_FRM_BITS + 1 + $bits(fclass_t) + 1 + S_EXP_WIDTH + S_MAN_WIDTH),
|
||||
.DEPTH (LATENCY > 2)
|
||||
.DEPTH (LATENCY > 1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -100,7 +101,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
.data_in ({is_itof, is_signed, frm, fclass, input_sign, input_exp, input_mant}),
|
||||
.data_out ({is_itof_s0, is_signed_s0, rnd_mode_s0, fclass_s0, input_sign_s0, fmt_exponent_s0, encoded_mant_s0})
|
||||
);
|
||||
|
||||
|
||||
// Normalization
|
||||
|
||||
wire [LZC_RESULT_WIDTH-1:0] renorm_shamt_s0; // renormalization shift amount
|
||||
|
@ -113,12 +114,12 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
.data_out (renorm_shamt_s0),
|
||||
.valid_out (mant_is_nonzero_s0)
|
||||
);
|
||||
|
||||
|
||||
wire mant_is_zero_s0 = ~mant_is_nonzero_s0;
|
||||
|
||||
wire [S_MAN_WIDTH-1:0] input_mant_n_s0; // normalized input mantissa
|
||||
wire [S_MAN_WIDTH-1:0] input_mant_n_s0; // normalized input mantissa
|
||||
wire [S_EXP_WIDTH-1:0] input_exp_n_s0; // unbiased true exponent
|
||||
|
||||
|
||||
// Realign input mantissa, append zeroes if destination is wider
|
||||
assign input_mant_n_s0 = encoded_mant_s0 << renorm_shamt_s0;
|
||||
|
||||
|
@ -140,7 +141,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `INST_FRM_BITS + 1 + $bits(fclass_t) + 1 + 1 + S_MAN_WIDTH + S_EXP_WIDTH),
|
||||
.DEPTH (LATENCY > 1)
|
||||
.DEPTH (LATENCY > 2)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -169,30 +170,30 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
wire of_before_round_s1 = overflow;
|
||||
|
||||
// Pipeline stage2
|
||||
|
||||
|
||||
wire is_itof_s2;
|
||||
wire is_signed_s2;
|
||||
wire [2:0] rnd_mode_s2;
|
||||
fclass_t fclass_s2;
|
||||
fclass_t fclass_s2;
|
||||
wire mant_is_zero_s2;
|
||||
wire input_sign_s2;
|
||||
wire [2*S_MAN_WIDTH:0] destination_mant_s2;
|
||||
wire [EXP_BITS-1:0] final_exp_s2;
|
||||
wire of_before_round_s2;
|
||||
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + `INST_FRM_BITS + $bits(fclass_t) + 1 + 1 + (2*S_MAN_WIDTH+1) + EXP_BITS + 1),
|
||||
.DEPTH (LATENCY > 3)
|
||||
.DEPTH (LATENCY > 0)
|
||||
) pipe_reg2 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (enable),
|
||||
.data_in ({is_itof_s1, is_signed_s1, rnd_mode_s1, fclass_s1, mant_is_zero_s1, input_sign_s1, destination_mant_s1, final_exp_s1, of_before_round_s1}),
|
||||
.data_out ({is_itof_s2, is_signed_s2, rnd_mode_s2, fclass_s2, mant_is_zero_s2, input_sign_s2, destination_mant_s2, final_exp_s2, of_before_round_s2})
|
||||
);
|
||||
|
||||
);
|
||||
|
||||
// Rounding and classification
|
||||
|
||||
|
||||
wire [MAN_BITS-1:0] final_mant_s2; // mantissa after adjustments
|
||||
wire [INT_WIDTH-1:0] final_int_s2; // integer shifted in position
|
||||
wire [1:0] f2i_round_sticky_bits_s2, i2f_round_sticky_bits_s2;
|
||||
|
@ -237,20 +238,20 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
|
||||
wire is_itof_s3;
|
||||
wire is_signed_s3;
|
||||
fclass_t fclass_s3;
|
||||
fclass_t fclass_s3;
|
||||
wire mant_is_zero_s3;
|
||||
wire input_sign_s3;
|
||||
wire rounded_sign_s3;
|
||||
wire [INT_WIDTH-1:0] rounded_abs_s3;
|
||||
wire of_before_round_s3;
|
||||
wire of_before_round_s3;
|
||||
wire f2i_round_has_sticky_s3;
|
||||
wire i2f_round_has_sticky_s3;
|
||||
|
||||
`UNUSED_VAR (fclass_s3)
|
||||
`UNUSED_VAR (fclass_s3)
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + $bits(fclass_t) + 1 + 1 + 32 + 1 + 1 + 1 + 1),
|
||||
.DEPTH (LATENCY > 4)
|
||||
.DEPTH (LATENCY > 3)
|
||||
) pipe_reg3 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -258,7 +259,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
.data_in ({is_itof_s2, is_signed_s2, fclass_s2, mant_is_zero_s2, input_sign_s2, rounded_abs_s2, rounded_sign_s2, of_before_round_s2, f2i_round_has_sticky_s2, i2f_round_has_sticky_s2}),
|
||||
.data_out ({is_itof_s3, is_signed_s3, fclass_s3, mant_is_zero_s3, input_sign_s3, rounded_abs_s3, rounded_sign_s3, of_before_round_s3, f2i_round_has_sticky_s3, i2f_round_has_sticky_s3})
|
||||
);
|
||||
|
||||
|
||||
// Assemble regular result, nan box short ones. Int zeroes need to be detected
|
||||
wire [INT_WIDTH-1:0] fmt_result_s3 = mant_is_zero_s3 ? 0 : {rounded_sign_s3, rounded_abs_s3[EXP_BITS+MAN_BITS-1:0]};
|
||||
|
||||
|
@ -278,18 +279,18 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
f2i_special_result_s3[INT_WIDTH-2:0] = 2**(INT_WIDTH-1) - 1; // alone yields 2**(31)-1
|
||||
f2i_special_result_s3[INT_WIDTH-1] = ~is_signed_s3; // for unsigned casts yields 2**31
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned)
|
||||
wire f2i_result_is_special_s3 = fclass_s3.is_nan
|
||||
wire f2i_result_is_special_s3 = fclass_s3.is_nan
|
||||
| fclass_s3.is_inf
|
||||
| of_before_round_s3
|
||||
| (input_sign_s3 & ~is_signed_s3 & ~rounded_int_res_zero_s3);
|
||||
|
||||
|
||||
fflags_t f2i_special_status_s3;
|
||||
fflags_t i2f_status_s3, f2i_status_s3;
|
||||
fflags_t tmp_fflags_s3;
|
||||
|
||||
|
||||
// All integer special cases are invalid
|
||||
assign f2i_special_status_s3 = {1'b1, 4'h0};
|
||||
|
||||
|
@ -306,7 +307,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
|
||||
VX_pipe_register #(
|
||||
.DATAW (32 + `FP_FLAGS_BITS),
|
||||
.DEPTH (LATENCY > 0)
|
||||
.DEPTH (OUT_REG)
|
||||
) pipe_reg4 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -1,17 +1,17 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Modified port of noncomp module from fpnew Library
|
||||
// Modified port of noncomp module from fpnew Library
|
||||
// reference: https://github.com/pulp-platform/fpnew
|
||||
|
||||
`include "VX_fpu_define.vh"
|
||||
|
@ -19,9 +19,10 @@
|
|||
`ifdef FPU_DSP
|
||||
|
||||
module VX_fncp_unit import VX_fpu_pkg::*; #(
|
||||
parameter LATENCY = 2,
|
||||
parameter LATENCY = 1,
|
||||
parameter EXP_BITS = 8,
|
||||
parameter MAN_BITS = 23
|
||||
parameter MAN_BITS = 23,
|
||||
parameter OUT_REG = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -33,10 +34,10 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
|
||||
input wire [31:0] dataa,
|
||||
input wire [31:0] datab,
|
||||
output wire [31:0] result,
|
||||
output wire [31:0] result,
|
||||
|
||||
output wire [`FP_FLAGS_BITS-1:0] fflags
|
||||
);
|
||||
);
|
||||
localparam NEG_INF = 32'h00000001,
|
||||
NEG_NORM = 32'h00000002,
|
||||
NEG_SUBNORM = 32'h00000004,
|
||||
|
@ -55,15 +56,15 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
wire a_smaller, ab_equal;
|
||||
|
||||
// Setup
|
||||
assign a_sign = dataa[31];
|
||||
assign a_sign = dataa[31];
|
||||
assign a_exponent = dataa[30:23];
|
||||
assign a_mantissa = dataa[22:0];
|
||||
|
||||
assign b_sign = datab[31];
|
||||
assign b_sign = datab[31];
|
||||
assign b_exponent = datab[30:23];
|
||||
assign b_mantissa = datab[22:0];
|
||||
|
||||
VX_fp_classifier #(
|
||||
VX_fp_classifier #(
|
||||
.EXP_BITS (EXP_BITS),
|
||||
.MAN_BITS (MAN_BITS)
|
||||
) fp_class_a (
|
||||
|
@ -72,7 +73,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
.clss_o (a_fclass)
|
||||
);
|
||||
|
||||
VX_fp_classifier #(
|
||||
VX_fp_classifier #(
|
||||
.EXP_BITS (EXP_BITS),
|
||||
.MAN_BITS (MAN_BITS)
|
||||
) fp_class_b (
|
||||
|
@ -82,7 +83,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
);
|
||||
|
||||
assign a_smaller = (dataa < datab) ^ (a_sign || b_sign);
|
||||
assign ab_equal = (dataa == datab)
|
||||
assign ab_equal = (dataa == datab)
|
||||
|| (a_fclass.is_zero && b_fclass.is_zero); // +0 == -0
|
||||
|
||||
// Pipeline stage0
|
||||
|
@ -101,54 +102,54 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
|
||||
VX_pipe_register #(
|
||||
.DATAW (4 + 2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fclass_t) + 1 + 1),
|
||||
.DEPTH (LATENCY > 1)
|
||||
.DEPTH (LATENCY > 0)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (enable),
|
||||
.data_in ({op_mod, dataa, datab, a_sign, b_sign, a_exponent, a_mantissa, a_fclass, b_fclass, a_smaller, ab_equal}),
|
||||
.data_out ({op_mod_s0, dataa_s0, datab_s0, a_sign_s0, b_sign_s0, a_exponent_s0, a_mantissa_s0, a_fclass_s0, b_fclass_s0, a_smaller_s0, ab_equal_s0})
|
||||
);
|
||||
);
|
||||
|
||||
// FCLASS
|
||||
reg [31:0] fclass_mask_s0; // generate a 10-bit mask for integer reg
|
||||
always @(*) begin
|
||||
always @(*) begin
|
||||
if (a_fclass_s0.is_normal) begin
|
||||
fclass_mask_s0 = a_sign_s0 ? NEG_NORM : POS_NORM;
|
||||
end
|
||||
end
|
||||
else if (a_fclass_s0.is_inf) begin
|
||||
fclass_mask_s0 = a_sign_s0 ? NEG_INF : POS_INF;
|
||||
end
|
||||
end
|
||||
else if (a_fclass_s0.is_zero) begin
|
||||
fclass_mask_s0 = a_sign_s0 ? NEG_ZERO : POS_ZERO;
|
||||
end
|
||||
end
|
||||
else if (a_fclass_s0.is_subnormal) begin
|
||||
fclass_mask_s0 = a_sign_s0 ? NEG_SUBNORM : POS_SUBNORM;
|
||||
end
|
||||
end
|
||||
else if (a_fclass_s0.is_nan) begin
|
||||
fclass_mask_s0 = {22'h0, a_fclass_s0.is_quiet, a_fclass_s0.is_signaling, 8'h0};
|
||||
end
|
||||
else begin
|
||||
end
|
||||
else begin
|
||||
fclass_mask_s0 = QUT_NAN;
|
||||
end
|
||||
end
|
||||
|
||||
// Min/Max
|
||||
// Min/Max
|
||||
reg [31:0] fminmax_res_s0;
|
||||
always @(*) begin
|
||||
if (a_fclass_s0.is_nan && b_fclass_s0.is_nan)
|
||||
fminmax_res_s0 = {1'b0, 8'hff, 1'b1, 22'd0}; // canonical qNaN
|
||||
else if (a_fclass_s0.is_nan)
|
||||
else if (a_fclass_s0.is_nan)
|
||||
fminmax_res_s0 = datab_s0;
|
||||
else if (b_fclass_s0.is_nan)
|
||||
else if (b_fclass_s0.is_nan)
|
||||
fminmax_res_s0 = dataa_s0;
|
||||
else begin
|
||||
else begin
|
||||
// FMIN, FMAX
|
||||
fminmax_res_s0 = (op_mod_s0[0] ^ a_smaller_s0) ? dataa_s0 : datab_s0;
|
||||
end
|
||||
end
|
||||
|
||||
// Sign injection
|
||||
// Sign injection
|
||||
reg [31:0] fsgnj_res_s0; // result of sign injection
|
||||
always @(*) begin
|
||||
case (op_mod_s0[1:0])
|
||||
|
@ -158,12 +159,12 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
endcase
|
||||
end
|
||||
|
||||
// Comparison
|
||||
// Comparison
|
||||
reg fcmp_res_s0; // result of comparison
|
||||
reg fcmp_fflags_NV_s0; // comparison fflags
|
||||
always @(*) begin
|
||||
case (op_mod_s0[1:0])
|
||||
0: begin // LE
|
||||
0: begin // LE
|
||||
if (a_fclass_s0.is_nan || b_fclass_s0.is_nan) begin
|
||||
fcmp_res_s0 = 0;
|
||||
fcmp_fflags_NV_s0 = 1;
|
||||
|
@ -179,12 +180,12 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
end else begin
|
||||
fcmp_res_s0 = (a_smaller_s0 & ~ab_equal_s0);
|
||||
fcmp_fflags_NV_s0 = 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
2: begin // EQ
|
||||
if (a_fclass_s0.is_nan || b_fclass_s0.is_nan) begin
|
||||
fcmp_res_s0 = 0;
|
||||
fcmp_fflags_NV_s0 = a_fclass_s0.is_signaling | b_fclass_s0.is_signaling;
|
||||
fcmp_fflags_NV_s0 = a_fclass_s0.is_signaling | b_fclass_s0.is_signaling;
|
||||
end else begin
|
||||
fcmp_res_s0 = ab_equal_s0;
|
||||
fcmp_fflags_NV_s0 = 0;
|
||||
|
@ -192,7 +193,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
end
|
||||
default: begin
|
||||
fcmp_res_s0 = 'x;
|
||||
fcmp_fflags_NV_s0 = 'x;
|
||||
fcmp_fflags_NV_s0 = 'x;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
@ -216,7 +217,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
// FMV
|
||||
result_s0 = dataa_s0;
|
||||
fflags_NV_s0 = 0;
|
||||
end
|
||||
end
|
||||
6,7: begin
|
||||
// MIN/MAX
|
||||
result_s0 = fminmax_res_s0;
|
||||
|
@ -229,7 +230,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
|
||||
VX_pipe_register #(
|
||||
.DATAW (32 + 1),
|
||||
.DEPTH (LATENCY > 0)
|
||||
.DEPTH (OUT_REG)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -21,7 +21,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #(
|
|||
parameter TAG_WIDTH = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire reset,
|
||||
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
@ -36,7 +36,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #(
|
|||
input wire is_signed,
|
||||
|
||||
input wire [NUM_LANES-1:0][31:0] dataa,
|
||||
output wire [NUM_LANES-1:0][31:0] result,
|
||||
output wire [NUM_LANES-1:0][31:0] result,
|
||||
|
||||
output wire has_fflags,
|
||||
output wire [`FP_FLAGS_BITS-1:0] fflags,
|
||||
|
@ -45,56 +45,69 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #(
|
|||
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
`UNUSED_VAR (frm)
|
||||
);
|
||||
localparam DATAW = 32 + `INST_FRM_BITS + 1 + 1;
|
||||
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0][DATAW-1:0] data_in;
|
||||
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
|
||||
fflags_t [NUM_LANES-1:0] fflags_out;
|
||||
|
||||
wire pe_enable;
|
||||
wire [NUM_PES-1:0][31:0] pe_data_in;
|
||||
wire pe_enable;
|
||||
wire [NUM_PES-1:0][DATAW-1:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
|
||||
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_data_in
|
||||
assign data_in[i][0 +: 32] = dataa[i];
|
||||
assign data_in[i][32 +: `INST_FRM_BITS] = frm;
|
||||
assign data_in[i][32 + `INST_FRM_BITS +: 1] = is_itof;
|
||||
assign data_in[i][32 + `INST_FRM_BITS + 1 +: 1] = is_signed;
|
||||
end
|
||||
|
||||
VX_pe_serializer #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.LATENCY (`LATENCY_FCVT),
|
||||
.DATA_IN_WIDTH(32),
|
||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.DATA_IN_WIDTH (DATAW),
|
||||
.DATA_OUT_WIDTH (`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG (0)
|
||||
.PE_REG (0),
|
||||
.OUT_BUF (2)
|
||||
) pe_serializer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in),
|
||||
.data_in (dataa),
|
||||
.data_in (data_in),
|
||||
.tag_in ({mask_in, tag_in}),
|
||||
.ready_in (ready_in),
|
||||
.pe_enable (pe_enable),
|
||||
.pe_data_in (pe_data_in),
|
||||
.pe_data_out(pe_data_out),
|
||||
.pe_data_out(pe_data_in),
|
||||
.pe_data_in (pe_data_out),
|
||||
.valid_out (valid_out),
|
||||
.data_out (data_out),
|
||||
.tag_out ({mask_out, tag_out}),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
`UNUSED_VAR (pe_data_in)
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result
|
||||
assign result[i] = data_out[i][0 +: 32];
|
||||
assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin : g_fcvt_units
|
||||
VX_fcvt_unit #(
|
||||
.LATENCY (`LATENCY_FCVT)
|
||||
.LATENCY (`LATENCY_FCVT),
|
||||
.OUT_REG (1)
|
||||
) fcvt_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (pe_enable),
|
||||
.frm (frm),
|
||||
.is_itof (is_itof),
|
||||
.is_signed (is_signed),
|
||||
.frm (pe_data_in[0][32 +: `INST_FRM_BITS]),
|
||||
.is_itof (pe_data_in[0][32 + `INST_FRM_BITS +: 1]),
|
||||
.is_signed (pe_data_in[0][32 + `INST_FRM_BITS + 1 +: 1]),
|
||||
.dataa (pe_data_in[i][0 +: 32]),
|
||||
.result (pe_data_out[i][0 +: 32]),
|
||||
.fflags (pe_data_out[i][32 +: `FP_FLAGS_BITS])
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -21,7 +21,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
|||
parameter TAG_WIDTH = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire reset,
|
||||
|
||||
input wire valid_in,
|
||||
output wire ready_in,
|
||||
|
@ -31,10 +31,10 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
|||
input wire [TAG_WIDTH-1:0] tag_in,
|
||||
|
||||
input wire [`INST_FRM_BITS-1:0] frm,
|
||||
|
||||
|
||||
input wire [NUM_LANES-1:0][31:0] dataa,
|
||||
input wire [NUM_LANES-1:0][31:0] datab,
|
||||
output wire [NUM_LANES-1:0][31:0] result,
|
||||
output wire [NUM_LANES-1:0][31:0] result,
|
||||
|
||||
output wire has_fflags,
|
||||
output wire [`FP_FLAGS_BITS-1:0] fflags,
|
||||
|
@ -44,30 +44,33 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
|||
output wire valid_out,
|
||||
input wire ready_out
|
||||
);
|
||||
`UNUSED_VAR (frm)
|
||||
localparam DATAW = 2 * 32 + `INST_FRM_BITS;
|
||||
|
||||
wire [NUM_LANES-1:0][2*32-1:0] data_in;
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0][DATAW-1:0] data_in;
|
||||
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
|
||||
wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out;
|
||||
|
||||
wire pe_enable;
|
||||
wire [NUM_PES-1:0][2*32-1:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
|
||||
wire pe_enable;
|
||||
wire [NUM_PES-1:0][DATAW-1:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_data_in
|
||||
assign data_in[i][0 +: 32] = dataa[i];
|
||||
assign data_in[i][32 +: 32] = datab[i];
|
||||
assign data_in[i][64 +: `INST_FRM_BITS] = frm;
|
||||
end
|
||||
|
||||
|
||||
VX_pe_serializer #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.LATENCY (`LATENCY_FDIV),
|
||||
.DATA_IN_WIDTH(2*32),
|
||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.DATA_IN_WIDTH (DATAW),
|
||||
.DATA_OUT_WIDTH (`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG (0)
|
||||
.PE_REG (0),
|
||||
.OUT_BUF (2)
|
||||
) pe_serializer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -76,15 +79,17 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
|||
.tag_in ({mask_in, tag_in}),
|
||||
.ready_in (ready_in),
|
||||
.pe_enable (pe_enable),
|
||||
.pe_data_in (pe_data_in),
|
||||
.pe_data_out(pe_data_out),
|
||||
.pe_data_out(pe_data_in),
|
||||
.pe_data_in (pe_data_out),
|
||||
.valid_out (valid_out),
|
||||
.data_out (data_out),
|
||||
.tag_out ({mask_out, tag_out}),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
`UNUSED_VAR (pe_data_in)
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result
|
||||
assign result[i] = data_out[i][0 +: 32];
|
||||
assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS];
|
||||
end
|
||||
|
@ -92,8 +97,8 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
|||
fflags_t [NUM_LANES-1:0] per_lane_fflags;
|
||||
|
||||
`ifdef QUARTUS
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin : g_fdivs
|
||||
acl_fdiv fdiv (
|
||||
.clk (clk),
|
||||
.areset (1'b0),
|
||||
|
@ -103,15 +108,15 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
|||
.q (pe_data_out[i][0 +: 32])
|
||||
);
|
||||
assign pe_data_out[i][32 +: `FP_FLAGS_BITS] = 'x;
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
assign has_fflags = 0;
|
||||
assign per_lane_fflags = 'x;
|
||||
`UNUSED_VAR (fflags_out)
|
||||
|
||||
`elsif VIVADO
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin : g_fdivs
|
||||
wire [3:0] tuser;
|
||||
xil_fdiv fdiv (
|
||||
.aclk (clk),
|
||||
|
@ -131,21 +136,21 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
|||
assign has_fflags = 1;
|
||||
assign per_lane_fflags = fflags_out;
|
||||
|
||||
`else
|
||||
`else
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin : g_fdivs
|
||||
reg [63:0] r;
|
||||
`UNUSED_VAR (r)
|
||||
`UNUSED_VAR (r)
|
||||
fflags_t f;
|
||||
|
||||
always @(*) begin
|
||||
always @(*) begin
|
||||
dpi_fdiv (
|
||||
pe_enable,
|
||||
int'(0),
|
||||
{32'hffffffff, pe_data_in[i][0 +: 32]},
|
||||
{32'hffffffff, pe_data_in[i][32 +: 32]},
|
||||
frm,
|
||||
r,
|
||||
pe_enable,
|
||||
int'(0),
|
||||
{32'hffffffff, pe_data_in[i][0 +: 32]}, // a
|
||||
{32'hffffffff, pe_data_in[i][32 +: 32]}, // b
|
||||
pe_data_in[0][64 +: `INST_FRM_BITS], // frm
|
||||
r,
|
||||
f
|
||||
);
|
||||
end
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -15,7 +15,7 @@
|
|||
|
||||
`ifdef FPU_DPI
|
||||
|
||||
module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
||||
module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
||||
parameter NUM_LANES = 1,
|
||||
parameter TAG_WIDTH = 1,
|
||||
parameter OUT_BUF = 0
|
||||
|
@ -29,7 +29,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
input wire [NUM_LANES-1:0] mask_in,
|
||||
|
||||
input wire [TAG_WIDTH-1:0] tag_in,
|
||||
|
||||
|
||||
input wire [`INST_FPU_BITS-1:0] op_type,
|
||||
input wire [`INST_FMT_BITS-1:0] fmt,
|
||||
input wire [`INST_FRM_BITS-1:0] frm,
|
||||
|
@ -37,7 +37,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
input wire [NUM_LANES-1:0][`XLEN-1:0] dataa,
|
||||
input wire [NUM_LANES-1:0][`XLEN-1:0] datab,
|
||||
input wire [NUM_LANES-1:0][`XLEN-1:0] datac,
|
||||
output wire [NUM_LANES-1:0][`XLEN-1:0] result,
|
||||
output wire [NUM_LANES-1:0][`XLEN-1:0] result,
|
||||
|
||||
output wire has_fflags,
|
||||
output wire [`FP_FLAGS_BITS-1:0] fflags,
|
||||
|
@ -55,31 +55,30 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
localparam FPC_BITS = `LOG2UP(NUM_FPC);
|
||||
|
||||
localparam RSP_DATAW = (NUM_LANES * `XLEN) + 1 + $bits(fflags_t) + TAG_WIDTH;
|
||||
|
||||
|
||||
wire [NUM_FPC-1:0] per_core_ready_in;
|
||||
wire [NUM_FPC-1:0][NUM_LANES-1:0][`XLEN-1:0] per_core_result;
|
||||
wire [NUM_FPC-1:0][TAG_WIDTH-1:0] per_core_tag_out;
|
||||
reg [NUM_FPC-1:0] per_core_ready_out;
|
||||
wire [NUM_FPC-1:0] per_core_valid_out;
|
||||
wire [NUM_FPC-1:0] per_core_has_fflags;
|
||||
fflags_t [NUM_FPC-1:0] per_core_fflags;
|
||||
wire [NUM_FPC-1:0] per_core_valid_out;
|
||||
wire [NUM_FPC-1:0] per_core_has_fflags;
|
||||
fflags_t [NUM_FPC-1:0] per_core_fflags;
|
||||
|
||||
wire div_ready_in, sqrt_ready_in;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] div_result, sqrt_result;
|
||||
wire [TAG_WIDTH-1:0] div_tag_out, sqrt_tag_out;
|
||||
wire div_ready_out, sqrt_ready_out;
|
||||
wire div_valid_out, sqrt_valid_out;
|
||||
wire div_has_fflags, sqrt_has_fflags;
|
||||
wire div_valid_out, sqrt_valid_out;
|
||||
wire div_has_fflags, sqrt_has_fflags;
|
||||
fflags_t div_fflags, sqrt_fflags;
|
||||
|
||||
reg [FPC_BITS-1:0] core_select;
|
||||
|
||||
reg is_fadd, is_fsub, is_fmul, is_fmadd, is_fmsub, is_fnmadd, is_fnmsub;
|
||||
reg is_div, is_fcmp, is_itof, is_utof, is_ftoi, is_ftou, is_f2f;
|
||||
reg dst_fmt, int_fmt;
|
||||
reg is_div, is_fcmp, is_itof, is_utof, is_ftoi, is_ftou, is_f2f;
|
||||
|
||||
reg [NUM_LANES-1:0][63:0] operands [3];
|
||||
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_LANES; ++i) begin
|
||||
operands[0][i] = 64'(dataa[i]);
|
||||
|
@ -88,43 +87,30 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
|
||||
`UNUSED_VAR (fmt)
|
||||
wire f_fmt = fmt[0];
|
||||
wire i_fmt = fmt[1];
|
||||
|
||||
always @(*) begin
|
||||
is_fadd = 0;
|
||||
is_fsub = 0;
|
||||
is_fmul = 0;
|
||||
is_fsub = 0;
|
||||
is_fmul = 0;
|
||||
is_fmadd = 0;
|
||||
is_fmsub = 0;
|
||||
is_fnmadd = 0;
|
||||
is_fnmsub = 0;
|
||||
is_div = 0;
|
||||
is_fnmadd = 0;
|
||||
is_fnmsub = 0;
|
||||
is_div = 0;
|
||||
is_fcmp = 0;
|
||||
is_itof = 0;
|
||||
is_utof = 0;
|
||||
is_ftoi = 0;
|
||||
is_ftou = 0;
|
||||
is_f2f = 0;
|
||||
|
||||
dst_fmt = 0;
|
||||
int_fmt = 0;
|
||||
|
||||
`ifdef FLEN_64
|
||||
dst_fmt = fmt[0];
|
||||
`endif
|
||||
|
||||
`ifdef XLEN_64
|
||||
int_fmt = fmt[1];
|
||||
`endif
|
||||
|
||||
case (op_type)
|
||||
`INST_FPU_ADD: begin core_select = FPU_FMA; is_fadd = 1; end
|
||||
`INST_FPU_SUB: begin core_select = FPU_FMA; is_fsub = 1; end
|
||||
`INST_FPU_ADD: begin core_select = FPU_FMA; is_fadd = ~i_fmt; is_fsub = i_fmt; end
|
||||
`INST_FPU_MADD: begin core_select = FPU_FMA; is_fmadd = ~i_fmt; is_fmsub = i_fmt; end
|
||||
`INST_FPU_NMADD: begin core_select = FPU_FMA; is_fnmadd = ~i_fmt; is_fnmsub = i_fmt; end
|
||||
`INST_FPU_MUL: begin core_select = FPU_FMA; is_fmul = 1; end
|
||||
`INST_FPU_MADD: begin core_select = FPU_FMA; is_fmadd = 1; end
|
||||
`INST_FPU_MSUB: begin core_select = FPU_FMA; is_fmsub = 1; end
|
||||
`INST_FPU_NMADD: begin core_select = FPU_FMA; is_fnmadd = 1; end
|
||||
`INST_FPU_NMSUB: begin core_select = FPU_FMA; is_fnmsub = 1; end
|
||||
`INST_FPU_DIV: begin core_select = FPU_DIVSQRT; is_div = 1; end
|
||||
`INST_FPU_SQRT: begin core_select = FPU_DIVSQRT; end
|
||||
`INST_FPU_CMP: begin core_select = FPU_NCP; is_fcmp = 1; end
|
||||
|
@ -132,23 +118,23 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
`INST_FPU_F2U: begin core_select = FPU_CVT; is_ftou = 1; end
|
||||
`INST_FPU_I2F: begin core_select = FPU_CVT; is_itof = 1; end
|
||||
`INST_FPU_U2F: begin core_select = FPU_CVT; is_utof = 1; end
|
||||
`INST_FPU_F2F: begin core_select = FPU_CVT; is_f2f = 1; end
|
||||
`INST_FPU_F2F: begin core_select = FPU_CVT; is_f2f = 1; end
|
||||
default: begin core_select = FPU_NCP; end
|
||||
endcase
|
||||
end
|
||||
|
||||
generate
|
||||
begin : fma
|
||||
|
||||
generate
|
||||
begin : g_fma
|
||||
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] result_fma;
|
||||
wire [NUM_LANES-1:0][63:0] result_fadd;
|
||||
wire [NUM_LANES-1:0][63:0] result_fsub;
|
||||
wire [NUM_LANES-1:0][63:0] result_fmul;
|
||||
wire [NUM_LANES-1:0][63:0] result_fmadd;
|
||||
wire [NUM_LANES-1:0][63:0] result_fmsub;
|
||||
wire [NUM_LANES-1:0][63:0] result_fnmadd;
|
||||
wire [NUM_LANES-1:0][63:0] result_fnmsub;
|
||||
|
||||
reg [NUM_LANES-1:0][63:0] result_fadd;
|
||||
reg [NUM_LANES-1:0][63:0] result_fsub;
|
||||
reg [NUM_LANES-1:0][63:0] result_fmul;
|
||||
reg [NUM_LANES-1:0][63:0] result_fmadd;
|
||||
reg [NUM_LANES-1:0][63:0] result_fmsub;
|
||||
reg [NUM_LANES-1:0][63:0] result_fnmadd;
|
||||
reg [NUM_LANES-1:0][63:0] result_fnmsub;
|
||||
|
||||
fflags_t [NUM_LANES-1:0] fflags_fma;
|
||||
fflags_t [NUM_LANES-1:0] fflags_fadd;
|
||||
fflags_t [NUM_LANES-1:0] fflags_fsub;
|
||||
|
@ -162,33 +148,33 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
wire fma_ready = per_core_ready_out[FPU_FMA] || ~per_core_valid_out[FPU_FMA];
|
||||
wire fma_fire = fma_valid && fma_ready;
|
||||
|
||||
always @(*) begin
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_LANES; ++i) begin
|
||||
dpi_fadd (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fadd[i], fflags_fadd[i]);
|
||||
dpi_fsub (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fsub[i], fflags_fsub[i]);
|
||||
dpi_fmul (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fmul[i], fflags_fmul[i]);
|
||||
dpi_fmadd (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fmadd[i], fflags_fmadd[i]);
|
||||
dpi_fmsub (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fmsub[i], fflags_fmsub[i]);
|
||||
dpi_fnmadd (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fnmadd[i], fflags_fnmadd[i]);
|
||||
dpi_fnmsub (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fnmsub[i], fflags_fnmsub[i]);
|
||||
dpi_fadd (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], frm, result_fadd[i], fflags_fadd[i]);
|
||||
dpi_fsub (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], frm, result_fsub[i], fflags_fsub[i]);
|
||||
dpi_fmul (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], frm, result_fmul[i], fflags_fmul[i]);
|
||||
dpi_fmadd (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fmadd[i], fflags_fmadd[i]);
|
||||
dpi_fmsub (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fmsub[i], fflags_fmsub[i]);
|
||||
dpi_fnmadd (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fnmadd[i], fflags_fnmadd[i]);
|
||||
dpi_fnmsub (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fnmsub[i], fflags_fnmsub[i]);
|
||||
|
||||
result_fma[i] = is_fadd ? result_fadd[i][`XLEN-1:0] :
|
||||
is_fsub ? result_fsub[i][`XLEN-1:0] :
|
||||
is_fmul ? result_fmul[i][`XLEN-1:0] :
|
||||
is_fmadd ? result_fmadd[i][`XLEN-1:0] :
|
||||
is_fmadd ? result_fmadd[i][`XLEN-1:0] :
|
||||
is_fmsub ? result_fmsub[i][`XLEN-1:0] :
|
||||
is_fnmadd ? result_fnmadd[i][`XLEN-1:0] :
|
||||
is_fnmadd ? result_fnmadd[i][`XLEN-1:0] :
|
||||
is_fnmsub ? result_fnmsub[i][`XLEN-1:0] :
|
||||
'0;
|
||||
|
||||
fflags_fma[i] = is_fadd ? fflags_fadd[i] :
|
||||
is_fsub ? fflags_fsub[i] :
|
||||
is_fmul ? fflags_fmul[i] :
|
||||
is_fmadd ? fflags_fmadd[i] :
|
||||
is_fmadd ? fflags_fmadd[i] :
|
||||
is_fmsub ? fflags_fmsub[i] :
|
||||
is_fnmadd ? fflags_fnmadd[i] :
|
||||
is_fnmsub ? fflags_fnmsub[i] :
|
||||
'0;
|
||||
is_fnmadd ? fflags_fnmadd[i] :
|
||||
is_fnmsub ? fflags_fnmsub[i] :
|
||||
'0;
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -213,20 +199,20 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
end
|
||||
endgenerate
|
||||
|
||||
generate
|
||||
begin : fdiv
|
||||
generate
|
||||
begin : g_fdiv
|
||||
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] result_fdiv_r;
|
||||
wire [NUM_LANES-1:0][63:0] result_fdiv;
|
||||
reg [NUM_LANES-1:0][63:0] result_fdiv;
|
||||
fflags_t [NUM_LANES-1:0] fflags_fdiv;
|
||||
|
||||
wire fdiv_valid = (valid_in && core_select == FPU_DIVSQRT) && is_div;
|
||||
wire fdiv_ready = div_ready_out || ~div_valid_out;
|
||||
wire fdiv_fire = fdiv_valid && fdiv_ready;
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_LANES; ++i) begin
|
||||
dpi_fdiv (fdiv_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fdiv[i], fflags_fdiv[i]);
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_LANES; ++i) begin
|
||||
dpi_fdiv (fdiv_fire, int'(f_fmt), operands[0][i], operands[1][i], frm, result_fdiv[i], fflags_fdiv[i]);
|
||||
result_fdiv_r[i] = result_fdiv[i][`XLEN-1:0];
|
||||
end
|
||||
end
|
||||
|
@ -252,20 +238,20 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
end
|
||||
endgenerate
|
||||
|
||||
generate
|
||||
begin : fsqrt
|
||||
generate
|
||||
begin : g_fsqrt
|
||||
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] result_fsqrt_r;
|
||||
wire [NUM_LANES-1:0][63:0] result_fsqrt;
|
||||
reg [NUM_LANES-1:0][63:0] result_fsqrt;
|
||||
fflags_t [NUM_LANES-1:0] fflags_fsqrt;
|
||||
|
||||
wire fsqrt_valid = (valid_in && core_select == FPU_DIVSQRT) && ~is_div;
|
||||
wire fsqrt_ready = sqrt_ready_out || ~sqrt_valid_out;
|
||||
wire fsqrt_ready = sqrt_ready_out || ~sqrt_valid_out;
|
||||
wire fsqrt_fire = fsqrt_valid && fsqrt_ready;
|
||||
|
||||
always @(*) begin
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_LANES; ++i) begin
|
||||
dpi_fsqrt (fsqrt_fire, int'(dst_fmt), operands[0][i], frm, result_fsqrt[i], fflags_fsqrt[i]);
|
||||
dpi_fsqrt (fsqrt_fire, int'(f_fmt), operands[0][i], frm, result_fsqrt[i], fflags_fsqrt[i]);
|
||||
result_fsqrt_r[i] = result_fsqrt[i][`XLEN-1:0];
|
||||
end
|
||||
end
|
||||
|
@ -292,15 +278,15 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
endgenerate
|
||||
|
||||
generate
|
||||
begin : fcvt
|
||||
begin : g_fcvt
|
||||
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] result_fcvt;
|
||||
wire [NUM_LANES-1:0][63:0] result_itof;
|
||||
wire [NUM_LANES-1:0][63:0] result_utof;
|
||||
wire [NUM_LANES-1:0][63:0] result_ftoi;
|
||||
wire [NUM_LANES-1:0][63:0] result_ftou;
|
||||
wire [NUM_LANES-1:0][63:0] result_f2f;
|
||||
|
||||
reg [NUM_LANES-1:0][63:0] result_itof;
|
||||
reg [NUM_LANES-1:0][63:0] result_utof;
|
||||
reg [NUM_LANES-1:0][63:0] result_ftoi;
|
||||
reg [NUM_LANES-1:0][63:0] result_ftou;
|
||||
reg [NUM_LANES-1:0][63:0] result_f2f;
|
||||
|
||||
fflags_t [NUM_LANES-1:0] fflags_fcvt;
|
||||
fflags_t [NUM_LANES-1:0] fflags_itof;
|
||||
fflags_t [NUM_LANES-1:0] fflags_utof;
|
||||
|
@ -310,20 +296,20 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
wire fcvt_valid = (valid_in && core_select == FPU_CVT);
|
||||
wire fcvt_ready = per_core_ready_out[FPU_CVT] || ~per_core_valid_out[FPU_CVT];
|
||||
wire fcvt_fire = fcvt_valid && fcvt_ready;
|
||||
|
||||
always @(*) begin
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_LANES; ++i) begin
|
||||
dpi_itof (fcvt_fire, int'(dst_fmt), int'(int_fmt), operands[0][i], frm, result_itof[i], fflags_itof[i]);
|
||||
dpi_utof (fcvt_fire, int'(dst_fmt), int'(int_fmt), operands[0][i], frm, result_utof[i], fflags_utof[i]);
|
||||
dpi_ftoi (fcvt_fire, int'(int_fmt), int'(dst_fmt), operands[0][i], frm, result_ftoi[i], fflags_ftoi[i]);
|
||||
dpi_ftou (fcvt_fire, int'(int_fmt), int'(dst_fmt), operands[0][i], frm, result_ftou[i], fflags_ftou[i]);
|
||||
dpi_f2f (fcvt_fire, int'(dst_fmt), operands[0][i], result_f2f[i]);
|
||||
dpi_itof (fcvt_fire, int'(f_fmt), int'(i_fmt), operands[0][i], frm, result_itof[i], fflags_itof[i]);
|
||||
dpi_utof (fcvt_fire, int'(f_fmt), int'(i_fmt), operands[0][i], frm, result_utof[i], fflags_utof[i]);
|
||||
dpi_ftoi (fcvt_fire, int'(i_fmt), int'(f_fmt), operands[0][i], frm, result_ftoi[i], fflags_ftoi[i]);
|
||||
dpi_ftou (fcvt_fire, int'(i_fmt), int'(f_fmt), operands[0][i], frm, result_ftou[i], fflags_ftou[i]);
|
||||
dpi_f2f (fcvt_fire, int'(f_fmt), operands[0][i], result_f2f[i]);
|
||||
|
||||
result_fcvt[i] = is_itof ? result_itof[i][`XLEN-1:0] :
|
||||
is_utof ? result_utof[i][`XLEN-1:0] :
|
||||
is_ftoi ? result_ftoi[i][`XLEN-1:0] :
|
||||
is_ftou ? result_ftou[i][`XLEN-1:0] :
|
||||
is_f2f ? result_f2f[i][`XLEN-1:0] :
|
||||
is_ftou ? result_ftou[i][`XLEN-1:0] :
|
||||
is_f2f ? result_f2f[i][`XLEN-1:0] :
|
||||
'0;
|
||||
|
||||
fflags_fcvt[i] = is_itof ? fflags_itof[i] :
|
||||
|
@ -355,19 +341,19 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
end
|
||||
endgenerate
|
||||
|
||||
generate
|
||||
begin : fncp
|
||||
generate
|
||||
begin : g_fncp
|
||||
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] result_fncp;
|
||||
wire [NUM_LANES-1:0][63:0] result_fclss;
|
||||
wire [NUM_LANES-1:0][63:0] result_flt;
|
||||
wire [NUM_LANES-1:0][63:0] result_fle;
|
||||
wire [NUM_LANES-1:0][63:0] result_feq;
|
||||
wire [NUM_LANES-1:0][63:0] result_fmin;
|
||||
wire [NUM_LANES-1:0][63:0] result_fmax;
|
||||
wire [NUM_LANES-1:0][63:0] result_fsgnj;
|
||||
wire [NUM_LANES-1:0][63:0] result_fsgnjn;
|
||||
wire [NUM_LANES-1:0][63:0] result_fsgnjx;
|
||||
reg [NUM_LANES-1:0][63:0] result_fclss;
|
||||
reg [NUM_LANES-1:0][63:0] result_flt;
|
||||
reg [NUM_LANES-1:0][63:0] result_fle;
|
||||
reg [NUM_LANES-1:0][63:0] result_feq;
|
||||
reg [NUM_LANES-1:0][63:0] result_fmin;
|
||||
reg [NUM_LANES-1:0][63:0] result_fmax;
|
||||
reg [NUM_LANES-1:0][63:0] result_fsgnj;
|
||||
reg [NUM_LANES-1:0][63:0] result_fsgnjn;
|
||||
reg [NUM_LANES-1:0][63:0] result_fsgnjx;
|
||||
reg [NUM_LANES-1:0][63:0] result_fmvx;
|
||||
reg [NUM_LANES-1:0][63:0] result_fmvf;
|
||||
|
||||
|
@ -381,20 +367,20 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
wire fncp_valid = (valid_in && core_select == FPU_NCP);
|
||||
wire fncp_ready = per_core_ready_out[FPU_NCP] || ~per_core_valid_out[FPU_NCP];
|
||||
wire fncp_fire = fncp_valid && fncp_ready;
|
||||
|
||||
always @(*) begin
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_LANES; ++i) begin
|
||||
dpi_fclss (fncp_fire, int'(dst_fmt), operands[0][i], result_fclss[i]);
|
||||
dpi_fle (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fle[i], fflags_fle[i]);
|
||||
dpi_flt (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_flt[i], fflags_flt[i]);
|
||||
dpi_feq (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_feq[i], fflags_feq[i]);
|
||||
dpi_fmin (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fmin[i], fflags_fmin[i]);
|
||||
dpi_fmax (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fmax[i], fflags_fmax[i]);
|
||||
dpi_fsgnj (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnj[i]);
|
||||
dpi_fsgnjn (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnjn[i]);
|
||||
dpi_fsgnjx (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnjx[i]);
|
||||
result_fmvx[i] = dst_fmt ? operands[0][i] : 64'($signed(operands[0][i][31:0])); // sign-extension
|
||||
result_fmvf[i] = dst_fmt ? operands[0][i] : (operands[0][i] | 64'hffffffff00000000); // nan-boxing
|
||||
dpi_fclss (fncp_fire, int'(f_fmt), operands[0][i], result_fclss[i]);
|
||||
dpi_fle (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fle[i], fflags_fle[i]);
|
||||
dpi_flt (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_flt[i], fflags_flt[i]);
|
||||
dpi_feq (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_feq[i], fflags_feq[i]);
|
||||
dpi_fmin (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fmin[i], fflags_fmin[i]);
|
||||
dpi_fmax (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fmax[i], fflags_fmax[i]);
|
||||
dpi_fsgnj (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fsgnj[i]);
|
||||
dpi_fsgnjn (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fsgnjn[i]);
|
||||
dpi_fsgnjx (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fsgnjx[i]);
|
||||
result_fmvx[i] = f_fmt ? operands[0][i] : 64'($signed(operands[0][i][31:0])); // sign-extension
|
||||
result_fmvf[i] = f_fmt ? operands[0][i] : (operands[0][i] | 64'hffffffff00000000); // nan-boxing
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -431,7 +417,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
.data_in ({fncp_valid, tag_in, has_fflags_fncp, result_fncp, fflags_merged}),
|
||||
.data_out ({per_core_valid_out[FPU_NCP], per_core_tag_out[FPU_NCP], per_core_has_fflags[FPU_NCP], per_core_result[FPU_NCP], per_core_fflags[FPU_NCP]})
|
||||
);
|
||||
|
||||
|
||||
assign per_core_ready_in[FPU_NCP] = fncp_ready;
|
||||
|
||||
end
|
||||
|
@ -443,15 +429,15 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATAW (RSP_DATAW),
|
||||
.ARBITER ("R"),
|
||||
.DATAW (RSP_DATAW),
|
||||
.ARBITER ("P"),
|
||||
.OUT_BUF (0)
|
||||
) div_sqrt_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in ({sqrt_valid_out, div_valid_out}),
|
||||
.valid_in ({sqrt_valid_out, div_valid_out}),
|
||||
.ready_in ({sqrt_ready_out, div_ready_out}),
|
||||
.data_in ({{sqrt_result, sqrt_has_fflags, sqrt_fflags, sqrt_tag_out},
|
||||
.data_in ({{sqrt_result, sqrt_has_fflags, sqrt_fflags, sqrt_tag_out},
|
||||
{div_result, div_has_fflags, div_fflags, div_tag_out}}),
|
||||
.data_out ({per_core_result[FPU_DIVSQRT], per_core_has_fflags[FPU_DIVSQRT], per_core_fflags[FPU_DIVSQRT], per_core_tag_out[FPU_DIVSQRT]}),
|
||||
.valid_out (per_core_valid_out[FPU_DIVSQRT]),
|
||||
|
@ -463,19 +449,19 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
|
||||
wire [NUM_FPC-1:0][RSP_DATAW-1:0] per_core_data_out;
|
||||
|
||||
for (genvar i = 0; i < NUM_FPC; ++i) begin
|
||||
for (genvar i = 0; i < NUM_FPC; ++i) begin : g_per_core_data_out
|
||||
assign per_core_data_out[i] = {per_core_result[i], per_core_has_fflags[i], per_core_fflags[i], per_core_tag_out[i]};
|
||||
end
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (NUM_FPC),
|
||||
.DATAW (RSP_DATAW),
|
||||
.DATAW (RSP_DATAW),
|
||||
.ARBITER ("R"),
|
||||
.OUT_BUF (OUT_BUF)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (per_core_valid_out),
|
||||
.valid_in (per_core_valid_out),
|
||||
.ready_in (per_core_ready_out),
|
||||
.data_in (per_core_data_out),
|
||||
.data_out ({result, has_fflags, fflags, tag_out}),
|
||||
|
|
|
@ -51,68 +51,39 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
localparam FPU_DIVSQRT = 1;
|
||||
localparam FPU_CVT = 2;
|
||||
localparam FPU_NCP = 3;
|
||||
localparam NUM_FPC = 4;
|
||||
localparam FPC_BITS = `LOG2UP(NUM_FPC);
|
||||
localparam NUM_FPCORES = 4;
|
||||
localparam FPCORES_BITS = `LOG2UP(NUM_FPCORES);
|
||||
|
||||
localparam REQ_DATAW = NUM_LANES + TAG_WIDTH + `INST_FPU_BITS + `INST_FMT_BITS + `INST_FRM_BITS + 3 * (NUM_LANES * 32);
|
||||
localparam RSP_DATAW = (NUM_LANES * 32) + 1 + $bits(fflags_t) + TAG_WIDTH;
|
||||
|
||||
`UNUSED_VAR (fmt)
|
||||
|
||||
wire [NUM_FPC-1:0] per_core_ready_in;
|
||||
wire [NUM_FPC-1:0][NUM_LANES-1:0][31:0] per_core_result;
|
||||
wire [NUM_FPC-1:0][TAG_WIDTH-1:0] per_core_tag_out;
|
||||
wire [NUM_FPC-1:0] per_core_ready_out;
|
||||
wire [NUM_FPC-1:0] per_core_valid_out;
|
||||
wire [NUM_FPC-1:0] per_core_has_fflags;
|
||||
fflags_t [NUM_FPC-1:0] per_core_fflags;
|
||||
wire [NUM_FPCORES-1:0] per_core_valid_in;
|
||||
wire [NUM_FPCORES-1:0][REQ_DATAW-1:0] per_core_data_in;
|
||||
wire [NUM_FPCORES-1:0] per_core_ready_in;
|
||||
|
||||
wire div_ready_in, sqrt_ready_in;
|
||||
wire [NUM_LANES-1:0][31:0] div_result, sqrt_result;
|
||||
wire [TAG_WIDTH-1:0] div_tag_out, sqrt_tag_out;
|
||||
wire div_ready_out, sqrt_ready_out;
|
||||
wire div_valid_out, sqrt_valid_out;
|
||||
wire div_has_fflags, sqrt_has_fflags;
|
||||
fflags_t div_fflags, sqrt_fflags;
|
||||
wire [NUM_FPCORES-1:0][NUM_LANES-1:0] per_core_mask_in;
|
||||
wire [NUM_FPCORES-1:0][TAG_WIDTH-1:0] per_core_tag_in;
|
||||
wire [NUM_FPCORES-1:0][`INST_FPU_BITS-1:0] per_core_op_type;
|
||||
wire [NUM_FPCORES-1:0][`INST_FMT_BITS-1:0] per_core_fmt;
|
||||
wire [NUM_FPCORES-1:0][`INST_FRM_BITS-1:0] per_core_frm;
|
||||
wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_dataa;
|
||||
wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_datab;
|
||||
wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_datac;
|
||||
|
||||
reg [FPC_BITS-1:0] core_select;
|
||||
reg is_madd, is_sub, is_neg, is_div, is_itof, is_signed;
|
||||
|
||||
always @(*) begin
|
||||
is_madd = 0;
|
||||
is_sub = 0;
|
||||
is_neg = 0;
|
||||
is_div = 0;
|
||||
is_itof = 0;
|
||||
is_signed = 0;
|
||||
case (op_type)
|
||||
`INST_FPU_ADD: begin core_select = FPU_FMA; end
|
||||
`INST_FPU_SUB: begin core_select = FPU_FMA; is_sub = 1; end
|
||||
`INST_FPU_MUL: begin core_select = FPU_FMA; is_neg = 1; end
|
||||
`INST_FPU_MADD: begin core_select = FPU_FMA; is_madd = 1; end
|
||||
`INST_FPU_MSUB: begin core_select = FPU_FMA; is_madd = 1; is_sub = 1; end
|
||||
`INST_FPU_NMADD: begin core_select = FPU_FMA; is_madd = 1; is_neg = 1; end
|
||||
`INST_FPU_NMSUB: begin core_select = FPU_FMA; is_madd = 1; is_sub = 1; is_neg = 1; end
|
||||
`INST_FPU_DIV: begin core_select = FPU_DIVSQRT; is_div = 1; end
|
||||
`INST_FPU_SQRT: begin core_select = FPU_DIVSQRT; end
|
||||
`INST_FPU_F2I: begin core_select = FPU_CVT; is_signed = 1; end
|
||||
`INST_FPU_F2U: begin core_select = FPU_CVT; end
|
||||
`INST_FPU_I2F: begin core_select = FPU_CVT; is_itof = 1; is_signed = 1; end
|
||||
`INST_FPU_U2F: begin core_select = FPU_CVT; is_itof = 1; end
|
||||
default: begin core_select = FPU_NCP; end
|
||||
endcase
|
||||
end
|
||||
|
||||
`RESET_RELAY (fma_reset, reset);
|
||||
`RESET_RELAY (div_reset, reset);
|
||||
`RESET_RELAY (sqrt_reset, reset);
|
||||
`RESET_RELAY (cvt_reset, reset);
|
||||
`RESET_RELAY (ncp_reset, reset);
|
||||
wire [NUM_FPCORES-1:0] per_core_valid_out;
|
||||
wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_result;
|
||||
wire [NUM_FPCORES-1:0][TAG_WIDTH-1:0] per_core_tag_out;
|
||||
wire [NUM_FPCORES-1:0] per_core_has_fflags;
|
||||
fflags_t [NUM_FPCORES-1:0] per_core_fflags;
|
||||
wire [NUM_FPCORES-1:0] per_core_ready_out;
|
||||
|
||||
wire [NUM_LANES-1:0][31:0] dataa_s;
|
||||
wire [NUM_LANES-1:0][31:0] datab_s;
|
||||
wire [NUM_LANES-1:0][31:0] datac_s;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_data
|
||||
assign dataa_s[i] = dataa[i][31:0];
|
||||
assign datab_s[i] = datab[i][31:0];
|
||||
assign datac_s[i] = datac[i][31:0];
|
||||
|
@ -122,23 +93,60 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
`UNUSED_VAR (datab)
|
||||
`UNUSED_VAR (datac)
|
||||
|
||||
// Decode fpu core type
|
||||
wire [FPCORES_BITS-1:0] core_select = op_type[3:2];
|
||||
|
||||
VX_stream_switch #(
|
||||
.DATAW (REQ_DATAW),
|
||||
.NUM_OUTPUTS (NUM_FPCORES)
|
||||
) req_switch (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.sel_in (core_select),
|
||||
.valid_in (valid_in),
|
||||
.ready_in (ready_in),
|
||||
.data_in ({mask_in, tag_in, fmt, frm, dataa_s, datab_s, datac_s, op_type}),
|
||||
.data_out (per_core_data_in),
|
||||
.valid_out (per_core_valid_in),
|
||||
.ready_out (per_core_ready_in)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_FPCORES; ++i) begin : g_per_core_data_in
|
||||
assign {
|
||||
per_core_mask_in[i],
|
||||
per_core_tag_in[i],
|
||||
per_core_fmt[i],
|
||||
per_core_frm[i],
|
||||
per_core_dataa[i],
|
||||
per_core_datab[i],
|
||||
per_core_datac[i],
|
||||
per_core_op_type[i]
|
||||
} = per_core_data_in[i];
|
||||
end
|
||||
|
||||
// FMA core ///////////////////////////////////////////////////////////////
|
||||
|
||||
wire is_madd = per_core_op_type[FPU_FMA][1];
|
||||
wire is_neg = per_core_op_type[FPU_FMA][0];
|
||||
wire is_sub = per_core_fmt[FPU_FMA][1];
|
||||
|
||||
VX_fpu_fma #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.TAG_WIDTH (TAG_WIDTH)
|
||||
) fpu_fma (
|
||||
.clk (clk),
|
||||
.reset (fma_reset),
|
||||
.valid_in (valid_in && (core_select == FPU_FMA)),
|
||||
.reset (reset),
|
||||
.valid_in (per_core_valid_in[FPU_FMA]),
|
||||
.ready_in (per_core_ready_in[FPU_FMA]),
|
||||
.mask_in (mask_in),
|
||||
.tag_in (tag_in),
|
||||
.frm (frm),
|
||||
.mask_in (per_core_mask_in[FPU_FMA]),
|
||||
.tag_in (per_core_tag_in[FPU_FMA]),
|
||||
.frm (per_core_frm[FPU_FMA]),
|
||||
.is_madd (is_madd),
|
||||
.is_sub (is_sub),
|
||||
.is_neg (is_neg),
|
||||
.dataa (dataa_s),
|
||||
.datab (datab_s),
|
||||
.datac (datac_s),
|
||||
.dataa (per_core_dataa[FPU_FMA]),
|
||||
.datab (per_core_datab[FPU_FMA]),
|
||||
.datac (per_core_datac[FPU_FMA]),
|
||||
.has_fflags (per_core_has_fflags[FPU_FMA]),
|
||||
.fflags (per_core_fflags[FPU_FMA]),
|
||||
.result (per_core_result[FPU_FMA]),
|
||||
|
@ -147,25 +155,99 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
.valid_out (per_core_valid_out[FPU_FMA])
|
||||
);
|
||||
|
||||
// Div/Sqrt cores /////////////////////////////////////////////////////////
|
||||
|
||||
wire [1:0] div_sqrt_valid_in;
|
||||
wire [1:0][REQ_DATAW-1:0] div_sqrt_data_in;
|
||||
wire [1:0] div_sqrt_ready_in;
|
||||
|
||||
wire [1:0][NUM_LANES-1:0] div_sqrt_mask_in;
|
||||
wire [1:0][TAG_WIDTH-1:0] div_sqrt_tag_in;
|
||||
wire [1:0][`INST_FPU_BITS-1:0] div_sqrt_op_type;
|
||||
wire [1:0][`INST_FMT_BITS-1:0] div_sqrt_fmt;
|
||||
wire [1:0][`INST_FRM_BITS-1:0] div_sqrt_frm;
|
||||
wire [1:0][NUM_LANES-1:0][31:0] div_sqrt_dataa;
|
||||
wire [1:0][NUM_LANES-1:0][31:0] div_sqrt_datab;
|
||||
wire [1:0][NUM_LANES-1:0][31:0] div_sqrt_datac;
|
||||
|
||||
wire [1:0] div_sqrt_valid_out;
|
||||
wire [1:0][NUM_LANES-1:0][31:0] div_sqrt_result;
|
||||
wire [1:0][TAG_WIDTH-1:0] div_sqrt_tag_out;
|
||||
wire [1:0] div_sqrt_has_fflags;
|
||||
fflags_t [1:0] div_sqrt_fflags;
|
||||
wire [1:0] div_sqrt_ready_out;
|
||||
|
||||
wire div_sqrt_valid_tmp_in;
|
||||
wire [REQ_DATAW-1:0] div_sqrt_data_tmp_in;
|
||||
wire div_sqrt_ready_tmp_in;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (REQ_DATAW)
|
||||
) div_sqrt_req_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (per_core_valid_in[FPU_DIVSQRT]),
|
||||
.ready_in (per_core_ready_in[FPU_DIVSQRT]),
|
||||
.data_in (per_core_data_in[FPU_DIVSQRT]),
|
||||
.data_out (div_sqrt_data_tmp_in),
|
||||
.valid_out (div_sqrt_valid_tmp_in),
|
||||
.ready_out (div_sqrt_ready_tmp_in)
|
||||
);
|
||||
|
||||
wire is_sqrt = div_sqrt_data_tmp_in[0]; // op_type[0]
|
||||
|
||||
VX_stream_switch #(
|
||||
.DATAW (REQ_DATAW),
|
||||
.NUM_OUTPUTS (2)
|
||||
) div_sqrt_req_switch (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.sel_in (is_sqrt),
|
||||
.valid_in (div_sqrt_valid_tmp_in),
|
||||
.ready_in (div_sqrt_ready_tmp_in),
|
||||
.data_in (div_sqrt_data_tmp_in),
|
||||
.data_out (div_sqrt_data_in),
|
||||
.valid_out (div_sqrt_valid_in),
|
||||
.ready_out (div_sqrt_ready_in)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < 2; ++i) begin : g_div_sqrt_data_in
|
||||
assign {
|
||||
div_sqrt_mask_in[i],
|
||||
div_sqrt_tag_in[i],
|
||||
div_sqrt_fmt[i],
|
||||
div_sqrt_frm[i],
|
||||
div_sqrt_dataa[i],
|
||||
div_sqrt_datab[i],
|
||||
div_sqrt_datac[i],
|
||||
div_sqrt_op_type[i]
|
||||
} = div_sqrt_data_in[i];
|
||||
end
|
||||
|
||||
`UNUSED_VAR (div_sqrt_op_type)
|
||||
`UNUSED_VAR (div_sqrt_fmt)
|
||||
`UNUSED_VAR (div_sqrt_datab)
|
||||
`UNUSED_VAR (div_sqrt_datac)
|
||||
|
||||
VX_fpu_div #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.TAG_WIDTH (TAG_WIDTH)
|
||||
) fpu_div (
|
||||
.clk (clk),
|
||||
.reset (div_reset),
|
||||
.valid_in (valid_in && (core_select == FPU_DIVSQRT) && is_div),
|
||||
.ready_in (div_ready_in),
|
||||
.mask_in (mask_in),
|
||||
.tag_in (tag_in),
|
||||
.frm (frm),
|
||||
.dataa (dataa_s),
|
||||
.datab (datab_s),
|
||||
.has_fflags (div_has_fflags),
|
||||
.fflags (div_fflags),
|
||||
.result (div_result),
|
||||
.tag_out (div_tag_out),
|
||||
.valid_out (div_valid_out),
|
||||
.ready_out (div_ready_out)
|
||||
.reset (reset),
|
||||
.valid_in (div_sqrt_valid_in[0]),
|
||||
.ready_in (div_sqrt_ready_in[0]),
|
||||
.mask_in (div_sqrt_mask_in[0]),
|
||||
.tag_in (div_sqrt_tag_in[0]),
|
||||
.frm (div_sqrt_frm[0]),
|
||||
.dataa (div_sqrt_dataa[0]),
|
||||
.datab (div_sqrt_datab[0]),
|
||||
.has_fflags (div_sqrt_has_fflags[0]),
|
||||
.fflags (div_sqrt_fflags[0]),
|
||||
.result (div_sqrt_result[0]),
|
||||
.tag_out (div_sqrt_tag_out[0]),
|
||||
.valid_out (div_sqrt_valid_out[0]),
|
||||
.ready_out (div_sqrt_ready_out[0])
|
||||
);
|
||||
|
||||
VX_fpu_sqrt #(
|
||||
|
@ -173,92 +255,42 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
.TAG_WIDTH (TAG_WIDTH)
|
||||
) fpu_sqrt (
|
||||
.clk (clk),
|
||||
.reset (sqrt_reset),
|
||||
.valid_in (valid_in && (core_select == FPU_DIVSQRT) && ~is_div),
|
||||
.ready_in (sqrt_ready_in),
|
||||
.mask_in (mask_in),
|
||||
.tag_in (tag_in),
|
||||
.frm (frm),
|
||||
.dataa (dataa_s),
|
||||
.has_fflags (sqrt_has_fflags),
|
||||
.fflags (sqrt_fflags),
|
||||
.result (sqrt_result),
|
||||
.tag_out (sqrt_tag_out),
|
||||
.valid_out (sqrt_valid_out),
|
||||
.ready_out (sqrt_ready_out)
|
||||
.reset (reset),
|
||||
.valid_in (div_sqrt_valid_in[1]),
|
||||
.ready_in (div_sqrt_ready_in[1]),
|
||||
.mask_in (div_sqrt_mask_in[1]),
|
||||
.tag_in (div_sqrt_tag_in[1]),
|
||||
.frm (div_sqrt_frm[1]),
|
||||
.dataa (div_sqrt_dataa[1]),
|
||||
.has_fflags (div_sqrt_has_fflags[1]),
|
||||
.fflags (div_sqrt_fflags[1]),
|
||||
.result (div_sqrt_result[1]),
|
||||
.tag_out (div_sqrt_tag_out[1]),
|
||||
.valid_out (div_sqrt_valid_out[1]),
|
||||
.ready_out (div_sqrt_ready_out[1])
|
||||
);
|
||||
|
||||
wire cvt_ret_int_in = ~is_itof;
|
||||
wire cvt_ret_int_out;
|
||||
|
||||
VX_fpu_cvt #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.TAG_WIDTH (TAG_WIDTH+1)
|
||||
) fpu_cvt (
|
||||
.clk (clk),
|
||||
.reset (cvt_reset),
|
||||
.valid_in (valid_in && (core_select == FPU_CVT)),
|
||||
.ready_in (per_core_ready_in[FPU_CVT]),
|
||||
.mask_in (mask_in),
|
||||
.tag_in ({cvt_ret_int_in, tag_in}),
|
||||
.frm (frm),
|
||||
.is_itof (is_itof),
|
||||
.is_signed (is_signed),
|
||||
.dataa (dataa_s),
|
||||
.has_fflags (per_core_has_fflags[FPU_CVT]),
|
||||
.fflags (per_core_fflags[FPU_CVT]),
|
||||
.result (per_core_result[FPU_CVT]),
|
||||
.tag_out ({cvt_ret_int_out, per_core_tag_out[FPU_CVT]}),
|
||||
.valid_out (per_core_valid_out[FPU_CVT]),
|
||||
.ready_out (per_core_ready_out[FPU_CVT])
|
||||
);
|
||||
|
||||
wire ncp_ret_int_in = (op_type == `INST_FPU_CMP)
|
||||
|| `INST_FPU_IS_CLASS(op_type, frm)
|
||||
|| `INST_FPU_IS_MVXW(op_type, frm);
|
||||
wire ncp_ret_int_out;
|
||||
|
||||
wire ncp_ret_sext_in = `INST_FPU_IS_MVXW(op_type, frm);
|
||||
wire ncp_ret_sext_out;
|
||||
|
||||
VX_fpu_ncp #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.TAG_WIDTH (TAG_WIDTH+2)
|
||||
) fpu_ncp (
|
||||
.clk (clk),
|
||||
.reset (ncp_reset),
|
||||
.valid_in (valid_in && (core_select == FPU_NCP)),
|
||||
.ready_in (per_core_ready_in[FPU_NCP]),
|
||||
.mask_in (mask_in),
|
||||
.tag_in ({ncp_ret_sext_in, ncp_ret_int_in, tag_in}),
|
||||
.op_type (op_type),
|
||||
.frm (frm),
|
||||
.dataa (dataa_s),
|
||||
.datab (datab_s),
|
||||
.result (per_core_result[FPU_NCP]),
|
||||
.has_fflags (per_core_has_fflags[FPU_NCP]),
|
||||
.fflags (per_core_fflags[FPU_NCP]),
|
||||
.tag_out ({ncp_ret_sext_out, ncp_ret_int_out, per_core_tag_out[FPU_NCP]}),
|
||||
.valid_out (per_core_valid_out[FPU_NCP]),
|
||||
.ready_out (per_core_ready_out[FPU_NCP])
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
assign per_core_ready_in[FPU_DIVSQRT] = is_div ? div_ready_in : sqrt_ready_in;
|
||||
wire [1:0][RSP_DATAW-1:0] div_sqrt_arb_data_in;
|
||||
for (genvar i = 0; i < 2; ++i) begin : g_div_sqrt_arb_data_in
|
||||
assign div_sqrt_arb_data_in[i] = {
|
||||
div_sqrt_result[i],
|
||||
div_sqrt_has_fflags[i],
|
||||
div_sqrt_fflags[i],
|
||||
div_sqrt_tag_out[i]
|
||||
};
|
||||
end
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATAW (RSP_DATAW),
|
||||
.ARBITER ("R"),
|
||||
.ARBITER ("P"),
|
||||
.OUT_BUF (0)
|
||||
) div_sqrt_arb (
|
||||
) div_sqrt_rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in ({sqrt_valid_out, div_valid_out}),
|
||||
.ready_in ({sqrt_ready_out, div_ready_out}),
|
||||
.data_in ({{sqrt_result, sqrt_has_fflags, sqrt_fflags, sqrt_tag_out},
|
||||
{div_result, div_has_fflags, div_fflags, div_tag_out}}),
|
||||
.valid_in (div_sqrt_valid_out),
|
||||
.ready_in (div_sqrt_ready_out),
|
||||
.data_in (div_sqrt_arb_data_in),
|
||||
.data_out ({
|
||||
per_core_result[FPU_DIVSQRT],
|
||||
per_core_has_fflags[FPU_DIVSQRT],
|
||||
|
@ -270,12 +302,73 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
// CVT core ///////////////////////////////////////////////////////////////
|
||||
|
||||
wire is_itof = per_core_op_type[FPU_CVT][1];
|
||||
wire is_signed = ~per_core_op_type[FPU_CVT][0];
|
||||
wire cvt_ret_int_in = ~is_itof;
|
||||
wire cvt_ret_int_out;
|
||||
|
||||
VX_fpu_cvt #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.TAG_WIDTH (1+TAG_WIDTH)
|
||||
) fpu_cvt (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (per_core_valid_in[FPU_CVT]),
|
||||
.ready_in (per_core_ready_in[FPU_CVT]),
|
||||
.mask_in (per_core_mask_in[FPU_CVT]),
|
||||
.tag_in ({cvt_ret_int_in, per_core_tag_in[FPU_CVT]}),
|
||||
.frm (per_core_frm[FPU_CVT]),
|
||||
.is_itof (is_itof),
|
||||
.is_signed (is_signed),
|
||||
.dataa (per_core_dataa[FPU_CVT]),
|
||||
.has_fflags (per_core_has_fflags[FPU_CVT]),
|
||||
.fflags (per_core_fflags[FPU_CVT]),
|
||||
.result (per_core_result[FPU_CVT]),
|
||||
.tag_out ({cvt_ret_int_out, per_core_tag_out[FPU_CVT]}),
|
||||
.valid_out (per_core_valid_out[FPU_CVT]),
|
||||
.ready_out (per_core_ready_out[FPU_CVT])
|
||||
);
|
||||
|
||||
// NCP core ///////////////////////////////////////////////////////////////
|
||||
|
||||
wire ncp_ret_int_in = (per_core_op_type[FPU_NCP] == `INST_FPU_CMP)
|
||||
|| `INST_FPU_IS_CLASS(per_core_op_type[FPU_NCP], per_core_frm[FPU_NCP])
|
||||
|| `INST_FPU_IS_MVXW(per_core_op_type[FPU_NCP], per_core_frm[FPU_NCP]);
|
||||
wire ncp_ret_int_out;
|
||||
|
||||
wire ncp_ret_sext_in = `INST_FPU_IS_MVXW(per_core_op_type[FPU_NCP], per_core_frm[FPU_NCP]);
|
||||
wire ncp_ret_sext_out;
|
||||
|
||||
VX_fpu_ncp #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.TAG_WIDTH (TAG_WIDTH+2)
|
||||
) fpu_ncp (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (per_core_valid_in[FPU_NCP]),
|
||||
.ready_in (per_core_ready_in[FPU_NCP]),
|
||||
.mask_in (per_core_mask_in[FPU_NCP]),
|
||||
.tag_in ({ncp_ret_sext_in, ncp_ret_int_in, per_core_tag_in[FPU_NCP]}),
|
||||
.op_type (per_core_op_type[FPU_NCP]),
|
||||
.frm (per_core_frm[FPU_NCP]),
|
||||
.dataa (per_core_dataa[FPU_NCP]),
|
||||
.datab (per_core_datab[FPU_NCP]),
|
||||
.result (per_core_result[FPU_NCP]),
|
||||
.has_fflags (per_core_has_fflags[FPU_NCP]),
|
||||
.fflags (per_core_fflags[FPU_NCP]),
|
||||
.tag_out ({ncp_ret_sext_out, ncp_ret_int_out, per_core_tag_out[FPU_NCP]}),
|
||||
.valid_out (per_core_valid_out[FPU_NCP]),
|
||||
.ready_out (per_core_ready_out[FPU_NCP])
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
reg [NUM_FPC-1:0][RSP_DATAW+2-1:0] per_core_data_out;
|
||||
reg [NUM_FPCORES-1:0][RSP_DATAW+2-1:0] per_core_data_out;
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_FPC; ++i) begin
|
||||
for (integer i = 0; i < NUM_FPCORES; ++i) begin
|
||||
per_core_data_out[i][RSP_DATAW+1:2] = {
|
||||
per_core_result[i],
|
||||
per_core_has_fflags[i],
|
||||
|
@ -289,12 +382,12 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
end
|
||||
|
||||
wire [NUM_LANES-1:0][31:0] result_s;
|
||||
|
||||
|
||||
wire [1:0] op_ret_int_out;
|
||||
`UNUSED_VAR (op_ret_int_out)
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (NUM_FPC),
|
||||
.NUM_INPUTS (NUM_FPCORES),
|
||||
.DATAW (RSP_DATAW + 2),
|
||||
.ARBITER ("R"),
|
||||
.OUT_BUF (OUT_BUF)
|
||||
|
@ -310,25 +403,22 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result
|
||||
`ifdef FPU_RV64F
|
||||
reg [`XLEN-1:0] result_r;
|
||||
reg [`XLEN-1:0] result_w;
|
||||
always @(*) begin
|
||||
case (op_ret_int_out)
|
||||
2'b11: result_r = `XLEN'($signed(result_s[i]));
|
||||
2'b01: result_r = {32'h00000000, result_s[i]};
|
||||
default: result_r = {32'hffffffff, result_s[i]};
|
||||
2'b11: result_w = `XLEN'($signed(result_s[i]));
|
||||
2'b01: result_w = {32'h00000000, result_s[i]};
|
||||
default: result_w = {32'hffffffff, result_s[i]};
|
||||
endcase
|
||||
end
|
||||
assign result[i] = result_r;
|
||||
assign result[i] = result_w;
|
||||
`else
|
||||
assign result[i] = result_s[i];
|
||||
`endif
|
||||
end
|
||||
|
||||
// can accept new request?
|
||||
assign ready_in = per_core_ready_in[core_select];
|
||||
|
||||
endmodule
|
||||
|
||||
`endif
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -21,7 +21,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
parameter TAG_WIDTH = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire reset,
|
||||
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
@ -29,7 +29,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
input wire [NUM_LANES-1:0] mask_in,
|
||||
|
||||
input wire [TAG_WIDTH-1:0] tag_in,
|
||||
|
||||
|
||||
input wire [`INST_FRM_BITS-1:0] frm,
|
||||
|
||||
input wire is_madd,
|
||||
|
@ -39,7 +39,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
input wire [NUM_LANES-1:0][31:0] dataa,
|
||||
input wire [NUM_LANES-1:0][31:0] datab,
|
||||
input wire [NUM_LANES-1:0][31:0] datac,
|
||||
output wire [NUM_LANES-1:0][31:0] result,
|
||||
output wire [NUM_LANES-1:0][31:0] result,
|
||||
|
||||
output wire has_fflags,
|
||||
output wire [`FP_FLAGS_BITS-1:0] fflags,
|
||||
|
@ -49,26 +49,27 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
`UNUSED_VAR (frm)
|
||||
localparam DATAW = 3 * 32 + `INST_FRM_BITS;
|
||||
|
||||
wire [NUM_LANES-1:0][3*32-1:0] data_in;
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0][DATAW-1:0] data_in;
|
||||
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
|
||||
wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out;
|
||||
|
||||
wire pe_enable;
|
||||
wire [NUM_PES-1:0][3*32-1:0] pe_data_in;
|
||||
wire pe_enable;
|
||||
wire [NUM_PES-1:0][DATAW-1:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
|
||||
|
||||
reg [NUM_LANES-1:0][31:0] a, b, c;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_select
|
||||
always @(*) begin
|
||||
if (is_madd) begin
|
||||
// MADD / MSUB / NMADD / NMSUB
|
||||
a[i] = is_neg ? {~dataa[i][31], dataa[i][30:0]} : dataa[i];
|
||||
a[i] = {is_neg ^ dataa[i][31], dataa[i][30:0]};
|
||||
b[i] = datab[i];
|
||||
c[i] = (is_neg ^ is_sub) ? {~datac[i][31], datac[i][30:0]} : datac[i];
|
||||
c[i] = {is_neg ^ is_sub ^ datac[i][31], datac[i][30:0]};
|
||||
end else begin
|
||||
if (is_neg) begin
|
||||
// MUL
|
||||
|
@ -77,28 +78,30 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
c[i] = '0;
|
||||
end else begin
|
||||
// ADD / SUB
|
||||
a[i] = 32'h3f800000; // 1.0f
|
||||
b[i] = dataa[i];
|
||||
c[i] = is_sub ? {~datab[i][31], datab[i][30:0]} : datab[i];
|
||||
a[i] = dataa[i];
|
||||
b[i] = 32'h3f800000; // 1.0f
|
||||
c[i] = {is_sub ^ datab[i][31], datab[i][30:0]};
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_data_in
|
||||
assign data_in[i][0 +: 32] = a[i];
|
||||
assign data_in[i][32 +: 32] = b[i];
|
||||
assign data_in[i][64 +: 32] = c[i];
|
||||
assign data_in[i][96 +: `INST_FRM_BITS] = frm;
|
||||
end
|
||||
|
||||
|
||||
VX_pe_serializer #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.LATENCY (`LATENCY_FMA),
|
||||
.DATA_IN_WIDTH(3*32),
|
||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.DATA_IN_WIDTH (DATAW),
|
||||
.DATA_OUT_WIDTH (`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG (1)
|
||||
.PE_REG (0),
|
||||
.OUT_BUF (2)
|
||||
) pe_serializer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -107,15 +110,17 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
.tag_in ({mask_in, tag_in}),
|
||||
.ready_in (ready_in),
|
||||
.pe_enable (pe_enable),
|
||||
.pe_data_in (pe_data_in),
|
||||
.pe_data_out(pe_data_out),
|
||||
.pe_data_out(pe_data_in),
|
||||
.pe_data_in (pe_data_out),
|
||||
.valid_out (valid_out),
|
||||
.data_out (data_out),
|
||||
.tag_out ({mask_out, tag_out}),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
`UNUSED_VAR (pe_data_in)
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result
|
||||
assign result[i] = data_out[i][0 +: 32];
|
||||
assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS];
|
||||
end
|
||||
|
@ -123,8 +128,8 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
fflags_t [NUM_LANES-1:0] per_lane_fflags;
|
||||
|
||||
`ifdef QUARTUS
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin : g_fmas
|
||||
acl_fmadd fmadd (
|
||||
.clk (clk),
|
||||
.areset (1'b0),
|
||||
|
@ -136,15 +141,15 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
);
|
||||
assign pe_data_out[i][32 +: `FP_FLAGS_BITS] = 'x;
|
||||
end
|
||||
|
||||
|
||||
assign has_fflags = 0;
|
||||
assign per_lane_fflags = 'x;
|
||||
|
||||
`elsif VIVADO
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin : g_fmas
|
||||
wire [2:0] tuser;
|
||||
|
||||
|
||||
xil_fma fma (
|
||||
.aclk (clk),
|
||||
.aclken (pe_enable),
|
||||
|
@ -167,20 +172,20 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
|
||||
`else
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin : g_fmas
|
||||
reg [63:0] r;
|
||||
`UNUSED_VAR (r)
|
||||
fflags_t f;
|
||||
|
||||
always @(*) begin
|
||||
always @(*) begin
|
||||
dpi_fmadd (
|
||||
pe_enable,
|
||||
int'(0),
|
||||
{32'hffffffff, pe_data_in[i][0 +: 32]},
|
||||
{32'hffffffff, pe_data_in[i][32 +: 32]},
|
||||
{32'hffffffff, pe_data_in[i][64 +: 32]},
|
||||
frm,
|
||||
r,
|
||||
pe_enable,
|
||||
int'(0),
|
||||
{32'hffffffff, pe_data_in[i][0 +: 32]}, // a
|
||||
{32'hffffffff, pe_data_in[i][32 +: 32]}, // b
|
||||
{32'hffffffff, pe_data_in[i][64 +: 32]}, // c
|
||||
pe_data_in[0][96 +: `INST_FRM_BITS], // frm
|
||||
r,
|
||||
f
|
||||
);
|
||||
end
|
||||
|
|
|
@ -90,7 +90,7 @@ module VX_fpu_fpnew
|
|||
|
||||
reg [TAG_WIDTH-1:0] fpu_tag_in, fpu_tag_out;
|
||||
|
||||
reg [2:0][NUM_LANES-1:0][`XLEN-1:0] fpu_operands;
|
||||
logic [2:0][NUM_LANES-1:0][`XLEN-1:0] fpu_operands;
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] fpu_result;
|
||||
fpnew_pkg::status_t fpu_status;
|
||||
|
@ -105,7 +105,7 @@ module VX_fpu_fpnew
|
|||
`UNUSED_VAR (fmt)
|
||||
|
||||
always @(*) begin
|
||||
fpu_op = 'x;
|
||||
fpu_op = fpnew_pkg::operation_e'('x);
|
||||
fpu_rnd = frm;
|
||||
fpu_op_mod = 0;
|
||||
fpu_has_fflags = 1;
|
||||
|
@ -134,20 +134,13 @@ module VX_fpu_fpnew
|
|||
fpu_op = fpnew_pkg::ADD;
|
||||
fpu_operands[1] = dataa;
|
||||
fpu_operands[2] = datab;
|
||||
end
|
||||
`INST_FPU_SUB: begin
|
||||
fpu_op = fpnew_pkg::ADD;
|
||||
fpu_operands[1] = dataa;
|
||||
fpu_operands[2] = datab;
|
||||
fpu_op_mod = 1;
|
||||
fpu_op_mod = fmt[1]; // FADD or FSUB
|
||||
end
|
||||
`INST_FPU_MUL: begin fpu_op = fpnew_pkg::MUL; end
|
||||
`INST_FPU_MADD: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = fmt[1]; end
|
||||
`INST_FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = ~fmt[1]; end
|
||||
`INST_FPU_DIV: begin fpu_op = fpnew_pkg::DIV; end
|
||||
`INST_FPU_SQRT: begin fpu_op = fpnew_pkg::SQRT; end
|
||||
`INST_FPU_MADD: begin fpu_op = fpnew_pkg::FMADD; end
|
||||
`INST_FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
|
||||
`INST_FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
|
||||
`INST_FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end
|
||||
`ifdef FLEN_64
|
||||
`INST_FPU_F2F: begin fpu_op = fpnew_pkg::F2F; fpu_src_fmt = fmt[0] ? fpnew_pkg::FP32 : fpnew_pkg::FP64; end
|
||||
`endif
|
||||
|
@ -169,7 +162,7 @@ module VX_fpu_fpnew
|
|||
end
|
||||
|
||||
`UNUSED_VAR (mask_in)
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_fpnew_coreses
|
||||
wire [(TAG_WIDTH+1)-1:0] fpu_tag;
|
||||
wire fpu_valid_out_uq;
|
||||
wire fpu_ready_in_uq;
|
||||
|
@ -183,8 +176,7 @@ module VX_fpu_fpnew
|
|||
.Features (FPU_FEATURES),
|
||||
.Implementation (FPU_IMPLEMENTATION),
|
||||
.TagType (logic[(TAG_WIDTH+1)-1:0]),
|
||||
.TrueSIMDClass (1),
|
||||
.EnableSIMDMask (1)
|
||||
.DivSqrtSel (fpnew_pkg::PULP)
|
||||
) fpnew_core (
|
||||
.clk_i (clk),
|
||||
.rst_ni (~reset),
|
||||
|
@ -196,11 +188,11 @@ module VX_fpu_fpnew
|
|||
.dst_fmt_i (fpu_dst_fmt),
|
||||
.int_fmt_i (fpu_int_fmt),
|
||||
.vectorial_op_i (1'b0),
|
||||
.simd_mask_i (mask_in[i]),
|
||||
.simd_mask_i (1'b1),
|
||||
.tag_i ({fpu_tag_in, fpu_has_fflags}),
|
||||
.in_valid_i (fpu_valid_in),
|
||||
.in_ready_o (fpu_ready_in_uq),
|
||||
.flush_i (reset),
|
||||
.flush_i (1'b0),
|
||||
.result_o (fpu_result[i]),
|
||||
.status_o (fpu_status_uq),
|
||||
.tag_o (fpu_tag),
|
||||
|
@ -209,7 +201,7 @@ module VX_fpu_fpnew
|
|||
`UNUSED_PIN (busy_o)
|
||||
);
|
||||
|
||||
if (i == 0) begin
|
||||
if (i == 0) begin : g_output_0
|
||||
assign {fpu_tag_out, fpu_has_fflags_out} = fpu_tag;
|
||||
assign fpu_valid_out = fpu_valid_out_uq;
|
||||
assign fpu_ready_in = fpu_ready_in_uq;
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -35,7 +35,7 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #(
|
|||
|
||||
input wire [NUM_LANES-1:0][31:0] dataa,
|
||||
input wire [NUM_LANES-1:0][31:0] datab,
|
||||
output wire [NUM_LANES-1:0][31:0] result,
|
||||
output wire [NUM_LANES-1:0][31:0] result,
|
||||
|
||||
output wire has_fflags,
|
||||
output wire [`FP_FLAGS_BITS-1:0] fflags,
|
||||
|
@ -44,31 +44,35 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #(
|
|||
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
`UNUSED_VAR (frm)
|
||||
);
|
||||
localparam DATAW = 2 * 32 + `INST_FRM_BITS + `INST_FPU_BITS;
|
||||
|
||||
wire [NUM_LANES-1:0][2*32-1:0] data_in;
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0][DATAW-1:0] data_in;
|
||||
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
|
||||
fflags_t [NUM_LANES-1:0] fflags_out;
|
||||
|
||||
wire pe_enable;
|
||||
wire [NUM_PES-1:0][2*32-1:0] pe_data_in;
|
||||
wire pe_enable;
|
||||
wire [NUM_PES-1:0][DATAW-1:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_data_in
|
||||
assign data_in[i][0 +: 32] = dataa[i];
|
||||
assign data_in[i][32 +: 32] = datab[i];
|
||||
assign data_in[i][64 +: `INST_FRM_BITS] = frm;
|
||||
assign data_in[i][64 + `INST_FRM_BITS +: `INST_FPU_BITS] = op_type;
|
||||
end
|
||||
|
||||
|
||||
VX_pe_serializer #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.LATENCY (`LATENCY_FNCP),
|
||||
.DATA_IN_WIDTH(2*32),
|
||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.DATA_IN_WIDTH (DATAW),
|
||||
.DATA_OUT_WIDTH (`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG (0)
|
||||
.PE_REG (0),
|
||||
.OUT_BUF (2)
|
||||
) pe_serializer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -77,28 +81,31 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #(
|
|||
.tag_in ({mask_in, tag_in}),
|
||||
.ready_in (ready_in),
|
||||
.pe_enable (pe_enable),
|
||||
.pe_data_in (pe_data_in),
|
||||
.pe_data_out(pe_data_out),
|
||||
.pe_data_out(pe_data_in),
|
||||
.pe_data_in (pe_data_out),
|
||||
.valid_out (valid_out),
|
||||
.data_out (data_out),
|
||||
.tag_out ({mask_out, tag_out}),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
`UNUSED_VAR (pe_data_in)
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result
|
||||
assign result[i] = data_out[i][0 +: 32];
|
||||
assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin : g_fncp_units
|
||||
VX_fncp_unit #(
|
||||
.LATENCY (`LATENCY_FNCP)
|
||||
.LATENCY (`LATENCY_FNCP),
|
||||
.OUT_REG (1)
|
||||
) fncp_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (pe_enable),
|
||||
.frm (frm),
|
||||
.op_type (op_type),
|
||||
.frm (pe_data_in[0][64 +: `INST_FRM_BITS]),
|
||||
.op_type (pe_data_in[0][64 + `INST_FRM_BITS +: `INST_FPU_BITS]),
|
||||
.dataa (pe_data_in[i][0 +: 32]),
|
||||
.datab (pe_data_in[i][32 +: 32]),
|
||||
.result (pe_data_out[i][0 +: 32]),
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue