mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
Merge branch 'master' into master
This commit is contained in:
commit
aad3b26332
1631 changed files with 1923326 additions and 186050 deletions
|
@ -1,3 +0,0 @@
|
|||
ignore:
|
||||
- "./examples/*"
|
||||
- "./tests/*"
|
176
.github/workflows/ci.yml
vendored
Normal file
176
.github/workflows/ci.yml
vendored
Normal file
|
@ -0,0 +1,176 @@
|
|||
# Copyright © 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
name: CI
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
jobs:
|
||||
setup:
|
||||
runs-on: ubuntu-20.04
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
- name: Cache Toolchain Directory
|
||||
id: cache-toolchain
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: tools
|
||||
key: ${{ runner.os }}-toolchain-v0.1
|
||||
restore-keys: |
|
||||
${{ runner.os }}-toolchain-
|
||||
|
||||
- name: Cache Third Party Directory
|
||||
id: cache-thirdparty
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: third_party
|
||||
key: ${{ runner.os }}-thirdparty-v0.1
|
||||
restore-keys: |
|
||||
${{ runner.os }}-thirdparty-
|
||||
|
||||
- name: Install Dependencies
|
||||
if: steps.cache-toolchain.outputs.cache-hit != 'true' || steps.cache-thirdparty.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
sudo bash ./ci/system_updates.sh
|
||||
|
||||
- name: Setup Toolchain
|
||||
if: steps.cache-toolchain.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
TOOLDIR=$PWD/tools
|
||||
mkdir -p build
|
||||
cd build
|
||||
../configure --tooldir=$TOOLDIR
|
||||
ci/toolchain_install.sh --all
|
||||
|
||||
- name: Setup Third Party
|
||||
if: steps.cache-thirdparty.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
make -C third_party > /dev/null
|
||||
|
||||
build:
|
||||
runs-on: ubuntu-20.04
|
||||
needs: setup
|
||||
strategy:
|
||||
matrix:
|
||||
xlen: [32, 64]
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
sudo bash ./ci/system_updates.sh
|
||||
|
||||
- name: Cache Toolchain Directory
|
||||
id: cache-toolchain
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: tools
|
||||
key: ${{ runner.os }}-toolchain-v0.1
|
||||
restore-keys: |
|
||||
${{ runner.os }}-toolchain-
|
||||
|
||||
- name: Cache Third Party Directory
|
||||
id: cache-thirdparty
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: third_party
|
||||
key: ${{ runner.os }}-thirdparty-v0.1
|
||||
restore-keys: |
|
||||
${{ runner.os }}-thirdparty-
|
||||
|
||||
- name: Run Build
|
||||
run: |
|
||||
TOOLDIR=$PWD/tools
|
||||
mkdir -p build${{ matrix.xlen }}
|
||||
cd build${{ matrix.xlen }}
|
||||
../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }}
|
||||
source ci/toolchain_env.sh
|
||||
make software -s > /dev/null
|
||||
make tests -s > /dev/null
|
||||
|
||||
- name: Upload Build Artifact
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: build-${{ matrix.xlen }}
|
||||
path: build${{ matrix.xlen }}
|
||||
|
||||
tests:
|
||||
runs-on: ubuntu-20.04
|
||||
needs: build
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
name: [regression, opencl, cache, config1, config2, debug, stress]
|
||||
xlen: [32, 64]
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
sudo bash ./ci/system_updates.sh
|
||||
|
||||
- name: Cache Toolchain Directory
|
||||
id: cache-toolchain
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: tools
|
||||
key: ${{ runner.os }}-toolchain-v0.1
|
||||
restore-keys: |
|
||||
${{ runner.os }}-toolchain-
|
||||
|
||||
- name: Cache Third Party Directory
|
||||
id: cache-thirdparty
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: third_party
|
||||
key: ${{ runner.os }}-thirdparty-v0.1
|
||||
restore-keys: |
|
||||
${{ runner.os }}-thirdparty-
|
||||
|
||||
- name: Download Build Artifact
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: build-${{ matrix.xlen }}
|
||||
path: build${{ matrix.xlen }}
|
||||
|
||||
- name: Run tests
|
||||
run: |
|
||||
cd build${{ matrix.xlen }}
|
||||
source ci/toolchain_env.sh
|
||||
chmod -R +x . # Ensure all files have executable permissions
|
||||
if [ "${{ matrix.name }}" == "regression" ]; then
|
||||
./ci/regression.sh --unittest
|
||||
./ci/regression.sh --isa
|
||||
./ci/regression.sh --kernel
|
||||
./ci/regression.sh --synthesis
|
||||
./ci/regression.sh --regression
|
||||
else
|
||||
./ci/regression.sh --${{ matrix.name }}
|
||||
fi
|
||||
|
||||
complete:
|
||||
runs-on: ubuntu-20.04
|
||||
needs: tests
|
||||
|
||||
steps:
|
||||
- name: Check Completion
|
||||
run: echo "All matrix jobs passed"
|
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
/build*
|
||||
/.vscode
|
||||
*.cache
|
2
.gitmodules
vendored
2
.gitmodules
vendored
|
@ -6,4 +6,4 @@
|
|||
url = https://github.com/ucb-bar/berkeley-softfloat-3.git
|
||||
[submodule "third_party/ramulator"]
|
||||
path = third_party/ramulator
|
||||
url = https://github.com/CMU-SAFARI/ramulator.git
|
||||
url = https://github.com/CMU-SAFARI/ramulator2.git
|
||||
|
|
90
.travis.yml
90
.travis.yml
|
@ -1,90 +0,0 @@
|
|||
language: cpp
|
||||
dist: focal
|
||||
os: linux
|
||||
compiler: gcc
|
||||
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- build-essential
|
||||
- valgrind
|
||||
- libstdc++6
|
||||
|
||||
env:
|
||||
global:
|
||||
- TOOLDIR=$HOME/tools
|
||||
|
||||
cache:
|
||||
directories:
|
||||
- $TOOLDIR
|
||||
- $HOME/build32
|
||||
- $HOME/build64
|
||||
|
||||
before_install:
|
||||
- if [ ! -d "$TOOLDIR" ] || [ -z "$(ls -A $TOOLDIR)" ]; then
|
||||
mkdir -p $TOOLDIR;
|
||||
OSDIR=ubuntu/focal ./ci/toolchain_install.sh --all;
|
||||
fi
|
||||
- source ./ci/toolchain_env.sh
|
||||
|
||||
stages:
|
||||
- setup
|
||||
- test
|
||||
|
||||
jobs:
|
||||
include:
|
||||
- stage: setup
|
||||
script:
|
||||
- rm -rf $HOME/build32 && cp -r $PWD $HOME/build32
|
||||
- rm -rf $HOME/build64 && cp -r $PWD $HOME/build64
|
||||
- make -C $HOME/build32
|
||||
- XLEN=64 make -C $HOME/build64
|
||||
- stage: test
|
||||
name: unittest
|
||||
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --unittest
|
||||
- stage: test
|
||||
name: isa
|
||||
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --isa
|
||||
- stage: test
|
||||
name: isa64
|
||||
script: cp -r $HOME/build64 build && cd build && XLEN=64 ./ci/travis_run.py ./ci/regression.sh --isa
|
||||
- stage: test
|
||||
name: regression
|
||||
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --regression
|
||||
- stage: test
|
||||
name: regression64
|
||||
script: cp -r $HOME/build64 build && cd build && XLEN=64 ./ci/travis_run.py ./ci/regression.sh --regression
|
||||
- stage: test
|
||||
name: opencl
|
||||
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --opencl
|
||||
- stage: test
|
||||
name: cluster
|
||||
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --cluster
|
||||
- stage: test
|
||||
name: config
|
||||
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --config
|
||||
- stage: test
|
||||
name: debug
|
||||
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --debug
|
||||
- stage: test
|
||||
name: stress0
|
||||
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --stress0
|
||||
- stage: test
|
||||
name: stress1
|
||||
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --stress1
|
||||
- stage: test
|
||||
name: synthesis
|
||||
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --synthesis
|
||||
- stage: test
|
||||
name: synthesis64
|
||||
script: cp -r $HOME/build64 build && cd build && XLEN=64 ./ci/travis_run.py ./ci/regression.sh --synthesis
|
||||
|
||||
after_success:
|
||||
# Gather code coverage
|
||||
- lcov --directory runtime --capture --output-file runtime.cov # capture trace
|
||||
- lcov --directory sim --capture --output-file sim.cov # capture trace
|
||||
- lcov --list runtime.cov # output coverage data for debugging
|
||||
- lcov --list sim.cov # output coverage data for debugging
|
||||
# Upload coverage report
|
||||
- bash <(curl -s https://codecov.io/bash) -f runtime.cov
|
||||
- bash <(curl -s https://codecov.io/bash) -f sim.cov
|
28
Makefile
28
Makefile
|
@ -1,28 +0,0 @@
|
|||
all:
|
||||
$(MAKE) -C third_party
|
||||
$(MAKE) -C hw
|
||||
$(MAKE) -C sim
|
||||
$(MAKE) -C kernel
|
||||
$(MAKE) -C runtime
|
||||
$(MAKE) -C tests
|
||||
|
||||
clean:
|
||||
$(MAKE) -C hw clean
|
||||
$(MAKE) -C sim clean
|
||||
$(MAKE) -C kernel clean
|
||||
$(MAKE) -C runtime clean
|
||||
$(MAKE) -C tests clean
|
||||
|
||||
clean-all:
|
||||
$(MAKE) -C third_party clean
|
||||
$(MAKE) -C hw clean
|
||||
$(MAKE) -C sim clean
|
||||
$(MAKE) -C kernel clean
|
||||
$(MAKE) -C runtime clean
|
||||
$(MAKE) -C tests clean-all
|
||||
|
||||
crtlsim:
|
||||
$(MAKE) -C sim clean
|
||||
|
||||
brtlsim:
|
||||
$(MAKE) -C sim
|
74
Makefile.in
Normal file
74
Makefile.in
Normal file
|
@ -0,0 +1,74 @@
|
|||
include config.mk
|
||||
|
||||
.PHONY: build software tests
|
||||
|
||||
all:
|
||||
$(MAKE) -C $(VORTEX_HOME)/third_party
|
||||
$(MAKE) -C hw
|
||||
$(MAKE) -C sim
|
||||
$(MAKE) -C kernel
|
||||
$(MAKE) -C runtime
|
||||
$(MAKE) -C tests
|
||||
|
||||
build:
|
||||
$(MAKE) -C hw
|
||||
$(MAKE) -C sim
|
||||
$(MAKE) -C kernel
|
||||
$(MAKE) -C runtime
|
||||
$(MAKE) -C tests
|
||||
|
||||
software:
|
||||
$(MAKE) -C hw
|
||||
$(MAKE) -C kernel
|
||||
$(MAKE) -C runtime/stub
|
||||
|
||||
tests:
|
||||
$(MAKE) -C tests
|
||||
|
||||
clean-build:
|
||||
$(MAKE) -C hw clean
|
||||
$(MAKE) -C sim clean
|
||||
$(MAKE) -C kernel clean
|
||||
$(MAKE) -C runtime clean
|
||||
$(MAKE) -C tests clean
|
||||
|
||||
clean: clean-build
|
||||
$(MAKE) -C $(VORTEX_HOME)/third_party clean
|
||||
|
||||
# Install setup
|
||||
KERNEL_INC_DST = $(INSTALLDIR)/kernel/include
|
||||
KERNEL_LIB_DST = $(INSTALLDIR)/kernel/lib$(XLEN)
|
||||
RUNTIME_INC_DST = $(INSTALLDIR)/runtime/include
|
||||
RUNTIME_LIB_DST = $(INSTALLDIR)/runtime/lib
|
||||
|
||||
KERNEL_HEADERS = $(wildcard $(VORTEX_HOME)/kernel/include/*.h)
|
||||
KERNEL_LIBS = $(wildcard kernel/*.a)
|
||||
RUNTIME_HEADERS = $(wildcard $(VORTEX_HOME)/runtime/include/*.h)
|
||||
RUNTIME_LIBS = $(wildcard runtime/*.so)
|
||||
|
||||
INSTALL_DIRS = $(KERNEL_LIB_DST) $(RUNTIME_LIB_DST) $(KERNEL_INC_DST) $(RUNTIME_INC_DST)
|
||||
|
||||
$(INSTALL_DIRS):
|
||||
mkdir -p $@
|
||||
|
||||
$(KERNEL_INC_DST)/VX_types.h: hw/VX_types.h | $(KERNEL_INC_DST)
|
||||
cp $< $@
|
||||
|
||||
$(KERNEL_INC_DST)/%.h: $(VORTEX_HOME)/kernel/include/%.h | $(KERNEL_INC_DST)
|
||||
cp $< $@
|
||||
|
||||
$(RUNTIME_INC_DST)/%.h: $(VORTEX_HOME)/runtime/include/%.h | $(RUNTIME_INC_DST)
|
||||
cp $< $@
|
||||
|
||||
$(KERNEL_LIB_DST)/%.a: kernel/%.a | $(KERNEL_LIB_DST)
|
||||
cp $< $@
|
||||
|
||||
$(RUNTIME_LIB_DST)/%.so: runtime/%.so | $(RUNTIME_LIB_DST)
|
||||
cp $< $@
|
||||
|
||||
install: $(INSTALL_DIRS) \
|
||||
$(KERNEL_INC_DST)/VX_types.h \
|
||||
$(KERNEL_HEADERS:$(VORTEX_HOME)/kernel/include/%=$(KERNEL_INC_DST)/%) \
|
||||
$(RUNTIME_HEADERS:$(VORTEX_HOME)/runtime/include/%=$(RUNTIME_INC_DST)/%) \
|
||||
$(KERNEL_LIBS:kernel/%=$(KERNEL_LIB_DST)/%) \
|
||||
$(RUNTIME_LIBS:runtime/%=$(RUNTIME_LIB_DST)/%)
|
83
README.md
83
README.md
|
@ -1,6 +1,3 @@
|
|||
[](https://travis-ci.com/vortexgpgpu/vortex)
|
||||
[](https://codecov.io/gh/vortexgpgpu/vortex)
|
||||
|
||||
# Vortex GPGPU
|
||||
|
||||
Vortex is a full-stack open-source RISC-V GPGPU.
|
||||
|
@ -12,10 +9,10 @@ Vortex is a full-stack open-source RISC-V GPGPU.
|
|||
- configurable number of cores, warps, and threads.
|
||||
- configurable number of ALU, FPU, LSU, and SFU units per core.
|
||||
- configurable pipeline issue width.
|
||||
- optional shared memory, L1, L2, and L3 caches.
|
||||
- Software:
|
||||
- optional local memory, L1, L2, and L3 caches.
|
||||
- Software:
|
||||
- OpenCL 1.2 Support.
|
||||
- Supported FPGAs:
|
||||
- Supported FPGAs:
|
||||
- Altera Arria 10
|
||||
- Altera Stratix 10
|
||||
- Xilinx Alveo U50, U250, U280
|
||||
|
@ -33,8 +30,9 @@ Vortex is a full-stack open-source RISC-V GPGPU.
|
|||
- `miscs`: Miscellaneous resources.
|
||||
|
||||
## Build Instructions
|
||||
More detailed build instructions can be found [here](docs/install_vortex.md).
|
||||
### Supported OS Platforms
|
||||
- Ubuntu 18.04
|
||||
- Ubuntu 18.04, 20.04
|
||||
- CentOS 7
|
||||
### Toolchain Dependencies
|
||||
- [POCL](http://portablecl.org/)
|
||||
|
@ -46,19 +44,64 @@ Vortex is a full-stack open-source RISC-V GPGPU.
|
|||
- [Ramulator](https://github.com/CMU-SAFARI/ramulator.git)
|
||||
- [Yosys](https://github.com/YosysHQ/yosys)
|
||||
- [Sv2v](https://github.com/zachjs/sv2v)
|
||||
### Install development tools
|
||||
$ sudo apt-get install build-essential
|
||||
$ sudo apt-get install git
|
||||
### Install development tools
|
||||
```sh
|
||||
sudo apt-get install build-essential
|
||||
sudo apt-get install binutils
|
||||
sudo apt-get install python
|
||||
sudo apt-get install uuid-dev
|
||||
sudo apt-get install git
|
||||
```
|
||||
### Install Vortex codebase
|
||||
$ git clone --recursive https://github.com/vortexgpgpu/vortex.git
|
||||
$ cd Vortex
|
||||
```sh
|
||||
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
|
||||
cd vortex
|
||||
```
|
||||
### Configure your build folder
|
||||
```sh
|
||||
mkdir build
|
||||
cd build
|
||||
../configure --xlen=32 --tooldir=$HOME/tools
|
||||
```
|
||||
### Install prebuilt toolchain
|
||||
By default, the toolchain will install to /opt folder.
|
||||
You can install the toolchain to a different directory by overriding TOOLDIR (e.g. export TOOLDIR=$HOME/tools).
|
||||
|
||||
$ ./ci/toolchain_install.sh --all
|
||||
$ source ./ci/toolchain_env.sh
|
||||
### Build Vortex sources
|
||||
$ make -s
|
||||
```sh
|
||||
./ci/toolchain_install.sh --all
|
||||
```
|
||||
### Set environment variables
|
||||
```sh
|
||||
# should always run before using the toolchain!
|
||||
source ./ci/toolchain_env.sh
|
||||
```
|
||||
### Building Vortex
|
||||
```sh
|
||||
make -s
|
||||
```
|
||||
### Quick demo running vecadd OpenCL kernel on 2 cores
|
||||
$ ./ci/blackbox.sh --cores=2 --app=vecadd
|
||||
```sh
|
||||
./ci/blackbox.sh --cores=2 --app=vecadd
|
||||
```
|
||||
|
||||
### Common Developer Tips
|
||||
- Installing Vortex kernel and runtime libraries to use with external tools requires passing --prefix=<install-path> to the configure script.
|
||||
```sh
|
||||
../configure --xlen=32 --tooldir=$HOME/tools --prefix=<install-path>
|
||||
make -s
|
||||
make install
|
||||
```
|
||||
- Building the 64-bit version of Vortex simply requires passing the --xlen=64 option to the configure script.
|
||||
```sh
|
||||
../configure --xlen=64 --tooldir=$HOME/tools
|
||||
```
|
||||
- Sourcing "./ci/toolchain_env.sh" is required every time you start a new terminal. We recommend adding "source <build-path>/ci/toolchain_env.sh" to your ~/.bashrc file to automate the process at login.
|
||||
```sh
|
||||
echo "source <build-path>/ci/toolchain_env.sh" >> ~/.bashrc
|
||||
```
|
||||
- Making changes to Makefiles in your source tree or adding new folders will require executing the "configure" script again to get it propagated into your build folder.
|
||||
```sh
|
||||
../configure
|
||||
```
|
||||
- To debug the GPU, you can generate a "run.log" trace. See /docs/debugging.md for more information.
|
||||
```sh
|
||||
./ci/blackbox.sh --app=demo --debug=3
|
||||
```
|
||||
- For additional information, check out the /docs.
|
||||
|
|
4
RELEASE
4
RELEASE
|
@ -1,4 +0,0 @@
|
|||
|
||||
Release Notes!
|
||||
|
||||
* 07/01/2020 - LKG FPGA build - Passed basic, demo, vecadd kernels.
|
23
TODO
23
TODO
|
@ -1,23 +0,0 @@
|
|||
|
||||
|
||||
|
||||
Functionality:
|
||||
1) vx_cl_warpSpawn()
|
||||
-> To be used by pocl->ops->run
|
||||
|
||||
2) newlib Integration (LoadFile(""))
|
||||
-> To be used by the Rhinio benchmarks
|
||||
|
||||
3) POCL OPS Vortex Suite
|
||||
|
||||
Performance:
|
||||
1) Icache doesn't need SEND_MEM_REQUEST Stage
|
||||
-> Blocks are never dirty, so why not evict right away
|
||||
|
||||
2) Branch not taken speculation
|
||||
|
||||
3) Runtime -02 not running on RTL, and -03 not running on RTL and Emulator
|
||||
|
||||
|
||||
Vector:
|
||||
1) Cycle accurate simulator (would require Cache Simulator)
|
|
@ -1,12 +1,12 @@
|
|||
#!/bin/sh
|
||||
|
||||
# Copyright © 2019-2023
|
||||
#
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -23,14 +23,14 @@ show_help()
|
|||
{
|
||||
show_usage
|
||||
echo " where"
|
||||
echo "--driver: simx, rtlsim, oape, xrt"
|
||||
echo "--driver: gpu, simx, rtlsim, oape, xrt"
|
||||
echo "--app: any subfolder test under regression or opencl"
|
||||
echo "--class: 0=disable, 1=pipeline, 2=memsys"
|
||||
echo "--rebuild: 0=disable, 1=force, 2=auto, 3=temp"
|
||||
}
|
||||
|
||||
SCRIPT_DIR=$(dirname "$0")
|
||||
VORTEX_HOME=$SCRIPT_DIR/..
|
||||
ROOT_DIR=$SCRIPT_DIR/..
|
||||
|
||||
DRIVER=simx
|
||||
APP=sgemm
|
||||
|
@ -91,12 +91,12 @@ case $i in
|
|||
;;
|
||||
--scope)
|
||||
SCOPE=1
|
||||
CORES=1
|
||||
CORES=1
|
||||
shift
|
||||
;;
|
||||
--perf=*)
|
||||
PERF_FLAG=-DPERF_ENABLE
|
||||
PERF_CLASS=${i#*=}
|
||||
PERF_CLASS=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--args=*)
|
||||
|
@ -117,8 +117,8 @@ case $i in
|
|||
exit 0
|
||||
;;
|
||||
*)
|
||||
show_usage
|
||||
exit -1
|
||||
show_usage
|
||||
exit -1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
@ -130,17 +130,20 @@ then
|
|||
fi
|
||||
|
||||
case $DRIVER in
|
||||
gpu)
|
||||
DRIVER_PATH=
|
||||
;;
|
||||
simx)
|
||||
DRIVER_PATH=$VORTEX_HOME/runtime/simx
|
||||
DRIVER_PATH=$ROOT_DIR/runtime/simx
|
||||
;;
|
||||
rtlsim)
|
||||
DRIVER_PATH=$VORTEX_HOME/runtime/rtlsim
|
||||
DRIVER_PATH=$ROOT_DIR/runtime/rtlsim
|
||||
;;
|
||||
opae)
|
||||
DRIVER_PATH=$VORTEX_HOME/runtime/opae
|
||||
DRIVER_PATH=$ROOT_DIR/runtime/opae
|
||||
;;
|
||||
xrt)
|
||||
DRIVER_PATH=$VORTEX_HOME/runtime/xrt
|
||||
DRIVER_PATH=$ROOT_DIR/runtime/xrt
|
||||
;;
|
||||
*)
|
||||
echo "invalid driver: $DRIVER"
|
||||
|
@ -148,49 +151,66 @@ case $DRIVER in
|
|||
;;
|
||||
esac
|
||||
|
||||
if [ -d "$VORTEX_HOME/tests/opencl/$APP" ];
|
||||
if [ -d "$ROOT_DIR/tests/opencl/$APP" ];
|
||||
then
|
||||
APP_PATH=$VORTEX_HOME/tests/opencl/$APP
|
||||
elif [ -d "$VORTEX_HOME/tests/regression/$APP" ];
|
||||
APP_PATH=$ROOT_DIR/tests/opencl/$APP
|
||||
elif [ -d "$ROOT_DIR/tests/regression/$APP" ];
|
||||
then
|
||||
APP_PATH=$VORTEX_HOME/tests/regression/$APP
|
||||
APP_PATH=$ROOT_DIR/tests/regression/$APP
|
||||
else
|
||||
echo "Application folder not found: $APP"
|
||||
exit -1
|
||||
fi
|
||||
|
||||
if [ "$DRIVER" = "gpu" ];
|
||||
then
|
||||
# running application
|
||||
if [ $HAS_ARGS -eq 1 ]
|
||||
then
|
||||
echo "running: OPTS=$ARGS make -C $APP_PATH run-$DRIVER"
|
||||
OPTS=$ARGS make -C $APP_PATH run-$DRIVER
|
||||
status=$?
|
||||
else
|
||||
echo "running: make -C $APP_PATH run-$DRIVER"
|
||||
make -C $APP_PATH run-$DRIVER
|
||||
status=$?
|
||||
fi
|
||||
|
||||
exit $status
|
||||
fi
|
||||
|
||||
CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS $L2 $L3 $PERF_FLAG $CONFIGS"
|
||||
|
||||
echo "CONFIGS=$CONFIGS"
|
||||
|
||||
if [ $REBUILD -ne 0 ]
|
||||
if [ $REBUILD -ne 0 ]
|
||||
then
|
||||
BLACKBOX_CACHE=blackbox.$DRIVER.cache
|
||||
BLACKBOX_CACHE=blackbox.$DRIVER.cache
|
||||
if [ -f "$BLACKBOX_CACHE" ]
|
||||
then
|
||||
then
|
||||
LAST_CONFIGS=`cat $BLACKBOX_CACHE`
|
||||
fi
|
||||
|
||||
if [ $REBUILD -eq 1 ] || [ "$CONFIGS+$DEBUG+$SCOPE" != "$LAST_CONFIGS" ];
|
||||
then
|
||||
make -C $DRIVER_PATH clean > /dev/null
|
||||
make -C $DRIVER_PATH clean-driver > /dev/null
|
||||
echo "$CONFIGS+$DEBUG+$SCOPE" > $BLACKBOX_CACHE
|
||||
fi
|
||||
fi
|
||||
|
||||
# export performance monitor class identifier
|
||||
export PERF_CLASS=$PERF_CLASS
|
||||
export VORTEX_PROFILING=$PERF_CLASS
|
||||
|
||||
status=0
|
||||
|
||||
# ensure config update
|
||||
make -C $VORTEX_HOME/hw config > /dev/null
|
||||
make -C $ROOT_DIR/hw config > /dev/null
|
||||
|
||||
# ensure the stub driver is present
|
||||
make -C $VORTEX_HOME/runtime/stub > /dev/null
|
||||
make -C $ROOT_DIR/runtime/stub > /dev/null
|
||||
|
||||
if [ $DEBUG -ne 0 ]
|
||||
then
|
||||
then
|
||||
# running application
|
||||
if [ $TEMPBUILD -eq 1 ]
|
||||
then
|
||||
|
@ -212,11 +232,11 @@ then
|
|||
if [ $HAS_ARGS -eq 1 ]
|
||||
then
|
||||
echo "running: VORTEX_RT_PATH=$TEMPDIR OPTS=$ARGS make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1"
|
||||
VORTEX_RT_PATH=$TEMPDIR OPTS=$ARGS make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1
|
||||
DEBUG=1 VORTEX_RT_PATH=$TEMPDIR OPTS=$ARGS make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1
|
||||
status=$?
|
||||
else
|
||||
echo "running: VORTEX_RT_PATH=$TEMPDIR make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1"
|
||||
VORTEX_RT_PATH=$TEMPDIR make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1
|
||||
DEBUG=1 VORTEX_RT_PATH=$TEMPDIR make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1
|
||||
status=$?
|
||||
fi
|
||||
|
||||
|
@ -237,26 +257,26 @@ then
|
|||
if [ $HAS_ARGS -eq 1 ]
|
||||
then
|
||||
echo "running: OPTS=$ARGS make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1"
|
||||
OPTS=$ARGS make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1
|
||||
DEBUG=1 OPTS=$ARGS make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1
|
||||
status=$?
|
||||
else
|
||||
echo "running: make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1"
|
||||
make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1
|
||||
DEBUG=1 make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1
|
||||
status=$?
|
||||
fi
|
||||
fi
|
||||
|
||||
|
||||
if [ -f "$APP_PATH/trace.vcd" ]
|
||||
then
|
||||
then
|
||||
mv -f $APP_PATH/trace.vcd .
|
||||
fi
|
||||
else
|
||||
else
|
||||
if [ $TEMPBUILD -eq 1 ]
|
||||
then
|
||||
# setup temp directory
|
||||
TEMPDIR=$(mktemp -d)
|
||||
mkdir -p "$TEMPDIR/$DRIVER"
|
||||
|
||||
|
||||
# driver initialization
|
||||
if [ $SCOPE -eq 1 ]
|
||||
then
|
||||
|
@ -266,7 +286,7 @@ else
|
|||
echo "running: DESTDIR=$TEMPDIR/$DRIVER CONFIGS=$CONFIGS make -C $DRIVER_PATH"
|
||||
DESTDIR="$TEMPDIR/$DRIVER" CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null
|
||||
fi
|
||||
|
||||
|
||||
# running application
|
||||
if [ $HAS_ARGS -eq 1 ]
|
||||
then
|
||||
|
@ -282,7 +302,7 @@ else
|
|||
# cleanup temp directory
|
||||
trap "rm -rf $TEMPDIR" EXIT
|
||||
else
|
||||
|
||||
|
||||
# driver initialization
|
||||
if [ $SCOPE -eq 1 ]
|
||||
then
|
||||
|
|
41
ci/datagen.py
Executable file
41
ci/datagen.py
Executable file
|
@ -0,0 +1,41 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright © 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import struct
|
||||
import random
|
||||
import sys
|
||||
|
||||
def create_binary_file(n, filename):
|
||||
# Open the file in binary write mode
|
||||
with open(filename, 'wb') as f:
|
||||
# Write the integer N as 4 bytes
|
||||
f.write(struct.pack('i', n))
|
||||
# Generate and write N floating-point numbers
|
||||
for _ in range(n):
|
||||
# Generate a random float between 0 and 1
|
||||
num = random.random()
|
||||
# Write the float in IEEE 754 format (4 bytes)
|
||||
f.write(struct.pack('f', num))
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) != 3:
|
||||
print("Usage: script.py N filename")
|
||||
sys.exit(1)
|
||||
|
||||
n = int(sys.argv[1])
|
||||
filename = sys.argv[2]
|
||||
|
||||
create_binary_file(n, filename)
|
||||
print(f"Created binary file '{filename}' containing {n} floats.")
|
322
ci/regression.sh
322
ci/regression.sh
|
@ -1,322 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright © 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# exit when any command fails
|
||||
set -e
|
||||
|
||||
# clear blackbox cache
|
||||
rm -f blackbox.*.cache
|
||||
|
||||
unittest()
|
||||
{
|
||||
make -C tests/unittest run
|
||||
make -C hw/unittest
|
||||
}
|
||||
|
||||
isa()
|
||||
{
|
||||
echo "begin isa tests..."
|
||||
|
||||
make -C tests/riscv/isa run-simx
|
||||
make -C tests/riscv/isa run-rtlsim
|
||||
CONFIGS="-DDPI_DISABLE" make -C tests/riscv/isa run-rtlsim
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim
|
||||
make -C tests/riscv/isa run-rtlsim-32f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim
|
||||
make -C tests/riscv/isa run-rtlsim-32f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim
|
||||
make -C tests/riscv/isa run-rtlsim-32f
|
||||
|
||||
if [ "$XLEN" == "64" ]
|
||||
then
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim
|
||||
make -C tests/riscv/isa run-rtlsim-64f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DEXT_D_ENABLE -DFPU_FPNEW" make -C sim/rtlsim
|
||||
make -C tests/riscv/isa run-rtlsim-64d || true
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim
|
||||
make -C tests/riscv/isa run-rtlsim-64f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim
|
||||
make -C tests/riscv/isa run-rtlsim-64fx
|
||||
fi
|
||||
|
||||
make -C sim/rtlsim clean && make -C sim/rtlsim
|
||||
|
||||
echo "isa tests done!"
|
||||
}
|
||||
|
||||
regression()
|
||||
{
|
||||
echo "begin regression tests..."
|
||||
|
||||
make -C tests/kernel run-simx
|
||||
make -C tests/kernel run-rtlsim
|
||||
|
||||
make -C tests/regression run-simx
|
||||
make -C tests/regression run-rtlsim
|
||||
|
||||
# test FPU hardware implementations
|
||||
CONFIGS="-DFPU_DPI" ./ci/blackbox.sh --driver=rtlsim --app=dogfood
|
||||
CONFIGS="-DFPU_DSP" ./ci/blackbox.sh --driver=rtlsim --app=dogfood
|
||||
CONFIGS="-DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood
|
||||
|
||||
# test local barrier
|
||||
./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -t19"
|
||||
./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-n1 -t19"
|
||||
|
||||
# test global barrier
|
||||
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -t20" --cores=2
|
||||
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-n1 -t20" --cores=2
|
||||
|
||||
# test FPU core
|
||||
|
||||
echo "regression tests done!"
|
||||
}
|
||||
|
||||
opencl()
|
||||
{
|
||||
echo "begin opencl tests..."
|
||||
|
||||
make -C tests/opencl run-simx
|
||||
make -C tests/opencl run-rtlsim
|
||||
|
||||
echo "opencl tests done!"
|
||||
}
|
||||
|
||||
cluster()
|
||||
{
|
||||
echo "begin clustering tests..."
|
||||
|
||||
# warp/threads configurations
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=1 --threads=1 --app=diverge
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=2 --threads=2 --app=diverge
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=2 --threads=8 --app=diverge
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=8 --threads=2 --app=diverge
|
||||
./ci/blackbox.sh --driver=simx --cores=1 --warps=1 --threads=1 --app=diverge
|
||||
./ci/blackbox.sh --driver=simx --cores=1 --warps=8 --threads=16 --app=diverge
|
||||
|
||||
# cores clustering
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=1 --clusters=1 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=4 --clusters=1 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=1 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --app=diverge --args="-n1"
|
||||
|
||||
# L2/L3
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l3cache --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --l2cache --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=4 --l2cache --l3cache --app=diverge --args="-n1"
|
||||
|
||||
echo "clustering tests done!"
|
||||
}
|
||||
|
||||
debug()
|
||||
{
|
||||
echo "begin debugging tests..."
|
||||
|
||||
# test CSV trace generation
|
||||
make -C sim/simx clean && DEBUG=3 make -C sim/simx
|
||||
make -C sim/rtlsim clean && DEBUG=3 CONFIGS="-DGPR_RESET" make -C sim/rtlsim
|
||||
make -C tests/riscv/isa run-simx-32im > run_simx.log
|
||||
make -C tests/riscv/isa run-rtlsim-32im > run_rtlsim.log
|
||||
./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv
|
||||
./ci/trace_csv.py -tsimx run_simx.log -otrace_simx.csv
|
||||
diff trace_rtlsim.csv trace_simx.csv
|
||||
make -C sim/simx clean && make -C sim/simx
|
||||
make -C sim/rtlsim clean && make -C sim/rtlsim
|
||||
|
||||
./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --perf=1 --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --perf=1 --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=opae --cores=1 --scope --app=basic --args="-t0 -n1"
|
||||
|
||||
echo "debugging tests done!"
|
||||
}
|
||||
|
||||
config()
|
||||
{
|
||||
echo "begin configuration tests..."
|
||||
|
||||
# disable DPI
|
||||
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood
|
||||
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood
|
||||
|
||||
# issue width
|
||||
CONFIGS="-DISSUE_WIDTH=1" ./ci/blackbox.sh --driver=rtlsim --app=diverge
|
||||
CONFIGS="-DISSUE_WIDTH=2" ./ci/blackbox.sh --driver=rtlsim --app=diverge
|
||||
CONFIGS="-DISSUE_WIDTH=1" ./ci/blackbox.sh --driver=simx --app=diverge
|
||||
CONFIGS="-DISSUE_WIDTH=2" ./ci/blackbox.sh --driver=simx --app=diverge
|
||||
|
||||
# dispatch size
|
||||
CONFIGS="-DNUM_ALU_BLOCK=1 -DNUM_ALU_LANES=1" ./ci/blackbox.sh --driver=rtlsim --app=diverge
|
||||
CONFIGS="-DNUM_ALU_BLOCK=2 -DNUM_ALU_LANES=2" ./ci/blackbox.sh --driver=rtlsim --app=diverge
|
||||
CONFIGS="-DNUM_ALU_BLOCK=1 -DNUM_ALU_LANES=1" ./ci/blackbox.sh --driver=simx --app=diverge
|
||||
CONFIGS="-DNUM_ALU_BLOCK=2 -DNUM_ALU_LANES=2" ./ci/blackbox.sh --driver=simx --app=diverge
|
||||
|
||||
# FPU scaling
|
||||
CONFIGS="-DNUM_ALU_BLOCK=4 -DNUM_FPU_LANES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemm
|
||||
CONFIGS="-DNUM_ALU_BLOCK=2 -DNUM_FPU_LANES=4" ./ci/blackbox.sh --driver=rtlsim --app=sgemm
|
||||
CONFIGS="-DNUM_ALU_BLOCK=4 -DNUM_FPU_LANES=4" ./ci/blackbox.sh --driver=rtlsim --app=sgemm
|
||||
|
||||
# custom program startup address
|
||||
make -C tests/regression/dogfood clean-all
|
||||
STARTUP_ADDR=0x40000000 make -C tests/regression/dogfood
|
||||
CONFIGS="-DSTARTUP_ADDR=0x40000000" ./ci/blackbox.sh --driver=simx --app=dogfood
|
||||
CONFIGS="-DSTARTUP_ADDR=0x40000000" ./ci/blackbox.sh --driver=rtlsim --app=dogfood
|
||||
make -C tests/regression/dogfood clean-all
|
||||
make -C tests/regression/dogfood
|
||||
|
||||
# disabling M extension
|
||||
CONFIGS="-DEXT_M_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=no_mf_ext
|
||||
|
||||
# disabling F extension
|
||||
CONFIGS="-DEXT_F_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=no_mf_ext
|
||||
CONFIGS="-DEXT_F_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=no_mf_ext --perf=1
|
||||
CONFIGS="-DEXT_F_DISABLE" ./ci/blackbox.sh --driver=simx --cores=1 --app=no_mf_ext --perf=1
|
||||
|
||||
# disable shared memory
|
||||
CONFIGS="-DSM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=no_smem
|
||||
CONFIGS="-DSM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=no_smem --perf=1
|
||||
CONFIGS="-DSM_DISABLE" ./ci/blackbox.sh --driver=simx --cores=1 --app=no_smem --perf=1
|
||||
|
||||
# disable L1 cache
|
||||
CONFIGS="-DL1_DISABLE -DSM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemm
|
||||
CONFIGS="-DDCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemm
|
||||
|
||||
# multiple L1 caches per cluster
|
||||
CONFIGS="-DNUM_DCACHES=2 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --cores=8 --warps=1 --threads=2
|
||||
|
||||
# test AXI bus
|
||||
AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
|
||||
|
||||
# adjust l1 block size to match l2
|
||||
CONFIGS="-DL1_LINE_SIZE=64" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr --args="-n1"
|
||||
|
||||
# test cache banking
|
||||
CONFIGS="-DSMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemm
|
||||
CONFIGS="-DSMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemm
|
||||
CONFIGS="-DSMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemm
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemm
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemm
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --cores=1 --app=sgemm
|
||||
|
||||
# test 128-bit MEM block
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --cores=1 --app=demo
|
||||
|
||||
# test single-bank DRAM
|
||||
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --cores=1 --app=demo
|
||||
|
||||
# test 27-bit DRAM address
|
||||
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --cores=1 --app=demo
|
||||
|
||||
echo "configuration tests done!"
|
||||
}
|
||||
|
||||
stress0()
|
||||
{
|
||||
echo "begin stress0 tests..."
|
||||
|
||||
# test verilator reset values
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=opae --app=printf
|
||||
|
||||
echo "stress0 tests done!"
|
||||
}
|
||||
|
||||
stress1()
|
||||
{
|
||||
echo "begin stress1 tests..."
|
||||
|
||||
./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n128" --l2cache
|
||||
|
||||
echo "stress1 tests done!"
|
||||
}
|
||||
|
||||
synthesis()
|
||||
{
|
||||
echo "begin synthesis tests..."
|
||||
|
||||
PREFIX=build_base make -C hw/syn/yosys clean
|
||||
PREFIX=build_base CONFIGS="-DDPI_DISABLE -DEXT_F_DISABLE" make -C hw/syn/yosys elaborate
|
||||
|
||||
echo "synthesis tests done!"
|
||||
}
|
||||
|
||||
show_usage()
|
||||
{
|
||||
echo "Vortex Regression Test"
|
||||
echo "Usage: $0 [--unittest] [--isa] [--regression] [--opencl] [--cluster] [--debug] [--config] [--stress[#n]] [--synthesis] [--all] [--h|--help]"
|
||||
}
|
||||
|
||||
start=$SECONDS
|
||||
|
||||
while [ "$1" != "" ]; do
|
||||
case $1 in
|
||||
--unittest ) unittest
|
||||
;;
|
||||
--isa ) isa
|
||||
;;
|
||||
--regression ) regression
|
||||
;;
|
||||
--opencl ) opencl
|
||||
;;
|
||||
--cluster ) cluster
|
||||
;;
|
||||
--debug ) debug
|
||||
;;
|
||||
--config ) config
|
||||
;;
|
||||
--stress0 ) stress0
|
||||
;;
|
||||
--stress1 ) stress1
|
||||
;;
|
||||
--stress ) stress0
|
||||
stress1
|
||||
;;
|
||||
--synthesis ) synthesis
|
||||
;;
|
||||
--all ) unittest
|
||||
isa
|
||||
regression
|
||||
opencl
|
||||
cluster
|
||||
debug
|
||||
config
|
||||
stress0
|
||||
stress1
|
||||
synthesis
|
||||
;;
|
||||
-h | --help ) show_usage
|
||||
exit
|
||||
;;
|
||||
* ) show_usage
|
||||
exit 1
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
echo "Regression completed!"
|
||||
|
||||
duration=$(( SECONDS - start ))
|
||||
awk -v t=$duration 'BEGIN{t=int(t*1000); printf "Elapsed Time: %d:%02d:%02d\n", t/3600000, t/60000%60, t/1000%60}'
|
417
ci/regression.sh.in
Executable file
417
ci/regression.sh.in
Executable file
|
@ -0,0 +1,417 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright © 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# exit when any command fails
|
||||
set -e
|
||||
|
||||
# clear blackbox cache
|
||||
rm -f blackbox.*.cache
|
||||
|
||||
XLEN=${XLEN:=@XLEN@}
|
||||
|
||||
XSIZE=$((XLEN / 8))
|
||||
|
||||
echo "Vortex Regression Test: XLEN=$XLEN"
|
||||
|
||||
unittest()
|
||||
{
|
||||
make -C tests/unittest run
|
||||
make -C hw/unittest > /dev/null
|
||||
}
|
||||
|
||||
isa()
|
||||
{
|
||||
echo "begin isa tests..."
|
||||
|
||||
make -C sim/simx
|
||||
make -C sim/rtlsim
|
||||
|
||||
make -C tests/riscv/isa run-simx
|
||||
make -C tests/riscv/isa run-rtlsim
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-32f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-32f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-32f
|
||||
|
||||
if [ "$XLEN" == "64" ]
|
||||
then
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64d
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64d
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64fx
|
||||
fi
|
||||
|
||||
# clean build
|
||||
make -C sim/rtlsim clean
|
||||
|
||||
echo "isa tests done!"
|
||||
}
|
||||
|
||||
kernel()
|
||||
{
|
||||
echo "begin kernel tests..."
|
||||
|
||||
make -C sim/simx
|
||||
make -C sim/rtlsim
|
||||
|
||||
make -C tests/kernel run-simx
|
||||
make -C tests/kernel run-rtlsim
|
||||
|
||||
echo "kernel tests done!"
|
||||
}
|
||||
|
||||
regression()
|
||||
{
|
||||
echo "begin regression tests..."
|
||||
|
||||
make -C runtime/simx
|
||||
make -C runtime/rtlsim
|
||||
|
||||
make -C tests/regression run-simx
|
||||
make -C tests/regression run-rtlsim
|
||||
|
||||
# test global barrier
|
||||
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tgbar" --cores=2
|
||||
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=opae --app=dogfood --args="-n1 -tgbar" --cores=2
|
||||
|
||||
# test local barrier
|
||||
./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar"
|
||||
./ci/blackbox.sh --driver=opae --app=dogfood --args="-n1 -tbar"
|
||||
|
||||
echo "regression tests done!"
|
||||
}
|
||||
|
||||
opencl()
|
||||
{
|
||||
echo "begin opencl tests..."
|
||||
|
||||
make -C runtime/simx
|
||||
make -C runtime/rtlsim
|
||||
|
||||
make -C tests/opencl run-simx
|
||||
make -C tests/opencl run-rtlsim
|
||||
|
||||
./ci/blackbox.sh --driver=simx --app=lbm --warps=8
|
||||
./ci/blackbox.sh --driver=rtlsim --app=lbm --warps=8
|
||||
|
||||
echo "opencl tests done!"
|
||||
}
|
||||
|
||||
# Cache regression suite: runs ./ci/blackbox.sh over a set of cache-related
# CONFIGS overrides (local memory, L1 enable/disable, line size, ways,
# banking, writeback, clustering, L2/L3). Reads $XSIZE, which this script
# computes at top level as XLEN / 8. Under `set -e` the first failing
# command aborts the whole run.
cache()
{
    echo "begin cache tests..."

    # disable local memory
    CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo --perf=1
    CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=demo --perf=1

    # disable L1 cache
    CONFIGS="-DL1_DISABLE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
    CONFIGS="-DL1_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
    CONFIGS="-DDCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
    CONFIGS="-DICACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx

    # reduce l1 line size
    CONFIGS="-DL1_LINE_SIZE=$XSIZE" ./ci/blackbox.sh --driver=rtlsim --app=io_addr
    CONFIGS="-DL1_LINE_SIZE=$XSIZE" ./ci/blackbox.sh --driver=simx --app=io_addr
    CONFIGS="-DL1_LINE_SIZE=$XSIZE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
    CONFIGS="-DL1_LINE_SIZE=$XSIZE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx

    # test cache ways
    CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
    CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx

    # test cache banking
    CONFIGS="-DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
    CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
    CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
    CONFIGS="-DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
    CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
    CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx

    # test writeback
    CONFIGS="-DDCACHE_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --app=mstress
    CONFIGS="-DDCACHE_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --app=mstress
    CONFIGS="-DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
    CONFIGS="-DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=mstress

    # cache clustering
    CONFIGS="-DSOCKET_SIZE=4 -DNUM_DCACHES=4 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --cores=4 --warps=1 --threads=2

    # L2/L3
    CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=rtlsim --cores=4 --l2cache --app=diverge --args="-n1"
    CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=4 --l2cache --app=diverge --args="-n1"
    CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=diverge --args="-n1"
    CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=diverge --args="-n1"

    # Closing marker: previously repeated the "begin" text, which made the
    # log look as if the suite had started twice and never finished.
    echo "cache tests done!"
}
|
||||
|
||||
config1()
|
||||
{
|
||||
echo "begin configuration-1 tests..."
|
||||
|
||||
# warp/threads
|
||||
./ci/blackbox.sh --driver=rtlsim --warps=1 --threads=1 --app=diverge
|
||||
./ci/blackbox.sh --driver=rtlsim --warps=2 --threads=2 --app=diverge
|
||||
./ci/blackbox.sh --driver=rtlsim --warps=2 --threads=8 --app=diverge
|
||||
./ci/blackbox.sh --driver=rtlsim --warps=8 --threads=2 --app=diverge
|
||||
./ci/blackbox.sh --driver=simx --warps=1 --threads=1 --app=diverge
|
||||
./ci/blackbox.sh --driver=simx --warps=8 --threads=16 --app=diverge
|
||||
|
||||
# cores clustering
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=4 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --app=diverge --args="-n1"
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=diverge --args="-n1"
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --app=diverge --args="-n1"
|
||||
|
||||
# issue width
|
||||
CONFIGS="-DISSUE_WIDTH=2" ./ci/blackbox.sh --driver=rtlsim --app=diverge
|
||||
CONFIGS="-DISSUE_WIDTH=4" ./ci/blackbox.sh --driver=rtlsim --app=diverge
|
||||
CONFIGS="-DISSUE_WIDTH=2" ./ci/blackbox.sh --driver=simx --app=diverge
|
||||
CONFIGS="-DISSUE_WIDTH=4" ./ci/blackbox.sh --driver=simx --app=diverge
|
||||
|
||||
# ALU scaling
|
||||
CONFIGS="-DISSUE_WIDTH=2 -DNUM_ALU_BLOCK=1 -DNUM_ALU_LANES=2" ./ci/blackbox.sh --driver=rtlsim --app=diverge
|
||||
CONFIGS="-DISSUE_WIDTH=4 -DNUM_ALU_BLOCK=4 -DNUM_ALU_LANES=4" ./ci/blackbox.sh --driver=rtlsim --app=diverge
|
||||
CONFIGS="-DISSUE_WIDTH=2 -DNUM_ALU_BLOCK=1 -DNUM_ALU_LANES=2" ./ci/blackbox.sh --driver=simx --app=diverge
|
||||
CONFIGS="-DISSUE_WIDTH=4 -DNUM_ALU_BLOCK=4 -DNUM_ALU_LANES=4" ./ci/blackbox.sh --driver=simx --app=diverge
|
||||
|
||||
# FPU scaling
|
||||
CONFIGS="-DISSUE_WIDTH=2 -DNUM_FPU_BLOCK=1 -DNUM_FPU_LANES=2" ./ci/blackbox.sh --driver=rtlsim --app=vecaddx
|
||||
CONFIGS="-DISSUE_WIDTH=4 -DNUM_FPU_BLOCK=4 -DNUM_FPU_LANES=4" ./ci/blackbox.sh --driver=rtlsim --app=vecaddx
|
||||
CONFIGS="-DISSUE_WIDTH=2 -DNUM_FPU_BLOCK=1 -DNUM_FPU_LANES=2" ./ci/blackbox.sh --driver=simx --app=vecaddx
|
||||
CONFIGS="-DISSUE_WIDTH=4 -DNUM_FPU_BLOCK=4 -DNUM_FPU_LANES=4" ./ci/blackbox.sh --driver=simx --app=vecaddx
|
||||
|
||||
# FPU's PE scaling
|
||||
CONFIGS="-DFMA_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfmadd"
|
||||
CONFIGS="-DFCVT_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tftoi"
|
||||
CONFIGS="-DFDIV_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfdiv"
|
||||
CONFIGS="-DFSQRT_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfsqrt"
|
||||
CONFIGS="-DFNCP_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfclamp"
|
||||
|
||||
# LSU scaling
|
||||
CONFIGS="-DISSUE_WIDTH=2 -DNUM_LSU_BLOCK=1 -DNUM_LSU_LANES=2" ./ci/blackbox.sh --driver=rtlsim --app=vecaddx
|
||||
CONFIGS="-DISSUE_WIDTH=4 -DNUM_LSU_BLOCK=4 -DNUM_LSU_LANES=4" ./ci/blackbox.sh --driver=rtlsim --app=vecaddx
|
||||
CONFIGS="-DISSUE_WIDTH=2 -DNUM_LSU_BLOCK=1 -DNUM_LSU_LANES=2" ./ci/blackbox.sh --driver=simx --app=vecaddx
|
||||
CONFIGS="-DISSUE_WIDTH=4 -DNUM_LSU_BLOCK=4 -DNUM_LSU_LANES=4" ./ci/blackbox.sh --driver=simx --app=vecaddx
|
||||
|
||||
echo "configuration-1 tests done!"
|
||||
}
|
||||
|
||||
config2()
|
||||
{
|
||||
echo "begin configuration-2 tests..."
|
||||
|
||||
# test opaesim
|
||||
./ci/blackbox.sh --driver=opae --app=printf
|
||||
./ci/blackbox.sh --driver=opae --app=diverge
|
||||
|
||||
# disable DPI
|
||||
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood
|
||||
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood
|
||||
|
||||
# custom program startup address
|
||||
make -C tests/regression/dogfood clean-kernel
|
||||
STARTUP_ADDR=0x40000000 make -C tests/regression/dogfood
|
||||
./ci/blackbox.sh --driver=simx --app=dogfood
|
||||
./ci/blackbox.sh --driver=rtlsim --app=dogfood
|
||||
make -C tests/regression/dogfood clean-kernel
|
||||
|
||||
# disabling M & F extensions
|
||||
make -C sim/rtlsim clean && CONFIGS="-DEXT_M_DISABLE -DEXT_F_DISABLE" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-32i
|
||||
make -C sim/rtlsim clean
|
||||
|
||||
# disabling ZICOND extension
|
||||
CONFIGS="-DEXT_ZICOND_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo
|
||||
|
||||
# test AXI bus
|
||||
AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --app=mstress
|
||||
|
||||
# test 128-bit MEM block
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
|
||||
# test XLEN-bit MEM block
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=simx --app=mstress
|
||||
|
||||
# test memory coalescing
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=rtlsim --app=mstress --threads=8
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8
|
||||
|
||||
# test single-bank DRAM
|
||||
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
|
||||
# test 27-bit DRAM address
|
||||
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
|
||||
echo "configuration-2 tests done!"
|
||||
}
|
||||
|
||||
test_csv_trace()
|
||||
{
|
||||
# test CSV trace generation
|
||||
make -C sim/simx clean && DEBUG=3 make -C sim/simx > /dev/null
|
||||
make -C sim/rtlsim clean && DEBUG=3 CONFIGS="-DGPR_RESET" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-simx-32im > run_simx.log
|
||||
make -C tests/riscv/isa run-rtlsim-32im > run_rtlsim.log
|
||||
./ci/trace_csv.py -tsimx run_simx.log -otrace_simx.csv
|
||||
./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv
|
||||
diff trace_rtlsim.csv trace_simx.csv
|
||||
# clean build
|
||||
make -C sim/simx clean
|
||||
make -C sim/rtlsim clean
|
||||
}
|
||||
|
||||
debug()
|
||||
{
|
||||
echo "begin debugging tests..."
|
||||
|
||||
test_csv_trace
|
||||
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=opae --cores=1 --scope --app=demo --args="-n1"
|
||||
|
||||
echo "debugging tests done!"
|
||||
}
|
||||
|
||||
stress()
|
||||
{
|
||||
echo "begin stress tests..."
|
||||
|
||||
# test verilator reset values
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1 -DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --args="-n128" --l2cache
|
||||
|
||||
echo "stress tests done!"
|
||||
}
|
||||
|
||||
synthesis()
|
||||
{
|
||||
echo "begin synthesis tests..."
|
||||
|
||||
PREFIX=build_base make -C hw/syn/yosys clean
|
||||
PREFIX=build_base CONFIGS="-DDPI_DISABLE -DEXT_F_DISABLE" make -C hw/syn/yosys synthesis
|
||||
|
||||
echo "synthesis tests done!"
|
||||
}
|
||||
|
||||
# Print the script banner and the list of accepted command-line options.
# The help flag is shown as -h|--help to match what the option parser
# accepts; "--h" is not a recognised option.
show_usage()
{
    echo "Vortex Regression Test"
    echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--stress] [--synthesis] [--all] [-h|--help]"
}
|
||||
|
||||
declare -a tests=()
|
||||
clean=0
|
||||
|
||||
while [ "$1" != "" ]; do
|
||||
case $1 in
|
||||
--clean )
|
||||
clean=1
|
||||
;;
|
||||
--unittest )
|
||||
tests+=("unittest")
|
||||
;;
|
||||
--isa )
|
||||
tests+=("isa")
|
||||
;;
|
||||
--kernel )
|
||||
tests+=("kernel")
|
||||
;;
|
||||
--regression )
|
||||
tests+=("regression")
|
||||
;;
|
||||
--opencl )
|
||||
tests+=("opencl")
|
||||
;;
|
||||
--cache )
|
||||
tests+=("cache")
|
||||
;;
|
||||
--config1 )
|
||||
tests+=("config1")
|
||||
;;
|
||||
--config2 )
|
||||
tests+=("config2")
|
||||
;;
|
||||
--debug )
|
||||
tests+=("debug")
|
||||
;;
|
||||
--stress )
|
||||
tests+=("stress")
|
||||
;;
|
||||
--synthesis )
|
||||
tests+=("synthesis")
|
||||
;;
|
||||
--all )
|
||||
tests=()
|
||||
tests+=("unittest")
|
||||
tests+=("isa")
|
||||
tests+=("kernel")
|
||||
tests+=("regression")
|
||||
tests+=("opencl")
|
||||
tests+=("cache")
|
||||
tests+=("config1")
|
||||
tests+=("config2")
|
||||
tests+=("debug")
|
||||
tests+=("stress")
|
||||
tests+=("synthesis")
|
||||
;;
|
||||
-h | --help )
|
||||
show_usage
|
||||
exit
|
||||
;;
|
||||
* )
|
||||
show_usage
|
||||
exit 1
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
if [ $clean -eq 1 ];
|
||||
then
|
||||
make clean
|
||||
make -s
|
||||
fi
|
||||
|
||||
start=$SECONDS
|
||||
|
||||
for test in "${tests[@]}"; do
|
||||
$test
|
||||
done
|
||||
|
||||
echo "Regression completed!"
|
||||
|
||||
duration=$(( SECONDS - start ))
|
||||
awk -v t=$duration 'BEGIN{t=int(t*1000); printf "Elapsed Time: %d:%02d:%02d\n", t/3600000, t/60000%60, t/1000%60}'
|
27
ci/system_updates.sh
Executable file
27
ci/system_updates.sh
Executable file
|
@ -0,0 +1,27 @@
|
|||
#!/bin/sh
|
||||
|
||||
# Copyright 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
set -e
|
||||
|
||||
apt-get update -y
|
||||
|
||||
add-apt-repository -y ppa:ubuntu-toolchain-r/test
|
||||
apt-get update
|
||||
apt-get install -y g++-11 gcc-11
|
||||
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 100
|
||||
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100
|
||||
|
||||
apt-get install -y build-essential valgrind libstdc++6 binutils python uuid-dev ccache
|
13
ci/toolchain_env.sh → ci/toolchain_env.sh.in
Normal file → Executable file
13
ci/toolchain_env.sh → ci/toolchain_env.sh.in
Normal file → Executable file
|
@ -1,23 +1,22 @@
|
|||
#!/bin/sh
|
||||
|
||||
# Copyright 2023 blaise
|
||||
#
|
||||
# Copyright 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
TOOLDIR=${TOOLDIR:=/opt}
|
||||
TOOLDIR=${TOOLDIR:=@TOOLDIR@}
|
||||
|
||||
export VERILATOR_ROOT=$TOOLDIR/verilator
|
||||
export PATH=$VERILATOR_ROOT/bin:$PATH
|
||||
export PATH=$TOOLDIR/verilator/bin:$PATH
|
||||
|
||||
export SV2V_PATH=$TOOLDIR/sv2v
|
||||
export PATH=$SV2V_PATH/bin:$PATH
|
|
@ -1,184 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright © 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# exit when any command fails
|
||||
set -e
|
||||
|
||||
REPOSITORY=https://github.com/vortexgpgpu/vortex-toolchain-prebuilt/raw/master
|
||||
TOOLDIR=${TOOLDIR:=/opt}
|
||||
OSDIR=${OSDIR:=ubuntu/bionic}
|
||||
|
||||
OS="${OS:=ubuntu/bionic}"
|
||||
|
||||
riscv()
|
||||
{
|
||||
case $OSDIR in
|
||||
"centos/7") parts=$(eval echo {a..h}) ;;
|
||||
*) parts=$(eval echo {a..j}) ;;
|
||||
esac
|
||||
rm -f riscv-gnu-toolchain.tar.bz2.parta*
|
||||
for x in $parts
|
||||
do
|
||||
wget $REPOSITORY/riscv-gnu-toolchain/$OSDIR/riscv-gnu-toolchain.tar.bz2.parta$x
|
||||
done
|
||||
cat riscv-gnu-toolchain.tar.bz2.parta* > riscv-gnu-toolchain.tar.bz2
|
||||
tar -xvf riscv-gnu-toolchain.tar.bz2
|
||||
cp -r riscv-gnu-toolchain $TOOLDIR
|
||||
rm -f riscv-gnu-toolchain.tar.bz2*
|
||||
rm -rf riscv-gnu-toolchain
|
||||
}
|
||||
|
||||
riscv64()
|
||||
{
|
||||
case $OSDIR in
|
||||
"centos/7") parts=$(eval echo {a..h}) ;;
|
||||
*) parts=$(eval echo {a..j}) ;;
|
||||
esac
|
||||
rm -f riscv64-gnu-toolchain.tar.bz2.parta*
|
||||
for x in $parts
|
||||
do
|
||||
wget $REPOSITORY/riscv64-gnu-toolchain/$OSDIR/riscv64-gnu-toolchain.tar.bz2.parta$x
|
||||
done
|
||||
cat riscv64-gnu-toolchain.tar.bz2.parta* > riscv64-gnu-toolchain.tar.bz2
|
||||
tar -xvf riscv64-gnu-toolchain.tar.bz2
|
||||
cp -r riscv64-gnu-toolchain $TOOLDIR
|
||||
rm -f riscv64-gnu-toolchain.tar.bz2*
|
||||
rm -rf riscv64-gnu-toolchain
|
||||
}
|
||||
|
||||
llvm-vortex()
|
||||
{
|
||||
case $OSDIR in
|
||||
"centos/7") parts=$(eval echo {a..b}) ;;
|
||||
*) parts=$(eval echo {a..b}) ;;
|
||||
esac
|
||||
echo $parts
|
||||
rm -f llvm-vortex.tar.bz2.parta*
|
||||
for x in $parts
|
||||
do
|
||||
wget $REPOSITORY/llvm-vortex/$OSDIR/llvm-vortex.tar.bz2.parta$x
|
||||
done
|
||||
cat llvm-vortex.tar.bz2.parta* > llvm-vortex.tar.bz2
|
||||
tar -xvf llvm-vortex.tar.bz2
|
||||
cp -r llvm-vortex $TOOLDIR
|
||||
rm -f llvm-vortex.tar.bz2*
|
||||
rm -rf llvm-vortex
|
||||
}
|
||||
|
||||
llvm-pocl()
|
||||
{
|
||||
case $OSDIR in
|
||||
"centos/7") parts=$(eval echo {a..b}) ;;
|
||||
*) parts=$(eval echo {a..b}) ;;
|
||||
esac
|
||||
echo $parts
|
||||
rm -f llvm-pocl.tar.bz2.parta*
|
||||
for x in $parts
|
||||
do
|
||||
wget $REPOSITORY/llvm-pocl/$OSDIR/llvm-pocl.tar.bz2.parta$x
|
||||
done
|
||||
cat llvm-pocl.tar.bz2.parta* > llvm-pocl.tar.bz2
|
||||
tar -xvf llvm-pocl.tar.bz2
|
||||
cp -r llvm-pocl $TOOLDIR
|
||||
rm -f llvm-pocl.tar.bz2*
|
||||
rm -rf llvm-pocl
|
||||
}
|
||||
|
||||
pocl()
|
||||
{
|
||||
wget $REPOSITORY/pocl/$OSDIR/pocl.tar.bz2
|
||||
tar -xvf pocl.tar.bz2
|
||||
rm -f pocl.tar.bz2
|
||||
cp -r pocl $TOOLDIR
|
||||
rm -rf pocl
|
||||
}
|
||||
|
||||
verilator()
|
||||
{
|
||||
wget $REPOSITORY/verilator/$OSDIR/verilator.tar.bz2
|
||||
tar -xvf verilator.tar.bz2
|
||||
cp -r verilator $TOOLDIR
|
||||
rm -f verilator.tar.bz2
|
||||
rm -rf verilator
|
||||
}
|
||||
|
||||
sv2v()
|
||||
{
|
||||
wget $REPOSITORY/sv2v/$OSDIR/sv2v.tar.bz2
|
||||
tar -xvf sv2v.tar.bz2
|
||||
rm -f sv2v.tar.bz2
|
||||
cp -r sv2v $TOOLDIR
|
||||
rm -rf sv2v
|
||||
}
|
||||
|
||||
yosys()
|
||||
{
|
||||
case $OSDIR in
|
||||
"centos/7") parts=$(eval echo {a..c}) ;;
|
||||
*) parts=$(eval echo {a..c}) ;;
|
||||
esac
|
||||
echo $parts
|
||||
rm -f yosys.tar.bz2.parta*
|
||||
for x in $parts
|
||||
do
|
||||
wget $REPOSITORY/yosys/$OSDIR/yosys.tar.bz2.parta$x
|
||||
done
|
||||
cat yosys.tar.bz2.parta* > yosys.tar.bz2
|
||||
tar -xvf yosys.tar.bz2
|
||||
cp -r yosys $TOOLDIR
|
||||
rm -f yosys.tar.bz2*
|
||||
rm -rf yosys
|
||||
}
|
||||
|
||||
show_usage()
|
||||
{
|
||||
echo "Install Pre-built Vortex Toolchain"
|
||||
echo "Usage: $0 [[--riscv] [--riscv64] [--llvm-vortex] [--llvm-pocl] [--pocl] [--verilator] [--sv2v] [--yosys] [--all] [-h|--help]]"
|
||||
}
|
||||
|
||||
while [ "$1" != "" ]; do
|
||||
case $1 in
|
||||
--pocl ) pocl
|
||||
;;
|
||||
--verilator ) verilator
|
||||
;;
|
||||
--riscv ) riscv
|
||||
;;
|
||||
--riscv64 ) riscv64
|
||||
;;
|
||||
--llvm-vortex ) llvm-vortex
|
||||
;;
|
||||
--llvm-pocl ) llvm-pocl
|
||||
;;
|
||||
--sv2v ) sv2v
|
||||
;;
|
||||
--yosys ) yosys
|
||||
;;
|
||||
--all ) pocl
|
||||
verilator
|
||||
sv2v
|
||||
yosys
|
||||
llvm-vortex
|
||||
riscv
|
||||
riscv64
|
||||
;;
|
||||
-h | --help ) show_usage
|
||||
exit
|
||||
;;
|
||||
* ) show_usage
|
||||
exit 1
|
||||
esac
|
||||
shift
|
||||
done
|
199
ci/toolchain_install.sh.in
Executable file
199
ci/toolchain_install.sh.in
Executable file
|
@ -0,0 +1,199 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright © 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# exit when any command fails
|
||||
set -e
|
||||
|
||||
REPOSITORY=https://github.com/vortexgpgpu/vortex-toolchain-prebuilt/raw/master
|
||||
TOOLDIR=${TOOLDIR:=@TOOLDIR@}
|
||||
OSVERSION=${OSVERSION:=@OSVERSION@}
|
||||
|
||||
# Download the split riscv32-gnu-toolchain archive for $OSVERSION, reassemble
# it, and install it as $TOOLDIR/riscv32-gnu-toolchain (replacing any previous copy).
riscv32()
{
    # The set of part suffixes to fetch depends on the OS release.
    case $OSVERSION in
        "centos/7")     parts=$(eval echo {a..h}) ;;
        "ubuntu/focal") parts=$(eval echo {a..k}) ;;
        *)              parts=$(eval echo {a..j}) ;;
    esac
    # Drop leftovers of an earlier run so they do not end up in the reassembled archive.
    rm -f riscv32-gnu-toolchain.tar.bz2.parta*
    for x in $parts
    do
        wget "$REPOSITORY/riscv32-gnu-toolchain/$OSVERSION/riscv32-gnu-toolchain.tar.bz2.parta$x"
    done
    cat riscv32-gnu-toolchain.tar.bz2.parta* > riscv32-gnu-toolchain.tar.bz2
    tar -xvf riscv32-gnu-toolchain.tar.bz2
    # One command per line: with `set -e`, a failure inside an `&&` list (other
    # than in its last command) does not abort the script, which would let the
    # cleanup below run after a failed install.
    mkdir -p "$TOOLDIR"
    rm -rf "$TOOLDIR/riscv32-gnu-toolchain"
    mv riscv32-gnu-toolchain "$TOOLDIR"
    rm -rf riscv32-gnu-toolchain.tar.bz2*
}
|
||||
|
||||
# Download the split riscv64-gnu-toolchain archive for $OSVERSION, reassemble
# it, and install it as $TOOLDIR/riscv64-gnu-toolchain (replacing any previous copy).
riscv64()
{
    # The set of part suffixes to fetch depends on the OS release.
    case $OSVERSION in
        "centos/7") parts=$(eval echo {a..h}) ;;
        *)          parts=$(eval echo {a..j}) ;;
    esac
    # Drop leftovers of an earlier run so they do not end up in the reassembled archive.
    rm -f riscv64-gnu-toolchain.tar.bz2.parta*
    for x in $parts
    do
        wget "$REPOSITORY/riscv64-gnu-toolchain/$OSVERSION/riscv64-gnu-toolchain.tar.bz2.parta$x"
    done
    cat riscv64-gnu-toolchain.tar.bz2.parta* > riscv64-gnu-toolchain.tar.bz2
    tar -xvf riscv64-gnu-toolchain.tar.bz2
    # One command per line: with `set -e`, a failure inside an `&&` list (other
    # than in its last command) does not abort the script.
    mkdir -p "$TOOLDIR"
    rm -rf "$TOOLDIR/riscv64-gnu-toolchain"
    mv riscv64-gnu-toolchain "$TOOLDIR"
    rm -rf riscv64-gnu-toolchain riscv64-gnu-toolchain.tar.bz2*
}
|
||||
|
||||
# Download the split llvm-vortex2 archive for $OSVERSION, reassemble it, and
# install the extracted llvm-vortex directory as $TOOLDIR/llvm-vortex.
llvm()
{
    case $OSVERSION in
        "centos/7") parts=$(eval echo {a..b}) ;;
        *)          parts=$(eval echo {a..b}) ;;
    esac
    echo $parts
    # Drop leftovers of an earlier run so they do not end up in the reassembled archive.
    rm -f llvm-vortex2.tar.bz2.parta*
    for x in $parts
    do
        wget "$REPOSITORY/llvm-vortex/$OSVERSION/llvm-vortex2.tar.bz2.parta$x"
    done
    cat llvm-vortex2.tar.bz2.parta* > llvm-vortex2.tar.bz2
    tar -xvf llvm-vortex2.tar.bz2
    # One command per line: with `set -e`, a failure inside an `&&` list (other
    # than in its last command) does not abort the script.
    mkdir -p "$TOOLDIR"
    rm -rf "$TOOLDIR/llvm-vortex"
    mv llvm-vortex "$TOOLDIR"
    rm -rf llvm-vortex llvm-vortex2.tar.bz2*
}
|
||||
|
||||
# Download the libcrt32 archive and install it as $TOOLDIR/libcrt32
# (replacing any previous copy).
libcrt32()
{
    wget "$REPOSITORY/libcrt32/libcrt32.tar.bz2"
    tar -xvf libcrt32.tar.bz2
    # One command per line: with `set -e`, a failure inside an `&&` list (other
    # than in its last command) does not abort the script.
    mkdir -p "$TOOLDIR"
    rm -rf "$TOOLDIR/libcrt32"
    mv libcrt32 "$TOOLDIR"
    rm -rf libcrt32 libcrt32.tar.bz2
}
|
||||
|
||||
# Download the libcrt64 archive and install it as $TOOLDIR/libcrt64
# (replacing any previous copy).
libcrt64()
{
    wget "$REPOSITORY/libcrt64/libcrt64.tar.bz2"
    tar -xvf libcrt64.tar.bz2
    # One command per line: with `set -e`, a failure inside an `&&` list (other
    # than in its last command) does not abort the script.
    mkdir -p "$TOOLDIR"
    rm -rf "$TOOLDIR/libcrt64"
    mv libcrt64 "$TOOLDIR"
    rm -rf libcrt64 libcrt64.tar.bz2
}
|
||||
|
||||
# Download the libc32 archive and install it as $TOOLDIR/libc32
# (replacing any previous copy).
libc32()
{
    wget "$REPOSITORY/libc32/libc32.tar.bz2"
    tar -xvf libc32.tar.bz2
    # One command per line: with `set -e`, a failure inside an `&&` list (other
    # than in its last command) does not abort the script.
    mkdir -p "$TOOLDIR"
    rm -rf "$TOOLDIR/libc32"
    mv libc32 "$TOOLDIR"
    rm -rf libc32 libc32.tar.bz2
}
|
||||
|
||||
# Download the libc64 archive and install it as $TOOLDIR/libc64
# (replacing any previous copy).
libc64()
{
    wget "$REPOSITORY/libc64/libc64.tar.bz2"
    tar -xvf libc64.tar.bz2
    # One command per line: with `set -e`, a failure inside an `&&` list (other
    # than in its last command) does not abort the script.
    mkdir -p "$TOOLDIR"
    rm -rf "$TOOLDIR/libc64"
    mv libc64 "$TOOLDIR"
    rm -rf libc64 libc64.tar.bz2
}
|
||||
|
||||
# Download the pocl2 archive for $OSVERSION and install the extracted pocl
# directory as $TOOLDIR/pocl (replacing any previous copy).
pocl()
{
    wget "$REPOSITORY/pocl/$OSVERSION/pocl2.tar.bz2"
    tar -xvf pocl2.tar.bz2
    # One command per line: with `set -e`, a failure inside an `&&` list (other
    # than in its last command) does not abort the script.
    mkdir -p "$TOOLDIR"
    rm -rf "$TOOLDIR/pocl"
    mv pocl "$TOOLDIR"
    # The directory moved above is named "pocl"; only the tarball carries the "pocl2" name.
    rm -rf pocl pocl2.tar.bz2
}
|
||||
|
||||
# Download the verilator archive for $OSVERSION and install it as
# $TOOLDIR/verilator (replacing any previous copy).
verilator()
{
    wget "$REPOSITORY/verilator/$OSVERSION/verilator.tar.bz2"
    tar -xvf verilator.tar.bz2
    # One command per line: with `set -e`, a failure inside an `&&` list (other
    # than in its last command) does not abort the script.
    mkdir -p "$TOOLDIR"
    rm -rf "$TOOLDIR/verilator"
    mv verilator "$TOOLDIR"
    rm -rf verilator verilator.tar.bz2
}
|
||||
|
||||
# Download the sv2v archive for $OSVERSION and install it as $TOOLDIR/sv2v
# (replacing any previous copy).
sv2v()
{
    wget "$REPOSITORY/sv2v/$OSVERSION/sv2v.tar.bz2"
    tar -xvf sv2v.tar.bz2
    # One command per line: with `set -e`, a failure inside an `&&` list (other
    # than in its last command) does not abort the script.
    mkdir -p "$TOOLDIR"
    rm -rf "$TOOLDIR/sv2v"
    mv sv2v "$TOOLDIR"
    rm -rf sv2v sv2v.tar.bz2
}
|
||||
|
||||
# Download the split yosys archive for $OSVERSION, reassemble it, and install
# it as $TOOLDIR/yosys (replacing any previous copy).
yosys()
{
    case $OSVERSION in
        "centos/7") parts=$(eval echo {a..c}) ;;
        *)          parts=$(eval echo {a..c}) ;;
    esac
    echo $parts
    # Drop leftovers of an earlier run so they do not end up in the reassembled archive.
    rm -f yosys.tar.bz2.parta*
    for x in $parts
    do
        wget "$REPOSITORY/yosys/$OSVERSION/yosys.tar.bz2.parta$x"
    done
    cat yosys.tar.bz2.parta* > yosys.tar.bz2
    tar -xvf yosys.tar.bz2
    # One command per line: with `set -e`, a failure inside an `&&` list (other
    # than in its last command) does not abort the script.
    mkdir -p "$TOOLDIR"
    rm -rf "$TOOLDIR/yosys"
    mv yosys "$TOOLDIR"
    rm -rf yosys yosys.tar.bz2*
}
|
||||
|
||||
# Print the script title and the list of supported command-line options.
show_usage()
{
    printf '%s\n' "Install Pre-built Vortex Toolchain"
    printf '%s\n' "Usage: $0 [--pocl] [--verilator] [--riscv32] [--riscv64] [--llvm] [--libcrt32] [--libcrt64] [--libc32] [--libc64] [--sv2v] [--yosys] [--all] [-h|--help]"
}
|
||||
|
||||
# Process the command-line options left to right, running the matching
# installer for each one. Processing stops at the first empty argument; an
# unrecognized option prints the usage text and exits with status 1.
while [ -n "$1" ]; do
    case "$1" in
        --pocl)      pocl ;;
        --verilator) verilator ;;
        --riscv32)   riscv32 ;;
        --riscv64)   riscv64 ;;
        --llvm)      llvm ;;
        --libcrt32)  libcrt32 ;;
        --libcrt64)  libcrt64 ;;
        --libc32)    libc32 ;;
        --libc64)    libc64 ;;
        --sv2v)      sv2v ;;
        --yosys)     yosys ;;
        --all)
            pocl
            verilator
            llvm
            libcrt32
            libcrt64
            libc32
            libc64
            riscv32
            riscv64
            sv2v
            yosys
            ;;
        -h|--help)
            show_usage
            exit
            ;;
        *)
            show_usage
            exit 1
            ;;
    esac
    shift
done
|
|
@ -1,128 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright © 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# exit when any command fails
|
||||
set -e
|
||||
|
||||
TOOLDIR=${TOOLDIR:=/opt}
|
||||
OSDIR=${OSDIR:=ubuntu/bionic}
|
||||
|
||||
riscv()
|
||||
{
|
||||
echo "prebuilt riscv-gnu-toolchain..."
|
||||
tar -C $TOOLDIR -cvjf riscv-gnu-toolchain.tar.bz2 riscv-gnu-toolchain
|
||||
split -b 50M riscv-gnu-toolchain.tar.bz2 "riscv-gnu-toolchain.tar.bz2.part"
|
||||
mv riscv-gnu-toolchain.tar.bz2.part* ./riscv-gnu-toolchain/$OSDIR
|
||||
rm riscv-gnu-toolchain.tar.bz2
|
||||
}
|
||||
|
||||
riscv64()
|
||||
{
|
||||
echo "prebuilt riscv64-gnu-toolchain..."
|
||||
tar -C $TOOLDIR -cvjf riscv64-gnu-toolchain.tar.bz2 riscv64-gnu-toolchain
|
||||
split -b 50M riscv64-gnu-toolchain.tar.bz2 "riscv64-gnu-toolchain.tar.bz2.part"
|
||||
mv riscv64-gnu-toolchain.tar.bz2.part* ./riscv64-gnu-toolchain/$OSDIR
|
||||
rm riscv64-gnu-toolchain.tar.bz2
|
||||
}
|
||||
|
||||
llvm-vortex()
|
||||
{
|
||||
echo "prebuilt llvm-vortex..."
|
||||
tar -C $TOOLDIR -cvjf llvm-vortex.tar.bz2 llvm-vortex
|
||||
split -b 50M llvm-vortex.tar.bz2 "llvm-vortex.tar.bz2.part"
|
||||
mv llvm-vortex.tar.bz2.part* ./llvm-vortex/$OSDIR
|
||||
rm llvm-vortex.tar.bz2
|
||||
}
|
||||
|
||||
llvm-pocl()
|
||||
{
|
||||
echo "prebuilt llvm-pocl..."
|
||||
tar -C $TOOLDIR -cvjf llvm-pocl.tar.bz2 llvm-pocl
|
||||
split -b 50M llvm-pocl.tar.bz2 "llvm-pocl.tar.bz2.part"
|
||||
mv llvm-pocl.tar.bz2.part* ./llvm-pocl/$OSDIR
|
||||
rm llvm-pocl.tar.bz2
|
||||
}
|
||||
|
||||
pocl()
|
||||
{
|
||||
echo "prebuilt pocl..."
|
||||
tar -C $TOOLDIR -cvjf pocl.tar.bz2 pocl
|
||||
mv pocl.tar.bz2 ./pocl/$OSDIR
|
||||
}
|
||||
|
||||
verilator()
|
||||
{
|
||||
echo "prebuilt verilator..."
|
||||
tar -C $TOOLDIR -cvjf verilator.tar.bz2 verilator
|
||||
mv verilator.tar.bz2 ./verilator/$OSDIR
|
||||
}
|
||||
|
||||
sv2v()
|
||||
{
|
||||
echo "prebuilt sv2v..."
|
||||
tar -C $TOOLDIR -cvjf sv2v.tar.bz2 sv2v
|
||||
mv sv2v.tar.bz2 ./sv2v/$OSDIR
|
||||
}
|
||||
|
||||
yosys()
|
||||
{
|
||||
echo "prebuilt yosys..."
|
||||
tar -C $TOOLDIR -cvjf yosys.tar.bz2 yosys
|
||||
split -b 50M yosys.tar.bz2 "yosys.tar.bz2.part"
|
||||
mv yosys.tar.bz2.part* ./yosys/$OSDIR
|
||||
rm yosys.tar.bz2
|
||||
}
|
||||
|
||||
show_usage()
|
||||
{
|
||||
echo "Setup Pre-built Vortex Toolchain"
|
||||
echo "Usage: $0 [[--riscv] [--llvm-vortex] [--llvm-pocl] [--pocl] [--verilator] [--sv2v] [-yosys] [--all] [-h|--help]]"
|
||||
}
|
||||
|
||||
while [ "$1" != "" ]; do
|
||||
case $1 in
|
||||
--pocl ) pocl
|
||||
;;
|
||||
--verilator ) verilator
|
||||
;;
|
||||
--riscv ) riscv
|
||||
;;
|
||||
--riscv64 ) riscv64
|
||||
;;
|
||||
--llvm-vortex ) llvm-vortex
|
||||
;;
|
||||
--llvm-pocl ) llvm-pocl
|
||||
;;
|
||||
--sv2v ) sv2v
|
||||
;;
|
||||
--yosys ) yosys
|
||||
;;
|
||||
--all ) riscv
|
||||
riscv64
|
||||
llvm-vortex
|
||||
llvm-pocl
|
||||
pocl
|
||||
verilator
|
||||
sv2v
|
||||
yosys
|
||||
;;
|
||||
-h | --help ) show_usage
|
||||
exit
|
||||
;;
|
||||
* ) show_usage
|
||||
exit 1
|
||||
esac
|
||||
shift
|
||||
done
|
167
ci/toolchain_prebuilt.sh.in
Executable file
167
ci/toolchain_prebuilt.sh.in
Executable file
|
@ -0,0 +1,167 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright © 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# exit when any command fails
|
||||
set -e
|
||||
|
||||
TOOLDIR=${TOOLDIR:=@TOOLDIR@}
|
||||
OSVERSION=${OSVERSION:=@OSVERSION@}
|
||||
|
||||
# Pack $TOOLDIR/riscv32-gnu-toolchain into a bzip2 tarball, split it into 50M
# parts and move the parts into ./riscv32-gnu-toolchain/$OSVERSION.
riscv32()
{
    echo "prebuilt riscv32-gnu-toolchain..."
    # Expansions are quoted so paths containing spaces or glob characters stay intact.
    tar -C "$TOOLDIR" -cvjf riscv32-gnu-toolchain.tar.bz2 riscv32-gnu-toolchain
    split -b 50M riscv32-gnu-toolchain.tar.bz2 "riscv32-gnu-toolchain.tar.bz2.part"
    mkdir -p "./riscv32-gnu-toolchain/$OSVERSION"
    mv riscv32-gnu-toolchain.tar.bz2.part* "./riscv32-gnu-toolchain/$OSVERSION"
    rm riscv32-gnu-toolchain.tar.bz2
}
|
||||
|
||||
# Pack $TOOLDIR/riscv64-gnu-toolchain into a bzip2 tarball, split it into 50M
# parts and move the parts into ./riscv64-gnu-toolchain/$OSVERSION.
riscv64()
{
    echo "prebuilt riscv64-gnu-toolchain..."
    # Expansions are quoted so paths containing spaces or glob characters stay intact.
    tar -C "$TOOLDIR" -cvjf riscv64-gnu-toolchain.tar.bz2 riscv64-gnu-toolchain
    split -b 50M riscv64-gnu-toolchain.tar.bz2 "riscv64-gnu-toolchain.tar.bz2.part"
    mkdir -p "./riscv64-gnu-toolchain/$OSVERSION"
    mv riscv64-gnu-toolchain.tar.bz2.part* "./riscv64-gnu-toolchain/$OSVERSION"
    rm riscv64-gnu-toolchain.tar.bz2
}
|
||||
|
||||
# Pack $TOOLDIR/llvm-vortex into llvm-vortex2.tar.bz2, split it into 50M parts
# and move the parts into ./llvm-vortex/$OSVERSION.
llvm()
{
    echo "prebuilt llvm-vortex2..."
    # Expansions are quoted so paths containing spaces or glob characters stay intact.
    tar -C "$TOOLDIR" -cvjf llvm-vortex2.tar.bz2 llvm-vortex
    split -b 50M llvm-vortex2.tar.bz2 "llvm-vortex2.tar.bz2.part"
    mkdir -p "./llvm-vortex/$OSVERSION"
    mv llvm-vortex2.tar.bz2.part* "./llvm-vortex/$OSVERSION"
    rm llvm-vortex2.tar.bz2
}
|
||||
|
||||
# Pack $TOOLDIR/libcrt32 into a bzip2 tarball and move it into ./libcrt32.
libcrt32()
{
    echo "prebuilt libcrt32..."
    # $TOOLDIR is quoted so a path containing spaces or glob characters stays intact.
    tar -C "$TOOLDIR" -cvjf libcrt32.tar.bz2 libcrt32
    mkdir -p ./libcrt32
    mv libcrt32.tar.bz2 ./libcrt32
}
|
||||
|
||||
# Pack $TOOLDIR/libcrt64 into a bzip2 tarball and move it into ./libcrt64.
libcrt64()
{
    echo "prebuilt libcrt64..."
    # $TOOLDIR is quoted so a path containing spaces or glob characters stays intact.
    tar -C "$TOOLDIR" -cvjf libcrt64.tar.bz2 libcrt64
    mkdir -p ./libcrt64
    mv libcrt64.tar.bz2 ./libcrt64
}
|
||||
|
||||
# Pack $TOOLDIR/libc32 into a bzip2 tarball and move it into ./libc32.
libc32()
{
    echo "prebuilt libc32..."
    # $TOOLDIR is quoted so a path containing spaces or glob characters stays intact.
    tar -C "$TOOLDIR" -cvjf libc32.tar.bz2 libc32
    mkdir -p ./libc32
    mv libc32.tar.bz2 ./libc32
}
|
||||
|
||||
# Pack $TOOLDIR/libc64 into a bzip2 tarball and move it into ./libc64.
libc64()
{
    echo "prebuilt libc64..."
    # $TOOLDIR is quoted so a path containing spaces or glob characters stays intact.
    tar -C "$TOOLDIR" -cvjf libc64.tar.bz2 libc64
    mkdir -p ./libc64
    mv libc64.tar.bz2 ./libc64
}
|
||||
|
||||
# Pack $TOOLDIR/pocl into pocl2.tar.bz2 and move it into ./pocl/$OSVERSION.
pocl()
{
    echo "prebuilt pocl..."
    # Expansions are quoted so paths containing spaces or glob characters stay intact.
    tar -C "$TOOLDIR" -cvjf pocl2.tar.bz2 pocl
    mkdir -p "./pocl/$OSVERSION"
    mv pocl2.tar.bz2 "./pocl/$OSVERSION"
}
|
||||
|
||||
# Pack $TOOLDIR/verilator into a bzip2 tarball and move it into ./verilator/$OSVERSION.
verilator()
{
    echo "prebuilt verilator..."
    # Expansions are quoted so paths containing spaces or glob characters stay intact.
    tar -C "$TOOLDIR" -cvjf verilator.tar.bz2 verilator
    mkdir -p "./verilator/$OSVERSION"
    mv verilator.tar.bz2 "./verilator/$OSVERSION"
}
|
||||
|
||||
# Pack $TOOLDIR/sv2v into a bzip2 tarball and move it into ./sv2v/$OSVERSION.
sv2v()
{
    echo "prebuilt sv2v..."
    # Expansions are quoted so paths containing spaces or glob characters stay intact.
    tar -C "$TOOLDIR" -cvjf sv2v.tar.bz2 sv2v
    mkdir -p "./sv2v/$OSVERSION"
    mv sv2v.tar.bz2 "./sv2v/$OSVERSION"
}
|
||||
|
||||
# Pack $TOOLDIR/yosys into a bzip2 tarball, split it into 50M parts and move
# the parts into ./yosys/$OSVERSION.
yosys()
{
    echo "prebuilt yosys..."
    # Expansions are quoted so paths containing spaces or glob characters stay intact.
    tar -C "$TOOLDIR" -cvjf yosys.tar.bz2 yosys
    split -b 50M yosys.tar.bz2 "yosys.tar.bz2.part"
    mkdir -p "./yosys/$OSVERSION"
    mv yosys.tar.bz2.part* "./yosys/$OSVERSION"
    rm yosys.tar.bz2
}
|
||||
|
||||
# Print the script title and the list of supported command-line options.
show_usage()
{
    echo "Setup Pre-built Vortex Toolchain"
    # The option is spelled --yosys, matching what the option parser accepts.
    echo "Usage: $0 [--pocl] [--verilator] [--riscv32] [--riscv64] [--llvm] [--libcrt32] [--libcrt64] [--libc32] [--libc64] [--sv2v] [--yosys] [--all] [-h|--help]"
}
|
||||
|
||||
# Process the command-line options left to right, running the matching
# packaging step for each one. Processing stops at the first empty argument;
# an unrecognized option prints the usage text and exits with status 1.
while [ -n "$1" ]; do
    case "$1" in
        --pocl)      pocl ;;
        --verilator) verilator ;;
        --riscv32)   riscv32 ;;
        --riscv64)   riscv64 ;;
        --llvm)      llvm ;;
        --libcrt32)  libcrt32 ;;
        --libcrt64)  libcrt64 ;;
        --libc32)    libc32 ;;
        --libc64)    libc64 ;;
        --sv2v)      sv2v ;;
        --yosys)     yosys ;;
        --all)
            pocl
            verilator
            riscv32
            riscv64
            llvm
            libcrt32
            libcrt64
            libc32
            libc64
            sv2v
            yosys
            ;;
        -h|--help)
            show_usage
            exit
            ;;
        *)
            show_usage
            exit 1
            ;;
    esac
    shift
done
|
183
ci/trace_csv.py
183
ci/trace_csv.py
|
@ -1,12 +1,12 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright © 2019-2023
|
||||
#
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -17,7 +17,10 @@ import sys
|
|||
import argparse
|
||||
import csv
|
||||
import re
|
||||
|
||||
import inspect
|
||||
|
||||
configs = None
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(description='CPU trace log to CSV format converter.')
|
||||
parser.add_argument('-t', '--type', default='simx', help='log type (rtlsim or simx)')
|
||||
|
@ -25,7 +28,25 @@ def parse_args():
|
|||
parser.add_argument('log', help='Input log file')
|
||||
return parser.parse_args()
|
||||
|
||||
def parse_simx(log_filename):
|
||||
def load_config(filename):
    """Return the settings from the first "CONFIGS:" line found in a log file.

    Args:
        filename: path of the log file to scan.

    Returns:
        A dict with integer values for num_threads, num_warps, num_cores,
        num_clusters, socket_size, local_mem_base (parsed as hexadecimal) and
        num_barriers, or None when no line matches.
    """
    config_pattern = r"CONFIGS: num_threads=(\d+), num_warps=(\d+), num_cores=(\d+), num_clusters=(\d+), socket_size=(\d+), local_mem_base=0x([0-9a-fA-F]+), num_barriers=(\d+)"
    # Keys in capture-group order; only local_mem_base is logged in hexadecimal.
    keys = ('num_threads', 'num_warps', 'num_cores', 'num_clusters',
            'socket_size', 'local_mem_base', 'num_barriers')
    with open(filename, 'r') as file:
        for line in file:
            found = re.search(config_pattern, line)
            if found is None:
                continue
            config = {}
            for group_index, key in enumerate(keys, start=1):
                radix = 16 if key == 'local_mem_base' else 10
                config[key] = int(found.group(group_index), radix)
            return config
    return None
|
||||
|
||||
def parse_simx(log_lines):
|
||||
pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
|
||||
instr_pattern = r"Instr (0x[0-9a-fA-F]+):"
|
||||
opcode_pattern = r"Instr 0x[0-9a-fA-F]+: ([0-9a-zA-Z_\.]+)"
|
||||
|
@ -36,19 +57,19 @@ def parse_simx(log_filename):
|
|||
destination_pattern = r"Dest Reg: (.+)"
|
||||
uuid_pattern = r"#(\d+)"
|
||||
entries = []
|
||||
with open(log_filename, 'r') as log_file:
|
||||
instr_data = None
|
||||
for lineno, line in enumerate(log_file, start=1):
|
||||
instr_data = None
|
||||
for lineno, line in enumerate(log_lines, start=1):
|
||||
try:
|
||||
if line.startswith("DEBUG Fetch:"):
|
||||
if instr_data:
|
||||
entries.append(instr_data)
|
||||
instr_data = {}
|
||||
instr_data = {}
|
||||
instr_data["lineno"] = lineno
|
||||
instr_data["PC"] = re.search(pc_pattern, line).group(1)
|
||||
instr_data["core_id"] = re.search(core_id_pattern, line).group(1)
|
||||
instr_data["warp_id"] = re.search(warp_id_pattern, line).group(1)
|
||||
instr_data["tmask"] = re.search(tmask_pattern, line).group(1)
|
||||
instr_data["uuid"] = re.search(uuid_pattern, line).group(1)
|
||||
instr_data["core_id"] = int(re.search(core_id_pattern, line).group(1))
|
||||
instr_data["warp_id"] = int(re.search(warp_id_pattern, line).group(1))
|
||||
instr_data["tmask"] = re.search(tmask_pattern, line).group(1)
|
||||
instr_data["uuid"] = int(re.search(uuid_pattern, line).group(1))
|
||||
elif line.startswith("DEBUG Instr"):
|
||||
instr_data["instr"] = re.search(instr_pattern, line).group(1)
|
||||
instr_data["opcode"] = re.search(opcode_pattern, line).group(1)
|
||||
|
@ -57,16 +78,19 @@ def parse_simx(log_filename):
|
|||
instr_data["operands"] = (instr_data["operands"] + ', ' + src_reg) if 'operands' in instr_data else src_reg
|
||||
elif line.startswith("DEBUG Dest"):
|
||||
instr_data["destination"] = re.search(destination_pattern, line).group(1)
|
||||
if instr_data:
|
||||
entries.append(instr_data)
|
||||
except Exception as e:
|
||||
print("Error at line {}: {}".format(lineno, e))
|
||||
instr_data = None
|
||||
if instr_data:
|
||||
entries.append(instr_data)
|
||||
return entries
|
||||
|
||||
|
||||
def reverse_binary(bin_str):
    """Return bin_str with its elements in reverse order."""
    flipped = bin_str[::-1]
    return flipped
|
||||
|
||||
def bin_to_array(bin_str):
    """Convert each character of bin_str to an int and return them as a list."""
    return list(map(int, bin_str))
|
||||
|
||||
|
||||
def append_reg(text, value, sep):
|
||||
if sep:
|
||||
text += ", "
|
||||
|
@ -77,14 +101,7 @@ def append_reg(text, value, sep):
|
|||
text += "x" + value
|
||||
sep = True
|
||||
return text, sep
|
||||
|
||||
def append_imm(text, value, sep):
|
||||
if sep:
|
||||
text += ", "
|
||||
text += value
|
||||
sep = True
|
||||
return text, sep
|
||||
|
||||
|
||||
def append_value(text, reg, value, tmask_arr, sep):
|
||||
text, sep = append_reg(text, reg, sep)
|
||||
text += "={"
|
||||
|
@ -97,9 +114,10 @@ def append_value(text, reg, value, tmask_arr, sep):
|
|||
text +="-"
|
||||
text += "}"
|
||||
return text, sep
|
||||
|
||||
def parse_rtlsim(log_filename):
|
||||
line_pattern = r"\d+: core(\d+)-(decode|issue|commit)"
|
||||
|
||||
def parse_rtlsim(log_lines):
|
||||
global configs
|
||||
line_pattern = r"\d+: cluster(\d+)-socket(\d+)-core(\d+)-(decode|issue|commit)"
|
||||
pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
|
||||
instr_pattern = r"instr=(0x[0-9a-fA-F]+)"
|
||||
ex_pattern = r"ex=([a-zA-Z]+)"
|
||||
|
@ -108,8 +126,6 @@ def parse_rtlsim(log_filename):
|
|||
tmask_pattern = r"tmask=(\d+)"
|
||||
wb_pattern = r"wb=(\d)"
|
||||
opds_pattern = r"opds=(\d+)"
|
||||
use_imm_pattern = r"use_imm=(\d)"
|
||||
imm_pattern = r"imm=(0x[0-9a-fA-F]+)"
|
||||
rd_pattern = r"rd=(\d+)"
|
||||
rs1_pattern = r"rs1=(\d+)"
|
||||
rs2_pattern = r"rs2=(\d+)"
|
||||
|
@ -120,24 +136,29 @@ def parse_rtlsim(log_filename):
|
|||
rd_data_pattern = r"data=\{(.+?)\}"
|
||||
eop_pattern = r"eop=(\d)"
|
||||
uuid_pattern = r"#(\d+)"
|
||||
entries = []
|
||||
with open(log_filename, 'r') as log_file:
|
||||
instr_data = {}
|
||||
for lineno, line in enumerate(log_file, start=1):
|
||||
entries = []
|
||||
instr_data = {}
|
||||
num_cores = configs['num_cores']
|
||||
socket_size = configs['socket_size']
|
||||
num_sockets = (num_cores + socket_size - 1) // socket_size
|
||||
for lineno, line in enumerate(log_lines, start=1):
|
||||
try:
|
||||
line_match = re.search(line_pattern, line)
|
||||
if line_match:
|
||||
PC = re.search(pc_pattern, line).group(1)
|
||||
warp_id = re.search(warp_id_pattern, line).group(1)
|
||||
warp_id = int(re.search(warp_id_pattern, line).group(1))
|
||||
tmask = re.search(tmask_pattern, line).group(1)
|
||||
uuid = re.search(uuid_pattern, line).group(1)
|
||||
core_id = line_match.group(1)
|
||||
stage = line_match.group(2)
|
||||
if stage == "decode":
|
||||
uuid = int(re.search(uuid_pattern, line).group(1))
|
||||
cluster_id = int(line_match.group(1))
|
||||
socket_id = int(line_match.group(2))
|
||||
core_id = int(line_match.group(3))
|
||||
stage = line_match.group(4)
|
||||
if stage == "decode":
|
||||
trace = {}
|
||||
trace["uuid"] = uuid
|
||||
trace["PC"] = PC
|
||||
trace["core_id"] = core_id
|
||||
trace["warp_id"] = warp_id
|
||||
trace["PC"] = PC
|
||||
trace["core_id"] = ((((cluster_id * num_sockets) + socket_id) * socket_size) + core_id)
|
||||
trace["warp_id"] = warp_id
|
||||
trace["tmask"] = reverse_binary(tmask)
|
||||
trace["instr"] = re.search(instr_pattern, line).group(1)
|
||||
trace["opcode"] = re.search(op_pattern, line).group(1)
|
||||
|
@ -146,8 +167,6 @@ def parse_rtlsim(log_filename):
|
|||
trace["rs1"] = re.search(rs1_pattern, line).group(1)
|
||||
trace["rs2"] = re.search(rs2_pattern, line).group(1)
|
||||
trace["rs3"] = re.search(rs3_pattern, line).group(1)
|
||||
trace["use_imm"] = re.search(use_imm_pattern, line).group(1) == "1"
|
||||
trace["imm"] = re.search(imm_pattern, line).group(1)
|
||||
instr_data[uuid] = trace
|
||||
elif stage == "issue":
|
||||
if uuid in instr_data:
|
||||
|
@ -162,7 +181,7 @@ def parse_rtlsim(log_filename):
|
|||
trace["rs3_data"] = re.search(rs3_data_pattern, line).group(1).split(', ')[::-1]
|
||||
trace["issued"] = True
|
||||
instr_data[uuid] = trace
|
||||
elif stage == "commit":
|
||||
elif stage == "commit":
|
||||
if uuid in instr_data:
|
||||
trace = instr_data[uuid]
|
||||
if "issued" in trace:
|
||||
|
@ -205,41 +224,65 @@ def parse_rtlsim(log_filename):
|
|||
del trace["rs1"]
|
||||
del trace["rs2"]
|
||||
del trace["rs3"]
|
||||
del trace["use_imm"]
|
||||
del trace["imm"]
|
||||
del trace["issued"]
|
||||
del trace["issued"]
|
||||
del instr_data[uuid]
|
||||
entries.append(trace)
|
||||
return entries
|
||||
except Exception as e:
|
||||
print("Error at line {}: {}".format(lineno, e))
|
||||
return entries
|
||||
|
||||
def write_csv(sublogs, csv_filename, log_type):
    """Parse every sub-log and write the resulting trace entries to one CSV file.

    Args:
        sublogs: list of sub-logs, each a list of log lines.
        csv_filename: path of the CSV file to create (overwritten if present).
        log_type: "rtlsim" or "simx"; selects the parser applied to each sub-log.

    Exits with status 1 when log_type is not recognized. The check happens
    before the output file is opened, so a bad type no longer leaves a
    header-only CSV behind while reporting success.
    """
    if log_type not in ("rtlsim", "simx"):
        print('Error: invalid log type')
        sys.exit(1)

    with open(csv_filename, 'w', newline='') as csv_file:
        fieldnames = ["uuid", "PC", "opcode", "instr", "core_id", "warp_id", "tmask", "destination", "operands"]
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()

        for sublog in sublogs:
            # parse sublog
            if log_type == "rtlsim":
                entries = parse_rtlsim(sublog)
            else:
                entries = parse_simx(sublog)

            # sort entries by uuid
            entries.sort(key=lambda x: (int(x['uuid'])))

            for entry in entries:
                # 'lineno' is not a CSV column and DictWriter rejects unknown
                # keys; pop() tolerates entries that never carried it.
                entry.pop('lineno', None)
                writer.writerow(entry)
|
||||
|
||||
def split_log_file(log_filename):
    """Split a log file into sub-logs, one per "[VXDRV] START" marker.

    Each sub-log is a list of lines beginning with a marker line and running
    up to (not including) the next marker. Lines before the first marker are
    discarded.

    Args:
        log_filename: path of the log file to read.

    Returns:
        A list of sub-logs (lists of lines), in file order.
    """
    with open(log_filename, 'r') as log_file:
        log_lines = log_file.readlines()

    sublogs = []
    for line in log_lines:
        if line.startswith("[VXDRV] START"):
            # A marker line opens a new sub-log.
            sublogs.append([line])
        elif sublogs:
            sublogs[-1].append(line)
    return sublogs
|
||||
|
||||
def main():
    """Convert the log named on the command line into a CSV trace file."""
    global configs
    args = parse_args()
    configs = load_config(args.log)
    # parse_rtlsim indexes into `configs`, so fail with a clear message rather
    # than a TypeError when the log carries no CONFIGS line.
    if configs is None and args.type == "rtlsim":
        print("Error: no CONFIGS line found in '{}'".format(args.log))
        sys.exit(1)
    sublogs = split_log_file(args.log)
    write_csv(sublogs, args.csv, args.type)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
# Copyright © 2019-2023
|
||||
#
|
||||
# Copyright 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -18,48 +18,56 @@ import time
|
|||
import threading
|
||||
import subprocess
|
||||
|
||||
# This script executes a long-running command while outputing "still running ..." periodically
|
||||
# This script executes a long-running command while printing "still running ..." periodically
|
||||
# to notify the Travis build system that the program has not hung
|
||||
|
||||
PING_INTERVAL=300 # 5 minutes
|
||||
SLEEP_INTERVAL=1 # 1 second
|
||||
|
||||
def monitor(stop_event):
    """Print a keep-alive message every PING_INTERVAL seconds.

    Runs until ``stop_event`` is set. Intended to be the target of a
    background thread so that the CI system keeps seeing output while a
    long-running command is silent.

    Args:
        stop_event: a ``threading.Event``; setting it ends the loop.
    """
    wait_time = 0
    # Event.wait() returns False when the timeout elapses and True as soon
    # as the event is set, so the thread wakes up immediately on shutdown
    # instead of polling with short sleeps.
    while not stop_event.wait(PING_INTERVAL):
        wait_time += PING_INTERVAL
        print(" + still running (" + str(wait_time) + "s) ...")
        sys.stdout.flush()
|
||||
|
||||
def execute(command):
    """Run ``command`` and echo its output line by line.

    stderr is merged into stdout. Every line is printed with a ">>> "
    prefix; lines that are not valid UTF-8 are printed as their ``repr``.

    Args:
        command: argument list passed to ``subprocess.Popen``.

    Returns:
        The exit code of the child process.
    """
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    # Read until EOF rather than until poll() reports an exit code, so that
    # output still buffered in the pipe when the child exits is not lost.
    for output in iter(process.stdout.readline, b''):
        try:
            line = output.decode('utf-8').rstrip()
        except UnicodeDecodeError:
            line = repr(output)  # Safely print raw binary data
        print(">>> " + line)
        sys.stdout.flush()
    process.stdout.close()
    return process.wait()
|
||||
|
||||
def main(argv):
    """Run a command under a keep-alive monitor and exit with its status.

    Args:
        argv: the command and its arguments; an empty list prints the
            usage message and exits with status 1.
    """
    if not argv:
        print("Usage: travis_run.py <command>")
        sys.exit(1)

    # start monitoring thread
    stop_event = threading.Event()
    t = threading.Thread(target=monitor, args=(stop_event,))
    t.start()

    try:
        # execute command (exactly once)
        exitcode = execute(argv)
        print(" + exitcode="+str(exitcode))
        sys.stdout.flush()
    finally:
        # terminate monitoring thread even if the command could not be
        # started; otherwise the non-daemon thread keeps the process alive
        stop_event.set()
        t.join()

    sys.exit(exitcode)
|
||||
|
|
37
config.mk.in
Normal file
37
config.mk.in
Normal file
|
@ -0,0 +1,37 @@
|
|||
# Copyright © 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
VORTEX_HOME ?= @VORTEX_HOME@
|
||||
|
||||
XLEN ?= @XLEN@
|
||||
|
||||
TOOLDIR ?= @TOOLDIR@
|
||||
|
||||
OSVERSION ?= @OSVERSION@
|
||||
|
||||
INSTALLDIR ?= @INSTALLDIR@
|
||||
|
||||
LLVM_VORTEX ?= $(TOOLDIR)/llvm-vortex
|
||||
|
||||
LIBC_VORTEX ?= $(TOOLDIR)/libc$(XLEN)
|
||||
LIBCRT_VORTEX ?= $(TOOLDIR)/libcrt$(XLEN)
|
||||
|
||||
RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv$(XLEN)-gnu-toolchain
|
||||
|
||||
RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf
|
||||
RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX)
|
||||
|
||||
VORTEX_RT_PATH ?= $(VORTEX_HOME)/runtime
|
||||
VORTEX_KN_PATH ?= $(VORTEX_HOME)/kernel
|
||||
|
||||
THIRD_PARTY_DIR ?= $(VORTEX_HOME)/third_party
|
174
configure
vendored
Executable file
174
configure
vendored
Executable file
|
@ -0,0 +1,174 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright © 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Determine the current working directory
|
||||
CURRENT_DIR=$(pwd)
|
||||
|
||||
# Function to detect current OS
|
||||
# Print the identifier of the host OS ("ubuntu/bionic", "ubuntu/focal" or
# "centos/7"), or "unsupported" when /etc/os-release is missing or names
# any other distribution/version.
detect_osversion() {
    local detected="unsupported"
    if [ -f /etc/os-release ]; then
        # Sourcing os-release defines ID, VERSION_CODENAME and VERSION_ID.
        . /etc/os-release
        if [ "$ID" = "ubuntu" ]; then
            case "$VERSION_CODENAME" in
                bionic|focal) detected="ubuntu/$VERSION_CODENAME" ;;
                # Add new versions as needed
            esac
        elif [ "$ID" = "centos" ]; then
            if [ "$VERSION_ID" = "7" ]; then
                detected="centos/7"
            fi
            # Add new versions as needed
        fi
    fi
    echo "$detected"
}
|
||||
|
||||
# Function to recursively copy files, skipping the current directory
|
||||
# Mirror the build files of the source tree into the target (build) tree.
#
#   $1  source directory (the checkout)
#   $2  target directory (the build directory)
#
# Reads the globals SUBDIRS (directory patterns; a leading "!" requests a
# full copy of every file in the directory), SCRIPT_DIR, XLEN, TOOLDIR,
# OSVERSION and PREFIX. Files ending in ".in" are instantiated as templates
# (placeholders substituted, ".in" suffix dropped); other files are copied
# verbatim unless source and target are the same directory.
copy_files() {
    local source_dir="$1"
    local target_dir="$2"
    #echo "source_dir=$source_dir, target_dir=$target_dir"

    local same_dir=0
    if [ "$(realpath "$source_dir")" == "$(realpath "$target_dir")" ]; then
        same_dir=1
    fi

    # Escape the characters that are special in a sed replacement when '|'
    # is the delimiter, so values such as paths containing '&' are inserted
    # literally.
    _sed_escape() {
        printf '%s' "$1" | sed -e 's/[\\&|]/\\&/g'
    }

    # Function to copy and update file
    #   $1  glob pattern of the files to process
    #   $2  destination directory
    copy_and_update() {
        local src_pattern="$1"
        local dest_dir="$2"
        local file filename dest_file firstline
        # $src_pattern is intentionally unquoted so the glob is expanded.
        # NOTE(review): this also word-splits, so directories containing
        # whitespace are not supported (same as before).
        for file in $src_pattern; do
            #echo "*** $file > $dest_dir"
            [ -f "$file" ] || continue
            if [[ "$file" == *.in ]]; then
                filename=$(basename -- "$file")
                dest_file="$dest_dir/${filename%.in}"
                mkdir -p "$dest_dir"
                sed "s|@VORTEX_HOME@|$(_sed_escape "$SCRIPT_DIR")|g; s|@XLEN@|$(_sed_escape "$XLEN")|g; s|@TOOLDIR@|$(_sed_escape "$TOOLDIR")|g; s|@OSVERSION@|$(_sed_escape "$OSVERSION")|g; s|@INSTALLDIR@|$(_sed_escape "$PREFIX")|g" "$file" > "$dest_file"
                # apply permissions to bash scripts
                firstline=""
                read -r firstline < "$dest_file"
                if [[ "$firstline" =~ ^#!.*bash ]]; then
                    chmod +x "$dest_file"
                fi
            elif [ $same_dir -eq 0 ]; then
                mkdir -p "$dest_dir"
                cp -p "$file" "$dest_dir"
            fi
        done
    }

    local pattern full_copy source_pattern dir rel_path full_target_dir
    for pattern in "${SUBDIRS[@]}"; do
        full_copy=0
        if [[ "$pattern" == !* ]]; then
            full_copy=1
            pattern=${pattern:1}
        fi
        source_pattern="$source_dir/$pattern"
        if [[ "$pattern" == "." ]]; then
            source_pattern=$source_dir
        fi
        # IFS= and -r keep leading whitespace and backslashes in directory
        # names intact.
        find "$source_dir" -type d -path "$source_pattern" 2>/dev/null | while IFS= read -r dir; do
            # Compute the relative path of the directory; the prefix is
            # quoted so it is removed literally rather than as a glob.
            rel_path="${dir#"$source_dir"}"
            rel_path="${rel_path#/}" # Remove leading slash, if present
            full_target_dir="$target_dir/$rel_path"

            # Copy and update Makefile and common.mk if they exist
            if [ $full_copy -eq 1 ]; then
                copy_and_update "$dir/*" "$full_target_dir"
            else
                copy_and_update "$dir/Makefile" "$full_target_dir"
                copy_and_update "$dir/common.mk" "$full_target_dir"
                copy_and_update "$dir/*.in" "$full_target_dir"
            fi
        done
    done
}
|
||||
|
||||
###############################################################################
|
||||
|
||||
# default configuration parameters
|
||||
default_xlen=32
|
||||
default_tooldir=$HOME/tools
|
||||
default_osversion=$(detect_osversion)
|
||||
default_prefix=$CURRENT_DIR
|
||||
|
||||
# load default configuration parameters from existing config.mk
|
||||
# Each line of config.mk has the form "KEY ?= value". Splitting on '='
# leaves "KEY ?" in $key and " value" in $value.
if [ -f "config.mk" ]; then
    while IFS='=' read -r key value; do
        value=${value//[@]/} # Remove placeholder characters
        value="${value#"${value%%[![:space:]]*}"}" # Remove leading whitespace
        value="${value%"${value##*[![:space:]]}"}" # Remove trailing whitespace
        case $key in
            XLEN\ ?*) default_xlen=${value//\?=/} ;;
            TOOLDIR\ ?*) default_tooldir=${value//\?=/} ;;
            OSVERSION\ ?*) default_osversion=${value//\?=/} ;;
            # The prefix is written to config.mk under the INSTALLDIR key
            # (@INSTALLDIR@ is substituted with $PREFIX); PREFIX is still
            # accepted for backward compatibility.
            INSTALLDIR\ ?*|PREFIX\ ?*) default_prefix=${value//\?=/} ;;
        esac
    done < config.mk
fi
|
||||
|
||||
# set configuration parameters
|
||||
XLEN=${XLEN:=$default_xlen}
|
||||
TOOLDIR=${TOOLDIR:=$default_tooldir}
|
||||
OSVERSION=${OSVERSION:=$default_osversion}
|
||||
PREFIX=${PREFIX:=$default_prefix}
|
||||
|
||||
# parse command line arguments
|
||||
# Print the command-line help and terminate the script with status 1.
usage() {
    echo "Usage: $0 [--xlen=<value>] [--tooldir=<path>] [--osversion=<version>] [--prefix=<path>]"
    echo " --xlen=<value> Set the XLEN value (default: 32)"
    echo " --tooldir=<path> Set the TOOLDIR path (default: $HOME/tools)"
    echo " --osversion=<version> Set the OS Version (default: $(detect_osversion))"
    echo " --prefix=<path> Set installation directory (default: $CURRENT_DIR)"
    exit 1
}
|
||||
while [[ "$#" -gt 0 ]]; do
|
||||
case $1 in
|
||||
--xlen=*) XLEN="${1#*=}" ;;
|
||||
--tooldir=*) TOOLDIR="${1#*=}" ;;
|
||||
--osversion=*) OSVERSION="${1#*=}" ;;
|
||||
--prefix=*) PREFIX="${1#*=}" ;;
|
||||
-h|--help) usage ;;
|
||||
*) echo "Unknown parameter passed: $1"; usage ;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
# check OS
|
||||
if [ "$OSVERSION" == "unsupported" ]; then
|
||||
echo "Error: Unsupported OS."
|
||||
exit -1
|
||||
fi
|
||||
|
||||
# project subdirectories to build
|
||||
SUBDIRS=("." "!ci" "!perf" "hw*" "kernel*" "runtime*" "sim*" "tests*")
|
||||
|
||||
# Get the directory of the script
|
||||
SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
|
||||
|
||||
THIRD_PARTY_DIR=$SCRIPT_DIR/third_party
|
||||
|
||||
copy_files "$SCRIPT_DIR" "$CURRENT_DIR"
|
|
@ -1,4 +1,4 @@
|
|||
# FPGA Startup and Configuration Guide
|
||||
# FPGA Startup and Configuration Guide
|
||||
|
||||
OPAE Environment Setup
|
||||
----------------------
|
||||
|
@ -53,9 +53,9 @@ If the build fails and you need to restart it, clean up the build folder using t
|
|||
|
||||
$ make clean
|
||||
|
||||
The file `vortex_afu.gbs` should exist when the build is done:
|
||||
The bitstream file `vortex_afu.gbs` should exist when the build is done:
|
||||
|
||||
$ ls -lsa <build_dir>/vortex_afu.gbs
|
||||
$ ls -lsa <build_dir>/synth/vortex_afu.gbs
|
||||
|
||||
|
||||
Signing the bitstream and Programming the FPGA
|
||||
|
@ -65,10 +65,15 @@ Signing the bitstream and Programming the FPGA
|
|||
$ PACSign PR -t UPDATE -H openssl_manager -i vortex_afu.gbs -o vortex_afu_unsigned_ssl.gbs
|
||||
$ fpgasupdate vortex_afu_unsigned_ssl.gbs
|
||||
|
||||
FPGA sample test running OpenCL sgemm kernel
|
||||
--------------------------------------------
|
||||
Sample FPGA Run Test
|
||||
--------------------
|
||||
|
||||
Run the following from the Vortex root directory
|
||||
Ensure you have the correct opae runtime for the FPGA target
|
||||
|
||||
$ ./ci/blackbox.sh --driver=fpga --app=sgemm --args="-n64"
|
||||
$ make -C runtime/opae clean
|
||||
$ TARGET=FPGA make -C runtime/opae
|
||||
|
||||
Run the following from your Vortex build directory
|
||||
|
||||
$ TARGET=fpga ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n128"
|
||||
|
|
@ -3,7 +3,7 @@
|
|||
The Vortex Cache Sub-system has the following main properties:
|
||||
|
||||
- High-bandwidth transfer with Multi-bank parallelism
|
||||
- Non-blocking pipelined architecture with local MSHR
|
||||
- Non-blocking pipelined write-through cache architecture with per-bank MSHR
|
||||
- Configurable design: Dcache, Icache, L2 cache, L3 cache
|
||||
|
||||
### Cache Microarchitecture
|
||||
|
@ -11,16 +11,16 @@ The Vortex Cache Sub-system has the following main properties:
|
|||

|
||||
|
||||
The Vortex cache is comprised of multiple parallel banks. It is comprised of the following modules:
|
||||
- **Bank request dispatch crossbar**: assign a bank to incoming requests and resolve collision using stalls.
|
||||
- **Bank response merge crossbar**: merge result from banks and forward to the core response.
|
||||
- **Memory request multiplexer**: arbitrate bank memory requests
|
||||
- **Memory response demultiplexer**: forward memory response to the corresponding bank.
|
||||
- **Flush Unit**: perform tag memory initialization.
|
||||
- **Bank request dispatch crossbar**: assigns a bank to incoming requests and resolve collision using stalls.
|
||||
- **Bank response merge crossbar**: merges result from banks and forward to the core response.
|
||||
- **Memory request multiplexer**: arbitrates bank memory requests
|
||||
- **Memory response demultiplexer**: forwards memory response to the corresponding bank.
|
||||
- **Flush Unit**: performs tag memory initialization.
|
||||
|
||||
Incoming requests entering the cache are sent to a dispatch crossbar that selects the corresponding bank for each request, resolving bank collisions with stalls. The result output of each bank is merged back into the outgoing response port via the merger crossbar. Each bank integrates a non-blocking pipeline with a local Miss Status Holding Register (MSHR) to reduce the miss rate. The bank pipeline consists of the following stages:
|
||||
|
||||
- **Schedule**: Selects the next request into the pipeline from the incoming core request, memory fill, or the MSHR entry, with priority given to the latter.
|
||||
- **Tag Access**: A single-port read/write access to the tag store.
|
||||
- **Tag Access**: single-port read/write access to the tag store.
|
||||
- **Data Access**: Single-port read/write access to the data store.
|
||||
- **Response Handling**: Core response back to the core.
|
||||
|
||||
|
|
|
@ -3,14 +3,14 @@
|
|||
## Testing changes to the RTL or simulator GPU driver.
|
||||
|
||||
The Blackbox utility script will not pick up your changes if the h/w configuration is the same as during the last run.
|
||||
To force the utility to build the driver, you need pass the --rebuild=1 option when running tests.
|
||||
To force the utility to build the driver, you need to pass the --rebuild=1 option when running tests.
|
||||
Using --rebuild=0 will prevent the rebuild even if the h/w configuration is different from last run.
|
||||
|
||||
$ ./ci/blackbox.sh --driver=simx --app=demo --rebuild=1
|
||||
|
||||
## SimX Debugging
|
||||
|
||||
SimX cycle-approximate simulator allows faster debugging of Vortex kernels' execution.
|
||||
SimX cycle-approximate simulator allows faster debugging of Vortex kernels' execution.
|
||||
The recommended method to enable debugging is to pass the `--debug=<level>` flag to `blackbox` tool when running a program.
|
||||
|
||||
// Running demo program on SimX in debug mode
|
||||
|
@ -61,5 +61,8 @@ We provide a trace sanitizer tool under ./hw/scripts/trace_csv.py that you can u
|
|||
$ ./ci/blackbox.sh --driver=simx --app=demo --debug=3 --log=run_simx.log
|
||||
$ ./ci/trace_csv.py -tsimx run_simx.log -otrace_simx.csv
|
||||
|
||||
The first column in the CSV trace is UUID (universal unique identifier) of the instruction and the content is sorted by the UUID. You can use the UUID to trace the same instruction running on either the RTL hw or SimX simulator.
|
||||
$ diff trace_rtlsim.csv trace_simx.csv
|
||||
|
||||
The first column in the CSV trace is UUID (universal unique identifier) of the instruction and the content is sorted by the UUID.
|
||||
You can use the UUID to trace the same instruction running on either the RTL hw or SimX simulator.
|
||||
This can be very effective if you want to use SimX to debug your RTL hardware by comparing CSV traces.
|
|
@ -1,71 +1,45 @@
|
|||
# Environment Setup# Vortex Dev Environment Setup
|
||||
These instructions apply to the development vortex repo using the *updated toolchain*. The updated toolchain is considered to be any commit of `master` pulled from *July 2, 2023* onwards. The toolchain update in question can be viewed in this [commit](https://github.com/vortexgpgpu/vortex-dev/commit/0048496ba28d7b9a209a0e569d52d60f2b68fc04). Therefore, if you are unsure whether you are using the new toolchain or not, then you should check the `ci` folder for the existence of the `toolchain_prebuilt.sh` script. Furthermore, you should notice that the `toolchain_install.sh` script has the legacy `llvm()` split into `llvm-vortex()` and `llvm-pocl()`.
|
||||
# Environment Setup
|
||||
These instructions apply to the development vortex repo using the updated toolchain. The updated toolchain is considered to be any commit of `master` pulled from July 2, 2023 onwards. The toolchain update in question can be viewed in this [commit](https://github.com/vortexgpgpu/vortex-dev/commit/0048496ba28d7b9a209a0e569d52d60f2b68fc04). Therefore, if you are unsure whether you are using the new toolchain or not, then you should check the `ci` folder for the existence of the `toolchain_prebuilt.sh` script. Furthermore, you should notice that the `toolchain_install.sh` script has the legacy `llvm()` split into `llvm-vortex()` and `llvm-pocl()`.
|
||||
|
||||
> Note: As it stands right now, there are a few test suites which are not working due to this toolchain migration. We are working to determine an exact list of which ones are working and which ones are not. For now, if the repo builds at a minimum, then you can consider all these steps to have worked successfully.
|
||||
|
||||
## Choosing an Development Environment
|
||||
There are three primary environments you can use. Each has its own pros and cons. Refer to this section to help you determine which environment best suits your needs.
|
||||
1. Volvo
|
||||
2. Docker
|
||||
3. Local
|
||||
## Set Up on Your Own System
|
||||
The toolchain binaries provided with Vortex are built on Ubuntu-based systems. To install Vortex on your own system, [follow these instructions](install_vortex.md).
|
||||
|
||||
|
||||
## Servers for Georgia Tech Students and Collaborators
|
||||
### Volvo
|
||||
Volvo is a server provided by Georgia Tech. As such, it provides high performance compute, but you need valid credentials to access it. If you don't already have credentials, you can get in contact with your mentor to ask about setting your account up.
|
||||
Volvo is a 64-core server provided by HPArch. You need valid credentials to access it. If you don't already have access, you can get in contact with your mentor to ask about setting your account up.
|
||||
|
||||
Pros:
|
||||
Setup on Volvo:
|
||||
1. Connect to Georgia Tech's VPN or ssh into another machine on campus
|
||||
2. `ssh volvo.cc.gatech.edu`
|
||||
3. Clone Vortex to your home directory: `git clone --recursive https://github.com/vortexgpgpu/vortex.git`
|
||||
4. `source /nethome/software/set_vortex_env.sh` to set up the necessary environment variables.
|
||||
5. `make -s` in the `vortex` root directory
|
||||
6. Run a test program: `./ci/blackbox.sh --cores=2 --app=dogfood`
|
||||
|
||||
1. Native x86_64 architecture, AMD EPYC 7702P 64-Core Processor (*fast*)
|
||||
2. Packages and difficult configurations are already done for you
|
||||
3. Consistent environment as others, allowing for easier troubleshooting
|
||||
4. Just need to SSH into Volvo, minimal impact on local computer resources
|
||||
5. VScode remote development tools are phenomenal over SSH
|
||||
### Nio
|
||||
Nio is a 20-core desktop server provided by HPArch. If you have access to Volvo, you also have access to Nio.
|
||||
|
||||
Cons:
|
||||
1. Volvo is accessed via gatech vpn, external contributors might encounter issues with it -- especially from other university networks
|
||||
2. Account creation is not immediate and is subject to processing time
|
||||
3. Volvo might have outtages (*pretty uncommon*)
|
||||
5. SSH development requires internet and other remote development tools (*vscode works!*)
|
||||
|
||||
### Docker
|
||||
|
||||
Docker allows for isolated pre-built environments to be created, shared and used. They are much more resource efficient than a Virtual Machine, and have great tooling and support available. The main motivation for Docker is bringing a consistent development environment to your local computer, across all platforms.
|
||||
|
||||
Pros:
|
||||
|
||||
1. If you are native to x86_64, the container will also run natively, yielding better performance. However, if you have aarch64 (arm) processor, you can still run the Docker container without configuration changes.
|
||||
2. Consistent environment as others, allowing for easier troubleshooting
|
||||
3. Works out of the box, just have a working installation of Docker
|
||||
4. Vortex uses a build system, so once you build the repo once, only new code changes need to be recompiled
|
||||
5. Docker offers helpful tools and extensions to monitor the performance of your container
|
||||
|
||||
Cons:
|
||||
|
||||
1. If you are using an arm processor, the container will be run in emulation mode, so it will inherently run slower, as it needs to translate all the x86_64 instructions. It's still usable on Apple Silicon, however.
|
||||
2. Limited to your computer's performance, and Vortex is a large repo to build
|
||||
3. Will utilize a few gigabytes of storage on your computer for saving binaries to run the container
|
||||
Setup on Nio:
|
||||
1. Connect to Georgia Tech's VPN or ssh into another machine on campus
|
||||
2. `ssh nio.cc.gatech.edu`
|
||||
3. Clone Vortex to your home directory: `git clone --recursive https://github.com/vortexgpgpu/vortex.git`
|
||||
4. `source /opt/set_vortex_env_dev.sh` to set up the necessary environment variables.
|
||||
5. `make -s` in the `vortex` root directory
|
||||
6. Run a test program: `./ci/blackbox.sh --cores=2 --app=dogfood`
|
||||
|
||||
|
||||
### Local
|
||||
You can reverse engineer the Dockerfile and scripts above to get a working environment setup locally. This option is for experienced users, who have already considered the pros and cons of Volvo and Docker.
|
||||
## Docker (Experimental)
|
||||
Docker allows for isolated pre-built environments to be created, shared and used. The emulation mode required for ARM-based processors will incur a decrease in performance. Currently, the dockerfile is not included with the official vortex repository and is not actively maintained or supported.
|
||||
|
||||
## Setup on Volvo
|
||||
1. Clone Repo Recursively: `git clone --recursive https://github.com/vortexgpgpu/vortex-dev.git`
|
||||
2. Source `/opt/set_vortex_env_dev.sh` to initialize pre-installed toolchain
|
||||
3. `make -s` in `vortex-dev` root directory
|
||||
4. Run a test program: `./ci/blackbox.sh --cores=2 --app=dogfood`
|
||||
|
||||
## Setup with Docker
|
||||
Currently the Dockerfile is not included with the official vortex-dev repository, however you can quickly add it to repo and get started.
|
||||
1. Clone repo recursively onto your local machine: `git clone --recursive https://github.com/vortexgpgpu/vortex-dev.git`
|
||||
2. Download a copy of `Dockerfile.dev` and place it in the root of the repo.
|
||||
3. Build the Dockerfile into an image: `docker build --platform=linux/amd64 -t vortex-dev -f Dockerfile.dev .`
|
||||
4. Run a container based on the image: `docker run --rm -v ./:/root/vortex-dev/ -it --name vtx-dev --privileged=true --platform=linux/amd64 vortex-dev`
|
||||
### Setup with Docker
|
||||
1. Clone repo recursively onto your local machine: `git clone --recursive https://github.com/vortexgpgpu/vortex.git`
|
||||
2. Download the dockerfile from [here](https://github.gatech.edu/gist/usubramanya3/f1bf3e953faa38a6372e1292ffd0b65c) and place it in the root of the repo.
|
||||
3. Build the Dockerfile into an image: `docker build --platform=linux/amd64 -t vortex -f dockerfile .`
|
||||
4. Run a container based on the image: `docker run --rm -v ./:/root/vortex/ -it --name vtx-dev --privileged=true --platform=linux/amd64 vortex`
|
||||
5. Install the toolchain `./ci/toolchain_install.sh --all` (once per container)
|
||||
6. `make -s` in `vortex-dev` root directory
|
||||
6. `make -s` in `vortex` root directory
|
||||
7. Run a test program: `./ci/blackbox.sh --cores=2 --app=dogfood`
|
||||
|
||||
|
||||
### Additional Docker Commands
|
||||
- Exit from a container (does not stop or remove it)
|
||||
- Resume a container you have exited or start a second terminal session `docker exec -it <container-name> bash`
|
||||
|
||||
You may exit from a container and resume a container you have exited or start a second terminal session `docker exec -it <container-name> bash`
|
||||
|
|
|
@ -7,13 +7,15 @@
|
|||
- [Cache Subsystem](cache_subsystem.md)
|
||||
- [Software](software.md)
|
||||
- [Simulation](simulation.md)
|
||||
- [FPGA Setup Guide](fpga_setup.md)
|
||||
- [Altera FPGA Setup Guide](altera_fpga_guide.md)
|
||||
- [Xilinx FPGA Setup Guide](xilinx_fpga_guide.md)
|
||||
- [Debugging](debugging.md)
|
||||
- [Useful Links](references.md)
|
||||
|
||||
## Installation
|
||||
|
||||
- Refer to the build instructions in [README](../README.md).
|
||||
- For the different environments Vortex supports, [read this document](environment_setup.md).
|
||||
- To install on your own system, [follow this document](install_vortex.md).
|
||||
|
||||
## Quick Start Scenarios
|
||||
|
||||
|
@ -26,6 +28,6 @@ Running Vortex simulators with different configurations:
|
|||
|
||||
$ ./ci/blackbox.sh --driver=opae --clusters=1 --cores=4 --warps=4 --threads=2 --app=demo
|
||||
|
||||
- Run dogfood driver test with simx driver and Vortex config of 4 cluster, 4 cores, 8 warps, 6 threads
|
||||
- Run dogfood driver test with simx driver and Vortex config of 4 clusters, 4 cores, 8 warps, 6 threads
|
||||
|
||||
$ ./ci/blackbox.sh --driver=simx --clusters=4 --cores=4 --warps=8 --threads=6 --app=dogfood
|
||||
$ ./ci/blackbox.sh --driver=simx --clusters=4 --cores=4 --warps=8 --threads=6 --app=dogfood
|
||||
|
|
81
docs/install_vortex.md
Normal file
81
docs/install_vortex.md
Normal file
|
@ -0,0 +1,81 @@
|
|||
# Installing and Setting Up the Vortex Environment
|
||||
|
||||
## Ubuntu 18.04, 20.04
|
||||
|
||||
1. Install the following dependencies:
|
||||
|
||||
```
|
||||
sudo apt-get install build-essential zlib1g-dev libtinfo-dev libncurses5 uuid-dev libboost-serialization-dev libpng-dev libhwloc-dev
|
||||
```
|
||||
|
||||
2. Upgrade GCC to 11:
|
||||
|
||||
```
|
||||
sudo apt-get install gcc-11 g++-11
|
||||
```
|
||||
|
||||
Multiple gcc versions on Ubuntu can be managed with update-alternatives, e.g.:
|
||||
|
||||
```
|
||||
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 9
|
||||
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-9 9
|
||||
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 11
|
||||
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 11
|
||||
```
|
||||
|
||||
3. Download the Vortex codebase:
|
||||
|
||||
```
|
||||
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
|
||||
```
|
||||
4. Build Vortex
|
||||
|
||||
```
|
||||
$ cd vortex
|
||||
$ mkdir -p build
|
||||
$ cd build
|
||||
$ ../configure --xlen=32 --tooldir=$HOME/tools
|
||||
$ ./ci/toolchain_install.sh --all
|
||||
$ source ./ci/toolchain_env.sh
|
||||
$ make -s
|
||||
```
|
||||
|
||||
|
||||
## RHEL 8
|
||||
Note: depending on the system, some of the toolchain may need to be recompiled for non-Ubuntu Linux. The source for the tools can be found [here](https://github.com/vortexgpgpu/).
|
||||
|
||||
1. Install the following dependencies:
|
||||
|
||||
```
|
||||
sudo yum install libpng-devel boost boost-devel boost-serialization libuuid-devel opencl-headers hwloc hwloc-devel gmp-devel compat-hwloc1
|
||||
```
|
||||
|
||||
2. Upgrade GCC to 11:
|
||||
|
||||
```
|
||||
sudo yum install gcc-toolset-11
|
||||
```
|
||||
|
||||
Multiple gcc versions on Red Hat can be managed with scl
|
||||
|
||||
3. Install MPFR 4.2.0:
|
||||
|
||||
Download [the source](https://ftp.gnu.org/gnu/mpfr/) and follow [the installation documentation](https://www.mpfr.org/mpfr-current/mpfr.html#How-to-Install).
|
||||
|
||||
4. Download the Vortex codebase:
|
||||
|
||||
```
|
||||
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
|
||||
```
|
||||
|
||||
5. Build Vortex
|
||||
|
||||
```
|
||||
$ cd vortex
|
||||
$ mkdir -p build
|
||||
$ cd build
|
||||
$ ../configure --xlen=32 --tooldir=$HOME/tools
|
||||
$ ./ci/toolchain_install.sh --all
|
||||
$ source ./ci/toolchain_env.sh
|
||||
$ make -s
|
||||
```
|
|
@ -20,7 +20,7 @@ Running tests under specific drivers (rtlsim,simx,fpga) is done using the script
|
|||
- *Cores* - used to specify the number of cores (processing element containing multiple warps) within a configuration.
|
||||
- *Warps* - used to specify the number of warps (collection of concurrent hardware threads) within a configuration.
|
||||
- *Threads* - used to specify the number of threads (smallest unit of computation) within a configuration.
|
||||
- *L2cache* - used to enable the shard l2cache among the Vortex cores.
|
||||
- *L2cache* - used to enable the shared l2cache among the Vortex cores.
|
||||
- *L3cache* - used to enable the shared l3cache among the Vortex clusters.
|
||||
- *Driver* - used to specify which driver to run the Vortex simulation (either rtlsim, opae, xrt, simx).
|
||||
- *Debug* - used to enable debug mode for the Vortex simulation.
|
||||
|
|
|
@ -15,7 +15,7 @@ You can execute the same application of a GPU architecture with 2 cores:
|
|||
|
||||
$ ./ci/blackbox.sh --core=2 --driver=simx --app=sgemm --args="-n10"
|
||||
|
||||
When excuting, Blackbox needs to recompile the driver if the desired architecture changes.
|
||||
When executing, Blackbox needs to recompile the driver if the desired architecture changes.
|
||||
It tracks the latest configuration in a file under the current directory blackbox.<driver>.cache.
|
||||
To avoid having to rebuild the driver all the time, Blackbox checks if the latest cached configuration matches the current.
|
||||
|
||||
|
@ -24,24 +24,29 @@ To avoid having to rebuild the driver all the time, Blackbox checks if the lates
|
|||
The Vortex test suite is located under the `tests/` folder.
|
||||
You can execute the default regression suite by running the following commands at the root folder.
|
||||
|
||||
$ make -C tests/regression run-simx
|
||||
$ make -C tests/regression run-simx
|
||||
$ make -C tests/regression run-rtlsim
|
||||
|
||||
You can execute the default OpenCL suite by running the following commands at the root folder.
|
||||
|
||||
$ make -C tests/opencl run-simx
|
||||
$ make -C tests/opencl run-simx
|
||||
$ make -C tests/opencl run-rtlsim
|
||||
|
||||
## Creating Your Own Regression Tests
|
||||
- Inside `test/` you will find a series of folders which are named based on what they test
|
||||
- You can view the tests to see which ones have tests similar to what you are trying to create new tests for
|
||||
- once you have found a similar baseline, you can copy the folder and rename it to what you are planning to test
|
||||
- `testcases.h` contains each of the test case templates
|
||||
- `main.cpp` contains the implementation of each of the test cases and builds a test suite of all the tests cases you want
|
||||
## Creating Your Own Regression Test
|
||||
|
||||
Compile the test case: `make -C tests/regression/<testcase-name>/ clean-all && make -C tests/regression/<testcase-name>/`
|
||||
Inside `tests/regression` you will find a series of folders which are named based on what they test.
|
||||
You can view the tests to see which ones have tests similar to what you are trying to create new tests for.
|
||||
Once you have found a similar baseline, you can copy the folder and rename it to what you are planning to test.
|
||||
A regression test typically implements the following files:
|
||||
- ***kernel.cpp*** contains the GPU kernel code.
|
||||
- ***main.cpp*** contains the host CPU code.
|
||||
- ***Makefile*** defines the compiler build commands for the CPU and GPU binaries.
|
||||
|
||||
Run the test case: `./ci/blackbox.sh --driver=simx --cores=4 --app=<testcase-name> --debug`
|
||||
Sync your build folder: `$ ../configure`
|
||||
|
||||
Compile your test: `$ make -C tests/regression/<test-name>`
|
||||
|
||||
Run your test: `$ ./ci/blackbox.sh --driver=simx --app=<test-name> --debug`
|
||||
|
||||
## Adding Your Tests to the CI Pipeline
|
||||
see `continuous_integration.md`
|
||||
See `continuous_integration.md`
|
36
docs/xilinx_fpga_guide.md
Normal file
36
docs/xilinx_fpga_guide.md
Normal file
|
@ -0,0 +1,36 @@
|
|||
# FPGA Startup and Configuration Guide
|
||||
|
||||
XRT Environment Setup
|
||||
----------------------
|
||||
|
||||
$ source /opt/xilinx/Vitis/2023.1/settings64.sh
|
||||
$ source /opt/xilinx/xrt/setup.sh
|
||||
|
||||
|
||||
Check Installed FPGA Platforms
|
||||
------------------------------
|
||||
|
||||
$ platforminfo -l
|
||||
|
||||
|
||||
Build FPGA image
|
||||
----------------
|
||||
|
||||
$ cd hw/syn/xilinx/xrt
|
||||
$ PREFIX=test1 PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 TARGET=hw NUM_CORES=4 make
|
||||
|
||||
This runs the synthesis in a new build directory: BUILD_DIR := "\<PREFIX>\_\<PLATFORM>\_\<TARGET>"
|
||||
|
||||
The generated bitstream will be located under `<BUILD_DIR>/bin/vortex_afu.xclbin`
|
||||
|
||||
Sample FPGA Run Test
|
||||
--------------------
|
||||
|
||||
Ensure you have the correct XRT runtime for the FPGA target
|
||||
|
||||
$ make -C runtime/xrt clean
|
||||
$ TARGET=hw make -C runtime/xrt
|
||||
|
||||
Run the following from your Vortex build directory
|
||||
|
||||
$ TARGET=hw FPGA_BIN_DIR=<BUILD_DIR>/bin ./ci/blackbox.sh --driver=xrt --app=sgemm --args="-n128"
|
2
hw/.gitignore
vendored
2
hw/.gitignore
vendored
|
@ -1,2 +0,0 @@
|
|||
VX_config.h
|
||||
VX_types.h
|
13
hw/Makefile
13
hw/Makefile
|
@ -1,17 +1,22 @@
|
|||
RTL_DIR=./rtl
|
||||
SCRIPT_DIR=./scripts
|
||||
ROOT_DIR := $(realpath ..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
HW_DIR := $(VORTEX_HOME)/hw
|
||||
SCRIPT_DIR := $(HW_DIR)/scripts
|
||||
RTL_DIR := $(HW_DIR)/rtl
|
||||
|
||||
all: config
|
||||
|
||||
config: VX_config.h VX_types.h
|
||||
|
||||
VX_config.h: $(RTL_DIR)/VX_config.vh
|
||||
VX_config.h: $(RTL_DIR)/VX_config.vh
|
||||
$(SCRIPT_DIR)/gen_config.py -i $(RTL_DIR)/VX_config.vh -o VX_config.h
|
||||
|
||||
VX_types.h: $(RTL_DIR)/VX_types.vh
|
||||
VX_types.h: $(RTL_DIR)/VX_types.vh
|
||||
$(SCRIPT_DIR)/gen_config.py -i $(RTL_DIR)/VX_types.vh -o VX_types.h
|
||||
|
||||
clean:
|
||||
$(MAKE) -C unittest clean
|
||||
rm -f VX_config.h VX_types.h
|
||||
|
||||
.PHONY: VX_config.h VX_types.h
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -40,7 +40,7 @@ extern "C" {
|
|||
void dpi_itof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_utof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_f2f(bool enable, int dst_fmt, int64_t a, int64_t* result);
|
||||
|
||||
|
||||
void dpi_fclss(bool enable, int dst_fmt, int64_t a, int64_t* result);
|
||||
void dpi_fsgnj(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result);
|
||||
void dpi_fsgnjn(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result);
|
||||
|
@ -54,15 +54,15 @@ extern "C" {
|
|||
}
|
||||
|
||||
inline uint64_t nan_box(uint32_t value) {
|
||||
#ifdef FPU_RV64F
|
||||
#ifdef XLEN_64
|
||||
return value | 0xffffffff00000000;
|
||||
#else
|
||||
#else
|
||||
return value;
|
||||
#endif
|
||||
}
|
||||
|
||||
inline bool is_nan_boxed(uint64_t value) {
|
||||
#ifdef FPU_RV64F
|
||||
#ifdef XLEN_64
|
||||
return (uint32_t(value >> 32) == 0xffffffff);
|
||||
#else
|
||||
__unused (value);
|
||||
|
@ -70,15 +70,14 @@ inline bool is_nan_boxed(uint64_t value) {
|
|||
#endif
|
||||
}
|
||||
|
||||
inline int64_t check_boxing(int64_t a) {
|
||||
if (!is_nan_boxed(a)) {
|
||||
return nan_box(0x7fc00000); // NaN
|
||||
}
|
||||
return a;
|
||||
inline int64_t check_boxing(int64_t a) {
|
||||
if (is_nan_boxed(a))
|
||||
return a;
|
||||
return nan_box(0x7fc00000); // NaN
|
||||
}
|
||||
|
||||
void dpi_fadd(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fadd_d(a, b, (*frm & 0x7), fflags);
|
||||
|
@ -88,7 +87,7 @@ void dpi_fadd(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal*
|
|||
}
|
||||
|
||||
void dpi_fsub(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsub_d(a, b, (*frm & 0x7), fflags);
|
||||
|
@ -98,19 +97,19 @@ void dpi_fsub(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal*
|
|||
}
|
||||
|
||||
void dpi_fmul(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmul_d(a, b, (*frm & 0x7), fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmul_d(a, b, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fmul_s(check_boxing(a), check_boxing(b), (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmadd_d(a, b, c, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fmadd_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
|
||||
|
@ -118,9 +117,9 @@ void dpi_fmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const
|
|||
}
|
||||
|
||||
void dpi_fmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmsub_d(a, b, c, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fmsub_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
|
||||
|
@ -128,9 +127,9 @@ void dpi_fmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const
|
|||
}
|
||||
|
||||
void dpi_fnmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
if (dst_fmt) {
|
||||
*result = rv_fnmadd_d(a, b, c, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fnmadd_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
|
||||
|
@ -138,9 +137,9 @@ void dpi_fnmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const
|
|||
}
|
||||
|
||||
void dpi_fnmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
if (dst_fmt) {
|
||||
*result = rv_fnmsub_d(a, b, c, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fnmsub_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
|
||||
|
@ -148,36 +147,36 @@ void dpi_fnmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const
|
|||
}
|
||||
|
||||
void dpi_fdiv(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fdiv_d(a, b, (*frm & 0x7), fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fdiv_d(a, b, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fdiv_s(check_boxing(a), check_boxing(b), (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fsqrt(bool enable, int dst_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsqrt_d(a, (*frm & 0x7), fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsqrt_d(a, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fsqrt_s(check_boxing(a), (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_ftoi(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
if (src_fmt) {
|
||||
if (src_fmt) {
|
||||
*result = rv_ftol_d(a, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = rv_ftol_s(check_boxing(a), (*frm & 0x7), fflags);
|
||||
}
|
||||
} else {
|
||||
if (src_fmt) {
|
||||
} else {
|
||||
if (src_fmt) {
|
||||
*result = sext<uint64_t>(rv_ftoi_d(a, (*frm & 0x7), fflags), 32);
|
||||
} else {
|
||||
*result = sext<uint64_t>(rv_ftoi_s(check_boxing(a), (*frm & 0x7), fflags), 32);
|
||||
|
@ -186,61 +185,61 @@ void dpi_ftoi(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVa
|
|||
}
|
||||
|
||||
void dpi_ftou(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
if (src_fmt) {
|
||||
if (src_fmt) {
|
||||
*result = rv_ftolu_d(a, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = rv_ftolu_s(check_boxing(a), (*frm & 0x7), fflags);
|
||||
}
|
||||
} else {
|
||||
if (src_fmt) {
|
||||
} else {
|
||||
if (src_fmt) {
|
||||
*result = sext<uint64_t>(rv_ftou_d(a, (*frm & 0x7), fflags), 32);
|
||||
} else {
|
||||
*result = sext<uint64_t>(rv_ftou_s(check_boxing(a), (*frm & 0x7), fflags), 32);
|
||||
*result = sext<uint64_t>(rv_ftou_s(check_boxing(a), (*frm & 0x7), fflags), 32);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_itof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
if (src_fmt) {
|
||||
if (src_fmt) {
|
||||
*result = rv_ltof_d(a, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
} else {
|
||||
*result = rv_itof_d(a, (*frm & 0x7), fflags);
|
||||
}
|
||||
} else {
|
||||
if (src_fmt) {
|
||||
*result = nan_box(rv_ltof_s(a, (*frm & 0x7), fflags));
|
||||
} else {
|
||||
*result = nan_box(rv_itof_s(a, (*frm & 0x7), fflags));
|
||||
if (src_fmt) {
|
||||
*result = nan_box(rv_ltof_s(a, (*frm & 0x7), fflags));
|
||||
} else {
|
||||
*result = nan_box(rv_itof_s(a, (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_utof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
if (src_fmt) {
|
||||
if (src_fmt) {
|
||||
*result = rv_lutof_d(a, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
} else {
|
||||
*result = rv_utof_d(a, (*frm & 0x7), fflags);
|
||||
}
|
||||
} else {
|
||||
if (src_fmt) {
|
||||
if (src_fmt) {
|
||||
*result = nan_box(rv_lutof_s(a, (*frm & 0x7), fflags));
|
||||
} else {
|
||||
} else {
|
||||
*result = nan_box(rv_utof_s(a, (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_f2f(bool enable, int dst_fmt, int64_t a, int64_t* result) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_ftod((int32_t)check_boxing(a));
|
||||
|
@ -250,90 +249,90 @@ void dpi_f2f(bool enable, int dst_fmt, int64_t a, int64_t* result) {
|
|||
}
|
||||
|
||||
void dpi_fclss(bool enable, int dst_fmt, int64_t a, int64_t* result) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fclss_d(a);
|
||||
} else {
|
||||
*result = rv_fclss_s(check_boxing(a));
|
||||
if (dst_fmt) {
|
||||
*result = rv_fclss_d(a);
|
||||
} else {
|
||||
*result = rv_fclss_s(check_boxing(a));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fsgnj(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsgnj_d(a, b);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsgnj_d(a, b);
|
||||
} else {
|
||||
*result = nan_box(rv_fsgnj_s(check_boxing(a), check_boxing(b)));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fsgnjn(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsgnjn_d(a, b);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsgnjn_d(a, b);
|
||||
} else {
|
||||
*result = nan_box(rv_fsgnjn_s(check_boxing(a), check_boxing(b)));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fsgnjx(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsgnjx_d(a, b);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsgnjx_d(a, b);
|
||||
} else {
|
||||
*result = nan_box(rv_fsgnjx_s(check_boxing(a), check_boxing(b)));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_flt(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_flt_d(a, b, fflags);
|
||||
*result = rv_flt_d(a, b, fflags);
|
||||
} else {
|
||||
*result = rv_flt_s(check_boxing(a), check_boxing(b), fflags);
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fle(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fle_d(a, b, fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fle_d(a, b, fflags);
|
||||
} else {
|
||||
*result = rv_fle_s(check_boxing(a), check_boxing(b), fflags);
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_feq(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_feq_d(a, b, fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_feq_d(a, b, fflags);
|
||||
} else {
|
||||
*result = rv_feq_s(check_boxing(a), check_boxing(b), fflags);
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fmin(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmin_d(a, b, fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmin_d(a, b, fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fmin_s(check_boxing(a), check_boxing(b), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fmax(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmax_d(a, b, fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmax_d(a, b, fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fmax_s(check_boxing(a), check_boxing(b), fflags));
|
||||
}
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,8 +14,6 @@
|
|||
`ifndef FLOAT_DPI_VH
|
||||
`define FLOAT_DPI_VH
|
||||
|
||||
`include "VX_config.vh"
|
||||
|
||||
import "DPI-C" function void dpi_fadd(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fsub(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fmul(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -21,8 +21,6 @@
|
|||
#include "svdpi.h"
|
||||
#include "verilated_vpi.h"
|
||||
|
||||
#include "uuid_gen.h"
|
||||
|
||||
#ifdef XLEN_64
|
||||
#define iword_t int64_t
|
||||
#define uword_t uint64_t
|
||||
|
@ -50,7 +48,7 @@ extern "C" {
|
|||
void dpi_trace_start();
|
||||
void dpi_trace_stop();
|
||||
|
||||
uint64_t dpi_uuid_gen(bool reset, int wid, uint64_t PC);
|
||||
uint64_t dpi_uuid_gen(bool reset, int wid);
|
||||
}
|
||||
|
||||
bool sim_trace_enabled();
|
||||
|
@ -70,7 +68,7 @@ public:
|
|||
|
||||
void push(int value, bool enable) {
|
||||
if (!enable)
|
||||
return;
|
||||
return;
|
||||
for (unsigned i = 0; i < depth_-1; ++i) {
|
||||
buffer_[i] = buffer_[i+1];
|
||||
}
|
||||
|
@ -85,7 +83,7 @@ private:
|
|||
|
||||
std::vector<int> buffer_;
|
||||
bool init_;
|
||||
unsigned depth_;
|
||||
unsigned depth_;
|
||||
};
|
||||
|
||||
class Instances {
|
||||
|
@ -95,9 +93,9 @@ public:
|
|||
}
|
||||
|
||||
int allocate() {
|
||||
mutex_.lock();
|
||||
mutex_.lock();
|
||||
int inst = instances_.size();
|
||||
instances_.resize(inst + 1);
|
||||
instances_.resize(inst + 1);
|
||||
mutex_.unlock();
|
||||
return inst;
|
||||
}
|
||||
|
@ -135,7 +133,7 @@ void dpi_imul(bool enable, bool is_signed_a, bool is_signed_b, iword_t a, iword_
|
|||
udword_t second = *(uword_t*)&b;
|
||||
|
||||
udword_t mask = udword_t(-1) << (8 * sizeof(iword_t));
|
||||
|
||||
|
||||
if (is_signed_a && a < 0) {
|
||||
first |= mask;
|
||||
}
|
||||
|
@ -171,11 +169,11 @@ void dpi_idiv(bool enable, bool is_signed, iword_t a, iword_t b, iword_t* quotie
|
|||
} else if (dividen == inf_neg && divisor == -1) {
|
||||
*remainder = 0;
|
||||
*quotient = dividen;
|
||||
} else {
|
||||
} else {
|
||||
*quotient = (iword_t)dividen / (iword_t)divisor;
|
||||
*remainder = (iword_t)dividen % (iword_t)divisor;
|
||||
*remainder = (iword_t)dividen % (iword_t)divisor;
|
||||
}
|
||||
} else {
|
||||
} else {
|
||||
if (b == 0) {
|
||||
*quotient = -1;
|
||||
*remainder = dividen;
|
||||
|
@ -188,45 +186,35 @@ void dpi_idiv(bool enable, bool is_signed, iword_t a, iword_t b, iword_t* quotie
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void dpi_trace(int level, const char* format, ...) {
|
||||
void dpi_trace(int level, const char* format, ...) {
|
||||
if (level > DEBUG_LEVEL)
|
||||
return;
|
||||
if (!sim_trace_enabled())
|
||||
return;
|
||||
va_list va;
|
||||
va_start(va, format);
|
||||
va_start(va, format);
|
||||
vprintf(format, va);
|
||||
va_end(va);
|
||||
va_end(va);
|
||||
}
|
||||
|
||||
void dpi_trace_start() {
|
||||
void dpi_trace_start() {
|
||||
sim_trace_enable(true);
|
||||
}
|
||||
|
||||
void dpi_trace_stop() {
|
||||
void dpi_trace_stop() {
|
||||
sim_trace_enable(false);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
std::unordered_map<uint32_t, std::shared_ptr<vortex::UUIDGenerator>> g_uuid_gens;
|
||||
std::unordered_map<uint32_t, uint32_t> g_uuid_gens;
|
||||
|
||||
uint64_t dpi_uuid_gen(bool reset, int wid, uint64_t PC) {
|
||||
uint64_t dpi_uuid_gen(bool reset, int wid) {
|
||||
if (reset) {
|
||||
g_uuid_gens.clear();
|
||||
return 0;
|
||||
}
|
||||
std::shared_ptr<vortex::UUIDGenerator> uuid_gen;
|
||||
auto it = g_uuid_gens.find(wid);
|
||||
if (it == g_uuid_gens.end()) {
|
||||
uuid_gen = std::make_shared<vortex::UUIDGenerator>();
|
||||
g_uuid_gens.emplace(wid, uuid_gen);
|
||||
} else {
|
||||
uuid_gen = it->second;
|
||||
}
|
||||
uint32_t instr_uuid = uuid_gen->get_uuid(PC);
|
||||
uint32_t instr_id = instr_uuid & 0xffff;
|
||||
uint32_t instr_ref = instr_uuid >> 16;
|
||||
uint64_t uuid = (uint64_t(instr_ref) << 32) | (wid << 16) | instr_id;
|
||||
uint32_t instr_uuid = g_uuid_gens[wid]++;
|
||||
uint64_t uuid = (uint64_t(wid) << 32) | instr_uuid;
|
||||
return uuid;
|
||||
}
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,8 +14,6 @@
|
|||
`ifndef UTIL_DPI_VH
|
||||
`define UTIL_DPI_VH
|
||||
|
||||
`include "VX_config.vh"
|
||||
|
||||
`ifdef XLEN_64
|
||||
`define INT_TYPE longint
|
||||
`else
|
||||
|
@ -32,6 +30,6 @@ import "DPI-C" function void dpi_trace(input int level, input string format /*ve
|
|||
import "DPI-C" function void dpi_trace_start();
|
||||
import "DPI-C" function void dpi_trace_stop();
|
||||
|
||||
import "DPI-C" function longint dpi_uuid_gen(input logic reset, input int wid, input longint PC);
|
||||
import "DPI-C" function longint dpi_uuid_gen(input logic reset, input int wid);
|
||||
|
||||
`endif
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,8 +14,9 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_cluster import VX_gpu_pkg::*; #(
|
||||
parameter CLUSTER_ID = 0
|
||||
) (
|
||||
parameter CLUSTER_ID = 0,
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
// Clock
|
||||
|
@ -32,27 +33,23 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
// Memory
|
||||
VX_mem_bus_if.master mem_bus_if,
|
||||
|
||||
// simulation helper signals
|
||||
output wire sim_ebreak,
|
||||
output wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value,
|
||||
|
||||
// Status
|
||||
output wire busy
|
||||
);
|
||||
|
||||
`ifdef SCOPE
|
||||
localparam scope_socket = 0;
|
||||
`SCOPE_IO_SWITCH (scope_socket + `NUM_SOCKETS);
|
||||
`endif
|
||||
`SCOPE_IO_SWITCH (`NUM_SOCKETS);
|
||||
`endif
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_mem_perf_if mem_perf_tmp_if();
|
||||
VX_mem_perf_if mem_perf_tmp_if();
|
||||
assign mem_perf_tmp_if.icache = 'x;
|
||||
assign mem_perf_tmp_if.dcache = 'x;
|
||||
assign mem_perf_tmp_if.l3cache = mem_perf_if.l3cache;
|
||||
assign mem_perf_tmp_if.smem = 'x;
|
||||
assign mem_perf_tmp_if.lmem = 'x;
|
||||
assign mem_perf_tmp_if.mem = mem_perf_if.mem;
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifdef GBAR_ENABLE
|
||||
|
||||
|
@ -63,7 +60,7 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
|
||||
VX_gbar_arb #(
|
||||
.NUM_REQS (`NUM_SOCKETS),
|
||||
.OUT_REG ((`NUM_SOCKETS > 2) ? 1 : 0) // bgar_unit has no backpressure
|
||||
.OUT_BUF ((`NUM_SOCKETS > 2) ? 1 : 0) // bgar_unit has no backpressure
|
||||
) gbar_arb (
|
||||
.clk (clk),
|
||||
.reset (gbar_reset),
|
||||
|
@ -89,7 +86,7 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (l2_reset, reset);
|
||||
|
||||
VX_cache_wrap #(
|
||||
.INSTANCE_ID ("l2cache"),
|
||||
.INSTANCE_ID ($sformatf("%s-l2cache", INSTANCE_ID)),
|
||||
.CACHE_SIZE (`L2_CACHE_SIZE),
|
||||
.LINE_SIZE (`L2_LINE_SIZE),
|
||||
.NUM_BANKS (`L2_NUM_BANKS),
|
||||
|
@ -99,12 +96,14 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
.CRSQ_SIZE (`L2_CRSQ_SIZE),
|
||||
.MSHR_SIZE (`L2_MSHR_SIZE),
|
||||
.MRSQ_SIZE (`L2_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`L2_MREQ_SIZE),
|
||||
.MREQ_SIZE (`L2_WRITEBACK ? `L2_MSHR_SIZE : `L2_MREQ_SIZE),
|
||||
.TAG_WIDTH (L2_TAG_WIDTH),
|
||||
.WRITE_ENABLE (1),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.CORE_OUT_REG (2),
|
||||
.MEM_OUT_REG (2),
|
||||
.WRITEBACK (`L2_WRITEBACK),
|
||||
.DIRTY_BYTES (`L2_WRITEBACK),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.CORE_OUT_BUF (2),
|
||||
.MEM_OUT_BUF (2),
|
||||
.NC_ENABLE (1),
|
||||
.PASSTHRU (!`L2_ENABLED)
|
||||
) l2cache (
|
||||
|
@ -119,13 +118,6 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [`NUM_SOCKETS-1:0] per_socket_sim_ebreak;
|
||||
wire [`NUM_SOCKETS-1:0][`NUM_REGS-1:0][`XLEN-1:0] per_socket_sim_wb_value;
|
||||
assign sim_ebreak = per_socket_sim_ebreak[0];
|
||||
assign sim_wb_value = per_socket_sim_wb_value[0];
|
||||
`UNUSED_VAR (per_socket_sim_ebreak)
|
||||
`UNUSED_VAR (per_socket_sim_wb_value)
|
||||
|
||||
VX_dcr_bus_if socket_dcr_bus_tmp_if();
|
||||
assign socket_dcr_bus_tmp_if.write_valid = dcr_bus_if.write_valid && (dcr_bus_if.write_addr >= `VX_DCR_BASE_STATE_BEGIN && dcr_bus_if.write_addr < `VX_DCR_BASE_STATE_END);
|
||||
assign socket_dcr_bus_tmp_if.write_addr = dcr_bus_if.write_addr;
|
||||
|
@ -133,17 +125,19 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
|
||||
wire [`NUM_SOCKETS-1:0] per_socket_busy;
|
||||
|
||||
VX_dcr_bus_if socket_dcr_bus_if();
|
||||
`BUFFER_DCR_BUS_IF (socket_dcr_bus_if, socket_dcr_bus_tmp_if, (`NUM_SOCKETS > 1));
|
||||
|
||||
// Generate all sockets
|
||||
for (genvar i = 0; i < `NUM_SOCKETS; ++i) begin
|
||||
for (genvar socket_id = 0; socket_id < `NUM_SOCKETS; ++socket_id) begin : sockets
|
||||
|
||||
`RESET_RELAY (socket_reset, reset);
|
||||
|
||||
VX_socket #(
|
||||
.SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + i)
|
||||
.SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + socket_id),
|
||||
.INSTANCE_ID ($sformatf("%s-socket%0d", INSTANCE_ID, socket_id))
|
||||
) socket (
|
||||
`SCOPE_IO_BIND (scope_socket+i)
|
||||
`SCOPE_IO_BIND (scope_socket+socket_id)
|
||||
|
||||
.clk (clk),
|
||||
.reset (socket_reset),
|
||||
|
@ -151,18 +145,16 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
`ifdef PERF_ENABLE
|
||||
.mem_perf_if (mem_perf_tmp_if),
|
||||
`endif
|
||||
|
||||
|
||||
.dcr_bus_if (socket_dcr_bus_if),
|
||||
|
||||
.mem_bus_if (per_socket_mem_bus_if[i]),
|
||||
.mem_bus_if (per_socket_mem_bus_if[socket_id]),
|
||||
|
||||
`ifdef GBAR_ENABLE
|
||||
.gbar_bus_if (per_socket_gbar_bus_if[i]),
|
||||
.gbar_bus_if (per_socket_gbar_bus_if[socket_id]),
|
||||
`endif
|
||||
|
||||
.sim_ebreak (per_socket_sim_ebreak[i]),
|
||||
.sim_wb_value (per_socket_sim_wb_value[i]),
|
||||
.busy (per_socket_busy[i])
|
||||
.busy (per_socket_busy[socket_id])
|
||||
);
|
||||
end
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -40,6 +40,18 @@
|
|||
`define EXT_F_ENABLE
|
||||
`endif
|
||||
|
||||
`ifdef XLEN_64
|
||||
`ifndef FPU_DSP
|
||||
`ifndef EXT_D_DISABLE
|
||||
`define EXT_D_ENABLE
|
||||
`endif
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifndef EXT_ZICOND_DISABLE
|
||||
`define EXT_ZICOND_ENABLE
|
||||
`endif
|
||||
|
||||
`ifndef XLEN_32
|
||||
`ifndef XLEN_64
|
||||
`define XLEN_32
|
||||
|
@ -91,13 +103,12 @@
|
|||
`endif
|
||||
|
||||
`ifndef NUM_BARRIERS
|
||||
`define NUM_BARRIERS 4
|
||||
`define NUM_BARRIERS `UP(`NUM_WARPS/2)
|
||||
`endif
|
||||
|
||||
`ifndef SOCKET_SIZE
|
||||
`define SOCKET_SIZE `MIN(4, `NUM_CORES)
|
||||
`endif
|
||||
`define NUM_SOCKETS `UP(`NUM_CORES / `SOCKET_SIZE)
|
||||
|
||||
`ifdef L2_ENABLE
|
||||
`define L2_ENABLED 1
|
||||
|
@ -129,66 +140,88 @@
|
|||
`endif
|
||||
|
||||
`ifndef L1_LINE_SIZE
|
||||
`ifdef L1_DISABLE
|
||||
`define L1_LINE_SIZE ((`L2_ENABLED || `L3_ENABLED) ? 4 : `MEM_BLOCK_SIZE)
|
||||
`else
|
||||
`define L1_LINE_SIZE ((`L2_ENABLED || `L3_ENABLED) ? 16 : `MEM_BLOCK_SIZE)
|
||||
`define L1_LINE_SIZE `MEM_BLOCK_SIZE
|
||||
`endif
|
||||
|
||||
`ifndef L2_LINE_SIZE
|
||||
`define L2_LINE_SIZE `MEM_BLOCK_SIZE
|
||||
`endif
|
||||
|
||||
`ifndef L3_LINE_SIZE
|
||||
`define L3_LINE_SIZE `MEM_BLOCK_SIZE
|
||||
`endif
|
||||
|
||||
`ifdef XLEN_64
|
||||
|
||||
`ifndef STARTUP_ADDR
|
||||
`define STARTUP_ADDR 64'h180000000
|
||||
`ifndef STACK_BASE_ADDR
|
||||
`define STACK_BASE_ADDR 64'h1FFFF0000
|
||||
`endif
|
||||
|
||||
`ifndef STACK_BASE_ADDR
|
||||
`define STACK_BASE_ADDR 64'h1FF000000
|
||||
`ifndef STARTUP_ADDR
|
||||
`define STARTUP_ADDR 64'h080000000
|
||||
`endif
|
||||
|
||||
`ifndef USER_BASE_ADDR
|
||||
`define USER_BASE_ADDR 64'h000010000
|
||||
`endif
|
||||
|
||||
`ifndef IO_BASE_ADDR
|
||||
`define IO_BASE_ADDR 64'h000000040
|
||||
`endif
|
||||
|
||||
`else
|
||||
|
||||
`ifndef STARTUP_ADDR
|
||||
`define STARTUP_ADDR 32'h80000000
|
||||
`endif
|
||||
|
||||
`ifndef STACK_BASE_ADDR
|
||||
`define STACK_BASE_ADDR 32'hFF000000
|
||||
`define STACK_BASE_ADDR 32'hFFFF0000
|
||||
`endif
|
||||
|
||||
`ifndef STARTUP_ADDR
|
||||
`define STARTUP_ADDR 32'h80000000
|
||||
`endif
|
||||
|
||||
`ifndef SMEM_BASE_ADDR
|
||||
`define SMEM_BASE_ADDR `STACK_BASE_ADDR
|
||||
`endif
|
||||
|
||||
`ifndef SMEM_LOG_SIZE
|
||||
`define SMEM_LOG_SIZE 14
|
||||
`ifndef USER_BASE_ADDR
|
||||
`define USER_BASE_ADDR 32'h00010000
|
||||
`endif
|
||||
|
||||
`ifndef IO_BASE_ADDR
|
||||
`define IO_BASE_ADDR (`SMEM_BASE_ADDR + (1 << `SMEM_LOG_SIZE))
|
||||
`define IO_BASE_ADDR 32'h00000040
|
||||
`endif
|
||||
|
||||
`endif
|
||||
|
||||
`define IO_END_ADDR `USER_BASE_ADDR
|
||||
|
||||
`ifndef LMEM_LOG_SIZE
|
||||
`define LMEM_LOG_SIZE 14
|
||||
`endif
|
||||
|
||||
`ifndef LMEM_BASE_ADDR
|
||||
`define LMEM_BASE_ADDR `STACK_BASE_ADDR
|
||||
`endif
|
||||
|
||||
`ifndef IO_COUT_ADDR
|
||||
`define IO_COUT_ADDR `IO_BASE_ADDR
|
||||
`define IO_COUT_ADDR `IO_BASE_ADDR
|
||||
`endif
|
||||
`define IO_COUT_SIZE `MEM_BLOCK_SIZE
|
||||
`define IO_COUT_SIZE 64
|
||||
|
||||
`ifndef IO_CSR_ADDR
|
||||
`define IO_CSR_ADDR (`IO_COUT_ADDR + `IO_COUT_SIZE)
|
||||
`ifndef IO_MPM_ADDR
|
||||
`define IO_MPM_ADDR (`IO_COUT_ADDR + `IO_COUT_SIZE)
|
||||
`endif
|
||||
`define IO_CSR_SIZE (4 * 64 * `NUM_CORES * `NUM_CLUSTERS)
|
||||
`define IO_MPM_SIZE (8 * 32 * `NUM_CORES * `NUM_CLUSTERS)
|
||||
|
||||
`ifndef STACK_LOG2_SIZE
|
||||
`define STACK_LOG2_SIZE 13
|
||||
`endif
|
||||
`define STACK_SIZE (1 << `STACK_LOG2_SIZE)
|
||||
`define STACK_SIZE (1 << `STACK_LOG2_SIZE)
|
||||
|
||||
`define RESET_DELAY 8
|
||||
|
||||
`ifndef STALL_TIMEOUT
|
||||
`define STALL_TIMEOUT (100000 * (1 ** (`L2_ENABLED + `L3_ENABLED)))
|
||||
`define STALL_TIMEOUT (100000 * (1 ** (`L2_ENABLED + `L3_ENABLED)))
|
||||
`endif
|
||||
|
||||
`ifndef SV_DPI
|
||||
`define DPI_DISABLE
|
||||
`endif
|
||||
|
||||
`ifndef FPU_FPNEW
|
||||
|
@ -222,7 +255,7 @@
|
|||
|
||||
// Issue width
|
||||
`ifndef ISSUE_WIDTH
|
||||
`define ISSUE_WIDTH `MIN(`NUM_WARPS, 4)
|
||||
`define ISSUE_WIDTH `UP(`NUM_WARPS / 8)
|
||||
`endif
|
||||
|
||||
// Number of ALU units
|
||||
|
@ -243,32 +276,38 @@
|
|||
|
||||
// Number of LSU units
|
||||
`ifndef NUM_LSU_LANES
|
||||
`define NUM_LSU_LANES `MIN(`NUM_THREADS, 4)
|
||||
`define NUM_LSU_LANES `NUM_THREADS
|
||||
`endif
|
||||
`ifndef NUM_LSU_BLOCKS
|
||||
`define NUM_LSU_BLOCKS 1
|
||||
`endif
|
||||
|
||||
// Number of SFU units
|
||||
`ifndef NUM_SFU_LANES
|
||||
`define NUM_SFU_LANES `MIN(`NUM_THREADS, 4)
|
||||
`define NUM_SFU_LANES `NUM_THREADS
|
||||
`endif
|
||||
`ifndef NUM_SFU_BLOCKS
|
||||
`define NUM_SFU_BLOCKS 1
|
||||
`endif
|
||||
|
||||
// Size of Instruction Buffer
|
||||
`ifndef IBUF_SIZE
|
||||
`define IBUF_SIZE (2 * (`NUM_WARPS / `ISSUE_WIDTH))
|
||||
`define IBUF_SIZE 4
|
||||
`endif
|
||||
|
||||
// Size of LSU Request Queue
|
||||
`ifndef LSUQ_SIZE
|
||||
`define LSUQ_SIZE (2 * (`NUM_THREADS / `NUM_LSU_LANES))
|
||||
// LSU line size
|
||||
`ifndef LSU_LINE_SIZE
|
||||
`define LSU_LINE_SIZE `MIN(`NUM_LSU_LANES * (`XLEN / 8), `L1_LINE_SIZE)
|
||||
`endif
|
||||
|
||||
// LSU Duplicate Address Check
|
||||
`ifndef LSU_DUP_DISABLE
|
||||
`define LSU_DUP_ENABLE
|
||||
// Size of LSU Core Request Queue
|
||||
`ifndef LSUQ_IN_SIZE
|
||||
`define LSUQ_IN_SIZE (2 * (`NUM_THREADS / `NUM_LSU_LANES))
|
||||
`endif
|
||||
`ifdef LSU_DUP_ENABLE
|
||||
`define LSU_DUP_ENABLED 1
|
||||
`else
|
||||
`define LSU_DUP_ENABLED 0
|
||||
|
||||
// Size of LSU Memory Request Queue
|
||||
`ifndef LSUQ_OUT_SIZE
|
||||
`define LSUQ_OUT_SIZE `MAX(`LSUQ_IN_SIZE, `LSU_LINE_SIZE / (`XLEN / 8))
|
||||
`endif
|
||||
|
||||
`ifdef GBAR_ENABLE
|
||||
|
@ -304,20 +343,20 @@
|
|||
// FMA Latency
|
||||
`ifndef LATENCY_FMA
|
||||
`ifdef FPU_DPI
|
||||
`define LATENCY_FMA 4
|
||||
`define LATENCY_FMA 4
|
||||
`endif
|
||||
`ifdef FPU_FPNEW
|
||||
`define LATENCY_FMA 4
|
||||
`define LATENCY_FMA 4
|
||||
`endif
|
||||
`ifdef FPU_DSP
|
||||
`ifdef QUARTUS
|
||||
`define LATENCY_FMA 4
|
||||
`endif
|
||||
`ifdef VIVADO
|
||||
`define LATENCY_FMA 16
|
||||
`define LATENCY_FMA 16
|
||||
`endif
|
||||
`ifndef LATENCY_FMA
|
||||
`define LATENCY_FMA 4
|
||||
`define LATENCY_FMA 4
|
||||
`endif
|
||||
`endif
|
||||
`endif
|
||||
|
@ -325,17 +364,17 @@
|
|||
// FDIV Latency
|
||||
`ifndef LATENCY_FDIV
|
||||
`ifdef FPU_DPI
|
||||
`define LATENCY_FDIV 15
|
||||
`define LATENCY_FDIV 15
|
||||
`endif
|
||||
`ifdef FPU_FPNEW
|
||||
`define LATENCY_FDIV 16
|
||||
`define LATENCY_FDIV 16
|
||||
`endif
|
||||
`ifdef FPU_DSP
|
||||
`ifdef QUARTUS
|
||||
`define LATENCY_FDIV 15
|
||||
`endif
|
||||
`ifdef VIVADO
|
||||
`define LATENCY_FDIV 28
|
||||
`define LATENCY_FDIV 28
|
||||
`endif
|
||||
`ifndef LATENCY_FDIV
|
||||
`define LATENCY_FDIV 16
|
||||
|
@ -346,20 +385,20 @@
|
|||
// FSQRT Latency
|
||||
`ifndef LATENCY_FSQRT
|
||||
`ifdef FPU_DPI
|
||||
`define LATENCY_FSQRT 10
|
||||
`define LATENCY_FSQRT 10
|
||||
`endif
|
||||
`ifdef FPU_FPNEW
|
||||
`define LATENCY_FSQRT 16
|
||||
`define LATENCY_FSQRT 16
|
||||
`endif
|
||||
`ifdef FPU_DSP
|
||||
`ifdef QUARTUS
|
||||
`define LATENCY_FSQRT 10
|
||||
`endif
|
||||
`ifdef VIVADO
|
||||
`define LATENCY_FSQRT 28
|
||||
`define LATENCY_FSQRT 28
|
||||
`endif
|
||||
`ifndef LATENCY_FSQRT
|
||||
`define LATENCY_FSQRT 16
|
||||
`define LATENCY_FSQRT 16
|
||||
`endif
|
||||
`endif
|
||||
`endif
|
||||
|
@ -369,6 +408,31 @@
|
|||
`define LATENCY_FCVT 5
|
||||
`endif
|
||||
|
||||
// FMA Bandwidth ratio
|
||||
`ifndef FMA_PE_RATIO
|
||||
`define FMA_PE_RATIO 1
|
||||
`endif
|
||||
|
||||
// FDIV Bandwidth ratio
|
||||
`ifndef FDIV_PE_RATIO
|
||||
`define FDIV_PE_RATIO 8
|
||||
`endif
|
||||
|
||||
// FSQRT Bandwidth ratio
|
||||
`ifndef FSQRT_PE_RATIO
|
||||
`define FSQRT_PE_RATIO 8
|
||||
`endif
|
||||
|
||||
// FCVT Bandwidth ratio
|
||||
`ifndef FCVT_PE_RATIO
|
||||
`define FCVT_PE_RATIO 8
|
||||
`endif
|
||||
|
||||
// FNCP Bandwidth ratio
|
||||
`ifndef FNCP_PE_RATIO
|
||||
`define FNCP_PE_RATIO 2
|
||||
`endif
|
||||
|
||||
// Icache Configurable Knobs //////////////////////////////////////////////////
|
||||
|
||||
// Cache Enable
|
||||
|
@ -471,22 +535,27 @@
|
|||
`define DCACHE_NUM_WAYS 1
|
||||
`endif
|
||||
|
||||
// SM Configurable Knobs //////////////////////////////////////////////////////
|
||||
|
||||
`ifndef SM_DISABLE
|
||||
`define SM_ENABLE
|
||||
// Enable Cache Writeback
|
||||
`ifndef DCACHE_WRITEBACK
|
||||
`define DCACHE_WRITEBACK 0
|
||||
`endif
|
||||
|
||||
`ifdef SM_ENABLE
|
||||
`define SM_ENABLED 1
|
||||
// LMEM Configurable Knobs ////////////////////////////////////////////////////
|
||||
|
||||
`ifndef LMEM_DISABLE
|
||||
`define LMEM_ENABLE
|
||||
`endif
|
||||
|
||||
`ifdef LMEM_ENABLE
|
||||
`define LMEM_ENABLED 1
|
||||
`else
|
||||
`define SM_ENABLED 0
|
||||
`define SMEM_NUM_BANKS 1
|
||||
`define LMEM_ENABLED 0
|
||||
`define LMEM_NUM_BANKS 1
|
||||
`endif
|
||||
|
||||
// Number of Banks
|
||||
`ifndef SMEM_NUM_BANKS
|
||||
`define SMEM_NUM_BANKS (`NUM_LSU_LANES)
|
||||
`ifndef LMEM_NUM_BANKS
|
||||
`define LMEM_NUM_BANKS `NUM_LSU_LANES
|
||||
`endif
|
||||
|
||||
// L2cache Configurable Knobs /////////////////////////////////////////////////
|
||||
|
@ -530,6 +599,11 @@
|
|||
`define L2_NUM_WAYS 2
|
||||
`endif
|
||||
|
||||
// Enable Cache Writeback
|
||||
`ifndef L2_WRITEBACK
|
||||
`define L2_WRITEBACK 0
|
||||
`endif
|
||||
|
||||
// L3cache Configurable Knobs /////////////////////////////////////////////////
|
||||
|
||||
// Cache Size
|
||||
|
@ -571,6 +645,11 @@
|
|||
`define L3_NUM_WAYS 4
|
||||
`endif
|
||||
|
||||
// Enable Cache Writeback
|
||||
`ifndef L3_WRITEBACK
|
||||
`define L3_WRITEBACK 0
|
||||
`endif
|
||||
|
||||
// ISA Extensions /////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef EXT_A_ENABLE
|
||||
|
@ -603,6 +682,12 @@
|
|||
`define EXT_M_ENABLED 0
|
||||
`endif
|
||||
|
||||
`ifdef EXT_ZICOND_ENABLE
|
||||
`define EXT_ZICOND_ENABLED 1
|
||||
`else
|
||||
`define EXT_ZICOND_ENABLED 0
|
||||
`endif
|
||||
|
||||
`define ISA_STD_A 0
|
||||
`define ISA_STD_C 2
|
||||
`define ISA_STD_D 3
|
||||
|
@ -619,13 +704,15 @@
|
|||
`define ISA_EXT_DCACHE 1
|
||||
`define ISA_EXT_L2CACHE 2
|
||||
`define ISA_EXT_L3CACHE 3
|
||||
`define ISA_EXT_SMEM 4
|
||||
`define ISA_EXT_LMEM 4
|
||||
`define ISA_EXT_ZICOND 5
|
||||
|
||||
`define MISA_EXT (`ICACHE_ENABLED << `ISA_EXT_ICACHE) \
|
||||
| (`DCACHE_ENABLED << `ISA_EXT_DCACHE) \
|
||||
| (`L2_ENABLED << `ISA_EXT_L2CACHE) \
|
||||
| (`L3_ENABLED << `ISA_EXT_L3CACHE) \
|
||||
| (`SM_ENABLED << `ISA_EXT_SMEM)
|
||||
| (`LMEM_ENABLED << `ISA_EXT_LMEM) \
|
||||
| (`EXT_ZICOND_ENABLED << `ISA_EXT_ZICOND)
|
||||
|
||||
`define MISA_STD (`EXT_A_ENABLED << 0) /* A - Atomic Instructions extension */ \
|
||||
| (0 << 1) /* B - Tentatively reserved for Bit operations extension */ \
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -44,6 +44,9 @@
|
|||
|
||||
`define NR_BITS `CLOG2(`NUM_REGS)
|
||||
|
||||
`define DV_STACK_SIZE `UP(`NUM_THREADS-1)
|
||||
`define DV_STACK_SIZEW `UP(`CLOG2(`DV_STACK_SIZE))
|
||||
|
||||
`define PERF_CTR_BITS 44
|
||||
|
||||
`ifndef NDEBUG
|
||||
|
@ -52,6 +55,12 @@
|
|||
`define UUID_WIDTH 1
|
||||
`endif
|
||||
|
||||
`define PC_BITS (`XLEN-1)
|
||||
`define OFFSET_BITS 12
|
||||
`define IMM_BITS `XLEN
|
||||
|
||||
`define NUM_SOCKETS `UP(`NUM_CORES / `SOCKET_SIZE)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define EX_ALU 0
|
||||
|
@ -90,10 +99,10 @@
|
|||
|
||||
`define INST_FL 7'b0000111 // float load instruction
|
||||
`define INST_FS 7'b0100111 // float store instruction
|
||||
`define INST_FMADD 7'b1000011
|
||||
`define INST_FMADD 7'b1000011
|
||||
`define INST_FMSUB 7'b1000111
|
||||
`define INST_FNMSUB 7'b1001011
|
||||
`define INST_FNMADD 7'b1001111
|
||||
`define INST_FNMADD 7'b1001111
|
||||
`define INST_FCI 7'b1010011 // float common instructions
|
||||
|
||||
// Custom extension opcodes
|
||||
|
@ -102,6 +111,10 @@
|
|||
`define INST_EXT3 7'b1011011 // 0x5B
|
||||
`define INST_EXT4 7'b1111011 // 0x7B
|
||||
|
||||
// Opcode extensions
|
||||
`define INST_R_F7_MUL 7'b0000001
|
||||
`define INST_R_F7_ZICOND 7'b0000111
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define INST_FRM_RNE 3'b000 // round to nearest even
|
||||
|
@ -115,36 +128,45 @@
|
|||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define INST_OP_BITS 4
|
||||
`define INST_MOD_BITS 3
|
||||
`define INST_ARGS_BITS $bits(op_args_t)
|
||||
`define INST_FMT_BITS 2
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define INST_ALU_ADD 4'b0000
|
||||
//`define INST_ALU_UNUSED 4'b0001
|
||||
`define INST_ALU_LUI 4'b0010
|
||||
`define INST_ALU_AUIPC 4'b0011
|
||||
`define INST_ALU_SLTU 4'b0100
|
||||
`define INST_ALU_SLT 4'b0101
|
||||
//`define INST_ALU_UNUSED 4'b0110
|
||||
`define INST_ALU_SUB 4'b0111
|
||||
`define INST_ALU_SRL 4'b1000
|
||||
`define INST_ALU_SRA 4'b1001
|
||||
`define INST_ALU_CZEQ 4'b1010
|
||||
`define INST_ALU_CZNE 4'b1011
|
||||
`define INST_ALU_AND 4'b1100
|
||||
`define INST_ALU_OR 4'b1101
|
||||
`define INST_ALU_XOR 4'b1110
|
||||
`define INST_ALU_SLL 4'b1111
|
||||
`define INST_ALU_OTHER 4'b0111
|
||||
|
||||
|
||||
`define ALU_TYPE_BITS 2
|
||||
`define ALU_TYPE_ARITH 0
|
||||
`define ALU_TYPE_BRANCH 1
|
||||
`define ALU_TYPE_MULDIV 2
|
||||
`define ALU_TYPE_OTHER 3
|
||||
|
||||
`define INST_ALU_BITS 4
|
||||
`define INST_ALU_CLASS(op) op[3:2]
|
||||
`define INST_ALU_SIGNED(op) op[0]
|
||||
`define INST_ALU_IS_SUB(op) op[1]
|
||||
`define INST_ALU_IS_BR(mod) mod[0]
|
||||
`define INST_ALU_IS_M(mod) mod[1]
|
||||
`define INST_ALU_IS_W(mod) mod[2]
|
||||
`define INST_ALU_IS_CZERO(op) (op[3:1] == 3'b101)
|
||||
|
||||
`define INST_BR_EQ 4'b0000
|
||||
`define INST_BR_NE 4'b0010
|
||||
`define INST_BR_LTU 4'b0100
|
||||
`define INST_BR_GEU 4'b0110
|
||||
`define INST_BR_LTU 4'b0100
|
||||
`define INST_BR_GEU 4'b0110
|
||||
`define INST_BR_LT 4'b0101
|
||||
`define INST_BR_GE 4'b0111
|
||||
`define INST_BR_JAL 4'b1000
|
||||
|
@ -184,14 +206,14 @@
|
|||
`define INST_FMT_HU 3'b101
|
||||
`define INST_FMT_WU 3'b110
|
||||
|
||||
`define INST_LSU_LB 4'b0000
|
||||
`define INST_LSU_LB 4'b0000
|
||||
`define INST_LSU_LH 4'b0001
|
||||
`define INST_LSU_LW 4'b0010
|
||||
`define INST_LSU_LD 4'b0011 // new for RV64I LD
|
||||
`define INST_LSU_LBU 4'b0100
|
||||
`define INST_LSU_LHU 4'b0101
|
||||
`define INST_LSU_LWU 4'b0110 // new for RV64I LWU
|
||||
`define INST_LSU_SB 4'b1000
|
||||
`define INST_LSU_SB 4'b1000
|
||||
`define INST_LSU_SH 4'b1001
|
||||
`define INST_LSU_SW 4'b1010
|
||||
`define INST_LSU_SD 4'b1011 // new for RV64I SD
|
||||
|
@ -205,29 +227,28 @@
|
|||
`define INST_FENCE_D 1'h0
|
||||
`define INST_FENCE_I 1'h1
|
||||
|
||||
`define INST_FPU_ADD 4'b0000
|
||||
`define INST_FPU_SUB 4'b0001
|
||||
`define INST_FPU_MUL 4'b0010
|
||||
`define INST_FPU_ADD 4'b0000
|
||||
`define INST_FPU_SUB 4'b0001
|
||||
`define INST_FPU_MUL 4'b0010
|
||||
`define INST_FPU_DIV 4'b0011
|
||||
`define INST_FPU_SQRT 4'b0100
|
||||
`define INST_FPU_CMP 4'b0101 // mod: LE=0, LT=1, EQ=2
|
||||
`define INST_FPU_CMP 4'b0101 // frm: LE=0, LT=1, EQ=2
|
||||
`define INST_FPU_F2F 4'b0110
|
||||
`define INST_FPU_MISC 4'b0111 // mod: SGNJ=0, SGNJN=1, SGNJX=2, CLASS=3, MVXW=4, MVWX=5, FMIN=6, FMAX=7
|
||||
`define INST_FPU_MISC 4'b0111 // frm: SGNJ=0, SGNJN=1, SGNJX=2, CLASS=3, MVXW=4, MVWX=5, FMIN=6, FMAX=7
|
||||
`define INST_FPU_F2I 4'b1000
|
||||
`define INST_FPU_F2U 4'b1001
|
||||
`define INST_FPU_I2F 4'b1010
|
||||
`define INST_FPU_U2F 4'b1011
|
||||
`define INST_FPU_MADD 4'b1100
|
||||
`define INST_FPU_MSUB 4'b1101
|
||||
`define INST_FPU_NMSUB 4'b1110
|
||||
`define INST_FPU_MADD 4'b1100
|
||||
`define INST_FPU_MSUB 4'b1101
|
||||
`define INST_FPU_NMSUB 4'b1110
|
||||
`define INST_FPU_NMADD 4'b1111
|
||||
`define INST_FPU_BITS 4
|
||||
`define INST_FPU_IS_W(mod) (mod[4])
|
||||
`define INST_FPU_IS_CLASS(op, mod) (op == `INST_FPU_MISC && mod == 3)
|
||||
`define INST_FPU_IS_MVXW(op, mod) (op == `INST_FPU_MISC && mod == 4)
|
||||
`define INST_FPU_IS_CLASS(op, frm) (op == `INST_FPU_MISC && frm == 3)
|
||||
`define INST_FPU_IS_MVXW(op, frm) (op == `INST_FPU_MISC && frm == 4)
|
||||
|
||||
`define INST_SFU_TMC 4'h0
|
||||
`define INST_SFU_WSPAWN 4'h1
|
||||
`define INST_SFU_WSPAWN 4'h1
|
||||
`define INST_SFU_SPLIT 4'h2
|
||||
`define INST_SFU_JOIN 4'h3
|
||||
`define INST_SFU_BAR 4'h4
|
||||
|
@ -235,7 +256,6 @@
|
|||
`define INST_SFU_CSRRW 4'h6
|
||||
`define INST_SFU_CSRRS 4'h7
|
||||
`define INST_SFU_CSRRC 4'h8
|
||||
`define INST_SFU_CMOV 4'h9
|
||||
`define INST_SFU_BITS 4
|
||||
`define INST_SFU_CSR(f3) (4'h6 + 4'(f3) - 4'h1)
|
||||
`define INST_SFU_IS_WCTL(op) (op <= 5)
|
||||
|
@ -243,67 +263,52 @@
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// non-cacheable tag bits
|
||||
`define NC_TAG_BITS 1
|
||||
|
||||
// cache address type bits
|
||||
`ifdef SM_ENABLE
|
||||
`define CACHE_ADDR_TYPE_BITS (`NC_TAG_BITS + 1)
|
||||
`else
|
||||
`define CACHE_ADDR_TYPE_BITS `NC_TAG_BITS
|
||||
`endif
|
||||
|
||||
`define ARB_SEL_BITS(I, O) ((I > O) ? `CLOG2((I + O - 1) / O) : 0)
|
||||
`define ARB_SEL_BITS(I, O) ((I > O) ? `CLOG2(`CDIV(I, O)) : 0)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define CACHE_MEM_TAG_WIDTH(mshr_size, num_banks) \
|
||||
(`CLOG2(mshr_size) + `CLOG2(num_banks) + `NC_TAG_BITS)
|
||||
|
||||
`define CACHE_NC_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width) \
|
||||
(`CLOG2(num_reqs) + `CLOG2(line_size / word_size) + tag_width)
|
||||
(`CLOG2(mshr_size) + `CLOG2(num_banks))
|
||||
|
||||
`define CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width) \
|
||||
(`CACHE_NC_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width) + `NC_TAG_BITS)
|
||||
(`CLOG2(num_reqs) + `CLOG2(line_size / word_size) + tag_width)
|
||||
|
||||
`define CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, tag_width) \
|
||||
`MAX(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks), `CACHE_NC_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width))
|
||||
(`MAX(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks), `CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width)) + 1)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches) \
|
||||
(tag_width + `ARB_SEL_BITS(num_inputs, `UP(num_caches)))
|
||||
(tag_width + `ARB_SEL_BITS(num_inputs, `UP(num_caches)))
|
||||
|
||||
`define CACHE_CLUSTER_MEM_ARB_TAG(tag_width, num_caches) \
|
||||
(tag_width + `ARB_SEL_BITS(`UP(num_caches), 1))
|
||||
|
||||
`define CACHE_CLUSTER_MEM_TAG_WIDTH(mshr_size, num_banks, num_caches) \
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks), num_caches)
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks), num_caches)
|
||||
|
||||
`define CACHE_CLUSTER_NC_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width, num_inputs, num_caches) \
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG((`CLOG2(num_reqs) + `CLOG2(line_size / word_size) + `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches)), num_caches)
|
||||
|
||||
`define CACHE_CLUSTER_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width, num_inputs, num_caches) \
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG((`CACHE_NC_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches)) + `NC_TAG_BITS), num_caches)
|
||||
`define CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(num_reqs, line_size, word_size, tag_width, num_inputs, num_caches) \
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches)), num_caches)
|
||||
|
||||
`define CACHE_CLUSTER_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, tag_width, num_inputs, num_caches) \
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG(`MAX(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks), `CACHE_NC_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches))), num_caches)
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches)), num_caches)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef L2_ENABLE
|
||||
`define L2_LINE_SIZE `MEM_BLOCK_SIZE
|
||||
`else
|
||||
`define L2_LINE_SIZE `L1_LINE_SIZE
|
||||
`ifdef ICACHE_ENABLE
|
||||
`define L1_ENABLE
|
||||
`endif
|
||||
|
||||
`ifdef L3_ENABLE
|
||||
`define L3_LINE_SIZE `MEM_BLOCK_SIZE
|
||||
`else
|
||||
`define L3_LINE_SIZE `L2_LINE_SIZE
|
||||
`ifdef DCACHE_ENABLE
|
||||
`define L1_ENABLE
|
||||
`endif
|
||||
|
||||
`define VX_MEM_BYTEEN_WIDTH `L3_LINE_SIZE
|
||||
`define ADDR_TYPE_FLUSH 0
|
||||
`define ADDR_TYPE_IO 1
|
||||
`define ADDR_TYPE_LOCAL 2 // should be last since optional
|
||||
`define ADDR_TYPE_WIDTH (`ADDR_TYPE_LOCAL + `LMEM_ENABLED)
|
||||
|
||||
`define VX_MEM_BYTEEN_WIDTH `L3_LINE_SIZE
|
||||
`define VX_MEM_ADDR_WIDTH (`MEM_ADDR_WIDTH - `CLOG2(`L3_LINE_SIZE))
|
||||
`define VX_MEM_DATA_WIDTH (`L3_LINE_SIZE * 8)
|
||||
`define VX_MEM_TAG_WIDTH L3_MEM_TAG_WIDTH
|
||||
|
@ -320,7 +325,7 @@
|
|||
.DATAW ($bits(dst)), \
|
||||
.RESETW ($bits(dst)), \
|
||||
.DEPTH (latency) \
|
||||
) __``dst ( \
|
||||
) __``dst``__ ( \
|
||||
.clk (clk), \
|
||||
.reset (reset), \
|
||||
.enable (ena), \
|
||||
|
@ -334,13 +339,18 @@
|
|||
VX_popcount #( \
|
||||
.N ($bits(in)), \
|
||||
.MODEL (model) \
|
||||
) __``out ( \
|
||||
) __``out``__ ( \
|
||||
.data_in (in), \
|
||||
.data_out (out) \
|
||||
)
|
||||
|
||||
`define POP_COUNT(out, in) `POP_COUNT_EX(out, in, 1)
|
||||
|
||||
`define ASSIGN_VX_IF(dst, src) \
|
||||
assign dst.valid = src.valid; \
|
||||
assign dst.data = src.data; \
|
||||
assign src.ready = dst.ready
|
||||
|
||||
`define ASSIGN_VX_MEM_BUS_IF(dst, src) \
|
||||
assign dst.req_valid = src.req_valid; \
|
||||
assign dst.req_data = src.req_data; \
|
||||
|
@ -354,6 +364,7 @@
|
|||
assign dst.req_data.rw = src.req_data.rw; \
|
||||
assign dst.req_data.byteen = src.req_data.byteen; \
|
||||
assign dst.req_data.addr = src.req_data.addr; \
|
||||
assign dst.req_data.atype = src.req_data.atype; \
|
||||
assign dst.req_data.data = src.req_data.data; \
|
||||
if (TD != TS) \
|
||||
assign dst.req_data.tag = {src.req_data.tag, {(TD-TS){1'b0}}}; \
|
||||
|
@ -365,43 +376,51 @@
|
|||
assign src.rsp_data.tag = dst.rsp_data.tag[TD-1 -: TS]; \
|
||||
assign dst.rsp_ready = src.rsp_ready
|
||||
|
||||
`define BUFFER_DCR_BUS_IF(dst, src, enable) \
|
||||
logic [(1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH)-1:0] __``dst; \
|
||||
if (enable) begin \
|
||||
always @(posedge clk) begin \
|
||||
__``dst <= {src.write_valid, src.write_addr, src.write_data}; \
|
||||
end \
|
||||
end else begin \
|
||||
assign __``dst = {src.write_valid, src.write_addr, src.write_data}; \
|
||||
end \
|
||||
VX_dcr_bus_if dst(); \
|
||||
assign {dst.write_valid, dst.write_addr, dst.write_data} = __``dst
|
||||
`define ASSIGN_VX_LSU_MEM_IF(dst, src) \
|
||||
assign dst.req_valid = src.req_valid; \
|
||||
assign dst.req_data = src.req_data; \
|
||||
assign src.req_ready = dst.req_ready; \
|
||||
assign src.rsp_valid = dst.rsp_valid; \
|
||||
assign src.rsp_data = dst.rsp_data; \
|
||||
assign dst.rsp_ready = src.rsp_ready
|
||||
|
||||
`define PERF_COUNTER_ADD(dst, src, field, width, dst_count, src_count, reg_enable) \
|
||||
for (genvar __d = 0; __d < dst_count; ++__d) begin \
|
||||
localparam __count = ((src_count > dst_count) ? ((src_count + dst_count - 1) / dst_count) : 1); \
|
||||
wire [__count-1:0][width-1:0] __reduce_add_i_``src``field; \
|
||||
wire [width-1:0] __reduce_add_o_``dst``field; \
|
||||
for (genvar __i = 0; __i < __count; ++__i) begin \
|
||||
assign __reduce_add_i_``src``field[__i] = ``src[__d * __count + __i].``field; \
|
||||
`define BUFFER_DCR_BUS_IF(dst, src, enable) \
|
||||
if (enable) begin \
|
||||
reg [(1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH)-1:0] __dst; \
|
||||
always @(posedge clk) begin \
|
||||
__dst <= {src.write_valid, src.write_addr, src.write_data}; \
|
||||
end \
|
||||
VX_reduce #(.DATAW_IN(width), .N(__count), .OP("+")) __reduce_add_``dst``field ( \
|
||||
__reduce_add_i_``src``field, \
|
||||
__reduce_add_o_``dst``field \
|
||||
assign {dst.write_valid, dst.write_addr, dst.write_data} = __dst; \
|
||||
end else begin \
|
||||
assign {dst.write_valid, dst.write_addr, dst.write_data} = {src.write_valid, src.write_addr, src.write_data}; \
|
||||
end
|
||||
|
||||
`define PERF_COUNTER_ADD(dst, src, field, width, count, reg_enable) \
|
||||
if (count > 1) begin \
|
||||
wire [count-1:0][width-1:0] __reduce_add_i_field; \
|
||||
wire [width-1:0] __reduce_add_o_field; \
|
||||
for (genvar __i = 0; __i < count; ++__i) begin \
|
||||
assign __reduce_add_i_field[__i] = src[__i].``field; \
|
||||
end \
|
||||
VX_reduce #(.DATAW_IN(width), .N(count), .OP("+")) __reduce_add_field ( \
|
||||
__reduce_add_i_field, \
|
||||
__reduce_add_o_field \
|
||||
); \
|
||||
if (reg_enable) begin \
|
||||
reg [width-1:0] __reduce_add_r_``dst``field; \
|
||||
reg [width-1:0] __reduce_add_r_field; \
|
||||
always @(posedge clk) begin \
|
||||
if (reset) begin \
|
||||
__reduce_add_r_``dst``field <= '0; \
|
||||
__reduce_add_r_field <= '0; \
|
||||
end else begin \
|
||||
__reduce_add_r_``dst``field <= __reduce_add_o_``dst``field; \
|
||||
__reduce_add_r_field <= __reduce_add_o_field; \
|
||||
end \
|
||||
end \
|
||||
assign ``dst[__d].``field = __reduce_add_r_``dst``field; \
|
||||
assign dst.``field = __reduce_add_r_field; \
|
||||
end else begin \
|
||||
assign ``dst[__d].``field = __reduce_add_o_``dst``field; \
|
||||
assign dst.``field = __reduce_add_o_field; \
|
||||
end \
|
||||
end else begin \
|
||||
assign dst.``field = src[0].``field; \
|
||||
end
|
||||
|
||||
`define ASSIGN_BLOCKED_WID(dst, src, block_idx, block_size) \
|
||||
|
@ -415,23 +434,4 @@
|
|||
assign dst = src; \
|
||||
end
|
||||
|
||||
`define TO_DISPATCH_DATA(data, tid) { \
|
||||
data.uuid, \
|
||||
data.wis, \
|
||||
data.tmask, \
|
||||
data.op_type, \
|
||||
data.op_mod, \
|
||||
data.wb, \
|
||||
data.use_PC, \
|
||||
data.use_imm, \
|
||||
data.PC, \
|
||||
data.imm, \
|
||||
data.rd, \
|
||||
tid, \
|
||||
data.rs1_data, \
|
||||
data.rs2_data, \
|
||||
data.rs3_data}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`endif // VX_DEFINE_VH
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -26,7 +26,7 @@ package VX_gpu_pkg;
|
|||
typedef struct packed {
|
||||
logic valid;
|
||||
logic [`NUM_WARPS-1:0] wmask;
|
||||
logic [`XLEN-1:0] pc;
|
||||
logic [`PC_BITS-1:0] pc;
|
||||
} wspawn_t;
|
||||
|
||||
typedef struct packed {
|
||||
|
@ -34,12 +34,12 @@ package VX_gpu_pkg;
|
|||
logic is_dvg;
|
||||
logic [`NUM_THREADS-1:0] then_tmask;
|
||||
logic [`NUM_THREADS-1:0] else_tmask;
|
||||
logic [`XLEN-1:0] next_pc;
|
||||
logic [`PC_BITS-1:0] next_pc;
|
||||
} split_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic valid;
|
||||
logic is_dvg;
|
||||
logic [`DV_STACK_SIZEW-1:0] stack_ptr;
|
||||
} join_t;
|
||||
|
||||
typedef struct packed {
|
||||
|
@ -51,13 +51,17 @@ package VX_gpu_pkg;
|
|||
`else
|
||||
logic [`NW_WIDTH-1:0] size_m1;
|
||||
`endif
|
||||
logic is_noop;
|
||||
} barrier_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [`XLEN-1:0] startup_addr;
|
||||
logic [7:0] mpm_class;
|
||||
logic [`XLEN-1:0] startup_addr;
|
||||
logic [`XLEN-1:0] startup_arg;
|
||||
logic [7:0] mpm_class;
|
||||
} base_dcrs_t;
|
||||
|
||||
//////////////////////////// Perf counter types ///////////////////////////
|
||||
|
||||
typedef struct packed {
|
||||
logic [`PERF_CTR_BITS-1:0] reads;
|
||||
logic [`PERF_CTR_BITS-1:0] writes;
|
||||
|
@ -75,7 +79,72 @@ package VX_gpu_pkg;
|
|||
logic [`PERF_CTR_BITS-1:0] latency;
|
||||
} mem_perf_t;
|
||||
|
||||
/* verilator lint_off UNUSED */
|
||||
typedef struct packed {
|
||||
logic [`PERF_CTR_BITS-1:0] idles;
|
||||
logic [`PERF_CTR_BITS-1:0] stalls;
|
||||
} sched_perf_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [`PERF_CTR_BITS-1:0] ibf_stalls;
|
||||
logic [`PERF_CTR_BITS-1:0] scb_stalls;
|
||||
logic [`PERF_CTR_BITS-1:0] opd_stalls;
|
||||
logic [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] units_uses;
|
||||
logic [`NUM_SFU_UNITS-1:0][`PERF_CTR_BITS-1:0] sfu_uses;
|
||||
} issue_perf_t;
|
||||
|
||||
//////////////////////// instruction arguments ////////////////////////////
|
||||
|
||||
typedef struct packed {
|
||||
logic use_PC;
|
||||
logic use_imm;
|
||||
logic is_w;
|
||||
logic [`ALU_TYPE_BITS-1:0] xtype;
|
||||
logic [`IMM_BITS-1:0] imm;
|
||||
} alu_args_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [($bits(alu_args_t)-`INST_FRM_BITS-`INST_FMT_BITS)-1:0] __padding;
|
||||
logic [`INST_FRM_BITS-1:0] frm;
|
||||
logic [`INST_FMT_BITS-1:0] fmt;
|
||||
} fpu_args_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [($bits(alu_args_t)-1-1-`OFFSET_BITS)-1:0] __padding;
|
||||
logic is_store;
|
||||
logic is_float;
|
||||
logic [`OFFSET_BITS-1:0] offset;
|
||||
} lsu_args_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [($bits(alu_args_t)-1-`VX_CSR_ADDR_BITS-5)-1:0] __padding;
|
||||
logic use_imm;
|
||||
logic [`VX_CSR_ADDR_BITS-1:0] addr;
|
||||
logic [4:0] imm;
|
||||
} csr_args_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [($bits(alu_args_t)-1)-1:0] __padding;
|
||||
logic is_neg;
|
||||
} wctl_args_t;
|
||||
|
||||
typedef union packed {
|
||||
alu_args_t alu;
|
||||
fpu_args_t fpu;
|
||||
lsu_args_t lsu;
|
||||
csr_args_t csr;
|
||||
wctl_args_t wctl;
|
||||
} op_args_t;
|
||||
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
|
||||
///////////////////////// LSU memory Parameters ///////////////////////////
|
||||
|
||||
localparam LSU_WORD_SIZE = `XLEN / 8;
|
||||
localparam LSU_ADDR_WIDTH = (`MEM_ADDR_WIDTH - `CLOG2(LSU_WORD_SIZE));
|
||||
localparam LSU_MEM_BATCHES = 1;
|
||||
localparam LSU_TAG_ID_BITS = (`CLOG2(`LSUQ_IN_SIZE) + `CLOG2(LSU_MEM_BATCHES));
|
||||
localparam LSU_TAG_WIDTH = (`UUID_WIDTH + LSU_TAG_ID_BITS);
|
||||
localparam LSU_NUM_REQS = `NUM_LSU_BLOCKS * `NUM_LSU_LANES;
|
||||
|
||||
////////////////////////// Icache Parameters //////////////////////////////
|
||||
|
||||
|
@ -86,7 +155,7 @@ package VX_gpu_pkg;
|
|||
// Block size in bytes
|
||||
localparam ICACHE_LINE_SIZE = `L1_LINE_SIZE;
|
||||
|
||||
// Core request tag Id bits
|
||||
// Core request tag Id bits
|
||||
localparam ICACHE_TAG_ID_BITS = `NW_WIDTH;
|
||||
|
||||
// Core request tag bits
|
||||
|
@ -96,54 +165,48 @@ package VX_gpu_pkg;
|
|||
localparam ICACHE_MEM_DATA_WIDTH = (ICACHE_LINE_SIZE * 8);
|
||||
|
||||
// Memory request tag bits
|
||||
`ifdef ICACHE_ENABLE
|
||||
`ifdef ICACHE_ENABLE
|
||||
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_MEM_TAG_WIDTH(`ICACHE_MSHR_SIZE, 1, `NUM_ICACHES);
|
||||
`else
|
||||
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_TAG_WIDTH(1, ICACHE_LINE_SIZE, ICACHE_WORD_SIZE, ICACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_ICACHES);
|
||||
`endif
|
||||
`else
|
||||
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(1, ICACHE_LINE_SIZE, ICACHE_WORD_SIZE, ICACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_ICACHES);
|
||||
`endif
|
||||
|
||||
////////////////////////// Dcache Parameters //////////////////////////////
|
||||
|
||||
// Word size in bytes
|
||||
localparam DCACHE_WORD_SIZE = (`XLEN / 8);
|
||||
localparam DCACHE_WORD_SIZE = `LSU_LINE_SIZE;
|
||||
localparam DCACHE_ADDR_WIDTH = (`MEM_ADDR_WIDTH - `CLOG2(DCACHE_WORD_SIZE));
|
||||
|
||||
// Block size in bytes
|
||||
localparam DCACHE_LINE_SIZE = `L1_LINE_SIZE;
|
||||
|
||||
// Input request size
|
||||
localparam DCACHE_NUM_REQS = `MAX(`DCACHE_NUM_BANKS, `SMEM_NUM_BANKS);
|
||||
|
||||
// Memory request size
|
||||
localparam LSU_MEM_REQS = `NUM_LSU_LANES;
|
||||
|
||||
// Batch select bits
|
||||
localparam DCACHE_NUM_BATCHES = ((LSU_MEM_REQS + DCACHE_NUM_REQS - 1) / DCACHE_NUM_REQS);
|
||||
localparam DCACHE_BATCH_SEL_BITS = `CLOG2(DCACHE_NUM_BATCHES);
|
||||
localparam DCACHE_CHANNELS = `UP((`NUM_LSU_LANES * LSU_WORD_SIZE) / DCACHE_WORD_SIZE);
|
||||
localparam DCACHE_NUM_REQS = `NUM_LSU_BLOCKS * DCACHE_CHANNELS;
|
||||
|
||||
// Core request tag Id bits
|
||||
localparam LSUQ_TAG_BITS = (`CLOG2(`LSUQ_SIZE) + DCACHE_BATCH_SEL_BITS);
|
||||
localparam DCACHE_TAG_ID_BITS = (LSUQ_TAG_BITS + `CACHE_ADDR_TYPE_BITS);
|
||||
localparam DCACHE_MERGED_REQS = (`NUM_LSU_LANES * LSU_WORD_SIZE) / DCACHE_WORD_SIZE;
|
||||
localparam DCACHE_MEM_BATCHES = `CDIV(DCACHE_MERGED_REQS, DCACHE_CHANNELS);
|
||||
localparam DCACHE_TAG_ID_BITS = (`CLOG2(`LSUQ_OUT_SIZE) + `CLOG2(DCACHE_MEM_BATCHES));
|
||||
|
||||
// Core request tag bits
|
||||
localparam DCACHE_TAG_WIDTH = (`UUID_WIDTH + DCACHE_TAG_ID_BITS);
|
||||
localparam DCACHE_NOSM_TAG_WIDTH = (DCACHE_TAG_WIDTH - `SM_ENABLED);
|
||||
|
||||
|
||||
// Memory request data bits
|
||||
localparam DCACHE_MEM_DATA_WIDTH = (DCACHE_LINE_SIZE * 8);
|
||||
|
||||
// Memory request tag bits
|
||||
`ifdef DCACHE_ENABLE
|
||||
localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_NC_MEM_TAG_WIDTH(`DCACHE_MSHR_SIZE, `DCACHE_NUM_BANKS, DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_NOSM_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES);
|
||||
`else
|
||||
localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_NC_BYPASS_TAG_WIDTH(DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_NOSM_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES);
|
||||
`endif
|
||||
`ifdef DCACHE_ENABLE
|
||||
localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_NC_MEM_TAG_WIDTH(`DCACHE_MSHR_SIZE, `DCACHE_NUM_BANKS, DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES);
|
||||
`else
|
||||
localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES);
|
||||
`endif
|
||||
|
||||
/////////////////////////////// L1 Parameters /////////////////////////////
|
||||
|
||||
localparam L1_MEM_TAG_WIDTH = `MAX(ICACHE_MEM_TAG_WIDTH, DCACHE_MEM_TAG_WIDTH);
|
||||
localparam L1_MEM_ARB_TAG_WIDTH = (L1_MEM_TAG_WIDTH + `CLOG2(2));
|
||||
|
||||
|
||||
/////////////////////////////// L2 Parameters /////////////////////////////
|
||||
|
||||
localparam ICACHE_MEM_ARB_IDX = 0;
|
||||
|
@ -162,11 +225,11 @@ package VX_gpu_pkg;
|
|||
localparam L2_MEM_DATA_WIDTH = (`L2_LINE_SIZE * 8);
|
||||
|
||||
// Memory request tag bits
|
||||
`ifdef L2_ENABLE
|
||||
`ifdef L2_ENABLE
|
||||
localparam L2_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L2_MSHR_SIZE, `L2_NUM_BANKS, L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH);
|
||||
`else
|
||||
localparam L2_MEM_TAG_WIDTH = `CACHE_NC_BYPASS_TAG_WIDTH(L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH);
|
||||
`endif
|
||||
`else
|
||||
localparam L2_MEM_TAG_WIDTH = `CACHE_BYPASS_TAG_WIDTH(L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH);
|
||||
`endif
|
||||
|
||||
/////////////////////////////// L3 Parameters /////////////////////////////
|
||||
|
||||
|
@ -183,32 +246,29 @@ package VX_gpu_pkg;
|
|||
localparam L3_MEM_DATA_WIDTH = (`L3_LINE_SIZE * 8);
|
||||
|
||||
// Memory request tag bits
|
||||
`ifdef L3_ENABLE
|
||||
`ifdef L3_ENABLE
|
||||
localparam L3_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L3_MSHR_SIZE, `L3_NUM_BANKS, L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH);
|
||||
`else
|
||||
localparam L3_MEM_TAG_WIDTH = `CACHE_NC_BYPASS_TAG_WIDTH(L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH);
|
||||
`endif
|
||||
|
||||
/* verilator lint_on UNUSED */
|
||||
`else
|
||||
localparam L3_MEM_TAG_WIDTH = `CACHE_BYPASS_TAG_WIDTH(L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH);
|
||||
`endif
|
||||
|
||||
/////////////////////////////// Issue parameters //////////////////////////
|
||||
|
||||
localparam ISSUE_ISW = `CLOG2(`ISSUE_WIDTH);
|
||||
localparam ISSUE_ISW_W = `UP(ISSUE_ISW);
|
||||
localparam ISSUE_RATIO = `NUM_WARPS / `ISSUE_WIDTH;
|
||||
localparam ISSUE_WIS = `CLOG2(ISSUE_RATIO);
|
||||
localparam ISSUE_ISW_W = `UP(ISSUE_ISW);
|
||||
localparam PER_ISSUE_WARPS = `NUM_WARPS / `ISSUE_WIDTH;
|
||||
localparam ISSUE_WIS = `CLOG2(PER_ISSUE_WARPS);
|
||||
localparam ISSUE_WIS_W = `UP(ISSUE_WIS);
|
||||
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
|
||||
function logic [`NW_WIDTH-1:0] wis_to_wid(
|
||||
input logic [ISSUE_WIS_W-1:0] wis,
|
||||
input logic [ISSUE_WIS_W-1:0] wis,
|
||||
input logic [ISSUE_ISW_W-1:0] isw
|
||||
);
|
||||
if (ISSUE_WIS == 0) begin
|
||||
wis_to_wid = `NW_WIDTH'(isw);
|
||||
end else if (ISSUE_ISW == 0) begin
|
||||
wis_to_wid = `NW_WIDTH'(wis);
|
||||
end else begin
|
||||
end else begin
|
||||
wis_to_wid = `NW_WIDTH'({wis, isw});
|
||||
end
|
||||
endfunction
|
||||
|
@ -216,7 +276,7 @@ package VX_gpu_pkg;
|
|||
function logic [ISSUE_ISW_W-1:0] wid_to_isw(
|
||||
input logic [`NW_WIDTH-1:0] wid
|
||||
);
|
||||
if (ISSUE_ISW != 0) begin
|
||||
if (ISSUE_ISW != 0) begin
|
||||
wid_to_isw = wid[ISSUE_ISW_W-1:0];
|
||||
end else begin
|
||||
wid_to_isw = 0;
|
||||
|
@ -232,6 +292,20 @@ package VX_gpu_pkg;
|
|||
wid_to_wis = 0;
|
||||
end
|
||||
endfunction
|
||||
|
||||
///////////////////////// Miscellaneous functions /////////////////////////
|
||||
|
||||
function logic [`SFU_WIDTH-1:0] op_to_sfu_type(
|
||||
input logic [`INST_OP_BITS-1:0] op_type
|
||||
);
|
||||
case (op_type)
|
||||
`INST_SFU_CSRRW,
|
||||
`INST_SFU_CSRRS,
|
||||
`INST_SFU_CSRRC: op_to_sfu_type = `SFU_CSRS;
|
||||
default: op_to_sfu_type = `SFU_WCTL;
|
||||
endcase
|
||||
endfunction
|
||||
|
||||
`IGNORE_UNUSED_END
|
||||
|
||||
endpackage
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,7 +14,7 @@
|
|||
`ifndef VX_PLATFORM_VH
|
||||
`define VX_PLATFORM_VH
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
`ifdef SV_DPI
|
||||
`include "util_dpi.vh"
|
||||
`endif
|
||||
|
||||
|
@ -47,7 +47,7 @@
|
|||
`define UNUSED_VAR(x)
|
||||
`define UNUSED_PIN(x) . x ()
|
||||
`define UNUSED_ARG(x) x
|
||||
`define TRACE(level, args) $write args
|
||||
`define TRACE(level, args) if (level <= `DEBUG_LEVEL) $write args
|
||||
`else
|
||||
`ifdef VERILATOR
|
||||
`define TRACING_ON /* verilator tracing_on */
|
||||
|
@ -77,7 +77,8 @@
|
|||
/* verilator lint_off IMPLICIT */ \
|
||||
/* verilator lint_off PINMISSING */ \
|
||||
/* verilator lint_off IMPORTSTAR */ \
|
||||
/* verilator lint_off UNSIGNED */
|
||||
/* verilator lint_off UNSIGNED */ \
|
||||
/* verilator lint_off SYMRSVDWORD */
|
||||
|
||||
`define IGNORE_WARNINGS_END /* verilator lint_on UNUSED */ \
|
||||
/* verilator lint_on PINCONNECTEMPTY */ \
|
||||
|
@ -88,7 +89,8 @@
|
|||
/* verilator lint_on IMPLICIT */ \
|
||||
/* verilator lint_off PINMISSING */ \
|
||||
/* verilator lint_on IMPORTSTAR */ \
|
||||
/* verilator lint_on UNSIGNED */
|
||||
/* verilator lint_on UNSIGNED */ \
|
||||
/* verilator lint_on SYMRSVDWORD */
|
||||
|
||||
`define UNUSED_PARAM(x) /* verilator lint_off UNUSED */ \
|
||||
localparam __``x = x; \
|
||||
|
@ -110,8 +112,14 @@
|
|||
`define UNUSED_ARG(x) /* verilator lint_off UNUSED */ \
|
||||
x \
|
||||
/* verilator lint_on UNUSED */
|
||||
`define TRACE(level, args) dpi_trace(level, $sformatf args)
|
||||
`endif
|
||||
|
||||
`ifdef SV_DPI
|
||||
`define TRACE(level, args) dpi_trace(level, $sformatf args)
|
||||
`else
|
||||
`define TRACE(level, args) if (level <= `DEBUG_LEVEL) $write args
|
||||
`endif
|
||||
|
||||
`endif
|
||||
|
||||
`ifdef SIMULATION
|
||||
|
@ -140,21 +148,21 @@
|
|||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef QUARTUS
|
||||
`define MAX_FANOUT 4
|
||||
`define MAX_FANOUT 8
|
||||
`define IF_DATA_SIZE(x) $bits(x.data)
|
||||
`define USE_FAST_BRAM (* ramstyle = "MLAB, no_rw_check" *)
|
||||
`define NO_RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams off" *)
|
||||
`define DISABLE_BRAM (* ramstyle = "logic" *)
|
||||
`define PRESERVE_NET (* preserve *)
|
||||
`elsif VIVADO
|
||||
`define MAX_FANOUT 4
|
||||
`define MAX_FANOUT 8
|
||||
`define IF_DATA_SIZE(x) $bits(x.data)
|
||||
`define USE_FAST_BRAM (* ram_style = "distributed" *)
|
||||
`define NO_RW_RAM_CHECK (* rw_addr_collision = "no" *)
|
||||
`define DISABLE_BRAM (* ram_style = "registers" *)
|
||||
`define PRESERVE_NET (* keep = "true" *)
|
||||
`else
|
||||
`define MAX_FANOUT 4
|
||||
`define MAX_FANOUT 8
|
||||
`define IF_DATA_SIZE(x) x.DATA_WIDTH
|
||||
`define USE_FAST_BRAM
|
||||
`define NO_RW_RAM_CHECK
|
||||
|
@ -169,7 +177,8 @@
|
|||
`define CLOG2(x) $clog2(x)
|
||||
`define FLOG2(x) ($clog2(x) - (((1 << $clog2(x)) > (x)) ? 1 : 0))
|
||||
`define LOG2UP(x) (((x) > 1) ? $clog2(x) : 1)
|
||||
`define ISPOW2(x) (((x) != 0) && (0 == ((x) & ((x) - 1))))
|
||||
`define IS_POW2(x) (((x) != 0) && (0 == ((x) & ((x) - 1))))
|
||||
`define IS_DIVISBLE(n, d) (((n) % (d)) == 0)
|
||||
|
||||
// Absolute value of x. Expands to a pure expression (no trailing ';') so it
// can be embedded inside larger expressions; callers terminate the statement.
`define ABS(x) (((x) < 0) ? (-(x)) : (x))
|
||||
|
||||
|
@ -181,34 +190,35 @@
|
|||
`define MAX(x, y) (((x) > (y)) ? (x) : (y))
|
||||
`endif
|
||||
|
||||
`ifndef CLAMP
|
||||
`define CLAMP(x, lo, hi) (((x) > (hi)) ? (hi) : (((x) < (lo)) ? (lo) : (x)))
|
||||
`endif
|
||||
|
||||
`ifndef UP
|
||||
`define UP(x) (((x) != 0) ? (x) : 1)
|
||||
`endif
|
||||
|
||||
// Ceiling division: smallest integer >= n/d for positive integers.
// Every argument use is parenthesized so expression operands (shifts,
// ternaries, etc.) are not re-associated by operator precedence.
`define CDIV(n,d) (((n) + (d) - 1) / (d))
|
||||
|
||||
|
||||
`define RTRIM(x, s) x[$bits(x)-1:($bits(x)-s)]
|
||||
|
||||
`define LTRIM(x, s) x[s-1:0]
|
||||
|
||||
`define TRACE_ARRAY1D(lvl, arr, m) \
|
||||
`define SEXT(len, x) {{(len-$bits(x)+1){x[$bits(x)-1]}}, x[$bits(x)-2:0]}
|
||||
|
||||
`define TRACE_ARRAY1D(lvl, fmt, arr, n) \
|
||||
`TRACE(lvl, ("{")); \
|
||||
for (integer __i = (m-1); __i >= 0; --__i) begin \
|
||||
if (__i != (m-1)) `TRACE(lvl, (", ")); \
|
||||
`TRACE(lvl, ("0x%0h", arr[__i])); \
|
||||
for (integer __i = (n-1); __i >= 0; --__i) begin \
|
||||
if (__i != (n-1)) `TRACE(lvl, (", ")); \
|
||||
`TRACE(lvl, (fmt, arr[__i])); \
|
||||
end \
|
||||
`TRACE(lvl, ("}"));
|
||||
|
||||
`define TRACE_ARRAY2D(lvl, arr, m, n) \
|
||||
`define TRACE_ARRAY2D(lvl, fmt, arr, m, n) \
|
||||
`TRACE(lvl, ("{")); \
|
||||
for (integer __i = n-1; __i >= 0; --__i) begin \
|
||||
if (__i != (n-1)) `TRACE(lvl, (", ")); \
|
||||
`TRACE(lvl, ("{")); \
|
||||
for (integer __j = (m-1); __j >= 0; --__j) begin \
|
||||
if (__j != (m-1)) `TRACE(lvl, (", "));\
|
||||
`TRACE(lvl, ("0x%0h", arr[__i][__j])); \
|
||||
`TRACE(lvl, (fmt, arr[__i][__j])); \
|
||||
end \
|
||||
`TRACE(lvl, ("}")); \
|
||||
end \
|
||||
|
@ -228,11 +238,11 @@
|
|||
`define RESET_RELAY(dst, src) \
|
||||
`RESET_RELAY_EX (dst, src, 1, 0)
|
||||
|
||||
// size(x): 0 -> 0, 1 -> 1, 2 -> 2, 3 -> 2, 4-> 2
|
||||
`define OUT_REG_TO_EB_SIZE(out_reg) `MIN(out_reg, 2)
|
||||
// size(x): 0 -> 0, 1 -> 1, 2 -> 2, 3 -> 2, 4 -> 2, 5 -> 2
|
||||
`define TO_OUT_BUF_SIZE(s) `MIN(s, 2)
|
||||
|
||||
// reg(x): 0 -> 0, 1 -> 1, 2 -> 0, 3 -> 1, 4 -> 2
|
||||
`define OUT_REG_TO_EB_REG(out_reg) ((out_reg & 1) + ((out_reg >> 2) << 1))
|
||||
// reg(x): 0 -> 0, 1 -> 1, 2 -> 0, 3 -> 1, 4 -> 2, 5 -> 3
|
||||
// Returns s unchanged when s < 2, otherwise s - 2.
// The argument is parenthesized at every use so expression operands
// (e.g. `a | b`, ternaries) are compared and subtracted as a whole.
`define TO_OUT_BUF_REG(s) (((s) < 2) ? (s) : ((s) - 2))
|
||||
|
||||
`define REPEAT(n,f,s) `_REPEAT_``n(f,s)
|
||||
`define _REPEAT_0(f,s)
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -13,11 +13,12 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_socket import VX_gpu_pkg::*; #(
|
||||
parameter SOCKET_ID = 0
|
||||
) (
|
||||
module VX_socket import VX_gpu_pkg::*; #(
|
||||
parameter SOCKET_ID = 0,
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
|
||||
// Clock
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -36,15 +37,15 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
// Barrier
|
||||
VX_gbar_bus_if.master gbar_bus_if,
|
||||
`endif
|
||||
|
||||
// simulation helper signals
|
||||
output wire sim_ebreak,
|
||||
output wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value,
|
||||
|
||||
// Status
|
||||
output wire busy
|
||||
);
|
||||
|
||||
`ifdef SCOPE
|
||||
localparam scope_core = 0;
|
||||
`SCOPE_IO_SWITCH (`SOCKET_SIZE);
|
||||
`endif
|
||||
|
||||
`ifdef GBAR_ENABLE
|
||||
VX_gbar_bus_if per_core_gbar_bus_if[`SOCKET_SIZE]();
|
||||
|
||||
|
@ -52,7 +53,7 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
|
||||
VX_gbar_arb #(
|
||||
.NUM_REQS (`SOCKET_SIZE),
|
||||
.OUT_REG ((`SOCKET_SIZE > 1) ? 2 : 0)
|
||||
.OUT_BUF ((`SOCKET_SIZE > 1) ? 2 : 0)
|
||||
) gbar_arb (
|
||||
.clk (clk),
|
||||
.reset (gbar_arb_reset),
|
||||
|
@ -67,14 +68,14 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
VX_mem_perf_if mem_perf_tmp_if();
|
||||
assign mem_perf_tmp_if.l2cache = mem_perf_if.l2cache;
|
||||
assign mem_perf_tmp_if.l3cache = mem_perf_if.l3cache;
|
||||
assign mem_perf_tmp_if.smem = 'x;
|
||||
assign mem_perf_tmp_if.lmem = 'x;
|
||||
assign mem_perf_tmp_if.mem = mem_perf_if.mem;
|
||||
`endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (ICACHE_WORD_SIZE),
|
||||
.DATA_SIZE (ICACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (ICACHE_TAG_WIDTH)
|
||||
) per_core_icache_bus_if[`SOCKET_SIZE]();
|
||||
|
||||
|
@ -86,7 +87,7 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (icache_reset, reset);
|
||||
|
||||
VX_cache_cluster #(
|
||||
.INSTANCE_ID ($sformatf("socket%0d-icache", SOCKET_ID)),
|
||||
.INSTANCE_ID ($sformatf("%s-icache", INSTANCE_ID)),
|
||||
.NUM_UNITS (`NUM_ICACHES),
|
||||
.NUM_INPUTS (`SOCKET_SIZE),
|
||||
.TAG_SEL_IDX (0),
|
||||
|
@ -103,8 +104,9 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH (ICACHE_TAG_WIDTH),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.WRITE_ENABLE (0),
|
||||
.CORE_OUT_REG (2),
|
||||
.MEM_OUT_REG (2)
|
||||
.NC_ENABLE (0),
|
||||
.CORE_OUT_BUF (2),
|
||||
.MEM_OUT_BUF (2)
|
||||
) icache (
|
||||
`ifdef PERF_ENABLE
|
||||
.cache_perf (mem_perf_tmp_if.icache),
|
||||
|
@ -119,9 +121,9 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_NOSM_TAG_WIDTH)
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH)
|
||||
) per_core_dcache_bus_if[`SOCKET_SIZE * DCACHE_NUM_REQS]();
|
||||
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (DCACHE_LINE_SIZE),
|
||||
.TAG_WIDTH (DCACHE_MEM_TAG_WIDTH)
|
||||
|
@ -130,10 +132,10 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (dcache_reset, reset);
|
||||
|
||||
VX_cache_cluster #(
|
||||
.INSTANCE_ID ($sformatf("socket%0d-dcache", SOCKET_ID)),
|
||||
.INSTANCE_ID ($sformatf("%s-dcache", INSTANCE_ID)),
|
||||
.NUM_UNITS (`NUM_DCACHES),
|
||||
.NUM_INPUTS (`SOCKET_SIZE),
|
||||
.TAG_SEL_IDX (1),
|
||||
.TAG_SEL_IDX (0),
|
||||
.CACHE_SIZE (`DCACHE_SIZE),
|
||||
.LINE_SIZE (DCACHE_LINE_SIZE),
|
||||
.NUM_BANKS (`DCACHE_NUM_BANKS),
|
||||
|
@ -143,24 +145,26 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
.CRSQ_SIZE (`DCACHE_CRSQ_SIZE),
|
||||
.MSHR_SIZE (`DCACHE_MSHR_SIZE),
|
||||
.MRSQ_SIZE (`DCACHE_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`DCACHE_MREQ_SIZE),
|
||||
.TAG_WIDTH (DCACHE_NOSM_TAG_WIDTH),
|
||||
.MREQ_SIZE (`DCACHE_WRITEBACK ? `DCACHE_MSHR_SIZE : `DCACHE_MREQ_SIZE),
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.WRITE_ENABLE (1),
|
||||
.WRITE_ENABLE (1),
|
||||
.WRITEBACK (`DCACHE_WRITEBACK),
|
||||
.DIRTY_BYTES (`DCACHE_WRITEBACK),
|
||||
.NC_ENABLE (1),
|
||||
.CORE_OUT_REG (`SM_ENABLED ? 2 : 1),
|
||||
.MEM_OUT_REG (2)
|
||||
.CORE_OUT_BUF (2),
|
||||
.MEM_OUT_BUF (2)
|
||||
) dcache (
|
||||
`ifdef PERF_ENABLE
|
||||
.cache_perf (mem_perf_tmp_if.dcache),
|
||||
`endif
|
||||
`endif
|
||||
.clk (clk),
|
||||
.reset (dcache_reset),
|
||||
.reset (dcache_reset),
|
||||
.core_bus_if (per_core_dcache_bus_if),
|
||||
.mem_bus_if (dcache_mem_bus_if)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (`L1_LINE_SIZE),
|
||||
|
@ -175,19 +179,17 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
`ASSIGN_VX_MEM_BUS_IF_X (l1_mem_bus_if[0], icache_mem_bus_if, L1_MEM_TAG_WIDTH, ICACHE_MEM_TAG_WIDTH);
|
||||
`ASSIGN_VX_MEM_BUS_IF_X (l1_mem_bus_if[1], dcache_mem_bus_if, L1_MEM_TAG_WIDTH, DCACHE_MEM_TAG_WIDTH);
|
||||
|
||||
`RESET_RELAY (mem_arb_reset, reset);
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATA_SIZE (`L1_LINE_SIZE),
|
||||
.TAG_WIDTH (L1_MEM_TAG_WIDTH),
|
||||
.TAG_SEL_IDX (1), // Skip 0 for NC flag
|
||||
.TAG_SEL_IDX (0),
|
||||
.ARBITER ("R"),
|
||||
.OUT_REG_REQ (2),
|
||||
.OUT_REG_RSP (2)
|
||||
.REQ_OUT_BUF (2),
|
||||
.RSP_OUT_BUF (2)
|
||||
) mem_arb (
|
||||
.clk (clk),
|
||||
.reset (mem_arb_reset),
|
||||
.reset (reset),
|
||||
.bus_in_if (l1_mem_bus_if),
|
||||
.bus_out_if (l1_mem_arb_bus_if)
|
||||
);
|
||||
|
@ -196,28 +198,21 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [`SOCKET_SIZE-1:0] per_core_sim_ebreak;
|
||||
wire [`SOCKET_SIZE-1:0][`NUM_REGS-1:0][`XLEN-1:0] per_core_sim_wb_value;
|
||||
assign sim_ebreak = per_core_sim_ebreak[0];
|
||||
assign sim_wb_value = per_core_sim_wb_value[0];
|
||||
`UNUSED_VAR (per_core_sim_ebreak)
|
||||
`UNUSED_VAR (per_core_sim_wb_value)
|
||||
|
||||
wire [`SOCKET_SIZE-1:0] per_core_busy;
|
||||
|
||||
VX_dcr_bus_if core_dcr_bus_if();
|
||||
`BUFFER_DCR_BUS_IF (core_dcr_bus_if, dcr_bus_if, (`SOCKET_SIZE > 1));
|
||||
|
||||
`SCOPE_IO_SWITCH (`SOCKET_SIZE)
|
||||
|
||||
// Generate all cores
|
||||
for (genvar i = 0; i < `SOCKET_SIZE; ++i) begin
|
||||
for (genvar core_id = 0; core_id < `SOCKET_SIZE; ++core_id) begin : cores
|
||||
|
||||
`RESET_RELAY (core_reset, reset);
|
||||
|
||||
VX_core #(
|
||||
.CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + i)
|
||||
.CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + core_id),
|
||||
.INSTANCE_ID ($sformatf("%s-core%0d", INSTANCE_ID, core_id))
|
||||
) core (
|
||||
`SCOPE_IO_BIND (i)
|
||||
`SCOPE_IO_BIND (scope_core + core_id)
|
||||
|
||||
.clk (clk),
|
||||
.reset (core_reset),
|
||||
|
@ -225,23 +220,21 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
`ifdef PERF_ENABLE
|
||||
.mem_perf_if (mem_perf_tmp_if),
|
||||
`endif
|
||||
|
||||
|
||||
.dcr_bus_if (core_dcr_bus_if),
|
||||
|
||||
.dcache_bus_if (per_core_dcache_bus_if[i * DCACHE_NUM_REQS +: DCACHE_NUM_REQS]),
|
||||
.dcache_bus_if (per_core_dcache_bus_if[core_id * DCACHE_NUM_REQS +: DCACHE_NUM_REQS]),
|
||||
|
||||
.icache_bus_if (per_core_icache_bus_if[i]),
|
||||
.icache_bus_if (per_core_icache_bus_if[core_id]),
|
||||
|
||||
`ifdef GBAR_ENABLE
|
||||
.gbar_bus_if (per_core_gbar_bus_if[i]),
|
||||
.gbar_bus_if (per_core_gbar_bus_if[core_id]),
|
||||
`endif
|
||||
|
||||
.sim_ebreak (per_core_sim_ebreak[i]),
|
||||
.sim_wb_value (per_core_sim_wb_value[i]),
|
||||
.busy (per_core_busy[i])
|
||||
.busy (per_core_busy[core_id])
|
||||
);
|
||||
end
|
||||
|
||||
`BUFFER_EX(busy, (| per_core_busy), 1'b1, (`SOCKET_SIZE > 1));
|
||||
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,7 +14,7 @@
|
|||
`ifndef VX_TYPES_VH
|
||||
`define VX_TYPES_VH
|
||||
|
||||
// Device configuration registers
|
||||
// Device configuration registers /////////////////////////////////////////////
|
||||
|
||||
`define VX_CSR_ADDR_BITS 12
|
||||
`define VX_DCR_ADDR_BITS 12
|
||||
|
@ -22,24 +22,26 @@
|
|||
`define VX_DCR_BASE_STATE_BEGIN 12'h001
|
||||
`define VX_DCR_BASE_STARTUP_ADDR0 12'h001
|
||||
`define VX_DCR_BASE_STARTUP_ADDR1 12'h002
|
||||
`define VX_DCR_BASE_MPM_CLASS 12'h003
|
||||
`define VX_DCR_BASE_STATE_END 12'h004
|
||||
`define VX_DCR_BASE_STARTUP_ARG0 12'h003
|
||||
`define VX_DCR_BASE_STARTUP_ARG1 12'h004
|
||||
`define VX_DCR_BASE_MPM_CLASS 12'h005
|
||||
`define VX_DCR_BASE_STATE_END 12'h006
|
||||
|
||||
`define VX_DCR_BASE_STATE(addr) ((addr) - `VX_DCR_BASE_STATE_BEGIN)
|
||||
`define VX_DCR_BASE_STATE_COUNT (`VX_DCR_BASE_STATE_END-`VX_DCR_BASE_STATE_BEGIN)
|
||||
|
||||
// Machine Performance-monitoring counters classes
|
||||
// Machine Performance-monitoring counters classes ////////////////////////////
|
||||
|
||||
`define VX_DCR_MPM_CLASS_NONE 0
|
||||
`define VX_DCR_MPM_CLASS_NONE 0
|
||||
`define VX_DCR_MPM_CLASS_CORE 1
|
||||
`define VX_DCR_MPM_CLASS_MEM 2
|
||||
|
||||
// User Floating-Point CSRs
|
||||
// User Floating-Point CSRs ///////////////////////////////////////////////////
|
||||
|
||||
`define VX_CSR_FFLAGS 12'h001
|
||||
`define VX_CSR_FRM 12'h002
|
||||
`define VX_CSR_FCSR 12'h003
|
||||
|
||||
|
||||
`define VX_CSR_SATP 12'h180
|
||||
|
||||
`define VX_CSR_PMPCFG0 12'h3A0
|
||||
|
@ -52,7 +54,9 @@
|
|||
`define VX_CSR_MIE 12'h304
|
||||
`define VX_CSR_MTVEC 12'h305
|
||||
|
||||
`define VX_CSR_MSCRATCH 12'h340
|
||||
`define VX_CSR_MEPC 12'h341
|
||||
`define VX_CSR_MCAUSE 12'h342
|
||||
|
||||
`define VX_CSR_MNSTATUS 12'h744
|
||||
|
||||
|
@ -61,14 +65,17 @@
|
|||
`define VX_CSR_MPM_USER 12'hB03
|
||||
`define VX_CSR_MPM_USER_H 12'hB83
|
||||
|
||||
// Machine Performance-monitoring core counters
|
||||
// PERF: Standard
|
||||
// Machine Performance-monitoring core counters (Standard) ////////////////////
|
||||
|
||||
`define VX_CSR_MCYCLE 12'hB00
|
||||
`define VX_CSR_MCYCLE_H 12'hB80
|
||||
`define VX_CSR_MPM_RESERVED 12'hB01
|
||||
`define VX_CSR_MPM_RESERVED_H 12'hB81
|
||||
`define VX_CSR_MINSTRET 12'hB02
|
||||
`define VX_CSR_MINSTRET_H 12'hB82
|
||||
|
||||
// Machine Performance-monitoring core counters (class 1) /////////////////////
|
||||
|
||||
// PERF: pipeline
|
||||
`define VX_CSR_MPM_SCHED_ID 12'hB03
|
||||
`define VX_CSR_MPM_SCHED_ID_H 12'hB83
|
||||
|
@ -78,32 +85,34 @@
|
|||
`define VX_CSR_MPM_IBUF_ST_H 12'hB85
|
||||
`define VX_CSR_MPM_SCRB_ST 12'hB06
|
||||
`define VX_CSR_MPM_SCRB_ST_H 12'hB86
|
||||
`define VX_CSR_MPM_SCRB_ALU 12'hB07
|
||||
`define VX_CSR_MPM_SCRB_ALU_H 12'hB87
|
||||
`define VX_CSR_MPM_SCRB_FPU 12'hB08
|
||||
`define VX_CSR_MPM_SCRB_FPU_H 12'hB88
|
||||
`define VX_CSR_MPM_SCRB_LSU 12'hB09
|
||||
`define VX_CSR_MPM_SCRB_LSU_H 12'hB89
|
||||
`define VX_CSR_MPM_SCRB_SFU 12'hB0A
|
||||
`define VX_CSR_MPM_SCRB_SFU_H 12'hB8A
|
||||
`define VX_CSR_MPM_OPDS_ST 12'hB07
|
||||
`define VX_CSR_MPM_OPDS_ST_H 12'hB87
|
||||
`define VX_CSR_MPM_SCRB_ALU 12'hB08
|
||||
`define VX_CSR_MPM_SCRB_ALU_H 12'hB88
|
||||
`define VX_CSR_MPM_SCRB_FPU 12'hB09
|
||||
`define VX_CSR_MPM_SCRB_FPU_H 12'hB89
|
||||
`define VX_CSR_MPM_SCRB_LSU 12'hB0A
|
||||
`define VX_CSR_MPM_SCRB_LSU_H 12'hB8A
|
||||
`define VX_CSR_MPM_SCRB_SFU 12'hB0B
|
||||
`define VX_CSR_MPM_SCRB_SFU_H 12'hB8B
|
||||
`define VX_CSR_MPM_SCRB_CSRS 12'hB0C
|
||||
`define VX_CSR_MPM_SCRB_CSRS_H 12'hB8C
|
||||
`define VX_CSR_MPM_SCRB_WCTL 12'hB0D
|
||||
`define VX_CSR_MPM_SCRB_WCTL_H 12'hB8D
|
||||
// PERF: memory
|
||||
`define VX_CSR_MPM_IFETCHES 12'hB0B
|
||||
`define VX_CSR_MPM_IFETCHES_H 12'hB8B
|
||||
`define VX_CSR_MPM_LOADS 12'hB0C
|
||||
`define VX_CSR_MPM_LOADS_H 12'hB8C
|
||||
`define VX_CSR_MPM_STORES 12'hB0D
|
||||
`define VX_CSR_MPM_STORES_H 12'hB8D
|
||||
`define VX_CSR_MPM_IFETCH_LT 12'hB0E
|
||||
`define VX_CSR_MPM_IFETCH_LT_H 12'hB8E
|
||||
`define VX_CSR_MPM_LOAD_LT 12'hB0F
|
||||
`define VX_CSR_MPM_LOAD_LT_H 12'hB8F
|
||||
// SFU: scoreboard
|
||||
`define VX_CSR_MPM_SCRB_WCTL 12'hB10
|
||||
`define VX_CSR_MPM_SCRB_WCTL_H 12'hB90
|
||||
`define VX_CSR_MPM_SCRB_CSRS 12'hB11
|
||||
`define VX_CSR_MPM_SCRB_CSRS_H 12'hB91
|
||||
`define VX_CSR_MPM_IFETCHES 12'hB0E
|
||||
`define VX_CSR_MPM_IFETCHES_H 12'hB8E
|
||||
`define VX_CSR_MPM_LOADS 12'hB0F
|
||||
`define VX_CSR_MPM_LOADS_H 12'hB8F
|
||||
`define VX_CSR_MPM_STORES 12'hB10
|
||||
`define VX_CSR_MPM_STORES_H 12'hB90
|
||||
`define VX_CSR_MPM_IFETCH_LT 12'hB11
|
||||
`define VX_CSR_MPM_IFETCH_LT_H 12'hB91
|
||||
`define VX_CSR_MPM_LOAD_LT 12'hB12
|
||||
`define VX_CSR_MPM_LOAD_LT_H 12'hB92
|
||||
|
||||
// Machine Performance-monitoring memory counters (class 2) ///////////////////
|
||||
|
||||
// Machine Performance-monitoring memory counters
|
||||
// PERF: icache
|
||||
`define VX_CSR_MPM_ICACHE_READS 12'hB03 // total reads
|
||||
`define VX_CSR_MPM_ICACHE_READS_H 12'hB83
|
||||
|
@ -157,15 +166,18 @@
|
|||
`define VX_CSR_MPM_MEM_WRITES_H 12'hB99
|
||||
`define VX_CSR_MPM_MEM_LT 12'hB1A // memory latency
|
||||
`define VX_CSR_MPM_MEM_LT_H 12'hB9A
|
||||
// PERF: smem
|
||||
`define VX_CSR_MPM_SMEM_READS 12'hB1B // memory reads
|
||||
`define VX_CSR_MPM_SMEM_READS_H 12'hB9B
|
||||
`define VX_CSR_MPM_SMEM_WRITES 12'hB1C // memory writes
|
||||
`define VX_CSR_MPM_SMEM_WRITES_H 12'hB9C
|
||||
`define VX_CSR_MPM_SMEM_BANK_ST 12'hB1D // bank conflicts
|
||||
`define VX_CSR_MPM_SMEM_BANK_ST_H 12'hB9D
|
||||
// PERF: lmem
|
||||
`define VX_CSR_MPM_LMEM_READS 12'hB1B // memory reads
|
||||
`define VX_CSR_MPM_LMEM_READS_H 12'hB9B
|
||||
`define VX_CSR_MPM_LMEM_WRITES 12'hB1C // memory writes
|
||||
`define VX_CSR_MPM_LMEM_WRITES_H 12'hB9C
|
||||
`define VX_CSR_MPM_LMEM_BANK_ST 12'hB1D // bank conflicts
|
||||
`define VX_CSR_MPM_LMEM_BANK_ST_H 12'hB9D
|
||||
|
||||
// Machine Information Registers
|
||||
// Machine Performance-monitoring memory counters (class 3) ///////////////////
|
||||
// <Add your own counters: use addresses hB03..hB1F, hB83..hB9F>
|
||||
|
||||
// Machine Information Registers //////////////////////////////////////////////
|
||||
|
||||
`define VX_CSR_MVENDORID 12'hF11
|
||||
`define VX_CSR_MARCHID 12'hF12
|
||||
|
@ -177,11 +189,12 @@
|
|||
`define VX_CSR_THREAD_ID 12'hCC0
|
||||
`define VX_CSR_WARP_ID 12'hCC1
|
||||
`define VX_CSR_CORE_ID 12'hCC2
|
||||
`define VX_CSR_WARP_MASK 12'hCC3
|
||||
`define VX_CSR_THREAD_MASK 12'hCC4 // warning! this value is also used in LLVM
|
||||
`define VX_CSR_ACTIVE_WARPS 12'hCC3
|
||||
`define VX_CSR_ACTIVE_THREADS 12'hCC4 // warning! this value is also used in LLVM
|
||||
|
||||
`define VX_CSR_NUM_THREADS 12'hFC0
|
||||
`define VX_CSR_NUM_WARPS 12'hFC1
|
||||
`define VX_CSR_NUM_CORES 12'hFC2
|
||||
`define VX_CSR_LOCAL_MEM_BASE 12'hFC3
|
||||
|
||||
`endif // VX_TYPES_VH
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -29,8 +29,8 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
output wire [`VX_MEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
input wire mem_req_ready,
|
||||
|
||||
// Memory response
|
||||
input wire mem_rsp_valid,
|
||||
// Memory response
|
||||
input wire mem_rsp_valid,
|
||||
input wire [`VX_MEM_DATA_WIDTH-1:0] mem_rsp_data,
|
||||
input wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
output wire mem_rsp_ready,
|
||||
|
@ -44,11 +44,17 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
output wire busy
|
||||
);
|
||||
|
||||
`ifdef SCOPE
|
||||
localparam scope_cluster = 0;
|
||||
`SCOPE_IO_SWITCH (`NUM_CLUSTERS);
|
||||
`endif
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_mem_perf_if mem_perf_if();
|
||||
assign mem_perf_if.icache = 'x;
|
||||
assign mem_perf_if.dcache = 'x;
|
||||
assign mem_perf_if.l2cache = 'x;
|
||||
assign mem_perf_if.lmem = 'x;
|
||||
`endif
|
||||
|
||||
VX_mem_bus_if #(
|
||||
|
@ -74,12 +80,14 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
.CRSQ_SIZE (`L3_CRSQ_SIZE),
|
||||
.MSHR_SIZE (`L3_MSHR_SIZE),
|
||||
.MRSQ_SIZE (`L3_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`L3_MREQ_SIZE),
|
||||
.MREQ_SIZE (`L3_WRITEBACK ? `L3_MSHR_SIZE : `L3_MREQ_SIZE),
|
||||
.TAG_WIDTH (L2_MEM_TAG_WIDTH),
|
||||
.WRITE_ENABLE (1),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.CORE_OUT_REG (2),
|
||||
.MEM_OUT_REG (2),
|
||||
.WRITEBACK (`L3_WRITEBACK),
|
||||
.DIRTY_BYTES (`L3_WRITEBACK),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.CORE_OUT_BUF (2),
|
||||
.MEM_OUT_BUF (2),
|
||||
.NC_ENABLE (1),
|
||||
.PASSTHRU (!`L3_ENABLED)
|
||||
) l3cache (
|
||||
|
@ -101,6 +109,7 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
assign mem_req_data = mem_bus_if.req_data.data;
|
||||
assign mem_req_tag = mem_bus_if.req_data.tag;
|
||||
assign mem_bus_if.req_ready = mem_req_ready;
|
||||
`UNUSED_VAR (mem_bus_if.req_data.atype)
|
||||
|
||||
assign mem_bus_if.rsp_valid = mem_rsp_valid;
|
||||
assign mem_bus_if.rsp_data.data = mem_rsp_data;
|
||||
|
@ -112,15 +121,6 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
`UNUSED_VAR (mem_req_fire)
|
||||
`UNUSED_VAR (mem_rsp_fire)
|
||||
|
||||
wire sim_ebreak /* verilator public */;
|
||||
wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value /* verilator public */;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_sim_ebreak;
|
||||
wire [`NUM_CLUSTERS-1:0][`NUM_REGS-1:0][`XLEN-1:0] per_cluster_sim_wb_value;
|
||||
assign sim_ebreak = per_cluster_sim_ebreak[0];
|
||||
assign sim_wb_value = per_cluster_sim_wb_value[0];
|
||||
`UNUSED_VAR (per_cluster_sim_ebreak)
|
||||
`UNUSED_VAR (per_cluster_sim_wb_value)
|
||||
|
||||
VX_dcr_bus_if dcr_bus_if();
|
||||
assign dcr_bus_if.write_valid = dcr_wr_valid;
|
||||
assign dcr_bus_if.write_addr = dcr_wr_addr;
|
||||
|
@ -128,19 +128,19 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
|
||||
|
||||
`SCOPE_IO_SWITCH (`NUM_CLUSTERS)
|
||||
|
||||
// Generate all clusters
|
||||
for (genvar i = 0; i < `NUM_CLUSTERS; ++i) begin
|
||||
for (genvar cluster_id = 0; cluster_id < `NUM_CLUSTERS; ++cluster_id) begin : clusters
|
||||
|
||||
`RESET_RELAY (cluster_reset, reset);
|
||||
|
||||
VX_dcr_bus_if cluster_dcr_bus_if();
|
||||
`BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, (`NUM_CLUSTERS > 1));
|
||||
|
||||
VX_cluster #(
|
||||
.CLUSTER_ID (i)
|
||||
.CLUSTER_ID (cluster_id),
|
||||
.INSTANCE_ID ($sformatf("cluster%0d", cluster_id))
|
||||
) cluster (
|
||||
`SCOPE_IO_BIND (i)
|
||||
`SCOPE_IO_BIND (scope_cluster + cluster_id)
|
||||
|
||||
.clk (clk),
|
||||
.reset (cluster_reset),
|
||||
|
@ -148,15 +148,12 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
`ifdef PERF_ENABLE
|
||||
.mem_perf_if (mem_perf_if),
|
||||
`endif
|
||||
|
||||
|
||||
.dcr_bus_if (cluster_dcr_bus_if),
|
||||
|
||||
.mem_bus_if (per_cluster_mem_bus_if[i]),
|
||||
.mem_bus_if (per_cluster_mem_bus_if[cluster_id]),
|
||||
|
||||
.sim_ebreak (per_cluster_sim_ebreak[i]),
|
||||
.sim_wb_value (per_cluster_sim_wb_value[i]),
|
||||
|
||||
.busy (per_cluster_busy[i])
|
||||
.busy (per_cluster_busy[cluster_id])
|
||||
);
|
||||
end
|
||||
|
||||
|
@ -164,14 +161,14 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
|
||||
`ifdef PERF_ENABLE
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_pending_reads;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_pending_reads;
|
||||
mem_perf_t mem_perf;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_mem_pending_reads <= '0;
|
||||
end else begin
|
||||
perf_mem_pending_reads <= $signed(perf_mem_pending_reads) +
|
||||
perf_mem_pending_reads <= $signed(perf_mem_pending_reads) +
|
||||
`PERF_CTR_BITS'($signed(2'(mem_req_fire && ~mem_bus_if.req_data.rw) - 2'(mem_rsp_fire)));
|
||||
end
|
||||
end
|
||||
|
@ -180,7 +177,7 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
wire mem_wr_req_fire = mem_req_fire && mem_bus_if.req_data.rw;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
if (reset) begin
|
||||
mem_perf <= '0;
|
||||
end else begin
|
||||
mem_perf.reads <= mem_perf.reads + `PERF_CTR_BITS'(mem_rd_req_fire);
|
||||
|
@ -189,19 +186,19 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
end
|
||||
end
|
||||
assign mem_perf_if.mem = mem_perf;
|
||||
|
||||
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_CORE_MEM
|
||||
`ifdef DBG_TRACE_MEM
|
||||
always @(posedge clk) begin
|
||||
if (mem_req_fire) begin
|
||||
if (mem_req_rw)
|
||||
`TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h data=0x%0h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data));
|
||||
`TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data));
|
||||
else
|
||||
`TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen));
|
||||
`TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen));
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(1, ("%d: MEM Rsp: tag=0x%0h, data=0x%0h\n", $time, mem_rsp_tag, mem_rsp_data));
|
||||
`TRACE(1, ("%d: MEM Rd Rsp: tag=0x%0h, data=0x%h\n", $time, mem_rsp_tag, mem_rsp_data));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,8 +14,8 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module Vortex_axi import VX_gpu_pkg::*; #(
|
||||
parameter AXI_DATA_WIDTH = `VX_MEM_DATA_WIDTH,
|
||||
parameter AXI_ADDR_WIDTH = `XLEN,
|
||||
parameter AXI_DATA_WIDTH = `VX_MEM_DATA_WIDTH,
|
||||
parameter AXI_ADDR_WIDTH = `MEM_ADDR_WIDTH,
|
||||
parameter AXI_TID_WIDTH = `VX_MEM_TAG_WIDTH,
|
||||
parameter AXI_NUM_BANKS = 1
|
||||
)(
|
||||
|
@ -25,7 +25,7 @@ module Vortex_axi import VX_gpu_pkg::*; #(
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// AXI write request address channel
|
||||
// AXI write request address channel
|
||||
output wire m_axi_awvalid [AXI_NUM_BANKS],
|
||||
input wire m_axi_awready [AXI_NUM_BANKS],
|
||||
output wire [AXI_ADDR_WIDTH-1:0] m_axi_awaddr [AXI_NUM_BANKS],
|
||||
|
@ -39,19 +39,19 @@ module Vortex_axi import VX_gpu_pkg::*; #(
|
|||
output wire [3:0] m_axi_awqos [AXI_NUM_BANKS],
|
||||
output wire [3:0] m_axi_awregion [AXI_NUM_BANKS],
|
||||
|
||||
// AXI write request data channel
|
||||
output wire m_axi_wvalid [AXI_NUM_BANKS],
|
||||
// AXI write request data channel
|
||||
output wire m_axi_wvalid [AXI_NUM_BANKS],
|
||||
input wire m_axi_wready [AXI_NUM_BANKS],
|
||||
output wire [AXI_DATA_WIDTH-1:0] m_axi_wdata [AXI_NUM_BANKS],
|
||||
output wire [AXI_DATA_WIDTH/8-1:0] m_axi_wstrb [AXI_NUM_BANKS],
|
||||
output wire m_axi_wlast [AXI_NUM_BANKS],
|
||||
output wire [AXI_DATA_WIDTH/8-1:0] m_axi_wstrb [AXI_NUM_BANKS],
|
||||
output wire m_axi_wlast [AXI_NUM_BANKS],
|
||||
|
||||
// AXI write response channel
|
||||
input wire m_axi_bvalid [AXI_NUM_BANKS],
|
||||
output wire m_axi_bready [AXI_NUM_BANKS],
|
||||
input wire [AXI_TID_WIDTH-1:0] m_axi_bid [AXI_NUM_BANKS],
|
||||
input wire [1:0] m_axi_bresp [AXI_NUM_BANKS],
|
||||
|
||||
|
||||
// AXI read request channel
|
||||
output wire m_axi_arvalid [AXI_NUM_BANKS],
|
||||
input wire m_axi_arready [AXI_NUM_BANKS],
|
||||
|
@ -59,13 +59,13 @@ module Vortex_axi import VX_gpu_pkg::*; #(
|
|||
output wire [AXI_TID_WIDTH-1:0] m_axi_arid [AXI_NUM_BANKS],
|
||||
output wire [7:0] m_axi_arlen [AXI_NUM_BANKS],
|
||||
output wire [2:0] m_axi_arsize [AXI_NUM_BANKS],
|
||||
output wire [1:0] m_axi_arburst [AXI_NUM_BANKS],
|
||||
output wire [1:0] m_axi_arlock [AXI_NUM_BANKS],
|
||||
output wire [1:0] m_axi_arburst [AXI_NUM_BANKS],
|
||||
output wire [1:0] m_axi_arlock [AXI_NUM_BANKS],
|
||||
output wire [3:0] m_axi_arcache [AXI_NUM_BANKS],
|
||||
output wire [2:0] m_axi_arprot [AXI_NUM_BANKS],
|
||||
output wire [3:0] m_axi_arqos [AXI_NUM_BANKS],
|
||||
output wire [2:0] m_axi_arprot [AXI_NUM_BANKS],
|
||||
output wire [3:0] m_axi_arqos [AXI_NUM_BANKS],
|
||||
output wire [3:0] m_axi_arregion [AXI_NUM_BANKS],
|
||||
|
||||
|
||||
// AXI read response channel
|
||||
input wire m_axi_rvalid [AXI_NUM_BANKS],
|
||||
output wire m_axi_rready [AXI_NUM_BANKS],
|
||||
|
@ -73,7 +73,7 @@ module Vortex_axi import VX_gpu_pkg::*; #(
|
|||
input wire m_axi_rlast [AXI_NUM_BANKS],
|
||||
input wire [AXI_TID_WIDTH-1:0] m_axi_rid [AXI_NUM_BANKS],
|
||||
input wire [1:0] m_axi_rresp [AXI_NUM_BANKS],
|
||||
|
||||
|
||||
// DCR write request
|
||||
input wire dcr_wr_valid,
|
||||
input wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr,
|
||||
|
@ -83,35 +83,35 @@ module Vortex_axi import VX_gpu_pkg::*; #(
|
|||
output wire busy
|
||||
);
|
||||
`STATIC_ASSERT((AXI_DATA_WIDTH == `VX_MEM_DATA_WIDTH), ("invalid memory data size: current=%0d, expected=%0d", AXI_DATA_WIDTH, `VX_MEM_DATA_WIDTH))
|
||||
`STATIC_ASSERT((AXI_ADDR_WIDTH >= `XLEN), ("invalid memory address size: current=%0d, expected=%0d", AXI_ADDR_WIDTH, `VX_MEM_ADDR_WIDTH))
|
||||
`STATIC_ASSERT((AXI_ADDR_WIDTH >= `MEM_ADDR_WIDTH), ("invalid memory address size: current=%0d, expected=%0d", AXI_ADDR_WIDTH, `VX_MEM_ADDR_WIDTH))
|
||||
//`STATIC_ASSERT((AXI_TID_WIDTH >= `VX_MEM_TAG_WIDTH), ("invalid memory tag size: current=%0d, expected=%0d", AXI_TID_WIDTH, `VX_MEM_TAG_WIDTH))
|
||||
|
||||
|
||||
wire mem_req_valid;
|
||||
wire mem_req_rw;
|
||||
wire mem_req_rw;
|
||||
wire [`VX_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen;
|
||||
wire [`VX_MEM_ADDR_WIDTH-1:0] mem_req_addr;
|
||||
wire [`VX_MEM_DATA_WIDTH-1:0] mem_req_data;
|
||||
wire [`VX_MEM_TAG_WIDTH-1:0] mem_req_tag;
|
||||
wire mem_req_ready;
|
||||
|
||||
wire mem_rsp_valid;
|
||||
wire mem_rsp_valid;
|
||||
wire [`VX_MEM_DATA_WIDTH-1:0] mem_rsp_data;
|
||||
wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag;
|
||||
wire mem_rsp_ready;
|
||||
|
||||
wire [`XLEN-1:0] m_axi_awaddr_unqual [AXI_NUM_BANKS];
|
||||
wire [`XLEN-1:0] m_axi_araddr_unqual [AXI_NUM_BANKS];
|
||||
|
||||
wire [`MEM_ADDR_WIDTH-1:0] m_axi_awaddr_unqual [AXI_NUM_BANKS];
|
||||
wire [`MEM_ADDR_WIDTH-1:0] m_axi_araddr_unqual [AXI_NUM_BANKS];
|
||||
|
||||
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_awid_unqual [AXI_NUM_BANKS];
|
||||
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_arid_unqual [AXI_NUM_BANKS];
|
||||
|
||||
|
||||
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_bid_unqual [AXI_NUM_BANKS];
|
||||
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_rid_unqual [AXI_NUM_BANKS];
|
||||
|
||||
for (genvar i = 0; i < AXI_NUM_BANKS; ++i) begin
|
||||
assign m_axi_awaddr[i] = `XLEN'(m_axi_awaddr_unqual[i]);
|
||||
assign m_axi_araddr[i] = `XLEN'(m_axi_araddr_unqual[i]);
|
||||
|
||||
assign m_axi_awaddr[i] = `MEM_ADDR_WIDTH'(m_axi_awaddr_unqual[i]);
|
||||
assign m_axi_araddr[i] = `MEM_ADDR_WIDTH'(m_axi_araddr_unqual[i]);
|
||||
|
||||
assign m_axi_awid[i] = AXI_TID_WIDTH'(m_axi_awid_unqual[i]);
|
||||
assign m_axi_arid[i] = AXI_TID_WIDTH'(m_axi_arid_unqual[i]);
|
||||
|
||||
|
@ -120,11 +120,11 @@ module Vortex_axi import VX_gpu_pkg::*; #(
|
|||
end
|
||||
|
||||
VX_axi_adapter #(
|
||||
.DATA_WIDTH (`VX_MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`XLEN),
|
||||
.DATA_WIDTH (`VX_MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`MEM_ADDR_WIDTH),
|
||||
.TAG_WIDTH (`VX_MEM_TAG_WIDTH),
|
||||
.NUM_BANKS (AXI_NUM_BANKS),
|
||||
.OUT_REG_RSP((AXI_NUM_BANKS > 1) ? 2 : 0)
|
||||
.RSP_OUT_BUF((AXI_NUM_BANKS > 1) ? 2 : 0)
|
||||
) axi_adapter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -141,18 +141,18 @@ module Vortex_axi import VX_gpu_pkg::*; #(
|
|||
.mem_rsp_data (mem_rsp_data),
|
||||
.mem_rsp_tag (mem_rsp_tag),
|
||||
.mem_rsp_ready (mem_rsp_ready),
|
||||
|
||||
|
||||
.m_axi_awvalid (m_axi_awvalid),
|
||||
.m_axi_awready (m_axi_awready),
|
||||
.m_axi_awaddr (m_axi_awaddr_unqual),
|
||||
.m_axi_awid (m_axi_awid_unqual),
|
||||
.m_axi_awlen (m_axi_awlen),
|
||||
.m_axi_awsize (m_axi_awsize),
|
||||
.m_axi_awburst (m_axi_awburst),
|
||||
.m_axi_awlock (m_axi_awlock),
|
||||
.m_axi_awburst (m_axi_awburst),
|
||||
.m_axi_awlock (m_axi_awlock),
|
||||
.m_axi_awcache (m_axi_awcache),
|
||||
.m_axi_awprot (m_axi_awprot),
|
||||
.m_axi_awqos (m_axi_awqos),
|
||||
.m_axi_awprot (m_axi_awprot),
|
||||
.m_axi_awqos (m_axi_awqos),
|
||||
.m_axi_awregion (m_axi_awregion),
|
||||
|
||||
.m_axi_wvalid (m_axi_wvalid),
|
||||
|
@ -160,35 +160,35 @@ module Vortex_axi import VX_gpu_pkg::*; #(
|
|||
.m_axi_wdata (m_axi_wdata),
|
||||
.m_axi_wstrb (m_axi_wstrb),
|
||||
.m_axi_wlast (m_axi_wlast),
|
||||
|
||||
|
||||
.m_axi_bvalid (m_axi_bvalid),
|
||||
.m_axi_bready (m_axi_bready),
|
||||
.m_axi_bid (m_axi_bid_unqual),
|
||||
.m_axi_bresp (m_axi_bresp),
|
||||
|
||||
|
||||
.m_axi_arvalid (m_axi_arvalid),
|
||||
.m_axi_arready (m_axi_arready),
|
||||
.m_axi_araddr (m_axi_araddr_unqual),
|
||||
.m_axi_arid (m_axi_arid_unqual),
|
||||
.m_axi_arid (m_axi_arid_unqual),
|
||||
.m_axi_arlen (m_axi_arlen),
|
||||
.m_axi_arsize (m_axi_arsize),
|
||||
.m_axi_arburst (m_axi_arburst),
|
||||
.m_axi_arlock (m_axi_arlock),
|
||||
.m_axi_arburst (m_axi_arburst),
|
||||
.m_axi_arlock (m_axi_arlock),
|
||||
.m_axi_arcache (m_axi_arcache),
|
||||
.m_axi_arprot (m_axi_arprot),
|
||||
.m_axi_arprot (m_axi_arprot),
|
||||
.m_axi_arqos (m_axi_arqos),
|
||||
.m_axi_arregion (m_axi_arregion),
|
||||
|
||||
.m_axi_arregion (m_axi_arregion),
|
||||
|
||||
.m_axi_rvalid (m_axi_rvalid),
|
||||
.m_axi_rready (m_axi_rready),
|
||||
.m_axi_rdata (m_axi_rdata),
|
||||
.m_axi_rlast (m_axi_rlast) ,
|
||||
.m_axi_rdata (m_axi_rdata),
|
||||
.m_axi_rlast (m_axi_rlast) ,
|
||||
.m_axi_rid (m_axi_rid_unqual),
|
||||
.m_axi_rresp (m_axi_rresp)
|
||||
);
|
||||
|
||||
`SCOPE_IO_SWITCH (1)
|
||||
|
||||
|
||||
Vortex vortex (
|
||||
`SCOPE_IO_BIND (0)
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
// To be done:
|
||||
// Check how to run this with OPAE. Looks like setup issue
|
||||
|
||||
`ifndef NOPAE
|
||||
|
||||
`include "platform_if.vh"
|
||||
|
||||
|
@ -85,7 +86,7 @@ module ccip_std_afu #(
|
|||
t_local_mem_data avs_writedata [NUM_LOCAL_MEM_BANKS];
|
||||
t_local_mem_addr avs_address [NUM_LOCAL_MEM_BANKS];
|
||||
logic avs_write [NUM_LOCAL_MEM_BANKS];
|
||||
logic avs_read [NUM_LOCAL_MEM_BANKS];
|
||||
logic avs_read [NUM_LOCAL_MEM_BANKS];
|
||||
|
||||
for (genvar b = 0; b < NUM_LOCAL_MEM_BANKS; b++) begin
|
||||
assign local_mem[b].burstcount = avs_burstcount[b];
|
||||
|
@ -94,7 +95,7 @@ module ccip_std_afu #(
|
|||
assign local_mem[b].byteenable = avs_byteenable[b];
|
||||
assign local_mem[b].write = avs_write[b];
|
||||
assign local_mem[b].read = avs_read[b];
|
||||
|
||||
|
||||
assign avs_waitrequest[b] = local_mem[b].waitrequest;
|
||||
assign avs_readdata[b] = local_mem[b].readdata;
|
||||
assign avs_readdatavalid[b] = local_mem[b].readdatavalid;
|
||||
|
@ -107,7 +108,7 @@ module ccip_std_afu #(
|
|||
.reset (reset_T1),
|
||||
|
||||
.cp2af_sRxPort (cp2af_sRx_T1),
|
||||
.af2cp_sTxPort (af2cp_sTx_T0),
|
||||
.af2cp_sTxPort (af2cp_sTx_T0),
|
||||
|
||||
.avs_writedata (avs_writedata),
|
||||
.avs_readdata (avs_readdata),
|
||||
|
@ -121,3 +122,5 @@ module ccip_std_afu #(
|
|||
);
|
||||
|
||||
endmodule
|
||||
|
||||
`endif
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -62,7 +62,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
localparam CCI_RW_PENDING_SIZE= 256;
|
||||
|
||||
localparam AFU_ID_L = 16'h0002; // AFU ID Lower
|
||||
localparam AFU_ID_H = 16'h0004; // AFU ID Higher
|
||||
localparam AFU_ID_H = 16'h0004; // AFU ID Higher
|
||||
|
||||
localparam CMD_MEM_READ = `AFU_IMAGE_CMD_MEM_READ;
|
||||
localparam CMD_MEM_WRITE = `AFU_IMAGE_CMD_MEM_WRITE;
|
||||
|
@ -70,15 +70,15 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
localparam CMD_RUN = `AFU_IMAGE_CMD_RUN;
|
||||
localparam CMD_TYPE_WIDTH = `CLOG2(`AFU_IMAGE_CMD_MAX_VALUE+1);
|
||||
|
||||
localparam MMIO_CMD_TYPE = `AFU_IMAGE_MMIO_CMD_TYPE;
|
||||
localparam MMIO_CMD_TYPE = `AFU_IMAGE_MMIO_CMD_TYPE;
|
||||
localparam MMIO_CMD_ARG0 = `AFU_IMAGE_MMIO_CMD_ARG0;
|
||||
localparam MMIO_CMD_ARG1 = `AFU_IMAGE_MMIO_CMD_ARG1;
|
||||
localparam MMIO_CMD_ARG2 = `AFU_IMAGE_MMIO_CMD_ARG2;
|
||||
localparam MMIO_STATUS = `AFU_IMAGE_MMIO_STATUS;
|
||||
|
||||
localparam COUT_TID_WIDTH = `CLOG2(`VX_MEM_BYTEEN_WIDTH);
|
||||
localparam COUT_TID_WIDTH = `CLOG2(`VX_MEM_BYTEEN_WIDTH);
|
||||
localparam COUT_QUEUE_DATAW = COUT_TID_WIDTH + 8;
|
||||
localparam COUT_QUEUE_SIZE = 64;
|
||||
localparam COUT_QUEUE_SIZE = 64;
|
||||
|
||||
localparam MMIO_DEV_CAPS = `AFU_IMAGE_MMIO_DEV_CAPS;
|
||||
localparam MMIO_ISA_CAPS = `AFU_IMAGE_MMIO_ISA_CAPS;
|
||||
|
@ -97,14 +97,14 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
wire [127:0] afu_id = `AFU_ACCEL_UUID;
|
||||
|
||||
wire [63:0] dev_caps = {16'b0,
|
||||
8'(`SM_ENABLED ? `SMEM_LOG_SIZE : 0),
|
||||
16'(`NUM_CORES * `NUM_CLUSTERS),
|
||||
8'(`NUM_WARPS),
|
||||
8'(`NUM_THREADS),
|
||||
8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0),
|
||||
16'(`NUM_CORES * `NUM_CLUSTERS),
|
||||
8'(`NUM_WARPS),
|
||||
8'(`NUM_THREADS),
|
||||
8'(`IMPLEMENTATION_ID)};
|
||||
|
||||
wire [63:0] isa_caps = {32'(`MISA_EXT),
|
||||
2'(`CLOG2(`XLEN)-4),
|
||||
wire [63:0] isa_caps = {32'(`MISA_EXT),
|
||||
2'(`CLOG2(`XLEN)-4),
|
||||
30'(`MISA_STD)};
|
||||
|
||||
reg [STATE_WIDTH-1:0] state;
|
||||
|
@ -161,7 +161,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
|
||||
reg scope_bus_in;
|
||||
wire scope_bus_out;
|
||||
|
||||
|
||||
reg [5:0] scope_bus_ctr;
|
||||
|
||||
wire scope_reset = reset;
|
||||
|
@ -177,8 +177,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
scope_bus_ctr <= 63;
|
||||
end
|
||||
scope_bus_in <= 0;
|
||||
if (cp2af_sRxPort.c0.mmioWrValid
|
||||
&& (MMIO_SCOPE_WRITE == mmio_hdr.address)) begin
|
||||
if (cp2af_sRxPort.c0.mmioWrValid
|
||||
&& (MMIO_SCOPE_WRITE == mmio_hdr.address)) begin
|
||||
cmd_scope_wdata <= 64'(cp2af_sRxPort.c0.data);
|
||||
cmd_scope_writing <= 1;
|
||||
scope_bus_ctr <= 63;
|
||||
|
@ -191,7 +191,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
if (scope_bus_ctr == 0) begin
|
||||
cmd_scope_writing <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
if (cmd_scope_reading) begin
|
||||
cmd_scope_rdata <= {cmd_scope_rdata[62:0], scope_bus_out};
|
||||
scope_bus_ctr <= scope_bus_ctr - 1;
|
||||
|
@ -211,7 +211,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
// disable assertions until full reset
|
||||
reg [`CLOG2(`RESET_DELAY+1)-1:0] assert_delay_ctr;
|
||||
initial begin
|
||||
$assertoff;
|
||||
$assertoff;
|
||||
end
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
@ -231,22 +231,22 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
mmio_tx.mmioRdValid <= 0;
|
||||
mmio_tx.hdr <= '0;
|
||||
end else begin
|
||||
mmio_tx.mmioRdValid <= cp2af_sRxPort.c0.mmioRdValid;
|
||||
mmio_tx.hdr.tid <= mmio_hdr.tid;
|
||||
mmio_tx.mmioRdValid <= cp2af_sRxPort.c0.mmioRdValid;
|
||||
mmio_tx.hdr.tid <= mmio_hdr.tid;
|
||||
end
|
||||
// serve MMIO write request
|
||||
if (cp2af_sRxPort.c0.mmioWrValid) begin
|
||||
case (mmio_hdr.address)
|
||||
MMIO_CMD_ARG0: begin
|
||||
cmd_args[0] <= 64'(cp2af_sRxPort.c0.data);
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: MMIO_CMD_ARG0: data=0x%0h\n", $time, 64'(cp2af_sRxPort.c0.data)));
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: MMIO_CMD_ARG0: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data)));
|
||||
`endif
|
||||
end
|
||||
MMIO_CMD_ARG1: begin
|
||||
cmd_args[1] <= 64'(cp2af_sRxPort.c0.data);
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: MMIO_CMD_ARG1: data=0x%0h\n", $time, 64'(cp2af_sRxPort.c0.data)));
|
||||
`TRACE(2, ("%d: MMIO_CMD_ARG1: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data)));
|
||||
`endif
|
||||
end
|
||||
MMIO_CMD_ARG2: begin
|
||||
|
@ -263,13 +263,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
`ifdef SCOPE
|
||||
MMIO_SCOPE_WRITE: begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: MMIO_SCOPE_WRITE: data=0x%0h\n", $time, cmd_scope_wdata));
|
||||
`TRACE(2, ("%d: MMIO_SCOPE_WRITE: data=0x%h\n", $time, cmd_scope_wdata));
|
||||
`endif
|
||||
end
|
||||
`endif
|
||||
default: begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: Unknown MMIO Wr: addr=0x%0h, data=0x%0h\n", $time, mmio_hdr.address, 64'(cp2af_sRxPort.c0.data)));
|
||||
`TRACE(2, ("%d: Unknown MMIO Wr: addr=0x%0h, data=0x%h\n", $time, mmio_hdr.address, 64'(cp2af_sRxPort.c0.data)));
|
||||
`endif
|
||||
end
|
||||
endcase
|
||||
|
@ -284,7 +284,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
8'b0, // reserved
|
||||
4'b0, // afu minor revision = 0
|
||||
7'b0, // reserved
|
||||
1'b1, // end of DFH list = 1
|
||||
1'b1, // end of DFH list = 1
|
||||
24'b0, // next DFH offset = 0
|
||||
4'b0, // afu major revision = 0
|
||||
12'b0 // feature ID = 0
|
||||
|
@ -305,16 +305,16 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
MMIO_SCOPE_READ: begin
|
||||
mmio_tx.data <= cmd_scope_rdata;
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: MMIO_SCOPE_READ: data=0x%0h\n", $time, cmd_scope_rdata));
|
||||
`TRACE(2, ("%d: MMIO_SCOPE_READ: data=0x%h\n", $time, cmd_scope_rdata));
|
||||
`endif
|
||||
end
|
||||
`endif
|
||||
MMIO_DEV_CAPS: begin
|
||||
mmio_tx.data <= dev_caps;
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: MMIO_DEV_CAPS: data=0x%0h\n", $time, dev_caps));
|
||||
`TRACE(2, ("%d: MMIO_DEV_CAPS: data=0x%h\n", $time, dev_caps));
|
||||
`endif
|
||||
end
|
||||
end
|
||||
MMIO_ISA_CAPS: begin
|
||||
mmio_tx.data <= isa_caps;
|
||||
`ifdef DBG_TRACE_AFU
|
||||
|
@ -352,41 +352,41 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
end
|
||||
|
||||
wire is_mmio_wr_cmd = cp2af_sRxPort.c0.mmioWrValid && (MMIO_CMD_TYPE == mmio_hdr.address);
|
||||
wire [CMD_TYPE_WIDTH-1:0] cmd_type = is_mmio_wr_cmd ?
|
||||
wire [CMD_TYPE_WIDTH-1:0] cmd_type = is_mmio_wr_cmd ?
|
||||
CMD_TYPE_WIDTH'(cp2af_sRxPort.c0.data) : CMD_TYPE_WIDTH'(0);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= STATE_IDLE;
|
||||
vx_busy_wait <= 0;
|
||||
vx_running <= 0;
|
||||
vx_running <= 0;
|
||||
end else begin
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
STATE_IDLE: begin
|
||||
case (cmd_type)
|
||||
CMD_MEM_READ: begin
|
||||
CMD_MEM_READ: begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: STATE MEM_READ: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size));
|
||||
`endif
|
||||
state <= STATE_MEM_READ;
|
||||
end
|
||||
CMD_MEM_WRITE: begin
|
||||
state <= STATE_MEM_READ;
|
||||
end
|
||||
CMD_MEM_WRITE: begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: STATE MEM_WRITE: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size));
|
||||
`endif
|
||||
state <= STATE_MEM_WRITE;
|
||||
end
|
||||
CMD_DCR_WRITE: begin
|
||||
CMD_DCR_WRITE: begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: STATE DCR_WRITE: addr=0x%0h data=%0d\n", $time, cmd_dcr_addr, cmd_dcr_data));
|
||||
`endif
|
||||
state <= STATE_DCR_WRITE;
|
||||
end
|
||||
CMD_RUN: begin
|
||||
CMD_RUN: begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: STATE RUN\n", $time));
|
||||
`endif
|
||||
state <= STATE_RUN;
|
||||
`endif
|
||||
state <= STATE_RUN;
|
||||
vx_running <= 0;
|
||||
end
|
||||
default: begin
|
||||
|
@ -425,7 +425,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
end
|
||||
end else begin
|
||||
// wait until the gpu is not busy
|
||||
if (~vx_busy) begin
|
||||
if (~vx_busy) begin
|
||||
state <= STATE_IDLE;
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: AFU: End execution\n", $time));
|
||||
|
@ -441,8 +441,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
`endif
|
||||
vx_running <= 1;
|
||||
vx_busy_wait <= 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
|
@ -462,7 +462,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
wire [CCI_ADDR_WIDTH-1:0] cci_mem_req_tag;
|
||||
wire cci_mem_req_ready;
|
||||
|
||||
wire cci_mem_rsp_valid;
|
||||
wire cci_mem_rsp_valid;
|
||||
wire [CCI_DATA_WIDTH-1:0] cci_mem_rsp_data;
|
||||
wire [CCI_ADDR_WIDTH-1:0] cci_mem_rsp_tag;
|
||||
wire cci_mem_rsp_ready;
|
||||
|
@ -475,30 +475,32 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.TAG_WIDTH (AVS_REQ_TAGW)
|
||||
) cci_vx_mem_bus_if[2]();
|
||||
|
||||
`RESET_RELAY (cci_adapter_reset, reset);
|
||||
|
||||
VX_mem_adapter #(
|
||||
.SRC_DATA_WIDTH (CCI_DATA_WIDTH),
|
||||
.DST_DATA_WIDTH (LMEM_DATA_WIDTH),
|
||||
.SRC_ADDR_WIDTH (CCI_ADDR_WIDTH),
|
||||
.DST_ADDR_WIDTH (LMEM_ADDR_WIDTH),
|
||||
.SRC_DATA_WIDTH (CCI_DATA_WIDTH),
|
||||
.DST_DATA_WIDTH (LMEM_DATA_WIDTH),
|
||||
.SRC_ADDR_WIDTH (CCI_ADDR_WIDTH),
|
||||
.DST_ADDR_WIDTH (LMEM_ADDR_WIDTH),
|
||||
.SRC_TAG_WIDTH (CCI_ADDR_WIDTH),
|
||||
.DST_TAG_WIDTH (AVS_REQ_TAGW),
|
||||
.OUT_REG_REQ (0),
|
||||
.OUT_REG_RSP (0)
|
||||
.REQ_OUT_BUF (0),
|
||||
.RSP_OUT_BUF (0)
|
||||
) cci_mem_adapter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (cci_adapter_reset),
|
||||
|
||||
.mem_req_valid_in (cci_mem_req_valid),
|
||||
.mem_req_addr_in (cci_mem_req_addr),
|
||||
.mem_req_rw_in (cci_mem_req_rw),
|
||||
.mem_req_byteen_in ({CCI_DATA_SIZE{1'b1}}),
|
||||
.mem_req_data_in (cci_mem_req_data),
|
||||
.mem_req_tag_in (cci_mem_req_tag),
|
||||
.mem_req_ready_in (cci_mem_req_ready),
|
||||
.mem_req_tag_in (cci_mem_req_tag),
|
||||
.mem_req_ready_in (cci_mem_req_ready),
|
||||
|
||||
.mem_rsp_valid_in (cci_mem_rsp_valid),
|
||||
.mem_rsp_data_in (cci_mem_rsp_data),
|
||||
.mem_rsp_tag_in (cci_mem_rsp_tag),
|
||||
.mem_rsp_valid_in (cci_mem_rsp_valid),
|
||||
.mem_rsp_data_in (cci_mem_rsp_data),
|
||||
.mem_rsp_tag_in (cci_mem_rsp_tag),
|
||||
.mem_rsp_ready_in (cci_mem_rsp_ready),
|
||||
|
||||
.mem_req_valid_out (cci_vx_mem_bus_if[1].req_valid),
|
||||
|
@ -507,14 +509,17 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.mem_req_byteen_out (cci_vx_mem_bus_if[1].req_data.byteen),
|
||||
.mem_req_data_out (cci_vx_mem_bus_if[1].req_data.data),
|
||||
.mem_req_tag_out (cci_vx_mem_bus_if[1].req_data.tag),
|
||||
.mem_req_ready_out (cci_vx_mem_bus_if[1].req_ready),
|
||||
.mem_req_ready_out (cci_vx_mem_bus_if[1].req_ready),
|
||||
|
||||
.mem_rsp_valid_out (cci_vx_mem_bus_if[1].rsp_valid),
|
||||
.mem_rsp_data_out (cci_vx_mem_bus_if[1].rsp_data.data),
|
||||
.mem_rsp_tag_out (cci_vx_mem_bus_if[1].rsp_data.tag),
|
||||
.mem_rsp_valid_out (cci_vx_mem_bus_if[1].rsp_valid),
|
||||
.mem_rsp_data_out (cci_vx_mem_bus_if[1].rsp_data.data),
|
||||
.mem_rsp_tag_out (cci_vx_mem_bus_if[1].rsp_data.tag),
|
||||
.mem_rsp_ready_out (cci_vx_mem_bus_if[1].rsp_ready)
|
||||
);
|
||||
|
||||
assign cci_vx_mem_bus_if[1].req_data.atype = '0;
|
||||
`UNUSED_VAR (cci_vx_mem_bus_if[1].req_data.atype)
|
||||
|
||||
//--
|
||||
|
||||
wire vx_mem_is_cout;
|
||||
|
@ -523,30 +528,32 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
|
||||
assign vx_mem_req_valid_qual = vx_mem_req_valid && ~vx_mem_is_cout;
|
||||
|
||||
`RESET_RELAY (vx_adapter_reset, reset);
|
||||
|
||||
VX_mem_adapter #(
|
||||
.SRC_DATA_WIDTH (`VX_MEM_DATA_WIDTH),
|
||||
.DST_DATA_WIDTH (LMEM_DATA_WIDTH),
|
||||
.SRC_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH),
|
||||
.DST_DATA_WIDTH (LMEM_DATA_WIDTH),
|
||||
.SRC_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH),
|
||||
.DST_ADDR_WIDTH (LMEM_ADDR_WIDTH),
|
||||
.SRC_TAG_WIDTH (`VX_MEM_TAG_WIDTH),
|
||||
.DST_TAG_WIDTH (AVS_REQ_TAGW),
|
||||
.OUT_REG_REQ (0),
|
||||
.OUT_REG_RSP (2)
|
||||
.REQ_OUT_BUF (0),
|
||||
.RSP_OUT_BUF (2)
|
||||
) vx_mem_adapter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (vx_adapter_reset),
|
||||
|
||||
.mem_req_valid_in (vx_mem_req_valid_qual),
|
||||
.mem_req_addr_in (vx_mem_req_addr),
|
||||
.mem_req_rw_in (vx_mem_req_rw),
|
||||
.mem_req_byteen_in (vx_mem_req_byteen),
|
||||
.mem_req_data_in (vx_mem_req_data),
|
||||
.mem_req_tag_in (vx_mem_req_tag),
|
||||
.mem_req_ready_in (vx_mem_req_ready_qual),
|
||||
.mem_req_tag_in (vx_mem_req_tag),
|
||||
.mem_req_ready_in (vx_mem_req_ready_qual),
|
||||
|
||||
.mem_rsp_valid_in (vx_mem_rsp_valid),
|
||||
.mem_rsp_data_in (vx_mem_rsp_data),
|
||||
.mem_rsp_tag_in (vx_mem_rsp_tag),
|
||||
.mem_rsp_valid_in (vx_mem_rsp_valid),
|
||||
.mem_rsp_data_in (vx_mem_rsp_data),
|
||||
.mem_rsp_tag_in (vx_mem_rsp_tag),
|
||||
.mem_rsp_ready_in (vx_mem_rsp_ready),
|
||||
|
||||
.mem_req_valid_out (cci_vx_mem_bus_if[0].req_valid),
|
||||
|
@ -555,14 +562,17 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.mem_req_byteen_out (cci_vx_mem_bus_if[0].req_data.byteen),
|
||||
.mem_req_data_out (cci_vx_mem_bus_if[0].req_data.data),
|
||||
.mem_req_tag_out (cci_vx_mem_bus_if[0].req_data.tag),
|
||||
.mem_req_ready_out (cci_vx_mem_bus_if[0].req_ready),
|
||||
.mem_req_ready_out (cci_vx_mem_bus_if[0].req_ready),
|
||||
|
||||
.mem_rsp_valid_out (cci_vx_mem_bus_if[0].rsp_valid),
|
||||
.mem_rsp_data_out (cci_vx_mem_bus_if[0].rsp_data.data),
|
||||
.mem_rsp_tag_out (cci_vx_mem_bus_if[0].rsp_data.tag),
|
||||
.mem_rsp_valid_out (cci_vx_mem_bus_if[0].rsp_valid),
|
||||
.mem_rsp_data_out (cci_vx_mem_bus_if[0].rsp_data.data),
|
||||
.mem_rsp_tag_out (cci_vx_mem_bus_if[0].rsp_data.tag),
|
||||
.mem_rsp_ready_out (cci_vx_mem_bus_if[0].rsp_ready)
|
||||
);
|
||||
|
||||
assign cci_vx_mem_bus_if[0].req_data.atype = '0;
|
||||
`UNUSED_VAR (cci_vx_mem_bus_if[0].req_data.atype)
|
||||
|
||||
//--
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (LMEM_DATA_SIZE),
|
||||
|
@ -570,19 +580,17 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.TAG_WIDTH (AVS_REQ_TAGW+1)
|
||||
) mem_bus_if[1]();
|
||||
|
||||
`RESET_RELAY (mem_arb_reset, reset);
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATA_SIZE (LMEM_DATA_SIZE),
|
||||
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
|
||||
.TAG_WIDTH (AVS_REQ_TAGW),
|
||||
.ARBITER ("P"),
|
||||
.OUT_REG_REQ (0),
|
||||
.OUT_REG_RSP (0)
|
||||
.ARBITER ("P"), // prioritize VX requests
|
||||
.REQ_OUT_BUF (0),
|
||||
.RSP_OUT_BUF (0)
|
||||
) mem_arb (
|
||||
.clk (clk),
|
||||
.reset (mem_arb_reset),
|
||||
.reset (reset),
|
||||
.bus_in_if (cci_vx_mem_bus_if),
|
||||
.bus_out_if (mem_bus_if)
|
||||
);
|
||||
|
@ -592,19 +600,19 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
`RESET_RELAY (avs_adapter_reset, reset);
|
||||
|
||||
VX_avs_adapter #(
|
||||
.DATA_WIDTH (LMEM_DATA_WIDTH),
|
||||
.DATA_WIDTH (LMEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
|
||||
.BURST_WIDTH (LMEM_BURST_CTRW),
|
||||
.NUM_BANKS (NUM_LOCAL_MEM_BANKS),
|
||||
.TAG_WIDTH (AVS_REQ_TAGW + 1),
|
||||
.RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE),
|
||||
.OUT_REG_REQ (2),
|
||||
.OUT_REG_RSP (0)
|
||||
.REQ_OUT_BUF (2),
|
||||
.RSP_OUT_BUF (0)
|
||||
) avs_adapter (
|
||||
.clk (clk),
|
||||
.reset (avs_adapter_reset),
|
||||
|
||||
// Memory request
|
||||
// Memory request
|
||||
.mem_req_valid (mem_bus_if[0].req_valid),
|
||||
.mem_req_rw (mem_bus_if[0].req_data.rw),
|
||||
.mem_req_byteen (mem_bus_if[0].req_data.byteen),
|
||||
|
@ -613,7 +621,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.mem_req_tag (mem_bus_if[0].req_data.tag),
|
||||
.mem_req_ready (mem_bus_if[0].req_ready),
|
||||
|
||||
// Memory response
|
||||
// Memory response
|
||||
.mem_rsp_valid (mem_bus_if[0].rsp_valid),
|
||||
.mem_rsp_data (mem_bus_if[0].rsp_data.data),
|
||||
.mem_rsp_tag (mem_bus_if[0].rsp_data.tag),
|
||||
|
@ -631,6 +639,9 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.avs_readdatavalid(avs_readdatavalid)
|
||||
);
|
||||
|
||||
assign mem_bus_if[0].req_data.atype = '0;
|
||||
`UNUSED_VAR (mem_bus_if[0].req_data.atype)
|
||||
|
||||
// CCI-P Read Request ///////////////////////////////////////////////////////////
|
||||
|
||||
reg [CCI_ADDR_WIDTH-1:0] cci_mem_wr_req_ctr;
|
||||
|
@ -654,13 +665,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
always @(*) begin
|
||||
af2cp_sTxPort.c0.valid = cci_rd_req_fire;
|
||||
af2cp_sTxPort.c0.hdr = t_ccip_c0_ReqMemHdr'(0);
|
||||
af2cp_sTxPort.c0.hdr.address = cci_rd_req_addr;
|
||||
af2cp_sTxPort.c0.hdr.address = cci_rd_req_addr;
|
||||
af2cp_sTxPort.c0.hdr.mdata = t_ccip_mdata'(cci_rd_req_tag);
|
||||
end
|
||||
|
||||
wire cci_mem_wr_req_fire = cci_mem_wr_req_valid && cci_mem_req_ready;
|
||||
|
||||
wire cci_rd_rsp_fire = cp2af_sRxPort.c0.rspValid
|
||||
wire cci_rd_rsp_fire = cp2af_sRxPort.c0.rspValid
|
||||
&& (cp2af_sRxPort.c0.hdr.resp_type == eRSP_RDLINE);
|
||||
|
||||
assign cci_rd_req_tag = CCI_RD_QUEUE_TAGW'(cci_rd_req_ctr);
|
||||
|
@ -672,16 +683,18 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
|
||||
wire [`CLOG2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads;
|
||||
wire cci_pending_reads_full;
|
||||
VX_pending_size #(
|
||||
VX_pending_size #(
|
||||
.SIZE (CCI_RD_QUEUE_SIZE)
|
||||
) cci_rd_pending_size (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.incr (cci_rd_req_fire),
|
||||
.decr (cci_rdq_pop),
|
||||
`UNUSED_PIN (empty),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
.full (cci_pending_reads_full),
|
||||
.size (cci_pending_reads),
|
||||
`UNUSED_PIN (empty)
|
||||
`UNUSED_PIN (alm_full),
|
||||
.size (cci_pending_reads)
|
||||
);
|
||||
|
||||
`UNUSED_VAR (cci_pending_reads)
|
||||
|
@ -699,29 +712,29 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
if (reset) begin
|
||||
cci_rd_req_valid <= 0;
|
||||
cci_rd_req_wait <= 0;
|
||||
end else begin
|
||||
if ((STATE_IDLE == state)
|
||||
end else begin
|
||||
if ((STATE_IDLE == state)
|
||||
&& (CMD_MEM_WRITE == cmd_type)) begin
|
||||
cci_rd_req_valid <= (cmd_data_size != 0);
|
||||
cci_rd_req_wait <= 0;
|
||||
end
|
||||
|
||||
cci_rd_req_valid <= (STATE_MEM_WRITE == state)
|
||||
cci_rd_req_valid <= (STATE_MEM_WRITE == state)
|
||||
&& (cci_rd_req_ctr_next != cmd_data_size)
|
||||
&& !cp2af_sRxPort.c0TxAlmFull;
|
||||
&& !cp2af_sRxPort.c0TxAlmFull;
|
||||
|
||||
if (cci_rd_req_fire
|
||||
if (cci_rd_req_fire
|
||||
&& (cci_rd_req_tag == CCI_RD_QUEUE_TAGW'(CCI_RD_WINDOW_SIZE-1))) begin
|
||||
cci_rd_req_wait <= 1; // end current request batch
|
||||
end
|
||||
|
||||
if (cci_rd_rsp_fire
|
||||
if (cci_rd_rsp_fire
|
||||
&& (cci_rd_rsp_ctr == CCI_RD_QUEUE_TAGW'(CCI_RD_WINDOW_SIZE-1))) begin
|
||||
cci_rd_req_wait <= 0; // begin new request batch
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
if ((STATE_IDLE == state)
|
||||
if ((STATE_IDLE == state)
|
||||
&& (CMD_MEM_WRITE == cmd_type)) begin
|
||||
cci_rd_req_addr <= cmd_io_addr;
|
||||
cci_rd_req_ctr <= '0;
|
||||
|
@ -731,7 +744,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
cmd_mem_wr_done <= 0;
|
||||
end
|
||||
|
||||
if (cci_rd_req_fire) begin
|
||||
if (cci_rd_req_fire) begin
|
||||
cci_rd_req_addr <= cci_rd_req_addr + 1;
|
||||
cci_rd_req_ctr <= cci_rd_req_ctr + $bits(cci_rd_req_ctr)'(1);
|
||||
`ifdef DBG_TRACE_AFU
|
||||
|
@ -745,9 +758,9 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
cci_mem_wr_req_addr_base <= cci_mem_wr_req_addr_base + CCI_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE);
|
||||
end
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: CCI Rd Rsp: idx=%0d, ctr=%0d, data=0x%0h\n", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data));
|
||||
`TRACE(2, ("%d: CCI Rd Rsp: idx=%0d, ctr=%0d, data=0x%h\n", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data));
|
||||
`endif
|
||||
end
|
||||
end
|
||||
|
||||
if (cci_rdq_pop) begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
|
@ -755,7 +768,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
`endif
|
||||
end
|
||||
|
||||
if (cci_mem_wr_req_fire) begin
|
||||
if (cci_mem_wr_req_fire) begin
|
||||
cci_mem_wr_req_ctr <= cci_mem_wr_req_ctr + CCI_ADDR_WIDTH'(1);
|
||||
if (cci_mem_wr_req_ctr == (cmd_data_size-1)) begin
|
||||
cmd_mem_wr_done <= 1;
|
||||
|
@ -763,14 +776,12 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
end
|
||||
end
|
||||
|
||||
`RESET_RELAY (cci_rdq_reset, reset);
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (CCI_RD_QUEUE_DATAW),
|
||||
.DEPTH (CCI_RD_QUEUE_SIZE)
|
||||
) cci_rd_req_queue (
|
||||
.clk (clk),
|
||||
.reset (cci_rdq_reset),
|
||||
.reset (reset),
|
||||
.push (cci_rdq_push),
|
||||
.pop (cci_rdq_pop),
|
||||
.data_in (cci_rdq_din),
|
||||
|
@ -788,13 +799,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
if (reset) begin
|
||||
dbg_cci_rd_rsp_mask <= '0;
|
||||
end else begin
|
||||
if (cci_rd_rsp_fire) begin
|
||||
if (cci_rd_rsp_fire) begin
|
||||
if (cci_rd_rsp_ctr == 0) begin
|
||||
dbg_cci_rd_rsp_mask <= (CCI_RD_WINDOW_SIZE'(1) << cci_rd_rsp_tag);
|
||||
end else begin
|
||||
dbg_cci_rd_rsp_mask <= (CCI_RD_WINDOW_SIZE'(1) << cci_rd_rsp_tag);
|
||||
end else begin
|
||||
assert(!dbg_cci_rd_rsp_mask[cci_rd_rsp_tag]);
|
||||
dbg_cci_rd_rsp_mask[cci_rd_rsp_tag] <= 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -817,21 +828,21 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
af2cp_sTxPort.c1.hdr = t_ccip_c1_ReqMemHdr'(0);
|
||||
af2cp_sTxPort.c1.hdr.sop = 1; // single line write mode
|
||||
af2cp_sTxPort.c1.hdr.address = cci_wr_req_addr;
|
||||
af2cp_sTxPort.c1.data = cci_wr_req_data;
|
||||
end
|
||||
af2cp_sTxPort.c1.data = cci_wr_req_data;
|
||||
end
|
||||
|
||||
wire cci_mem_rd_req_fire = cci_mem_rd_req_valid && cci_mem_req_ready;
|
||||
wire cci_mem_rd_rsp_fire = cci_mem_rsp_valid && cci_mem_rsp_ready;
|
||||
|
||||
wire cci_wr_rsp_fire = (STATE_MEM_READ == state)
|
||||
&& cp2af_sRxPort.c1.rspValid
|
||||
wire cci_wr_rsp_fire = (STATE_MEM_READ == state)
|
||||
&& cp2af_sRxPort.c1.rspValid
|
||||
&& (cp2af_sRxPort.c1.hdr.resp_type == eRSP_WRLINE);
|
||||
|
||||
wire [`CLOG2(CCI_RW_PENDING_SIZE+1)-1:0] cci_pending_writes;
|
||||
wire cci_pending_writes_empty;
|
||||
wire cci_pending_writes_full;
|
||||
|
||||
VX_pending_size #(
|
||||
VX_pending_size #(
|
||||
.SIZE (CCI_RW_PENDING_SIZE)
|
||||
) cci_wr_pending_size (
|
||||
.clk (clk),
|
||||
|
@ -839,16 +850,18 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.incr (cci_mem_rd_rsp_fire),
|
||||
.decr (cci_wr_rsp_fire),
|
||||
.empty (cci_pending_writes_empty),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
.full (cci_pending_writes_full),
|
||||
`UNUSED_PIN (alm_full),
|
||||
.size (cci_pending_writes)
|
||||
);
|
||||
|
||||
`UNUSED_VAR (cci_pending_writes)
|
||||
|
||||
assign cci_mem_rd_req_valid = (STATE_MEM_READ == state)
|
||||
assign cci_mem_rd_req_valid = (STATE_MEM_READ == state)
|
||||
&& ~cci_mem_rd_req_done;
|
||||
|
||||
assign cci_mem_rsp_ready = ~cp2af_sRxPort.c1TxAlmFull
|
||||
assign cci_mem_rsp_ready = ~cp2af_sRxPort.c1TxAlmFull
|
||||
&& ~cci_pending_writes_full;
|
||||
|
||||
assign cmd_mem_rd_done = cci_wr_req_done
|
||||
|
@ -861,42 +874,42 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
end else begin
|
||||
cci_wr_req_fire <= cci_mem_rd_rsp_fire;
|
||||
end
|
||||
|
||||
if ((STATE_IDLE == state)
|
||||
|
||||
if ((STATE_IDLE == state)
|
||||
&& (CMD_MEM_READ == cmd_type)) begin
|
||||
cci_mem_rd_req_ctr <= '0;
|
||||
cci_mem_rd_req_addr <= cmd_mem_addr;
|
||||
cci_mem_rd_req_done <= 0;
|
||||
cci_wr_req_ctr <= cmd_data_size;
|
||||
cci_wr_req_done <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
if (cci_mem_rd_req_fire) begin
|
||||
cci_mem_rd_req_addr <= cci_mem_rd_req_addr + CCI_ADDR_WIDTH'(1);
|
||||
cci_mem_rd_req_addr <= cci_mem_rd_req_addr + CCI_ADDR_WIDTH'(1);
|
||||
cci_mem_rd_req_ctr <= cci_mem_rd_req_ctr + CCI_ADDR_WIDTH'(1);
|
||||
if (cci_mem_rd_req_ctr == (cmd_data_size-1)) begin
|
||||
cci_mem_rd_req_done <= 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
cci_wr_req_addr <= cmd_io_addr + t_ccip_clAddr'(cci_mem_rsp_tag);
|
||||
cci_wr_req_data <= t_ccip_clData'(cci_mem_rsp_data);
|
||||
cci_wr_req_data <= t_ccip_clData'(cci_mem_rsp_data);
|
||||
|
||||
if (cci_wr_req_fire) begin
|
||||
`ASSERT(cci_wr_req_ctr != 0, ("runtime error"));
|
||||
`ASSERT(cci_wr_req_ctr != 0, ("runtime error"));
|
||||
cci_wr_req_ctr <= cci_wr_req_ctr - CCI_ADDR_WIDTH'(1);
|
||||
if (cci_wr_req_ctr == CCI_ADDR_WIDTH'(1)) begin
|
||||
cci_wr_req_done <= 1;
|
||||
end
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: CCI Wr Req: addr=0x%0h, rem=%0d, pending=%0d, data=0x%0h\n", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data));
|
||||
`TRACE(2, ("%d: CCI Wr Req: addr=0x%0h, rem=%0d, pending=%0d, data=0x%h\n", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data));
|
||||
`endif
|
||||
end
|
||||
|
||||
if (cci_wr_rsp_fire) begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: CCI Wr Rsp: pending=%0d\n", $time, cci_pending_writes));
|
||||
`endif
|
||||
if (cci_wr_rsp_fire) begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: CCI Wr Rsp: pending=%0d\n", $time, cci_pending_writes));
|
||||
`endif
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -916,7 +929,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
wire vx_dcr_wr_valid = (STATE_DCR_WRITE == state);
|
||||
wire [`VX_DCR_ADDR_WIDTH-1:0] vx_dcr_wr_addr = cmd_dcr_addr;
|
||||
wire [`VX_DCR_DATA_WIDTH-1:0] vx_dcr_wr_data = cmd_dcr_data;
|
||||
|
||||
|
||||
`SCOPE_IO_SWITCH (2)
|
||||
|
||||
Vortex vortex (
|
||||
|
@ -925,7 +938,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.clk (clk),
|
||||
.reset (reset || ~vx_running),
|
||||
|
||||
// Memory request
|
||||
// Memory request
|
||||
.mem_req_valid (vx_mem_req_valid),
|
||||
.mem_req_rw (vx_mem_req_rw),
|
||||
.mem_req_byteen (vx_mem_req_byteen),
|
||||
|
@ -934,7 +947,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.mem_req_tag (vx_mem_req_tag),
|
||||
.mem_req_ready (vx_mem_req_ready),
|
||||
|
||||
// Memory response
|
||||
// Memory response
|
||||
.mem_rsp_valid (vx_mem_rsp_valid),
|
||||
.mem_rsp_data (vx_mem_rsp_data),
|
||||
.mem_rsp_tag (vx_mem_rsp_tag),
|
||||
|
@ -944,7 +957,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.dcr_wr_valid (vx_dcr_wr_valid),
|
||||
.dcr_wr_addr (vx_dcr_wr_addr),
|
||||
.dcr_wr_data (vx_dcr_wr_data),
|
||||
|
||||
|
||||
// Status
|
||||
.busy (vx_busy)
|
||||
);
|
||||
|
@ -973,7 +986,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
|
||||
wire cout_q_push = vx_mem_req_valid && vx_mem_is_cout && ~cout_q_full;
|
||||
|
||||
wire cout_q_pop = cp2af_sRxPort.c0.mmioRdValid
|
||||
wire cout_q_pop = cp2af_sRxPort.c0.mmioRdValid
|
||||
&& (mmio_hdr.address == MMIO_STATUS)
|
||||
&& ~cout_q_empty;
|
||||
|
||||
|
@ -997,7 +1010,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
// SCOPE //////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef DBG_SCOPE_AFU
|
||||
`ifdef SCOPE
|
||||
wire mem_req_fire = mem_bus_if[0].req_valid && mem_bus_if[0].req_ready;
|
||||
wire mem_rsp_fire = mem_bus_if[0].rsp_valid && mem_bus_if[0].rsp_ready;
|
||||
wire avs_write_fire = avs_write[0] && ~avs_waitrequest[0];
|
||||
|
@ -1022,26 +1034,26 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.triggers({
|
||||
reset,
|
||||
state_changed,
|
||||
mem_req_fire,
|
||||
mem_rsp_fire,
|
||||
avs_write_fire,
|
||||
avs_read_fire,
|
||||
avs_waitrequest[0],
|
||||
avs_readdatavalid[0],
|
||||
cp2af_sRxPort.c0.mmioRdValid,
|
||||
cp2af_sRxPort.c0.mmioWrValid,
|
||||
cp2af_sRxPort.c0.rspValid,
|
||||
cp2af_sRxPort.c1.rspValid,
|
||||
af2cp_sTxPort.c0.valid,
|
||||
af2cp_sTxPort.c1.valid,
|
||||
cp2af_sRxPort.c0TxAlmFull,
|
||||
cp2af_sRxPort.c1TxAlmFull,
|
||||
af2cp_sTxPort.c2.mmioRdValid,
|
||||
cci_wr_req_fire,
|
||||
cci_wr_rsp_fire,
|
||||
cci_rd_req_fire,
|
||||
mem_req_fire,
|
||||
mem_rsp_fire,
|
||||
avs_write_fire,
|
||||
avs_read_fire,
|
||||
avs_waitrequest[0],
|
||||
avs_readdatavalid[0],
|
||||
cp2af_sRxPort.c0.mmioRdValid,
|
||||
cp2af_sRxPort.c0.mmioWrValid,
|
||||
cp2af_sRxPort.c0.rspValid,
|
||||
cp2af_sRxPort.c1.rspValid,
|
||||
af2cp_sTxPort.c0.valid,
|
||||
af2cp_sTxPort.c1.valid,
|
||||
cp2af_sRxPort.c0TxAlmFull,
|
||||
cp2af_sRxPort.c1TxAlmFull,
|
||||
af2cp_sTxPort.c2.mmioRdValid,
|
||||
cci_wr_req_fire,
|
||||
cci_wr_rsp_fire,
|
||||
cci_rd_req_fire,
|
||||
cci_rd_rsp_fire,
|
||||
cci_pending_reads_full,
|
||||
cci_pending_reads_full,
|
||||
cci_pending_writes_empty,
|
||||
cci_pending_writes_full
|
||||
}),
|
||||
|
@ -1067,7 +1079,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.bus_in(scope_bus_in_w[0]),
|
||||
.bus_out(scope_bus_out_w[0])
|
||||
);
|
||||
`endif
|
||||
`else
|
||||
`SCOPE_IO_UNUSED_W(0)
|
||||
`endif
|
||||
|
@ -1078,13 +1089,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < NUM_LOCAL_MEM_BANKS; ++i) begin
|
||||
if (avs_write[i] && ~avs_waitrequest[i]) begin
|
||||
`TRACE(2, ("%d: AVS Wr Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h, data=0x%0h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i], avs_writedata[i]));
|
||||
`TRACE(2, ("%d: AVS Wr Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h, data=0x%h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i], avs_writedata[i]));
|
||||
end
|
||||
if (avs_read[i] && ~avs_waitrequest[i]) begin
|
||||
if (avs_read[i] && ~avs_waitrequest[i]) begin
|
||||
`TRACE(2, ("%d: AVS Rd Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i]));
|
||||
end
|
||||
end
|
||||
if (avs_readdatavalid[i]) begin
|
||||
`TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%0h\n", $time, i, avs_readdata[i]));
|
||||
`TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%h\n", $time, i, avs_readdata[i]));
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -22,34 +22,34 @@ module VX_afu_ctrl #(
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk_en,
|
||||
|
||||
|
||||
input wire s_axi_awvalid,
|
||||
input wire [AXI_ADDR_WIDTH-1:0] s_axi_awaddr,
|
||||
input wire [AXI_ADDR_WIDTH-1:0] s_axi_awaddr,
|
||||
output wire s_axi_awready,
|
||||
|
||||
input wire s_axi_wvalid,
|
||||
input wire [AXI_DATA_WIDTH-1:0] s_axi_wdata,
|
||||
input wire [AXI_DATA_WIDTH/8-1:0] s_axi_wstrb,
|
||||
input wire [AXI_DATA_WIDTH/8-1:0] s_axi_wstrb,
|
||||
output wire s_axi_wready,
|
||||
|
||||
output wire s_axi_bvalid,
|
||||
output wire [1:0] s_axi_bresp,
|
||||
output wire [1:0] s_axi_bresp,
|
||||
input wire s_axi_bready,
|
||||
|
||||
input wire s_axi_arvalid,
|
||||
input wire [AXI_ADDR_WIDTH-1:0] s_axi_araddr,
|
||||
input wire [AXI_ADDR_WIDTH-1:0] s_axi_araddr,
|
||||
output wire s_axi_arready,
|
||||
|
||||
output wire s_axi_rvalid,
|
||||
output wire [AXI_DATA_WIDTH-1:0] s_axi_rdata,
|
||||
output wire [1:0] s_axi_rresp,
|
||||
input wire s_axi_rready,
|
||||
|
||||
output wire [1:0] s_axi_rresp,
|
||||
input wire s_axi_rready,
|
||||
|
||||
output wire ap_reset,
|
||||
output wire ap_start,
|
||||
input wire ap_done,
|
||||
input wire ap_ready,
|
||||
input wire ap_idle,
|
||||
input wire ap_idle,
|
||||
output wire interrupt,
|
||||
|
||||
`ifdef SCOPE
|
||||
|
@ -101,7 +101,7 @@ module VX_afu_ctrl #(
|
|||
// 0x48 : Control signal of MEM
|
||||
// (SC = Self Clear, COR = Clear on Read, TOW = Toggle on Write, COH = Clear on Handshake)
|
||||
|
||||
// Parameters
|
||||
// Parameters
|
||||
localparam
|
||||
ADDR_AP_CTRL = 8'h00,
|
||||
ADDR_GIE = 8'h04,
|
||||
|
@ -110,45 +110,47 @@ module VX_afu_ctrl #(
|
|||
|
||||
ADDR_DEV_0 = 8'h10,
|
||||
ADDR_DEV_1 = 8'h14,
|
||||
ADDR_DEV_CTRL = 8'h18,
|
||||
|
||||
//ADDR_DEV_CTRL = 8'h18,
|
||||
|
||||
ADDR_ISA_0 = 8'h1C,
|
||||
ADDR_ISA_1 = 8'h20,
|
||||
ADDR_ISA_CTRL = 8'h24,
|
||||
|
||||
//ADDR_ISA_CTRL = 8'h24,
|
||||
|
||||
ADDR_DCR_0 = 8'h28,
|
||||
ADDR_DCR_1 = 8'h2C,
|
||||
ADDR_DCR_CTRL = 8'h30,
|
||||
//ADDR_DCR_CTRL = 8'h30,
|
||||
|
||||
`ifdef SCOPE
|
||||
ADDR_SCP_0 = 8'h34,
|
||||
ADDR_SCP_1 = 8'h38,
|
||||
ADDR_SCP_CTRL = 8'h3C,
|
||||
//ADDR_SCP_CTRL = 8'h3C,
|
||||
`endif
|
||||
|
||||
ADDR_MEM_0 = 8'h40,
|
||||
ADDR_MEM_1 = 8'h44,
|
||||
ADDR_MEM_CTRL = 8'h48,
|
||||
|
||||
//ADDR_MEM_CTRL = 8'h48,
|
||||
|
||||
ADDR_BITS = 8;
|
||||
|
||||
localparam
|
||||
WSTATE_IDLE = 2'd0,
|
||||
WSTATE_DATA = 2'd1,
|
||||
WSTATE_RESP = 2'd2;
|
||||
|
||||
|
||||
localparam
|
||||
RSTATE_IDLE = 2'd0,
|
||||
RSTATE_DATA = 2'd1;
|
||||
|
||||
// device caps
|
||||
wire [63:0] dev_caps = {16'b0,
|
||||
8'(`SM_ENABLED ? `SMEM_LOG_SIZE : 0),
|
||||
16'(`NUM_CORES * `NUM_CLUSTERS),
|
||||
8'(`NUM_WARPS),
|
||||
8'(`NUM_THREADS),
|
||||
8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0),
|
||||
16'(`NUM_CORES * `NUM_CLUSTERS),
|
||||
8'(`NUM_WARPS),
|
||||
8'(`NUM_THREADS),
|
||||
8'(`IMPLEMENTATION_ID)};
|
||||
|
||||
wire [63:0] isa_caps = {32'(`MISA_EXT),
|
||||
2'(`CLOG2(`XLEN)-4),
|
||||
wire [63:0] isa_caps = {32'(`MISA_EXT),
|
||||
2'(`CLOG2(`XLEN)-4),
|
||||
30'(`MISA_STD)};
|
||||
|
||||
reg [1:0] wstate;
|
||||
|
@ -156,7 +158,7 @@ module VX_afu_ctrl #(
|
|||
wire [31:0] wmask;
|
||||
wire s_axi_aw_fire;
|
||||
wire s_axi_w_fire;
|
||||
|
||||
|
||||
reg [1:0] rstate;
|
||||
reg [31:0] rdata;
|
||||
wire [ADDR_BITS-1:0] raddr;
|
||||
|
@ -171,12 +173,12 @@ module VX_afu_ctrl #(
|
|||
reg [63:0] mem_r [AXI_NUM_BANKS];
|
||||
reg [31:0] dcra_r;
|
||||
reg [31:0] dcrv_r;
|
||||
reg dcr_wr_valid_r;
|
||||
|
||||
reg dcr_wr_valid_r;
|
||||
|
||||
`ifdef SCOPE
|
||||
|
||||
reg [63:0] scope_bus_wdata;
|
||||
reg [63:0] scope_bus_rdata;
|
||||
reg [63:0] scope_bus_rdata;
|
||||
reg [5:0] scope_bus_ctr;
|
||||
|
||||
reg cmd_scope_reading;
|
||||
|
@ -186,7 +188,7 @@ module VX_afu_ctrl #(
|
|||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
cmd_scope_reading <= 0;
|
||||
cmd_scope_writing <= 0;
|
||||
cmd_scope_writing <= 0;
|
||||
scope_bus_ctr <= '0;
|
||||
scope_bus_out_r <= 0;
|
||||
end else if (clk_en) begin
|
||||
|
@ -194,29 +196,29 @@ module VX_afu_ctrl #(
|
|||
scope_bus_wdata[31:0] <= (s_axi_wdata & wmask) | (scope_bus_wdata[31:0] & ~wmask);
|
||||
end
|
||||
if (s_axi_w_fire && waddr == ADDR_SCP_1) begin
|
||||
scope_bus_wdata[63:32] <= (s_axi_wdata & wmask) | (scope_bus_wdata[63:32] & ~wmask);
|
||||
scope_bus_wdata[63:32] <= (s_axi_wdata & wmask) | (scope_bus_wdata[63:32] & ~wmask);
|
||||
cmd_scope_writing <= 1;
|
||||
scope_bus_out_r <= 1;
|
||||
scope_bus_out_r <= 1;
|
||||
scope_bus_ctr <= 63;
|
||||
end
|
||||
end
|
||||
if (scope_bus_in) begin
|
||||
cmd_scope_reading <= 1;
|
||||
scope_bus_ctr <= 63;
|
||||
end
|
||||
end
|
||||
if (cmd_scope_reading) begin
|
||||
scope_bus_rdata <= {scope_bus_rdata[62:0], scope_bus_in};
|
||||
scope_bus_ctr <= scope_bus_ctr - 1;
|
||||
if (scope_bus_ctr == 0) begin
|
||||
cmd_scope_reading <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
if (cmd_scope_writing) begin
|
||||
scope_bus_out_r <= 1'(scope_bus_wdata >> scope_bus_ctr);
|
||||
scope_bus_ctr <= scope_bus_ctr - 1;
|
||||
if (scope_bus_ctr == 0) begin
|
||||
cmd_scope_writing <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -224,7 +226,7 @@ module VX_afu_ctrl #(
|
|||
|
||||
`endif
|
||||
|
||||
// AXI Write
|
||||
// AXI Write
|
||||
|
||||
assign s_axi_awready = (wstate == WSTATE_IDLE);
|
||||
assign s_axi_wready = (wstate == WSTATE_DATA);
|
||||
|
@ -259,14 +261,14 @@ module VX_afu_ctrl #(
|
|||
waddr <= s_axi_awaddr[ADDR_BITS-1:0];
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
// wdata
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
ap_start_r <= 0;
|
||||
ap_reset_r <= 0;
|
||||
auto_restart_r <= 0;
|
||||
|
||||
|
||||
gie_r <= 0;
|
||||
ier_r <= '0;
|
||||
isr_r <= '0;
|
||||
|
@ -287,7 +289,7 @@ module VX_afu_ctrl #(
|
|||
if (s_axi_w_fire) begin
|
||||
case (waddr)
|
||||
ADDR_AP_CTRL: begin
|
||||
if (s_axi_wstrb[0]) begin
|
||||
if (s_axi_wstrb[0]) begin
|
||||
if (s_axi_wdata[0])
|
||||
ap_start_r <= 1;
|
||||
if (s_axi_wdata[4])
|
||||
|
@ -317,16 +319,16 @@ module VX_afu_ctrl #(
|
|||
end
|
||||
default: begin
|
||||
for (integer i = 0; i < AXI_NUM_BANKS; ++i) begin
|
||||
if (waddr == (ADDR_MEM_0 + i * 12)) begin
|
||||
if (waddr == (ADDR_MEM_0 + 8'(i) * 8'd12)) begin
|
||||
mem_r[i][31:0] <= (s_axi_wdata & wmask) | (mem_r[i][31:0] & ~wmask);
|
||||
end
|
||||
if (waddr == (ADDR_MEM_1 + i * 12)) begin
|
||||
if (waddr == (ADDR_MEM_1 + 8'(i) * 8'd12)) begin
|
||||
mem_r[i][63:32] <= (s_axi_wdata & wmask) | (mem_r[i][63:32] & ~wmask);
|
||||
end
|
||||
end
|
||||
end
|
||||
endcase
|
||||
|
||||
|
||||
if (ier_r[0] & ap_done)
|
||||
isr_r[0] <= 1'b1;
|
||||
if (ier_r[1] & ap_ready)
|
||||
|
@ -341,10 +343,10 @@ module VX_afu_ctrl #(
|
|||
assign s_axi_rvalid = (rstate == RSTATE_DATA);
|
||||
assign s_axi_rdata = rdata;
|
||||
assign s_axi_rresp = 2'b00; // OKAY
|
||||
|
||||
|
||||
assign s_axi_ar_fire = s_axi_arvalid && s_axi_arready;
|
||||
assign raddr = s_axi_araddr[ADDR_BITS-1:0];
|
||||
|
||||
|
||||
// rstate
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
@ -414,6 +416,6 @@ module VX_afu_ctrl #(
|
|||
|
||||
assign dcr_wr_valid = dcr_wr_valid_r;
|
||||
assign dcr_wr_addr = `VX_DCR_ADDR_WIDTH'(dcra_r);
|
||||
assign dcr_wr_data = `VX_DCR_DATA_WIDTH'(dcrv_r);
|
||||
assign dcr_wr_data = `VX_DCR_DATA_WIDTH'(dcrv_r);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -13,12 +13,12 @@
|
|||
|
||||
`include "vortex_afu.vh"
|
||||
|
||||
module VX_afu_wrap #(
|
||||
module VX_afu_wrap #(
|
||||
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
|
||||
parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
|
||||
parameter C_M_AXI_MEM_ID_WIDTH = 16,
|
||||
parameter C_M_AXI_MEM_ADDR_WIDTH = 32,
|
||||
parameter C_M_AXI_MEM_DATA_WIDTH = 512
|
||||
parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH,
|
||||
parameter C_M_AXI_MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH,
|
||||
parameter C_M_AXI_MEM_DATA_WIDTH = `VX_MEM_DATA_WIDTH
|
||||
) (
|
||||
// System signals
|
||||
input wire ap_clk,
|
||||
|
@ -45,8 +45,8 @@ module VX_afu_wrap #(
|
|||
output wire s_axi_ctrl_bvalid,
|
||||
input wire s_axi_ctrl_bready,
|
||||
output wire [1:0] s_axi_ctrl_bresp,
|
||||
|
||||
output wire interrupt
|
||||
|
||||
output wire interrupt
|
||||
);
|
||||
localparam C_M_AXI_MEM_NUM_BANKS = `M_AXI_MEM_NUM_BANKS;
|
||||
|
||||
|
@ -62,7 +62,7 @@ module VX_afu_wrap #(
|
|||
wire m_axi_mem_wready_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [C_M_AXI_MEM_DATA_WIDTH-1:0] m_axi_mem_wdata_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [C_M_AXI_MEM_DATA_WIDTH/8-1:0] m_axi_mem_wstrb_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire m_axi_mem_wlast_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire m_axi_mem_wlast_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire m_axi_mem_bvalid_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire m_axi_mem_bready_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [C_M_AXI_MEM_ID_WIDTH-1:0] m_axi_mem_bid_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
|
@ -82,17 +82,16 @@ module VX_afu_wrap #(
|
|||
// convert memory interface to array
|
||||
`REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON);
|
||||
|
||||
wire clk = ap_clk;
|
||||
wire reset = ~ap_rst_n;
|
||||
|
||||
reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr;
|
||||
reg [15:0] vx_pending_writes;
|
||||
reg vx_busy_wait;
|
||||
reg vx_running;
|
||||
|
||||
|
||||
wire vx_busy;
|
||||
|
||||
wire [63:0] mem_base [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [63:0] mem_base [C_M_AXI_MEM_NUM_BANKS];
|
||||
|
||||
wire dcr_wr_valid;
|
||||
wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr;
|
||||
|
@ -109,7 +108,7 @@ module VX_afu_wrap #(
|
|||
`ifdef SCOPE
|
||||
wire scope_bus_in;
|
||||
wire scope_bus_out;
|
||||
wire scope_reset = reset;
|
||||
wire scope_reset = reset;
|
||||
`endif
|
||||
|
||||
always @(posedge ap_clk) begin
|
||||
|
@ -120,15 +119,15 @@ module VX_afu_wrap #(
|
|||
end else begin
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
if (ap_start) begin
|
||||
if (ap_start) begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: STATE RUN\n", $time));
|
||||
`endif
|
||||
`endif
|
||||
state <= STATE_RUN;
|
||||
vx_running <= 0;
|
||||
end
|
||||
end
|
||||
STATE_RUN: begin
|
||||
STATE_RUN: begin
|
||||
if (vx_running) begin
|
||||
if (vx_busy_wait) begin
|
||||
// wait until processor goes busy
|
||||
|
@ -151,7 +150,7 @@ module VX_afu_wrap #(
|
|||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: AFU: Begin execution\n", $time));
|
||||
`endif
|
||||
vx_running <= 1;
|
||||
vx_running <= 1;
|
||||
vx_busy_wait <= 1;
|
||||
end
|
||||
end
|
||||
|
@ -185,7 +184,7 @@ module VX_afu_wrap #(
|
|||
|
||||
always @(posedge ap_clk) begin
|
||||
if (state == STATE_RUN) begin
|
||||
vx_reset_ctr <= vx_reset_ctr + 1;
|
||||
vx_reset_ctr <= vx_reset_ctr + 1;
|
||||
end else begin
|
||||
vx_reset_ctr <= '0;
|
||||
end
|
||||
|
@ -197,9 +196,9 @@ module VX_afu_wrap #(
|
|||
.AXI_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS)
|
||||
) afu_ctrl (
|
||||
.clk (ap_clk),
|
||||
.reset (reset || ap_reset),
|
||||
.reset (reset || ap_reset),
|
||||
.clk_en (1'b1),
|
||||
|
||||
|
||||
.s_axi_awvalid (s_axi_ctrl_awvalid),
|
||||
.s_axi_awready (s_axi_ctrl_awready),
|
||||
.s_axi_awaddr (s_axi_ctrl_awaddr),
|
||||
|
@ -226,9 +225,9 @@ module VX_afu_wrap #(
|
|||
.interrupt (interrupt),
|
||||
|
||||
`ifdef SCOPE
|
||||
.scope_bus_in (scope_bus_out),
|
||||
.scope_bus_in (scope_bus_out),
|
||||
.scope_bus_out (scope_bus_in),
|
||||
`endif
|
||||
`endif
|
||||
|
||||
.mem_base (mem_base),
|
||||
|
||||
|
@ -237,8 +236,8 @@ module VX_afu_wrap #(
|
|||
.dcr_wr_data (dcr_wr_data)
|
||||
);
|
||||
|
||||
wire [`XLEN-1:0] m_axi_mem_awaddr_w [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [`XLEN-1:0] m_axi_mem_araddr_w [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_w [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_w [C_M_AXI_MEM_NUM_BANKS];
|
||||
|
||||
for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin
|
||||
assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_w[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]);
|
||||
|
@ -249,7 +248,7 @@ module VX_afu_wrap #(
|
|||
|
||||
Vortex_axi #(
|
||||
.AXI_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH),
|
||||
.AXI_ADDR_WIDTH (`XLEN),
|
||||
.AXI_ADDR_WIDTH (`MEM_ADDR_WIDTH),
|
||||
.AXI_TID_WIDTH (C_M_AXI_MEM_ID_WIDTH),
|
||||
.AXI_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS)
|
||||
) vortex_axi (
|
||||
|
@ -257,7 +256,7 @@ module VX_afu_wrap #(
|
|||
|
||||
.clk (ap_clk),
|
||||
.reset (reset || ap_reset || ~vx_running),
|
||||
|
||||
|
||||
.m_axi_awvalid (m_axi_mem_awvalid_a),
|
||||
.m_axi_awready (m_axi_mem_awready_a),
|
||||
.m_axi_awaddr (m_axi_mem_awaddr_w),
|
||||
|
@ -268,7 +267,7 @@ module VX_afu_wrap #(
|
|||
`UNUSED_PIN (m_axi_awlock),
|
||||
`UNUSED_PIN (m_axi_awcache),
|
||||
`UNUSED_PIN (m_axi_awprot),
|
||||
`UNUSED_PIN (m_axi_awqos),
|
||||
`UNUSED_PIN (m_axi_awqos),
|
||||
`UNUSED_PIN (m_axi_awregion),
|
||||
|
||||
.m_axi_wvalid (m_axi_mem_wvalid_a),
|
||||
|
@ -280,7 +279,7 @@ module VX_afu_wrap #(
|
|||
.m_axi_bvalid (m_axi_mem_bvalid_a),
|
||||
.m_axi_bready (m_axi_mem_bready_a),
|
||||
.m_axi_bid (m_axi_mem_bid_a),
|
||||
.m_axi_bresp (m_axi_mem_bresp_a),
|
||||
.m_axi_bresp (m_axi_mem_bresp_a),
|
||||
|
||||
.m_axi_arvalid (m_axi_mem_arvalid_a),
|
||||
.m_axi_arready (m_axi_mem_arready_a),
|
||||
|
@ -292,7 +291,7 @@ module VX_afu_wrap #(
|
|||
`UNUSED_PIN (m_axi_arlock),
|
||||
`UNUSED_PIN (m_axi_arcache),
|
||||
`UNUSED_PIN (m_axi_arprot),
|
||||
`UNUSED_PIN (m_axi_arqos),
|
||||
`UNUSED_PIN (m_axi_arqos),
|
||||
`UNUSED_PIN (m_axi_arregion),
|
||||
|
||||
.m_axi_rvalid (m_axi_mem_rvalid_a),
|
||||
|
@ -312,7 +311,6 @@ module VX_afu_wrap #(
|
|||
// SCOPE //////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef DBG_SCOPE_AFU
|
||||
`ifdef SCOPE
|
||||
`define TRIGGERS { \
|
||||
reset, \
|
||||
ap_start, \
|
||||
|
@ -331,35 +329,17 @@ module VX_afu_wrap #(
|
|||
VX_scope_tap #(
|
||||
.SCOPE_ID (0),
|
||||
.TRIGGERW ($bits(`TRIGGERS)),
|
||||
.PROBEW ($bits(`PROBES))
|
||||
.PROBEW ($bits(`PROBES))
|
||||
) scope_tap (
|
||||
.clk(clk),
|
||||
.reset(scope_reset_w[0]),
|
||||
.start(1'b0),
|
||||
.stop(1'b0),
|
||||
.triggers(`TRIGGERS),
|
||||
.probes(`PROBES),
|
||||
.bus_in(scope_bus_in_w[0]),
|
||||
.bus_out(scope_bus_out_w[0])
|
||||
.clk (clk),
|
||||
.reset (scope_reset_w[0]),
|
||||
.start (1'b0),
|
||||
.stop (1'b0),
|
||||
.triggers (`TRIGGERS),
|
||||
.probes (`PROBES),
|
||||
.bus_in (scope_bus_in_w[0]),
|
||||
.bus_out (scope_bus_out_w[0])
|
||||
);
|
||||
`endif
|
||||
`ifdef CHIPSCOPE
|
||||
ila_afu ila_afu_inst (
|
||||
.clk (ap_clk),
|
||||
.probe0 ({
|
||||
ap_start,
|
||||
ap_done,
|
||||
ap_idle,
|
||||
interrupt
|
||||
}),
|
||||
.probe1 ({
|
||||
vx_pending_writes,
|
||||
vx_busy_wait,
|
||||
vx_busy,
|
||||
vx_running
|
||||
})
|
||||
);
|
||||
`endif
|
||||
`else
|
||||
`SCOPE_IO_UNUSED_W(0)
|
||||
`endif
|
||||
|
@ -370,13 +350,13 @@ module VX_afu_wrap #(
|
|||
reg [`CLOG2(`RESET_DELAY+1)-1:0] assert_delay_ctr;
|
||||
reg assert_enabled;
|
||||
initial begin
|
||||
$assertoff(0, vortex_axi);
|
||||
end
|
||||
$assertoff(0, vortex_axi);
|
||||
end
|
||||
always @(posedge ap_clk) begin
|
||||
if (reset) begin
|
||||
assert_delay_ctr <= '0;
|
||||
assert_enabled <= 0;
|
||||
end else begin
|
||||
end else begin
|
||||
if (~assert_enabled) begin
|
||||
if (assert_delay_ctr == (`RESET_DELAY-1)) begin
|
||||
assert_enabled <= 1;
|
||||
|
@ -394,19 +374,19 @@ module VX_afu_wrap #(
|
|||
always @(posedge ap_clk) begin
|
||||
for (integer i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin
|
||||
if (m_axi_mem_awvalid_a[i] && m_axi_mem_awready_a[i]) begin
|
||||
`TRACE(2, ("%d: AFU Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i]));
|
||||
`TRACE(2, ("%d: AFU Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i]));
|
||||
end
|
||||
if (m_axi_mem_wvalid_a[i] && m_axi_mem_wready_a[i]) begin
|
||||
`TRACE(2, ("%d: AFU Wr Req [%0d]: data=0x%0h\n", $time, i, m_axi_mem_wdata_a[i]));
|
||||
`TRACE(2, ("%d: AFU Wr Req [%0d]: data=0x%h\n", $time, i, m_axi_mem_wdata_a[i]));
|
||||
end
|
||||
if (m_axi_mem_arvalid_a[i] && m_axi_mem_arready_a[i]) begin
|
||||
if (m_axi_mem_arvalid_a[i] && m_axi_mem_arready_a[i]) begin
|
||||
`TRACE(2, ("%d: AFU Rd Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_araddr_a[i], m_axi_mem_arid_a[i]));
|
||||
end
|
||||
if (m_axi_mem_rvalid_a[i] && m_axi_mem_rready_a[i]) begin
|
||||
`TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i]));
|
||||
`TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%h, tag=0x%0h\n", $time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i]));
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
|
||||
endmodule
|
||||
|
|
127
hw/rtl/cache/VX_bank_flush.sv
vendored
Normal file
127
hw/rtl/cache/VX_bank_flush.sv
vendored
Normal file
|
@ -0,0 +1,127 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
// VX_bank_flush: per-bank sequencer that walks a counter over the bank's
// lines (and, when WRITEBACK is set, over its ways) and exposes the current
// position on flush_line / flush_way.
//
// Sequence implemented below:
//   reset        -> STATE_INIT : flush_init is asserted while counter_r steps
//                                through 2**`CS_LINE_SEL_BITS line indices.
//   flush_begin  -> STATE_WAIT1: hold until mshr_empty is asserted.
//                -> STATE_FLUSH: flush_valid is asserted; counter_r advances
//                                only on cycles where flush_ready is high,
//                                covering 2**CTR_WIDTH entries.
//                -> STATE_WAIT2: (BANK_ID != 0 only) hold until bank_empty.
//                -> STATE_DONE : flush_end is asserted for one cycle, then
//                                the machine returns to STATE_IDLE.
//
// `CS_LINE_SEL_BITS and `CS_WAY_SEL_BITS are macros that are not defined in
// this block (presumably provided by VX_cache_define.vh - confirm).
module VX_bank_flush #(
|
||||
parameter BANK_ID = 0,
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 64,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 1,
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire flush_begin,
|
||||
output wire flush_end,
|
||||
output wire flush_init,
|
||||
output wire flush_valid,
|
||||
output wire [`CS_LINE_SEL_BITS-1:0] flush_line,
|
||||
output wire [NUM_WAYS-1:0] flush_way,
|
||||
input wire flush_ready,
|
||||
input wire mshr_empty,
|
||||
input wire bank_empty
|
||||
);
|
||||
// Iterating over the ways is only needed when writeback is enabled, so the
// counter is widened by `CS_WAY_SEL_BITS only in that configuration.
|
||||
localparam CTR_WIDTH = `CS_LINE_SEL_BITS + (WRITEBACK ? `CS_WAY_SEL_BITS : 0);
|
||||
|
||||
// State encodings (held in the 3-bit state_r / state_n registers below).
localparam STATE_IDLE = 0;
|
||||
localparam STATE_INIT = 1;
|
||||
localparam STATE_WAIT1 = 2;
|
||||
localparam STATE_FLUSH = 3;
|
||||
localparam STATE_WAIT2 = 4;
|
||||
localparam STATE_DONE = 5;
|
||||
|
||||
// state_r is the registered state; state_n is its combinational next value.
reg [2:0] state_r, state_n;
|
||||
|
||||
// Line index in the low `CS_LINE_SEL_BITS bits; when WRITEBACK is set the
// bits above them select the way (see flush_way below).
reg [CTR_WIDTH-1:0] counter_r;
|
||||
|
||||
// Next-state logic. state_n defaults to state_r, so every state holds until
// its exit condition is met.
// NOTE(review): encodings 6 and 7 have no case item; if state_r ever took one
// of those values it would hold there. Confirm this is unreachable or add a
// default item.
always @(*) begin
|
||||
state_n = state_r;
|
||||
case (state_r)
|
||||
STATE_IDLE: begin
|
||||
if (flush_begin) begin
|
||||
state_n = STATE_WAIT1;
|
||||
end
|
||||
end
|
||||
STATE_INIT: begin
|
||||
// the init pass ends after the last line index; way bits are not iterated here
if (counter_r == ((2 ** `CS_LINE_SEL_BITS)-1)) begin
|
||||
state_n = STATE_IDLE;
|
||||
end
|
||||
end
|
||||
STATE_WAIT1: begin
|
||||
// wait for pending requests to complete
|
||||
if (mshr_empty) begin
|
||||
state_n = STATE_FLUSH;
|
||||
end
|
||||
end
|
||||
STATE_FLUSH: begin
|
||||
// leave once the final counter value has been accepted (flush_ready high)
if (counter_r == ((2 ** CTR_WIDTH)-1) && flush_ready) begin
|
||||
state_n = (BANK_ID == 0) ? STATE_DONE : STATE_WAIT2;
|
||||
end
|
||||
end
|
||||
STATE_WAIT2: begin
|
||||
// ensure the bank is empty before notifying the cache flush unit,
|
||||
// because the flush request to lower caches only goes through bank0
|
||||
// and it is important that this request is sent out last.
|
||||
if (bank_empty) begin
|
||||
state_n = STATE_DONE;
|
||||
end
|
||||
end
|
||||
STATE_DONE: begin
|
||||
// generate a completion pulse
|
||||
state_n = STATE_IDLE;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
// State and counter registers. Reset enters STATE_INIT with the counter
// cleared. The counter increments every cycle in STATE_INIT and on
// flush_ready cycles in STATE_FLUSH, holds in the other non-idle states,
// and is cleared whenever the machine is in STATE_IDLE.
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state_r <= STATE_INIT;
|
||||
counter_r <= '0;
|
||||
end else begin
|
||||
state_r <= state_n;
|
||||
if (state_r != STATE_IDLE) begin
|
||||
if ((state_r == STATE_INIT)
|
||||
|| ((state_r == STATE_FLUSH) && flush_ready)) begin
|
||||
counter_r <= counter_r + CTR_WIDTH'(1);
|
||||
end
|
||||
end else begin
|
||||
counter_r <= '0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// Status outputs are direct decodes of the registered state; flush_line is
// the line-index portion of the counter.
assign flush_end = (state_r == STATE_DONE);
|
||||
assign flush_init = (state_r == STATE_INIT);
|
||||
assign flush_valid = (state_r == STATE_FLUSH);
|
||||
assign flush_line = counter_r[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
// Way selection: with writeback and at least one way-select bit, flush_way is
// a one-hot decode of the counter's way bits; otherwise all ways are selected.
// NOTE(review): during STATE_INIT the way bits of counter_r stay zero, so in
// the WRITEBACK configuration flush_way selects only way 0 while flush_init
// is asserted - confirm consumers do not rely on flush_way during init.
if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin
|
||||
reg [NUM_WAYS-1:0] flush_way_r;
|
||||
always @(*) begin
|
||||
flush_way_r = '0;
|
||||
flush_way_r[counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]] = 1;
|
||||
end
|
||||
assign flush_way = flush_way_r;
|
||||
end else begin
|
||||
assign flush_way = {NUM_WAYS{1'b1}};
|
||||
end
|
||||
|
||||
endmodule
|
372
hw/rtl/cache/VX_cache.sv
vendored
372
hw/rtl/cache/VX_cache.sv
vendored
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,15 +14,15 @@
|
|||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_cache import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
|
||||
// Number of Word requests per cycle
|
||||
parameter NUM_REQS = 4,
|
||||
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 4096,
|
||||
parameter CACHE_SIZE = 4096,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 64,
|
||||
parameter LINE_SIZE = 64,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of associative ways
|
||||
|
@ -33,7 +33,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 2,
|
||||
// Miss Reservation Queue Knob
|
||||
parameter MSHR_SIZE = 8,
|
||||
parameter MSHR_SIZE = 8,
|
||||
// Memory Response Queue Size
|
||||
parameter MRSQ_SIZE = 0,
|
||||
// Memory Request Queue Size
|
||||
|
@ -42,6 +42,12 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
|
||||
// Enable dirty bytes on writeback
|
||||
parameter DIRTY_BYTES = 0,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
|
@ -49,16 +55,16 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
parameter TAG_WIDTH = UUID_WIDTH + 1,
|
||||
|
||||
// Core response output register
|
||||
parameter CORE_OUT_REG = 0,
|
||||
parameter CORE_OUT_BUF = 0,
|
||||
|
||||
// Memory request output register
|
||||
parameter MEM_OUT_REG = 0
|
||||
) (
|
||||
parameter MEM_OUT_BUF = 0
|
||||
) (
|
||||
// PERF
|
||||
`ifdef PERF_ENABLE
|
||||
output cache_perf_t cache_perf,
|
||||
`endif
|
||||
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
@ -66,8 +72,13 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
VX_mem_bus_if.master mem_bus_if
|
||||
);
|
||||
|
||||
`STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid parameter"))
|
||||
`STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter"))
|
||||
`STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter: number of banks must be power of 2"))
|
||||
`STATIC_ASSERT(WRITE_ENABLE || !WRITEBACK, ("invalid parameter: writeback requires write enable"))
|
||||
`STATIC_ASSERT(WRITEBACK || !DIRTY_BYTES, ("invalid parameter: dirty bytes require writeback"))
|
||||
|
||||
// In writeback mode, memory fill response may issue a new memory request to handle evicted blocks.
|
||||
// We need to ensure that the memory request queue never fills up to avoid deadlock.
|
||||
`STATIC_ASSERT(!WRITEBACK || (MREQ_SIZE >= MSHR_SIZE), ("invalid parameter: writeback requires MREQ_SIZE >= MSHR_SIZE"))
|
||||
|
||||
localparam REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS);
|
||||
localparam WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS);
|
||||
|
@ -79,35 +90,43 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
|
||||
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
|
||||
localparam LINE_ADDR_WIDTH = (`CS_WORD_ADDR_WIDTH - BANK_SEL_BITS - WORD_SEL_BITS);
|
||||
localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH;
|
||||
localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + 1;
|
||||
localparam CORE_RSP_DATAW = WORD_WIDTH + TAG_WIDTH;
|
||||
|
||||
localparam CORE_REQ_BUF_ENABLE = (NUM_BANKS != 1) || (NUM_REQS != 1);
|
||||
localparam MEM_REQ_BUF_ENABLE = (NUM_BANKS != 1);
|
||||
|
||||
localparam REQ_XBAR_BUF = (NUM_REQS > 4) ? 2 : 0;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
wire [NUM_BANKS-1:0] perf_read_miss_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_write_miss_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_mshr_stall_per_bank;
|
||||
`endif
|
||||
|
||||
wire [NUM_REQS-1:0] core_req_valid;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr;
|
||||
wire [NUM_REQS-1:0] core_req_rw;
|
||||
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data;
|
||||
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag;
|
||||
wire [NUM_REQS-1:0] core_req_ready;
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (WORD_SIZE),
|
||||
.TAG_WIDTH (TAG_WIDTH)
|
||||
) core_bus2_if[NUM_REQS]();
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_valid[i] = core_bus_if[i].req_valid;
|
||||
assign core_req_addr[i] = core_bus_if[i].req_data.addr;
|
||||
assign core_req_rw[i] = core_bus_if[i].req_data.rw;
|
||||
assign core_req_byteen[i] = core_bus_if[i].req_data.byteen;
|
||||
assign core_req_data[i] = core_bus_if[i].req_data.data;
|
||||
assign core_req_tag[i] = core_bus_if[i].req_data.tag;
|
||||
assign core_bus_if[i].req_ready = core_req_ready[i];
|
||||
end
|
||||
wire [NUM_BANKS-1:0] per_bank_flush_begin;
|
||||
wire [NUM_BANKS-1:0] per_bank_flush_end;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_fire;
|
||||
|
||||
VX_cache_flush #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.BANK_SEL_LATENCY (`TO_OUT_BUF_REG(REQ_XBAR_BUF)) // bank xbar latency
|
||||
) flush_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.core_bus_in_if (core_bus_if),
|
||||
.core_bus_out_if (core_bus2_if),
|
||||
.bank_req_fire (per_bank_core_req_fire),
|
||||
.flush_begin (per_bank_flush_begin),
|
||||
.flush_end (per_bank_flush_end)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
@ -117,23 +136,23 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s;
|
||||
wire [NUM_REQS-1:0] core_rsp_ready_s;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
`RESET_RELAY_EX (core_rsp_reset, reset, NUM_REQS, `MAX_FANOUT);
|
||||
|
||||
`RESET_RELAY (core_rsp_reset, reset);
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`CS_WORD_WIDTH + TAG_WIDTH),
|
||||
.SIZE (CORE_REQ_BUF_ENABLE ? `OUT_REG_TO_EB_SIZE(CORE_OUT_REG) : 0),
|
||||
.OUT_REG (`OUT_REG_TO_EB_REG(CORE_OUT_REG))
|
||||
.SIZE (CORE_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF))
|
||||
) core_rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (core_rsp_reset),
|
||||
.reset (core_rsp_reset[i]),
|
||||
.valid_in (core_rsp_valid_s[i]),
|
||||
.ready_in (core_rsp_ready_s[i]),
|
||||
.data_in ({core_rsp_data_s[i], core_rsp_tag_s[i]}),
|
||||
.data_out ({core_bus_if[i].rsp_data.data, core_bus_if[i].rsp_data.tag}),
|
||||
.valid_out (core_bus_if[i].rsp_valid),
|
||||
.ready_out (core_bus_if[i].rsp_ready)
|
||||
.data_out ({core_bus2_if[i].rsp_data.data, core_bus2_if[i].rsp_data.tag}),
|
||||
.valid_out (core_bus2_if[i].rsp_valid),
|
||||
.ready_out (core_bus2_if[i].rsp_ready)
|
||||
);
|
||||
end
|
||||
|
||||
|
@ -146,25 +165,28 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [LINE_SIZE-1:0] mem_req_byteen_s;
|
||||
wire [`CS_LINE_WIDTH-1:0] mem_req_data_s;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_s;
|
||||
wire mem_req_flush_s;
|
||||
wire mem_req_ready_s;
|
||||
|
||||
`RESET_RELAY (mem_req_buf_reset, reset);
|
||||
wire mem_bus_if_flush;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH),
|
||||
.SIZE (MEM_REQ_BUF_ENABLE ? `OUT_REG_TO_EB_SIZE(MEM_OUT_REG) : 0),
|
||||
.OUT_REG (`OUT_REG_TO_EB_REG(MEM_OUT_REG))
|
||||
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1),
|
||||
.SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
) mem_req_buf (
|
||||
.clk (clk),
|
||||
.reset (mem_req_buf_reset),
|
||||
.valid_in (mem_req_valid_s),
|
||||
.ready_in (mem_req_ready_s),
|
||||
.data_in ({mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_data_s, mem_req_tag_s}),
|
||||
.data_out ({mem_bus_if.req_data.rw, mem_bus_if.req_data.byteen, mem_bus_if.req_data.addr, mem_bus_if.req_data.data, mem_bus_if.req_data.tag}),
|
||||
.valid_out (mem_bus_if.req_valid),
|
||||
.reset (reset),
|
||||
.valid_in (mem_req_valid_s),
|
||||
.ready_in (mem_req_ready_s),
|
||||
.data_in ({mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_data_s, mem_req_tag_s, mem_req_flush_s}),
|
||||
.data_out ({mem_bus_if.req_data.rw, mem_bus_if.req_data.byteen, mem_bus_if.req_data.addr, mem_bus_if.req_data.data, mem_bus_if.req_data.tag, mem_bus_if_flush}),
|
||||
.valid_out (mem_bus_if.req_valid),
|
||||
.ready_out (mem_bus_if.req_ready)
|
||||
);
|
||||
|
||||
assign mem_bus_if.req_data.atype = mem_bus_if_flush ? `ADDR_TYPE_WIDTH'(1 << `ADDR_TYPE_FLUSH) : '0;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Memory response buffering
|
||||
|
@ -173,44 +195,23 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_s;
|
||||
wire mem_rsp_ready_s;
|
||||
|
||||
`RESET_RELAY (mem_rsp_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (MEM_TAG_WIDTH + `CS_LINE_WIDTH),
|
||||
.DATAW (MEM_TAG_WIDTH + `CS_LINE_WIDTH),
|
||||
.SIZE (MRSQ_SIZE),
|
||||
.OUT_REG (MRSQ_SIZE > 2)
|
||||
) mem_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (mem_rsp_reset),
|
||||
.reset (reset),
|
||||
.valid_in (mem_bus_if.rsp_valid),
|
||||
.ready_in (mem_bus_if.rsp_ready),
|
||||
.data_in ({mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data}),
|
||||
.data_out ({mem_rsp_tag_s, mem_rsp_data_s}),
|
||||
.data_in ({mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data}),
|
||||
.data_out ({mem_rsp_tag_s, mem_rsp_data_s}),
|
||||
.valid_out (mem_rsp_valid_s),
|
||||
.ready_out (mem_rsp_ready_s)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [`CS_LINE_SEL_BITS-1:0] init_line_sel;
|
||||
wire init_enable;
|
||||
|
||||
`RESET_RELAY (init_reset, reset);
|
||||
|
||||
VX_cache_init #(
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_WAYS (NUM_WAYS)
|
||||
) cache_init (
|
||||
.clk (clk),
|
||||
.reset (init_reset),
|
||||
.addr_out (init_line_sel),
|
||||
.valid_out (init_enable)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_valid;
|
||||
wire [NUM_BANKS-1:0][`CS_LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_rw;
|
||||
|
@ -219,25 +220,28 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_req_data;
|
||||
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_req_tag;
|
||||
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_req_idx;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_flush;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_ready;
|
||||
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_rsp_valid;
|
||||
wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_rsp_data;
|
||||
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_rsp_tag;
|
||||
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_rsp_idx;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_rsp_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_valid;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_valid;
|
||||
wire [NUM_BANKS-1:0][`CS_MEM_ADDR_WIDTH-1:0] per_bank_mem_req_addr;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_rw;
|
||||
wire [NUM_BANKS-1:0][WORD_SEL_WIDTH-1:0] per_bank_mem_req_wsel;
|
||||
wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_mem_req_byteen;
|
||||
wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_mem_req_data;
|
||||
wire [NUM_BANKS-1:0][LINE_SIZE-1:0] per_bank_mem_req_byteen;
|
||||
wire [NUM_BANKS-1:0][`CS_LINE_WIDTH-1:0] per_bank_mem_req_data;
|
||||
wire [NUM_BANKS-1:0][MSHR_ADDR_WIDTH-1:0] per_bank_mem_req_id;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_flush;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready;
|
||||
|
||||
|
||||
assign per_bank_core_req_fire = per_bank_core_req_valid & per_bank_mem_req_ready;
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign mem_rsp_ready_s = per_bank_mem_rsp_ready;
|
||||
end else begin
|
||||
|
@ -246,12 +250,33 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
|
||||
// Bank requests dispatch
|
||||
|
||||
wire [NUM_REQS-1:0][CORE_REQ_DATAW-1:0] core_req_data_in;
|
||||
wire [NUM_BANKS-1:0][CORE_REQ_DATAW-1:0] core_req_data_out;
|
||||
wire [NUM_REQS-1:0] core_req_valid;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr;
|
||||
wire [NUM_REQS-1:0] core_req_rw;
|
||||
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data;
|
||||
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag;
|
||||
wire [NUM_REQS-1:0] core_req_flush;
|
||||
wire [NUM_REQS-1:0] core_req_ready;
|
||||
|
||||
wire [NUM_REQS-1:0][LINE_ADDR_WIDTH-1:0] core_req_line_addr;
|
||||
wire [NUM_REQS-1:0][BANK_SEL_WIDTH-1:0] core_req_bid;
|
||||
wire [NUM_REQS-1:0][WORD_SEL_WIDTH-1:0] core_req_wsel;
|
||||
|
||||
wire [NUM_REQS-1:0][CORE_REQ_DATAW-1:0] core_req_data_in;
|
||||
wire [NUM_BANKS-1:0][CORE_REQ_DATAW-1:0] core_req_data_out;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_valid[i] = core_bus2_if[i].req_valid;
|
||||
assign core_req_rw[i] = core_bus2_if[i].req_data.rw;
|
||||
assign core_req_byteen[i] = core_bus2_if[i].req_data.byteen;
|
||||
assign core_req_addr[i] = core_bus2_if[i].req_data.addr;
|
||||
assign core_req_data[i] = core_bus2_if[i].req_data.data;
|
||||
assign core_req_tag[i] = core_bus2_if[i].req_data.tag;
|
||||
assign core_req_flush[i] = core_bus2_if[i].req_data.atype[`ADDR_TYPE_FLUSH];
|
||||
assign core_bus2_if[i].req_ready = core_req_ready[i];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
if (WORDS_PER_LINE > 1) begin
|
||||
assign core_req_wsel[i] = core_req_addr[i][0 +: WORD_SEL_BITS];
|
||||
|
@ -274,9 +299,11 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
core_req_line_addr[i],
|
||||
core_req_rw[i],
|
||||
core_req_wsel[i],
|
||||
core_req_byteen[i],
|
||||
core_req_byteen[i],
|
||||
core_req_data[i],
|
||||
core_req_tag[i]};
|
||||
core_req_tag[i],
|
||||
core_req_flush[i]
|
||||
};
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
|
@ -285,11 +312,13 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
|
||||
`RESET_RELAY (req_xbar_reset, reset);
|
||||
|
||||
VX_stream_xbar #(
|
||||
VX_stream_xbar #(
|
||||
.NUM_INPUTS (NUM_REQS),
|
||||
.NUM_OUTPUTS (NUM_BANKS),
|
||||
.DATAW (CORE_REQ_DATAW),
|
||||
.PERF_CTR_BITS (`PERF_CTR_BITS)
|
||||
.PERF_CTR_BITS (`PERF_CTR_BITS),
|
||||
.ARBITER ("F"),
|
||||
.OUT_BUF (REQ_XBAR_BUF)
|
||||
) req_xbar (
|
||||
.clk (clk),
|
||||
.reset (req_xbar_reset),
|
||||
|
@ -313,27 +342,29 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
per_bank_core_req_addr[i],
|
||||
per_bank_core_req_rw[i],
|
||||
per_bank_core_req_wsel[i],
|
||||
per_bank_core_req_byteen[i],
|
||||
per_bank_core_req_byteen[i],
|
||||
per_bank_core_req_data[i],
|
||||
per_bank_core_req_tag[i]} = core_req_data_out[i];
|
||||
per_bank_core_req_tag[i],
|
||||
per_bank_core_req_flush[i]
|
||||
} = core_req_data_out[i];
|
||||
end
|
||||
|
||||
|
||||
// Banks access
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
for (genvar bank_id = 0; bank_id < NUM_BANKS; ++bank_id) begin : banks
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr;
|
||||
wire curr_bank_mem_rsp_valid;
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign curr_bank_mem_rsp_valid = mem_rsp_valid_s;
|
||||
end else begin
|
||||
assign curr_bank_mem_rsp_valid = mem_rsp_valid_s && (`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s) == i);
|
||||
assign curr_bank_mem_rsp_valid = mem_rsp_valid_s && (`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s) == bank_id);
|
||||
end
|
||||
|
||||
`RESET_RELAY (bank_reset, reset);
|
||||
|
||||
VX_cache_bank #(
|
||||
.BANK_ID (i),
|
||||
.INSTANCE_ID (INSTANCE_ID),
|
||||
|
||||
VX_cache_bank #(
|
||||
.BANK_ID (bank_id),
|
||||
.INSTANCE_ID ($sformatf("%s-bank%0d", INSTANCE_ID, bank_id)),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
|
@ -344,65 +375,67 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.MSHR_SIZE (MSHR_SIZE),
|
||||
.MREQ_SIZE (MREQ_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.DIRTY_BYTES (DIRTY_BYTES),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.CORE_OUT_REG (CORE_REQ_BUF_ENABLE ? 0 : CORE_OUT_REG),
|
||||
.MEM_OUT_REG (MEM_REQ_BUF_ENABLE ? 0 : MEM_OUT_REG)
|
||||
) bank (
|
||||
.CORE_OUT_BUF (CORE_REQ_BUF_ENABLE ? 0 : CORE_OUT_BUF),
|
||||
.MEM_OUT_BUF (MEM_REQ_BUF_ENABLE ? 0 : MEM_OUT_BUF)
|
||||
) bank (
|
||||
.clk (clk),
|
||||
.reset (bank_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_read_misses (perf_read_miss_per_bank[i]),
|
||||
.perf_write_misses (perf_write_miss_per_bank[i]),
|
||||
.perf_mshr_stalls (perf_mshr_stall_per_bank[i]),
|
||||
.perf_read_misses (perf_read_miss_per_bank[bank_id]),
|
||||
.perf_write_misses (perf_write_miss_per_bank[bank_id]),
|
||||
.perf_mshr_stalls (perf_mshr_stall_per_bank[bank_id]),
|
||||
`endif
|
||||
|
||||
// Core request
|
||||
.core_req_valid (per_bank_core_req_valid[i]),
|
||||
.core_req_addr (per_bank_core_req_addr[i]),
|
||||
.core_req_rw (per_bank_core_req_rw[i]),
|
||||
.core_req_wsel (per_bank_core_req_wsel[i]),
|
||||
.core_req_byteen (per_bank_core_req_byteen[i]),
|
||||
.core_req_data (per_bank_core_req_data[i]),
|
||||
.core_req_tag (per_bank_core_req_tag[i]),
|
||||
.core_req_idx (per_bank_core_req_idx[i]),
|
||||
.core_req_ready (per_bank_core_req_ready[i]),
|
||||
|
||||
// Core response
|
||||
.core_rsp_valid (per_bank_core_rsp_valid[i]),
|
||||
.core_rsp_data (per_bank_core_rsp_data[i]),
|
||||
.core_rsp_tag (per_bank_core_rsp_tag[i]),
|
||||
.core_rsp_idx (per_bank_core_rsp_idx[i]),
|
||||
.core_rsp_ready (per_bank_core_rsp_ready[i]),
|
||||
// Core request
|
||||
.core_req_valid (per_bank_core_req_valid[bank_id]),
|
||||
.core_req_addr (per_bank_core_req_addr[bank_id]),
|
||||
.core_req_rw (per_bank_core_req_rw[bank_id]),
|
||||
.core_req_wsel (per_bank_core_req_wsel[bank_id]),
|
||||
.core_req_byteen (per_bank_core_req_byteen[bank_id]),
|
||||
.core_req_data (per_bank_core_req_data[bank_id]),
|
||||
.core_req_tag (per_bank_core_req_tag[bank_id]),
|
||||
.core_req_idx (per_bank_core_req_idx[bank_id]),
|
||||
.core_req_flush (per_bank_core_req_flush[bank_id]),
|
||||
.core_req_ready (per_bank_core_req_ready[bank_id]),
|
||||
|
||||
// Core response
|
||||
.core_rsp_valid (per_bank_core_rsp_valid[bank_id]),
|
||||
.core_rsp_data (per_bank_core_rsp_data[bank_id]),
|
||||
.core_rsp_tag (per_bank_core_rsp_tag[bank_id]),
|
||||
.core_rsp_idx (per_bank_core_rsp_idx[bank_id]),
|
||||
.core_rsp_ready (per_bank_core_rsp_ready[bank_id]),
|
||||
|
||||
// Memory request
|
||||
.mem_req_valid (per_bank_mem_req_valid[i]),
|
||||
.mem_req_valid (per_bank_mem_req_valid[bank_id]),
|
||||
.mem_req_addr (curr_bank_mem_req_addr),
|
||||
.mem_req_rw (per_bank_mem_req_rw[i]),
|
||||
.mem_req_wsel (per_bank_mem_req_wsel[i]),
|
||||
.mem_req_byteen (per_bank_mem_req_byteen[i]),
|
||||
.mem_req_data (per_bank_mem_req_data[i]),
|
||||
.mem_req_id (per_bank_mem_req_id[i]),
|
||||
.mem_req_ready (per_bank_mem_req_ready[i]),
|
||||
.mem_req_rw (per_bank_mem_req_rw[bank_id]),
|
||||
.mem_req_byteen (per_bank_mem_req_byteen[bank_id]),
|
||||
.mem_req_data (per_bank_mem_req_data[bank_id]),
|
||||
.mem_req_id (per_bank_mem_req_id[bank_id]),
|
||||
.mem_req_flush (per_bank_mem_req_flush[bank_id]),
|
||||
.mem_req_ready (per_bank_mem_req_ready[bank_id]),
|
||||
|
||||
// Memory response
|
||||
.mem_rsp_valid (curr_bank_mem_rsp_valid),
|
||||
.mem_rsp_data (mem_rsp_data_s),
|
||||
.mem_rsp_id (`CS_MEM_TAG_TO_REQ_ID(mem_rsp_tag_s)),
|
||||
.mem_rsp_ready (per_bank_mem_rsp_ready[i]),
|
||||
.mem_rsp_ready (per_bank_mem_rsp_ready[bank_id]),
|
||||
|
||||
// initialization
|
||||
.init_enable (init_enable),
|
||||
.init_line_sel (init_line_sel)
|
||||
.flush_begin (per_bank_flush_begin[bank_id]),
|
||||
.flush_end (per_bank_flush_end[bank_id])
|
||||
);
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign per_bank_mem_req_addr[i] = curr_bank_mem_req_addr;
|
||||
assign per_bank_mem_req_addr[bank_id] = curr_bank_mem_req_addr;
|
||||
end else begin
|
||||
assign per_bank_mem_req_addr[i] = `CS_LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, i);
|
||||
assign per_bank_mem_req_addr[bank_id] = `CS_LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, bank_id);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// Bank responses gather
|
||||
|
||||
|
@ -418,7 +451,8 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
VX_stream_xbar #(
|
||||
.NUM_INPUTS (NUM_BANKS),
|
||||
.NUM_OUTPUTS (NUM_REQS),
|
||||
.DATAW (CORE_RSP_DATAW)
|
||||
.DATAW (CORE_RSP_DATAW),
|
||||
.ARBITER ("F")
|
||||
) rsp_xbar (
|
||||
.clk (clk),
|
||||
.reset (rsp_xbar_reset),
|
||||
|
@ -442,39 +476,39 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire mem_req_valid_p;
|
||||
wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_p;
|
||||
wire mem_req_rw_p;
|
||||
wire [WORD_SEL_WIDTH-1:0] mem_req_wsel_p;
|
||||
wire [WORD_SIZE-1:0] mem_req_byteen_p;
|
||||
wire [`CS_WORD_WIDTH-1:0] mem_req_data_p;
|
||||
wire [LINE_SIZE-1:0] mem_req_byteen_p;
|
||||
wire [`CS_LINE_WIDTH-1:0] mem_req_data_p;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_p;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mem_req_id_p;
|
||||
wire mem_req_flush_p;
|
||||
wire mem_req_ready_p;
|
||||
|
||||
// Memory request arbitration
|
||||
|
||||
wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + `CS_WORD_WIDTH)-1:0] data_in;
|
||||
wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + 1)-1:0] data_in;
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign data_in[i] = {per_bank_mem_req_addr[i],
|
||||
per_bank_mem_req_rw[i],
|
||||
per_bank_mem_req_wsel[i],
|
||||
per_bank_mem_req_byteen[i],
|
||||
per_bank_mem_req_data[i],
|
||||
per_bank_mem_req_id[i]};
|
||||
assign data_in[i] = {
|
||||
per_bank_mem_req_addr[i],
|
||||
per_bank_mem_req_rw[i],
|
||||
per_bank_mem_req_byteen[i],
|
||||
per_bank_mem_req_data[i],
|
||||
per_bank_mem_req_id[i],
|
||||
per_bank_mem_req_flush[i]
|
||||
};
|
||||
end
|
||||
|
||||
`RESET_RELAY (mem_req_arb_reset, reset);
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (NUM_BANKS),
|
||||
.DATAW (`CS_MEM_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + `CS_WORD_WIDTH + MSHR_ADDR_WIDTH),
|
||||
.ARBITER ("R")
|
||||
.DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + MSHR_ADDR_WIDTH + 1),
|
||||
.ARBITER ("F")
|
||||
) mem_req_arb (
|
||||
.clk (clk),
|
||||
.reset (mem_req_arb_reset),
|
||||
.reset (reset),
|
||||
.valid_in (per_bank_mem_req_valid),
|
||||
.ready_in (per_bank_mem_req_ready),
|
||||
.data_in (data_in),
|
||||
.data_out ({mem_req_addr_p, mem_req_rw_p, mem_req_wsel_p, mem_req_byteen_p, mem_req_data_p, mem_req_id_p}),
|
||||
.data_out ({mem_req_addr_p, mem_req_rw_p, mem_req_byteen_p, mem_req_data_p, mem_req_id_p, mem_req_flush_p}),
|
||||
.valid_out (mem_req_valid_p),
|
||||
.ready_out (mem_req_ready_p),
|
||||
`UNUSED_PIN (sel_out)
|
||||
|
@ -482,44 +516,28 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
|
||||
if (NUM_BANKS > 1) begin
|
||||
wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr_p);
|
||||
assign mem_req_tag_p = MEM_TAG_WIDTH'({mem_req_bank_id, mem_req_id_p});
|
||||
assign mem_req_tag_p = MEM_TAG_WIDTH'({mem_req_bank_id, mem_req_id_p});
|
||||
end else begin
|
||||
assign mem_req_tag_p = MEM_TAG_WIDTH'(mem_req_id_p);
|
||||
end
|
||||
end
|
||||
|
||||
// Memory request multi-port handling
|
||||
|
||||
assign mem_req_valid_s = mem_req_valid_p;
|
||||
assign mem_req_addr_s = mem_req_addr_p;
|
||||
assign mem_req_tag_s = mem_req_tag_p;
|
||||
assign mem_req_flush_s = mem_req_flush_p;
|
||||
assign mem_req_ready_p = mem_req_ready_s;
|
||||
|
||||
if (WRITE_ENABLE != 0) begin
|
||||
if (`CS_WORDS_PER_LINE > 1) begin
|
||||
reg [LINE_SIZE-1:0] mem_req_byteen_r;
|
||||
reg [`CS_LINE_WIDTH-1:0] mem_req_data_r;
|
||||
|
||||
always @(*) begin
|
||||
mem_req_byteen_r = '0;
|
||||
mem_req_data_r = 'x;
|
||||
mem_req_byteen_r[mem_req_wsel_p * WORD_SIZE +: WORD_SIZE] = mem_req_byteen_p;
|
||||
mem_req_data_r[mem_req_wsel_p * `CS_WORD_WIDTH +: `CS_WORD_WIDTH] = mem_req_data_p;
|
||||
end
|
||||
assign mem_req_rw_s = mem_req_rw_p;
|
||||
assign mem_req_byteen_s = mem_req_byteen_r;
|
||||
assign mem_req_data_s = mem_req_data_r;
|
||||
end else begin
|
||||
`UNUSED_VAR (mem_req_wsel_p)
|
||||
assign mem_req_rw_s = mem_req_rw_p;
|
||||
assign mem_req_byteen_s = mem_req_byteen_p;
|
||||
assign mem_req_data_s = mem_req_data_p;
|
||||
end
|
||||
assign mem_req_rw_s = mem_req_rw_p;
|
||||
assign mem_req_byteen_s = mem_req_byteen_p;
|
||||
assign mem_req_data_s = mem_req_data_p;
|
||||
end else begin
|
||||
`UNUSED_VAR (mem_req_byteen_p)
|
||||
`UNUSED_VAR (mem_req_wsel_p)
|
||||
`UNUSED_VAR (mem_req_data_p)
|
||||
`UNUSED_VAR (mem_req_rw_p)
|
||||
|
||||
|
||||
assign mem_req_rw_s = 0;
|
||||
assign mem_req_byteen_s = {LINE_SIZE{1'b1}};
|
||||
assign mem_req_data_s = '0;
|
||||
|
@ -529,10 +547,10 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
// per cycle: core_reads, core_writes
|
||||
wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle;
|
||||
wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle;
|
||||
|
||||
|
||||
wire [NUM_REQS-1:0] perf_core_reads_per_req;
|
||||
wire [NUM_REQS-1:0] perf_core_writes_per_req;
|
||||
|
||||
|
||||
// per cycle: read misses, write misses, msrq stalls, pipeline stalls
|
||||
wire [`CLOG2(NUM_BANKS+1)-1:0] perf_read_miss_per_cycle;
|
||||
wire [`CLOG2(NUM_BANKS+1)-1:0] perf_write_miss_per_cycle;
|
||||
|
@ -541,16 +559,16 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
|
||||
`BUFFER(perf_core_reads_per_req, core_req_valid & core_req_ready & ~core_req_rw);
|
||||
`BUFFER(perf_core_writes_per_req, core_req_valid & core_req_ready & core_req_rw);
|
||||
|
||||
|
||||
`POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_req);
|
||||
`POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_req);
|
||||
`POP_COUNT(perf_read_miss_per_cycle, perf_read_miss_per_bank);
|
||||
`POP_COUNT(perf_write_miss_per_cycle, perf_write_miss_per_bank);
|
||||
`POP_COUNT(perf_mshr_stall_per_cycle, perf_mshr_stall_per_bank);
|
||||
|
||||
|
||||
wire [NUM_REQS-1:0] perf_crsp_stall_per_req;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign perf_crsp_stall_per_req[i] = core_bus_if[i].rsp_valid && ~core_bus_if[i].rsp_ready;
|
||||
assign perf_crsp_stall_per_req[i] = core_bus2_if[i].rsp_valid && ~core_bus2_if[i].rsp_ready;
|
||||
end
|
||||
|
||||
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_req);
|
||||
|
@ -563,7 +581,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
reg [`PERF_CTR_BITS-1:0] perf_write_misses;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mshr_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
|
571
hw/rtl/cache/VX_cache_bank.sv
vendored
571
hw/rtl/cache/VX_cache_bank.sv
vendored
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -18,40 +18,46 @@ module VX_cache_bank #(
|
|||
parameter BANK_ID = 0,
|
||||
|
||||
// Number of Word requests per cycle
|
||||
parameter NUM_REQS = 1,
|
||||
parameter NUM_REQS = 1,
|
||||
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
parameter CACHE_SIZE = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 16,
|
||||
parameter LINE_SIZE = 16,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 1,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 1,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
parameter WORD_SIZE = 4,
|
||||
|
||||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 1,
|
||||
// Miss Reservation Queue Knob
|
||||
parameter MSHR_SIZE = 1,
|
||||
parameter MSHR_SIZE = 1,
|
||||
// Memory Request Queue Size
|
||||
parameter MREQ_SIZE = 1,
|
||||
|
||||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
|
||||
// Enable dirty bytes on writeback
|
||||
parameter DIRTY_BYTES = 0,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
// core request tag size
|
||||
parameter TAG_WIDTH = UUID_WIDTH + 1,
|
||||
|
||||
// Core response output register
|
||||
parameter CORE_OUT_REG = 0,
|
||||
// Core response output buffer
|
||||
parameter CORE_OUT_BUF = 0,
|
||||
|
||||
// Memory request output register
|
||||
parameter MEM_OUT_REG = 0,
|
||||
// Memory request output buffer
|
||||
parameter MEM_OUT_BUF = 0,
|
||||
|
||||
parameter MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE),
|
||||
parameter REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS),
|
||||
|
@ -66,18 +72,19 @@ module VX_cache_bank #(
|
|||
output wire perf_mshr_stalls,
|
||||
`endif
|
||||
|
||||
// Core Request
|
||||
// Core Request
|
||||
input wire core_req_valid,
|
||||
input wire [`CS_LINE_ADDR_WIDTH-1:0] core_req_addr,
|
||||
input wire core_req_rw,
|
||||
input wire [WORD_SEL_WIDTH-1:0] core_req_wsel,
|
||||
input wire [WORD_SIZE-1:0] core_req_byteen,
|
||||
input wire [`CS_WORD_WIDTH-1:0] core_req_data,
|
||||
input wire [TAG_WIDTH-1:0] core_req_tag,
|
||||
input wire [REQ_SEL_WIDTH-1:0] core_req_idx,
|
||||
input wire core_req_rw, // write enable
|
||||
input wire [WORD_SEL_WIDTH-1:0] core_req_wsel, // selects the word within a cache line, e.g. with word size = 4 bytes and cache line size = 64 bytes it needs log2(64/4) = 4 bits
|
||||
input wire [WORD_SIZE-1:0] core_req_byteen,// which bytes in data to write
|
||||
input wire [`CS_WORD_WIDTH-1:0] core_req_data, // data to be written
|
||||
input wire [TAG_WIDTH-1:0] core_req_tag, // identifier of the request (request id)
|
||||
input wire [REQ_SEL_WIDTH-1:0] core_req_idx, // index of the request in the core request array
|
||||
input wire core_req_flush, // flush enable
|
||||
output wire core_req_ready,
|
||||
|
||||
// Core Response
|
||||
|
||||
// Core Response
|
||||
output wire core_rsp_valid,
|
||||
output wire [`CS_WORD_WIDTH-1:0] core_rsp_data,
|
||||
output wire [TAG_WIDTH-1:0] core_rsp_tag,
|
||||
|
@ -88,33 +95,36 @@ module VX_cache_bank #(
|
|||
output wire mem_req_valid,
|
||||
output wire [`CS_LINE_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire mem_req_rw,
|
||||
output wire [WORD_SEL_WIDTH-1:0] mem_req_wsel,
|
||||
output wire [WORD_SIZE-1:0] mem_req_byteen,
|
||||
output wire [`CS_WORD_WIDTH-1:0] mem_req_data,
|
||||
output wire [MSHR_ADDR_WIDTH-1:0] mem_req_id,
|
||||
output wire [LINE_SIZE-1:0] mem_req_byteen,
|
||||
output wire [`CS_LINE_WIDTH-1:0] mem_req_data,
|
||||
output wire [MSHR_ADDR_WIDTH-1:0] mem_req_id, // index of the head entry in the mshr
|
||||
output wire mem_req_flush,
|
||||
input wire mem_req_ready,
|
||||
|
||||
|
||||
// Memory response
|
||||
input wire mem_rsp_valid,
|
||||
input wire [`CS_LINE_WIDTH-1:0] mem_rsp_data,
|
||||
input wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id,
|
||||
output wire mem_rsp_ready,
|
||||
|
||||
// initialization
|
||||
input wire init_enable,
|
||||
input wire [`CS_LINE_SEL_BITS-1:0] init_line_sel
|
||||
// flush
|
||||
input wire flush_begin,
|
||||
output wire flush_end
|
||||
);
|
||||
|
||||
localparam PIPELINE_STAGES = 2;
|
||||
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
wire [`UP(UUID_WIDTH)-1:0] req_uuid_sel, req_uuid_st0, req_uuid_st1;
|
||||
`IGNORE_UNUSED_END
|
||||
|
||||
wire crsq_stall;
|
||||
wire crsp_queue_stall;
|
||||
wire mshr_alm_full;
|
||||
wire mreq_alm_full;
|
||||
|
||||
wire mreq_queue_empty;
|
||||
wire mreq_queue_alm_full;
|
||||
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] mem_rsp_addr;
|
||||
|
||||
|
||||
wire replay_valid;
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] replay_addr;
|
||||
wire replay_rw;
|
||||
|
@ -125,103 +135,149 @@ module VX_cache_bank #(
|
|||
wire [REQ_SEL_WIDTH-1:0] replay_idx;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] replay_id;
|
||||
wire replay_ready;
|
||||
|
||||
|
||||
wire is_init_st0, is_init_st1;
|
||||
wire is_flush_st0, is_flush_st1;
|
||||
wire [NUM_WAYS-1:0] flush_way_st0;
|
||||
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1;
|
||||
wire rw_st0, rw_st1;
|
||||
wire [WORD_SEL_WIDTH-1:0] wsel_st0, wsel_st1;
|
||||
wire [WORD_SIZE-1:0] byteen_st0, byteen_st1;
|
||||
wire [REQ_SEL_WIDTH-1:0] req_idx_st0, req_idx_st1;
|
||||
wire [TAG_WIDTH-1:0] tag_st0, tag_st1;
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_sel_st0, line_sel_st1;
|
||||
wire rw_sel, rw_st0, rw_st1;
|
||||
wire [WORD_SEL_WIDTH-1:0] wsel_sel, wsel_st0, wsel_st1;
|
||||
wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1;
|
||||
wire [REQ_SEL_WIDTH-1:0] req_idx_sel, req_idx_st0, req_idx_st1;
|
||||
wire [TAG_WIDTH-1:0] tag_sel, tag_st0, tag_st1;
|
||||
wire [`CS_WORD_WIDTH-1:0] read_data_st1;
|
||||
wire [`CS_LINE_WIDTH-1:0] data_sel, data_st0, data_st1;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0, mshr_id_st0, mshr_id_st1;
|
||||
wire valid_sel, valid_st0, valid_st1;
|
||||
wire is_init_st0;
|
||||
wire is_creq_st0, is_creq_st1;
|
||||
wire is_fill_st0, is_fill_st1;
|
||||
wire is_replay_st0, is_replay_st1;
|
||||
wire creq_flush_sel, creq_flush_st0, creq_flush_st1;
|
||||
wire evict_dirty_st0, evict_dirty_st1;
|
||||
wire [NUM_WAYS-1:0] way_sel_st0, way_sel_st1;
|
||||
wire [NUM_WAYS-1:0] tag_matches_st0;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_tail_st0, mshr_tail_st1;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_prev_st0, mshr_prev_st1;
|
||||
wire mshr_pending_st0, mshr_pending_st1;
|
||||
wire mshr_empty;
|
||||
|
||||
wire rdw_hazard_st0;
|
||||
reg rdw_hazard_st1;
|
||||
wire flush_valid;
|
||||
wire init_valid;
|
||||
wire [`CS_LINE_SEL_BITS-1:0] flush_sel;
|
||||
wire [NUM_WAYS-1:0] flush_way;
|
||||
wire flush_ready;
|
||||
|
||||
wire pipe_stall = crsq_stall || rdw_hazard_st1;
|
||||
// ensure we have no pending memory request in the bank
|
||||
wire no_pending_req = ~valid_st0 && ~valid_st1 && mreq_queue_empty;
|
||||
|
||||
// flush unit
|
||||
VX_bank_flush #(
|
||||
.BANK_ID (BANK_ID),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_WAYS (NUM_WAYS),
|
||||
.WRITEBACK (WRITEBACK)
|
||||
) flush_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.flush_begin (flush_begin),
|
||||
.flush_end (flush_end),
|
||||
.flush_init (init_valid),
|
||||
.flush_valid (flush_valid),
|
||||
.flush_line (flush_sel),
|
||||
.flush_way (flush_way),
|
||||
.flush_ready (flush_ready),
|
||||
.mshr_empty (mshr_empty),
|
||||
.bank_empty (no_pending_req)
|
||||
);
|
||||
|
||||
wire rdw_hazard1_sel;
|
||||
wire rdw_hazard2_sel;
|
||||
reg rdw_hazard3_st1;
|
||||
|
||||
wire pipe_stall = crsp_queue_stall || rdw_hazard3_st1;
|
||||
|
||||
// inputs arbitration:
|
||||
// mshr replay has highest priority to maximize utilization since there is no miss.
|
||||
// handle memory responses next to prevent deadlock with potential memory request from a miss.
|
||||
wire replay_grant = ~init_enable;
|
||||
wire replay_enable = replay_grant && replay_valid;
|
||||
// flush has precedence over core requests to ensure that the cache is in a consistent state.
|
||||
wire replay_grant = ~init_valid;
|
||||
wire replay_enable = replay_grant && replay_valid;
|
||||
|
||||
wire fill_grant = ~init_enable && ~replay_enable;
|
||||
wire fill_grant = ~init_valid && ~replay_enable;
|
||||
wire fill_enable = fill_grant && mem_rsp_valid;
|
||||
|
||||
wire creq_grant = ~init_enable && ~replay_enable && ~fill_enable;
|
||||
wire flush_grant = ~init_valid && ~replay_enable && ~fill_enable;
|
||||
wire flush_enable = flush_grant && flush_valid;
|
||||
|
||||
wire creq_grant = ~init_valid && ~replay_enable && ~fill_enable && ~flush_enable;
|
||||
wire creq_enable = creq_grant && core_req_valid;
|
||||
|
||||
assign replay_ready = replay_grant
|
||||
&& ~rdw_hazard_st0
|
||||
&& ~pipe_stall;
|
||||
&& ~rdw_hazard1_sel
|
||||
&& ~pipe_stall;
|
||||
|
||||
assign mem_rsp_ready = fill_grant
|
||||
&& ~pipe_stall;
|
||||
|
||||
assign core_req_ready = creq_grant
|
||||
&& ~mreq_alm_full
|
||||
&& ~mshr_alm_full
|
||||
&& (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions
|
||||
&& ~rdw_hazard2_sel
|
||||
&& ~pipe_stall;
|
||||
|
||||
wire init_fire = init_enable;
|
||||
wire replay_fire = replay_valid && replay_ready;
|
||||
assign flush_ready = flush_grant
|
||||
&& (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions
|
||||
&& ~rdw_hazard2_sel
|
||||
&& ~pipe_stall;
|
||||
|
||||
assign core_req_ready = creq_grant
|
||||
&& ~mreq_queue_alm_full
|
||||
&& ~mshr_alm_full
|
||||
&& ~pipe_stall;
|
||||
|
||||
wire init_fire = init_valid;
|
||||
wire replay_fire = replay_valid && replay_ready;
|
||||
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
|
||||
wire flush_fire = flush_valid && flush_ready;
|
||||
wire core_req_fire = core_req_valid && core_req_ready;
|
||||
|
||||
wire [TAG_WIDTH-1:0] mshr_creq_tag = replay_enable ? replay_tag : core_req_tag;
|
||||
|
||||
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || flush_fire || core_req_fire;
|
||||
assign rw_sel = replay_valid ? replay_rw : core_req_rw;
|
||||
assign byteen_sel = replay_valid ? replay_byteen : core_req_byteen;
|
||||
assign wsel_sel = replay_valid ? replay_wsel : core_req_wsel;
|
||||
assign req_idx_sel = replay_valid ? replay_idx : core_req_idx;
|
||||
assign tag_sel = replay_valid ? replay_tag : core_req_tag;
|
||||
assign creq_flush_sel = core_req_valid && core_req_flush;
|
||||
|
||||
assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) :
|
||||
(replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr));
|
||||
|
||||
if (WRITE_ENABLE) begin
|
||||
assign data_sel[`CS_WORD_WIDTH-1:0] = replay_valid ? replay_data : (mem_rsp_valid ? mem_rsp_data[`CS_WORD_WIDTH-1:0] : core_req_data);
|
||||
end else begin
|
||||
assign data_sel[`CS_WORD_WIDTH-1:0] = mem_rsp_data[`CS_WORD_WIDTH-1:0];
|
||||
`UNUSED_VAR (core_req_data)
|
||||
`UNUSED_VAR (replay_data)
|
||||
end
|
||||
for (genvar i = `CS_WORD_WIDTH; i < `CS_LINE_WIDTH; ++i) begin
|
||||
assign data_sel[i] = mem_rsp_data[i]; // only the memory response fills the upper words of data_sel
|
||||
end
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
assign req_uuid_sel = mshr_creq_tag[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
assign req_uuid_sel = tag_sel[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
end else begin
|
||||
assign req_uuid_sel = 0;
|
||||
end
|
||||
|
||||
`UNUSED_VAR (mshr_creq_tag)
|
||||
|
||||
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || core_req_fire;
|
||||
|
||||
assign addr_sel = init_enable ? `CS_LINE_ADDR_WIDTH'(init_line_sel) :
|
||||
(replay_valid ? replay_addr :
|
||||
(mem_rsp_valid ? mem_rsp_addr : core_req_addr));
|
||||
|
||||
assign data_sel[`CS_WORD_WIDTH-1:0] = (mem_rsp_valid || !WRITE_ENABLE) ? mem_rsp_data[`CS_WORD_WIDTH-1:0] : (replay_valid ? replay_data : core_req_data);
|
||||
for (genvar i = `CS_WORD_WIDTH; i < `CS_LINE_WIDTH; ++i) begin
|
||||
assign data_sel[i] = mem_rsp_data[i];
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH),
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + NUM_WAYS + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH),
|
||||
.RESETW (1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~pipe_stall),
|
||||
.data_in ({
|
||||
valid_sel,
|
||||
init_enable,
|
||||
replay_enable,
|
||||
fill_enable,
|
||||
creq_enable,
|
||||
addr_sel,
|
||||
data_sel,
|
||||
replay_valid ? replay_rw : core_req_rw,
|
||||
replay_valid ? replay_byteen : core_req_byteen,
|
||||
replay_valid ? replay_wsel : core_req_wsel,
|
||||
replay_valid ? replay_idx : core_req_idx,
|
||||
replay_valid ? replay_tag : core_req_tag,
|
||||
replay_id
|
||||
}),
|
||||
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_creq_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0})
|
||||
.data_in ({valid_sel, init_valid, replay_enable, fill_enable, flush_enable, creq_enable, creq_flush_sel, flush_way, addr_sel, data_sel, rw_sel, byteen_sel, wsel_sel, req_idx_sel, tag_sel, replay_id}),
|
||||
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, creq_flush_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0})
|
||||
);
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
|
@ -230,59 +286,81 @@ module VX_cache_bank #(
|
|||
assign req_uuid_st0 = 0;
|
||||
end
|
||||
|
||||
wire do_creq_rd_st0 = valid_st0 && is_creq_st0 && ~rw_st0;
|
||||
wire do_fill_st0 = valid_st0 && is_fill_st0;
|
||||
wire do_init_st0 = valid_st0 && is_init_st0;
|
||||
wire do_lookup_st0 = valid_st0 && ~(is_fill_st0 || is_init_st0);
|
||||
wire do_flush_st0 = valid_st0 && is_flush_st0;
|
||||
wire do_creq_rd_st0 = valid_st0 && is_creq_st0 && ~rw_st0;
|
||||
wire do_creq_wr_st0 = valid_st0 && is_creq_st0 && rw_st0;
|
||||
wire do_replay_rd_st0 = valid_st0 && is_replay_st0 && ~rw_st0;
|
||||
wire do_replay_wr_st0 = valid_st0 && is_replay_st0 && rw_st0;
|
||||
wire do_fill_st0 = valid_st0 && is_fill_st0;
|
||||
wire do_cache_rd_st0 = do_creq_rd_st0 || do_replay_rd_st0;
|
||||
wire do_cache_wr_st0 = do_creq_wr_st0 || do_replay_wr_st0;
|
||||
wire do_lookup_st0 = do_cache_rd_st0 || do_cache_wr_st0;
|
||||
|
||||
wire [`CS_WORD_WIDTH-1:0] write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0];
|
||||
|
||||
wire [NUM_WAYS-1:0] tag_matches_st0, tag_matches_st1;
|
||||
wire [NUM_WAYS-1:0] way_sel_st0, way_sel_st1;
|
||||
assign line_sel_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
`RESET_RELAY (tag_reset, reset);
|
||||
wire [NUM_WAYS-1:0] evict_way_st0;
|
||||
wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st0;
|
||||
|
||||
VX_cache_tags #(
|
||||
.INSTANCE_ID(INSTANCE_ID),
|
||||
.BANK_ID (BANK_ID),
|
||||
.INSTANCE_ID($sformatf("%s-tags", INSTANCE_ID)),
|
||||
.BANK_ID (BANK_ID),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_WAYS (NUM_WAYS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.UUID_WIDTH (UUID_WIDTH)
|
||||
) cache_tags (
|
||||
.clk (clk),
|
||||
.reset (tag_reset),
|
||||
.reset (reset),
|
||||
|
||||
.req_uuid (req_uuid_st0),
|
||||
|
||||
|
||||
.stall (pipe_stall),
|
||||
|
||||
// read/Fill
|
||||
// init/flush/fill/write/lookup
|
||||
.init (do_init_st0),
|
||||
.flush (do_flush_st0),
|
||||
.fill (do_fill_st0),
|
||||
.write (do_cache_wr_st0),
|
||||
.lookup (do_lookup_st0),
|
||||
.line_addr (addr_st0),
|
||||
.fill (do_fill_st0),
|
||||
.init (do_init_st0),
|
||||
.way_sel (way_sel_st0),
|
||||
.tag_matches(tag_matches_st0)
|
||||
.way_sel (flush_way_st0),
|
||||
.tag_matches(tag_matches_st0),
|
||||
|
||||
// replacement
|
||||
.evict_dirty(evict_dirty_st0),
|
||||
.evict_way (evict_way_st0),
|
||||
.evict_tag (evict_tag_st0)
|
||||
);
|
||||
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] addr2_st0;
|
||||
|
||||
wire is_flush2_st0 = WRITEBACK && is_flush_st0;
|
||||
|
||||
assign mshr_id_st0 = is_creq_st0 ? mshr_alloc_id_st0 : replay_id_st0;
|
||||
|
||||
assign way_sel_st0 = (is_fill_st0 || is_flush2_st0) ? evict_way_st0 : tag_matches_st0;
|
||||
|
||||
assign addr2_st0 = (is_fill_st0 || is_flush2_st0) ? {evict_tag_st0, line_sel_st0} : addr_st0;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + NUM_WAYS + 1),
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~pipe_stall),
|
||||
.data_in ({valid_st0, is_replay_st0, is_fill_st0, is_creq_st0, rw_st0, addr_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_tail_st0, tag_matches_st0, way_sel_st0, mshr_pending_st0}),
|
||||
.data_out ({valid_st1, is_replay_st1, is_fill_st1, is_creq_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_tail_st1, tag_matches_st1, way_sel_st1, mshr_pending_st1})
|
||||
.data_in ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush2_st0, is_creq_st0, creq_flush_st0, rw_st0, addr2_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, way_sel_st0, evict_dirty_st0, mshr_pending_st0}),
|
||||
.data_out ({valid_st1, is_init_st1, is_replay_st1, is_fill_st1, is_flush_st1, is_creq_st1, creq_flush_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, way_sel_st1, evict_dirty_st1, mshr_pending_st1})
|
||||
);
|
||||
|
||||
// we have a tag hit
|
||||
wire is_hit_st1 = (| tag_matches_st1);
|
||||
wire is_hit_st1 = (| way_sel_st1);
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
assign req_uuid_st1 = tag_st1[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
|
@ -290,9 +368,15 @@ module VX_cache_bank #(
|
|||
assign req_uuid_st1 = 0;
|
||||
end
|
||||
|
||||
wire do_creq_rd_st1 = valid_st1 && is_creq_st1 && ~rw_st1;
|
||||
wire do_creq_wr_st1 = valid_st1 && is_creq_st1 && rw_st1;
|
||||
wire is_read_st1 = is_creq_st1 && ~rw_st1;
|
||||
wire is_write_st1 = is_creq_st1 && rw_st1;
|
||||
|
||||
wire do_init_st1 = valid_st1 && is_init_st1;
|
||||
wire do_fill_st1 = valid_st1 && is_fill_st1;
|
||||
wire do_flush_st1 = valid_st1 && is_flush_st1;
|
||||
|
||||
wire do_creq_rd_st1 = valid_st1 && is_read_st1;
|
||||
wire do_creq_wr_st1 = valid_st1 && is_write_st1;
|
||||
wire do_replay_rd_st1 = valid_st1 && is_replay_st1 && ~rw_st1;
|
||||
wire do_replay_wr_st1 = valid_st1 && is_replay_st1 && rw_st1;
|
||||
|
||||
|
@ -302,76 +386,114 @@ module VX_cache_bank #(
|
|||
wire do_write_hit_st1 = do_creq_wr_st1 && is_hit_st1;
|
||||
wire do_write_miss_st1= do_creq_wr_st1 && ~is_hit_st1;
|
||||
|
||||
wire do_cache_rd_st1 = do_read_hit_st1 || do_replay_rd_st1;
|
||||
wire do_cache_wr_st1 = do_write_hit_st1 || do_replay_wr_st1;
|
||||
|
||||
assign line_sel_st1 = addr_st1[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
`UNUSED_VAR (do_write_miss_st1)
|
||||
|
||||
// ensure an mshr replay always gets a hit
|
||||
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("runtime error: invalid mshr replay"));
|
||||
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("missed mshr replay"));
|
||||
|
||||
// detect BRAM's read-during-write hazard
|
||||
assign rdw_hazard_st0 = do_fill_st0; // after a fill
|
||||
// both tag and data stores use BRAM with no read-during-write protection.
|
||||
// we need to stall the pipeline to prevent read-after-write hazards.
|
||||
assign rdw_hazard1_sel = do_fill_st0; // stall first replay following a fill
|
||||
assign rdw_hazard2_sel = WRITEBACK && do_cache_wr_st0; // a writeback can evict any preceding write
|
||||
always @(posedge clk) begin
|
||||
rdw_hazard_st1 <= (do_creq_rd_st0 && do_write_hit_st1 && (addr_st0 == addr_st1))
|
||||
&& ~rdw_hazard_st1; // after a write to same address
|
||||
// stall reads following writes to same line address
|
||||
rdw_hazard3_st1 <= do_cache_rd_st0 && do_cache_wr_st1 && (line_sel_st0 == line_sel_st1)
|
||||
&& ~rdw_hazard3_st1; // release pipeline stall
|
||||
end
|
||||
|
||||
wire [`CS_WORD_WIDTH-1:0] write_data_st1 = data_st1[`CS_WORD_WIDTH-1:0];
|
||||
wire [`CS_LINE_WIDTH-1:0] write_data_st1 = {`CS_WORDS_PER_LINE{data_st1[`CS_WORD_WIDTH-1:0]}};
|
||||
wire [`CS_LINE_WIDTH-1:0] fill_data_st1 = data_st1;
|
||||
wire [LINE_SIZE-1:0] write_byteen_st1;
|
||||
|
||||
wire [`CS_LINE_WIDTH-1:0] dirty_data_st1;
|
||||
wire [LINE_SIZE-1:0] dirty_byteen_st1;
|
||||
|
||||
// Build the line-wide byte-enable mask (LINE_SIZE bits) from the per-word byte enable.
// With more than one word per line, byteen_st1 is placed at the slot selected by wsel_st1
// and every other byte-enable bit is cleared; with a single word per line it passes through.
if (`CS_WORDS_PER_LINE > 1) begin
|
||||
reg [LINE_SIZE-1:0] write_byteen_r;
|
||||
always @(*) begin
|
||||
// default: no byte enabled
write_byteen_r = '0;
|
||||
// indexed part-select: WORD_SIZE bits starting at bit (wsel_st1 * WORD_SIZE)
write_byteen_r[wsel_st1 * WORD_SIZE +: WORD_SIZE] = byteen_st1;
|
||||
end
|
||||
assign write_byteen_st1 = write_byteen_r;
|
||||
end else begin
|
||||
// single word per line: the word byte enable already covers the whole line
assign write_byteen_st1 = byteen_st1;
|
||||
end
|
||||
|
||||
`RESET_RELAY (data_reset, reset);
|
||||
|
||||
VX_cache_data #(
|
||||
.INSTANCE_ID (INSTANCE_ID),
|
||||
.BANK_ID (BANK_ID),
|
||||
.INSTANCE_ID ($sformatf("%s-data", INSTANCE_ID)),
|
||||
.BANK_ID (BANK_ID),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_WAYS (NUM_WAYS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.DIRTY_BYTES (DIRTY_BYTES),
|
||||
.UUID_WIDTH (UUID_WIDTH)
|
||||
) cache_data (
|
||||
.clk (clk),
|
||||
.reset (data_reset),
|
||||
.reset (reset),
|
||||
|
||||
.req_uuid (req_uuid_st1),
|
||||
|
||||
.stall (pipe_stall),
|
||||
|
||||
.read (do_read_hit_st1 || do_replay_rd_st1),
|
||||
.fill (do_fill_st1),
|
||||
.write (do_write_hit_st1 || do_replay_wr_st1),
|
||||
.way_sel (way_sel_st1 | tag_matches_st1),
|
||||
.init (do_init_st1),
|
||||
.read (do_cache_rd_st1),
|
||||
.fill (do_fill_st1),
|
||||
.flush (do_flush_st1),
|
||||
.write (do_cache_wr_st1),
|
||||
.way_sel (way_sel_st1),
|
||||
.line_addr (addr_st1),
|
||||
.wsel (wsel_st1),
|
||||
.byteen (byteen_st1),
|
||||
.fill_data (fill_data_st1),
|
||||
.fill_data (fill_data_st1),
|
||||
.write_data (write_data_st1),
|
||||
.read_data (read_data_st1)
|
||||
.write_byteen(write_byteen_st1),
|
||||
.read_data (read_data_st1),
|
||||
.dirty_data (dirty_data_st1),
|
||||
.dirty_byteen(dirty_byteen_st1)
|
||||
);
|
||||
|
||||
wire [MSHR_SIZE-1:0] mshr_matches_st0;
|
||||
|
||||
wire [MSHR_SIZE-1:0] mshr_lookup_pending_st0;
|
||||
wire [MSHR_SIZE-1:0] mshr_lookup_rw_st0;
|
||||
wire mshr_allocate_st0 = valid_st0 && is_creq_st0 && ~pipe_stall;
|
||||
wire mshr_lookup_st0 = mshr_allocate_st0;
|
||||
wire mshr_finalize_st1 = valid_st1 && is_creq_st1 && ~pipe_stall;
|
||||
wire mshr_release_st1 = is_hit_st1 || (rw_st1 && ~mshr_pending_st1);
|
||||
|
||||
VX_pending_size #(
|
||||
// release allocated mshr entry if we had a hit
|
||||
wire mshr_release_st1;
|
||||
if (WRITEBACK) begin
|
||||
assign mshr_release_st1 = is_hit_st1;
|
||||
end else begin
|
||||
// we need to keep missed write requests in MSHR if there is already a pending entry to the same address
|
||||
// this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content
|
||||
// this can happen when writes are sent late, when the fill was already in flight.
|
||||
assign mshr_release_st1 = is_hit_st1 || (rw_st1 && ~mshr_pending_st1);
|
||||
end
|
||||
|
||||
VX_pending_size #(
|
||||
.SIZE (MSHR_SIZE)
|
||||
) mshr_pending_size (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.incr (core_req_fire),
|
||||
.decr (replay_fire || (mshr_finalize_st1 && mshr_release_st1)),
|
||||
.empty (mshr_empty),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
.full (mshr_alm_full),
|
||||
`UNUSED_PIN (size),
|
||||
`UNUSED_PIN (empty)
|
||||
`UNUSED_PIN (alm_full),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
`RESET_RELAY (mshr_reset, reset);
|
||||
|
||||
VX_cache_mshr #(
|
||||
.INSTANCE_ID (INSTANCE_ID),
|
||||
.BANK_ID (BANK_ID),
|
||||
.INSTANCE_ID ($sformatf("%s-mshr", INSTANCE_ID)),
|
||||
.BANK_ID (BANK_ID),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.MSHR_SIZE (MSHR_SIZE),
|
||||
|
@ -379,7 +501,7 @@ module VX_cache_bank #(
|
|||
.DATA_WIDTH (WORD_SEL_WIDTH + WORD_SIZE + `CS_WORD_WIDTH + TAG_WIDTH + REQ_SEL_WIDTH)
|
||||
) cache_mshr (
|
||||
.clk (clk),
|
||||
.reset (mshr_reset),
|
||||
.reset (reset),
|
||||
|
||||
.deq_req_uuid (req_uuid_sel),
|
||||
.lkp_req_uuid (req_uuid_st0),
|
||||
|
@ -393,7 +515,7 @@ module VX_cache_bank #(
|
|||
// dequeue
|
||||
.dequeue_valid (replay_valid),
|
||||
.dequeue_addr (replay_addr),
|
||||
.dequeue_rw (replay_rw),
|
||||
.dequeue_rw (replay_rw),
|
||||
.dequeue_data ({replay_wsel, replay_byteen, replay_data, replay_tag, replay_idx}),
|
||||
.dequeue_id (replay_id),
|
||||
.dequeue_ready (replay_ready),
|
||||
|
@ -404,104 +526,128 @@ module VX_cache_bank #(
|
|||
.allocate_rw (rw_st0),
|
||||
.allocate_data ({wsel_st0, byteen_st0, write_data_st0, tag_st0, req_idx_st0}),
|
||||
.allocate_id (mshr_alloc_id_st0),
|
||||
.allocate_tail (mshr_tail_st0),
|
||||
.allocate_prev (mshr_prev_st0),
|
||||
`UNUSED_PIN (allocate_ready),
|
||||
|
||||
// lookup
|
||||
.lookup_valid (mshr_lookup_st0),
|
||||
.lookup_addr (addr_st0),
|
||||
.lookup_matches (mshr_matches_st0),
|
||||
.lookup_pending (mshr_lookup_pending_st0),
|
||||
.lookup_rw (mshr_lookup_rw_st0),
|
||||
|
||||
// finalize
|
||||
.finalize_valid (mshr_finalize_st1),
|
||||
.finalize_release(mshr_release_st1),
|
||||
.finalize_pending(mshr_pending_st1),
|
||||
.finalize_id (mshr_id_st1),
|
||||
.finalize_tail (mshr_tail_st1)
|
||||
.finalize_prev (mshr_prev_st1)
|
||||
);
|
||||
|
||||
// ignore allocated id from mshr matches
|
||||
// check if there are pending requests to same line in the MSHR
|
||||
wire [MSHR_SIZE-1:0] lookup_matches;
|
||||
for (genvar i = 0; i < MSHR_SIZE; ++i) begin
|
||||
assign lookup_matches[i] = (i != mshr_alloc_id_st0) && mshr_matches_st0[i];
|
||||
assign lookup_matches[i] = mshr_lookup_pending_st0[i]
|
||||
&& (i != mshr_alloc_id_st0) // exclude current mshr id
|
||||
&& (WRITEBACK || ~mshr_lookup_rw_st0[i]); // exclude write requests if writethrough
|
||||
end
|
||||
assign mshr_pending_st0 = (| lookup_matches);
|
||||
|
||||
// schedule core response
|
||||
|
||||
wire crsq_valid, crsq_ready;
|
||||
wire [`CS_WORD_WIDTH-1:0] crsq_data;
|
||||
wire [REQ_SEL_WIDTH-1:0] crsq_idx;
|
||||
wire [TAG_WIDTH-1:0] crsq_tag;
|
||||
|
||||
assign crsq_valid = do_read_hit_st1 || do_replay_rd_st1;
|
||||
assign crsq_idx = req_idx_st1;
|
||||
assign crsq_data = read_data_st1;
|
||||
assign crsq_tag = tag_st1;
|
||||
wire crsp_queue_valid, crsp_queue_ready;
|
||||
wire [`CS_WORD_WIDTH-1:0] crsp_queue_data;
|
||||
wire [REQ_SEL_WIDTH-1:0] crsp_queue_idx;
|
||||
wire [TAG_WIDTH-1:0] crsp_queue_tag;
|
||||
|
||||
`RESET_RELAY (crsp_reset, reset);
|
||||
assign crsp_queue_valid = do_cache_rd_st1;
|
||||
assign crsp_queue_idx = req_idx_st1;
|
||||
assign crsp_queue_data = read_data_st1;
|
||||
assign crsp_queue_tag = tag_st1;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (TAG_WIDTH + `CS_WORD_WIDTH + REQ_SEL_WIDTH),
|
||||
.SIZE (CRSQ_SIZE),
|
||||
.OUT_REG (CORE_OUT_REG)
|
||||
.OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF))
|
||||
) core_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (crsp_reset),
|
||||
.valid_in (crsq_valid && ~rdw_hazard_st1),
|
||||
.ready_in (crsq_ready),
|
||||
.data_in ({crsq_tag, crsq_data, crsq_idx}),
|
||||
.reset (reset),
|
||||
.valid_in (crsp_queue_valid && ~rdw_hazard3_st1),
|
||||
.ready_in (crsp_queue_ready),
|
||||
.data_in ({crsp_queue_tag, crsp_queue_data, crsp_queue_idx}),
|
||||
.data_out ({core_rsp_tag, core_rsp_data, core_rsp_idx}),
|
||||
.valid_out (core_rsp_valid),
|
||||
.ready_out (core_rsp_ready)
|
||||
);
|
||||
|
||||
assign crsq_stall = crsq_valid && ~crsq_ready;
|
||||
assign crsp_queue_stall = crsp_queue_valid && ~crsp_queue_ready;
|
||||
|
||||
// schedule memory request
|
||||
|
||||
wire mreq_push, mreq_pop, mreq_empty;
|
||||
wire [`CS_WORD_WIDTH-1:0] mreq_data;
|
||||
wire [WORD_SIZE-1:0] mreq_byteen;
|
||||
wire [WORD_SEL_WIDTH-1:0] mreq_wsel;
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] mreq_addr;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mreq_id;
|
||||
wire mreq_rw;
|
||||
wire mreq_queue_push, mreq_queue_pop;
|
||||
wire [`CS_LINE_WIDTH-1:0] mreq_queue_data;
|
||||
wire [LINE_SIZE-1:0] mreq_queue_byteen;
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] mreq_queue_addr;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mreq_queue_id;
|
||||
wire mreq_queue_rw;
|
||||
wire mreq_queue_flush;
|
||||
|
||||
assign mreq_push = (do_read_miss_st1 && ~mshr_pending_st1)
|
||||
|| do_creq_wr_st1;
|
||||
wire is_fill_or_flush_st1 = is_fill_st1 || is_flush_st1;
|
||||
wire do_fill_or_flush_st1 = valid_st1 && is_fill_or_flush_st1;
|
||||
wire do_writeback_st1 = do_fill_or_flush_st1 && evict_dirty_st1;
|
||||
|
||||
assign mreq_pop = mem_req_valid && mem_req_ready;
|
||||
if (WRITEBACK) begin
|
||||
if (DIRTY_BYTES) begin
|
||||
// ensure dirty bytes match the tag info
|
||||
wire has_dirty_bytes = (| dirty_byteen_st1);
|
||||
`RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID)));
|
||||
end
|
||||
assign mreq_queue_push = (((do_read_miss_st1 || do_write_miss_st1) && ~mshr_pending_st1)
|
||||
|| do_writeback_st1)
|
||||
&& ~rdw_hazard3_st1;
|
||||
end else begin
|
||||
`UNUSED_VAR (do_writeback_st1)
|
||||
assign mreq_queue_push = ((do_read_miss_st1 && ~mshr_pending_st1)
|
||||
|| do_creq_wr_st1)
|
||||
&& ~rdw_hazard3_st1;
|
||||
end
|
||||
|
||||
assign mreq_rw = WRITE_ENABLE && rw_st1;
|
||||
assign mreq_addr = addr_st1;
|
||||
assign mreq_id = mshr_id_st1;
|
||||
assign mreq_wsel = wsel_st1;
|
||||
assign mreq_byteen = byteen_st1;
|
||||
assign mreq_data = write_data_st1;
|
||||
assign mreq_queue_pop = mem_req_valid && mem_req_ready;
|
||||
assign mreq_queue_addr = addr_st1;
|
||||
assign mreq_queue_id = mshr_id_st1;
|
||||
assign mreq_queue_flush = creq_flush_st1;
|
||||
|
||||
`RESET_RELAY (mreq_reset, reset);
|
||||
if (WRITE_ENABLE) begin
|
||||
assign mreq_queue_rw = WRITEBACK ? is_fill_or_flush_st1 : rw_st1;
|
||||
assign mreq_queue_data = WRITEBACK ? dirty_data_st1 : write_data_st1;
|
||||
assign mreq_queue_byteen = WRITEBACK ? dirty_byteen_st1 : write_byteen_st1;
|
||||
end else begin
|
||||
assign mreq_queue_rw = 0;
|
||||
assign mreq_queue_data = 0;
|
||||
assign mreq_queue_byteen = 0;
|
||||
`UNUSED_VAR (dirty_data_st1)
|
||||
`UNUSED_VAR (dirty_byteen_st1)
|
||||
end
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (1 + `CS_LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + `CS_WORD_WIDTH),
|
||||
.DATAW (1 + `CS_LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + 1),
|
||||
.DEPTH (MREQ_SIZE),
|
||||
.ALM_FULL (MREQ_SIZE-2),
|
||||
.OUT_REG (MEM_OUT_REG)
|
||||
.ALM_FULL (MREQ_SIZE-PIPELINE_STAGES),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
) mem_req_queue (
|
||||
.clk (clk),
|
||||
.reset (mreq_reset),
|
||||
.push (mreq_push),
|
||||
.pop (mreq_pop),
|
||||
.data_in ({mreq_rw, mreq_addr, mreq_id, mreq_byteen, mreq_wsel, mreq_data}),
|
||||
.data_out ({mem_req_rw, mem_req_addr, mem_req_id, mem_req_byteen, mem_req_wsel, mem_req_data}),
|
||||
.empty (mreq_empty),
|
||||
.alm_full (mreq_alm_full),
|
||||
.reset (reset),
|
||||
.push (mreq_queue_push),
|
||||
.pop (mreq_queue_pop),
|
||||
.data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_id, mreq_queue_byteen, mreq_queue_data, mreq_queue_flush}),
|
||||
.data_out ({mem_req_rw, mem_req_addr, mem_req_id, mem_req_byteen, mem_req_data, mem_req_flush}),
|
||||
.empty (mreq_queue_empty),
|
||||
.alm_full (mreq_queue_alm_full),
|
||||
`UNUSED_PIN (full),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
assign mem_req_valid = ~mreq_empty;
|
||||
assign mem_req_valid = ~mreq_queue_empty;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
@ -511,39 +657,38 @@ module VX_cache_bank #(
|
|||
assign perf_mshr_stalls = mshr_alm_full;
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_CACHE_BANK
|
||||
wire crsq_fire = crsq_valid && crsq_ready;
|
||||
wire pipeline_stall = (replay_valid || mem_rsp_valid || core_req_valid)
|
||||
&& ~(replay_fire || mem_rsp_fire || core_req_fire);
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
wire crsp_queue_fire = crsp_queue_valid && crsp_queue_ready;
|
||||
wire input_stall = (replay_valid || mem_rsp_valid || core_req_valid || flush_valid)
|
||||
&& ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire);
|
||||
always @(posedge clk) begin
|
||||
if (pipeline_stall) begin
|
||||
`TRACE(3, ("%d: *** %s-bank%0d stall: crsq=%b, mreq=%b, mshr=%b\n", $time, INSTANCE_ID, BANK_ID, crsq_stall, mreq_alm_full, mshr_alm_full));
|
||||
end
|
||||
if (init_enable) begin
|
||||
`TRACE(2, ("%d: %s-bank%0d init: addr=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(init_line_sel, BANK_ID)));
|
||||
if (input_stall || pipe_stall) begin
|
||||
`TRACE(3, ("%d: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1));
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(2, ("%d: %s-bank%0d fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data));
|
||||
`TRACE(2, ("%d: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data));
|
||||
end
|
||||
if (replay_fire) begin
|
||||
`TRACE(2, ("%d: %s-bank%0d mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel));
|
||||
`TRACE(2, ("%d: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel));
|
||||
end
|
||||
if (core_req_fire) begin
|
||||
if (core_req_rw)
|
||||
`TRACE(2, ("%d: %s-bank%0d core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel));
|
||||
`TRACE(2, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel));
|
||||
else
|
||||
`TRACE(2, ("%d: %s-bank%0d core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel));
|
||||
`TRACE(2, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel));
|
||||
end
|
||||
if (crsq_fire) begin
|
||||
`TRACE(2, ("%d: %s-bank%0d core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsq_tag, crsq_idx, crsq_data, req_uuid_st1));
|
||||
if (crsp_queue_fire) begin
|
||||
`TRACE(2, ("%d: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1));
|
||||
end
|
||||
if (mreq_push) begin
|
||||
if (do_creq_wr_st1)
|
||||
`TRACE(2, ("%d: %s-bank%0d writethrough: addr=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(mreq_addr, BANK_ID), mreq_byteen, mreq_data, req_uuid_st1));
|
||||
if (mreq_queue_push) begin
|
||||
if (do_creq_wr_st1 && !WRITEBACK)
|
||||
`TRACE(2, ("%d: %s writethrough: addr=0x%0h, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1));
|
||||
else if (do_writeback_st1)
|
||||
`TRACE(2, ("%d: %s writeback: addr=0x%0h, byteen=%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data));
|
||||
else
|
||||
`TRACE(2, ("%d: %s-bank%0d fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(mreq_addr, BANK_ID), mreq_id, req_uuid_st1));
|
||||
`TRACE(2, ("%d: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1));
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
411
hw/rtl/cache/VX_cache_bypass.sv
vendored
411
hw/rtl/cache/VX_cache_bypass.sv
vendored
|
@ -1,221 +1,186 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_platform.vh"
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_cache_bypass #(
|
||||
parameter NUM_REQS = 1,
|
||||
parameter NC_TAG_BIT = 0,
|
||||
parameter TAG_SEL_IDX = 0,
|
||||
|
||||
parameter NC_ENABLE = 0,
|
||||
parameter PASSTHRU = 0,
|
||||
parameter NC_ENABLE = 0,
|
||||
|
||||
parameter WORD_SIZE = 1,
|
||||
parameter LINE_SIZE = 1,
|
||||
|
||||
parameter CORE_ADDR_WIDTH = 1,
|
||||
parameter CORE_DATA_SIZE = 1,
|
||||
parameter CORE_TAG_IN_WIDTH = 1,
|
||||
|
||||
|
||||
parameter CORE_TAG_WIDTH = 1,
|
||||
|
||||
parameter MEM_ADDR_WIDTH = 1,
|
||||
parameter MEM_DATA_SIZE = 1,
|
||||
parameter MEM_TAG_IN_WIDTH = 1,
|
||||
parameter MEM_TAG_OUT_WIDTH = 1,
|
||||
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
parameter CORE_DATA_WIDTH = CORE_DATA_SIZE * 8,
|
||||
parameter MEM_DATA_WIDTH = MEM_DATA_SIZE * 8,
|
||||
parameter CORE_TAG_OUT_WIDTH= CORE_TAG_IN_WIDTH - NC_ENABLE
|
||||
) (
|
||||
|
||||
parameter CORE_OUT_BUF = 0,
|
||||
parameter MEM_OUT_BUF = 0,
|
||||
|
||||
parameter CORE_DATA_WIDTH = WORD_SIZE * 8
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Core request in
|
||||
input wire [NUM_REQS-1:0] core_req_valid_in,
|
||||
input wire [NUM_REQS-1:0] core_req_rw_in,
|
||||
input wire [NUM_REQS-1:0][CORE_ADDR_WIDTH-1:0] core_req_addr_in,
|
||||
input wire [NUM_REQS-1:0][CORE_DATA_SIZE-1:0] core_req_byteen_in,
|
||||
input wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_req_data_in,
|
||||
input wire [NUM_REQS-1:0][CORE_TAG_IN_WIDTH-1:0] core_req_tag_in,
|
||||
output wire [NUM_REQS-1:0] core_req_ready_in,
|
||||
// Core request in
|
||||
VX_mem_bus_if.slave core_bus_in_if [NUM_REQS],
|
||||
|
||||
// Core request out
|
||||
output wire [NUM_REQS-1:0] core_req_valid_out,
|
||||
output wire [NUM_REQS-1:0] core_req_rw_out,
|
||||
output wire [NUM_REQS-1:0][CORE_ADDR_WIDTH-1:0] core_req_addr_out,
|
||||
output wire [NUM_REQS-1:0][CORE_DATA_SIZE-1:0] core_req_byteen_out,
|
||||
output wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_req_data_out,
|
||||
output wire [NUM_REQS-1:0][CORE_TAG_OUT_WIDTH-1:0] core_req_tag_out,
|
||||
input wire [NUM_REQS-1:0] core_req_ready_out,
|
||||
|
||||
// Core response in
|
||||
input wire [NUM_REQS-1:0] core_rsp_valid_in,
|
||||
input wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_rsp_data_in,
|
||||
input wire [NUM_REQS-1:0][CORE_TAG_OUT_WIDTH-1:0] core_rsp_tag_in,
|
||||
output wire [NUM_REQS-1:0] core_rsp_ready_in,
|
||||
|
||||
// Core response out
|
||||
output wire [NUM_REQS-1:0] core_rsp_valid_out,
|
||||
output wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_rsp_data_out,
|
||||
output wire [NUM_REQS-1:0][CORE_TAG_IN_WIDTH-1:0] core_rsp_tag_out,
|
||||
input wire [NUM_REQS-1:0] core_rsp_ready_out,
|
||||
VX_mem_bus_if.master core_bus_out_if [NUM_REQS],
|
||||
|
||||
// Memory request in
|
||||
input wire mem_req_valid_in,
|
||||
input wire mem_req_rw_in,
|
||||
input wire [MEM_ADDR_WIDTH-1:0] mem_req_addr_in,
|
||||
input wire [MEM_DATA_SIZE-1:0] mem_req_byteen_in,
|
||||
input wire [MEM_DATA_WIDTH-1:0] mem_req_data_in,
|
||||
input wire [MEM_TAG_IN_WIDTH-1:0] mem_req_tag_in,
|
||||
output wire mem_req_ready_in,
|
||||
VX_mem_bus_if.slave mem_bus_in_if,
|
||||
|
||||
// Memory request out
|
||||
output wire mem_req_valid_out,
|
||||
output wire mem_req_rw_out,
|
||||
output wire [MEM_ADDR_WIDTH-1:0] mem_req_addr_out,
|
||||
output wire [MEM_DATA_SIZE-1:0] mem_req_byteen_out,
|
||||
output wire [MEM_DATA_WIDTH-1:0] mem_req_data_out,
|
||||
output wire [MEM_TAG_OUT_WIDTH-1:0] mem_req_tag_out,
|
||||
input wire mem_req_ready_out,
|
||||
|
||||
// Memory response in
|
||||
input wire mem_rsp_valid_in,
|
||||
input wire [MEM_DATA_WIDTH-1:0] mem_rsp_data_in,
|
||||
input wire [MEM_TAG_OUT_WIDTH-1:0] mem_rsp_tag_in,
|
||||
output wire mem_rsp_ready_in,
|
||||
|
||||
// Memory response out
|
||||
output wire mem_rsp_valid_out,
|
||||
output wire [MEM_DATA_WIDTH-1:0] mem_rsp_data_out,
|
||||
output wire [MEM_TAG_IN_WIDTH-1:0] mem_rsp_tag_out,
|
||||
input wire mem_rsp_ready_out
|
||||
VX_mem_bus_if.master mem_bus_out_if
|
||||
);
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
localparam DIRECT_PASSTHRU = PASSTHRU && (`CS_WORD_SEL_BITS == 0) && (NUM_REQS == 1);
|
||||
|
||||
localparam REQ_SEL_BITS = `CLOG2(NUM_REQS);
|
||||
localparam MUX_DATAW = CORE_TAG_IN_WIDTH + CORE_DATA_WIDTH + CORE_DATA_SIZE + CORE_ADDR_WIDTH + 1;
|
||||
localparam MUX_DATAW = 1 + WORD_SIZE + CORE_ADDR_WIDTH + `ADDR_TYPE_WIDTH + CORE_DATA_WIDTH + CORE_TAG_WIDTH;
|
||||
|
||||
localparam WORDS_PER_LINE = MEM_DATA_SIZE / CORE_DATA_SIZE;
|
||||
localparam WORDS_PER_LINE = LINE_SIZE / WORD_SIZE;
|
||||
localparam WSEL_BITS = `CLOG2(WORDS_PER_LINE);
|
||||
|
||||
localparam CORE_TAG_ID_BITS = CORE_TAG_IN_WIDTH - UUID_WIDTH;
|
||||
localparam CORE_TAG_ID_BITS = CORE_TAG_WIDTH - UUID_WIDTH;
|
||||
localparam MEM_TAG_ID_BITS = REQ_SEL_BITS + WSEL_BITS + CORE_TAG_ID_BITS;
|
||||
localparam MEM_TAG_BYPASS_BITS = UUID_WIDTH + MEM_TAG_ID_BITS;
|
||||
|
||||
localparam MEM_TAG_OUT_NC_WIDTH = MEM_TAG_OUT_WIDTH - 1 + NC_ENABLE;
|
||||
`STATIC_ASSERT(0 == (`IO_BASE_ADDR % `MEM_BLOCK_SIZE), ("invalid parameter"))
|
||||
|
||||
// core request handling
|
||||
// handle core requests ///////////////////////////////////////////////////
|
||||
|
||||
wire [NUM_REQS-1:0] core_req_valid_in_nc;
|
||||
wire [NUM_REQS-1:0] core_req_nc_idxs;
|
||||
wire core_req_nc_valid;
|
||||
wire [NUM_REQS-1:0] core_req_nc_valids;
|
||||
wire [NUM_REQS-1:0] core_req_nc_idxs;
|
||||
wire [`UP(REQ_SEL_BITS)-1:0] core_req_nc_idx;
|
||||
wire [NUM_REQS-1:0] core_req_nc_sel;
|
||||
wire core_req_nc_valid;
|
||||
|
||||
wire core_req_nc_ready;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
if (PASSTHRU != 0) begin
|
||||
assign core_req_nc_idxs[i] = 1'b1;
|
||||
end else if (NC_ENABLE) begin
|
||||
assign core_req_nc_idxs[i] = core_bus_in_if[i].req_data.atype[`ADDR_TYPE_IO];
|
||||
end else begin
|
||||
assign core_req_nc_idxs[i] = core_req_tag_in[i][NC_TAG_BIT];
|
||||
assign core_req_nc_idxs[i] = 1'b0;
|
||||
end
|
||||
assign core_req_nc_valids[i] = core_bus_in_if[i].req_valid && core_req_nc_idxs[i];
|
||||
end
|
||||
|
||||
assign core_req_valid_in_nc = core_req_valid_in & core_req_nc_idxs;
|
||||
|
||||
wire core_req_in_fire = | (core_req_valid_in & core_req_ready_in);
|
||||
|
||||
VX_generic_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.TYPE (PASSTHRU ? "R" : "P"),
|
||||
.LOCK_ENABLE (1)
|
||||
) req_arb (
|
||||
.TYPE (PASSTHRU ? "R" : "P")
|
||||
) core_req_nc_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.unlock (core_req_in_fire),
|
||||
.requests (core_req_valid_in_nc),
|
||||
.requests (core_req_nc_valids),
|
||||
.grant_index (core_req_nc_idx),
|
||||
.grant_onehot (core_req_nc_sel),
|
||||
.grant_valid (core_req_nc_valid)
|
||||
.grant_valid (core_req_nc_valid),
|
||||
.grant_ready (core_req_nc_ready)
|
||||
);
|
||||
|
||||
assign core_req_valid_out = core_req_valid_in & ~core_req_nc_idxs;
|
||||
assign core_req_rw_out = core_req_rw_in;
|
||||
assign core_req_addr_out = core_req_addr_in;
|
||||
assign core_req_byteen_out = core_req_byteen_in;
|
||||
assign core_req_data_out = core_req_data_in;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
VX_bits_remove #(
|
||||
.N (CORE_TAG_IN_WIDTH),
|
||||
.S (NC_ENABLE),
|
||||
.POS (NC_TAG_BIT)
|
||||
) core_req_tag_nc_remove (
|
||||
.data_in (core_req_tag_in[i]),
|
||||
.data_out (core_req_tag_out[i])
|
||||
);
|
||||
assign core_bus_out_if[i].req_valid = core_bus_in_if[i].req_valid && ~core_req_nc_idxs[i];
|
||||
assign core_bus_out_if[i].req_data = core_bus_in_if[i].req_data;
|
||||
assign core_bus_in_if[i].req_ready = core_req_nc_valids[i] ? (core_req_nc_ready && core_req_nc_sel[i])
|
||||
: core_bus_out_if[i].req_ready;
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_ready_in[i] = core_req_valid_in_nc[i] ? (~mem_req_valid_in && mem_req_ready_out && core_req_nc_sel[i])
|
||||
: core_req_ready_out[i];
|
||||
end
|
||||
// handle memory requests /////////////////////////////////////////////////
|
||||
|
||||
// memory request handling
|
||||
wire mem_req_out_valid;
|
||||
wire mem_req_out_rw;
|
||||
wire [LINE_SIZE-1:0] mem_req_out_byteen;
|
||||
wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_out_addr;
|
||||
wire [`ADDR_TYPE_WIDTH-1:0] mem_req_out_atype;
|
||||
wire [`CS_LINE_WIDTH-1:0] mem_req_out_data;
|
||||
wire [MEM_TAG_OUT_WIDTH-1:0] mem_req_out_tag;
|
||||
wire mem_req_out_ready;
|
||||
|
||||
assign mem_req_valid_out = mem_req_valid_in || core_req_nc_valid;
|
||||
assign mem_req_ready_in = mem_req_ready_out;
|
||||
|
||||
wire [CORE_TAG_IN_WIDTH-1:0] core_req_tag_in_sel;
|
||||
wire [CORE_DATA_WIDTH-1:0] core_req_data_in_sel;
|
||||
wire [CORE_DATA_SIZE-1:0] core_req_byteen_in_sel;
|
||||
wire [CORE_ADDR_WIDTH-1:0] core_req_addr_in_sel;
|
||||
wire core_req_rw_in_sel;
|
||||
wire core_req_nc_sel_rw;
|
||||
wire [WORD_SIZE-1:0] core_req_nc_sel_byteen;
|
||||
wire [CORE_ADDR_WIDTH-1:0] core_req_nc_sel_addr;
|
||||
wire [`ADDR_TYPE_WIDTH-1:0] core_req_nc_sel_atype;
|
||||
wire [CORE_DATA_WIDTH-1:0] core_req_nc_sel_data;
|
||||
wire [CORE_TAG_WIDTH-1:0] core_req_nc_sel_tag;
|
||||
|
||||
wire [NUM_REQS-1:0][MUX_DATAW-1:0] core_req_nc_mux_in;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_nc_mux_in[i] = {core_req_tag_in[i], core_req_data_in[i], core_req_byteen_in[i], core_req_addr_in[i], core_req_rw_in[i]};
|
||||
assign core_req_nc_mux_in[i] = {
|
||||
core_bus_in_if[i].req_data.rw,
|
||||
core_bus_in_if[i].req_data.byteen,
|
||||
core_bus_in_if[i].req_data.addr,
|
||||
core_bus_in_if[i].req_data.atype,
|
||||
core_bus_in_if[i].req_data.data,
|
||||
core_bus_in_if[i].req_data.tag
|
||||
};
|
||||
end
|
||||
assign {core_req_tag_in_sel, core_req_data_in_sel, core_req_byteen_in_sel, core_req_addr_in_sel, core_req_rw_in_sel} = core_req_nc_mux_in[core_req_nc_idx];
|
||||
|
||||
wire [CORE_TAG_ID_BITS-1:0] core_req_in_id = core_req_tag_in_sel[CORE_TAG_ID_BITS-1:0];
|
||||
|
||||
assign mem_req_rw_out = mem_req_valid_in ? mem_req_rw_in : core_req_rw_in_sel;
|
||||
assign mem_req_addr_out = mem_req_valid_in ? mem_req_addr_in : core_req_addr_in_sel[WSEL_BITS +: MEM_ADDR_WIDTH];
|
||||
assign {
|
||||
core_req_nc_sel_rw,
|
||||
core_req_nc_sel_byteen,
|
||||
core_req_nc_sel_addr,
|
||||
core_req_nc_sel_atype,
|
||||
core_req_nc_sel_data,
|
||||
core_req_nc_sel_tag
|
||||
} = core_req_nc_mux_in[core_req_nc_idx];
|
||||
|
||||
assign core_req_nc_ready = ~mem_bus_in_if.req_valid && mem_req_out_ready;
|
||||
|
||||
assign mem_req_out_valid = mem_bus_in_if.req_valid || core_req_nc_valid;
|
||||
assign mem_req_out_rw = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.rw : core_req_nc_sel_rw;
|
||||
assign mem_req_out_addr = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.addr : core_req_nc_sel_addr[WSEL_BITS +: MEM_ADDR_WIDTH];
|
||||
assign mem_req_out_atype = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.atype : core_req_nc_sel_atype;
|
||||
|
||||
wire [MEM_TAG_ID_BITS-1:0] mem_req_tag_id_bypass;
|
||||
|
||||
|
||||
wire [CORE_TAG_ID_BITS-1:0] core_req_in_id = core_req_nc_sel_tag[CORE_TAG_ID_BITS-1:0];
|
||||
|
||||
if (WORDS_PER_LINE > 1) begin
|
||||
reg [WORDS_PER_LINE-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_in_r;
|
||||
reg [WORDS_PER_LINE-1:0][WORD_SIZE-1:0] mem_req_byteen_in_r;
|
||||
reg [WORDS_PER_LINE-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_r;
|
||||
|
||||
wire [WSEL_BITS-1:0] req_wsel = core_req_addr_in_sel[WSEL_BITS-1:0];
|
||||
|
||||
wire [WSEL_BITS-1:0] req_wsel = core_req_nc_sel_addr[WSEL_BITS-1:0];
|
||||
|
||||
always @(*) begin
|
||||
mem_req_byteen_in_r = '0;
|
||||
mem_req_byteen_in_r[req_wsel] = core_req_byteen_in_sel;
|
||||
mem_req_byteen_in_r[req_wsel] = core_req_nc_sel_byteen;
|
||||
|
||||
mem_req_data_in_r = 'x;
|
||||
mem_req_data_in_r[req_wsel] = core_req_data_in_sel;
|
||||
mem_req_data_in_r[req_wsel] = core_req_nc_sel_data;
|
||||
end
|
||||
|
||||
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r;
|
||||
assign mem_req_data_out = mem_req_valid_in ? mem_req_data_in : mem_req_data_in_r;
|
||||
assign mem_req_out_byteen = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.byteen : mem_req_byteen_in_r;
|
||||
assign mem_req_out_data = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.data : mem_req_data_in_r;
|
||||
if (NUM_REQS > 1) begin
|
||||
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, req_wsel, core_req_in_id});
|
||||
end else begin
|
||||
end else begin
|
||||
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({req_wsel, core_req_in_id});
|
||||
end
|
||||
end else begin
|
||||
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in_sel;
|
||||
assign mem_req_data_out = mem_req_valid_in ? mem_req_data_in : core_req_data_in_sel;
|
||||
assign mem_req_out_byteen = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.byteen : core_req_nc_sel_byteen;
|
||||
assign mem_req_out_data = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.data : core_req_nc_sel_data;
|
||||
if (NUM_REQS > 1) begin
|
||||
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, core_req_in_id});
|
||||
end else begin
|
||||
|
@ -223,126 +188,164 @@ module VX_cache_bypass #(
|
|||
end
|
||||
end
|
||||
|
||||
wire [MEM_TAG_OUT_NC_WIDTH-1:0] mem_req_tag_bypass;
|
||||
wire [MEM_TAG_BYPASS_BITS-1:0] mem_req_tag_bypass;
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
assign mem_req_tag_bypass = {core_req_tag_in_sel[CORE_TAG_ID_BITS +: UUID_WIDTH], mem_req_tag_id_bypass};
|
||||
assign mem_req_tag_bypass = {core_req_nc_sel_tag[CORE_TAG_ID_BITS +: UUID_WIDTH], mem_req_tag_id_bypass};
|
||||
end else begin
|
||||
assign mem_req_tag_bypass = mem_req_tag_id_bypass;
|
||||
end
|
||||
|
||||
wire [MEM_TAG_OUT_WIDTH-1:0] mem_req_tag_bypass_nc;
|
||||
wire [(MEM_TAG_IN_WIDTH + 1)-1:0] mem_req_tag_in_nc;
|
||||
if (PASSTHRU != 0) begin
|
||||
assign mem_req_out_tag = mem_req_tag_bypass;
|
||||
`UNUSED_VAR (mem_bus_in_if.req_data.tag)
|
||||
end else begin
|
||||
if (NC_ENABLE) begin
|
||||
VX_bits_insert #(
|
||||
.N (MEM_TAG_OUT_WIDTH-1),
|
||||
.S (1),
|
||||
.POS (TAG_SEL_IDX)
|
||||
) mem_req_tag_in_nc_insert (
|
||||
.data_in (mem_bus_in_if.req_valid ? (MEM_TAG_OUT_WIDTH-1)'(mem_bus_in_if.req_data.tag) : (MEM_TAG_OUT_WIDTH-1)'(mem_req_tag_bypass)),
|
||||
.ins_in (~mem_bus_in_if.req_valid),
|
||||
.data_out (mem_req_out_tag)
|
||||
);
|
||||
end else begin
|
||||
assign mem_req_out_tag = mem_bus_in_if.req_data.tag;
|
||||
end
|
||||
end
|
||||
|
||||
VX_bits_insert #(
|
||||
.N (MEM_TAG_OUT_NC_WIDTH),
|
||||
.S (NC_ENABLE ? 0 : 1),
|
||||
.POS (NC_TAG_BIT)
|
||||
) mem_req_tag_bypass_nc_insert (
|
||||
.data_in (mem_req_tag_bypass),
|
||||
.sel_in (1'b0),
|
||||
.data_out (mem_req_tag_bypass_nc)
|
||||
);
|
||||
assign mem_bus_in_if.req_ready = mem_req_out_ready;
|
||||
|
||||
VX_bits_insert #(
|
||||
.N (MEM_TAG_IN_WIDTH),
|
||||
.POS (NC_TAG_BIT)
|
||||
) mem_req_tag_in_nc_insert (
|
||||
.data_in (mem_req_tag_in),
|
||||
.sel_in (1'b0),
|
||||
.data_out (mem_req_tag_in_nc)
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `ADDR_TYPE_WIDTH + `CS_LINE_WIDTH + MEM_TAG_OUT_WIDTH),
|
||||
.SIZE ((!DIRECT_PASSTHRU) ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
) mem_req_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mem_req_out_valid),
|
||||
.ready_in (mem_req_out_ready),
|
||||
.data_in ({mem_req_out_rw, mem_req_out_byteen, mem_req_out_addr, mem_req_out_atype, mem_req_out_data, mem_req_out_tag}),
|
||||
.data_out ({mem_bus_out_if.req_data.rw, mem_bus_out_if.req_data.byteen, mem_bus_out_if.req_data.addr, mem_bus_out_if.req_data.atype, mem_bus_out_if.req_data.data, mem_bus_out_if.req_data.tag}),
|
||||
.valid_out (mem_bus_out_if.req_valid),
|
||||
.ready_out (mem_bus_out_if.req_ready)
|
||||
);
|
||||
|
||||
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_nc) : mem_req_tag_bypass_nc;
|
||||
// handle core responses //////////////////////////////////////////////////
|
||||
|
||||
// core response handling
|
||||
|
||||
wire [NUM_REQS-1:0][CORE_TAG_IN_WIDTH-1:0] core_rsp_tag_in_nc;
|
||||
wire [NUM_REQS-1:0] core_rsp_in_valid;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_rsp_in_data;
|
||||
wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_in_tag;
|
||||
wire [NUM_REQS-1:0] core_rsp_in_ready;
|
||||
|
||||
wire is_mem_rsp_nc;
|
||||
if (PASSTHRU != 0) begin
|
||||
assign is_mem_rsp_nc = mem_rsp_valid_in;
|
||||
assign is_mem_rsp_nc = mem_bus_out_if.rsp_valid;
|
||||
end else begin
|
||||
assign is_mem_rsp_nc = mem_rsp_valid_in && mem_rsp_tag_in[NC_TAG_BIT];
|
||||
if (NC_ENABLE) begin
|
||||
assign is_mem_rsp_nc = mem_bus_out_if.rsp_valid && mem_bus_out_if.rsp_data.tag[TAG_SEL_IDX];
|
||||
end else begin
|
||||
assign is_mem_rsp_nc = 1'b0;
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
VX_bits_insert #(
|
||||
.N (CORE_TAG_OUT_WIDTH),
|
||||
.S (NC_ENABLE),
|
||||
.POS (NC_TAG_BIT)
|
||||
) core_rsp_tag_in_nc_insert (
|
||||
.data_in (core_rsp_tag_in[i]),
|
||||
.sel_in ('0),
|
||||
.data_out (core_rsp_tag_in_nc[i])
|
||||
);
|
||||
end
|
||||
wire [(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1:0] mem_rsp_tag_id_nc;
|
||||
|
||||
wire [MEM_TAG_OUT_NC_WIDTH-1:0] mem_rsp_tag_in_nc;
|
||||
|
||||
VX_bits_remove #(
|
||||
VX_bits_remove #(
|
||||
.N (MEM_TAG_OUT_WIDTH),
|
||||
.S (NC_ENABLE ? 0 : 1),
|
||||
.POS (NC_TAG_BIT)
|
||||
.S (NC_ENABLE),
|
||||
.POS (TAG_SEL_IDX)
|
||||
) mem_rsp_tag_in_nc_remove (
|
||||
.data_in (mem_rsp_tag_in),
|
||||
.data_out (mem_rsp_tag_in_nc)
|
||||
.data_in (mem_bus_out_if.rsp_data.tag),
|
||||
.data_out (mem_rsp_tag_id_nc)
|
||||
);
|
||||
|
||||
wire [`UP(REQ_SEL_BITS)-1:0] rsp_idx;
|
||||
if (NUM_REQS > 1) begin
|
||||
assign rsp_idx = mem_rsp_tag_in_nc[(CORE_TAG_ID_BITS + WSEL_BITS) +: REQ_SEL_BITS];
|
||||
end else begin
|
||||
assign rsp_idx = mem_rsp_tag_id_nc[(CORE_TAG_ID_BITS + WSEL_BITS) +: REQ_SEL_BITS];
|
||||
end else begin
|
||||
assign rsp_idx = 1'b0;
|
||||
end
|
||||
|
||||
|
||||
reg [NUM_REQS-1:0] rsp_nc_valid_r;
|
||||
always @(*) begin
|
||||
rsp_nc_valid_r = '0;
|
||||
rsp_nc_valid_r[rsp_idx] = is_mem_rsp_nc;
|
||||
end
|
||||
|
||||
assign core_rsp_valid_out = core_rsp_valid_in | rsp_nc_valid_r;
|
||||
assign core_rsp_ready_in = core_rsp_ready_out;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || rsp_nc_valid_r[i];
|
||||
assign core_bus_out_if[i].rsp_ready = core_rsp_in_ready[i];
|
||||
end
|
||||
|
||||
if (WORDS_PER_LINE > 1) begin
|
||||
wire [WSEL_BITS-1:0] rsp_wsel = mem_rsp_tag_in_nc[CORE_TAG_ID_BITS +: WSEL_BITS];
|
||||
wire [WSEL_BITS-1:0] rsp_wsel = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS +: WSEL_BITS];
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_data_out[i] = core_rsp_valid_in[i] ?
|
||||
core_rsp_data_in[i] : mem_rsp_data_in[rsp_wsel * CORE_DATA_WIDTH +: CORE_DATA_WIDTH];
|
||||
assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ?
|
||||
core_bus_out_if[i].rsp_data.data : mem_bus_out_if.rsp_data.data[rsp_wsel * CORE_DATA_WIDTH +: CORE_DATA_WIDTH];
|
||||
end
|
||||
end else begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_data_out[i] = core_rsp_valid_in[i] ? core_rsp_data_in[i] : mem_rsp_data_in;
|
||||
assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ? core_bus_out_if[i].rsp_data.data : mem_bus_out_if.rsp_data.data;
|
||||
end
|
||||
end
|
||||
|
||||
wire [(CORE_TAG_ID_BITS + UUID_WIDTH)-1:0] mem_rsp_tag_in_nc2;
|
||||
if (UUID_WIDTH != 0) begin
|
||||
assign mem_rsp_tag_in_nc2 = {mem_rsp_tag_id_nc[(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1 -: UUID_WIDTH], mem_rsp_tag_id_nc[CORE_TAG_ID_BITS-1:0]};
|
||||
end else begin
|
||||
assign mem_rsp_tag_in_nc2 = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS-1:0];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
if (PASSTHRU) begin
|
||||
assign core_rsp_in_tag[i] = mem_rsp_tag_in_nc2;
|
||||
end else if (NC_ENABLE) begin
|
||||
assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_valid ? core_bus_out_if[i].rsp_data.tag : mem_rsp_tag_in_nc2;
|
||||
end else begin
|
||||
assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_data.tag;
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
if (UUID_WIDTH != 0) begin
|
||||
assign core_rsp_tag_out[i] = core_rsp_valid_in[i] ? core_rsp_tag_in_nc[i] : {mem_rsp_tag_in_nc[MEM_TAG_OUT_NC_WIDTH-1 -: UUID_WIDTH], mem_rsp_tag_in_nc[CORE_TAG_ID_BITS-1:0]};
|
||||
end else begin
|
||||
assign core_rsp_tag_out[i] = core_rsp_valid_in[i] ? core_rsp_tag_in_nc[i] : mem_rsp_tag_in_nc[CORE_TAG_ID_BITS-1:0];
|
||||
end
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`CS_WORD_WIDTH + CORE_TAG_WIDTH),
|
||||
.SIZE ((!DIRECT_PASSTHRU) ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF))
|
||||
) core_rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (core_rsp_in_valid[i]),
|
||||
.ready_in (core_rsp_in_ready[i]),
|
||||
.data_in ({core_rsp_in_data[i], core_rsp_in_tag[i]}),
|
||||
.data_out ({core_bus_in_if[i].rsp_data.data, core_bus_in_if[i].rsp_data.tag}),
|
||||
.valid_out (core_bus_in_if[i].rsp_valid),
|
||||
.ready_out (core_bus_in_if[i].rsp_ready)
|
||||
);
|
||||
end
|
||||
|
||||
// memory response handling
|
||||
// handle memory responses ////////////////////////////////////////////////
|
||||
|
||||
if (PASSTHRU != 0) begin
|
||||
assign mem_rsp_valid_out = 1'b0;
|
||||
assign mem_bus_in_if.rsp_valid = 1'b0;
|
||||
assign mem_bus_in_if.rsp_data.data = '0;
|
||||
assign mem_bus_in_if.rsp_data.tag = '0;
|
||||
end else if (NC_ENABLE) begin
|
||||
assign mem_bus_in_if.rsp_valid = mem_bus_out_if.rsp_valid && ~mem_bus_out_if.rsp_data.tag[TAG_SEL_IDX];
|
||||
assign mem_bus_in_if.rsp_data.data = mem_bus_out_if.rsp_data.data;
|
||||
assign mem_bus_in_if.rsp_data.tag = mem_rsp_tag_id_nc[MEM_TAG_IN_WIDTH-1:0];
|
||||
end else begin
|
||||
assign mem_rsp_valid_out = mem_rsp_valid_in && ~mem_rsp_tag_in[NC_TAG_BIT];
|
||||
assign mem_bus_in_if.rsp_valid = mem_bus_out_if.rsp_valid;
|
||||
assign mem_bus_in_if.rsp_data.data = mem_bus_out_if.rsp_data.data;
|
||||
assign mem_bus_in_if.rsp_data.tag = mem_rsp_tag_id_nc;
|
||||
end
|
||||
|
||||
assign mem_rsp_data_out = mem_rsp_data_in;
|
||||
wire [NUM_REQS-1:0] core_rsp_out_valid;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_out_valid[i] = core_bus_out_if[i].rsp_valid;
|
||||
end
|
||||
|
||||
VX_bits_remove #(
|
||||
.N (MEM_TAG_IN_WIDTH + 1),
|
||||
.POS (NC_TAG_BIT)
|
||||
) mem_rsp_tag_out_remove (
|
||||
.data_in (mem_rsp_tag_in[(MEM_TAG_IN_WIDTH + 1)-1:0]),
|
||||
.data_out (mem_rsp_tag_out)
|
||||
);
|
||||
|
||||
assign mem_rsp_ready_in = is_mem_rsp_nc ? (~core_rsp_valid_in[rsp_idx] && core_rsp_ready_out[rsp_idx]) : mem_rsp_ready_out;
|
||||
assign mem_bus_out_if.rsp_ready = is_mem_rsp_nc ? (~core_rsp_out_valid[rsp_idx] && core_rsp_in_ready[rsp_idx]) : mem_bus_in_if.rsp_ready;
|
||||
|
||||
endmodule
|
||||
|
|
67
hw/rtl/cache/VX_cache_cluster.sv
vendored
67
hw/rtl/cache/VX_cache_cluster.sv
vendored
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -24,20 +24,20 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
parameter NUM_REQS = 4,
|
||||
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 16384,
|
||||
parameter CACHE_SIZE = 16384,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 64,
|
||||
parameter LINE_SIZE = 64,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 4,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
parameter WORD_SIZE = 4,
|
||||
|
||||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 2,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MSHR_SIZE = 8,
|
||||
parameter MSHR_SIZE = 8,
|
||||
// Memory Response Queue Size
|
||||
parameter MRSQ_SIZE = 0,
|
||||
// Memory Request Queue Size
|
||||
|
@ -46,6 +46,12 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
|
||||
// Enable dirty bytes on writeback
|
||||
parameter DIRTY_BYTES = 0,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
|
@ -55,12 +61,12 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
// enable bypass for non-cacheable addresses
|
||||
parameter NC_ENABLE = 0,
|
||||
|
||||
// Core response output register
|
||||
parameter CORE_OUT_REG = 0,
|
||||
// Core response output buffer
|
||||
parameter CORE_OUT_BUF = 0,
|
||||
|
||||
// Memory request output register
|
||||
parameter MEM_OUT_REG = 0
|
||||
) (
|
||||
// Memory request output buffer
|
||||
parameter MEM_OUT_BUF = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
@ -74,18 +80,16 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
);
|
||||
localparam NUM_CACHES = `UP(NUM_UNITS);
|
||||
localparam PASSTHRU = (NUM_UNITS == 0);
|
||||
localparam ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES);
|
||||
localparam MEM_TAG_WIDTH = PASSTHRU ? (NC_ENABLE ? `CACHE_NC_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
|
||||
`CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH)) :
|
||||
localparam ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES);
|
||||
localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
|
||||
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
|
||||
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS));
|
||||
|
||||
`STATIC_ASSERT(NUM_INPUTS >= NUM_CACHES, ("invalid parameter"))
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
cache_perf_t perf_cache_tmp[1], perf_cache_unit[NUM_CACHES];
|
||||
`PERF_CACHE_ADD (perf_cache_tmp, perf_cache_unit, 1, NUM_CACHES)
|
||||
assign cache_perf = perf_cache_tmp[0];
|
||||
cache_perf_t perf_cache_unit[NUM_CACHES];
|
||||
`PERF_CACHE_ADD (cache_perf, perf_cache_unit, NUM_CACHES)
|
||||
`endif
|
||||
|
||||
VX_mem_bus_if #(
|
||||
|
@ -98,6 +102,8 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH (ARB_TAG_WIDTH)
|
||||
) arb_core_bus_if[NUM_CACHES * NUM_REQS]();
|
||||
|
||||
`RESET_RELAY_EX (cache_arb_reset, reset, NUM_REQS, `MAX_FANOUT);
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (WORD_SIZE),
|
||||
|
@ -113,8 +119,6 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
`ASSIGN_VX_MEM_BUS_IF (core_bus_tmp_if[j], core_bus_if[j * NUM_REQS + i]);
|
||||
end
|
||||
|
||||
`RESET_RELAY (cache_arb_reset, reset);
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_INPUTS (NUM_INPUTS),
|
||||
.NUM_OUTPUTS (NUM_CACHES),
|
||||
|
@ -122,11 +126,11 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.TAG_SEL_IDX (TAG_SEL_IDX),
|
||||
.ARBITER ("R"),
|
||||
.OUT_REG_REQ ((NUM_INPUTS != NUM_CACHES) ? 2 : 0),
|
||||
.OUT_REG_RSP ((NUM_INPUTS != NUM_CACHES) ? 2 : 0)
|
||||
.REQ_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? 2 : 0),
|
||||
.RSP_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? 2 : 0)
|
||||
) cache_arb (
|
||||
.clk (clk),
|
||||
.reset (cache_arb_reset),
|
||||
.reset (cache_arb_reset[i]),
|
||||
.bus_in_if (core_bus_tmp_if),
|
||||
.bus_out_if (arb_core_bus_tmp_if)
|
||||
);
|
||||
|
@ -136,7 +140,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_CACHES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_CACHES; ++i) begin : caches
|
||||
|
||||
`RESET_RELAY (cache_reset, reset);
|
||||
|
||||
|
@ -153,10 +157,13 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
.MRSQ_SIZE (MRSQ_SIZE),
|
||||
.MREQ_SIZE (MREQ_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.DIRTY_BYTES (DIRTY_BYTES),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.TAG_WIDTH (ARB_TAG_WIDTH),
|
||||
.CORE_OUT_REG ((NUM_INPUTS != NUM_CACHES) ? 2 : CORE_OUT_REG),
|
||||
.MEM_OUT_REG ((NUM_CACHES > 1) ? 2 : MEM_OUT_REG),
|
||||
.TAG_SEL_IDX (TAG_SEL_IDX),
|
||||
.CORE_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? 2 : CORE_OUT_BUF),
|
||||
.MEM_OUT_BUF ((NUM_CACHES > 1) ? 2 : MEM_OUT_BUF),
|
||||
.NC_ENABLE (NC_ENABLE),
|
||||
.PASSTHRU (PASSTHRU)
|
||||
) cache_wrap (
|
||||
|
@ -170,8 +177,6 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
);
|
||||
end
|
||||
|
||||
`RESET_RELAY (mem_arb_reset, reset);
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (LINE_SIZE),
|
||||
.TAG_WIDTH (MEM_TAG_WIDTH + `ARB_SEL_BITS(NUM_CACHES, 1))
|
||||
|
@ -181,13 +186,13 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
.NUM_INPUTS (NUM_CACHES),
|
||||
.DATA_SIZE (LINE_SIZE),
|
||||
.TAG_WIDTH (MEM_TAG_WIDTH),
|
||||
.TAG_SEL_IDX (1), // Skip 0 for NC flag
|
||||
.TAG_SEL_IDX (TAG_SEL_IDX),
|
||||
.ARBITER ("R"),
|
||||
.OUT_REG_REQ ((NUM_CACHES > 1) ? 2 : 0),
|
||||
.OUT_REG_RSP ((NUM_CACHES > 1) ? 2 : 0)
|
||||
.REQ_OUT_BUF ((NUM_CACHES > 1) ? 2 : 0),
|
||||
.RSP_OUT_BUF ((NUM_CACHES > 1) ? 2 : 0)
|
||||
) mem_arb (
|
||||
.clk (clk),
|
||||
.reset (mem_arb_reset),
|
||||
.reset (reset),
|
||||
.bus_in_if (cache_mem_bus_if),
|
||||
.bus_out_if (mem_bus_tmp_if)
|
||||
);
|
||||
|
|
161
hw/rtl/cache/VX_cache_data.sv
vendored
161
hw/rtl/cache/VX_cache_data.sv
vendored
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -17,17 +17,21 @@ module VX_cache_data #(
|
|||
parameter `STRING INSTANCE_ID= "",
|
||||
parameter BANK_ID = 0,
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
parameter CACHE_SIZE = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 16,
|
||||
parameter LINE_SIZE = 16,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 1,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 1,
|
||||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
// Enable dirty bytes on writeback
|
||||
parameter DIRTY_BYTES = 0,
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0
|
||||
) (
|
||||
|
@ -40,60 +44,101 @@ module VX_cache_data #(
|
|||
|
||||
input wire stall,
|
||||
|
||||
input wire init,
|
||||
input wire read,
|
||||
input wire fill,
|
||||
input wire fill,
|
||||
input wire flush,
|
||||
input wire write,
|
||||
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
|
||||
input wire [`UP(`CS_WORD_SEL_BITS)-1:0] wsel,
|
||||
input wire [WORD_SIZE-1:0] byteen,
|
||||
input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data,
|
||||
input wire [`CS_WORD_WIDTH-1:0] write_data,
|
||||
input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] write_data,
|
||||
input wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen,
|
||||
input wire [NUM_WAYS-1:0] way_sel,
|
||||
|
||||
output wire [`CS_WORD_WIDTH-1:0] read_data
|
||||
output wire [`CS_WORD_WIDTH-1:0] read_data,
|
||||
output wire [`CS_LINE_WIDTH-1:0] dirty_data,
|
||||
output wire [LINE_SIZE-1:0] dirty_byteen
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_PARAM (BANK_ID)
|
||||
`UNUSED_PARAM (WORD_SIZE)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (stall)
|
||||
`UNUSED_VAR (line_addr)
|
||||
`UNUSED_VAR (init)
|
||||
`UNUSED_VAR (read)
|
||||
`UNUSED_VAR (flush)
|
||||
|
||||
localparam BYTEENW = (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) ? (LINE_SIZE * NUM_WAYS) : 1;
|
||||
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] wdata;
|
||||
wire [BYTEENW-1:0] wren;
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin
|
||||
reg [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] wdata_r;
|
||||
reg [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wren_r;
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_rdata;
|
||||
wire [`LOG2UP(NUM_WAYS)-1:0] way_idx;
|
||||
|
||||
always @(*) begin
|
||||
wdata_r = {`CS_WORDS_PER_LINE{write_data}};
|
||||
wren_r = '0;
|
||||
wren_r[wsel] = byteen;
|
||||
if (WRITEBACK) begin
|
||||
if (DIRTY_BYTES) begin
|
||||
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_rdata;
|
||||
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_wdata;
|
||||
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin
|
||||
wire [LINE_SIZE-1:0] wdata = write ? (bs_rdata[i] | write_byteen) : ((fill || flush) ? '0 : bs_rdata[i]);
|
||||
assign bs_wdata[i] = init ? '0 : (way_sel[i] ? wdata : bs_rdata[i]);
|
||||
end
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (LINE_SIZE * NUM_WAYS),
|
||||
.SIZE (`CS_LINES_PER_BANK)
|
||||
) byteen_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (write || fill || flush),
|
||||
.write (init || write || fill || flush),
|
||||
.wren (1'b1),
|
||||
.addr (line_sel),
|
||||
.wdata (bs_wdata),
|
||||
.rdata (bs_rdata)
|
||||
);
|
||||
|
||||
assign dirty_byteen = bs_rdata[way_idx];
|
||||
end else begin
|
||||
assign dirty_byteen = {LINE_SIZE{1'b1}};
|
||||
end
|
||||
|
||||
// order the data layout to perform ways multiplexing last
|
||||
// this allows performing onehot encoding of the way index in parallel with BRAM read.
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w;
|
||||
|
||||
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] flipped_rdata;
|
||||
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
|
||||
assign wdata[i] = fill ? {NUM_WAYS{fill_data[i]}} : {NUM_WAYS{wdata_r[i]}};
|
||||
for (genvar j = 0; j < NUM_WAYS; ++j) begin
|
||||
assign wren_w[i][j] = (fill ? {WORD_SIZE{1'b1}} : wren_r[i])
|
||||
& {WORD_SIZE{((NUM_WAYS == 1) || way_sel[j])}};
|
||||
assign flipped_rdata[j][i] = line_rdata[i][j];
|
||||
end
|
||||
end
|
||||
assign wren = wren_w;
|
||||
assign dirty_data = flipped_rdata[way_idx];
|
||||
end else begin
|
||||
assign dirty_byteen = '0;
|
||||
assign dirty_data = '0;
|
||||
end
|
||||
|
||||
// order the data layout to perform ways multiplexing last.
|
||||
// this allows converting way index to binary in parallel with BRAM read access and way selection.
|
||||
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
|
||||
wire [BYTEENW-1:0] line_wren;
|
||||
|
||||
if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w;
|
||||
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
|
||||
for (genvar j = 0; j < NUM_WAYS; ++j) begin
|
||||
assign line_wdata[i][j] = (fill || !WRITE_ENABLE) ? fill_data[i] : write_data[i];
|
||||
assign wren_w[i][j] = ((fill || !WRITE_ENABLE) ? {WORD_SIZE{1'b1}} : write_byteen[i])
|
||||
& {WORD_SIZE{(way_sel[j] || (NUM_WAYS == 1))}};
|
||||
end
|
||||
end
|
||||
assign line_wren = wren_w;
|
||||
end else begin
|
||||
`UNUSED_VAR (write)
|
||||
`UNUSED_VAR (byteen)
|
||||
`UNUSED_VAR (write_byteen)
|
||||
`UNUSED_VAR (write_data)
|
||||
assign wdata = fill_data;
|
||||
assign wren = fill;
|
||||
assign line_wdata = fill_data;
|
||||
assign line_wren = fill;
|
||||
end
|
||||
|
||||
wire [`LOG2UP(NUM_WAYS)-1:0] way_idx;
|
||||
|
||||
VX_onehot_encoder #(
|
||||
.N (NUM_WAYS)
|
||||
|
@ -103,50 +148,52 @@ module VX_cache_data #(
|
|||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] rdata;
|
||||
wire line_read = (read && ~stall)
|
||||
|| (WRITEBACK && (fill || flush));
|
||||
|
||||
wire line_write = write || fill;
|
||||
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (`CS_LINE_WIDTH * NUM_WAYS),
|
||||
.SIZE (`CS_LINES_PER_BANK),
|
||||
.WRENW (BYTEENW),
|
||||
.NO_RWCHECK (1)
|
||||
.NO_RWCHECK (1),
|
||||
.RW_ASSERT (1)
|
||||
) data_store (
|
||||
.clk (clk),
|
||||
.read (1'b1),
|
||||
.write (write || fill),
|
||||
.wren (wren),
|
||||
.reset (reset),
|
||||
.read (line_read),
|
||||
.write (line_write),
|
||||
.wren (line_wren),
|
||||
.addr (line_sel),
|
||||
.wdata (wdata),
|
||||
.rdata (rdata)
|
||||
.wdata (line_wdata),
|
||||
.rdata (line_rdata)
|
||||
);
|
||||
|
||||
wire [NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata;
|
||||
|
||||
if (`CS_WORDS_PER_LINE > 1) begin
|
||||
assign per_way_rdata = rdata[wsel];
|
||||
assign per_way_rdata = line_rdata[wsel];
|
||||
end else begin
|
||||
`UNUSED_VAR (wsel)
|
||||
assign per_way_rdata = rdata;
|
||||
end
|
||||
|
||||
assign per_way_rdata = line_rdata;
|
||||
end
|
||||
assign read_data = per_way_rdata[way_idx];
|
||||
|
||||
`UNUSED_VAR (stall)
|
||||
|
||||
`ifdef DBG_TRACE_CACHE_DATA
|
||||
always @(posedge clk) begin
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
always @(posedge clk) begin
|
||||
if (fill && ~stall) begin
|
||||
`TRACE(3, ("%d: %s-bank%0d data-fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data));
|
||||
`TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data));
|
||||
end
|
||||
if (flush && ~stall) begin
|
||||
`TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_byteen, dirty_data));
|
||||
end
|
||||
if (read && ~stall) begin
|
||||
`TRACE(3, ("%d: %s-bank%0d data-read: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, read_data, req_uuid));
|
||||
end
|
||||
`TRACE(3, ("%d: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid));
|
||||
end
|
||||
if (write && ~stall) begin
|
||||
`TRACE(3, ("%d: %s-bank%0d data-write: addr=0x%0h, way=%b, blk_addr=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, byteen, write_data, req_uuid));
|
||||
end
|
||||
end
|
||||
`TRACE(3, ("%d: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
26
hw/rtl/cache/VX_cache_define.vh
vendored
26
hw/rtl/cache/VX_cache_define.vh
vendored
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,7 +14,7 @@
|
|||
`ifndef VX_CACHE_DEFINE_VH
|
||||
`define VX_CACHE_DEFINE_VH
|
||||
|
||||
`include "VX_define.vh"
|
||||
`include "VX_define.vh"
|
||||
|
||||
`define CS_REQ_SEL_BITS `CLOG2(NUM_REQS)
|
||||
|
||||
|
@ -50,7 +50,7 @@
|
|||
`define CS_TAG_SEL_ADDR_START (1+`CS_LINE_SEL_ADDR_END)
|
||||
`define CS_TAG_SEL_ADDR_END (`CS_WORD_ADDR_WIDTH-1)
|
||||
|
||||
`define CS_LINE_TAG_ADDR(x) x[`CS_LINE_ADDR_WIDTH-1 : `CS_LINE_SEL_BITS]
|
||||
`define CS_LINE_ADDR_TAG(x) x[`CS_LINE_ADDR_WIDTH-1 : `CS_LINE_SEL_BITS]
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
@ -64,14 +64,14 @@
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define PERF_CACHE_ADD(dst, src, dcount, scount) \
|
||||
`PERF_COUNTER_ADD (dst, src, reads, `PERF_CTR_BITS, dcount, scount, (((scount + dcount - 1) / dcount) > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, writes, `PERF_CTR_BITS, dcount, scount, (((scount + dcount - 1) / dcount) > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, read_misses, `PERF_CTR_BITS, dcount, scount, (((scount + dcount - 1) / dcount) > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, write_misses, `PERF_CTR_BITS, dcount, scount, (((scount + dcount - 1) / dcount) > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, bank_stalls, `PERF_CTR_BITS, dcount, scount, (((scount + dcount - 1) / dcount) > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, mshr_stalls, `PERF_CTR_BITS, dcount, scount, (((scount + dcount - 1) / dcount) > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, mem_stalls, `PERF_CTR_BITS, dcount, scount, (((scount + dcount - 1) / dcount) > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, crsp_stalls, `PERF_CTR_BITS, dcount, scount, (((scount + dcount - 1) / dcount) > 1))
|
||||
`define PERF_CACHE_ADD(dst, src, count) \
|
||||
`PERF_COUNTER_ADD (dst, src, reads, `PERF_CTR_BITS, count, (count > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, writes, `PERF_CTR_BITS, count, (count > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, read_misses, `PERF_CTR_BITS, count, (count > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, write_misses, `PERF_CTR_BITS, count, (count > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, bank_stalls, `PERF_CTR_BITS, count, (count > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, mshr_stalls, `PERF_CTR_BITS, count, (count > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, mem_stalls, `PERF_CTR_BITS, count, (count > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, crsp_stalls, `PERF_CTR_BITS, count, (count > 1))
|
||||
|
||||
`endif // VX_CACHE_DEFINE_VH
|
||||
|
|
165
hw/rtl/cache/VX_cache_flush.sv
vendored
Normal file
165
hw/rtl/cache/VX_cache_flush.sv
vendored
Normal file
|
@ -0,0 +1,165 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_cache_flush #(
|
||||
// Number of Word requests per cycle
|
||||
parameter NUM_REQS = 4,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Bank select latency
|
||||
parameter BANK_SEL_LATENCY = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
VX_mem_bus_if.slave core_bus_in_if [NUM_REQS],
|
||||
VX_mem_bus_if.master core_bus_out_if [NUM_REQS],
|
||||
input wire [NUM_BANKS-1:0] bank_req_fire,
|
||||
output wire [NUM_BANKS-1:0] flush_begin,
|
||||
input wire [NUM_BANKS-1:0] flush_end
|
||||
);
|
||||
localparam STATE_IDLE = 0;
|
||||
localparam STATE_WAIT1 = 1;
|
||||
localparam STATE_FLUSH = 2;
|
||||
localparam STATE_WAIT2 = 3;
|
||||
localparam STATE_DONE = 4;
|
||||
|
||||
reg [2:0] state, state_n;
|
||||
|
||||
// track in-flight core requests
|
||||
|
||||
wire no_inflight_reqs;
|
||||
|
||||
if (BANK_SEL_LATENCY != 0) begin
|
||||
|
||||
localparam NUM_REQS_W = `CLOG2(NUM_REQS+1);
|
||||
localparam NUM_BANKS_W = `CLOG2(NUM_BANKS+1);
|
||||
|
||||
wire [NUM_REQS-1:0] core_bus_out_fire;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_bus_out_fire[i] = core_bus_out_if[i].req_valid && core_bus_out_if[i].req_ready;
|
||||
end
|
||||
|
||||
wire [NUM_REQS_W-1:0] core_bus_out_cnt;
|
||||
wire [NUM_BANKS_W-1:0] bank_req_cnt;
|
||||
|
||||
`POP_COUNT(core_bus_out_cnt, core_bus_out_fire);
|
||||
`POP_COUNT(bank_req_cnt, bank_req_fire);
|
||||
`UNUSED_VAR (core_bus_out_cnt)
|
||||
|
||||
VX_pending_size #(
|
||||
.SIZE (BANK_SEL_LATENCY * NUM_BANKS),
|
||||
.INCRW (NUM_BANKS_W),
|
||||
.DECRW (NUM_BANKS_W)
|
||||
) pending_size (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.incr (NUM_BANKS_W'(core_bus_out_cnt)),
|
||||
.decr (bank_req_cnt),
|
||||
.empty (no_inflight_reqs),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
`UNUSED_PIN (full),
|
||||
`UNUSED_PIN (alm_full),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
end else begin
|
||||
assign no_inflight_reqs = 0;
|
||||
`UNUSED_VAR (bank_req_fire)
|
||||
end
|
||||
|
||||
reg [NUM_BANKS-1:0] flush_done, flush_done_n;
|
||||
|
||||
wire [NUM_REQS-1:0] flush_req_mask;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign flush_req_mask[i] = core_bus_in_if[i].req_valid && core_bus_in_if[i].req_data.atype[`ADDR_TYPE_FLUSH];
|
||||
end
|
||||
wire flush_req_enable = (| flush_req_mask);
|
||||
|
||||
reg [NUM_REQS-1:0] lock_released, lock_released_n;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
wire input_enable = ~flush_req_enable || lock_released[i];
|
||||
assign core_bus_out_if[i].req_valid = core_bus_in_if[i].req_valid && input_enable;
|
||||
assign core_bus_out_if[i].req_data = core_bus_in_if[i].req_data;
|
||||
assign core_bus_in_if[i].req_ready = core_bus_out_if[i].req_ready && input_enable;
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_bus_in_if[i].rsp_valid = core_bus_out_if[i].rsp_valid;
|
||||
assign core_bus_in_if[i].rsp_data = core_bus_out_if[i].rsp_data;
|
||||
assign core_bus_out_if[i].rsp_ready = core_bus_in_if[i].rsp_ready;
|
||||
end
|
||||
|
||||
wire [NUM_REQS-1:0] core_bus_out_ready;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_bus_out_ready[i] = core_bus_out_if[i].req_ready;
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
state_n = state;
|
||||
flush_done_n = flush_done;
|
||||
lock_released_n = lock_released;
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
if (flush_req_enable) begin
|
||||
state_n = (BANK_SEL_LATENCY != 0) ? STATE_WAIT1 : STATE_FLUSH;
|
||||
end
|
||||
end
|
||||
STATE_WAIT1: begin
|
||||
if (no_inflight_reqs) begin
|
||||
state_n = STATE_FLUSH;
|
||||
end
|
||||
end
|
||||
STATE_FLUSH: begin
|
||||
// generate a flush request pulse
|
||||
state_n = STATE_WAIT2;
|
||||
end
|
||||
STATE_WAIT2: begin
|
||||
// wait for all banks to finish flushing
|
||||
flush_done_n = flush_done | flush_end;
|
||||
if (flush_done_n == {NUM_BANKS{1'b1}}) begin
|
||||
state_n = STATE_DONE;
|
||||
flush_done_n = '0;
|
||||
// only release current flush requests
|
||||
// and keep normal requests locked
|
||||
lock_released_n = flush_req_mask;
|
||||
end
|
||||
end
|
||||
STATE_DONE: begin
|
||||
// wait until released flush requests are issued
|
||||
// when returning to IDLE state other requests will unlock
|
||||
lock_released_n = lock_released & ~core_bus_out_ready;
|
||||
if (lock_released_n == 0) begin
|
||||
state_n = STATE_IDLE;
|
||||
end
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= STATE_IDLE;
|
||||
flush_done <= '0;
|
||||
lock_released <= '0;
|
||||
end else begin
|
||||
state <= state_n;
|
||||
flush_done <= flush_done_n;
|
||||
lock_released <= lock_released_n;
|
||||
end
|
||||
end
|
||||
|
||||
assign flush_begin = {NUM_BANKS{state == STATE_FLUSH}};
|
||||
|
||||
endmodule
|
51
hw/rtl/cache/VX_cache_init.sv
vendored
51
hw/rtl/cache/VX_cache_init.sv
vendored
|
@ -1,51 +0,0 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_cache_init #(
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 16,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
output wire [`CS_LINE_SEL_BITS-1:0] addr_out,
|
||||
output wire valid_out
|
||||
);
|
||||
reg enabled;
|
||||
reg [`CS_LINE_SEL_BITS-1:0] line_ctr;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
enabled <= 1;
|
||||
line_ctr <= '0;
|
||||
end else begin
|
||||
if (enabled) begin
|
||||
if (line_ctr == ((2 ** `CS_LINE_SEL_BITS)-1)) begin
|
||||
enabled <= 0;
|
||||
end
|
||||
line_ctr <= line_ctr + `CS_LINE_SEL_BITS'(1);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign addr_out = line_ctr;
|
||||
assign valid_out = enabled;
|
||||
|
||||
endmodule
|
154
hw/rtl/cache/VX_cache_mshr.sv
vendored
154
hw/rtl/cache/VX_cache_mshr.sv
vendored
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -13,25 +13,53 @@
|
|||
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
// this is an implementation of a pipelined multi-banked cache
|
||||
// we allocate a free slot from the MSHR before processing a core request
|
||||
// and release the slot when we get a cache hit.
|
||||
// during a memory fill response we initiate the replay sequence
|
||||
// and dequeue all associated pending entries.
|
||||
|
||||
// This is an implementation of a MSHR for pipelined multi-banked cache.
|
||||
// We allocate a free slot from the MSHR before processing a core request
|
||||
// and release the slot when we get a cache hit. This ensures that we do not
|
||||
// enter the cache bank pipeline when the MSHR is full.
|
||||
// During a memory fill response, we initiate the replay sequence
|
||||
// and dequeue all pending entries for the given cache line.
|
||||
//
|
||||
// Pending core requests stored in the MSHR are sorted by the order of
|
||||
// arrival and are dequeued in the same order.
|
||||
// Each entry has a next pointer to the next entry pending for the same cache line.
|
||||
//
|
||||
// During the fill operation, the MSHR will release the MSHR entry at fill_id
|
||||
// which represents the first request in the pending list that initiated the memory fill.
|
||||
//
|
||||
// The dequeue operation directly follows the fill operation and will release
|
||||
// all the subsequent entries linked to fill_id (pending the same cache line).
|
||||
//
|
||||
// During the allocation operation, the MSHR will allocate the next free slot
|
||||
// for the incoming core request. We return the allocated slot id as well as
|
||||
// the slot id of the previous entry for the same cache line. This is used to
|
||||
// link the new entry to the pending list during finalization.
|
||||
//
|
||||
// The lookup operation is used to find all pending entries for a given cache line.
|
||||
// This is used by the cache bank to determine if a cache miss is already pending
|
||||
// and therefore avoid issuing a memory fill request.
|
||||
//
|
||||
// The finalize operation is used to release the allocated MSHR entry if we had a hit.
|
||||
// If we had a miss and finalize_pending is true, we link the allocated entry to
|
||||
// its corresponding pending list (via finalize_prev).
|
||||
//
|
||||
// Warning: This MSHR implementation is strongly coupled with the bank pipeline
|
||||
// and as such changes to either module requires careful evaluation.
|
||||
// This implementation makes the following assumptions:
|
||||
// (1) two-cycle pipeline: st0 and st1.
|
||||
// (2) core request flow: st0: allocate / lookup, st1: finalize.
|
||||
// (3) the first dequeue after the fill should happen in st0, when the fill is in st1
|
||||
// this is enforced inside the bank by "rdw_hazard_st0".
|
||||
//
|
||||
// This architecture implements three pipeline stages:
|
||||
// - Arbitration: cache bank arbitration before entering pipeline.
|
||||
// fill and dequeue operations are executed at this stage.
|
||||
// - stage 0: cache bank tag access stage.
|
||||
// allocate and lookup operations are executed at this stage.
|
||||
// - stage 1: cache bank data access stage.
|
||||
// finalize operation is executed at this stage.
|
||||
//
|
||||
|
||||
module VX_cache_mshr #(
|
||||
parameter `STRING INSTANCE_ID= "",
|
||||
parameter BANK_ID = 0,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 16,
|
||||
parameter LINE_SIZE = 16,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Miss Reservation Queue Knob
|
||||
|
@ -51,26 +79,12 @@ module VX_cache_mshr #(
|
|||
input wire[`UP(UUID_WIDTH)-1:0] fin_req_uuid,
|
||||
`IGNORE_UNUSED_END
|
||||
|
||||
// allocate
|
||||
input wire allocate_valid,
|
||||
input wire [`CS_LINE_ADDR_WIDTH-1:0] allocate_addr,
|
||||
input wire allocate_rw,
|
||||
input wire [DATA_WIDTH-1:0] allocate_data,
|
||||
output wire [MSHR_ADDR_WIDTH-1:0] allocate_id,
|
||||
output wire [MSHR_ADDR_WIDTH-1:0] allocate_tail,
|
||||
output wire allocate_ready,
|
||||
|
||||
// lookup
|
||||
input wire lookup_valid,
|
||||
input wire [`CS_LINE_ADDR_WIDTH-1:0] lookup_addr,
|
||||
output wire [MSHR_SIZE-1:0] lookup_matches,
|
||||
|
||||
// memory fill
|
||||
input wire fill_valid,
|
||||
input wire [MSHR_ADDR_WIDTH-1:0] fill_id,
|
||||
output wire [`CS_LINE_ADDR_WIDTH-1:0] fill_addr,
|
||||
|
||||
// dequeue
|
||||
|
||||
// dequeue
|
||||
output wire dequeue_valid,
|
||||
output wire [`CS_LINE_ADDR_WIDTH-1:0] dequeue_addr,
|
||||
output wire dequeue_rw,
|
||||
|
@ -78,30 +92,45 @@ module VX_cache_mshr #(
|
|||
output wire [MSHR_ADDR_WIDTH-1:0] dequeue_id,
|
||||
input wire dequeue_ready,
|
||||
|
||||
// allocate
|
||||
input wire allocate_valid,
|
||||
input wire [`CS_LINE_ADDR_WIDTH-1:0] allocate_addr,
|
||||
input wire allocate_rw,
|
||||
input wire [DATA_WIDTH-1:0] allocate_data,
|
||||
output wire [MSHR_ADDR_WIDTH-1:0] allocate_id,
|
||||
output wire [MSHR_ADDR_WIDTH-1:0] allocate_prev,
|
||||
output wire allocate_ready,
|
||||
|
||||
// lookup
|
||||
input wire lookup_valid,
|
||||
input wire [`CS_LINE_ADDR_WIDTH-1:0] lookup_addr,
|
||||
output wire [MSHR_SIZE-1:0] lookup_pending,
|
||||
output wire [MSHR_SIZE-1:0] lookup_rw,
|
||||
|
||||
// finalize
|
||||
input wire finalize_valid,
|
||||
input wire finalize_release,
|
||||
input wire finalize_pending,
|
||||
input wire [MSHR_ADDR_WIDTH-1:0] finalize_id,
|
||||
input wire [MSHR_ADDR_WIDTH-1:0] finalize_tail
|
||||
input wire [MSHR_ADDR_WIDTH-1:0] finalize_prev
|
||||
);
|
||||
`UNUSED_PARAM (BANK_ID)
|
||||
|
||||
`UNUSED_PARAM (BANK_ID)
|
||||
|
||||
reg [`CS_LINE_ADDR_WIDTH-1:0] addr_table [MSHR_SIZE-1:0];
|
||||
reg [MSHR_ADDR_WIDTH-1:0] next_index [MSHR_SIZE-1:0];
|
||||
|
||||
reg [MSHR_SIZE-1:0] valid_table, valid_table_n;
|
||||
reg [MSHR_SIZE-1:0] next_table, next_table_x, next_table_n;
|
||||
reg [MSHR_SIZE-1:0] write_table;
|
||||
|
||||
|
||||
reg allocate_rdy, allocate_rdy_n;
|
||||
reg [MSHR_ADDR_WIDTH-1:0] allocate_id_r, allocate_id_n;
|
||||
|
||||
|
||||
reg dequeue_val, dequeue_val_n;
|
||||
reg [MSHR_ADDR_WIDTH-1:0] dequeue_id_r, dequeue_id_n;
|
||||
|
||||
wire [MSHR_ADDR_WIDTH-1:0] tail_idx;
|
||||
|
||||
wire [MSHR_ADDR_WIDTH-1:0] prev_idx;
|
||||
|
||||
wire allocate_fire = allocate_valid && allocate_ready;
|
||||
wire dequeue_fire = dequeue_valid && dequeue_ready;
|
||||
|
||||
|
@ -121,9 +150,9 @@ module VX_cache_mshr #(
|
|||
|
||||
VX_onehot_encoder #(
|
||||
.N (MSHR_SIZE)
|
||||
) tail_sel (
|
||||
) prev_sel (
|
||||
.data_in (addr_matches & ~next_table_x),
|
||||
.data_out (tail_idx),
|
||||
.data_out (prev_idx),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
|
@ -152,7 +181,7 @@ module VX_cache_mshr #(
|
|||
valid_table_n[finalize_id] = 0;
|
||||
end
|
||||
if (finalize_pending) begin
|
||||
next_table_x[finalize_tail] = 1;
|
||||
next_table_x[finalize_prev] = 1;
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -180,21 +209,21 @@ module VX_cache_mshr #(
|
|||
end
|
||||
|
||||
if (finalize_valid && finalize_pending) begin
|
||||
next_index[finalize_tail] <= finalize_id;
|
||||
next_index[finalize_prev] <= finalize_id;
|
||||
end
|
||||
|
||||
dequeue_id_r <= dequeue_id_n;
|
||||
allocate_id_r <= allocate_id_n;
|
||||
next_table <= next_table_n;
|
||||
end
|
||||
|
||||
`RUNTIME_ASSERT((~allocate_fire || ~valid_table[allocate_id_r]), ("%t: *** %s-bank%0d inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, BANK_ID,
|
||||
|
||||
`RUNTIME_ASSERT((~allocate_fire || ~valid_table[allocate_id_r]), ("%t: *** %s inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id_r, lkp_req_uuid))
|
||||
|
||||
`RUNTIME_ASSERT((~finalize_valid || valid_table[finalize_id]), ("%t: *** %s-bank%0d invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, BANK_ID,
|
||||
`RUNTIME_ASSERT((~finalize_valid || valid_table[finalize_id]), ("%t: *** %s invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_table[finalize_id], BANK_ID), finalize_id, fin_req_uuid))
|
||||
|
||||
`RUNTIME_ASSERT((~fill_valid || valid_table[fill_id]), ("%t: *** %s-bank%0d invalid fill: addr=0x%0h, id=%0d", $time, INSTANCE_ID, BANK_ID,
|
||||
|
||||
`RUNTIME_ASSERT((~fill_valid || valid_table[fill_id]), ("%t: *** %s invalid fill: addr=0x%0h, id=%0d", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), fill_id))
|
||||
|
||||
VX_dp_ram #(
|
||||
|
@ -203,10 +232,11 @@ module VX_cache_mshr #(
|
|||
.LUTRAM (1)
|
||||
) entries (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (1'b1),
|
||||
.write (allocate_valid),
|
||||
`UNUSED_PIN (wren),
|
||||
.waddr (allocate_id_r),
|
||||
.wren (1'b1),
|
||||
.waddr (allocate_id_r),
|
||||
.wdata (allocate_data),
|
||||
.raddr (dequeue_id_r),
|
||||
.rdata (dequeue_data)
|
||||
|
@ -216,18 +246,20 @@ module VX_cache_mshr #(
|
|||
|
||||
assign allocate_ready = allocate_rdy;
|
||||
assign allocate_id = allocate_id_r;
|
||||
assign allocate_tail = tail_idx;
|
||||
assign allocate_prev = prev_idx;
|
||||
|
||||
assign dequeue_valid = dequeue_val;
|
||||
assign dequeue_addr = addr_table[dequeue_id_r];
|
||||
assign dequeue_rw = write_table[dequeue_id_r];
|
||||
assign dequeue_id = dequeue_id_r;
|
||||
|
||||
assign lookup_matches = addr_matches & ~write_table;
|
||||
// return pending entries for the given cache line
|
||||
assign lookup_pending = addr_matches;
|
||||
assign lookup_rw = write_table;
|
||||
|
||||
`UNUSED_VAR (lookup_valid)
|
||||
|
||||
`ifdef DBG_TRACE_CACHE_MSHR
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
reg show_table;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
@ -236,22 +268,22 @@ module VX_cache_mshr #(
|
|||
show_table <= allocate_fire || lookup_valid || finalize_valid || fill_valid || dequeue_fire;
|
||||
end
|
||||
if (allocate_fire)
|
||||
`TRACE(3, ("%d: %s-bank%0d mshr-allocate: addr=0x%0h, tail=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_tail, allocate_id, lkp_req_uuid));
|
||||
`TRACE(3, ("%d: %s allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_prev, allocate_id, lkp_req_uuid));
|
||||
if (lookup_valid)
|
||||
`TRACE(3, ("%d: %s-bank%0d mshr-lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID, BANK_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_matches, lkp_req_uuid));
|
||||
`TRACE(3, ("%d: %s lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_pending, lkp_req_uuid));
|
||||
if (finalize_valid)
|
||||
`TRACE(3, ("%d: %s-bank%0d mshr-finalize release=%b, pending=%b, tail=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID,
|
||||
finalize_release, finalize_pending, finalize_tail, finalize_id, fin_req_uuid));
|
||||
`TRACE(3, ("%d: %s finalize release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
finalize_release, finalize_pending, finalize_prev, finalize_id, fin_req_uuid));
|
||||
if (fill_valid)
|
||||
`TRACE(3, ("%d: %s-bank%0d mshr-fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID, BANK_ID,
|
||||
`TRACE(3, ("%d: %s fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), `CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id));
|
||||
if (dequeue_fire)
|
||||
`TRACE(3, ("%d: %s-bank%0d mshr-dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID,
|
||||
`TRACE(3, ("%d: %s dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_uuid));
|
||||
if (show_table) begin
|
||||
`TRACE(3, ("%d: %s-bank%0d mshr-table", $time, INSTANCE_ID, BANK_ID));
|
||||
`TRACE(3, ("%d: %s table", $time, INSTANCE_ID));
|
||||
for (integer i = 0; i < MSHR_SIZE; ++i) begin
|
||||
if (valid_table[i]) begin
|
||||
`TRACE(3, (" %0d=0x%0h", i, `CS_LINE_TO_FULL_ADDR(addr_table[i], BANK_ID)));
|
||||
|
@ -264,7 +296,7 @@ module VX_cache_mshr #(
|
|||
end
|
||||
end
|
||||
`TRACE(3, ("\n"));
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
|
|
142
hw/rtl/cache/VX_cache_tags.sv
vendored
142
hw/rtl/cache/VX_cache_tags.sv
vendored
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -17,15 +17,17 @@ module VX_cache_tags #(
|
|||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter BANK_ID = 0,
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
parameter CACHE_SIZE = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 16,
|
||||
parameter LINE_SIZE = 16,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 1,
|
||||
parameter NUM_WAYS = 1,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 1,
|
||||
parameter WORD_SIZE = 1,
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0
|
||||
) (
|
||||
|
@ -38,79 +40,137 @@ module VX_cache_tags #(
|
|||
|
||||
input wire stall,
|
||||
|
||||
// read/fill
|
||||
// init/fill/lookup
|
||||
input wire init,
|
||||
input wire flush,
|
||||
input wire fill,
|
||||
input wire write,
|
||||
input wire lookup,
|
||||
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
|
||||
input wire fill,
|
||||
input wire init,
|
||||
output wire [NUM_WAYS-1:0] way_sel,
|
||||
output wire [NUM_WAYS-1:0] tag_matches
|
||||
input wire [NUM_WAYS-1:0] way_sel,
|
||||
output wire [NUM_WAYS-1:0] tag_matches,
|
||||
|
||||
// eviction
|
||||
output wire evict_dirty,
|
||||
output wire [NUM_WAYS-1:0] evict_way,
|
||||
output wire [`CS_TAG_SEL_BITS-1:0] evict_tag
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_PARAM (BANK_ID)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (lookup)
|
||||
|
||||
localparam TAG_WIDTH = 1 + `CS_TAG_SEL_BITS;
|
||||
// valid, dirty, tag
|
||||
localparam TAG_WIDTH = 1 + WRITEBACK + `CS_TAG_SEL_BITS;
|
||||
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
|
||||
wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_TAG_ADDR(line_addr);
|
||||
wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_ADDR_TAG(line_addr);
|
||||
|
||||
wire [NUM_WAYS-1:0][`CS_TAG_SEL_BITS-1:0] read_tag;
|
||||
wire [NUM_WAYS-1:0] read_valid;
|
||||
wire [NUM_WAYS-1:0] read_dirty;
|
||||
|
||||
if (NUM_WAYS > 1) begin
|
||||
reg [NUM_WAYS-1:0] repl_way;
|
||||
reg [NUM_WAYS-1:0] evict_way_r;
|
||||
// cyclic assignment of replacement way
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
repl_way <= 1;
|
||||
end else if (~stall) begin // hold the value on stalls prevent filling different slots twice
|
||||
repl_way <= {repl_way[NUM_WAYS-2:0], repl_way[NUM_WAYS-1]};
|
||||
evict_way_r <= 1;
|
||||
end else if (~stall) begin // holding the value on stalls prevents filling different slots twice
|
||||
evict_way_r <= {evict_way_r[NUM_WAYS-2:0], evict_way_r[NUM_WAYS-1]};
|
||||
end
|
||||
end
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin
|
||||
assign way_sel[i] = fill && repl_way[i];
|
||||
end
|
||||
|
||||
assign evict_way = fill ? evict_way_r : way_sel;
|
||||
|
||||
VX_onehot_mux #(
|
||||
.DATAW (`CS_TAG_SEL_BITS),
|
||||
.N (NUM_WAYS)
|
||||
) evict_tag_sel (
|
||||
.data_in (read_tag),
|
||||
.sel_in (evict_way),
|
||||
.data_out (evict_tag)
|
||||
);
|
||||
end else begin
|
||||
`UNUSED_VAR (stall)
|
||||
assign way_sel = fill;
|
||||
assign evict_way = 1'b1;
|
||||
assign evict_tag = read_tag;
|
||||
end
|
||||
|
||||
// fill and flush need to also read in writeback mode
|
||||
wire fill_s = fill && (!WRITEBACK || ~stall);
|
||||
wire flush_s = flush && (!WRITEBACK || ~stall);
|
||||
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin
|
||||
wire [`CS_TAG_SEL_BITS-1:0] read_tag;
|
||||
wire read_valid;
|
||||
|
||||
wire do_fill = fill_s && evict_way[i];
|
||||
wire do_flush = flush_s && (!WRITEBACK || way_sel[i]); // flush the whole line in writethrough mode
|
||||
wire do_write = WRITEBACK && write && tag_matches[i];
|
||||
|
||||
wire line_read = (WRITEBACK && (fill_s || flush_s));
|
||||
wire line_write = init || do_fill || do_flush || do_write;
|
||||
wire line_valid = ~(init || flush);
|
||||
|
||||
wire [TAG_WIDTH-1:0] line_wdata;
|
||||
wire [TAG_WIDTH-1:0] line_rdata;
|
||||
|
||||
if (WRITEBACK) begin
|
||||
assign line_wdata = {line_valid, write, line_tag};
|
||||
assign {read_valid[i], read_dirty[i], read_tag[i]} = line_rdata;
|
||||
end else begin
|
||||
assign line_wdata = {line_valid, line_tag};
|
||||
assign {read_valid[i], read_tag[i]} = line_rdata;
|
||||
assign read_dirty[i] = 1'b0;
|
||||
end
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (TAG_WIDTH),
|
||||
.SIZE (`CS_LINES_PER_BANK),
|
||||
.NO_RWCHECK (1)
|
||||
.NO_RWCHECK (1),
|
||||
.RW_ASSERT (1)
|
||||
) tag_store (
|
||||
.clk (clk),
|
||||
.read (1'b1),
|
||||
.write (way_sel[i] || init),
|
||||
`UNUSED_PIN (wren),
|
||||
.reset (reset),
|
||||
.read (line_read),
|
||||
.write (line_write),
|
||||
.wren (1'b1),
|
||||
.addr (line_sel),
|
||||
.wdata ({~init, line_tag}),
|
||||
.rdata ({read_valid, read_tag})
|
||||
.wdata (line_wdata),
|
||||
.rdata (line_rdata)
|
||||
);
|
||||
|
||||
assign tag_matches[i] = read_valid && (line_tag == read_tag);
|
||||
end
|
||||
|
||||
`ifdef DBG_TRACE_CACHE_TAG
|
||||
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin
|
||||
assign tag_matches[i] = read_valid[i] && (line_tag == read_tag[i]);
|
||||
end
|
||||
|
||||
assign evict_dirty = | (read_dirty & evict_way);
|
||||
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_sel};
|
||||
always @(posedge clk) begin
|
||||
if (fill && ~stall) begin
|
||||
`TRACE(3, ("%d: %s-bank%0d tag-fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, line_tag));
|
||||
`TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_sel, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID)));
|
||||
end
|
||||
if (init) begin
|
||||
`TRACE(3, ("%d: %s-bank%0d tag-init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel));
|
||||
`TRACE(3, ("%d: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel));
|
||||
end
|
||||
if (flush && ~stall) begin
|
||||
`TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_sel, line_sel, evict_dirty));
|
||||
end
|
||||
if (lookup && ~stall) begin
|
||||
if (tag_matches != 0) begin
|
||||
`TRACE(3, ("%d: %s-bank%0d tag-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, line_tag, req_uuid));
|
||||
if (write)
|
||||
`TRACE(3, ("%d: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid));
|
||||
else
|
||||
`TRACE(3, ("%d: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid));
|
||||
end else begin
|
||||
`TRACE(3, ("%d: %s-bank%0d tag-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid));
|
||||
if (write)
|
||||
`TRACE(3, ("%d: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid));
|
||||
else
|
||||
`TRACE(3, ("%d: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid));
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
61
hw/rtl/cache/VX_cache_top.sv
vendored
61
hw/rtl/cache/VX_cache_top.sv
vendored
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -20,20 +20,20 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
parameter NUM_REQS = 4,
|
||||
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 16384,
|
||||
parameter CACHE_SIZE = 16384,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 64,
|
||||
parameter LINE_SIZE = 64,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 4,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 4,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
parameter WORD_SIZE = 4,
|
||||
|
||||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 2,
|
||||
// Miss Reservation Queue (MSHR) Size
|
||||
parameter MSHR_SIZE = 16,
|
||||
parameter MSHR_SIZE = 16,
|
||||
// Memory Response Queue Size
|
||||
parameter MRSQ_SIZE = 0,
|
||||
// Memory Request Queue Size
|
||||
|
@ -42,20 +42,26 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
// Enable cache writes
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
|
||||
// Enable dirty bytes on writeback
|
||||
parameter DIRTY_BYTES = 0,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
// core request tag size
|
||||
parameter TAG_WIDTH = 16,
|
||||
|
||||
// Core response output register
|
||||
parameter CORE_OUT_REG = 2,
|
||||
// Core response output buffer
|
||||
parameter CORE_OUT_BUF = 2,
|
||||
|
||||
// Memory request output register
|
||||
parameter MEM_OUT_REG = 2,
|
||||
// Memory request output buffer
|
||||
parameter MEM_OUT_BUF = 2,
|
||||
|
||||
parameter MEM_TAG_WIDTH = `CLOG2(MSHR_SIZE) + `CLOG2(NUM_BANKS)
|
||||
) (
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
@ -69,6 +75,7 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
input wire [NUM_REQS-1:0] core_req_rw,
|
||||
input wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen,
|
||||
input wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr,
|
||||
input wire [NUM_REQS-1:0][`ADDR_TYPE_WIDTH-1:0] core_req_atype,
|
||||
input wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data,
|
||||
input wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag,
|
||||
output wire [NUM_REQS-1:0] core_req_ready,
|
||||
|
@ -81,17 +88,17 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
|
||||
// Memory request
|
||||
output wire mem_req_valid,
|
||||
output wire mem_req_rw,
|
||||
output wire mem_req_rw,
|
||||
output wire [LINE_SIZE-1:0] mem_req_byteen,
|
||||
output wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire [`CS_LINE_WIDTH-1:0] mem_req_data,
|
||||
output wire [MEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
output wire [`CS_LINE_WIDTH-1:0] mem_req_data,
|
||||
output wire [MEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
input wire mem_req_ready,
|
||||
|
||||
|
||||
// Memory response
|
||||
input wire mem_rsp_valid,
|
||||
input wire mem_rsp_valid,
|
||||
input wire [`CS_LINE_WIDTH-1:0] mem_rsp_data,
|
||||
input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
output wire mem_rsp_ready
|
||||
);
|
||||
VX_mem_bus_if #(
|
||||
|
@ -110,6 +117,7 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
assign core_bus_if[i].req_data.rw = core_req_rw[i];
|
||||
assign core_bus_if[i].req_data.byteen = core_req_byteen[i];
|
||||
assign core_bus_if[i].req_data.addr = core_req_addr[i];
|
||||
assign core_bus_if[i].req_data.atype = core_req_atype[i];
|
||||
assign core_bus_if[i].req_data.data = core_req_data[i];
|
||||
assign core_bus_if[i].req_data.tag = core_req_tag[i];
|
||||
assign core_req_ready[i] = core_bus_if[i].req_ready;
|
||||
|
@ -125,17 +133,18 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
|
||||
// Memory request
|
||||
assign mem_req_valid = mem_bus_if.req_valid;
|
||||
assign mem_req_rw = mem_bus_if.req_data.rw;
|
||||
assign mem_req_rw = mem_bus_if.req_data.rw;
|
||||
assign mem_req_byteen = mem_bus_if.req_data.byteen;
|
||||
assign mem_req_addr = mem_bus_if.req_data.addr;
|
||||
assign mem_req_data = mem_bus_if.req_data.data;
|
||||
assign mem_req_tag = mem_bus_if.req_data.tag;
|
||||
assign mem_req_data = mem_bus_if.req_data.data;
|
||||
assign mem_req_tag = mem_bus_if.req_data.tag;
|
||||
assign mem_bus_if.req_ready = mem_req_ready;
|
||||
|
||||
`UNUSED_VAR (mem_bus_if.req_data.atype)
|
||||
|
||||
// Memory response
|
||||
assign mem_bus_if.rsp_valid = mem_rsp_valid;
|
||||
assign mem_bus_if.rsp_valid = mem_rsp_valid;
|
||||
assign mem_bus_if.rsp_data.data = mem_rsp_data;
|
||||
assign mem_bus_if.rsp_data.tag = mem_rsp_tag;
|
||||
assign mem_bus_if.rsp_data.tag = mem_rsp_tag;
|
||||
assign mem_rsp_ready = mem_bus_if.rsp_ready;
|
||||
|
||||
VX_cache #(
|
||||
|
@ -153,8 +162,10 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.CORE_OUT_REG (CORE_OUT_REG),
|
||||
.MEM_OUT_REG (MEM_OUT_REG)
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.DIRTY_BYTES (DIRTY_BYTES),
|
||||
.CORE_OUT_BUF (CORE_OUT_BUF),
|
||||
.MEM_OUT_BUF (MEM_OUT_BUF)
|
||||
) cache (
|
||||
`ifdef PERF_ENABLE
|
||||
.cache_perf (cache_perf),
|
||||
|
|
406
hw/rtl/cache/VX_cache_wrap.sv
vendored
406
hw/rtl/cache/VX_cache_wrap.sv
vendored
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -16,24 +16,27 @@
|
|||
module VX_cache_wrap import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
|
||||
parameter TAG_SEL_IDX = 0,
|
||||
|
||||
// Number of Word requests per cycle
|
||||
parameter NUM_REQS = 4,
|
||||
|
||||
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 4096,
|
||||
parameter CACHE_SIZE = 4096,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 64,
|
||||
parameter LINE_SIZE = 64,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 1,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
parameter WORD_SIZE = 4,
|
||||
|
||||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 2,
|
||||
// Miss Reservation Queue (MSHR) Size
|
||||
parameter MSHR_SIZE = 8,
|
||||
parameter MSHR_SIZE = 8,
|
||||
// Memory Response Queue Size
|
||||
parameter MRSQ_SIZE = 0,
|
||||
// Memory Request Queue Size
|
||||
|
@ -42,6 +45,12 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
// Enable cache writes
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
|
||||
// Enable dirty bytes on writeback
|
||||
parameter DIRTY_BYTES = 0,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
|
@ -49,19 +58,18 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
parameter TAG_WIDTH = UUID_WIDTH + 1,
|
||||
|
||||
// enable bypass for non-cacheable addresses
|
||||
parameter NC_TAG_BIT = 0,
|
||||
parameter NC_ENABLE = 0,
|
||||
|
||||
// Force bypass for all requests
|
||||
parameter PASSTHRU = 0,
|
||||
|
||||
// Core response output register
|
||||
parameter CORE_OUT_REG = 0,
|
||||
// Core response output buffer
|
||||
parameter CORE_OUT_BUF = 0,
|
||||
|
||||
// Memory request output register
|
||||
parameter MEM_OUT_REG = 0
|
||||
// Memory request output buffer
|
||||
parameter MEM_OUT_BUF = 0
|
||||
) (
|
||||
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
@ -74,283 +82,91 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
VX_mem_bus_if.master mem_bus_if
|
||||
);
|
||||
|
||||
`STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid parameter: NUM_BANKS=%d, NUM_REQS=%d", NUM_BANKS, NUM_REQS))
|
||||
`STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter"))
|
||||
|
||||
localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE);
|
||||
localparam CORE_TAG_X_WIDTH = TAG_WIDTH - NC_ENABLE;
|
||||
localparam MEM_TAG_X_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS;
|
||||
localparam MEM_TAG_WIDTH = PASSTHRU ? (NC_ENABLE ? `CACHE_NC_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
|
||||
`CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH)) :
|
||||
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
|
||||
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS));
|
||||
localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE);
|
||||
localparam CACHE_MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS;
|
||||
|
||||
localparam NC_BYPASS = (NC_ENABLE || PASSTHRU);
|
||||
localparam DIRECT_PASSTHRU = PASSTHRU && (`CS_WORD_SEL_BITS == 0) && (NUM_REQS == 1);
|
||||
localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
|
||||
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
|
||||
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS));
|
||||
|
||||
wire [NUM_REQS-1:0] core_req_valid;
|
||||
wire [NUM_REQS-1:0] core_req_rw;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr;
|
||||
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data;
|
||||
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag;
|
||||
wire [NUM_REQS-1:0] core_req_ready;
|
||||
localparam NC_OR_BYPASS = (NC_ENABLE || PASSTHRU);
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_valid[i] = core_bus_if[i].req_valid;
|
||||
assign core_req_rw[i] = core_bus_if[i].req_data.rw;
|
||||
assign core_req_addr[i] = core_bus_if[i].req_data.addr;
|
||||
assign core_req_byteen[i] = core_bus_if[i].req_data.byteen;
|
||||
assign core_req_data[i] = core_bus_if[i].req_data.data;
|
||||
assign core_req_tag[i] = core_bus_if[i].req_data.tag;
|
||||
assign core_bus_if[i].req_ready = core_req_ready[i];
|
||||
end
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (WORD_SIZE),
|
||||
.TAG_WIDTH (TAG_WIDTH)
|
||||
) core_bus_cache_if[NUM_REQS]();
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (LINE_SIZE),
|
||||
.TAG_WIDTH (CACHE_MEM_TAG_WIDTH)
|
||||
) mem_bus_cache_if();
|
||||
|
||||
// Core response buffering
|
||||
wire [NUM_REQS-1:0] core_rsp_valid_s;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_rsp_data_s;
|
||||
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s;
|
||||
wire [NUM_REQS-1:0] core_rsp_ready_s;
|
||||
if (NC_OR_BYPASS) begin
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
|
||||
`RESET_RELAY (core_rsp_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`CS_WORD_WIDTH + TAG_WIDTH),
|
||||
.SIZE ((NC_BYPASS && !DIRECT_PASSTHRU) ? `OUT_REG_TO_EB_SIZE(CORE_OUT_REG) : 0),
|
||||
.OUT_REG (`OUT_REG_TO_EB_REG(CORE_OUT_REG))
|
||||
) core_rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (core_rsp_reset),
|
||||
.valid_in (core_rsp_valid_s[i]),
|
||||
.ready_in (core_rsp_ready_s[i]),
|
||||
.data_in ({core_rsp_data_s[i], core_rsp_tag_s[i]}),
|
||||
.data_out ({core_bus_if[i].rsp_data.data, core_bus_if[i].rsp_data.tag}),
|
||||
.valid_out (core_bus_if[i].rsp_valid),
|
||||
.ready_out (core_bus_if[i].rsp_ready)
|
||||
);
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Memory request buffering
|
||||
wire mem_req_valid_s;
|
||||
wire mem_req_rw_s;
|
||||
wire [LINE_SIZE-1:0] mem_req_byteen_s;
|
||||
wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_s;
|
||||
wire [`CS_LINE_WIDTH-1:0] mem_req_data_s;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_s;
|
||||
wire mem_req_ready_s;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH),
|
||||
.SIZE ((NC_BYPASS && !DIRECT_PASSTHRU) ? `OUT_REG_TO_EB_SIZE(MEM_OUT_REG) : 0),
|
||||
.OUT_REG (`OUT_REG_TO_EB_REG(MEM_OUT_REG))
|
||||
) mem_req_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mem_req_valid_s),
|
||||
.ready_in (mem_req_ready_s),
|
||||
.data_in ({mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_data_s, mem_req_tag_s}),
|
||||
.data_out ({mem_bus_if.req_data.rw, mem_bus_if.req_data.byteen, mem_bus_if.req_data.addr, mem_bus_if.req_data.data, mem_bus_if.req_data.tag}),
|
||||
.valid_out (mem_bus_if.req_valid),
|
||||
.ready_out (mem_bus_if.req_ready)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Core request
|
||||
wire [NUM_REQS-1:0] core_req_valid_b;
|
||||
wire [NUM_REQS-1:0] core_req_rw_b;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr_b;
|
||||
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen_b;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data_b;
|
||||
wire [NUM_REQS-1:0][CORE_TAG_X_WIDTH-1:0] core_req_tag_b;
|
||||
wire [NUM_REQS-1:0] core_req_ready_b;
|
||||
|
||||
// Core response
|
||||
wire [NUM_REQS-1:0] core_rsp_valid_b;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_rsp_data_b;
|
||||
wire [NUM_REQS-1:0][CORE_TAG_X_WIDTH-1:0] core_rsp_tag_b;
|
||||
wire [NUM_REQS-1:0] core_rsp_ready_b;
|
||||
|
||||
// Memory request
|
||||
wire mem_req_valid_b;
|
||||
wire mem_req_rw_b;
|
||||
wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_b;
|
||||
wire [LINE_SIZE-1:0] mem_req_byteen_b;
|
||||
wire [`CS_LINE_WIDTH-1:0] mem_req_data_b;
|
||||
wire [MEM_TAG_X_WIDTH-1:0] mem_req_tag_b;
|
||||
wire mem_req_ready_b;
|
||||
|
||||
// Memory response
|
||||
wire mem_rsp_valid_b;
|
||||
wire [`CS_LINE_WIDTH-1:0] mem_rsp_data_b;
|
||||
wire [MEM_TAG_X_WIDTH-1:0] mem_rsp_tag_b;
|
||||
wire mem_rsp_ready_b;
|
||||
|
||||
if (NC_BYPASS) begin
|
||||
|
||||
`RESET_RELAY (nc_bypass_reset, reset);
|
||||
|
||||
VX_cache_bypass #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.NC_TAG_BIT (NC_TAG_BIT),
|
||||
.TAG_SEL_IDX (TAG_SEL_IDX),
|
||||
|
||||
.NC_ENABLE (NC_ENABLE),
|
||||
.PASSTHRU (PASSTHRU),
|
||||
.NC_ENABLE (PASSTHRU ? 0 : NC_ENABLE),
|
||||
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
|
||||
.CORE_ADDR_WIDTH (`CS_WORD_ADDR_WIDTH),
|
||||
.CORE_DATA_SIZE (WORD_SIZE),
|
||||
.CORE_TAG_IN_WIDTH (TAG_WIDTH),
|
||||
|
||||
.CORE_TAG_WIDTH (TAG_WIDTH),
|
||||
|
||||
.MEM_ADDR_WIDTH (`CS_MEM_ADDR_WIDTH),
|
||||
.MEM_DATA_SIZE (LINE_SIZE),
|
||||
.MEM_TAG_IN_WIDTH (MEM_TAG_X_WIDTH),
|
||||
.MEM_TAG_IN_WIDTH (CACHE_MEM_TAG_WIDTH),
|
||||
.MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH),
|
||||
|
||||
.UUID_WIDTH (UUID_WIDTH)
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
|
||||
.CORE_OUT_BUF (CORE_OUT_BUF),
|
||||
.MEM_OUT_BUF (MEM_OUT_BUF)
|
||||
) cache_bypass (
|
||||
.clk (clk),
|
||||
.reset (nc_bypass_reset),
|
||||
.clk (clk),
|
||||
.reset (nc_bypass_reset),
|
||||
|
||||
// Core request in
|
||||
.core_req_valid_in (core_req_valid),
|
||||
.core_req_rw_in (core_req_rw),
|
||||
.core_req_byteen_in (core_req_byteen),
|
||||
.core_req_addr_in (core_req_addr),
|
||||
.core_req_data_in (core_req_data),
|
||||
.core_req_tag_in (core_req_tag),
|
||||
.core_req_ready_in (core_req_ready),
|
||||
.core_bus_in_if (core_bus_if),
|
||||
.core_bus_out_if(core_bus_cache_if),
|
||||
|
||||
// Core request out
|
||||
.core_req_valid_out (core_req_valid_b),
|
||||
.core_req_rw_out (core_req_rw_b),
|
||||
.core_req_byteen_out(core_req_byteen_b),
|
||||
.core_req_addr_out (core_req_addr_b),
|
||||
.core_req_data_out (core_req_data_b),
|
||||
.core_req_tag_out (core_req_tag_b),
|
||||
.core_req_ready_out (core_req_ready_b),
|
||||
|
||||
// Core response in
|
||||
.core_rsp_valid_in (core_rsp_valid_b),
|
||||
.core_rsp_data_in (core_rsp_data_b),
|
||||
.core_rsp_tag_in (core_rsp_tag_b),
|
||||
.core_rsp_ready_in (core_rsp_ready_b),
|
||||
|
||||
// Core response out
|
||||
.core_rsp_valid_out (core_rsp_valid_s),
|
||||
.core_rsp_data_out (core_rsp_data_s),
|
||||
.core_rsp_tag_out (core_rsp_tag_s),
|
||||
.core_rsp_ready_out (core_rsp_ready_s),
|
||||
|
||||
// Memory request in
|
||||
.mem_req_valid_in (mem_req_valid_b),
|
||||
.mem_req_rw_in (mem_req_rw_b),
|
||||
.mem_req_addr_in (mem_req_addr_b),
|
||||
.mem_req_byteen_in (mem_req_byteen_b),
|
||||
.mem_req_data_in (mem_req_data_b),
|
||||
.mem_req_tag_in (mem_req_tag_b),
|
||||
.mem_req_ready_in (mem_req_ready_b),
|
||||
|
||||
// Memory request out
|
||||
.mem_req_valid_out (mem_req_valid_s),
|
||||
.mem_req_addr_out (mem_req_addr_s),
|
||||
.mem_req_rw_out (mem_req_rw_s),
|
||||
.mem_req_byteen_out (mem_req_byteen_s),
|
||||
.mem_req_data_out (mem_req_data_s),
|
||||
.mem_req_tag_out (mem_req_tag_s),
|
||||
.mem_req_ready_out (mem_req_ready_s),
|
||||
|
||||
// Memory response in
|
||||
.mem_rsp_valid_in (mem_bus_if.rsp_valid),
|
||||
.mem_rsp_data_in (mem_bus_if.rsp_data.data),
|
||||
.mem_rsp_tag_in (mem_bus_if.rsp_data.tag),
|
||||
.mem_rsp_ready_in (mem_bus_if.rsp_ready),
|
||||
|
||||
// Memory response out
|
||||
.mem_rsp_valid_out (mem_rsp_valid_b),
|
||||
.mem_rsp_data_out (mem_rsp_data_b),
|
||||
.mem_rsp_tag_out (mem_rsp_tag_b),
|
||||
.mem_rsp_ready_out (mem_rsp_ready_b)
|
||||
);
|
||||
end else begin
|
||||
assign core_req_valid_b = core_req_valid;
|
||||
assign core_req_rw_b = core_req_rw;
|
||||
assign core_req_addr_b = core_req_addr;
|
||||
assign core_req_byteen_b= core_req_byteen;
|
||||
assign core_req_data_b = core_req_data;
|
||||
assign core_req_tag_b = core_req_tag;
|
||||
assign core_req_ready = core_req_ready_b;
|
||||
|
||||
assign core_rsp_valid_s = core_rsp_valid_b;
|
||||
assign core_rsp_data_s = core_rsp_data_b;
|
||||
assign core_rsp_tag_s = core_rsp_tag_b;
|
||||
assign core_rsp_ready_b = core_rsp_ready_s;
|
||||
|
||||
assign mem_req_valid_s = mem_req_valid_b;
|
||||
assign mem_req_addr_s = mem_req_addr_b;
|
||||
assign mem_req_rw_s = mem_req_rw_b;
|
||||
assign mem_req_byteen_s = mem_req_byteen_b;
|
||||
assign mem_req_data_s = mem_req_data_b;
|
||||
assign mem_req_ready_b = mem_req_ready_s;
|
||||
|
||||
// Add explicit NC=0 flag to the memory request tag
|
||||
|
||||
VX_bits_insert #(
|
||||
.N (MEM_TAG_WIDTH-1),
|
||||
.POS (NC_TAG_BIT)
|
||||
) mem_req_tag_insert (
|
||||
.data_in (mem_req_tag_b),
|
||||
.sel_in (1'b0),
|
||||
.data_out (mem_req_tag_s)
|
||||
.mem_bus_in_if (mem_bus_cache_if),
|
||||
.mem_bus_out_if (mem_bus_if)
|
||||
);
|
||||
|
||||
assign mem_rsp_valid_b = mem_bus_if.rsp_valid;
|
||||
assign mem_rsp_data_b = mem_bus_if.rsp_data.data;
|
||||
assign mem_bus_if.rsp_ready = mem_rsp_ready_b;
|
||||
end else begin
|
||||
|
||||
// Remove NC flag from the memory response tag
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
`ASSIGN_VX_MEM_BUS_IF (core_bus_cache_if[i], core_bus_if[i]);
|
||||
end
|
||||
|
||||
VX_bits_remove #(
|
||||
.N (MEM_TAG_WIDTH),
|
||||
.POS (NC_TAG_BIT)
|
||||
) mem_rsp_tag_remove (
|
||||
.data_in (mem_bus_if.rsp_data.tag),
|
||||
.data_out (mem_rsp_tag_b)
|
||||
);
|
||||
end
|
||||
`ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_cache_if);
|
||||
end
|
||||
|
||||
if (PASSTHRU != 0) begin
|
||||
|
||||
`UNUSED_VAR (core_req_valid_b)
|
||||
`UNUSED_VAR (core_req_rw_b)
|
||||
`UNUSED_VAR (core_req_addr_b)
|
||||
`UNUSED_VAR (core_req_byteen_b)
|
||||
`UNUSED_VAR (core_req_data_b)
|
||||
`UNUSED_VAR (core_req_tag_b)
|
||||
assign core_req_ready_b = '0;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
`UNUSED_VAR (core_bus_cache_if[i].req_valid)
|
||||
`UNUSED_VAR (core_bus_cache_if[i].req_data)
|
||||
assign core_bus_cache_if[i].req_ready = 0;
|
||||
|
||||
assign core_rsp_valid_b = '0;
|
||||
assign core_rsp_data_b = '0;
|
||||
assign core_rsp_tag_b = '0;
|
||||
`UNUSED_VAR (core_rsp_ready_b)
|
||||
assign core_bus_cache_if[i].rsp_valid = 0;
|
||||
assign core_bus_cache_if[i].rsp_data = '0;
|
||||
`UNUSED_VAR (core_bus_cache_if[i].rsp_ready)
|
||||
end
|
||||
|
||||
assign mem_req_valid_b = 0;
|
||||
assign mem_req_addr_b = '0;
|
||||
assign mem_req_rw_b = '0;
|
||||
assign mem_req_byteen_b = '0;
|
||||
assign mem_req_data_b = '0;
|
||||
assign mem_req_tag_b = '0;
|
||||
`UNUSED_VAR (mem_req_ready_b)
|
||||
assign mem_bus_cache_if.req_valid = 0;
|
||||
assign mem_bus_cache_if.req_data = '0;
|
||||
`UNUSED_VAR (mem_bus_cache_if.req_ready)
|
||||
|
||||
`UNUSED_VAR (mem_rsp_valid_b)
|
||||
`UNUSED_VAR (mem_rsp_data_b)
|
||||
`UNUSED_VAR (mem_rsp_tag_b)
|
||||
assign mem_rsp_ready_b = 0;
|
||||
`UNUSED_VAR (mem_bus_cache_if.rsp_valid)
|
||||
`UNUSED_VAR (mem_bus_cache_if.rsp_data)
|
||||
assign mem_bus_cache_if.rsp_ready = 0;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
assign cache_perf = '0;
|
||||
|
@ -358,46 +174,6 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
|
||||
end else begin
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (WORD_SIZE),
|
||||
.TAG_WIDTH (CORE_TAG_X_WIDTH)
|
||||
) core_bus_wrap_if[NUM_REQS]();
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (LINE_SIZE),
|
||||
.TAG_WIDTH (MEM_TAG_X_WIDTH)
|
||||
) mem_bus_wrap_if();
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_bus_wrap_if[i].req_valid = core_req_valid_b[i];
|
||||
assign core_bus_wrap_if[i].req_data.rw = core_req_rw_b[i];
|
||||
assign core_bus_wrap_if[i].req_data.addr = core_req_addr_b[i];
|
||||
assign core_bus_wrap_if[i].req_data.byteen = core_req_byteen_b[i];
|
||||
assign core_bus_wrap_if[i].req_data.data = core_req_data_b[i];
|
||||
assign core_bus_wrap_if[i].req_data.tag = core_req_tag_b[i];
|
||||
assign core_req_ready_b[i] = core_bus_wrap_if[i].req_ready;
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_valid_b[i] = core_bus_wrap_if[i].rsp_valid;
|
||||
assign core_rsp_data_b[i] = core_bus_wrap_if[i].rsp_data.data;
|
||||
assign core_rsp_tag_b[i] = core_bus_wrap_if[i].rsp_data.tag;
|
||||
assign core_bus_wrap_if[i].rsp_ready = core_rsp_ready_b[i];
|
||||
end
|
||||
|
||||
assign mem_req_valid_b = mem_bus_wrap_if.req_valid;
|
||||
assign mem_req_addr_b = mem_bus_wrap_if.req_data.addr;
|
||||
assign mem_req_rw_b = mem_bus_wrap_if.req_data.rw;
|
||||
assign mem_req_byteen_b = mem_bus_wrap_if.req_data.byteen;
|
||||
assign mem_req_data_b = mem_bus_wrap_if.req_data.data;
|
||||
assign mem_req_tag_b = mem_bus_wrap_if.req_data.tag;
|
||||
assign mem_bus_wrap_if.req_ready = mem_req_ready_b;
|
||||
|
||||
assign mem_bus_wrap_if.rsp_valid = mem_rsp_valid_b;
|
||||
assign mem_bus_wrap_if.rsp_data.data = mem_rsp_data_b;
|
||||
assign mem_bus_wrap_if.rsp_data.tag = mem_rsp_tag_b;
|
||||
assign mem_rsp_ready_b = mem_bus_wrap_if.rsp_ready;
|
||||
|
||||
`RESET_RELAY (cache_reset, reset);
|
||||
|
||||
VX_cache #(
|
||||
|
@ -413,25 +189,25 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
.MRSQ_SIZE (MRSQ_SIZE),
|
||||
.MREQ_SIZE (MREQ_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.DIRTY_BYTES (DIRTY_BYTES),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.TAG_WIDTH (CORE_TAG_X_WIDTH),
|
||||
.CORE_OUT_REG (NC_BYPASS ? 1 : CORE_OUT_REG),
|
||||
.MEM_OUT_REG (NC_BYPASS ? 1 : MEM_OUT_REG)
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.CORE_OUT_BUF (NC_OR_BYPASS ? 1 : CORE_OUT_BUF),
|
||||
.MEM_OUT_BUF (NC_OR_BYPASS ? 1 : MEM_OUT_BUF)
|
||||
) cache (
|
||||
.clk (clk),
|
||||
.reset (cache_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.cache_perf (cache_perf),
|
||||
`endif
|
||||
|
||||
.core_bus_if (core_bus_wrap_if),
|
||||
.mem_bus_if (mem_bus_wrap_if)
|
||||
.core_bus_if (core_bus_cache_if),
|
||||
.mem_bus_if (mem_bus_cache_if)
|
||||
);
|
||||
|
||||
|
||||
end
|
||||
|
||||
`ifdef DBG_TRACE_CACHE_BANK
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
wire [`UP(UUID_WIDTH)-1:0] core_req_uuid;
|
||||
|
@ -451,20 +227,20 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
always @(posedge clk) begin
|
||||
if (core_req_fire) begin
|
||||
if (core_bus_if[i].req_data.rw)
|
||||
`TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid));
|
||||
`TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid));
|
||||
else
|
||||
`TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid));
|
||||
end
|
||||
if (core_rsp_fire) begin
|
||||
`TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid));
|
||||
end
|
||||
`TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid));
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire [`UP(UUID_WIDTH)-1:0] mem_req_uuid;
|
||||
wire [`UP(UUID_WIDTH)-1:0] mem_rsp_uuid;
|
||||
|
||||
if ((UUID_WIDTH != 0) && (NC_BYPASS != 0)) begin
|
||||
if ((UUID_WIDTH != 0) && (NC_OR_BYPASS != 0)) begin
|
||||
assign mem_req_uuid = mem_bus_if.req_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
assign mem_rsp_uuid = mem_bus_if.rsp_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
end else begin
|
||||
|
@ -478,17 +254,17 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
always @(posedge clk) begin
|
||||
if (mem_req_fire) begin
|
||||
if (mem_bus_if.req_data.rw)
|
||||
`TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
|
||||
`TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%h, data=0x%h (#%0d)\n",
|
||||
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid));
|
||||
else
|
||||
`TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
|
||||
`TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid));
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n",
|
||||
`TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n",
|
||||
$time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid));
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -13,23 +13,23 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_int_unit #(
|
||||
parameter CORE_ID = 0,
|
||||
module VX_alu_int #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter BLOCK_IDX = 0,
|
||||
parameter NUM_LANES = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
||||
// Inputs
|
||||
VX_execute_if.slave execute_if,
|
||||
|
||||
// Outputs
|
||||
// Outputs
|
||||
VX_commit_if.master commit_if,
|
||||
VX_branch_ctl_if.master branch_ctl_if
|
||||
);
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam LANE_BITS = `CLOG2(NUM_LANES);
|
||||
localparam LANE_WIDTH = `UP(LANE_BITS);
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
|
@ -40,9 +40,9 @@ module VX_int_unit #(
|
|||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] add_result;
|
||||
wire [NUM_LANES-1:0][`XLEN:0] sub_result; // +1 bit for branch compare
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] shr_result;
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] shr_zic_result;
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] msc_result;
|
||||
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] add_result_w;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] sub_result_w;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] shr_result_w;
|
||||
|
@ -52,26 +52,24 @@ module VX_int_unit #(
|
|||
wire [NUM_LANES-1:0][`XLEN-1:0] alu_result_r;
|
||||
|
||||
`ifdef XLEN_64
|
||||
wire is_alu_w = `INST_ALU_IS_W(execute_if.data.op_mod);
|
||||
wire is_alu_w = execute_if.data.op_args.alu.is_w;
|
||||
`else
|
||||
wire is_alu_w = 0;
|
||||
`endif
|
||||
|
||||
`UNUSED_VAR (execute_if.data.op_mod)
|
||||
|
||||
wire [`INST_ALU_BITS-1:0] alu_op = `INST_ALU_BITS'(execute_if.data.op_type);
|
||||
wire [`INST_BR_BITS-1:0] br_op = `INST_BR_BITS'(execute_if.data.op_type);
|
||||
wire is_br_op = `INST_ALU_IS_BR(execute_if.data.op_mod);
|
||||
wire is_br_op = (execute_if.data.op_args.alu.xtype == `ALU_TYPE_BRANCH);
|
||||
wire is_sub_op = `INST_ALU_IS_SUB(alu_op);
|
||||
wire is_signed = `INST_ALU_SIGNED(alu_op);
|
||||
wire is_signed = `INST_ALU_SIGNED(alu_op);
|
||||
wire [1:0] op_class = is_br_op ? `INST_BR_CLASS(alu_op) : `INST_ALU_CLASS(alu_op);
|
||||
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] alu_in1 = execute_if.data.rs1_data;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] alu_in2 = execute_if.data.rs2_data;
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] alu_in1_PC = execute_if.data.use_PC ? {NUM_LANES{execute_if.data.PC}} : alu_in1;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] alu_in2_imm = execute_if.data.use_imm ? {NUM_LANES{execute_if.data.imm}} : alu_in2;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] alu_in2_br = (execute_if.data.use_imm && ~is_br_op) ? {NUM_LANES{execute_if.data.imm}} : alu_in2;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] alu_in1_PC = execute_if.data.op_args.alu.use_PC ? {NUM_LANES{execute_if.data.PC, 1'd0}} : alu_in1;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] alu_in2_imm = execute_if.data.op_args.alu.use_imm ? {NUM_LANES{`SEXT(`XLEN, execute_if.data.op_args.alu.imm)}} : alu_in2;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] alu_in2_br = (execute_if.data.op_args.alu.use_imm && ~is_br_op) ? {NUM_LANES{`SEXT(`XLEN, execute_if.data.op_args.alu.imm)}} : alu_in2;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign add_result[i] = alu_in1_PC[i] + alu_in2_imm[i];
|
||||
|
@ -85,9 +83,20 @@ module VX_int_unit #(
|
|||
assign sub_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] - alu_in2_imm[i][31:0]));
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [`XLEN:0] shr_in1 = {is_signed && alu_in1[i][`XLEN-1], alu_in1[i]};
|
||||
assign shr_result[i] = `XLEN'($signed(shr_in1) >>> alu_in2_imm[i][SHIFT_IMM_BITS-1:0]);
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [`XLEN:0] shr_in1 = {is_signed && alu_in1[i][`XLEN-1], alu_in1[i]};
|
||||
always @(*) begin
|
||||
case (alu_op[1:0])
|
||||
`ifdef EXT_ZICOND_ENABLE
|
||||
2'b10, 2'b11: begin // CZERO
|
||||
shr_zic_result[i] = alu_in1[i] & {`XLEN{alu_op[0] ^ (| alu_in2[i])}};
|
||||
end
|
||||
`endif
|
||||
default: begin // SRL, SRA, SRLI, SRAI
|
||||
shr_zic_result[i] = `XLEN'($signed(shr_in1) >>> alu_in2_imm[i][SHIFT_IMM_BITS-1:0]);
|
||||
end
|
||||
endcase
|
||||
end
|
||||
wire [32:0] shr_in1_w = {is_signed && alu_in1[i][31], alu_in1[i][31:0]};
|
||||
wire [31:0] shr_res_w = 32'($signed(shr_in1_w) >>> alu_in2_imm[i][4:0]);
|
||||
assign shr_result_w[i] = `XLEN'($signed(shr_res_w));
|
||||
|
@ -102,48 +111,51 @@ module VX_int_unit #(
|
|||
2'b11: msc_result[i] = alu_in1[i] << alu_in2_imm[i][SHIFT_IMM_BITS-1:0]; // SLL
|
||||
endcase
|
||||
end
|
||||
assign msc_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] << alu_in2_imm[i][4:0]));
|
||||
assign msc_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] << alu_in2_imm[i][4:0])); // SLLW
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [`XLEN-1:0] slt_br_result = `XLEN'({is_br_op && ~(| sub_result[i][`XLEN-1:0]), sub_result[i][`XLEN]});
|
||||
wire [`XLEN-1:0] sub_slt_br_result = (is_sub_op && ~is_br_op) ? sub_result[i][`XLEN-1:0] : slt_br_result;
|
||||
always @(*) begin
|
||||
case ({is_alu_w, op_class})
|
||||
case ({is_alu_w, op_class})
|
||||
3'b000: alu_result[i] = add_result[i]; // ADD, LUI, AUIPC
|
||||
3'b001: alu_result[i] = sub_slt_br_result; // SUB, SLTU, SLTI, BR*
|
||||
3'b010: alu_result[i] = shr_result[i]; // SRL, SRA, SRLI, SRAI
|
||||
3'b010: alu_result[i] = shr_zic_result[i]; // SRL, SRA, SRLI, SRAI, CZERO*
|
||||
3'b011: alu_result[i] = msc_result[i]; // AND, OR, XOR, SLL, SLLI
|
||||
3'b100: alu_result[i] = add_result_w[i]; // ADDIW, ADDW
|
||||
3'b101: alu_result[i] = sub_result_w[i]; // SUBW
|
||||
3'b110: alu_result[i] = shr_result_w[i]; // SRLW, SRAW, SRLIW, SRAIW
|
||||
3'b111: alu_result[i] = msc_result_w[i]; // SLLW
|
||||
endcase
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// branch
|
||||
|
||||
wire [`XLEN-1:0] PC_r, imm_r;
|
||||
wire [`PC_BITS-1:0] PC_r;
|
||||
wire [`INST_BR_BITS-1:0] br_op_r;
|
||||
wire [`PC_BITS-1:0] cbr_dest, cbr_dest_r;
|
||||
wire [LANE_WIDTH-1:0] tid, tid_r;
|
||||
wire is_br_op_r;
|
||||
|
||||
assign cbr_dest = add_result[0][1 +: `PC_BITS];
|
||||
|
||||
if (LANE_BITS != 0) begin
|
||||
assign tid = execute_if.data.tid[0 +: LANE_BITS];
|
||||
end else begin
|
||||
assign tid = 0;
|
||||
end
|
||||
end
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `NR_BITS + 1 + PID_WIDTH + 1 + 1 + (NUM_LANES * `XLEN) + `XLEN + `XLEN + 1 + `INST_BR_BITS + LANE_WIDTH)
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `NR_BITS + 1 + PID_WIDTH + 1 + 1 + (NUM_LANES * `XLEN) + `PC_BITS + `PC_BITS + 1 + `INST_BR_BITS + LANE_WIDTH)
|
||||
) rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (execute_if.valid),
|
||||
.ready_in (execute_if.ready),
|
||||
.data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, alu_result, execute_if.data.PC, execute_if.data.imm, is_br_op, br_op, tid}),
|
||||
.data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.rd, commit_if.data.wb, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop, alu_result_r, PC_r, imm_r, is_br_op_r, br_op_r, tid_r}),
|
||||
.data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, alu_result, execute_if.data.PC, cbr_dest, is_br_op, br_op, tid}),
|
||||
.data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.rd, commit_if.data.wb, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop, alu_result_r, PC_r, cbr_dest_r, is_br_op_r, br_op_r, tid_r}),
|
||||
.valid_out (commit_if.valid),
|
||||
.ready_out (commit_if.ready)
|
||||
);
|
||||
|
@ -152,38 +164,38 @@ module VX_int_unit #(
|
|||
wire is_br_neg = `INST_BR_IS_NEG(br_op_r);
|
||||
wire is_br_less = `INST_BR_IS_LESS(br_op_r);
|
||||
wire is_br_static = `INST_BR_IS_STATIC(br_op_r);
|
||||
wire [`XLEN-1:0] br_result = alu_result_r[tid_r];
|
||||
|
||||
wire [`XLEN-1:0] br_result = alu_result_r[tid_r];
|
||||
wire is_less = br_result[0];
|
||||
wire is_equal = br_result[1];
|
||||
|
||||
wire br_enable = is_br_op_r && commit_if.valid && commit_if.ready && commit_if.data.eop;
|
||||
wire br_taken = ((is_br_less ? is_less : is_equal) ^ is_br_neg) | is_br_static;
|
||||
wire [`XLEN-1:0] br_dest = is_br_static ? br_result : (PC_r + imm_r);
|
||||
wire [`PC_BITS-1:0] br_dest = is_br_static ? br_result[1 +: `PC_BITS] : cbr_dest_r;
|
||||
wire [`NW_WIDTH-1:0] br_wid;
|
||||
`ASSIGN_BLOCKED_WID (br_wid, commit_if.data.wid, BLOCK_IDX, `NUM_ALU_BLOCKS)
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_WIDTH + 1 + `XLEN)
|
||||
.DATAW (1 + `NW_WIDTH + 1 + `PC_BITS)
|
||||
) branch_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in ({br_enable, br_wid, br_taken, br_dest}),
|
||||
.data_in ({br_enable, br_wid, br_taken, br_dest}),
|
||||
.data_out ({branch_ctl_if.valid, branch_ctl_if.wid, branch_ctl_if.taken, branch_ctl_if.dest})
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign commit_if.data.data[i] = (is_br_op_r && is_br_static) ? (PC_r + 4) : alu_result_r[i];
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign commit_if.data.data[i] = (is_br_op_r && is_br_static) ? {(PC_r + `PC_BITS'(2)), 1'd0} : alu_result_r[i];
|
||||
end
|
||||
|
||||
assign commit_if.data.PC = PC_r;
|
||||
|
||||
`ifdef DBG_TRACE_CORE_PIPELINE
|
||||
|
||||
`ifdef DBG_TRACE_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (branch_ctl_if.valid) begin
|
||||
`TRACE(1, ("%d: core%0d-branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n",
|
||||
$time, CORE_ID, branch_ctl_if.wid, commit_if.data.PC, branch_ctl_if.taken, branch_ctl_if.dest, commit_if.data.uuid));
|
||||
if (br_enable) begin
|
||||
`TRACE(1, ("%d: %s-branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, br_wid, {commit_if.data.PC, 1'b0}, br_taken, {br_dest, 1'b0}, commit_if.data.uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -13,23 +13,23 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_muldiv_unit #(
|
||||
parameter CORE_ID = 0,
|
||||
module VX_alu_muldiv #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter NUM_LANES = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
||||
// Inputs
|
||||
VX_execute_if.slave execute_if,
|
||||
|
||||
// Outputs
|
||||
VX_commit_if.master commit_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam TAGW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + PID_WIDTH + 1 + 1;
|
||||
localparam TAG_WIDTH = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + PID_WIDTH + 1 + 1;
|
||||
|
||||
`UNUSED_VAR (execute_if.data.rs3_data)
|
||||
|
||||
|
@ -38,7 +38,7 @@ module VX_muldiv_unit #(
|
|||
wire is_mulx_op = `INST_M_IS_MULX(muldiv_op);
|
||||
wire is_signed_op = `INST_M_SIGNED(muldiv_op);
|
||||
`ifdef XLEN_64
|
||||
wire is_alu_w = `INST_ALU_IS_W(execute_if.data.op_mod);
|
||||
wire is_alu_w = execute_if.data.op_args.alu.is_w;
|
||||
`else
|
||||
wire is_alu_w = 0;
|
||||
`endif
|
||||
|
@ -47,43 +47,43 @@ module VX_muldiv_unit #(
|
|||
wire [`UUID_WIDTH-1:0] mul_uuid_out;
|
||||
wire [`NW_WIDTH-1:0] mul_wid_out;
|
||||
wire [NUM_LANES-1:0] mul_tmask_out;
|
||||
wire [`XLEN-1:0] mul_PC_out;
|
||||
wire [`PC_BITS-1:0] mul_PC_out;
|
||||
wire [`NR_BITS-1:0] mul_rd_out;
|
||||
wire mul_wb_out;
|
||||
wire [PID_WIDTH-1:0] mul_pid_out;
|
||||
wire mul_sop_out, mul_eop_out;
|
||||
|
||||
|
||||
wire mul_valid_in = execute_if.valid && is_mulx_op;
|
||||
wire mul_ready_in;
|
||||
wire mul_valid_out;
|
||||
wire mul_ready_out;
|
||||
|
||||
|
||||
wire is_mulh_in = `INST_M_IS_MULH(muldiv_op);
|
||||
wire is_signed_mul_a = `INST_M_SIGNED_A(muldiv_op);
|
||||
wire is_signed_mul_b = is_signed_op;
|
||||
|
||||
`ifdef IMUL_DPI
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] mul_result_tmp;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] mul_result_tmp;
|
||||
|
||||
wire mul_fire_in = mul_valid_in && mul_ready_in;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [`XLEN-1:0] mul_resultl, mul_resulth;
|
||||
wire [`XLEN-1:0] mul_in1 = is_alu_w ? (execute_if.data.rs1_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs1_data[i];
|
||||
wire [`XLEN-1:0] mul_in2 = is_alu_w ? (execute_if.data.rs2_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs2_data[i];
|
||||
always @(*) begin
|
||||
reg [`XLEN-1:0] mul_resultl, mul_resulth;
|
||||
wire [`XLEN-1:0] mul_in1 = is_alu_w ? (execute_if.data.rs1_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs1_data[i];
|
||||
wire [`XLEN-1:0] mul_in2 = is_alu_w ? (execute_if.data.rs2_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs2_data[i];
|
||||
always @(*) begin
|
||||
dpi_imul (mul_fire_in, is_signed_mul_a, is_signed_mul_b, mul_in1, mul_in2, mul_resultl, mul_resulth);
|
||||
end
|
||||
assign mul_result_tmp[i] = is_mulh_in ? mul_resulth : (is_alu_w ? `XLEN'($signed(mul_resultl[31:0])) : mul_resultl);
|
||||
end
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW (1 + TAGW + (NUM_LANES * `XLEN)),
|
||||
.DATAW (1 + TAG_WIDTH + (NUM_LANES * `XLEN)),
|
||||
.DEPTH (`LATENCY_IMUL),
|
||||
.RESETW (1)
|
||||
) mul_shift_reg (
|
||||
.clk(clk),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (mul_ready_in),
|
||||
.data_in ({mul_valid_in, execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, mul_result_tmp}),
|
||||
|
@ -92,8 +92,8 @@ module VX_muldiv_unit #(
|
|||
|
||||
assign mul_ready_in = mul_ready_out || ~mul_valid_out;
|
||||
|
||||
`else
|
||||
|
||||
`else
|
||||
|
||||
wire [NUM_LANES-1:0][2*(`XLEN+1)-1:0] mul_result_tmp;
|
||||
wire is_mulh_out;
|
||||
wire is_mul_w_out;
|
||||
|
@ -106,7 +106,7 @@ module VX_muldiv_unit #(
|
|||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign mul_in1[i] = is_alu_w ? {{(`XLEN-31){execute_if.data.rs1_data[i][31]}}, execute_if.data.rs1_data[i][31:0]} : {is_signed_mul_a && execute_if.data.rs1_data[i][`XLEN-1], execute_if.data.rs1_data[i]};
|
||||
assign mul_in2[i] = is_alu_w ? {{(`XLEN-31){execute_if.data.rs2_data[i][31]}}, execute_if.data.rs2_data[i][31:0]} : {is_signed_mul_b && execute_if.data.rs2_data[i][`XLEN-1], execute_if.data.rs2_data[i]};
|
||||
end
|
||||
end
|
||||
|
||||
wire mul_strode;
|
||||
wire mul_busy;
|
||||
|
@ -115,7 +115,7 @@ module VX_muldiv_unit #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mul_valid_in),
|
||||
.ready_in (mul_ready_in),
|
||||
.ready_in (mul_ready_in),
|
||||
.valid_out (mul_valid_out),
|
||||
.ready_out (mul_ready_out),
|
||||
.strobe (mul_strode),
|
||||
|
@ -128,31 +128,31 @@ module VX_muldiv_unit #(
|
|||
.SIGNED (1)
|
||||
) serial_mul (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (reset),
|
||||
|
||||
.strobe (mul_strode),
|
||||
.busy (mul_busy),
|
||||
|
||||
.busy (mul_busy),
|
||||
|
||||
.dataa (mul_in1),
|
||||
.datab (mul_in2),
|
||||
.result (mul_result_tmp)
|
||||
);
|
||||
|
||||
reg [TAGW+2-1:0] mul_tag_r;
|
||||
reg [TAG_WIDTH+2-1:0] mul_tag_r;
|
||||
always @(posedge clk) begin
|
||||
if (mul_valid_in && mul_ready_in) begin
|
||||
mul_tag_r <= {execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, is_mulh_in, is_alu_w, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop};
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
assign {mul_uuid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, is_mulh_out, is_mul_w_out, mul_pid_out, mul_sop_out, mul_eop_out} = mul_tag_r;
|
||||
|
||||
`else
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [`XLEN:0] mul_in1 = {is_signed_mul_a && execute_if.data.rs1_data[i][`XLEN-1], execute_if.data.rs1_data[i]};
|
||||
wire [`XLEN:0] mul_in2 = {is_signed_mul_b && execute_if.data.rs2_data[i][`XLEN-1], execute_if.data.rs2_data[i]};
|
||||
|
||||
wire [`XLEN:0] mul_in2 = {is_signed_mul_b && execute_if.data.rs2_data[i][`XLEN-1], execute_if.data.rs2_data[i]};
|
||||
|
||||
VX_multiplier #(
|
||||
.A_WIDTH (`XLEN+1),
|
||||
.B_WIDTH (`XLEN+1),
|
||||
|
@ -165,11 +165,11 @@ module VX_muldiv_unit #(
|
|||
.dataa (mul_in1),
|
||||
.datab (mul_in2),
|
||||
.result (mul_result_tmp[i])
|
||||
);
|
||||
);
|
||||
end
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW (1 + TAGW + 1 + 1),
|
||||
.DATAW (1 + TAG_WIDTH + 1 + 1),
|
||||
.DEPTH (`LATENCY_IMUL),
|
||||
.RESETW (1)
|
||||
) mul_shift_reg (
|
||||
|
@ -186,8 +186,8 @@ module VX_muldiv_unit #(
|
|||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
`ifdef XLEN_64
|
||||
assign mul_result_out[i] = is_mulh_out ? mul_result_tmp[i][2*(`XLEN)-1:`XLEN] :
|
||||
(is_mul_w_out ? `XLEN'($signed(mul_result_tmp[i][31:0])) :
|
||||
assign mul_result_out[i] = is_mulh_out ? mul_result_tmp[i][2*(`XLEN)-1:`XLEN] :
|
||||
(is_mul_w_out ? `XLEN'($signed(mul_result_tmp[i][31:0])) :
|
||||
mul_result_tmp[i][`XLEN-1:0]);
|
||||
`else
|
||||
assign mul_result_out[i] = is_mulh_out ? mul_result_tmp[i][2*(`XLEN)-1:`XLEN] : mul_result_tmp[i][`XLEN-1:0];
|
||||
|
@ -203,7 +203,7 @@ module VX_muldiv_unit #(
|
|||
wire [`UUID_WIDTH-1:0] div_uuid_out;
|
||||
wire [`NW_WIDTH-1:0] div_wid_out;
|
||||
wire [NUM_LANES-1:0] div_tmask_out;
|
||||
wire [`XLEN-1:0] div_PC_out;
|
||||
wire [`PC_BITS-1:0] div_PC_out;
|
||||
wire [`NR_BITS-1:0] div_rd_out;
|
||||
wire div_wb_out;
|
||||
wire [PID_WIDTH-1:0] div_pid_out;
|
||||
|
@ -211,7 +211,7 @@ module VX_muldiv_unit #(
|
|||
|
||||
wire is_rem_op = `INST_M_IS_REM(muldiv_op);
|
||||
|
||||
wire div_valid_in = execute_if.valid && ~is_mulx_op;
|
||||
wire div_valid_in = execute_if.valid && ~is_mulx_op;
|
||||
wire div_ready_in;
|
||||
wire div_valid_out;
|
||||
wire div_ready_out;
|
||||
|
@ -226,25 +226,25 @@ module VX_muldiv_unit #(
|
|||
`else
|
||||
assign div_in1[i] = execute_if.data.rs1_data[i];
|
||||
assign div_in2[i] = execute_if.data.rs2_data[i];
|
||||
`endif
|
||||
`endif
|
||||
end
|
||||
|
||||
`ifdef IDIV_DPI
|
||||
`ifdef IDIV_DPI
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] div_result_in;
|
||||
wire div_fire_in = div_valid_in && div_ready_in;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [`XLEN-1:0] div_quotient, div_remainder;
|
||||
always @(*) begin
|
||||
reg [`XLEN-1:0] div_quotient, div_remainder;
|
||||
always @(*) begin
|
||||
dpi_idiv (div_fire_in, is_signed_op, div_in1[i], div_in2[i], div_quotient, div_remainder);
|
||||
end
|
||||
assign div_result_in[i] = is_rem_op ? (is_alu_w ? `XLEN'($signed(div_remainder[31:0])) : div_remainder) :
|
||||
assign div_result_in[i] = is_rem_op ? (is_alu_w ? `XLEN'($signed(div_remainder[31:0])) : div_remainder) :
|
||||
(is_alu_w ? `XLEN'($signed(div_quotient[31:0])) : div_quotient);
|
||||
end
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW (1 + TAGW + (NUM_LANES * `XLEN)),
|
||||
.DATAW (1 + TAG_WIDTH + (NUM_LANES * `XLEN)),
|
||||
.DEPTH (`LATENCY_IMUL),
|
||||
.RESETW (1)
|
||||
) div_shift_reg (
|
||||
|
@ -260,7 +260,7 @@ module VX_muldiv_unit #(
|
|||
`else
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] div_quotient, div_remainder;
|
||||
wire is_rem_op_out;
|
||||
wire is_rem_op_out;
|
||||
wire is_div_w_out;
|
||||
wire div_strode;
|
||||
wire div_busy;
|
||||
|
@ -285,31 +285,31 @@ module VX_muldiv_unit #(
|
|||
) serial_div (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
|
||||
.strobe (div_strode),
|
||||
.busy (div_busy),
|
||||
|
||||
.is_signed (is_signed_op),
|
||||
.is_signed (is_signed_op),
|
||||
.numer (div_in1),
|
||||
.denom (div_in2),
|
||||
|
||||
.quotient (div_quotient),
|
||||
.remainder (div_remainder)
|
||||
.remainder (div_remainder)
|
||||
);
|
||||
|
||||
reg [TAGW+2-1:0] div_tag_r;
|
||||
reg [TAG_WIDTH+2-1:0] div_tag_r;
|
||||
always @(posedge clk) begin
|
||||
if (div_valid_in && div_ready_in) begin
|
||||
div_tag_r <= {execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, is_rem_op, is_alu_w, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop};
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
assign {div_uuid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, is_rem_op_out, is_div_w_out, div_pid_out, div_sop_out, div_eop_out} = div_tag_r;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
`ifdef XLEN_64
|
||||
assign div_result_out[i] = is_rem_op_out ? (is_div_w_out ? `XLEN'($signed(div_remainder[i][31:0])) : div_remainder[i]) :
|
||||
(is_div_w_out ? `XLEN'($signed(div_quotient[i][31:0])) : div_quotient[i]);
|
||||
assign div_result_out[i] = is_rem_op_out ? (is_div_w_out ? `XLEN'($signed(div_remainder[i][31:0])) : div_remainder[i]) :
|
||||
(is_div_w_out ? `XLEN'($signed(div_quotient[i][31:0])) : div_quotient[i]);
|
||||
`else
|
||||
assign div_result_out[i] = is_rem_op_out ? div_remainder[i] : div_quotient[i];
|
||||
`UNUSED_VAR (is_div_w_out)
|
||||
|
@ -323,8 +323,9 @@ module VX_muldiv_unit #(
|
|||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATAW (TAGW + (NUM_LANES * `XLEN)),
|
||||
.OUT_REG (1)
|
||||
.DATAW (TAG_WIDTH + (NUM_LANES * `XLEN)),
|
||||
.ARBITER ("F"),
|
||||
.OUT_BUF (1)
|
||||
) rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -337,5 +338,5 @@ module VX_muldiv_unit #(
|
|||
.ready_out (commit_if.ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
|
||||
endmodule
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,73 +14,71 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_alu_unit #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
||||
// Inputs
|
||||
VX_dispatch_if.slave dispatch_if [`ISSUE_WIDTH],
|
||||
|
||||
// Outputs
|
||||
VX_commit_if.master commit_if [`ISSUE_WIDTH],
|
||||
VX_branch_ctl_if.master branch_ctl_if [`NUM_ALU_BLOCKS]
|
||||
);
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam BLOCK_SIZE = `NUM_ALU_BLOCKS;
|
||||
localparam NUM_LANES = `NUM_ALU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam RSP_ARB_SIZE = 1 + `EXT_M_ENABLED;
|
||||
localparam PARTIAL_BW = (BLOCK_SIZE != `ISSUE_WIDTH) || (NUM_LANES != `NUM_THREADS);
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) execute_if[BLOCK_SIZE]();
|
||||
|
||||
`RESET_RELAY (dispatch_reset, reset);
|
||||
) per_block_execute_if[BLOCK_SIZE]();
|
||||
|
||||
VX_dispatch_unit #(
|
||||
.BLOCK_SIZE (BLOCK_SIZE),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.OUT_REG (PARTIAL_BW ? 1 : 0)
|
||||
.OUT_BUF (PARTIAL_BW ? 1 : 0)
|
||||
) dispatch_unit (
|
||||
.clk (clk),
|
||||
.reset (dispatch_reset),
|
||||
.reset (reset),
|
||||
.dispatch_if(dispatch_if),
|
||||
.execute_if (execute_if)
|
||||
.execute_if (per_block_execute_if)
|
||||
);
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) commit_block_if[BLOCK_SIZE]();
|
||||
) per_block_commit_if[BLOCK_SIZE]();
|
||||
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin
|
||||
|
||||
wire is_muldiv_op;
|
||||
`RESET_RELAY_EN (block_reset, reset,(BLOCK_SIZE > 1));
|
||||
|
||||
wire is_muldiv_op = `EXT_M_ENABLED && (per_block_execute_if[block_idx].data.op_args.alu.xtype == `ALU_TYPE_MULDIV);
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) int_execute_if();
|
||||
|
||||
assign int_execute_if.valid = execute_if[block_idx].valid && ~is_muldiv_op;
|
||||
assign int_execute_if.data = execute_if[block_idx].data;
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) int_commit_if();
|
||||
|
||||
`RESET_RELAY (int_reset, reset);
|
||||
assign int_execute_if.valid = per_block_execute_if[block_idx].valid && ~is_muldiv_op;
|
||||
assign int_execute_if.data = per_block_execute_if[block_idx].data;
|
||||
|
||||
VX_int_unit #(
|
||||
.CORE_ID (CORE_ID),
|
||||
VX_alu_int #(
|
||||
.INSTANCE_ID ($sformatf("%s-int%0d", INSTANCE_ID, block_idx)),
|
||||
.BLOCK_IDX (block_idx),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) int_unit (
|
||||
) alu_int (
|
||||
.clk (clk),
|
||||
.reset (int_reset),
|
||||
.reset (block_reset),
|
||||
.execute_if (int_execute_if),
|
||||
.branch_ctl_if (branch_ctl_if[block_idx]),
|
||||
.commit_if (int_commit_if)
|
||||
|
@ -88,84 +86,78 @@ module VX_alu_unit #(
|
|||
|
||||
`ifdef EXT_M_ENABLE
|
||||
|
||||
assign is_muldiv_op = `INST_ALU_IS_M(execute_if[block_idx].data.op_mod);
|
||||
|
||||
`RESET_RELAY (mdv_reset, reset);
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) mdv_execute_if();
|
||||
|
||||
assign mdv_execute_if.valid = execute_if[block_idx].valid && is_muldiv_op;
|
||||
assign mdv_execute_if.data = execute_if[block_idx].data;
|
||||
) muldiv_execute_if();
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) mdv_commit_if();
|
||||
) muldiv_commit_if();
|
||||
|
||||
VX_muldiv_unit #(
|
||||
.CORE_ID (CORE_ID),
|
||||
assign muldiv_execute_if.valid = per_block_execute_if[block_idx].valid && is_muldiv_op;
|
||||
assign muldiv_execute_if.data = per_block_execute_if[block_idx].data;
|
||||
|
||||
VX_alu_muldiv #(
|
||||
.INSTANCE_ID ($sformatf("%s-muldiv%0d", INSTANCE_ID, block_idx)),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) mdv_unit (
|
||||
) muldiv_unit (
|
||||
.clk (clk),
|
||||
.reset (mdv_reset),
|
||||
.execute_if (mdv_execute_if),
|
||||
.commit_if (mdv_commit_if)
|
||||
);
|
||||
|
||||
assign execute_if[block_idx].ready = is_muldiv_op ? mdv_execute_if.ready : int_execute_if.ready;
|
||||
|
||||
`else
|
||||
|
||||
assign is_muldiv_op = 0;
|
||||
assign execute_if[block_idx].ready = int_execute_if.ready;
|
||||
.reset (block_reset),
|
||||
.execute_if (muldiv_execute_if),
|
||||
.commit_if (muldiv_commit_if)
|
||||
);
|
||||
|
||||
`endif
|
||||
|
||||
assign per_block_execute_if[block_idx].ready =
|
||||
`ifdef EXT_M_ENABLE
|
||||
is_muldiv_op ? muldiv_execute_if.ready :
|
||||
`endif
|
||||
int_execute_if.ready;
|
||||
|
||||
// send response
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (RSP_ARB_SIZE),
|
||||
.DATAW (RSP_ARB_DATAW),
|
||||
.OUT_REG (PARTIAL_BW ? 1 : 3)
|
||||
.OUT_BUF (PARTIAL_BW ? 1 : 3),
|
||||
.ARBITER ("F")
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in ({
|
||||
.reset (block_reset),
|
||||
.valid_in ({
|
||||
`ifdef EXT_M_ENABLE
|
||||
mdv_commit_if.valid,
|
||||
muldiv_commit_if.valid,
|
||||
`endif
|
||||
int_commit_if.valid
|
||||
}),
|
||||
.ready_in ({
|
||||
`ifdef EXT_M_ENABLE
|
||||
mdv_commit_if.ready,
|
||||
muldiv_commit_if.ready,
|
||||
`endif
|
||||
int_commit_if.ready
|
||||
}),
|
||||
.data_in ({
|
||||
`ifdef EXT_M_ENABLE
|
||||
mdv_commit_if.data,
|
||||
muldiv_commit_if.data,
|
||||
`endif
|
||||
int_commit_if.data
|
||||
}),
|
||||
.data_out (commit_block_if[block_idx].data),
|
||||
.valid_out (commit_block_if[block_idx].valid),
|
||||
.ready_out (commit_block_if[block_idx].ready),
|
||||
.data_out (per_block_commit_if[block_idx].data),
|
||||
.valid_out (per_block_commit_if[block_idx].valid),
|
||||
.ready_out (per_block_commit_if[block_idx].ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
end
|
||||
|
||||
`RESET_RELAY (commit_reset, reset);
|
||||
|
||||
VX_gather_unit #(
|
||||
.BLOCK_SIZE (BLOCK_SIZE),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.OUT_REG (PARTIAL_BW ? 3 : 0)
|
||||
.OUT_BUF (PARTIAL_BW ? 3 : 0)
|
||||
) gather_unit (
|
||||
.clk (clk),
|
||||
.reset (commit_reset),
|
||||
.commit_in_if (commit_block_if),
|
||||
.reset (reset),
|
||||
.commit_in_if (per_block_commit_if),
|
||||
.commit_out_if (commit_if)
|
||||
);
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -13,101 +13,82 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_commit import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
module VX_commit import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_commit_if.slave alu_commit_if [`ISSUE_WIDTH],
|
||||
VX_commit_if.slave lsu_commit_if [`ISSUE_WIDTH],
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_commit_if.slave fpu_commit_if [`ISSUE_WIDTH],
|
||||
`endif
|
||||
VX_commit_if.slave sfu_commit_if [`ISSUE_WIDTH],
|
||||
VX_commit_if.slave commit_if [`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
|
||||
// outputs
|
||||
VX_writeback_if.master writeback_if [`ISSUE_WIDTH],
|
||||
VX_commit_csr_if.master commit_csr_if,
|
||||
VX_commit_sched_if.master commit_sched_if,
|
||||
|
||||
// simulation helper signals
|
||||
output wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value
|
||||
VX_commit_sched_if.master commit_sched_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `XLEN + 1 + `NR_BITS + `NUM_THREADS * `XLEN + 1 + 1 + 1;
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + 1 + `NR_BITS + `NUM_THREADS * `XLEN + 1 + 1 + 1;
|
||||
localparam COMMIT_SIZEW = `CLOG2(`NUM_THREADS + 1);
|
||||
localparam COMMIT_ALL_SIZEW = COMMIT_SIZEW + `ISSUE_WIDTH - 1;
|
||||
|
||||
// commit arbitration
|
||||
|
||||
VX_commit_if commit_if[`ISSUE_WIDTH]();
|
||||
VX_commit_if commit_arb_if[`ISSUE_WIDTH]();
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0] commit_fire;
|
||||
wire [`ISSUE_WIDTH-1:0][`NW_WIDTH-1:0] commit_wid;
|
||||
wire [`ISSUE_WIDTH-1:0][`NUM_THREADS-1:0] commit_tmask;
|
||||
wire [`ISSUE_WIDTH-1:0] commit_eop;
|
||||
wire [`ISSUE_WIDTH-1:0] per_issue_commit_fire;
|
||||
wire [`ISSUE_WIDTH-1:0][`NW_WIDTH-1:0] per_issue_commit_wid;
|
||||
wire [`ISSUE_WIDTH-1:0][`NUM_THREADS-1:0] per_issue_commit_tmask;
|
||||
wire [`ISSUE_WIDTH-1:0] per_issue_commit_eop;
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
|
||||
wire [`NUM_EX_UNITS-1:0] valid_in;
|
||||
wire [`NUM_EX_UNITS-1:0][DATAW-1:0] data_in;
|
||||
wire [`NUM_EX_UNITS-1:0] ready_in;
|
||||
|
||||
for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin
|
||||
assign valid_in[j] = commit_if[j * `ISSUE_WIDTH + i].valid;
|
||||
assign data_in[j] = commit_if[j * `ISSUE_WIDTH + i].data;
|
||||
assign commit_if[j * `ISSUE_WIDTH + i].ready = ready_in[j];
|
||||
end
|
||||
|
||||
`RESET_RELAY (arb_reset, reset);
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (`NUM_EX_UNITS),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER ("R"),
|
||||
.OUT_REG (1)
|
||||
.OUT_BUF (1)
|
||||
) commit_arb (
|
||||
.clk (clk),
|
||||
.reset (arb_reset),
|
||||
.valid_in ({
|
||||
sfu_commit_if[i].valid,
|
||||
`ifdef EXT_F_ENABLE
|
||||
fpu_commit_if[i].valid,
|
||||
`endif
|
||||
alu_commit_if[i].valid,
|
||||
lsu_commit_if[i].valid
|
||||
}),
|
||||
.ready_in ({
|
||||
sfu_commit_if[i].ready,
|
||||
`ifdef EXT_F_ENABLE
|
||||
fpu_commit_if[i].ready,
|
||||
`endif
|
||||
alu_commit_if[i].ready,
|
||||
lsu_commit_if[i].ready
|
||||
}),
|
||||
.data_in ({
|
||||
sfu_commit_if[i].data,
|
||||
`ifdef EXT_F_ENABLE
|
||||
fpu_commit_if[i].data,
|
||||
`endif
|
||||
alu_commit_if[i].data,
|
||||
lsu_commit_if[i].data
|
||||
}),
|
||||
.data_out (commit_if[i].data),
|
||||
.valid_out (commit_if[i].valid),
|
||||
.ready_out (commit_if[i].ready),
|
||||
.clk (clk),
|
||||
.reset (arb_reset),
|
||||
.valid_in (valid_in),
|
||||
.ready_in (ready_in),
|
||||
.data_in (data_in),
|
||||
.data_out (commit_arb_if[i].data),
|
||||
.valid_out (commit_arb_if[i].valid),
|
||||
.ready_out (commit_arb_if[i].ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
assign commit_fire[i] = commit_if[i].valid && commit_if[i].ready;
|
||||
assign commit_tmask[i]= {`NUM_THREADS{commit_fire[i]}} & commit_if[i].data.tmask;
|
||||
assign commit_wid[i] = commit_if[i].data.wid;
|
||||
assign commit_eop[i] = commit_if[i].data.eop;
|
||||
assign per_issue_commit_fire[i] = commit_arb_if[i].valid && commit_arb_if[i].ready;
|
||||
assign per_issue_commit_tmask[i]= {`NUM_THREADS{per_issue_commit_fire[i]}} & commit_arb_if[i].data.tmask;
|
||||
assign per_issue_commit_wid[i] = commit_arb_if[i].data.wid;
|
||||
assign per_issue_commit_eop[i] = commit_arb_if[i].data.eop;
|
||||
end
|
||||
|
||||
// CSRs update
|
||||
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0][COMMIT_SIZEW-1:0] commit_size, commit_size_r;
|
||||
wire [COMMIT_ALL_SIZEW-1:0] commit_size_all_r, commit_size_all_rr;
|
||||
wire commit_fire_any, commit_fire_any_r, commit_fire_any_rr;
|
||||
|
||||
assign commit_fire_any = (| commit_fire);
|
||||
assign commit_fire_any = (| per_issue_commit_fire);
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
wire [COMMIT_SIZEW-1:0] count;
|
||||
`POP_COUNT(count, commit_tmask[i]);
|
||||
`POP_COUNT(count, per_issue_commit_tmask[i]);
|
||||
assign commit_size[i] = count;
|
||||
end
|
||||
|
||||
|
@ -155,69 +136,56 @@ module VX_commit import VX_gpu_pkg::*; #(
|
|||
end
|
||||
assign commit_csr_if.instret = instret;
|
||||
|
||||
// Committed instructions
|
||||
// Track committed instructions
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0] committed = commit_fire & commit_eop;
|
||||
reg [`NUM_WARPS-1:0] committed_warps;
|
||||
|
||||
always @(*) begin
|
||||
committed_warps = 0;
|
||||
for (integer i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
if (per_issue_commit_fire[i] && per_issue_commit_eop[i]) begin
|
||||
committed_warps[per_issue_commit_wid[i]] = 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (`ISSUE_WIDTH * (1 + `NW_WIDTH)),
|
||||
.RESETW (`ISSUE_WIDTH)
|
||||
.DATAW (`NUM_WARPS),
|
||||
.RESETW (`NUM_WARPS)
|
||||
) committed_pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in ({committed, commit_wid}),
|
||||
.data_out ({commit_sched_if.committed, commit_sched_if.committed_wid})
|
||||
.data_in (committed_warps),
|
||||
.data_out ({commit_sched_if.committed_warps})
|
||||
);
|
||||
|
||||
// Writeback
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign writeback_if[i].valid = commit_if[i].valid && commit_if[i].data.wb;
|
||||
assign writeback_if[i].data.uuid = commit_if[i].data.uuid;
|
||||
assign writeback_if[i].data.wis = wid_to_wis(commit_if[i].data.wid);
|
||||
assign writeback_if[i].data.PC = commit_if[i].data.PC;
|
||||
assign writeback_if[i].data.tmask= commit_if[i].data.tmask;
|
||||
assign writeback_if[i].data.rd = commit_if[i].data.rd;
|
||||
assign writeback_if[i].data.data = commit_if[i].data.data;
|
||||
assign writeback_if[i].data.sop = commit_if[i].data.sop;
|
||||
assign writeback_if[i].data.eop = commit_if[i].data.eop;
|
||||
assign commit_if[i].ready = 1'b1; // writeback has no backpressure
|
||||
assign writeback_if[i].valid = commit_arb_if[i].valid && commit_arb_if[i].data.wb;
|
||||
assign writeback_if[i].data.uuid = commit_arb_if[i].data.uuid;
|
||||
assign writeback_if[i].data.wis = wid_to_wis(commit_arb_if[i].data.wid);
|
||||
assign writeback_if[i].data.PC = commit_arb_if[i].data.PC;
|
||||
assign writeback_if[i].data.tmask= commit_arb_if[i].data.tmask;
|
||||
assign writeback_if[i].data.rd = commit_arb_if[i].data.rd;
|
||||
assign writeback_if[i].data.data = commit_arb_if[i].data.data;
|
||||
assign writeback_if[i].data.sop = commit_arb_if[i].data.sop;
|
||||
assign writeback_if[i].data.eop = commit_arb_if[i].data.eop;
|
||||
assign commit_arb_if[i].ready = 1'b1; // writeback has no backpressure
|
||||
end
|
||||
|
||||
// simulation helper signal to get RISC-V tests Pass/Fail status
|
||||
reg [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value_r;
|
||||
always @(posedge clk) begin
|
||||
if (writeback_if[0].valid) begin
|
||||
sim_wb_value_r[writeback_if[0].data.rd] <= writeback_if[0].data.data[0];
|
||||
end
|
||||
end
|
||||
assign sim_wb_value = sim_wb_value_r;
|
||||
|
||||
`ifdef DBG_TRACE_CORE_PIPELINE
|
||||
|
||||
`ifdef DBG_TRACE_PIPELINE
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
always @(posedge clk) begin
|
||||
if (alu_commit_if[i].valid && alu_commit_if[i].ready) begin
|
||||
`TRACE(1, ("%d: core%0d-commit: wid=%0d, PC=0x%0h, ex=ALU, tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", $time, CORE_ID, alu_commit_if[i].data.wid, alu_commit_if[i].data.PC, alu_commit_if[i].data.tmask, alu_commit_if[i].data.wb, alu_commit_if[i].data.rd, alu_commit_if[i].data.sop, alu_commit_if[i].data.eop));
|
||||
`TRACE_ARRAY1D(1, alu_commit_if[i].data.data, `NUM_THREADS);
|
||||
`TRACE(1, (" (#%0d)\n", alu_commit_if[i].data.uuid));
|
||||
end
|
||||
if (lsu_commit_if[i].valid && lsu_commit_if[i].ready) begin
|
||||
`TRACE(1, ("%d: core%0d-commit: wid=%0d, PC=0x%0h, ex=LSU, tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", $time, CORE_ID, lsu_commit_if[i].data.wid, lsu_commit_if[i].data.PC, lsu_commit_if[i].data.tmask, lsu_commit_if[i].data.wb, lsu_commit_if[i].data.rd, lsu_commit_if[i].data.sop, lsu_commit_if[i].data.eop));
|
||||
`TRACE_ARRAY1D(1, lsu_commit_if[i].data.data, `NUM_THREADS);
|
||||
`TRACE(1, (" (#%0d)\n", lsu_commit_if[i].data.uuid));
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (fpu_commit_if[i].valid && fpu_commit_if[i].ready) begin
|
||||
`TRACE(1, ("%d: core%0d-commit: wid=%0d, PC=0x%0h, ex=FPU, tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", $time, CORE_ID, fpu_commit_if[i].data.wid, fpu_commit_if[i].data.PC, fpu_commit_if[i].data.tmask, fpu_commit_if[i].data.wb, fpu_commit_if[i].data.rd, fpu_commit_if[i].data.sop, fpu_commit_if[i].data.eop));
|
||||
`TRACE_ARRAY1D(1, fpu_commit_if[i].data.data, `NUM_THREADS);
|
||||
`TRACE(1, (" (#%0d)\n", fpu_commit_if[i].data.uuid));
|
||||
end
|
||||
`endif
|
||||
if (sfu_commit_if[i].valid && sfu_commit_if[i].ready) begin
|
||||
`TRACE(1, ("%d: core%0d-commit: wid=%0d, PC=0x%0h, ex=SFU, tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", $time, CORE_ID, sfu_commit_if[i].data.wid, sfu_commit_if[i].data.PC, sfu_commit_if[i].data.tmask, sfu_commit_if[i].data.wb, sfu_commit_if[i].data.rd, sfu_commit_if[i].data.sop, sfu_commit_if[i].data.eop));
|
||||
`TRACE_ARRAY1D(1, sfu_commit_if[i].data.data, `NUM_THREADS);
|
||||
`TRACE(1, (" (#%0d)\n", sfu_commit_if[i].data.uuid));
|
||||
for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin
|
||||
always @(posedge clk) begin
|
||||
if (commit_if[j * `ISSUE_WIDTH + i].valid && commit_if[j * `ISSUE_WIDTH + i].ready) begin
|
||||
`TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0}));
|
||||
trace_ex_type(1, j);
|
||||
`TRACE(1, (", tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", commit_if[j * `ISSUE_WIDTH + i].data.tmask, commit_if[j * `ISSUE_WIDTH + i].data.wb, commit_if[j * `ISSUE_WIDTH + i].data.rd, commit_if[j * `ISSUE_WIDTH + i].data.sop, commit_if[j * `ISSUE_WIDTH + i].data.eop));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", commit_if[j * `ISSUE_WIDTH + i].data.data, `NUM_THREADS);
|
||||
`TRACE(1, (" (#%0d)\n", commit_if[j * `ISSUE_WIDTH + i].data.uuid));
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -17,11 +17,12 @@
|
|||
`include "VX_fpu_define.vh"
|
||||
`endif
|
||||
|
||||
module VX_core import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
module VX_core import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0,
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
|
||||
// Clock
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -40,10 +41,6 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
VX_gbar_bus_if.master gbar_bus_if,
|
||||
`endif
|
||||
|
||||
// simulation helper signals
|
||||
output wire sim_ebreak,
|
||||
output wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value,
|
||||
|
||||
// Status
|
||||
output wire busy
|
||||
);
|
||||
|
@ -55,30 +52,21 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
VX_commit_sched_if commit_sched_if();
|
||||
VX_commit_csr_if commit_csr_if();
|
||||
VX_branch_ctl_if branch_ctl_if[`NUM_ALU_BLOCKS]();
|
||||
VX_warp_ctl_if warp_ctl_if();
|
||||
|
||||
VX_dispatch_if alu_dispatch_if[`ISSUE_WIDTH]();
|
||||
VX_commit_if alu_commit_if[`ISSUE_WIDTH]();
|
||||
VX_warp_ctl_if warp_ctl_if();
|
||||
|
||||
VX_dispatch_if lsu_dispatch_if[`ISSUE_WIDTH]();
|
||||
VX_commit_if lsu_commit_if[`ISSUE_WIDTH]();
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_dispatch_if fpu_dispatch_if[`ISSUE_WIDTH]();
|
||||
VX_commit_if fpu_commit_if[`ISSUE_WIDTH]();
|
||||
`endif
|
||||
VX_dispatch_if sfu_dispatch_if[`ISSUE_WIDTH]();
|
||||
VX_commit_if sfu_commit_if[`ISSUE_WIDTH]();
|
||||
|
||||
VX_dispatch_if dispatch_if[`NUM_EX_UNITS * `ISSUE_WIDTH]();
|
||||
VX_commit_if commit_if[`NUM_EX_UNITS * `ISSUE_WIDTH]();
|
||||
VX_writeback_if writeback_if[`ISSUE_WIDTH]();
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH)
|
||||
) dcache_bus_tmp_if[DCACHE_NUM_REQS]();
|
||||
VX_lsu_mem_if #(
|
||||
.NUM_LANES (`NUM_LSU_LANES),
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||
) lsu_mem_if[`NUM_LSU_BLOCKS]();
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_mem_perf_if mem_perf_tmp_if();
|
||||
VX_pipeline_perf_if pipeline_perf_if();
|
||||
VX_pipeline_perf_if pipeline_perf_if();
|
||||
|
||||
assign mem_perf_tmp_if.icache = mem_perf_if.icache;
|
||||
assign mem_perf_tmp_if.dcache = mem_perf_if.dcache;
|
||||
|
@ -107,19 +95,21 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
`SCOPE_IO_SWITCH (3)
|
||||
|
||||
VX_schedule #(
|
||||
.INSTANCE_ID ($sformatf("%s-schedule", INSTANCE_ID)),
|
||||
.CORE_ID (CORE_ID)
|
||||
) schedule (
|
||||
.clk (clk),
|
||||
.reset (schedule_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_schedule_if (pipeline_perf_if.schedule),
|
||||
`endif
|
||||
.sched_perf (pipeline_perf_if.sched),
|
||||
`endif
|
||||
|
||||
.base_dcrs (base_dcrs),
|
||||
.base_dcrs (base_dcrs),
|
||||
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.branch_ctl_if (branch_ctl_if),
|
||||
|
||||
.decode_sched_if(decode_sched_if),
|
||||
.commit_sched_if(commit_sched_if),
|
||||
|
||||
|
@ -127,13 +117,13 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
`ifdef GBAR_ENABLE
|
||||
.gbar_bus_if (gbar_bus_if),
|
||||
`endif
|
||||
.sched_csr_if (sched_csr_if),
|
||||
.sched_csr_if (sched_csr_if),
|
||||
|
||||
.busy (busy)
|
||||
);
|
||||
|
||||
VX_fetch #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-fetch", INSTANCE_ID))
|
||||
) fetch (
|
||||
`SCOPE_IO_BIND (0)
|
||||
.clk (clk),
|
||||
|
@ -144,7 +134,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_decode #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-decode", INSTANCE_ID))
|
||||
) decode (
|
||||
.clk (clk),
|
||||
.reset (decode_reset),
|
||||
|
@ -154,7 +144,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_issue #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-issue", INSTANCE_ID))
|
||||
) issue (
|
||||
`SCOPE_IO_BIND (1)
|
||||
|
||||
|
@ -162,110 +152,192 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
.reset (issue_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_issue_if (pipeline_perf_if.issue),
|
||||
.issue_perf (pipeline_perf_if.issue),
|
||||
`endif
|
||||
|
||||
.decode_if (decode_if),
|
||||
.writeback_if (writeback_if),
|
||||
|
||||
.alu_dispatch_if(alu_dispatch_if),
|
||||
.lsu_dispatch_if(lsu_dispatch_if),
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_dispatch_if(fpu_dispatch_if),
|
||||
`endif
|
||||
.sfu_dispatch_if(sfu_dispatch_if)
|
||||
.dispatch_if (dispatch_if)
|
||||
);
|
||||
|
||||
VX_execute #(
|
||||
.INSTANCE_ID ($sformatf("%s-execute", INSTANCE_ID)),
|
||||
.CORE_ID (CORE_ID)
|
||||
) execute (
|
||||
`SCOPE_IO_BIND (2)
|
||||
|
||||
|
||||
.clk (clk),
|
||||
.reset (execute_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.mem_perf_if (mem_perf_tmp_if),
|
||||
.pipeline_perf_if(pipeline_perf_if),
|
||||
`endif
|
||||
|
||||
.base_dcrs (base_dcrs),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.mem_perf_if (mem_perf_tmp_if),
|
||||
.pipeline_perf_if(pipeline_perf_if),
|
||||
`endif
|
||||
.lsu_mem_if (lsu_mem_if),
|
||||
|
||||
.dcache_bus_if (dcache_bus_tmp_if),
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_dispatch_if(fpu_dispatch_if),
|
||||
.fpu_commit_if (fpu_commit_if),
|
||||
`endif
|
||||
.dispatch_if (dispatch_if),
|
||||
.commit_if (commit_if),
|
||||
|
||||
.commit_csr_if (commit_csr_if),
|
||||
.sched_csr_if (sched_csr_if),
|
||||
|
||||
.alu_dispatch_if(alu_dispatch_if),
|
||||
.lsu_dispatch_if(lsu_dispatch_if),
|
||||
.sfu_dispatch_if(sfu_dispatch_if),
|
||||
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.branch_ctl_if (branch_ctl_if),
|
||||
|
||||
.alu_commit_if (alu_commit_if),
|
||||
.lsu_commit_if (lsu_commit_if),
|
||||
.sfu_commit_if (sfu_commit_if),
|
||||
|
||||
.sim_ebreak (sim_ebreak)
|
||||
);
|
||||
.branch_ctl_if (branch_ctl_if)
|
||||
);
|
||||
|
||||
VX_commit #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-commit", INSTANCE_ID))
|
||||
) commit (
|
||||
.clk (clk),
|
||||
.reset (commit_reset),
|
||||
|
||||
.alu_commit_if (alu_commit_if),
|
||||
.lsu_commit_if (lsu_commit_if),
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_commit_if (fpu_commit_if),
|
||||
`endif
|
||||
.sfu_commit_if (sfu_commit_if),
|
||||
|
||||
.writeback_if (writeback_if),
|
||||
|
||||
.commit_csr_if (commit_csr_if),
|
||||
.commit_sched_if(commit_sched_if),
|
||||
.commit_if (commit_if),
|
||||
|
||||
.sim_wb_value (sim_wb_value)
|
||||
.writeback_if (writeback_if),
|
||||
|
||||
.commit_csr_if (commit_csr_if),
|
||||
.commit_sched_if(commit_sched_if)
|
||||
);
|
||||
|
||||
`ifdef SM_ENABLE
|
||||
VX_lsu_mem_if #(
|
||||
.NUM_LANES (`NUM_LSU_LANES),
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||
) lsu_dcache_if[`NUM_LSU_BLOCKS]();
|
||||
|
||||
VX_smem_unit #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) smem_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
`ifdef LMEM_ENABLE
|
||||
|
||||
`RESET_RELAY (lmem_unit_reset, reset);
|
||||
|
||||
VX_lmem_unit #(
|
||||
.INSTANCE_ID (INSTANCE_ID)
|
||||
) lmem_unit (
|
||||
.clk (clk),
|
||||
.reset (lmem_unit_reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.cache_perf (mem_perf_tmp_if.smem),
|
||||
.cache_perf (mem_perf_tmp_if.lmem),
|
||||
`endif
|
||||
.dcache_bus_in_if (dcache_bus_tmp_if),
|
||||
.dcache_bus_out_if (dcache_bus_if)
|
||||
.lsu_mem_in_if (lsu_mem_if),
|
||||
.lsu_mem_out_if (lsu_dcache_if)
|
||||
);
|
||||
|
||||
`else
|
||||
|
||||
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
|
||||
`ASSIGN_VX_MEM_BUS_IF (dcache_bus_if[i], dcache_bus_tmp_if[i]);
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||
`ASSIGN_VX_LSU_MEM_IF (lsu_dcache_if[i], lsu_mem_if[i]);
|
||||
end
|
||||
|
||||
`endif
|
||||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||
|
||||
VX_lsu_mem_if #(
|
||||
.NUM_LANES (DCACHE_CHANNELS),
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH)
|
||||
) dcache_coalesced_if();
|
||||
|
||||
if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin
|
||||
|
||||
`RESET_RELAY (mem_coalescer_reset, reset);
|
||||
|
||||
VX_mem_coalescer #(
|
||||
.INSTANCE_ID ($sformatf("%s-coalescer%0d", INSTANCE_ID, i)),
|
||||
.NUM_REQS (`NUM_LSU_LANES),
|
||||
.DATA_IN_SIZE (LSU_WORD_SIZE),
|
||||
.DATA_OUT_SIZE (DCACHE_WORD_SIZE),
|
||||
.ADDR_WIDTH (LSU_ADDR_WIDTH),
|
||||
.ATYPE_WIDTH (`ADDR_TYPE_WIDTH),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.QUEUE_SIZE (`LSUQ_OUT_SIZE)
|
||||
) mem_coalescer (
|
||||
.clk (clk),
|
||||
.reset (mem_coalescer_reset),
|
||||
|
||||
// Input request
|
||||
.in_req_valid (lsu_dcache_if[i].req_valid),
|
||||
.in_req_mask (lsu_dcache_if[i].req_data.mask),
|
||||
.in_req_rw (lsu_dcache_if[i].req_data.rw),
|
||||
.in_req_byteen (lsu_dcache_if[i].req_data.byteen),
|
||||
.in_req_addr (lsu_dcache_if[i].req_data.addr),
|
||||
.in_req_atype (lsu_dcache_if[i].req_data.atype),
|
||||
.in_req_data (lsu_dcache_if[i].req_data.data),
|
||||
.in_req_tag (lsu_dcache_if[i].req_data.tag),
|
||||
.in_req_ready (lsu_dcache_if[i].req_ready),
|
||||
|
||||
// Input response
|
||||
.in_rsp_valid (lsu_dcache_if[i].rsp_valid),
|
||||
.in_rsp_mask (lsu_dcache_if[i].rsp_data.mask),
|
||||
.in_rsp_data (lsu_dcache_if[i].rsp_data.data),
|
||||
.in_rsp_tag (lsu_dcache_if[i].rsp_data.tag),
|
||||
.in_rsp_ready (lsu_dcache_if[i].rsp_ready),
|
||||
|
||||
// Output request
|
||||
.out_req_valid (dcache_coalesced_if.req_valid),
|
||||
.out_req_mask (dcache_coalesced_if.req_data.mask),
|
||||
.out_req_rw (dcache_coalesced_if.req_data.rw),
|
||||
.out_req_byteen (dcache_coalesced_if.req_data.byteen),
|
||||
.out_req_addr (dcache_coalesced_if.req_data.addr),
|
||||
.out_req_atype (dcache_coalesced_if.req_data.atype),
|
||||
.out_req_data (dcache_coalesced_if.req_data.data),
|
||||
.out_req_tag (dcache_coalesced_if.req_data.tag),
|
||||
.out_req_ready (dcache_coalesced_if.req_ready),
|
||||
|
||||
// Output response
|
||||
.out_rsp_valid (dcache_coalesced_if.rsp_valid),
|
||||
.out_rsp_mask (dcache_coalesced_if.rsp_data.mask),
|
||||
.out_rsp_data (dcache_coalesced_if.rsp_data.data),
|
||||
.out_rsp_tag (dcache_coalesced_if.rsp_data.tag),
|
||||
.out_rsp_ready (dcache_coalesced_if.rsp_ready)
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
||||
`ASSIGN_VX_LSU_MEM_IF (dcache_coalesced_if, lsu_dcache_if[i]);
|
||||
|
||||
end
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH)
|
||||
) dcache_bus_tmp_if[DCACHE_CHANNELS]();
|
||||
|
||||
`RESET_RELAY (lsu_adapter_reset, reset);
|
||||
|
||||
VX_lsu_adapter #(
|
||||
.NUM_LANES (DCACHE_CHANNELS),
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH),
|
||||
.TAG_SEL_BITS (DCACHE_TAG_WIDTH - `UUID_WIDTH),
|
||||
.ARBITER ("P"),
|
||||
.REQ_OUT_BUF (0),
|
||||
.RSP_OUT_BUF (0)
|
||||
) lsu_adapter (
|
||||
.clk (clk),
|
||||
.reset (lsu_adapter_reset),
|
||||
.lsu_mem_if (dcache_coalesced_if),
|
||||
.mem_bus_if (dcache_bus_tmp_if)
|
||||
);
|
||||
|
||||
for (genvar j = 0; j < DCACHE_CHANNELS; ++j) begin
|
||||
`ASSIGN_VX_MEM_BUS_IF (dcache_bus_if[i * DCACHE_CHANNELS + j], dcache_bus_tmp_if[j]);
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
|
||||
wire [`CLOG2(DCACHE_NUM_REQS+1)-1:0] perf_dcache_rd_req_per_cycle;
|
||||
wire [`CLOG2(DCACHE_NUM_REQS+1)-1:0] perf_dcache_wr_req_per_cycle;
|
||||
wire [`CLOG2(DCACHE_NUM_REQS+1)-1:0] perf_dcache_rsp_per_cycle;
|
||||
wire [`CLOG2(LSU_NUM_REQS+1)-1:0] perf_dcache_rd_req_per_cycle;
|
||||
wire [`CLOG2(LSU_NUM_REQS+1)-1:0] perf_dcache_wr_req_per_cycle;
|
||||
wire [`CLOG2(LSU_NUM_REQS+1)-1:0] perf_dcache_rsp_per_cycle;
|
||||
|
||||
wire [1:0] perf_icache_pending_read_cycle;
|
||||
wire [`CLOG2(DCACHE_NUM_REQS+1)+1-1:0] perf_dcache_pending_read_cycle;
|
||||
wire [`CLOG2(LSU_NUM_REQS+1)+1-1:0] perf_dcache_pending_read_cycle;
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] perf_icache_pending_reads;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_dcache_pending_reads;
|
||||
|
@ -277,14 +349,16 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
wire perf_icache_req_fire = icache_bus_if.req_valid && icache_bus_if.req_ready;
|
||||
wire perf_icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready;
|
||||
|
||||
wire [DCACHE_NUM_REQS-1:0] perf_dcache_rd_req_fire, perf_dcache_rd_req_fire_r;
|
||||
wire [DCACHE_NUM_REQS-1:0] perf_dcache_wr_req_fire, perf_dcache_wr_req_fire_r;
|
||||
wire [DCACHE_NUM_REQS-1:0] perf_dcache_rsp_fire;
|
||||
wire [LSU_NUM_REQS-1:0] perf_dcache_rd_req_fire, perf_dcache_rd_req_fire_r;
|
||||
wire [LSU_NUM_REQS-1:0] perf_dcache_wr_req_fire, perf_dcache_wr_req_fire_r;
|
||||
wire [LSU_NUM_REQS-1:0] perf_dcache_rsp_fire;
|
||||
|
||||
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
|
||||
assign perf_dcache_rd_req_fire[i] = dcache_bus_if[i].req_valid && dcache_bus_if[i].req_ready && ~dcache_bus_if[i].req_data.rw;
|
||||
assign perf_dcache_wr_req_fire[i] = dcache_bus_if[i].req_valid && dcache_bus_if[i].req_ready && dcache_bus_if[i].req_data.rw;
|
||||
assign perf_dcache_rsp_fire[i] = dcache_bus_if[i].rsp_valid && dcache_bus_if[i].rsp_ready;
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||
for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin
|
||||
assign perf_dcache_rd_req_fire[i * `NUM_LSU_LANES + j] = lsu_mem_if[i].req_valid && lsu_mem_if[i].req_data.mask[j] && lsu_mem_if[i].req_ready && ~lsu_mem_if[i].req_data.rw;
|
||||
assign perf_dcache_wr_req_fire[i * `NUM_LSU_LANES + j] = lsu_mem_if[i].req_valid && lsu_mem_if[i].req_data.mask[j] && lsu_mem_if[i].req_ready && lsu_mem_if[i].req_data.rw;
|
||||
assign perf_dcache_rsp_fire[i * `NUM_LSU_LANES + j] = lsu_mem_if[i].rsp_valid && lsu_mem_if[i].rsp_data.mask[j] && lsu_mem_if[i].rsp_ready;
|
||||
end
|
||||
end
|
||||
|
||||
`BUFFER(perf_dcache_rd_req_fire_r, perf_dcache_rd_req_fire);
|
||||
|
@ -293,7 +367,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
`POP_COUNT(perf_dcache_rd_req_per_cycle, perf_dcache_rd_req_fire_r);
|
||||
`POP_COUNT(perf_dcache_wr_req_per_cycle, perf_dcache_wr_req_fire_r);
|
||||
`POP_COUNT(perf_dcache_rsp_per_cycle, perf_dcache_rsp_fire);
|
||||
|
||||
|
||||
assign perf_icache_pending_read_cycle = perf_icache_req_fire - perf_icache_rsp_fire;
|
||||
assign perf_dcache_pending_read_cycle = perf_dcache_rd_req_per_cycle - perf_dcache_rsp_per_cycle;
|
||||
|
||||
|
@ -306,7 +380,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
perf_dcache_pending_reads <= $signed(perf_dcache_pending_reads) + `PERF_CTR_BITS'($signed(perf_dcache_pending_read_cycle));
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] perf_icache_lat;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_dcache_lat;
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -17,9 +17,9 @@
|
|||
`include "VX_fpu_define.vh"
|
||||
`endif
|
||||
|
||||
module VX_core_top import VX_gpu_pkg::*; #(
|
||||
module VX_core_top import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
) (
|
||||
// Clock
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -32,13 +32,14 @@ module VX_core_top import VX_gpu_pkg::*; #(
|
|||
output wire [DCACHE_NUM_REQS-1:0] dcache_req_rw,
|
||||
output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE-1:0] dcache_req_byteen,
|
||||
output wire [DCACHE_NUM_REQS-1:0][DCACHE_ADDR_WIDTH-1:0] dcache_req_addr,
|
||||
output wire [DCACHE_NUM_REQS-1:0][`ADDR_TYPE_WIDTH-1:0] dcache_req_atype,
|
||||
output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] dcache_req_data,
|
||||
output wire [DCACHE_NUM_REQS-1:0][DCACHE_NOSM_TAG_WIDTH-1:0] dcache_req_tag,
|
||||
output wire [DCACHE_NUM_REQS-1:0][DCACHE_TAG_WIDTH-1:0] dcache_req_tag,
|
||||
input wire [DCACHE_NUM_REQS-1:0] dcache_req_ready,
|
||||
|
||||
input wire [DCACHE_NUM_REQS-1:0] dcache_rsp_valid,
|
||||
input wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] dcache_rsp_data,
|
||||
input wire [DCACHE_NUM_REQS-1:0][DCACHE_NOSM_TAG_WIDTH-1:0] dcache_rsp_tag,
|
||||
input wire [DCACHE_NUM_REQS-1:0][DCACHE_TAG_WIDTH-1:0] dcache_rsp_tag,
|
||||
output wire [DCACHE_NUM_REQS-1:0] dcache_rsp_ready,
|
||||
|
||||
output wire icache_req_valid,
|
||||
|
@ -57,34 +58,29 @@ module VX_core_top import VX_gpu_pkg::*; #(
|
|||
`ifdef GBAR_ENABLE
|
||||
output wire gbar_req_valid,
|
||||
output wire [`NB_WIDTH-1:0] gbar_req_id,
|
||||
output wire [`NC_WIDTH-1:0] gbar_req_size_m1,
|
||||
output wire [`NC_WIDTH-1:0] gbar_req_size_m1,
|
||||
output wire [`NC_WIDTH-1:0] gbar_req_core_id,
|
||||
input wire gbar_req_ready,
|
||||
input wire gbar_rsp_valid,
|
||||
input wire [`NB_WIDTH-1:0] gbar_rsp_id,
|
||||
`endif
|
||||
|
||||
// simulation helper signals
|
||||
output wire sim_ebreak,
|
||||
output wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value,
|
||||
|
||||
// Status
|
||||
output wire busy
|
||||
);
|
||||
|
||||
|
||||
`ifdef GBAR_ENABLE
|
||||
VX_gbar_bus_if gbar_bus_if();
|
||||
|
||||
assign gbar_req_valid = gbar_bus_if.req_valid;
|
||||
assign gbar_req_id = gbar_bus_if.req_id;
|
||||
assign gbar_req_size_m1 = gbar_bus_if.req_size_m1;
|
||||
assign gbar_req_size_m1 = gbar_bus_if.req_size_m1;
|
||||
assign gbar_req_core_id = gbar_bus_if.req_core_id;
|
||||
assign gbar_bus_if.req_ready = gbar_req_ready;
|
||||
assign gbar_bus_if.rsp_valid = gbar_rsp_valid;
|
||||
assign gbar_bus_if.rsp_id = gbar_rsp_id;
|
||||
`endif
|
||||
|
||||
VX_dcr_bus_if dcr_bus_if();
|
||||
VX_dcr_bus_if dcr_bus_if();
|
||||
|
||||
assign dcr_bus_if.write_valid = dcr_write_valid;
|
||||
assign dcr_bus_if.write_addr = dcr_write_addr;
|
||||
|
@ -92,7 +88,7 @@ module VX_core_top import VX_gpu_pkg::*; #(
|
|||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_NOSM_TAG_WIDTH)
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH)
|
||||
) dcache_bus_if[DCACHE_NUM_REQS]();
|
||||
|
||||
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
|
||||
|
@ -100,6 +96,7 @@ module VX_core_top import VX_gpu_pkg::*; #(
|
|||
assign dcache_req_rw[i] = dcache_bus_if[i].req_data.rw;
|
||||
assign dcache_req_byteen[i] = dcache_bus_if[i].req_data.byteen;
|
||||
assign dcache_req_addr[i] = dcache_bus_if[i].req_data.addr;
|
||||
assign dcache_req_atype[i] = dcache_bus_if[i].req_data.atype;
|
||||
assign dcache_req_data[i] = dcache_bus_if[i].req_data.data;
|
||||
assign dcache_req_tag[i] = dcache_bus_if[i].req_data.tag;
|
||||
assign dcache_bus_if[i].req_ready = dcache_req_ready[i];
|
||||
|
@ -122,6 +119,7 @@ module VX_core_top import VX_gpu_pkg::*; #(
|
|||
assign icache_req_data = icache_bus_if.req_data.data;
|
||||
assign icache_req_tag = icache_bus_if.req_data.tag;
|
||||
assign icache_bus_if.req_ready = icache_req_ready;
|
||||
`UNUSED_VAR (icache_bus_if.req_data.atype)
|
||||
|
||||
assign icache_bus_if.rsp_valid = icache_rsp_valid;
|
||||
assign icache_bus_if.rsp_data.tag = icache_rsp_tag;
|
||||
|
@ -129,33 +127,34 @@ module VX_core_top import VX_gpu_pkg::*; #(
|
|||
assign icache_rsp_ready = icache_bus_if.rsp_ready;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_mem_perf_if mem_perf_if();
|
||||
VX_mem_perf_if mem_perf_if();
|
||||
assign mem_perf_if.icache = '0;
|
||||
assign mem_perf_if.dcache = '0;
|
||||
assign mem_perf_if.l2cache = '0;
|
||||
assign mem_perf_if.l3cache = '0;
|
||||
assign mem_perf_if.smem = '0;
|
||||
assign mem_perf_if.lmem = '0;
|
||||
assign mem_perf_if.mem = '0;
|
||||
`endif
|
||||
|
||||
`ifdef SCOPE
|
||||
wire [0:0] scope_reset_w = 1'b0;
|
||||
wire [0:0] scope_bus_in_w = 1'b0;
|
||||
wire [0:0] scope_reset_w = 1'b0;
|
||||
wire [0:0] scope_bus_in_w = 1'b0;
|
||||
wire [0:0] scope_bus_out_w;
|
||||
`UNUSED_VAR (scope_bus_out_w)
|
||||
`endif
|
||||
|
||||
VX_core #(
|
||||
.INSTANCE_ID ($sformatf("core")),
|
||||
.CORE_ID (CORE_ID)
|
||||
) core (
|
||||
`SCOPE_IO_BIND (0)
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.mem_perf_if (mem_perf_if),
|
||||
`endif
|
||||
|
||||
|
||||
.dcr_bus_if (dcr_bus_if),
|
||||
|
||||
.dcache_bus_if (dcache_bus_if),
|
||||
|
@ -166,8 +165,6 @@ module VX_core_top import VX_gpu_pkg::*; #(
|
|||
.gbar_bus_if (gbar_bus_if),
|
||||
`endif
|
||||
|
||||
.sim_ebreak (sim_ebreak),
|
||||
.sim_wb_value (sim_wb_value),
|
||||
.busy (busy)
|
||||
);
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -17,12 +17,22 @@
|
|||
`include "VX_fpu_define.vh"
|
||||
`endif
|
||||
|
||||
`ifdef XLEN_64
|
||||
`define CSR_READ_64(addr, dst, src) \
|
||||
addr : dst = `XLEN'(src)
|
||||
`else
|
||||
`define CSR_READ_64(addr, dst, src) \
|
||||
addr : dst = src[31:0]; \
|
||||
addr+12'h80 : dst = 32'(src[$bits(src)-1:32])
|
||||
`endif
|
||||
|
||||
module VX_csr_data
|
||||
import VX_gpu_pkg::*;
|
||||
`ifdef EXT_F_ENABLE
|
||||
import VX_fpu_pkg::*;
|
||||
`endif
|
||||
#(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -38,7 +48,7 @@ import VX_fpu_pkg::*;
|
|||
VX_commit_csr_if.slave commit_csr_if,
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_fpu_to_csr_if.slave fpu_to_csr_if [`NUM_FPU_BLOCKS],
|
||||
VX_fpu_csr_if.slave fpu_csr_if [`NUM_FPU_BLOCKS],
|
||||
`endif
|
||||
|
||||
input wire [`PERF_CTR_BITS-1:0] cycles,
|
||||
|
@ -49,14 +59,14 @@ import VX_fpu_pkg::*;
|
|||
input wire [`UUID_WIDTH-1:0] read_uuid,
|
||||
input wire [`NW_WIDTH-1:0] read_wid,
|
||||
input wire [`VX_CSR_ADDR_BITS-1:0] read_addr,
|
||||
output wire [31:0] read_data_ro,
|
||||
output wire [31:0] read_data_rw,
|
||||
output wire [`XLEN-1:0] read_data_ro,
|
||||
output wire [`XLEN-1:0] read_data_rw,
|
||||
|
||||
input wire write_enable,
|
||||
input wire write_enable,
|
||||
input wire [`UUID_WIDTH-1:0] write_uuid,
|
||||
input wire [`NW_WIDTH-1:0] write_wid,
|
||||
input wire [`VX_CSR_ADDR_BITS-1:0] write_addr,
|
||||
input wire [31:0] write_data
|
||||
input wire [`XLEN-1:0] write_data
|
||||
);
|
||||
|
||||
`UNUSED_VAR (reset)
|
||||
|
@ -65,16 +75,20 @@ import VX_fpu_pkg::*;
|
|||
|
||||
// CSRs Write /////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
reg [`XLEN-1:0] mscratch;
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
reg [`NUM_WARPS-1:0][`INST_FRM_BITS+`FP_FLAGS_BITS-1:0] fcsr, fcsr_n;
|
||||
wire [`NUM_FPU_BLOCKS-1:0] fpu_write_enable;
|
||||
wire [`NUM_FPU_BLOCKS-1:0][`NW_WIDTH-1:0] fpu_write_wid;
|
||||
fflags_t [`NUM_FPU_BLOCKS-1:0] fpu_write_fflags;
|
||||
|
||||
for (genvar i = 0; i < `NUM_FPU_BLOCKS; ++i) begin
|
||||
assign fpu_write_enable[i] = fpu_to_csr_if[i].write_enable;
|
||||
assign fpu_write_wid[i] = fpu_to_csr_if[i].write_wid;
|
||||
assign fpu_write_fflags[i] = fpu_to_csr_if[i].write_fflags;
|
||||
assign fpu_write_enable[i] = fpu_csr_if[i].write_enable;
|
||||
assign fpu_write_wid[i] = fpu_csr_if[i].write_wid;
|
||||
assign fpu_write_fflags[i] = fpu_csr_if[i].write_fflags;
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
fcsr_n = fcsr;
|
||||
for (integer i = 0; i < `NUM_FPU_BLOCKS; ++i) begin
|
||||
|
@ -82,7 +96,7 @@ import VX_fpu_pkg::*;
|
|||
fcsr_n[fpu_write_wid[i]][`FP_FLAGS_BITS-1:0] = fcsr[fpu_write_wid[i]][`FP_FLAGS_BITS-1:0]
|
||||
| fpu_write_fflags[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
if (write_enable) begin
|
||||
case (write_addr)
|
||||
`VX_CSR_FFLAGS: fcsr_n[write_wid][`FP_FLAGS_BITS-1:0] = write_data[`FP_FLAGS_BITS-1:0];
|
||||
|
@ -92,9 +106,9 @@ import VX_fpu_pkg::*;
|
|||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
for (genvar i = 0; i < `NUM_FPU_BLOCKS; ++i) begin
|
||||
assign fpu_to_csr_if[i].read_frm = fcsr[fpu_to_csr_if[i].read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS];
|
||||
assign fpu_csr_if[i].read_frm = fcsr[fpu_csr_if[i].read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS];
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
|
@ -107,6 +121,9 @@ import VX_fpu_pkg::*;
|
|||
`endif
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
mscratch <= base_dcrs.startup_arg;
|
||||
end
|
||||
if (write_enable) begin
|
||||
case (write_addr)
|
||||
`ifdef EXT_F_ENABLE
|
||||
|
@ -123,9 +140,14 @@ import VX_fpu_pkg::*;
|
|||
`VX_CSR_MTVEC,
|
||||
`VX_CSR_MEPC,
|
||||
`VX_CSR_PMPCFG0,
|
||||
`VX_CSR_PMPADDR0: /* do nothing!*/;
|
||||
`VX_CSR_PMPADDR0: begin
|
||||
// do nothing!
|
||||
end
|
||||
`VX_CSR_MSCRATCH: begin
|
||||
mscratch <= write_data;
|
||||
end
|
||||
default: begin
|
||||
`ASSERT(0, ("%t: *** invalid CSR write address: %0h (#%0d)", $time, write_addr, write_uuid));
|
||||
`ASSERT(0, ("%t: *** %s invalid CSR write address: %0h (#%0d)", $time, INSTANCE_ID, write_addr, write_uuid));
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
@ -133,38 +155,42 @@ import VX_fpu_pkg::*;
|
|||
|
||||
// CSRs read //////////////////////////////////////////////////////////////
|
||||
|
||||
reg [31:0] read_data_ro_r;
|
||||
reg [31:0] read_data_rw_r;
|
||||
reg [`XLEN-1:0] read_data_ro_r;
|
||||
reg [`XLEN-1:0] read_data_rw_r;
|
||||
reg read_addr_valid_r;
|
||||
|
||||
always @(*) begin
|
||||
read_data_ro_r = '0;
|
||||
read_data_rw_r = '0;
|
||||
read_addr_valid_r = 1;
|
||||
case (read_addr)
|
||||
`VX_CSR_MVENDORID : read_data_ro_r = 32'(`VENDOR_ID);
|
||||
`VX_CSR_MARCHID : read_data_ro_r = 32'(`ARCHITECTURE_ID);
|
||||
`VX_CSR_MIMPID : read_data_ro_r = 32'(`IMPLEMENTATION_ID);
|
||||
`VX_CSR_MISA : read_data_ro_r = (((`CLOG2(`XLEN)-4) << (`XLEN-2)) | `MISA_STD);
|
||||
case (read_addr)
|
||||
`VX_CSR_MVENDORID : read_data_ro_r = `XLEN'(`VENDOR_ID);
|
||||
`VX_CSR_MARCHID : read_data_ro_r = `XLEN'(`ARCHITECTURE_ID);
|
||||
`VX_CSR_MIMPID : read_data_ro_r = `XLEN'(`IMPLEMENTATION_ID);
|
||||
`VX_CSR_MISA : read_data_ro_r = `XLEN'({2'(`CLOG2(`XLEN/16)), 30'(`MISA_STD)});
|
||||
`ifdef EXT_F_ENABLE
|
||||
`VX_CSR_FFLAGS : read_data_rw_r = 32'(fcsr[read_wid][`FP_FLAGS_BITS-1:0]);
|
||||
`VX_CSR_FRM : read_data_rw_r = 32'(fcsr[read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS]);
|
||||
`VX_CSR_FCSR : read_data_rw_r = 32'(fcsr[read_wid]);
|
||||
`VX_CSR_FFLAGS : read_data_rw_r = `XLEN'(fcsr[read_wid][`FP_FLAGS_BITS-1:0]);
|
||||
`VX_CSR_FRM : read_data_rw_r = `XLEN'(fcsr[read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS]);
|
||||
`VX_CSR_FCSR : read_data_rw_r = `XLEN'(fcsr[read_wid]);
|
||||
`endif
|
||||
`VX_CSR_WARP_ID : read_data_ro_r = 32'(read_wid);
|
||||
`VX_CSR_CORE_ID : read_data_ro_r = 32'(CORE_ID);
|
||||
`VX_CSR_THREAD_MASK: read_data_ro_r = 32'(thread_masks[read_wid]);
|
||||
`VX_CSR_WARP_MASK : read_data_ro_r = 32'(active_warps);
|
||||
`VX_CSR_NUM_THREADS: read_data_ro_r = 32'(`NUM_THREADS);
|
||||
`VX_CSR_NUM_WARPS : read_data_ro_r = 32'(`NUM_WARPS);
|
||||
`VX_CSR_NUM_CORES : read_data_ro_r = 32'(`NUM_CORES * `NUM_CLUSTERS);
|
||||
`VX_CSR_MCYCLE : read_data_ro_r = 32'(cycles[31:0]);
|
||||
`VX_CSR_MCYCLE_H : read_data_ro_r = 32'(cycles[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MSCRATCH : read_data_rw_r = mscratch;
|
||||
|
||||
`VX_CSR_WARP_ID : read_data_ro_r = `XLEN'(read_wid);
|
||||
`VX_CSR_CORE_ID : read_data_ro_r = `XLEN'(CORE_ID);
|
||||
`VX_CSR_ACTIVE_THREADS: read_data_ro_r = `XLEN'(thread_masks[read_wid]);
|
||||
`VX_CSR_ACTIVE_WARPS: read_data_ro_r = `XLEN'(active_warps);
|
||||
`VX_CSR_NUM_THREADS: read_data_ro_r = `XLEN'(`NUM_THREADS);
|
||||
`VX_CSR_NUM_WARPS : read_data_ro_r = `XLEN'(`NUM_WARPS);
|
||||
`VX_CSR_NUM_CORES : read_data_ro_r = `XLEN'(`NUM_CORES * `NUM_CLUSTERS);
|
||||
`VX_CSR_LOCAL_MEM_BASE: read_data_ro_r = `XLEN'(`LMEM_BASE_ADDR);
|
||||
|
||||
`CSR_READ_64(`VX_CSR_MCYCLE, read_data_ro_r, cycles);
|
||||
|
||||
`VX_CSR_MPM_RESERVED : read_data_ro_r = 'x;
|
||||
`VX_CSR_MPM_RESERVED_H : read_data_ro_r = 'x;
|
||||
`VX_CSR_MINSTRET : read_data_ro_r = 32'(commit_csr_if.instret[31:0]);
|
||||
`VX_CSR_MINSTRET_H : read_data_ro_r = 32'(commit_csr_if.instret[`PERF_CTR_BITS-1:32]);
|
||||
|
||||
`VX_CSR_MPM_RESERVED_H : read_data_ro_r = 'x;
|
||||
|
||||
`CSR_READ_64(`VX_CSR_MINSTRET, read_data_ro_r, commit_csr_if.instret);
|
||||
|
||||
`VX_CSR_SATP,
|
||||
`VX_CSR_MSTATUS,
|
||||
`VX_CSR_MNSTATUS,
|
||||
|
@ -174,7 +200,7 @@ import VX_fpu_pkg::*;
|
|||
`VX_CSR_MTVEC,
|
||||
`VX_CSR_MEPC,
|
||||
`VX_CSR_PMPCFG0,
|
||||
`VX_CSR_PMPADDR0 : read_data_ro_r = 32'(0);
|
||||
`VX_CSR_PMPADDR0 : read_data_ro_r = `XLEN'(0);
|
||||
|
||||
default: begin
|
||||
read_addr_valid_r = 0;
|
||||
|
@ -186,107 +212,65 @@ import VX_fpu_pkg::*;
|
|||
`VX_DCR_MPM_CLASS_CORE: begin
|
||||
case (read_addr)
|
||||
// PERF: pipeline
|
||||
`VX_CSR_MPM_SCHED_ID : read_data_ro_r = pipeline_perf_if.sched_idles[31:0];
|
||||
`VX_CSR_MPM_SCHED_ID_H : read_data_ro_r = 32'(pipeline_perf_if.sched_idles[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_SCHED_ST : read_data_ro_r = pipeline_perf_if.sched_stalls[31:0];
|
||||
`VX_CSR_MPM_SCHED_ST_H : read_data_ro_r = 32'(pipeline_perf_if.sched_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_IBUF_ST : read_data_ro_r = pipeline_perf_if.ibf_stalls[31:0];
|
||||
`VX_CSR_MPM_IBUF_ST_H : read_data_ro_r = 32'(pipeline_perf_if.ibf_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_SCRB_ST : read_data_ro_r = pipeline_perf_if.scb_stalls[31:0];
|
||||
`VX_CSR_MPM_SCRB_ST_H : read_data_ro_r = 32'(pipeline_perf_if.scb_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_SCRB_ALU : read_data_ro_r = pipeline_perf_if.units_uses[`EX_ALU][31:0];
|
||||
`VX_CSR_MPM_SCRB_ALU_H : read_data_ro_r = 32'(pipeline_perf_if.units_uses[`EX_ALU][`PERF_CTR_BITS-1:32]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCHED_ID, read_data_ro_r, pipeline_perf_if.sched.idles);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCHED_ST, read_data_ro_r, pipeline_perf_if.sched.stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_IBUF_ST, read_data_ro_r, pipeline_perf_if.issue.ibf_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_ST, read_data_ro_r, pipeline_perf_if.issue.scb_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_OPDS_ST, read_data_ro_r, pipeline_perf_if.issue.opd_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_ALU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_ALU]);
|
||||
`ifdef EXT_F_ENABLE
|
||||
`VX_CSR_MPM_SCRB_FPU : read_data_ro_r = pipeline_perf_if.units_uses[`EX_FPU][31:0];
|
||||
`VX_CSR_MPM_SCRB_FPU_H : read_data_ro_r = 32'(pipeline_perf_if.units_uses[`EX_FPU][`PERF_CTR_BITS-1:32]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_FPU]);
|
||||
`else
|
||||
`VX_CSR_MPM_SCRB_FPU : read_data_ro_r = '0;
|
||||
`VX_CSR_MPM_SCRB_FPU_H : read_data_ro_r = '0;
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_r, `PERF_CTR_BITS'(0));
|
||||
`endif
|
||||
`VX_CSR_MPM_SCRB_LSU : read_data_ro_r = pipeline_perf_if.units_uses[`EX_LSU][31:0];
|
||||
`VX_CSR_MPM_SCRB_LSU_H : read_data_ro_r = 32'(pipeline_perf_if.units_uses[`EX_LSU][`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_SCRB_SFU : read_data_ro_r = pipeline_perf_if.units_uses[`EX_SFU][31:0];
|
||||
`VX_CSR_MPM_SCRB_SFU_H : read_data_ro_r = 32'(pipeline_perf_if.units_uses[`EX_SFU][`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_SCRB_CSRS : read_data_ro_r = pipeline_perf_if.sfu_uses[`SFU_CSRS][31:0];
|
||||
`VX_CSR_MPM_SCRB_CSRS_H : read_data_ro_r = 32'(pipeline_perf_if.sfu_uses[`SFU_CSRS][`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_SCRB_WCTL : read_data_ro_r = pipeline_perf_if.sfu_uses[`SFU_WCTL][31:0];
|
||||
`VX_CSR_MPM_SCRB_WCTL_H : read_data_ro_r = 32'(pipeline_perf_if.sfu_uses[`SFU_WCTL][`PERF_CTR_BITS-1:32]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_LSU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_LSU]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_SFU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_SFU]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_CSRS, read_data_ro_r, pipeline_perf_if.issue.sfu_uses[`SFU_CSRS]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_WCTL, read_data_ro_r, pipeline_perf_if.issue.sfu_uses[`SFU_WCTL]);
|
||||
// PERF: memory
|
||||
`VX_CSR_MPM_IFETCHES : read_data_ro_r = pipeline_perf_if.ifetches[31:0];
|
||||
`VX_CSR_MPM_IFETCHES_H : read_data_ro_r = 32'(pipeline_perf_if.ifetches[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_LOADS : read_data_ro_r = pipeline_perf_if.loads[31:0];
|
||||
`VX_CSR_MPM_LOADS_H : read_data_ro_r = 32'(pipeline_perf_if.loads[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_STORES : read_data_ro_r = pipeline_perf_if.stores[31:0];
|
||||
`VX_CSR_MPM_STORES_H : read_data_ro_r = 32'(pipeline_perf_if.stores[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_IFETCH_LT : read_data_ro_r = pipeline_perf_if.ifetch_latency[31:0];
|
||||
`VX_CSR_MPM_IFETCH_LT_H : read_data_ro_r = 32'(pipeline_perf_if.ifetch_latency[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_LOAD_LT : read_data_ro_r = pipeline_perf_if.load_latency[31:0];
|
||||
`VX_CSR_MPM_LOAD_LT_H : read_data_ro_r = 32'(pipeline_perf_if.load_latency[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_IFETCHES, read_data_ro_r, pipeline_perf_if.ifetches);
|
||||
`CSR_READ_64(`VX_CSR_MPM_LOADS, read_data_ro_r, pipeline_perf_if.loads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_STORES, read_data_ro_r, pipeline_perf_if.stores);
|
||||
`CSR_READ_64(`VX_CSR_MPM_IFETCH_LT, read_data_ro_r, pipeline_perf_if.ifetch_latency);
|
||||
`CSR_READ_64(`VX_CSR_MPM_LOAD_LT, read_data_ro_r, pipeline_perf_if.load_latency);
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
`VX_DCR_MPM_CLASS_MEM: begin
|
||||
case (read_addr)
|
||||
// PERF: icache
|
||||
`VX_CSR_MPM_ICACHE_READS : read_data_ro_r = mem_perf_if.icache.reads[31:0];
|
||||
`VX_CSR_MPM_ICACHE_READS_H : read_data_ro_r = 32'(mem_perf_if.icache.reads[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_ICACHE_MISS_R : read_data_ro_r = mem_perf_if.icache.read_misses[31:0];
|
||||
`VX_CSR_MPM_ICACHE_MISS_R_H : read_data_ro_r = 32'(mem_perf_if.icache.read_misses[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_ICACHE_MSHR_ST : read_data_ro_r = mem_perf_if.icache.mshr_stalls[31:0];
|
||||
`VX_CSR_MPM_ICACHE_MSHR_ST_H: read_data_ro_r = 32'(mem_perf_if.icache.mshr_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_ICACHE_READS, read_data_ro_r, mem_perf_if.icache.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_ICACHE_MISS_R, read_data_ro_r, mem_perf_if.icache.read_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_ICACHE_MSHR_ST, read_data_ro_r, mem_perf_if.icache.mshr_stalls);
|
||||
// PERF: dcache
|
||||
`VX_CSR_MPM_DCACHE_READS : read_data_ro_r = mem_perf_if.dcache.reads[31:0];
|
||||
`VX_CSR_MPM_DCACHE_READS_H : read_data_ro_r = 32'(mem_perf_if.dcache.reads[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_DCACHE_WRITES : read_data_ro_r = mem_perf_if.dcache.writes[31:0];
|
||||
`VX_CSR_MPM_DCACHE_WRITES_H : read_data_ro_r = 32'(mem_perf_if.dcache.writes[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_DCACHE_MISS_R : read_data_ro_r = mem_perf_if.dcache.read_misses[31:0];
|
||||
`VX_CSR_MPM_DCACHE_MISS_R_H : read_data_ro_r = 32'(mem_perf_if.dcache.read_misses[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_DCACHE_MISS_W : read_data_ro_r = mem_perf_if.dcache.write_misses[31:0];
|
||||
`VX_CSR_MPM_DCACHE_MISS_W_H : read_data_ro_r = 32'(mem_perf_if.dcache.write_misses[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_DCACHE_BANK_ST : read_data_ro_r = mem_perf_if.dcache.bank_stalls[31:0];
|
||||
`VX_CSR_MPM_DCACHE_BANK_ST_H: read_data_ro_r = 32'(mem_perf_if.dcache.bank_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_DCACHE_MSHR_ST : read_data_ro_r = mem_perf_if.dcache.mshr_stalls[31:0];
|
||||
`VX_CSR_MPM_DCACHE_MSHR_ST_H: read_data_ro_r = 32'(mem_perf_if.dcache.mshr_stalls[`PERF_CTR_BITS-1:32]);
|
||||
// PERF: smem
|
||||
`VX_CSR_MPM_SMEM_READS : read_data_ro_r = mem_perf_if.smem.reads[31:0];
|
||||
`VX_CSR_MPM_SMEM_READS_H : read_data_ro_r = 32'(mem_perf_if.smem.reads[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_SMEM_WRITES : read_data_ro_r = mem_perf_if.smem.writes[31:0];
|
||||
`VX_CSR_MPM_SMEM_WRITES_H : read_data_ro_r = 32'(mem_perf_if.smem.writes[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_SMEM_BANK_ST : read_data_ro_r = mem_perf_if.smem.bank_stalls[31:0];
|
||||
`VX_CSR_MPM_SMEM_BANK_ST_H : read_data_ro_r = 32'(mem_perf_if.smem.bank_stalls[`PERF_CTR_BITS-1:32]);
|
||||
// PERF: l2cache
|
||||
`VX_CSR_MPM_L2CACHE_READS : read_data_ro_r = mem_perf_if.l2cache.reads[31:0];
|
||||
`VX_CSR_MPM_L2CACHE_READS_H : read_data_ro_r = 32'(mem_perf_if.l2cache.reads[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_L2CACHE_WRITES : read_data_ro_r = mem_perf_if.l2cache.writes[31:0];
|
||||
`VX_CSR_MPM_L2CACHE_WRITES_H: read_data_ro_r = 32'(mem_perf_if.l2cache.writes[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_L2CACHE_MISS_R : read_data_ro_r = mem_perf_if.l2cache.read_misses[31:0];
|
||||
`VX_CSR_MPM_L2CACHE_MISS_R_H: read_data_ro_r = 32'(mem_perf_if.l2cache.read_misses[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_L2CACHE_MISS_W : read_data_ro_r = mem_perf_if.l2cache.write_misses[31:0];
|
||||
`VX_CSR_MPM_L2CACHE_MISS_W_H: read_data_ro_r = 32'(mem_perf_if.l2cache.write_misses[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_L2CACHE_BANK_ST : read_data_ro_r = mem_perf_if.l2cache.bank_stalls[31:0];
|
||||
`VX_CSR_MPM_L2CACHE_BANK_ST_H: read_data_ro_r = 32'(mem_perf_if.l2cache.bank_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_L2CACHE_MSHR_ST : read_data_ro_r = mem_perf_if.l2cache.mshr_stalls[31:0];
|
||||
`VX_CSR_MPM_L2CACHE_MSHR_ST_H: read_data_ro_r = 32'(mem_perf_if.l2cache.mshr_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_READS, read_data_ro_r, mem_perf_if.dcache.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_WRITES, read_data_ro_r, mem_perf_if.dcache.writes);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_MISS_R, read_data_ro_r, mem_perf_if.dcache.read_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_MISS_W, read_data_ro_r, mem_perf_if.dcache.write_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_BANK_ST, read_data_ro_r, mem_perf_if.dcache.bank_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_MSHR_ST, read_data_ro_r, mem_perf_if.dcache.mshr_stalls);
|
||||
// PERF: lmem
|
||||
`CSR_READ_64(`VX_CSR_MPM_LMEM_READS, read_data_ro_r, mem_perf_if.lmem.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_LMEM_WRITES, read_data_ro_r, mem_perf_if.lmem.writes);
|
||||
`CSR_READ_64(`VX_CSR_MPM_LMEM_BANK_ST, read_data_ro_r, mem_perf_if.lmem.bank_stalls);
|
||||
// PERF: l2cache
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_READS, read_data_ro_r, mem_perf_if.l2cache.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_WRITES, read_data_ro_r, mem_perf_if.l2cache.writes);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_MISS_R, read_data_ro_r, mem_perf_if.l2cache.read_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_MISS_W, read_data_ro_r, mem_perf_if.l2cache.write_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_BANK_ST, read_data_ro_r, mem_perf_if.l2cache.bank_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_MSHR_ST, read_data_ro_r, mem_perf_if.l2cache.mshr_stalls);
|
||||
// PERF: l3cache
|
||||
`VX_CSR_MPM_L3CACHE_READS : read_data_ro_r = mem_perf_if.l3cache.reads[31:0];
|
||||
`VX_CSR_MPM_L3CACHE_READS_H : read_data_ro_r = 32'(mem_perf_if.l3cache.reads[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_L3CACHE_WRITES : read_data_ro_r = mem_perf_if.l3cache.writes[31:0];
|
||||
`VX_CSR_MPM_L3CACHE_WRITES_H: read_data_ro_r = 32'(mem_perf_if.l3cache.writes[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_L3CACHE_MISS_R : read_data_ro_r = mem_perf_if.l3cache.read_misses[31:0];
|
||||
`VX_CSR_MPM_L3CACHE_MISS_R_H: read_data_ro_r = 32'(mem_perf_if.l3cache.read_misses[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_L3CACHE_MISS_W : read_data_ro_r = mem_perf_if.l3cache.write_misses[31:0];
|
||||
`VX_CSR_MPM_L3CACHE_MISS_W_H: read_data_ro_r = 32'(mem_perf_if.l3cache.write_misses[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_L3CACHE_BANK_ST : read_data_ro_r = mem_perf_if.l3cache.bank_stalls[31:0];
|
||||
`VX_CSR_MPM_L3CACHE_BANK_ST_H: read_data_ro_r = 32'(mem_perf_if.l3cache.bank_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_L3CACHE_MSHR_ST : read_data_ro_r = mem_perf_if.l3cache.mshr_stalls[31:0];
|
||||
`VX_CSR_MPM_L3CACHE_MSHR_ST_H: read_data_ro_r = 32'(mem_perf_if.l3cache.mshr_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_READS, read_data_ro_r, mem_perf_if.l3cache.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_WRITES, read_data_ro_r, mem_perf_if.l3cache.writes);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_MISS_R, read_data_ro_r, mem_perf_if.l3cache.read_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_MISS_W, read_data_ro_r, mem_perf_if.l3cache.write_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_BANK_ST, read_data_ro_r, mem_perf_if.l3cache.bank_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_MSHR_ST, read_data_ro_r, mem_perf_if.l3cache.mshr_stalls);
|
||||
// PERF: memory
|
||||
`VX_CSR_MPM_MEM_READS : read_data_ro_r = mem_perf_if.mem.reads[31:0];
|
||||
`VX_CSR_MPM_MEM_READS_H : read_data_ro_r = 32'(mem_perf_if.mem.reads[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_MEM_WRITES : read_data_ro_r = mem_perf_if.mem.writes[31:0];
|
||||
`VX_CSR_MPM_MEM_WRITES_H : read_data_ro_r = 32'(mem_perf_if.mem.writes[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_MEM_LT : read_data_ro_r = mem_perf_if.mem.latency[31:0];
|
||||
`VX_CSR_MPM_MEM_LT_H : read_data_ro_r = 32'(mem_perf_if.mem.latency[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_MEM_READS, read_data_ro_r, mem_perf_if.mem.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_MEM_WRITES, read_data_ro_r, mem_perf_if.mem.writes);
|
||||
`CSR_READ_64(`VX_CSR_MPM_MEM_LT, read_data_ro_r, mem_perf_if.mem.latency);
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
@ -307,7 +291,7 @@ import VX_fpu_pkg::*;
|
|||
|
||||
`ifdef PERF_ENABLE
|
||||
`UNUSED_VAR (mem_perf_if.icache);
|
||||
`UNUSED_VAR (mem_perf_if.smem);
|
||||
`UNUSED_VAR (mem_perf_if.lmem);
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,6 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_csr_unit import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter CORE_ID = 0,
|
||||
parameter NUM_LANES = 1
|
||||
) (
|
||||
|
@ -26,9 +27,9 @@ module VX_csr_unit import VX_gpu_pkg::*; #(
|
|||
VX_mem_perf_if.slave mem_perf_if,
|
||||
VX_pipeline_perf_if.slave pipeline_perf_if,
|
||||
`endif
|
||||
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_fpu_to_csr_if.slave fpu_to_csr_if [`NUM_FPU_BLOCKS],
|
||||
VX_fpu_csr_if.slave fpu_csr_if [`NUM_FPU_BLOCKS],
|
||||
`endif
|
||||
|
||||
VX_commit_csr_if.slave commit_csr_if,
|
||||
|
@ -36,41 +37,44 @@ module VX_csr_unit import VX_gpu_pkg::*; #(
|
|||
VX_execute_if.slave execute_if,
|
||||
VX_commit_if.master commit_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + NUM_LANES * 32 + PID_WIDTH + 1 + 1;
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
|
||||
`UNUSED_VAR (execute_if.data.rs3_data)
|
||||
|
||||
reg [NUM_LANES-1:0][31:0] csr_read_data;
|
||||
reg [31:0] csr_write_data;
|
||||
wire [31:0] csr_read_data_ro, csr_read_data_rw;
|
||||
wire [31:0] csr_req_data;
|
||||
reg csr_rd_enable;
|
||||
wire csr_wr_enable;
|
||||
wire csr_req_ready;
|
||||
|
||||
// wait for all pending instructions to complete
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] csr_read_data;
|
||||
reg [`XLEN-1:0] csr_write_data;
|
||||
wire [`XLEN-1:0] csr_read_data_ro, csr_read_data_rw;
|
||||
wire [`XLEN-1:0] csr_req_data;
|
||||
reg csr_rd_enable;
|
||||
wire csr_wr_enable;
|
||||
wire csr_req_ready;
|
||||
|
||||
wire [`VX_CSR_ADDR_BITS-1:0] csr_addr = execute_if.data.op_args.csr.addr;
|
||||
wire [`NRI_BITS-1:0] csr_imm = execute_if.data.op_args.csr.imm;
|
||||
|
||||
wire is_fpu_csr = (csr_addr <= `VX_CSR_FCSR);
|
||||
|
||||
// wait for all pending instructions for current warp to complete
|
||||
assign sched_csr_if.alm_empty_wid = execute_if.data.wid;
|
||||
wire no_pending_instr = sched_csr_if.alm_empty;
|
||||
|
||||
wire no_pending_instr = sched_csr_if.alm_empty || ~is_fpu_csr;
|
||||
|
||||
wire csr_req_valid = execute_if.valid && no_pending_instr;
|
||||
assign execute_if.ready = csr_req_ready && no_pending_instr;
|
||||
|
||||
wire [`VX_CSR_ADDR_BITS-1:0] csr_addr = execute_if.data.imm[`VX_CSR_ADDR_BITS-1:0];
|
||||
wire [`NRI_BITS-1:0] csr_imm = execute_if.data.imm[`VX_CSR_ADDR_BITS +: `NRI_BITS];
|
||||
|
||||
wire [NUM_LANES-1:0][31:0] rs1_data;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] rs1_data;
|
||||
`UNUSED_VAR (rs1_data)
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign rs1_data[i] = execute_if.data.rs1_data[i][31:0];
|
||||
assign rs1_data[i] = execute_if.data.rs1_data[i];
|
||||
end
|
||||
|
||||
wire csr_write_enable = (execute_if.data.op_type == `INST_SFU_CSRRW);
|
||||
|
||||
VX_csr_data #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID (INSTANCE_ID),
|
||||
.CORE_ID (CORE_ID)
|
||||
) csr_data (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -86,14 +90,14 @@ module VX_csr_unit import VX_gpu_pkg::*; #(
|
|||
.cycles (sched_csr_if.cycles),
|
||||
.active_warps (sched_csr_if.active_warps),
|
||||
.thread_masks (sched_csr_if.thread_masks),
|
||||
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_to_csr_if (fpu_to_csr_if),
|
||||
`endif
|
||||
.fpu_csr_if (fpu_csr_if),
|
||||
`endif
|
||||
|
||||
.read_enable (csr_req_valid && csr_rd_enable),
|
||||
.read_uuid (execute_if.data.uuid),
|
||||
.read_wid (execute_if.data.wid),
|
||||
.read_wid (execute_if.data.wid),
|
||||
.read_addr (csr_addr),
|
||||
.read_data_ro (csr_read_data_ro),
|
||||
.read_data_rw (csr_read_data_rw),
|
||||
|
@ -107,16 +111,16 @@ module VX_csr_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
// CSR read
|
||||
|
||||
wire [NUM_LANES-1:0][31:0] wtid, gtid;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] wtid, gtid;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
if (PID_BITS != 0) begin
|
||||
assign wtid[i] = 32'(execute_if.data.pid * NUM_LANES + i);
|
||||
assign wtid[i] = `XLEN'(execute_if.data.pid * NUM_LANES + i);
|
||||
end else begin
|
||||
assign wtid[i] = 32'(i);
|
||||
assign wtid[i] = `XLEN'(i);
|
||||
end
|
||||
assign gtid[i] = (32'(CORE_ID) << (`NW_BITS + `NT_BITS)) + (32'(execute_if.data.wid) << `NT_BITS) + wtid[i];
|
||||
end
|
||||
assign gtid[i] = (`XLEN'(CORE_ID) << (`NW_BITS + `NT_BITS)) + (`XLEN'(execute_if.data.wid) << `NT_BITS) + wtid[i];
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
csr_rd_enable = 0;
|
||||
|
@ -132,8 +136,7 @@ module VX_csr_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
// CSR write
|
||||
|
||||
assign csr_req_data = execute_if.data.use_imm ? 32'(csr_imm) : rs1_data[0];
|
||||
|
||||
assign csr_req_data = execute_if.data.op_args.csr.use_imm ? `XLEN'(csr_imm) : rs1_data[0];
|
||||
assign csr_wr_enable = (csr_write_enable || (| csr_req_data));
|
||||
|
||||
always @(*) begin
|
||||
|
@ -152,12 +155,9 @@ module VX_csr_unit import VX_gpu_pkg::*; #(
|
|||
end
|
||||
|
||||
// unlock the warp
|
||||
assign sched_csr_if.unlock_warp = csr_req_valid && csr_req_ready && execute_if.data.eop;
|
||||
assign sched_csr_if.unlock_warp = csr_req_valid && csr_req_ready && execute_if.data.eop && is_fpu_csr;
|
||||
assign sched_csr_if.unlock_wid = execute_if.data.wid;
|
||||
|
||||
// send response
|
||||
wire [NUM_LANES-1:0][31:0] csr_commit_data;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2)
|
||||
|
@ -166,14 +166,10 @@ module VX_csr_unit import VX_gpu_pkg::*; #(
|
|||
.reset (reset),
|
||||
.valid_in (csr_req_valid),
|
||||
.ready_in (csr_req_ready),
|
||||
.data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, csr_read_data, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop}),
|
||||
.data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.rd, commit_if.data.wb, csr_commit_data, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop}),
|
||||
.data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, csr_read_data, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop}),
|
||||
.data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.rd, commit_if.data.wb, commit_if.data.data, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop}),
|
||||
.valid_out (commit_if.valid),
|
||||
.ready_out (commit_if.ready)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign commit_if.data.data[i] = `XLEN'(csr_commit_data[i]);
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -12,9 +12,8 @@
|
|||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
`include "VX_trace.vh"
|
||||
|
||||
module VX_dcr_data import VX_gpu_pkg::*; (
|
||||
module VX_dcr_data import VX_gpu_pkg::*, VX_trace_pkg::*; (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
@ -35,6 +34,10 @@ module VX_dcr_data import VX_gpu_pkg::*; (
|
|||
`VX_DCR_BASE_STARTUP_ADDR0 : dcrs.startup_addr[31:0] <= dcr_bus_if.write_data;
|
||||
`ifdef XLEN_64
|
||||
`VX_DCR_BASE_STARTUP_ADDR1 : dcrs.startup_addr[63:32] <= dcr_bus_if.write_data;
|
||||
`endif
|
||||
`VX_DCR_BASE_STARTUP_ARG0 : dcrs.startup_arg[31:0] <= dcr_bus_if.write_data;
|
||||
`ifdef XLEN_64
|
||||
`VX_DCR_BASE_STARTUP_ARG1 : dcrs.startup_arg[63:32] <= dcr_bus_if.write_data;
|
||||
`endif
|
||||
`VX_DCR_BASE_MPM_CLASS : dcrs.mpm_class <= dcr_bus_if.write_data[7:0];
|
||||
default:;
|
||||
|
@ -44,12 +47,12 @@ module VX_dcr_data import VX_gpu_pkg::*; (
|
|||
|
||||
assign base_dcrs = dcrs;
|
||||
|
||||
`ifdef DBG_TRACE_CORE_PIPELINE
|
||||
`ifdef DBG_TRACE_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (dcr_bus_if.write_valid) begin
|
||||
`TRACE(1, ("%d: base-dcr: state=", $time));
|
||||
trace_base_dcr(1, dcr_bus_if.write_addr);
|
||||
`TRACE(1, (", data=0x%0h\n", dcr_bus_if.write_data));
|
||||
`TRACE(1, (", data=0x%h\n", dcr_bus_if.write_data));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -12,7 +12,6 @@
|
|||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
`include "VX_trace.vh"
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`define USED_IREG(x) \
|
||||
|
@ -28,8 +27,8 @@
|
|||
use_``x = 1
|
||||
`endif
|
||||
|
||||
module VX_decode #(
|
||||
parameter CORE_ID = 0
|
||||
module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -37,27 +36,26 @@ module VX_decode #(
|
|||
// inputs
|
||||
VX_fetch_if.slave fetch_if,
|
||||
|
||||
// outputs
|
||||
// outputs
|
||||
VX_decode_if.master decode_if,
|
||||
VX_decode_sched_if.master decode_sched_if
|
||||
);
|
||||
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `XLEN + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + (`NR_BITS * 4) + `XLEN + 1 + 1;
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + 1 + (`NR_BITS * 4);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
reg [`EX_BITS-1:0] ex_type;
|
||||
reg [`INST_OP_BITS-1:0] op_type;
|
||||
reg [`INST_MOD_BITS-1:0] op_mod;
|
||||
|
||||
reg [`EX_BITS-1:0] ex_type;
|
||||
reg [`INST_OP_BITS-1:0] op_type;
|
||||
op_args_t op_args;
|
||||
reg [`NR_BITS-1:0] rd_r, rs1_r, rs2_r, rs3_r;
|
||||
reg [`XLEN-1:0] imm;
|
||||
reg use_rd, use_rs1, use_rs2, use_rs3, use_PC, use_imm;
|
||||
reg use_rd, use_rs1, use_rs2, use_rs3;
|
||||
reg is_wstall;
|
||||
|
||||
wire [31:0] instr = fetch_if.data.instr;
|
||||
wire [6:0] opcode = instr[6:0];
|
||||
wire [6:0] opcode = instr[6:0];
|
||||
wire [1:0] func2 = instr[26:25];
|
||||
wire [2:0] func3 = instr[14:12];
|
||||
wire [4:0] func5 = instr[31:27];
|
||||
|
@ -78,6 +76,7 @@ module VX_decode #(
|
|||
`UNUSED_VAR (use_rs3)
|
||||
|
||||
wire is_itype_sh = func3[0] && ~func3[1];
|
||||
wire is_fpu_csr = (u_12 <= `VX_CSR_FCSR);
|
||||
|
||||
wire [19:0] ui_imm = instr[31:12];
|
||||
`ifdef XLEN_64
|
||||
|
@ -85,7 +84,7 @@ module VX_decode #(
|
|||
wire [11:0] iw_imm = is_itype_sh ? {7'b0, instr[24:20]} : u_12;
|
||||
`else
|
||||
wire [11:0] i_imm = is_itype_sh ? {7'b0, instr[24:20]} : u_12;
|
||||
`endif
|
||||
`endif
|
||||
wire [11:0] s_imm = {func7, rd};
|
||||
wire [12:0] b_imm = {instr[31], instr[7], instr[30:25], instr[11:8], 1'b0};
|
||||
wire [20:0] jal_imm = {instr[31], instr[19:12], instr[20], instr[30:21], 1'b0};
|
||||
|
@ -121,9 +120,9 @@ module VX_decode #(
|
|||
always @(*) begin
|
||||
case (u_12)
|
||||
12'h000: s_type = `INST_OP_BITS'(`INST_BR_ECALL);
|
||||
12'h001: s_type = `INST_OP_BITS'(`INST_BR_EBREAK);
|
||||
12'h002: s_type = `INST_OP_BITS'(`INST_BR_URET);
|
||||
12'h102: s_type = `INST_OP_BITS'(`INST_BR_SRET);
|
||||
12'h001: s_type = `INST_OP_BITS'(`INST_BR_EBREAK);
|
||||
12'h002: s_type = `INST_OP_BITS'(`INST_BR_URET);
|
||||
12'h102: s_type = `INST_OP_BITS'(`INST_BR_SRET);
|
||||
12'h302: s_type = `INST_OP_BITS'(`INST_BR_MRET);
|
||||
default: s_type = 'x;
|
||||
endcase
|
||||
|
@ -145,171 +144,212 @@ module VX_decode #(
|
|||
end
|
||||
`endif
|
||||
|
||||
`STATIC_ASSERT($bits(alu_args_t) == $bits(op_args_t), ("alu_args_t size mismatch: current=%0d, expected=%0d", $bits(alu_args_t), $bits(op_args_t)));
|
||||
`STATIC_ASSERT($bits(fpu_args_t) == $bits(op_args_t), ("fpu_args_t size mismatch: current=%0d, expected=%0d", $bits(fpu_args_t), $bits(op_args_t)));
|
||||
`STATIC_ASSERT($bits(lsu_args_t) == $bits(op_args_t), ("lsu_args_t size mismatch: current=%0d, expected=%0d", $bits(lsu_args_t), $bits(op_args_t)));
|
||||
`STATIC_ASSERT($bits(csr_args_t) == $bits(op_args_t), ("csr_args_t size mismatch: current=%0d, expected=%0d", $bits(csr_args_t), $bits(op_args_t)));
|
||||
`STATIC_ASSERT($bits(wctl_args_t) == $bits(op_args_t), ("wctl_args_t size mismatch: current=%0d, expected=%0d", $bits(wctl_args_t), $bits(op_args_t)));
|
||||
|
||||
always @(*) begin
|
||||
|
||||
ex_type = '0;
|
||||
op_type = 'x;
|
||||
op_mod = '0;
|
||||
op_args = 'x;
|
||||
rd_r = '0;
|
||||
rs1_r = '0;
|
||||
rs2_r = '0;
|
||||
rs3_r = '0;
|
||||
imm = 'x;
|
||||
use_imm = 0;
|
||||
use_PC = 0;
|
||||
use_rd = 0;
|
||||
use_rs1 = 0;
|
||||
use_rs2 = 0;
|
||||
use_rs3 = 0;
|
||||
is_wstall = 0;
|
||||
|
||||
case (opcode)
|
||||
case (opcode)
|
||||
`INST_I: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(r_type);
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
imm = {{(`XLEN-12){i_imm[11]}}, i_imm};
|
||||
op_args.alu.xtype = `ALU_TYPE_ARITH;
|
||||
op_args.alu.is_w = 0;
|
||||
op_args.alu.use_PC = 0;
|
||||
op_args.alu.use_imm = 1;
|
||||
op_args.alu.imm = `SEXT(`IMM_BITS, i_imm);
|
||||
use_rd = 1;
|
||||
`USED_IREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
`INST_R: begin
|
||||
`INST_R: begin
|
||||
ex_type = `EX_ALU;
|
||||
`ifdef EXT_M_ENABLE
|
||||
if (func7[0]) begin
|
||||
op_type = `INST_OP_BITS'(m_type);
|
||||
op_mod[1] = 1;
|
||||
end else
|
||||
`endif
|
||||
begin
|
||||
op_type = `INST_OP_BITS'(r_type);
|
||||
end
|
||||
op_args.alu.is_w = 0;
|
||||
op_args.alu.use_PC = 0;
|
||||
op_args.alu.use_imm = 0;
|
||||
use_rd = 1;
|
||||
`USED_IREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
case (func7)
|
||||
`ifdef EXT_M_ENABLE
|
||||
`INST_R_F7_MUL: begin
|
||||
// MUL, MULH, MULHSU, MULHU
|
||||
op_type = `INST_OP_BITS'(m_type);
|
||||
op_args.alu.xtype = `ALU_TYPE_MULDIV;
|
||||
end
|
||||
`endif
|
||||
`ifdef EXT_ZICOND_ENABLE
|
||||
`INST_R_F7_ZICOND: begin
|
||||
// CZERO-EQZ, CZERO-NEZ
|
||||
op_type = func3[1] ? `INST_OP_BITS'(`INST_ALU_CZNE) : `INST_OP_BITS'(`INST_ALU_CZEQ);
|
||||
op_args.alu.xtype = `ALU_TYPE_ARITH;
|
||||
end
|
||||
`endif
|
||||
default: begin
|
||||
op_type = `INST_OP_BITS'(r_type);
|
||||
op_args.alu.xtype = `ALU_TYPE_ARITH;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
`ifdef XLEN_64
|
||||
`INST_I_W: begin
|
||||
// ADDIW, SLLIW, SRLIW, SRAIW
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(r_type);
|
||||
op_mod[2] = 1;
|
||||
op_args.alu.xtype = `ALU_TYPE_ARITH;
|
||||
op_args.alu.is_w = 1;
|
||||
op_args.alu.use_PC = 0;
|
||||
op_args.alu.use_imm = 1;
|
||||
op_args.alu.imm = `SEXT(`IMM_BITS, iw_imm);
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
imm = {{(`XLEN-12){iw_imm[11]}}, iw_imm};
|
||||
`USED_IREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
`INST_R_W: begin
|
||||
ex_type = `EX_ALU;
|
||||
`ifdef EXT_M_ENABLE
|
||||
if (func7[0]) begin
|
||||
// MULW, DIVW, DIVUW, REMW, REMUW
|
||||
op_type = `INST_OP_BITS'(m_type);
|
||||
op_mod[1] = 1;
|
||||
end else
|
||||
`endif
|
||||
begin
|
||||
// ADDW, SUBW, SLLW, SRLW, SRAW
|
||||
op_type = `INST_OP_BITS'(r_type);
|
||||
end
|
||||
op_mod[2] = 1;
|
||||
op_args.alu.is_w = 1;
|
||||
op_args.alu.use_PC = 0;
|
||||
op_args.alu.use_imm = 0;
|
||||
use_rd = 1;
|
||||
`USED_IREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
case (func7)
|
||||
`ifdef EXT_M_ENABLE
|
||||
`INST_R_F7_MUL: begin
|
||||
// MULW, DIVW, DIVUW, REMW, REMUW
|
||||
op_type = `INST_OP_BITS'(m_type);
|
||||
op_args.alu.xtype = `ALU_TYPE_MULDIV;
|
||||
end
|
||||
`endif
|
||||
default: begin
|
||||
// ADDW, SUBW, SLLW, SRLW, SRAW
|
||||
op_type = `INST_OP_BITS'(r_type);
|
||||
op_args.alu.xtype = `ALU_TYPE_ARITH;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
`endif
|
||||
`INST_LUI: begin
|
||||
`INST_LUI: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(`INST_ALU_LUI);
|
||||
op_args.alu.xtype = `ALU_TYPE_ARITH;
|
||||
op_args.alu.is_w = 0;
|
||||
op_args.alu.use_PC = 0;
|
||||
op_args.alu.use_imm = 1;
|
||||
op_args.alu.imm = {{`IMM_BITS-31{ui_imm[19]}}, ui_imm[18:0], 12'(0)};
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
imm = {{`XLEN-31{ui_imm[19]}}, ui_imm[18:0], 12'(0)};
|
||||
`USED_IREG (rd);
|
||||
end
|
||||
`INST_AUIPC: begin
|
||||
`INST_AUIPC: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(`INST_ALU_AUIPC);
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
use_PC = 1;
|
||||
imm = {{`XLEN-31{ui_imm[19]}}, ui_imm[18:0], 12'(0)};
|
||||
op_args.alu.xtype = `ALU_TYPE_ARITH;
|
||||
op_args.alu.is_w = 0;
|
||||
op_args.alu.use_PC = 1;
|
||||
op_args.alu.use_imm = 1;
|
||||
op_args.alu.imm = {{`IMM_BITS-31{ui_imm[19]}}, ui_imm[18:0], 12'(0)};
|
||||
use_rd = 1;
|
||||
`USED_IREG (rd);
|
||||
end
|
||||
`INST_JAL: begin
|
||||
`INST_JAL: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(`INST_BR_JAL);
|
||||
op_mod[0] = 1;
|
||||
op_args.alu.xtype = `ALU_TYPE_BRANCH;
|
||||
op_args.alu.is_w = 0;
|
||||
op_args.alu.use_PC = 1;
|
||||
op_args.alu.use_imm = 1;
|
||||
op_args.alu.imm = `SEXT(`IMM_BITS, jal_imm);
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
use_PC = 1;
|
||||
is_wstall = 1;
|
||||
imm = {{(`XLEN-21){jal_imm[20]}}, jal_imm};
|
||||
`USED_IREG (rd);
|
||||
end
|
||||
`INST_JALR: begin
|
||||
`INST_JALR: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(`INST_BR_JALR);
|
||||
op_mod[0] = 1;
|
||||
op_args.alu.xtype = `ALU_TYPE_BRANCH;
|
||||
op_args.alu.is_w = 0;
|
||||
op_args.alu.use_PC = 0;
|
||||
op_args.alu.use_imm = 1;
|
||||
op_args.alu.imm = `SEXT(`IMM_BITS, u_12);
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
is_wstall = 1;
|
||||
imm = {{(`XLEN-12){u_12[11]}}, u_12};
|
||||
`USED_IREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
`INST_B: begin
|
||||
`INST_B: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(b_type);
|
||||
op_mod[0] = 1;
|
||||
use_imm = 1;
|
||||
use_PC = 1;
|
||||
op_args.alu.xtype = `ALU_TYPE_BRANCH;
|
||||
op_args.alu.is_w = 0;
|
||||
op_args.alu.use_PC = 1;
|
||||
op_args.alu.use_imm = 1;
|
||||
op_args.alu.imm = `SEXT(`IMM_BITS, b_imm);
|
||||
is_wstall = 1;
|
||||
imm = {{(`XLEN-13){b_imm[12]}}, b_imm};
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
end
|
||||
`INST_FENCE: begin
|
||||
ex_type = `EX_LSU;
|
||||
op_type = `INST_LSU_FENCE;
|
||||
op_args.lsu.is_store = 0;
|
||||
op_args.lsu.is_float = 0;
|
||||
op_args.lsu.offset = 0;
|
||||
end
|
||||
`INST_SYS : begin
|
||||
if (func3[1:0] != 0) begin
|
||||
`INST_SYS : begin
|
||||
if (func3[1:0] != 0) begin
|
||||
ex_type = `EX_SFU;
|
||||
op_type = `INST_OP_BITS'(`INST_SFU_CSR(func3[1:0]));
|
||||
op_args.csr.addr = u_12;
|
||||
op_args.csr.use_imm = func3[2];
|
||||
use_rd = 1;
|
||||
is_wstall = 1;
|
||||
use_imm = func3[2];
|
||||
imm[`VX_CSR_ADDR_BITS-1:0] = u_12; // addr
|
||||
is_wstall = is_fpu_csr; // only stall for FPU CSRs
|
||||
`USED_IREG (rd);
|
||||
if (func3[2]) begin
|
||||
imm[`VX_CSR_ADDR_BITS +: `NRI_BITS] = rs1; // imm
|
||||
op_args.csr.imm = rs1;
|
||||
end else begin
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(s_type);
|
||||
op_mod[0] = 1;
|
||||
op_args.alu.xtype = `ALU_TYPE_BRANCH;
|
||||
op_args.alu.is_w = 0;
|
||||
op_args.alu.use_imm = 1;
|
||||
op_args.alu.use_PC = 1;
|
||||
op_args.alu.imm = `IMM_BITS'd4;
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
use_PC = 1;
|
||||
is_wstall = 1;
|
||||
imm = `XLEN'd4;
|
||||
`USED_IREG (rd);
|
||||
end
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
`INST_FL,
|
||||
`INST_FL,
|
||||
`endif
|
||||
`INST_L: begin
|
||||
`INST_L: begin
|
||||
ex_type = `EX_LSU;
|
||||
op_type = `INST_OP_BITS'({1'b0, func3});
|
||||
op_args.lsu.is_store = 0;
|
||||
op_args.lsu.is_float = opcode[2];
|
||||
op_args.lsu.offset = u_12;
|
||||
use_rd = 1;
|
||||
imm = {{(`XLEN-12){u_12[11]}}, u_12};
|
||||
use_imm = 1;
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (opcode[2]) begin
|
||||
`USED_FREG (rd);
|
||||
|
@ -319,13 +359,14 @@ module VX_decode #(
|
|||
`USED_IREG (rs1);
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
`INST_FS,
|
||||
`INST_FS,
|
||||
`endif
|
||||
`INST_S: begin
|
||||
`INST_S: begin
|
||||
ex_type = `EX_LSU;
|
||||
op_type = `INST_OP_BITS'({1'b1, func3});
|
||||
imm = {{(`XLEN-12){s_imm[11]}}, s_imm};
|
||||
use_imm = 1;
|
||||
op_args.lsu.is_store = 1;
|
||||
op_args.lsu.is_float = opcode[2];
|
||||
op_args.lsu.offset = s_imm;
|
||||
`USED_IREG (rs1);
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (opcode[2]) begin
|
||||
|
@ -338,24 +379,23 @@ module VX_decode #(
|
|||
`INST_FMADD,
|
||||
`INST_FMSUB,
|
||||
`INST_FNMSUB,
|
||||
`INST_FNMADD: begin
|
||||
`INST_FNMADD: begin
|
||||
ex_type = `EX_FPU;
|
||||
op_type = `INST_OP_BITS'({2'b11, opcode[3:2]});
|
||||
op_mod = `INST_MOD_BITS'(func3);
|
||||
imm[0] = func2[0]; // destination is double?
|
||||
op_args.fpu.frm = func3;
|
||||
op_args.fpu.fmt[0] = func2[0]; // float / double
|
||||
use_rd = 1;
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
`USED_FREG (rs2);
|
||||
`USED_FREG (rs3);
|
||||
end
|
||||
`INST_FCI: begin
|
||||
`INST_FCI: begin
|
||||
ex_type = `EX_FPU;
|
||||
op_mod = `INST_MOD_BITS'(func3);
|
||||
`ifdef FLEN_64
|
||||
imm[0] = func2[0]; // destination is double?
|
||||
`endif
|
||||
use_rd = 1;
|
||||
op_args.fpu.frm = func3;
|
||||
op_args.fpu.fmt[0] = func2[0]; // float / double
|
||||
op_args.fpu.fmt[1] = rs2[1]; // int32 / int64
|
||||
use_rd = 1;
|
||||
case (func5)
|
||||
5'b00000, // FADD
|
||||
5'b00001, // FSUB
|
||||
|
@ -369,7 +409,7 @@ module VX_decode #(
|
|||
5'b00100: begin
|
||||
// NCP: FSGNJ=0, FSGNJN=1, FSGNJX=2
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_mod = `INST_MOD_BITS'(func3[1:0]);
|
||||
op_args.fpu.frm = `INST_FRM_BITS'(func3[1:0]);
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
`USED_FREG (rs2);
|
||||
|
@ -377,67 +417,61 @@ module VX_decode #(
|
|||
5'b00101: begin
|
||||
// NCP: FMIN=6, FMAX=7
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_mod = func3[0] ? 7 : 6;
|
||||
op_args.fpu.frm = `INST_FRM_BITS'(func3[0] ? 7 : 6);
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
`USED_FREG (rs2);
|
||||
end
|
||||
end
|
||||
`ifdef FLEN_64
|
||||
5'b01000: begin
|
||||
// CVT.S.D, CVT.D.S
|
||||
5'b01000: begin
|
||||
// FCVT.S.D, FCVT.D.S
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_F2F);
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
end
|
||||
`endif
|
||||
5'b01011: begin
|
||||
// SQRT
|
||||
5'b01011: begin
|
||||
// FSQRT
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_SQRT);
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
end
|
||||
end
|
||||
5'b10100: begin
|
||||
// CMP
|
||||
// FCMP
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_CMP);
|
||||
`USED_IREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
`USED_FREG (rs2);
|
||||
end
|
||||
end
|
||||
5'b11000: begin
|
||||
// CVT.W.X, CVT.WU.X
|
||||
// FCVT.W.X, FCVT.WU.X
|
||||
op_type = (rs2[0]) ? `INST_OP_BITS'(`INST_FPU_F2U) : `INST_OP_BITS'(`INST_FPU_F2I);
|
||||
`ifdef XLEN_64
|
||||
imm[1] = rs2[1]; // is 64-bit integer
|
||||
`endif
|
||||
`USED_IREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
end
|
||||
5'b11010: begin
|
||||
// CVT.X.W, CVT.X.WU
|
||||
// FCVT.X.W, FCVT.X.WU
|
||||
op_type = (rs2[0]) ? `INST_OP_BITS'(`INST_FPU_U2F) : `INST_OP_BITS'(`INST_FPU_I2F);
|
||||
`ifdef XLEN_64
|
||||
imm[1] = rs2[1]; // is 64-bit integer
|
||||
`endif
|
||||
`USED_FREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
5'b11100: begin
|
||||
5'b11100: begin
|
||||
if (func3[0]) begin
|
||||
// NCP: FCLASS=3
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_mod = 3;
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_args.fpu.frm = `INST_FRM_BITS'(3);
|
||||
end else begin
|
||||
// NCP: FMV.X.W=4
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_mod = 4;
|
||||
op_args.fpu.frm = `INST_FRM_BITS'(4);
|
||||
end
|
||||
`USED_IREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
end
|
||||
5'b11110: begin
|
||||
`USED_FREG (rs1);
|
||||
end
|
||||
5'b11110: begin
|
||||
// NCP: FMV.W.X=5
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_mod = 5;
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_args.fpu.frm = `INST_FRM_BITS'(5);
|
||||
`USED_FREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
|
@ -445,7 +479,7 @@ module VX_decode #(
|
|||
endcase
|
||||
end
|
||||
`endif
|
||||
`INST_EXT1: begin
|
||||
`INST_EXT1: begin
|
||||
case (func7)
|
||||
7'h00: begin
|
||||
ex_type = `EX_SFU;
|
||||
|
@ -463,8 +497,9 @@ module VX_decode #(
|
|||
3'h2: begin // SPLIT
|
||||
op_type = `INST_OP_BITS'(`INST_SFU_SPLIT);
|
||||
use_rd = 1;
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rd);
|
||||
op_args.wctl.is_neg = rs2[0];
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rd);
|
||||
end
|
||||
3'h3: begin // JOIN
|
||||
op_type = `INST_OP_BITS'(`INST_SFU_JOIN);
|
||||
|
@ -477,6 +512,7 @@ module VX_decode #(
|
|||
end
|
||||
3'h5: begin // PRED
|
||||
op_type = `INST_OP_BITS'(`INST_SFU_PRED);
|
||||
op_args.wctl.is_neg = rd[0];
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
end
|
||||
|
@ -486,25 +522,6 @@ module VX_decode #(
|
|||
default:;
|
||||
endcase
|
||||
end
|
||||
`INST_EXT2: begin
|
||||
case (func3)
|
||||
3'h1: begin
|
||||
case (func2)
|
||||
2'h0: begin // CMOV
|
||||
ex_type = `EX_SFU;
|
||||
op_type = `INST_OP_BITS'(`INST_SFU_CMOV);
|
||||
use_rd = 1;
|
||||
`USED_IREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
`USED_IREG (rs3);
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
@ -520,8 +537,8 @@ module VX_decode #(
|
|||
.reset (reset),
|
||||
.valid_in (fetch_if.valid),
|
||||
.ready_in (fetch_if.ready),
|
||||
.data_in ({fetch_if.data.uuid, fetch_if.data.wid, fetch_if.data.tmask, fetch_if.data.PC, ex_type, op_type, op_mod, use_PC, imm, use_imm, wb, rd_r, rs1_r, rs2_r, rs3_r}),
|
||||
.data_out ({decode_if.data.uuid, decode_if.data.wid, decode_if.data.tmask, decode_if.data.PC, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_mod, decode_if.data.use_PC, decode_if.data.imm, decode_if.data.use_imm, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3}),
|
||||
.data_in ({fetch_if.data.uuid, fetch_if.data.wid, fetch_if.data.tmask, fetch_if.data.PC, ex_type, op_type, op_args, wb, rd_r, rs1_r, rs2_r, rs3_r}),
|
||||
.data_out ({decode_if.data.uuid, decode_if.data.wid, decode_if.data.tmask, decode_if.data.PC, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3}),
|
||||
.valid_out (decode_if.valid),
|
||||
.ready_out (decode_if.ready)
|
||||
);
|
||||
|
@ -533,18 +550,21 @@ module VX_decode #(
|
|||
assign decode_sched_if.valid = fetch_fire;
|
||||
assign decode_sched_if.wid = fetch_if.data.wid;
|
||||
assign decode_sched_if.is_wstall = is_wstall;
|
||||
|
||||
`ifndef L1_ENABLE
|
||||
assign fetch_if.ibuf_pop = decode_if.ibuf_pop;
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_CORE_PIPELINE
|
||||
`ifdef DBG_TRACE_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (decode_if.valid && decode_if.ready) begin
|
||||
`TRACE(1, ("%d: core%0d-decode: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, CORE_ID, decode_if.data.wid, decode_if.data.PC, instr));
|
||||
`TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, INSTANCE_ID, decode_if.data.wid, {decode_if.data.PC, 1'd0}, instr));
|
||||
trace_ex_type(1, decode_if.data.ex_type);
|
||||
`TRACE(1, (", op="));
|
||||
trace_ex_op(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_mod, decode_if.data.rd, decode_if.data.rs2, decode_if.data.use_imm, decode_if.data.imm);
|
||||
`TRACE(1, (", mod=%0d, tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, imm=0x%0h, opds=%b%b%b%b, use_pc=%b, use_imm=%b (#%0d)\n",
|
||||
decode_if.data.op_mod, decode_if.data.tmask, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3, decode_if.data.imm, use_rd, use_rs1, use_rs2, use_rs3, decode_if.data.use_PC, decode_if.data.use_imm, decode_if.data.uuid));
|
||||
trace_ex_op(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args);
|
||||
`TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, opds=%b%b%b%b",
|
||||
decode_if.data.tmask, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3, use_rd, use_rs1, use_rs2, use_rs3));
|
||||
trace_op_args(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args);
|
||||
`TRACE(1, (" (#%0d)\n", decode_if.data.uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,7 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_dispatch import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -23,213 +23,85 @@ module VX_dispatch import VX_gpu_pkg::*; #(
|
|||
output wire [`PERF_CTR_BITS-1:0] perf_stalls [`NUM_EX_UNITS],
|
||||
`endif
|
||||
// inputs
|
||||
VX_operands_if.slave operands_if [`ISSUE_WIDTH],
|
||||
VX_operands_if.slave operands_if,
|
||||
|
||||
// outputs
|
||||
VX_dispatch_if.master alu_dispatch_if [`ISSUE_WIDTH],
|
||||
VX_dispatch_if.master lsu_dispatch_if [`ISSUE_WIDTH],
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_dispatch_if.master fpu_dispatch_if [`ISSUE_WIDTH],
|
||||
`endif
|
||||
VX_dispatch_if.master sfu_dispatch_if [`ISSUE_WIDTH]
|
||||
VX_dispatch_if.master dispatch_if [`NUM_EX_UNITS]
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
|
||||
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + 1 + `XLEN + `XLEN + `NR_BITS + (3 * `NUM_THREADS * `XLEN) + `NT_WIDTH;
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0][`NT_WIDTH-1:0] last_active_tid;
|
||||
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + `INST_OP_BITS + `INST_ARGS_BITS + 1 + `NR_BITS + (3 * `NUM_THREADS * `XLEN) + `NT_WIDTH;
|
||||
|
||||
wire [`NUM_THREADS-1:0][`NT_WIDTH-1:0] tids;
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign tids[i] = `NT_WIDTH'(i);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
VX_find_first #(
|
||||
.N (`NUM_THREADS),
|
||||
.DATAW (`NT_WIDTH),
|
||||
.REVERSE (1)
|
||||
) last_tid_select (
|
||||
.valid_in (operands_if[i].data.tmask),
|
||||
.data_in (tids),
|
||||
.data_out (last_active_tid[i]),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
end
|
||||
|
||||
// ALU dispatch
|
||||
wire [`NT_WIDTH-1:0] last_active_tid;
|
||||
|
||||
VX_operands_if alu_operands_if[`ISSUE_WIDTH]();
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign alu_operands_if[i].valid = operands_if[i].valid && (operands_if[i].data.ex_type == `EX_ALU);
|
||||
assign alu_operands_if[i].data = operands_if[i].data;
|
||||
|
||||
`RESET_RELAY (alu_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2),
|
||||
.OUT_REG (2)
|
||||
) alu_buffer (
|
||||
.clk (clk),
|
||||
.reset (alu_reset),
|
||||
.valid_in (alu_operands_if[i].valid),
|
||||
.ready_in (alu_operands_if[i].ready),
|
||||
.data_in (`TO_DISPATCH_DATA(alu_operands_if[i].data, last_active_tid[i])),
|
||||
.data_out (alu_dispatch_if[i].data),
|
||||
.valid_out (alu_dispatch_if[i].valid),
|
||||
.ready_out (alu_dispatch_if[i].ready)
|
||||
);
|
||||
end
|
||||
|
||||
// LSU dispatch
|
||||
|
||||
VX_operands_if lsu_operands_if[`ISSUE_WIDTH]();
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign lsu_operands_if[i].valid = operands_if[i].valid && (operands_if[i].data.ex_type == `EX_LSU);
|
||||
assign lsu_operands_if[i].data = operands_if[i].data;
|
||||
|
||||
`RESET_RELAY (lsu_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2),
|
||||
.OUT_REG (2)
|
||||
) lsu_buffer (
|
||||
.clk (clk),
|
||||
.reset (lsu_reset),
|
||||
.valid_in (lsu_operands_if[i].valid),
|
||||
.ready_in (lsu_operands_if[i].ready),
|
||||
.data_in (`TO_DISPATCH_DATA(lsu_operands_if[i].data, last_active_tid[i])),
|
||||
.data_out (lsu_dispatch_if[i].data),
|
||||
.valid_out (lsu_dispatch_if[i].valid),
|
||||
.ready_out (lsu_dispatch_if[i].ready)
|
||||
);
|
||||
end
|
||||
|
||||
// FPU dispatch
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
|
||||
VX_operands_if fpu_operands_if[`ISSUE_WIDTH]();
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign fpu_operands_if[i].valid = operands_if[i].valid && (operands_if[i].data.ex_type == `EX_FPU);
|
||||
assign fpu_operands_if[i].data = operands_if[i].data;
|
||||
|
||||
`RESET_RELAY (fpu_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2),
|
||||
.OUT_REG (2)
|
||||
) fpu_buffer (
|
||||
.clk (clk),
|
||||
.reset (fpu_reset),
|
||||
.valid_in (fpu_operands_if[i].valid),
|
||||
.ready_in (fpu_operands_if[i].ready),
|
||||
.data_in (`TO_DISPATCH_DATA(fpu_operands_if[i].data, last_active_tid[i])),
|
||||
.data_out (fpu_dispatch_if[i].data),
|
||||
.valid_out (fpu_dispatch_if[i].valid),
|
||||
.ready_out (fpu_dispatch_if[i].ready)
|
||||
);
|
||||
end
|
||||
`endif
|
||||
|
||||
// SFU dispatch
|
||||
|
||||
VX_operands_if sfu_operands_if[`ISSUE_WIDTH]();
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign sfu_operands_if[i].valid = operands_if[i].valid && (operands_if[i].data.ex_type == `EX_SFU);
|
||||
assign sfu_operands_if[i].data = operands_if[i].data;
|
||||
|
||||
`RESET_RELAY (sfu_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2),
|
||||
.OUT_REG (2)
|
||||
) sfu_buffer (
|
||||
.clk (clk),
|
||||
.reset (sfu_reset),
|
||||
.valid_in (sfu_operands_if[i].valid),
|
||||
.ready_in (sfu_operands_if[i].ready),
|
||||
.data_in (`TO_DISPATCH_DATA(sfu_operands_if[i].data, last_active_tid[i])),
|
||||
.data_out (sfu_dispatch_if[i].data),
|
||||
.valid_out (sfu_dispatch_if[i].valid),
|
||||
.ready_out (sfu_dispatch_if[i].ready)
|
||||
);
|
||||
end
|
||||
|
||||
// can take next request?
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign operands_if[i].ready = (alu_operands_if[i].ready && (operands_if[i].data.ex_type == `EX_ALU))
|
||||
|| (lsu_operands_if[i].ready && (operands_if[i].data.ex_type == `EX_LSU))
|
||||
`ifdef EXT_F_ENABLE
|
||||
|| (fpu_operands_if[i].ready && (operands_if[i].data.ex_type == `EX_FPU))
|
||||
`endif
|
||||
|| (sfu_operands_if[i].ready && (operands_if[i].data.ex_type == `EX_SFU));
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
wire [`NUM_EX_UNITS-1:0] perf_unit_stalls_per_cycle, perf_unit_stalls_per_cycle_r;
|
||||
reg [`ISSUE_WIDTH-1:0][`NUM_EX_UNITS-1:0] perf_issue_unit_stalls_per_cycle;
|
||||
reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_stalls_r;
|
||||
|
||||
for (genvar i=0; i < `ISSUE_WIDTH; ++i) begin
|
||||
always @(*) begin
|
||||
perf_issue_unit_stalls_per_cycle[i] = '0;
|
||||
if (operands_if[i].valid && ~operands_if[i].ready) begin
|
||||
perf_issue_unit_stalls_per_cycle[i][operands_if[i].data.ex_type] = 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
VX_reduce #(
|
||||
.DATAW_IN (`NUM_EX_UNITS),
|
||||
.N (`ISSUE_WIDTH),
|
||||
.OP ("|")
|
||||
) reduce (
|
||||
.data_in (perf_issue_unit_stalls_per_cycle),
|
||||
.data_out (perf_unit_stalls_per_cycle)
|
||||
VX_find_first #(
|
||||
.N (`NUM_THREADS),
|
||||
.DATAW (`NT_WIDTH),
|
||||
.REVERSE (1)
|
||||
) last_tid_select (
|
||||
.valid_in (operands_if.data.tmask),
|
||||
.data_in (tids),
|
||||
.data_out (last_active_tid),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
`BUFFER(perf_unit_stalls_per_cycle_r, perf_unit_stalls_per_cycle);
|
||||
// Per-execution-unit handshake bits, one per elastic buffer instantiated below.
// NOTE(review): despite the "_reset" suffix, each bit is driven by the ready_in
// port of the matching buffer, i.e. it carries a ready signal, not a reset.
wire [`NUM_EX_UNITS-1:0] operands_reset;
|
||||
// The incoming operands are accepted when the buffer selected by their ex_type is ready.
assign operands_if.ready = operands_reset[operands_if.data.ex_type];
|
||||
|
||||
// One elastic buffer per execution unit; instance i drives dispatch_if[i].
for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin
|
||||
|
||||
// buffer_reset is derived from reset by the RESET_RELAY macro (macro body is not visible here).
`RESET_RELAY (buffer_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2),
|
||||
.OUT_REG (2), // 2-cycle EB for area reduction
|
||||
.LUTRAM (1)
|
||||
) buffer (
|
||||
.clk (clk),
|
||||
.reset (buffer_reset),
|
||||
// Only the buffer whose index equals the instruction's ex_type sees a valid input.
.valid_in (operands_if.valid && (operands_if.data.ex_type == `EX_BITS'(i))),
|
||||
.ready_in (operands_reset[i]),
|
||||
// Payload packing order: uuid, wis, tmask, PC, op_type, op_args, wb, rd,
// last_active_tid, then the three register operand arrays.
// NOTE(review): presumably the packed width equals DATAW and the order matches the
// data layout of VX_dispatch_if - confirm against the interface definitions.
.data_in ({
|
||||
operands_if.data.uuid,
|
||||
operands_if.data.wis,
|
||||
operands_if.data.tmask,
|
||||
operands_if.data.PC,
|
||||
operands_if.data.op_type,
|
||||
operands_if.data.op_args,
|
||||
operands_if.data.wb,
|
||||
operands_if.data.rd,
|
||||
last_active_tid,
|
||||
operands_if.data.rs1_data,
|
||||
operands_if.data.rs2_data,
|
||||
operands_if.data.rs3_data
|
||||
}),
|
||||
.data_out (dispatch_if[i].data),
|
||||
.valid_out (dispatch_if[i].valid),
|
||||
.ready_out (dispatch_if[i].ready)
|
||||
);
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_stalls_r;
|
||||
|
||||
wire operands_if_stall = operands_if.valid && ~operands_if.ready;
|
||||
|
||||
for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_stalls_r[i] <= '0;
|
||||
end else begin
|
||||
perf_stalls_r[i] <= perf_stalls_r[i] + `PERF_CTR_BITS'(perf_unit_stalls_per_cycle_r[i]);
|
||||
perf_stalls_r[i] <= perf_stalls_r[i] + `PERF_CTR_BITS'(operands_if_stall && operands_if.data.ex_type == `EX_BITS'(i));
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i=0; i < `NUM_EX_UNITS; ++i) begin
|
||||
assign perf_stalls[i] = perf_stalls_r[i];
|
||||
end
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_CORE_PIPELINE
|
||||
for (genvar i=0; i < `ISSUE_WIDTH; ++i) begin
|
||||
always @(posedge clk) begin
|
||||
if (operands_if[i].valid && operands_if[i].ready) begin
|
||||
`TRACE(1, ("%d: core%0d-issue: wid=%0d, PC=0x%0h, ex=", $time, CORE_ID, wis_to_wid(operands_if[i].data.wis, i), operands_if[i].data.PC));
|
||||
trace_ex_type(1, operands_if[i].data.ex_type);
|
||||
`TRACE(1, (", mod=%0d, tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if[i].data.op_mod, operands_if[i].data.tmask, operands_if[i].data.wb, operands_if[i].data.rd));
|
||||
`TRACE_ARRAY1D(1, operands_if[i].data.rs1_data, `NUM_THREADS);
|
||||
`TRACE(1, (", rs2_data="));
|
||||
`TRACE_ARRAY1D(1, operands_if[i].data.rs2_data, `NUM_THREADS);
|
||||
`TRACE(1, (", rs3_data="));
|
||||
`TRACE_ARRAY1D(1, operands_if[i].data.rs3_data, `NUM_THREADS);
|
||||
`TRACE(1, (" (#%0d)\n", operands_if[i].data.uuid));
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -13,23 +13,24 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
||||
module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
||||
parameter BLOCK_SIZE = 1,
|
||||
parameter NUM_LANES = 1,
|
||||
parameter OUT_REG = 0,
|
||||
parameter OUT_BUF = 0,
|
||||
parameter MAX_FANOUT = `MAX_FANOUT
|
||||
) (
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
// inputs
|
||||
VX_dispatch_if.slave dispatch_if [`ISSUE_WIDTH],
|
||||
|
||||
// outputs
|
||||
VX_execute_if.master execute_if [BLOCK_SIZE]
|
||||
|
||||
);
|
||||
`STATIC_ASSERT ((`NUM_THREADS == NUM_LANES * (`NUM_THREADS / NUM_LANES)), ("invalid parameter"))
|
||||
`STATIC_ASSERT (`IS_DIVISBLE(`ISSUE_WIDTH, BLOCK_SIZE), ("invalid parameter"))
|
||||
`STATIC_ASSERT (`IS_DIVISBLE(`NUM_THREADS, NUM_LANES), ("invalid parameter"))
|
||||
localparam BLOCK_SIZE_W = `LOG2UP(BLOCK_SIZE);
|
||||
localparam NUM_PACKETS = `NUM_THREADS / NUM_LANES;
|
||||
localparam PID_BITS = `CLOG2(NUM_PACKETS);
|
||||
|
@ -37,9 +38,9 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
localparam BATCH_COUNT = `ISSUE_WIDTH / BLOCK_SIZE;
|
||||
localparam BATCH_COUNT_W= `LOG2UP(BATCH_COUNT);
|
||||
localparam ISSUE_W = `LOG2UP(`ISSUE_WIDTH);
|
||||
localparam IN_DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + 1 + `XLEN + `XLEN + `NR_BITS + `NT_WIDTH + (3 * `NUM_THREADS * `XLEN);
|
||||
localparam OUT_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + 1 + `XLEN + `XLEN + `NR_BITS + `NT_WIDTH + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1;
|
||||
localparam FANOUT_ENABLE= (`NUM_THREADS > (MAX_FANOUT + MAX_FANOUT/2));
|
||||
localparam IN_DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `INST_OP_BITS + `INST_ARGS_BITS + 1 + `PC_BITS + `NR_BITS + `NT_WIDTH + (3 * `NUM_THREADS * `XLEN);
|
||||
localparam OUT_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `INST_OP_BITS + `INST_ARGS_BITS + 1 + `PC_BITS + `NR_BITS + `NT_WIDTH + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1;
|
||||
localparam FANOUT_ENABLE= (`NUM_THREADS > (MAX_FANOUT + MAX_FANOUT /2));
|
||||
|
||||
localparam DATA_TMASK_OFF = IN_DATAW - (`UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS);
|
||||
localparam DATA_REGS_OFF = 0;
|
||||
|
@ -53,7 +54,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
assign dispatch_data[i] = dispatch_if[i].data;
|
||||
assign dispatch_if[i].ready = dispatch_ready[i];
|
||||
end
|
||||
|
||||
|
||||
wire [BLOCK_SIZE-1:0][ISSUE_W-1:0] issue_indices;
|
||||
wire [BLOCK_SIZE-1:0] block_ready;
|
||||
wire [BLOCK_SIZE-1:0][NUM_LANES-1:0] block_tmask;
|
||||
|
@ -64,7 +65,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
wire [BLOCK_SIZE-1:0] block_done;
|
||||
|
||||
wire batch_done = (& block_done);
|
||||
|
||||
|
||||
logic [BATCH_COUNT_W-1:0] batch_idx;
|
||||
if (BATCH_COUNT != 1) begin
|
||||
always @(posedge clk) begin
|
||||
|
@ -78,12 +79,14 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
assign batch_idx = 0;
|
||||
`UNUSED_VAR (batch_done)
|
||||
end
|
||||
|
||||
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin
|
||||
|
||||
wire [ISSUE_W-1:0] issue_idx = ISSUE_W'(batch_idx * BLOCK_SIZE) + ISSUE_W'(block_idx);
|
||||
assign issue_indices[block_idx] = issue_idx;
|
||||
|
||||
`RESET_RELAY_EN (block_reset, reset, (BLOCK_SIZE > 1));
|
||||
|
||||
wire valid_p, ready_p;
|
||||
|
||||
if (`NUM_THREADS != NUM_LANES) begin
|
||||
|
@ -99,7 +102,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
wire fire_eop = fire_p && is_last_p;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
if (block_reset) begin
|
||||
sent_mask_p <= '0;
|
||||
is_first_p <= 1;
|
||||
end else begin
|
||||
|
@ -114,7 +117,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
end
|
||||
|
||||
wire [NUM_PACKETS-1:0][NUM_LANES-1:0] per_packet_tmask;
|
||||
wire [NUM_PACKETS-1:0][2:0][NUM_LANES-1:0][`XLEN-1:0] per_packet_regs;
|
||||
wire [NUM_PACKETS-1:0][2:0][NUM_LANES-1:0][`XLEN-1:0] per_packet_regs;
|
||||
|
||||
wire [`NUM_THREADS-1:0] dispatch_tmask = dispatch_data[issue_idx][DATA_TMASK_OFF +: `NUM_THREADS];
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs1_data = dispatch_data[issue_idx][DATA_REGS_OFF + 2 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN];
|
||||
|
@ -134,7 +137,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
wire [NUM_PACKETS-1:0] packet_valids;
|
||||
wire [NUM_PACKETS-1:0][PID_WIDTH-1:0] packet_ids;
|
||||
|
||||
for (genvar i = 0; i < NUM_PACKETS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_PACKETS; ++i) begin
|
||||
assign packet_valids[i] = (| per_packet_tmask[i]);
|
||||
assign packet_ids[i] = PID_WIDTH'(i);
|
||||
end
|
||||
|
@ -143,7 +146,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
.N (NUM_PACKETS),
|
||||
.DATAW (PID_WIDTH),
|
||||
.REVERSE (0)
|
||||
) find_first (
|
||||
) find_first (
|
||||
.valid_in (packet_valids & ~sent_mask_p),
|
||||
.data_in (packet_ids),
|
||||
.data_out (start_p_n),
|
||||
|
@ -154,12 +157,12 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
.N (NUM_PACKETS),
|
||||
.DATAW (PID_WIDTH),
|
||||
.REVERSE (1)
|
||||
) find_last (
|
||||
) find_last (
|
||||
.valid_in (packet_valids),
|
||||
.data_in (packet_ids),
|
||||
.data_out (end_p),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
);
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + PID_WIDTH),
|
||||
|
@ -171,14 +174,14 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
.enable (1'b1),
|
||||
.data_in ({dispatch_valid[issue_idx], start_p_n}),
|
||||
.data_out ({dispatch_valid_r, start_p})
|
||||
);
|
||||
);
|
||||
|
||||
wire [NUM_LANES-1:0] tmask_p = per_packet_tmask[start_p];
|
||||
wire [2:0][NUM_LANES-1:0][`XLEN-1:0] regs_p = per_packet_regs[start_p];
|
||||
|
||||
wire block_enable = (BATCH_COUNT == 1 || ~(& sent_mask_p));
|
||||
|
||||
assign valid_p = dispatch_valid_r && block_enable;
|
||||
|
||||
assign valid_p = dispatch_valid_r && block_enable;
|
||||
assign block_tmask[block_idx] = tmask_p;
|
||||
assign block_regs[block_idx] = regs_p;
|
||||
assign block_pid[block_idx] = start_p;
|
||||
|
@ -214,27 +217,25 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
assign isw = block_idx;
|
||||
end
|
||||
|
||||
`RESET_RELAY(buf_out_reset, reset);
|
||||
|
||||
wire [`NW_WIDTH-1:0] block_wid = wis_to_wid(dispatch_data[issue_idx][DATA_TMASK_OFF+`NUM_THREADS +: ISSUE_WIS_W], isw);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (OUT_DATAW),
|
||||
.SIZE (`OUT_REG_TO_EB_SIZE(OUT_REG)),
|
||||
.OUT_REG (`OUT_REG_TO_EB_REG(OUT_REG))
|
||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
||||
) buf_out (
|
||||
.clk (clk),
|
||||
.reset (buf_out_reset),
|
||||
.reset (block_reset),
|
||||
.valid_in (valid_p),
|
||||
.ready_in (ready_p),
|
||||
.data_in ({
|
||||
.data_in ({
|
||||
dispatch_data[issue_idx][IN_DATAW-1 : DATA_TMASK_OFF+`NUM_THREADS+ISSUE_WIS_W],
|
||||
block_wid,
|
||||
block_tmask[block_idx],
|
||||
dispatch_data[issue_idx][DATA_TMASK_OFF-1 : DATA_REGS_OFF + 3 * `NUM_THREADS * `XLEN],
|
||||
block_regs[block_idx][0],
|
||||
block_regs[block_idx][1],
|
||||
block_regs[block_idx][2],
|
||||
block_regs[block_idx][2],
|
||||
block_pid[block_idx],
|
||||
block_sop[block_idx],
|
||||
block_eop[block_idx]}),
|
||||
|
@ -251,6 +252,6 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
ready_in[issue_indices[i]] = block_ready[i] && block_eop[i];
|
||||
end
|
||||
end
|
||||
assign dispatch_ready = ready_in;
|
||||
assign dispatch_ready = ready_in;
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,124 +14,103 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_execute import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
input base_dcrs_t base_dcrs,
|
||||
|
||||
// Dcache interface
|
||||
VX_mem_bus_if.master dcache_bus_if [DCACHE_NUM_REQS],
|
||||
|
||||
// commit interface
|
||||
VX_commit_csr_if.slave commit_csr_if,
|
||||
|
||||
// fetch interface
|
||||
VX_sched_csr_if.slave sched_csr_if,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_mem_perf_if.slave mem_perf_if,
|
||||
VX_pipeline_perf_if.slave pipeline_perf_if,
|
||||
`endif
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_dispatch_if.slave fpu_dispatch_if [`ISSUE_WIDTH],
|
||||
VX_commit_if.master fpu_commit_if [`ISSUE_WIDTH],
|
||||
`endif
|
||||
|
||||
VX_dispatch_if.slave alu_dispatch_if [`ISSUE_WIDTH],
|
||||
VX_commit_if.master alu_commit_if [`ISSUE_WIDTH],
|
||||
input base_dcrs_t base_dcrs,
|
||||
|
||||
// Dcache interface
|
||||
VX_lsu_mem_if.master lsu_mem_if [`NUM_LSU_BLOCKS],
|
||||
|
||||
// dispatch interface
|
||||
VX_dispatch_if.slave dispatch_if [`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
|
||||
// commit interface
|
||||
VX_commit_if.master commit_if [`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
|
||||
// scheduler interfaces
|
||||
VX_sched_csr_if.slave sched_csr_if,
|
||||
VX_branch_ctl_if.master branch_ctl_if [`NUM_ALU_BLOCKS],
|
||||
|
||||
VX_dispatch_if.slave lsu_dispatch_if [`ISSUE_WIDTH],
|
||||
VX_commit_if.master lsu_commit_if [`ISSUE_WIDTH],
|
||||
|
||||
VX_dispatch_if.slave sfu_dispatch_if [`ISSUE_WIDTH],
|
||||
VX_commit_if.master sfu_commit_if [`ISSUE_WIDTH],
|
||||
VX_warp_ctl_if.master warp_ctl_if,
|
||||
|
||||
// simulation helper signals
|
||||
output wire sim_ebreak
|
||||
// commit interface
|
||||
VX_commit_csr_if.slave commit_csr_if
|
||||
);
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_fpu_to_csr_if fpu_to_csr_if[`NUM_FPU_BLOCKS]();
|
||||
VX_fpu_csr_if fpu_csr_if[`NUM_FPU_BLOCKS]();
|
||||
`endif
|
||||
|
||||
`RESET_RELAY (alu_reset, reset);
|
||||
`RESET_RELAY (lsu_reset, reset);
|
||||
`RESET_RELAY (sfu_reset, reset);
|
||||
|
||||
|
||||
VX_alu_unit #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-alu", INSTANCE_ID))
|
||||
) alu_unit (
|
||||
.clk (clk),
|
||||
.reset (alu_reset),
|
||||
.dispatch_if (alu_dispatch_if),
|
||||
.branch_ctl_if (branch_ctl_if),
|
||||
.commit_if (alu_commit_if)
|
||||
.dispatch_if (dispatch_if[`EX_ALU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.commit_if (commit_if[`EX_ALU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.branch_ctl_if (branch_ctl_if)
|
||||
);
|
||||
|
||||
`SCOPE_IO_SWITCH (1)
|
||||
|
||||
VX_lsu_unit #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-lsu", INSTANCE_ID))
|
||||
) lsu_unit (
|
||||
`SCOPE_IO_BIND (0)
|
||||
.clk (clk),
|
||||
.reset (lsu_reset),
|
||||
.cache_bus_if (dcache_bus_if),
|
||||
.dispatch_if (lsu_dispatch_if),
|
||||
.commit_if (lsu_commit_if)
|
||||
.dispatch_if (dispatch_if[`EX_LSU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.commit_if (commit_if[`EX_LSU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.lsu_mem_if (lsu_mem_if)
|
||||
);
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`RESET_RELAY (fpu_reset, reset);
|
||||
|
||||
VX_fpu_unit #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-fpu", INSTANCE_ID))
|
||||
) fpu_unit (
|
||||
.clk (clk),
|
||||
.reset (fpu_reset),
|
||||
.dispatch_if (fpu_dispatch_if),
|
||||
.fpu_to_csr_if (fpu_to_csr_if),
|
||||
.commit_if (fpu_commit_if)
|
||||
.reset (fpu_reset),
|
||||
.dispatch_if (dispatch_if[`EX_FPU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.commit_if (commit_if[`EX_FPU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.fpu_csr_if (fpu_csr_if)
|
||||
);
|
||||
`endif
|
||||
|
||||
VX_sfu_unit #(
|
||||
.INSTANCE_ID ($sformatf("%s-sfu", INSTANCE_ID)),
|
||||
.CORE_ID (CORE_ID)
|
||||
) sfu_unit (
|
||||
.clk (clk),
|
||||
.reset (sfu_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.mem_perf_if (mem_perf_if),
|
||||
.pipeline_perf_if (pipeline_perf_if),
|
||||
`endif
|
||||
|
||||
.base_dcrs (base_dcrs),
|
||||
|
||||
.dispatch_if (sfu_dispatch_if),
|
||||
|
||||
.base_dcrs (base_dcrs),
|
||||
.dispatch_if (dispatch_if[`EX_SFU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.commit_if (commit_if[`EX_SFU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_to_csr_if (fpu_to_csr_if),
|
||||
.fpu_csr_if (fpu_csr_if),
|
||||
`endif
|
||||
|
||||
.commit_csr_if (commit_csr_if),
|
||||
.sched_csr_if (sched_csr_if),
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.commit_if (sfu_commit_if)
|
||||
.warp_ctl_if (warp_ctl_if)
|
||||
);
|
||||
|
||||
// simulation helper signal to get RISC-V tests Pass/Fail status
|
||||
assign sim_ebreak = alu_dispatch_if[0].valid && alu_dispatch_if[0].ready
|
||||
&& alu_dispatch_if[0].data.wis == 0
|
||||
&& `INST_ALU_IS_BR(alu_dispatch_if[0].data.op_mod)
|
||||
&& (`INST_BR_BITS'(alu_dispatch_if[0].data.op_type) == `INST_BR_EBREAK
|
||||
|| `INST_BR_BITS'(alu_dispatch_if[0].data.op_type) == `INST_BR_ECALL);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,7 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_fetch import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
|
@ -23,16 +23,15 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
|
||||
// Icache interface
|
||||
VX_mem_bus_if.master icache_bus_if,
|
||||
|
||||
|
||||
// inputs
|
||||
VX_schedule_if.slave schedule_if,
|
||||
|
||||
// outputs
|
||||
VX_fetch_if.master fetch_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_VAR (reset)
|
||||
localparam ISW_WIDTH = `LOG2UP(`ISSUE_WIDTH);
|
||||
|
||||
wire icache_req_valid;
|
||||
wire [ICACHE_ADDR_WIDTH-1:0] icache_req_addr;
|
||||
|
@ -40,60 +39,65 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
wire icache_req_ready;
|
||||
|
||||
wire [`UUID_WIDTH-1:0] rsp_uuid;
|
||||
wire [`NW_WIDTH-1:0] req_tag, rsp_tag;
|
||||
wire [`NW_WIDTH-1:0] req_tag, rsp_tag;
|
||||
|
||||
wire icache_req_fire = icache_req_valid && icache_req_ready;
|
||||
|
||||
wire [ISW_WIDTH-1:0] schedule_isw = wid_to_isw(schedule_if.data.wid);
|
||||
|
||||
assign req_tag = schedule_if.data.wid;
|
||||
|
||||
|
||||
assign {rsp_uuid, rsp_tag} = icache_bus_if.rsp_data.tag;
|
||||
|
||||
wire [`XLEN-1:0] rsp_PC;
|
||||
wire [`PC_BITS-1:0] rsp_PC;
|
||||
wire [`NUM_THREADS-1:0] rsp_tmask;
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (`XLEN + `NUM_THREADS),
|
||||
.DATAW (`PC_BITS + `NUM_THREADS),
|
||||
.SIZE (`NUM_WARPS),
|
||||
.LUTRAM (1)
|
||||
) tag_store (
|
||||
.clk (clk),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (1'b1),
|
||||
.write (icache_req_fire),
|
||||
`UNUSED_PIN (wren),
|
||||
.write (icache_req_fire),
|
||||
.wren (1'b1),
|
||||
.waddr (req_tag),
|
||||
.wdata ({schedule_if.data.PC, schedule_if.data.tmask}),
|
||||
.raddr (rsp_tag),
|
||||
.rdata ({rsp_PC, rsp_tmask})
|
||||
);
|
||||
|
||||
`ifndef L1_ENABLE
|
||||
// Ensure that the ibuffer doesn't fill up.
|
||||
// This resolves potential deadlock if ibuffer fills and the LSU stalls the execute stage due to pending dcache request.
|
||||
// This issue is particularly prevalent when the icache and dcache is disabled and both requests share the same bus.
|
||||
wire [`ISSUE_WIDTH-1:0] pending_ibuf_full;
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
VX_pending_size #(
|
||||
// This resolves potential deadlock if ibuffer fills and the LSU stalls the execute stage due to pending dcache requests.
|
||||
// This issue is particularly prevalent when the icache and dcache are disabled and both requests share the same bus.
|
||||
wire [`NUM_WARPS-1:0] pending_ibuf_full;
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
VX_pending_size #(
|
||||
.SIZE (`IBUF_SIZE)
|
||||
) pending_reads (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.incr (icache_req_fire && schedule_isw == i),
|
||||
.incr (icache_req_fire && schedule_if.data.wid == i),
|
||||
.decr (fetch_if.ibuf_pop[i]),
|
||||
`UNUSED_PIN (empty),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
.full (pending_ibuf_full[i]),
|
||||
`UNUSED_PIN (size),
|
||||
`UNUSED_PIN (empty)
|
||||
`UNUSED_PIN (alm_full),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
end
|
||||
wire ibuf_ready = ~pending_ibuf_full[schedule_if.data.wid];
|
||||
`else
|
||||
wire ibuf_ready = 1'b1;
|
||||
`endif
|
||||
|
||||
`RUNTIME_ASSERT((!schedule_if.valid || schedule_if.data.PC != 0),
|
||||
("%t: *** invalid PC=0x%0h, wid=%0d, tmask=%b (#%0d)", $time, schedule_if.data.PC, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.uuid))
|
||||
`RUNTIME_ASSERT((!schedule_if.valid || schedule_if.data.PC != 0),
|
||||
("%t: *** %s invalid PC=0x%0h, wid=%0d, tmask=%b (#%0d)", $time, INSTANCE_ID, {schedule_if.data.PC, 1'b0}, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.uuid))
|
||||
|
||||
// Icache Request
|
||||
|
||||
wire ibuf_ready = ~pending_ibuf_full[schedule_isw];
|
||||
|
||||
assign icache_req_valid = schedule_if.valid && ibuf_ready;
|
||||
assign icache_req_addr = schedule_if.data.PC[`MEM_ADDR_WIDTH-1:2];
|
||||
assign icache_req_addr = schedule_if.data.PC[1 +: ICACHE_ADDR_WIDTH];
|
||||
assign icache_req_tag = {schedule_if.data.uuid, req_tag};
|
||||
assign schedule_if.ready = icache_req_ready && ibuf_ready;
|
||||
|
||||
|
@ -112,9 +116,10 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
.ready_out (icache_bus_if.req_ready)
|
||||
);
|
||||
|
||||
assign icache_bus_if.req_data.atype = '0;
|
||||
assign icache_bus_if.req_data.rw = 0;
|
||||
assign icache_bus_if.req_data.byteen = 4'b1111;
|
||||
assign icache_bus_if.req_data.data = '0;
|
||||
assign icache_bus_if.req_data.data = '0;
|
||||
|
||||
// Icache Response
|
||||
|
||||
|
@ -127,56 +132,46 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
assign icache_bus_if.rsp_ready = fetch_if.ready;
|
||||
|
||||
`ifdef DBG_SCOPE_FETCH
|
||||
if (CORE_ID == 0) begin
|
||||
`ifdef SCOPE
|
||||
wire schedule_fire = schedule_if.valid && schedule_if.ready;
|
||||
wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready;
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (1),
|
||||
.TRIGGERW (4),
|
||||
.PROBEW (3*`UUID_WIDTH + 108)
|
||||
) scope_tap (
|
||||
.clk(clk),
|
||||
.reset(scope_reset),
|
||||
.start(1'b0),
|
||||
.stop(1'b0),
|
||||
.triggers({
|
||||
reset,
|
||||
schedule_fire,
|
||||
icache_req_fire,
|
||||
icache_rsp_fire
|
||||
}),
|
||||
.probes({
|
||||
schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC,
|
||||
icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr,
|
||||
icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag
|
||||
}),
|
||||
.bus_in(scope_bus_in),
|
||||
.bus_out(scope_bus_out)
|
||||
);
|
||||
`endif
|
||||
`ifdef CHIPSCOPE
|
||||
ila_fetch ila_fetch_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({reset, schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC, schedule_if.ready, schedule_if.valid}),
|
||||
.probe1 ({icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr, icache_bus_if.req_ready, icache_bus_if.req_valid}),
|
||||
.probe2 ({icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag, icache_bus_if.rsp_ready, icache_bus_if.rsp_valid})
|
||||
);
|
||||
`endif
|
||||
end
|
||||
// Handshake-completed strobes used as scope triggers.
// NOTE(review): schedule_fire is declared again further down under the
// DBG_TRACE_MEM guard; if both that macro and the guard around this section are
// defined, the name would be declared twice in the same scope - confirm.
wire schedule_fire  = schedule_if.valid && schedule_if.ready;
|
||||
wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready;
|
||||
// Debug scope tap capturing fetch-stage activity: 4 trigger bits and a probe
// vector built from the schedule request, the icache request and the icache response.
VX_scope_tap #(
|
||||
.SCOPE_ID (1),
|
||||
.TRIGGERW (4),
|
||||
// PROBEW is written as one term per probe field, in the same order as the
// probes concatenation below (9 terms for 9 fields).
// NOTE(review): each term is presumably the width of the matching field - verify
// against the interface definitions whenever a probe is added or removed.
.PROBEW (`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS +
|
||||
ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH +
|
||||
(ICACHE_WORD_SIZE*8) + ICACHE_TAG_WIDTH)
|
||||
) scope_tap (
|
||||
.clk (clk),
|
||||
.reset (scope_reset),
|
||||
// start and stop are tied low here.
.start (1'b0),
|
||||
.stop (1'b0),
|
||||
// Trigger bits, MSB first: reset, schedule handshake, icache request fire, icache response fire.
.triggers ({
|
||||
reset,
|
||||
schedule_fire,
|
||||
icache_req_fire,
|
||||
icache_rsp_fire
|
||||
}),
|
||||
// Probe fields: schedule request (uuid, wid, tmask, PC), icache request
// (tag, byteen, addr), icache response (data, tag).
.probes ({
|
||||
schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC,
|
||||
icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr,
|
||||
icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag
|
||||
}),
|
||||
.bus_in (scope_bus_in),
|
||||
.bus_out (scope_bus_out)
|
||||
);
|
||||
`else
|
||||
`SCOPE_IO_UNUSED()
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_CORE_ICACHE
|
||||
`ifdef DBG_TRACE_MEM
|
||||
wire schedule_fire = schedule_if.valid && schedule_if.ready;
|
||||
wire fetch_fire = fetch_if.valid && fetch_if.ready;
|
||||
always @(posedge clk) begin
|
||||
if (schedule_fire) begin
|
||||
`TRACE(1, ("%d: I$%0d req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, CORE_ID, schedule_if.data.wid, schedule_if.data.PC, schedule_if.data.tmask, schedule_if.data.uuid));
|
||||
`TRACE(1, ("%d: %s req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, INSTANCE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid));
|
||||
end
|
||||
if (fetch_fire) begin
|
||||
`TRACE(1, ("%d: I$%0d rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, CORE_ID, fetch_if.data.wid, fetch_if.data.PC, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid));
|
||||
`TRACE(1, ("%d: %s rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, INSTANCE_ID, fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -1,31 +1,32 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fpu_unit import VX_fpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Inputs
|
||||
VX_dispatch_if.slave dispatch_if [`ISSUE_WIDTH],
|
||||
VX_fpu_to_csr_if.master fpu_to_csr_if[`NUM_FPU_BLOCKS],
|
||||
|
||||
VX_commit_if.master commit_if [`ISSUE_WIDTH]
|
||||
// Outputs
|
||||
VX_commit_if.master commit_if [`ISSUE_WIDTH],
|
||||
VX_fpu_csr_if.master fpu_csr_if[`NUM_FPU_BLOCKS]
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam BLOCK_SIZE = `NUM_FPU_BLOCKS;
|
||||
localparam NUM_LANES = `NUM_FPU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
|
@ -35,34 +36,32 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) execute_if[BLOCK_SIZE]();
|
||||
|
||||
`RESET_RELAY (dispatch_reset, reset);
|
||||
) per_block_execute_if[BLOCK_SIZE]();
|
||||
|
||||
VX_dispatch_unit #(
|
||||
.BLOCK_SIZE (BLOCK_SIZE),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.OUT_REG (PARTIAL_BW ? 1 : 0)
|
||||
.OUT_BUF (PARTIAL_BW ? 1 : 0)
|
||||
) dispatch_unit (
|
||||
.clk (clk),
|
||||
.reset (dispatch_reset),
|
||||
.reset (reset),
|
||||
.dispatch_if(dispatch_if),
|
||||
.execute_if (execute_if)
|
||||
.execute_if (per_block_execute_if)
|
||||
);
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) commit_block_if[BLOCK_SIZE]();
|
||||
) per_block_commit_if[BLOCK_SIZE]();
|
||||
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin
|
||||
`UNUSED_VAR (execute_if[block_idx].data.tid)
|
||||
`UNUSED_VAR (execute_if[block_idx].data.wb)
|
||||
`UNUSED_VAR (execute_if[block_idx].data.use_PC)
|
||||
`UNUSED_VAR (execute_if[block_idx].data.use_imm)
|
||||
`UNUSED_VAR (per_block_execute_if[block_idx].data.tid)
|
||||
`UNUSED_VAR (per_block_execute_if[block_idx].data.wb)
|
||||
|
||||
`RESET_RELAY_EN (block_reset, reset, (BLOCK_SIZE > 1));
|
||||
|
||||
// Store request info
|
||||
wire fpu_req_valid, fpu_req_ready;
|
||||
wire fpu_rsp_valid, fpu_rsp_ready;
|
||||
wire fpu_rsp_valid, fpu_rsp_ready;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] fpu_rsp_result;
|
||||
fflags_t fpu_rsp_fflags;
|
||||
wire fpu_rsp_has_fflags;
|
||||
|
@ -70,68 +69,66 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
wire [`UUID_WIDTH-1:0] fpu_rsp_uuid;
|
||||
wire [`NW_WIDTH-1:0] fpu_rsp_wid;
|
||||
wire [NUM_LANES-1:0] fpu_rsp_tmask;
|
||||
wire [`XLEN-1:0] fpu_rsp_PC;
|
||||
wire [`PC_BITS-1:0] fpu_rsp_PC;
|
||||
wire [`NR_BITS-1:0] fpu_rsp_rd;
|
||||
wire [PID_WIDTH-1:0] fpu_rsp_pid;
|
||||
wire fpu_rsp_sop;
|
||||
wire fpu_rsp_eop;
|
||||
|
||||
wire [TAG_WIDTH-1:0] fpu_req_tag, fpu_rsp_tag;
|
||||
wire [TAG_WIDTH-1:0] fpu_req_tag, fpu_rsp_tag;
|
||||
wire mdata_full;
|
||||
|
||||
wire [`INST_FMT_BITS-1:0] fpu_fmt = execute_if[block_idx].data.imm[`INST_FMT_BITS-1:0];
|
||||
wire [`INST_FRM_BITS-1:0] fpu_frm = execute_if[block_idx].data.op_mod[`INST_FRM_BITS-1:0];
|
||||
wire [`INST_FMT_BITS-1:0] fpu_fmt = per_block_execute_if[block_idx].data.op_args.fpu.fmt;
|
||||
wire [`INST_FRM_BITS-1:0] fpu_frm = per_block_execute_if[block_idx].data.op_args.fpu.frm;
|
||||
|
||||
wire execute_fire = execute_if[block_idx].valid && execute_if[block_idx].ready;
|
||||
wire execute_fire = per_block_execute_if[block_idx].valid && per_block_execute_if[block_idx].ready;
|
||||
wire fpu_rsp_fire = fpu_rsp_valid && fpu_rsp_ready;
|
||||
|
||||
VX_index_buffer #(
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + PID_WIDTH + 1 + 1),
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + PID_WIDTH + 1 + 1),
|
||||
.SIZE (`FPUQ_SIZE)
|
||||
) tag_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.acquire_en (execute_fire),
|
||||
.write_addr (fpu_req_tag),
|
||||
.write_data ({execute_if[block_idx].data.uuid, execute_if[block_idx].data.wid, execute_if[block_idx].data.tmask, execute_if[block_idx].data.PC, execute_if[block_idx].data.rd, execute_if[block_idx].data.pid, execute_if[block_idx].data.sop, execute_if[block_idx].data.eop}),
|
||||
.reset (block_reset),
|
||||
.acquire_en (execute_fire),
|
||||
.write_addr (fpu_req_tag),
|
||||
.write_data ({per_block_execute_if[block_idx].data.uuid, per_block_execute_if[block_idx].data.wid, per_block_execute_if[block_idx].data.tmask, per_block_execute_if[block_idx].data.PC, per_block_execute_if[block_idx].data.rd, per_block_execute_if[block_idx].data.pid, per_block_execute_if[block_idx].data.sop, per_block_execute_if[block_idx].data.eop}),
|
||||
.read_data ({fpu_rsp_uuid, fpu_rsp_wid, fpu_rsp_tmask, fpu_rsp_PC, fpu_rsp_rd, fpu_rsp_pid, fpu_rsp_sop, fpu_rsp_eop}),
|
||||
.read_addr (fpu_rsp_tag),
|
||||
.release_en (fpu_rsp_fire),
|
||||
.release_en (fpu_rsp_fire),
|
||||
.full (mdata_full),
|
||||
`UNUSED_PIN (empty)
|
||||
);
|
||||
|
||||
// resolve dynamic FRM from CSR
|
||||
wire [`INST_FRM_BITS-1:0] fpu_req_frm;
|
||||
`ASSIGN_BLOCKED_WID (fpu_to_csr_if[block_idx].read_wid, execute_if[block_idx].data.wid, block_idx, `NUM_FPU_BLOCKS)
|
||||
assign fpu_req_frm = (execute_if[block_idx].data.op_type != `INST_FPU_MISC
|
||||
&& fpu_frm == `INST_FRM_DYN) ? fpu_to_csr_if[block_idx].read_frm : fpu_frm;
|
||||
// resolve dynamic FRM from CSR
|
||||
wire [`INST_FRM_BITS-1:0] fpu_req_frm;
|
||||
`ASSIGN_BLOCKED_WID (fpu_csr_if[block_idx].read_wid, per_block_execute_if[block_idx].data.wid, block_idx, `NUM_FPU_BLOCKS)
|
||||
assign fpu_req_frm = (per_block_execute_if[block_idx].data.op_type != `INST_FPU_MISC
|
||||
&& fpu_frm == `INST_FRM_DYN) ? fpu_csr_if[block_idx].read_frm : fpu_frm;
|
||||
|
||||
// submit FPU request
|
||||
|
||||
assign fpu_req_valid = execute_if[block_idx].valid && ~mdata_full;
|
||||
assign execute_if[block_idx].ready = fpu_req_ready && ~mdata_full;
|
||||
|
||||
`RESET_RELAY (fpu_reset, reset);
|
||||
assign fpu_req_valid = per_block_execute_if[block_idx].valid && ~mdata_full;
|
||||
assign per_block_execute_if[block_idx].ready = fpu_req_ready && ~mdata_full;
|
||||
|
||||
`ifdef FPU_DPI
|
||||
|
||||
VX_fpu_dpi #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.TAGW (TAG_WIDTH),
|
||||
.OUT_REG (PARTIAL_BW ? 1 : 3)
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.OUT_BUF (PARTIAL_BW ? 1 : 3)
|
||||
) fpu_dpi (
|
||||
.clk (clk),
|
||||
.reset (fpu_reset),
|
||||
.reset (block_reset),
|
||||
|
||||
.valid_in (fpu_req_valid),
|
||||
.op_type (execute_if[block_idx].data.op_type),
|
||||
.lane_mask (execute_if[block_idx].data.tmask),
|
||||
.mask_in (per_block_execute_if[block_idx].data.tmask),
|
||||
.op_type (per_block_execute_if[block_idx].data.op_type),
|
||||
.fmt (fpu_fmt),
|
||||
.frm (fpu_req_frm),
|
||||
.dataa (execute_if[block_idx].data.rs1_data),
|
||||
.datab (execute_if[block_idx].data.rs2_data),
|
||||
.datac (execute_if[block_idx].data.rs3_data),
|
||||
.dataa (per_block_execute_if[block_idx].data.rs1_data),
|
||||
.datab (per_block_execute_if[block_idx].data.rs2_data),
|
||||
.datac (per_block_execute_if[block_idx].data.rs3_data),
|
||||
.tag_in (fpu_req_tag),
|
||||
.ready_in (fpu_req_ready),
|
||||
|
||||
|
@ -140,67 +137,67 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
.has_fflags (fpu_rsp_has_fflags),
|
||||
.fflags (fpu_rsp_fflags),
|
||||
.tag_out (fpu_rsp_tag),
|
||||
.ready_out (fpu_rsp_ready)
|
||||
);
|
||||
.ready_out (fpu_rsp_ready)
|
||||
);
|
||||
|
||||
`elsif FPU_FPNEW
|
||||
|
||||
VX_fpu_fpnew #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.TAGW (TAG_WIDTH),
|
||||
.OUT_REG (PARTIAL_BW ? 1 : 3)
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.OUT_BUF (PARTIAL_BW ? 1 : 3)
|
||||
) fpu_fpnew (
|
||||
.clk (clk),
|
||||
.reset (fpu_reset),
|
||||
.reset (block_reset),
|
||||
|
||||
.valid_in (fpu_req_valid),
|
||||
.op_type (execute_if[block_idx].data.op_type),
|
||||
.lane_mask (execute_if[block_idx].data.tmask),
|
||||
.mask_in (per_block_execute_if[block_idx].data.tmask),
|
||||
.op_type (per_block_execute_if[block_idx].data.op_type),
|
||||
.fmt (fpu_fmt),
|
||||
.frm (fpu_req_frm),
|
||||
.dataa (execute_if[block_idx].data.rs1_data),
|
||||
.datab (execute_if[block_idx].data.rs2_data),
|
||||
.datac (execute_if[block_idx].data.rs3_data),
|
||||
.dataa (per_block_execute_if[block_idx].data.rs1_data),
|
||||
.datab (per_block_execute_if[block_idx].data.rs2_data),
|
||||
.datac (per_block_execute_if[block_idx].data.rs3_data),
|
||||
.tag_in (fpu_req_tag),
|
||||
.ready_in (fpu_req_ready),
|
||||
|
||||
.valid_out (fpu_rsp_valid),
|
||||
.valid_out (fpu_rsp_valid),
|
||||
.result (fpu_rsp_result),
|
||||
.has_fflags (fpu_rsp_has_fflags),
|
||||
.fflags (fpu_rsp_fflags),
|
||||
.tag_out (fpu_rsp_tag),
|
||||
.ready_out (fpu_rsp_ready)
|
||||
.tag_out (fpu_rsp_tag),
|
||||
.ready_out (fpu_rsp_ready)
|
||||
);
|
||||
|
||||
`elsif FPU_DSP
|
||||
|
||||
VX_fpu_dsp #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.TAGW (TAG_WIDTH),
|
||||
.OUT_REG (PARTIAL_BW ? 1 : 3)
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.OUT_BUF (PARTIAL_BW ? 1 : 3)
|
||||
) fpu_dsp (
|
||||
.clk (clk),
|
||||
.reset (fpu_reset),
|
||||
.reset (block_reset),
|
||||
|
||||
.valid_in (fpu_req_valid),
|
||||
.lane_mask (execute_if[block_idx].data.tmask),
|
||||
.op_type (execute_if[block_idx].data.op_type),
|
||||
.mask_in (per_block_execute_if[block_idx].data.tmask),
|
||||
.op_type (per_block_execute_if[block_idx].data.op_type),
|
||||
.fmt (fpu_fmt),
|
||||
.frm (fpu_req_frm),
|
||||
.dataa (execute_if[block_idx].data.rs1_data),
|
||||
.datab (execute_if[block_idx].data.rs2_data),
|
||||
.datac (execute_if[block_idx].data.rs3_data),
|
||||
.dataa (per_block_execute_if[block_idx].data.rs1_data),
|
||||
.datab (per_block_execute_if[block_idx].data.rs2_data),
|
||||
.datac (per_block_execute_if[block_idx].data.rs3_data),
|
||||
.tag_in (fpu_req_tag),
|
||||
.ready_in (fpu_req_ready),
|
||||
|
||||
.valid_out (fpu_rsp_valid),
|
||||
.result (fpu_rsp_result),
|
||||
.valid_out (fpu_rsp_valid),
|
||||
.result (fpu_rsp_result),
|
||||
.has_fflags (fpu_rsp_has_fflags),
|
||||
.fflags (fpu_rsp_fflags),
|
||||
.tag_out (fpu_rsp_tag),
|
||||
.ready_out (fpu_rsp_ready)
|
||||
);
|
||||
|
||||
|
||||
`endif
|
||||
|
||||
// handle FPU response
|
||||
|
@ -210,7 +207,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
if (PID_BITS != 0) begin
|
||||
fflags_t fpu_rsp_fflags_r;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
if (block_reset) begin
|
||||
fpu_rsp_fflags_r <= '0;
|
||||
end else if (fpu_rsp_fire) begin
|
||||
fpu_rsp_fflags_r <= fpu_rsp_eop ? '0 : (fpu_rsp_fflags_r | fpu_rsp_fflags);
|
||||
|
@ -220,39 +217,37 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
end else begin
|
||||
assign fpu_rsp_fflags_q = fpu_rsp_fflags;
|
||||
end
|
||||
|
||||
assign fpu_to_csr_if[block_idx].write_enable = fpu_rsp_fire && fpu_rsp_eop && fpu_rsp_has_fflags;
|
||||
`ASSIGN_BLOCKED_WID (fpu_to_csr_if[block_idx].write_wid, fpu_rsp_wid, block_idx, `NUM_FPU_BLOCKS)
|
||||
assign fpu_to_csr_if[block_idx].write_fflags = fpu_rsp_fflags_q;
|
||||
|
||||
assign fpu_csr_if[block_idx].write_enable = fpu_rsp_fire && fpu_rsp_eop && fpu_rsp_has_fflags;
|
||||
`ASSIGN_BLOCKED_WID (fpu_csr_if[block_idx].write_wid, fpu_rsp_wid, block_idx, `NUM_FPU_BLOCKS)
|
||||
assign fpu_csr_if[block_idx].write_fflags = fpu_rsp_fflags_q;
|
||||
|
||||
// send response
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + (NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1),
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + (NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1),
|
||||
.SIZE (0)
|
||||
) rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (block_reset),
|
||||
.valid_in (fpu_rsp_valid),
|
||||
.ready_in (fpu_rsp_ready),
|
||||
.data_in ({fpu_rsp_uuid, fpu_rsp_wid, fpu_rsp_tmask, fpu_rsp_PC, fpu_rsp_rd, fpu_rsp_result, fpu_rsp_pid, fpu_rsp_sop, fpu_rsp_eop}),
|
||||
.data_out ({commit_block_if[block_idx].data.uuid, commit_block_if[block_idx].data.wid, commit_block_if[block_idx].data.tmask, commit_block_if[block_idx].data.PC, commit_block_if[block_idx].data.rd, commit_block_if[block_idx].data.data, commit_block_if[block_idx].data.pid, commit_block_if[block_idx].data.sop, commit_block_if[block_idx].data.eop}),
|
||||
.valid_out (commit_block_if[block_idx].valid),
|
||||
.ready_out (commit_block_if[block_idx].ready)
|
||||
.data_out ({per_block_commit_if[block_idx].data.uuid, per_block_commit_if[block_idx].data.wid, per_block_commit_if[block_idx].data.tmask, per_block_commit_if[block_idx].data.PC, per_block_commit_if[block_idx].data.rd, per_block_commit_if[block_idx].data.data, per_block_commit_if[block_idx].data.pid, per_block_commit_if[block_idx].data.sop, per_block_commit_if[block_idx].data.eop}),
|
||||
.valid_out (per_block_commit_if[block_idx].valid),
|
||||
.ready_out (per_block_commit_if[block_idx].ready)
|
||||
);
|
||||
assign commit_block_if[block_idx].data.wb = 1'b1;
|
||||
assign per_block_commit_if[block_idx].data.wb = 1'b1;
|
||||
end
|
||||
|
||||
`RESET_RELAY (commit_reset, reset);
|
||||
|
||||
VX_gather_unit #(
|
||||
.BLOCK_SIZE (BLOCK_SIZE),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.OUT_REG (PARTIAL_BW ? 3 : 0)
|
||||
.OUT_BUF (PARTIAL_BW ? 3 : 0)
|
||||
) gather_unit (
|
||||
.clk (clk),
|
||||
.reset (commit_reset),
|
||||
.commit_in_if (commit_block_if),
|
||||
.reset (reset),
|
||||
.commit_in_if (per_block_commit_if),
|
||||
.commit_out_if (commit_if)
|
||||
);
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -16,22 +16,24 @@
|
|||
module VX_gather_unit import VX_gpu_pkg::*; #(
|
||||
parameter BLOCK_SIZE = 1,
|
||||
parameter NUM_LANES = 1,
|
||||
parameter OUT_REG = 0
|
||||
) (
|
||||
parameter OUT_BUF = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_commit_if.slave commit_in_if [BLOCK_SIZE],
|
||||
|
||||
|
||||
// outputs
|
||||
VX_commit_if.master commit_out_if [`ISSUE_WIDTH]
|
||||
|
||||
);
|
||||
`STATIC_ASSERT (`IS_DIVISBLE(`ISSUE_WIDTH, BLOCK_SIZE), ("invalid parameter"))
|
||||
`STATIC_ASSERT (`IS_DIVISBLE(`NUM_THREADS, NUM_LANES), ("invalid parameter"))
|
||||
localparam BLOCK_SIZE_W = `LOG2UP(BLOCK_SIZE);
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + 1 + `NR_BITS + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + 1 + `NR_BITS + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam DATA_WIS_OFF = DATAW - (`UUID_WIDTH + `NW_WIDTH);
|
||||
|
||||
wire [BLOCK_SIZE-1:0] commit_in_valid;
|
||||
|
@ -71,24 +73,22 @@ module VX_gather_unit import VX_gpu_pkg::*; #(
|
|||
for (genvar i = 0; i < BLOCK_SIZE; ++i) begin
|
||||
assign commit_in_ready[i] = commit_out_ready[commit_in_isw[i]];
|
||||
end
|
||||
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) commit_tmp_if();
|
||||
|
||||
`RESET_RELAY(commit_out_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (`OUT_REG_TO_EB_SIZE(OUT_REG)),
|
||||
.OUT_REG (`OUT_REG_TO_EB_REG(OUT_REG))
|
||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (commit_out_reset),
|
||||
.reset (reset),
|
||||
.valid_in (commit_out_valid[i]),
|
||||
.ready_in (commit_out_ready[i]),
|
||||
.data_in (commit_out_data[i]),
|
||||
.data_in (commit_out_data[i]),
|
||||
.data_out (commit_tmp_if.data),
|
||||
.valid_out (commit_tmp_if.valid),
|
||||
.ready_out (commit_tmp_if.ready)
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,60 +14,73 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_ibuffer import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
output wire [`PERF_CTR_BITS-1:0] perf_stalls,
|
||||
`endif
|
||||
|
||||
// inputs
|
||||
VX_decode_if.slave decode_if,
|
||||
|
||||
// outputs
|
||||
VX_ibuffer_if.master ibuffer_if [`ISSUE_WIDTH]
|
||||
VX_ibuffer_if.master ibuffer_if [PER_ISSUE_WARPS]
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam ISW_WIDTH = `LOG2UP(`ISSUE_WIDTH);
|
||||
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `XLEN + 1 + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + `XLEN + (`NR_BITS * 4);
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0] ibuf_ready_in;
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam DATAW = `UUID_WIDTH + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + (`NR_BITS * 4);
|
||||
|
||||
wire [ISW_WIDTH-1:0] decode_isw = wid_to_isw(decode_if.data.wid);
|
||||
wire [ISSUE_WIS_W-1:0] decode_wis = wid_to_wis(decode_if.data.wid);
|
||||
|
||||
assign decode_if.ready = ibuf_ready_in[decode_isw];
|
||||
wire [PER_ISSUE_WARPS-1:0] ibuf_ready_in;
|
||||
assign decode_if.ready = ibuf_ready_in[decode_if.data.wid];
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (`IBUF_SIZE),
|
||||
.OUT_REG (1)
|
||||
.OUT_REG (2) // 2-cycle EB for area reduction
|
||||
) instr_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (decode_if.valid && decode_isw == i),
|
||||
.ready_in (ibuf_ready_in[i]),
|
||||
.valid_in (decode_if.valid && decode_if.data.wid == ISSUE_WIS_W'(w)),
|
||||
.data_in ({
|
||||
decode_if.data.uuid,
|
||||
decode_wis,
|
||||
decode_if.data.tmask,
|
||||
decode_if.data.PC,
|
||||
decode_if.data.ex_type,
|
||||
decode_if.data.op_type,
|
||||
decode_if.data.op_mod,
|
||||
decode_if.data.op_args,
|
||||
decode_if.data.wb,
|
||||
decode_if.data.use_PC,
|
||||
decode_if.data.use_imm,
|
||||
decode_if.data.PC,
|
||||
decode_if.data.imm,
|
||||
decode_if.data.rd,
|
||||
decode_if.data.rs1,
|
||||
decode_if.data.rs2,
|
||||
decode_if.data.rs3}),
|
||||
.data_out(ibuffer_if[i].data),
|
||||
.valid_out (ibuffer_if[i].valid),
|
||||
.ready_out(ibuffer_if[i].ready)
|
||||
);
|
||||
|
||||
assign decode_if.ibuf_pop[i] = ibuffer_if[i].valid && ibuffer_if[i].ready;
|
||||
decode_if.data.rd,
|
||||
decode_if.data.rs1,
|
||||
decode_if.data.rs2,
|
||||
decode_if.data.rs3
|
||||
}),
|
||||
.ready_in (ibuf_ready_in[w]),
|
||||
.valid_out(ibuffer_if[w].valid),
|
||||
.data_out (ibuffer_if[w].data),
|
||||
.ready_out(ibuffer_if[w].ready)
|
||||
);
|
||||
`ifndef L1_ENABLE
|
||||
assign decode_if.ibuf_pop[w] = ibuffer_if[w].valid && ibuffer_if[w].ready;
|
||||
`endif
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`PERF_CTR_BITS-1:0] perf_ibf_stalls;
|
||||
|
||||
wire decode_if_stall = decode_if.valid && ~decode_if.ready;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_ibf_stalls <= '0;
|
||||
end else begin
|
||||
perf_ibf_stalls <= perf_ibf_stalls + `PERF_CTR_BITS'(decode_if_stall);
|
||||
end
|
||||
end
|
||||
|
||||
assign perf_stalls = perf_ibf_stalls;
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -24,14 +24,15 @@ module VX_ipdom_stack #(
|
|||
input wire [WIDTH-1:0] q0,
|
||||
input wire [WIDTH-1:0] q1,
|
||||
output wire [WIDTH-1:0] d,
|
||||
output wire d_set,
|
||||
output wire d_set,
|
||||
output wire [ADDRW-1:0] q_ptr,
|
||||
input wire push,
|
||||
input wire pop,
|
||||
input wire pop,
|
||||
output wire empty,
|
||||
output wire full
|
||||
);
|
||||
reg slot_set [DEPTH-1:0];
|
||||
|
||||
|
||||
reg [ADDRW-1:0] rd_ptr, wr_ptr;
|
||||
|
||||
reg empty_r, full_r;
|
||||
|
@ -41,28 +42,28 @@ module VX_ipdom_stack #(
|
|||
wire d_set_n = slot_set[rd_ptr];
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
if (reset) begin
|
||||
rd_ptr <= '0;
|
||||
wr_ptr <= '0;
|
||||
empty_r <= 1;
|
||||
full_r <= 0;
|
||||
full_r <= 0;
|
||||
end else begin
|
||||
`ASSERT(~push || ~full, ("runtime error: writing to a full stack!"));
|
||||
`ASSERT(~pop || ~empty, ("runtime error: reading an empty stack!"));
|
||||
`ASSERT(~push || ~pop, ("runtime error: push and pop in same cycle not supported!"));
|
||||
if (push) begin
|
||||
if (push) begin
|
||||
rd_ptr <= wr_ptr;
|
||||
wr_ptr <= wr_ptr + ADDRW'(1);
|
||||
wr_ptr <= wr_ptr + ADDRW'(1);
|
||||
empty_r <= 0;
|
||||
full_r <= (ADDRW'(DEPTH-1) == wr_ptr);
|
||||
end else if (pop) begin
|
||||
end else if (pop) begin
|
||||
wr_ptr <= wr_ptr - ADDRW'(d_set_n);
|
||||
rd_ptr <= rd_ptr - ADDRW'(d_set_n);
|
||||
empty_r <= (rd_ptr == 0) && (d_set_n == 1);
|
||||
full_r <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (WIDTH * 2),
|
||||
|
@ -71,24 +72,26 @@ module VX_ipdom_stack #(
|
|||
.LUTRAM (OUT_REG ? 0 : 1)
|
||||
) store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (1'b1),
|
||||
.write (push),
|
||||
`UNUSED_PIN (wren),
|
||||
.write (push),
|
||||
.wren (1'b1),
|
||||
.waddr (wr_ptr),
|
||||
.wdata ({q1, q0}),
|
||||
.raddr (rd_ptr),
|
||||
.rdata ({d1, d0})
|
||||
);
|
||||
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (push) begin
|
||||
slot_set[wr_ptr] <= 0;
|
||||
end else if (pop) begin
|
||||
slot_set[wr_ptr] <= 0;
|
||||
end else if (pop) begin
|
||||
slot_set[rd_ptr] <= 1;
|
||||
end
|
||||
end
|
||||
|
||||
wire d_set_r;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1),
|
||||
.DEPTH (OUT_REG)
|
||||
|
@ -102,6 +105,7 @@ module VX_ipdom_stack #(
|
|||
|
||||
assign d = d_set_r ? d0 : d1;
|
||||
assign d_set = ~d_set_r;
|
||||
assign q_ptr = wr_ptr;
|
||||
assign empty = empty_r;
|
||||
assign full = full_r;
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -12,10 +12,9 @@
|
|||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
`include "VX_trace.vh"
|
||||
|
||||
module VX_issue #(
|
||||
parameter CORE_ID = 0
|
||||
module VX_issue import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
|
@ -23,152 +22,81 @@ module VX_issue #(
|
|||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_pipeline_perf_if.issue perf_issue_if,
|
||||
output issue_perf_t issue_perf,
|
||||
`endif
|
||||
|
||||
VX_decode_if.slave decode_if,
|
||||
VX_writeback_if.slave writeback_if [`ISSUE_WIDTH],
|
||||
|
||||
VX_dispatch_if.master alu_dispatch_if [`ISSUE_WIDTH],
|
||||
VX_dispatch_if.master lsu_dispatch_if [`ISSUE_WIDTH],
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_dispatch_if.master fpu_dispatch_if [`ISSUE_WIDTH],
|
||||
`endif
|
||||
VX_dispatch_if.master sfu_dispatch_if [`ISSUE_WIDTH]
|
||||
VX_dispatch_if.master dispatch_if [`NUM_EX_UNITS * `ISSUE_WIDTH]
|
||||
);
|
||||
VX_ibuffer_if ibuffer_if [`ISSUE_WIDTH]();
|
||||
VX_ibuffer_if scoreboard_if [`ISSUE_WIDTH]();
|
||||
VX_operands_if operands_if [`ISSUE_WIDTH]();
|
||||
|
||||
`RESET_RELAY (ibuf_reset, reset);
|
||||
`RESET_RELAY (scoreboard_reset, reset);
|
||||
`RESET_RELAY (operands_reset, reset);
|
||||
`RESET_RELAY (dispatch_reset, reset);
|
||||
|
||||
VX_ibuffer #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) ibuffer (
|
||||
.clk (clk),
|
||||
.reset (ibuf_reset),
|
||||
.decode_if (decode_if),
|
||||
.ibuffer_if (ibuffer_if)
|
||||
);
|
||||
|
||||
VX_scoreboard #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) scoreboard (
|
||||
.clk (clk),
|
||||
.reset (scoreboard_reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_scb_stalls(perf_issue_if.scb_stalls),
|
||||
.perf_units_uses(perf_issue_if.units_uses),
|
||||
.perf_sfu_uses (perf_issue_if.sfu_uses),
|
||||
`endif
|
||||
.writeback_if (writeback_if),
|
||||
.ibuffer_if (ibuffer_if),
|
||||
.scoreboard_if (scoreboard_if)
|
||||
);
|
||||
|
||||
VX_operands #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) operands (
|
||||
.clk (clk),
|
||||
.reset (operands_reset),
|
||||
.writeback_if (writeback_if),
|
||||
.scoreboard_if (scoreboard_if),
|
||||
.operands_if (operands_if)
|
||||
);
|
||||
|
||||
VX_dispatch #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) dispatch (
|
||||
.clk (clk),
|
||||
.reset (dispatch_reset),
|
||||
`ifdef PERF_ENABLE
|
||||
`UNUSED_PIN (perf_stalls),
|
||||
`endif
|
||||
.operands_if (operands_if),
|
||||
.alu_dispatch_if(alu_dispatch_if),
|
||||
.lsu_dispatch_if(lsu_dispatch_if),
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_dispatch_if(fpu_dispatch_if),
|
||||
`endif
|
||||
.sfu_dispatch_if(sfu_dispatch_if)
|
||||
);
|
||||
|
||||
`ifdef DBG_SCOPE_ISSUE
|
||||
if (CORE_ID == 0) begin
|
||||
`ifdef SCOPE
|
||||
wire operands_if_fire = operands_if[0].valid && operands_if[0].ready;
|
||||
wire operands_if_not_ready = ~operands_if[0].ready;
|
||||
wire writeback_if_valid = writeback_if[0].valid;
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (2),
|
||||
.TRIGGERW (4),
|
||||
.PROBEW (`UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS +
|
||||
1 + `NR_BITS + `XLEN + 1 + 1 + (`NUM_THREADS * 3 * `XLEN) +
|
||||
`UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1)
|
||||
) scope_tap (
|
||||
.clk(clk),
|
||||
.reset(scope_reset),
|
||||
.start(1'b0),
|
||||
.stop(1'b0),
|
||||
.triggers({
|
||||
reset,
|
||||
operands_if_fire,
|
||||
operands_if_not_ready,
|
||||
writeback_if_valid
|
||||
}),
|
||||
.probes({
|
||||
operands_if[0].data.uuid,
|
||||
operands_if[0].data.tmask,
|
||||
operands_if[0].data.ex_type,
|
||||
operands_if[0].data.op_type,
|
||||
operands_if[0].data.op_mod,
|
||||
operands_if[0].data.wb,
|
||||
operands_if[0].data.rd,
|
||||
operands_if[0].data.imm,
|
||||
operands_if[0].data.use_PC,
|
||||
operands_if[0].data.use_imm,
|
||||
operands_if[0].data.rs1_data,
|
||||
operands_if[0].data.rs2_data,
|
||||
operands_if[0].data.rs3_data,
|
||||
writeback_if[0].data.uuid,
|
||||
writeback_if[0].data.tmask,
|
||||
writeback_if[0].data.rd,
|
||||
writeback_if[0].data.data,
|
||||
writeback_if[0].data.eop
|
||||
}),
|
||||
.bus_in(scope_bus_in),
|
||||
.bus_out(scope_bus_out)
|
||||
);
|
||||
`endif
|
||||
`ifdef CHIPSCOPE
|
||||
ila_issue ila_issue_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({operands_if.uuid, ibuffer.rs3, ibuffer.rs2, ibuffer.rs1, operands_if.PC, operands_if.tmask, operands_if.wid, operands_if.ex_type, operands_if.op_type, operands_if.ready, operands_if.valid}),
|
||||
.probe1 ({writeback_if.uuid, writeback_if.data[0], writeback_if.PC, writeback_if.tmask, writeback_if.wid, writeback_if.eop, writeback_if.valid})
|
||||
);
|
||||
`endif
|
||||
end
|
||||
`else
|
||||
`SCOPE_IO_UNUSED()
|
||||
`endif
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`PERF_CTR_BITS-1:0] perf_ibf_stalls;
|
||||
|
||||
wire decode_stall = decode_if.valid && ~decode_if.ready;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_ibf_stalls <= '0;
|
||||
end else begin
|
||||
perf_ibf_stalls <= perf_ibf_stalls + `PERF_CTR_BITS'(decode_stall);
|
||||
end
|
||||
issue_perf_t per_issue_perf [`ISSUE_WIDTH];
|
||||
`PERF_COUNTER_ADD (issue_perf, per_issue_perf, ibf_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
|
||||
`PERF_COUNTER_ADD (issue_perf, per_issue_perf, scb_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
|
||||
`PERF_COUNTER_ADD (issue_perf, per_issue_perf, opd_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
|
||||
for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin
|
||||
`PERF_COUNTER_ADD (issue_perf, per_issue_perf, units_uses[i], `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
|
||||
end
|
||||
for (genvar i = 0; i < `NUM_SFU_UNITS; ++i) begin
|
||||
`PERF_COUNTER_ADD (issue_perf, per_issue_perf, sfu_uses[i], `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
|
||||
end
|
||||
|
||||
assign perf_issue_if.ibf_stalls = perf_ibf_stalls;
|
||||
`endif
|
||||
|
||||
wire [ISSUE_ISW_W-1:0] decode_isw = wid_to_isw(decode_if.data.wid);
|
||||
wire [ISSUE_WIS_W-1:0] decode_wis = wid_to_wis(decode_if.data.wid);
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0] decode_ready_in;
|
||||
assign decode_if.ready = decode_ready_in[decode_isw];
|
||||
|
||||
`SCOPE_IO_SWITCH (`ISSUE_WIDTH)
|
||||
|
||||
for (genvar issue_id = 0; issue_id < `ISSUE_WIDTH; ++issue_id) begin : issue_slices
|
||||
VX_decode_if #(
|
||||
.NUM_WARPS (PER_ISSUE_WARPS)
|
||||
) per_issue_decode_if();
|
||||
|
||||
VX_dispatch_if per_issue_dispatch_if[`NUM_EX_UNITS]();
|
||||
|
||||
assign per_issue_decode_if.valid = decode_if.valid && (decode_isw == ISSUE_ISW_W'(issue_id));
|
||||
assign per_issue_decode_if.data.uuid = decode_if.data.uuid;
|
||||
assign per_issue_decode_if.data.wid = decode_wis;
|
||||
assign per_issue_decode_if.data.tmask = decode_if.data.tmask;
|
||||
assign per_issue_decode_if.data.PC = decode_if.data.PC;
|
||||
assign per_issue_decode_if.data.ex_type = decode_if.data.ex_type;
|
||||
assign per_issue_decode_if.data.op_type = decode_if.data.op_type;
|
||||
assign per_issue_decode_if.data.op_args = decode_if.data.op_args;
|
||||
assign per_issue_decode_if.data.wb = decode_if.data.wb;
|
||||
assign per_issue_decode_if.data.rd = decode_if.data.rd;
|
||||
assign per_issue_decode_if.data.rs1 = decode_if.data.rs1;
|
||||
assign per_issue_decode_if.data.rs2 = decode_if.data.rs2;
|
||||
assign per_issue_decode_if.data.rs3 = decode_if.data.rs3;
|
||||
assign decode_ready_in[issue_id] = per_issue_decode_if.ready;
|
||||
`ifndef L1_ENABLE
|
||||
assign decode_if.ibuf_pop[issue_id * PER_ISSUE_WARPS +: PER_ISSUE_WARPS] = per_issue_decode_if.ibuf_pop;
|
||||
`endif
|
||||
|
||||
`RESET_RELAY (slice_reset, reset);
|
||||
|
||||
VX_issue_slice #(
|
||||
.INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, issue_id)),
|
||||
.ISSUE_ID (issue_id)
|
||||
) issue_slice (
|
||||
`SCOPE_IO_BIND(issue_id)
|
||||
.clk (clk),
|
||||
.reset (slice_reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.issue_perf (per_issue_perf[issue_id]),
|
||||
`endif
|
||||
.decode_if (per_issue_decode_if),
|
||||
.writeback_if (writeback_if[issue_id]),
|
||||
.dispatch_if (per_issue_dispatch_if)
|
||||
);
|
||||
|
||||
// Assign transposed dispatch_if
|
||||
for (genvar ex_id = 0; ex_id < `NUM_EX_UNITS; ++ex_id) begin
|
||||
`ASSIGN_VX_IF(dispatch_if[ex_id * `ISSUE_WIDTH + issue_id], per_issue_dispatch_if[ex_id]);
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
159
hw/rtl/core/VX_issue_slice.sv
Normal file
159
hw/rtl/core/VX_issue_slice.sv
Normal file
|
@ -0,0 +1,159 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// VX_issue_slice
//
// One slice of the issue stage. It instantiates four sub-blocks and chains
// them through local interfaces:
//
//   decode_if -> ibuffer -> (ibuffer_if) -> scoreboard -> (scoreboard_if)
//             -> operands -> (operands_if) -> dispatch -> dispatch_if[]
//
// writeback_if is fed to both the scoreboard and the operands block.
// Each sub-block receives its own relayed copy of `reset`.
//
// Parameters:
//   INSTANCE_ID - name prefix used for sub-block instance ids and tracing.
//   ISSUE_ID    - slice index; only consumed by the pipeline trace below.
module VX_issue_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
    parameter `STRING INSTANCE_ID = "",
    parameter ISSUE_ID = 0
) (
    `SCOPE_IO_DECL

    input wire              clk,
    input wire              reset,

`ifdef PERF_ENABLE
    output issue_perf_t     issue_perf,
`endif

    VX_decode_if.slave      decode_if,
    VX_writeback_if.slave   writeback_if,
    VX_dispatch_if.master   dispatch_if [`NUM_EX_UNITS]
);
    `UNUSED_PARAM (ISSUE_ID)

    // Per-stage reset relays.
    `RESET_RELAY (ibuf_reset, reset);
    `RESET_RELAY (scoreboard_reset, reset);
    `RESET_RELAY (operands_reset, reset);
    `RESET_RELAY (dispatch_reset, reset);

    // Stage-to-stage interfaces.
    VX_operands_if   operands_if();
    VX_scoreboard_if scoreboard_if();
    VX_ibuffer_if    ibuffer_if [PER_ISSUE_WARPS]();

    // Stage 1: instruction buffer.
    VX_ibuffer #(
        .INSTANCE_ID ($sformatf("%s-ibuffer", INSTANCE_ID))
    ) ibuffer (
        .clk         (clk),
        .reset       (ibuf_reset),
        .decode_if   (decode_if),
    `ifdef PERF_ENABLE
        .perf_stalls (issue_perf.ibf_stalls),
    `endif
        .ibuffer_if  (ibuffer_if)
    );

    // Stage 2: scoreboard.
    VX_scoreboard #(
        .INSTANCE_ID ($sformatf("%s-scoreboard", INSTANCE_ID))
    ) scoreboard (
        .clk            (clk),
        .reset          (scoreboard_reset),
        .ibuffer_if     (ibuffer_if),
        .writeback_if   (writeback_if),
    `ifdef PERF_ENABLE
        .perf_sfu_uses  (issue_perf.sfu_uses),
        .perf_units_uses(issue_perf.units_uses),
        .perf_stalls    (issue_perf.scb_stalls),
    `endif
        .scoreboard_if  (scoreboard_if)
    );

    // Stage 3: operand collection.
    VX_operands #(
        .INSTANCE_ID ($sformatf("%s-operands", INSTANCE_ID))
    ) operands (
        .clk           (clk),
        .reset         (operands_reset),
        .scoreboard_if (scoreboard_if),
        .writeback_if  (writeback_if),
    `ifdef PERF_ENABLE
        .perf_stalls   (issue_perf.opd_stalls),
    `endif
        .operands_if   (operands_if)
    );

    // Stage 4: dispatch to the execution units (its perf counter is left open).
    VX_dispatch #(
        .INSTANCE_ID ($sformatf("%s-dispatch", INSTANCE_ID))
    ) dispatch (
        .clk         (clk),
        .reset       (dispatch_reset),
        .operands_if (operands_if),
    `ifdef PERF_ENABLE
        `UNUSED_PIN  (perf_stalls),
    `endif
        .dispatch_if (dispatch_if)
    );

`ifdef DBG_SCOPE_ISSUE
    // Probe bundle widths; the sum must match the .probes concatenation below.
    localparam OPD_PROBEW = `UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS +
                            1 + `NR_BITS + (`NUM_THREADS * 3 * `XLEN);
    localparam WB_PROBEW  = `UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1;

    // Trigger sources.
    wire wb_valid    = writeback_if.valid;
    wire opd_stalled = ~operands_if.ready;
    wire opd_fire    = operands_if.valid && operands_if.ready;

    VX_scope_tap #(
        .SCOPE_ID (2),
        .TRIGGERW (4),
        .PROBEW   (OPD_PROBEW + WB_PROBEW)
    ) scope_tap (
        .clk      (clk),
        .reset    (scope_reset),
        .start    (1'b0),
        .stop     (1'b0),
        .triggers ({
            reset,
            opd_fire,
            opd_stalled,
            wb_valid
        }),
        .probes   ({
            // operands stage output
            operands_if.data.uuid,
            operands_if.data.tmask,
            operands_if.data.ex_type,
            operands_if.data.op_type,
            operands_if.data.wb,
            operands_if.data.rd,
            operands_if.data.rs1_data,
            operands_if.data.rs2_data,
            operands_if.data.rs3_data,
            // writeback input
            writeback_if.data.uuid,
            writeback_if.data.tmask,
            writeback_if.data.rd,
            writeback_if.data.data,
            writeback_if.data.eop
        }),
        .bus_in   (scope_bus_in),
        .bus_out  (scope_bus_out)
    );
`else
    `SCOPE_IO_UNUSED()
`endif

`ifdef DBG_TRACE_PIPELINE
    // Print one trace record for every instruction accepted on operands_if.
    wire trace_opd_fire = operands_if.valid && operands_if.ready;

    always @(posedge clk) begin
        if (trace_opd_fire) begin
            `TRACE(1, ("%d: %s wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), {operands_if.data.PC, 1'b0}));
            trace_ex_type(1, operands_if.data.ex_type);
            `TRACE(1, (", op="));
            trace_ex_op(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
            `TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if.data.tmask, operands_if.data.wb, operands_if.data.rd));
            `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs1_data, `NUM_THREADS);
            `TRACE(1, (", rs2_data="));
            `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs2_data, `NUM_THREADS);
            `TRACE(1, (", rs3_data="));
            `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs3_data, `NUM_THREADS);
            trace_op_args(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
            `TRACE(1, (" (#%0d)\n", operands_if.data.uuid));
        end
    end
`endif

endmodule
|
132
hw/rtl/core/VX_issue_top.sv
Normal file
132
hw/rtl/core/VX_issue_top.sv
Normal file
|
@ -0,0 +1,132 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// VX_issue_top
//
// Flat-port wrapper around VX_issue. All decode / writeback / dispatch
// interface signals are exposed as plain wires (unpacked arrays where the
// wrapped interface is an array) and connected 1:1 to interface instances
// that are then handed to the VX_issue instance.
module VX_issue_top import VX_gpu_pkg::*; #(
    parameter `STRING INSTANCE_ID = "issue"
) (
    // Clock
    input wire                              clk,
    input wire                              reset,

    input wire                              decode_valid,
    input wire [`UUID_WIDTH-1:0]            decode_uuid,
    input wire [`NW_WIDTH-1:0]              decode_wid,
    input wire [`NUM_THREADS-1:0]           decode_tmask,
    input wire [`PC_BITS-1:0]               decode_PC,
    input wire [`EX_BITS-1:0]               decode_ex_type,
    input wire [`INST_OP_BITS-1:0]          decode_op_type,
    input op_args_t                         decode_op_args,
    input wire                              decode_wb,
    input wire [`NR_BITS-1:0]               decode_rd,
    input wire [`NR_BITS-1:0]               decode_rs1,
    input wire [`NR_BITS-1:0]               decode_rs2,
    input wire [`NR_BITS-1:0]               decode_rs3,
    output wire                             decode_ready,

    input wire                              writeback_valid[`ISSUE_WIDTH],
    input wire [`UUID_WIDTH-1:0]            writeback_uuid[`ISSUE_WIDTH],
    input wire [ISSUE_WIS_W-1:0]            writeback_wis[`ISSUE_WIDTH],
    input wire [`NUM_THREADS-1:0]           writeback_tmask[`ISSUE_WIDTH],
    input wire [`PC_BITS-1:0]               writeback_PC[`ISSUE_WIDTH],
    input wire [`NR_BITS-1:0]               writeback_rd[`ISSUE_WIDTH],
    input wire [`NUM_THREADS-1:0][`XLEN-1:0] writeback_data[`ISSUE_WIDTH],
    input wire                              writeback_sop[`ISSUE_WIDTH],
    input wire                              writeback_eop[`ISSUE_WIDTH],

    output wire                             dispatch_valid[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire [`UUID_WIDTH-1:0]           dispatch_uuid[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire [ISSUE_WIS_W-1:0]           dispatch_wis[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire [`NUM_THREADS-1:0]          dispatch_tmask[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire [`PC_BITS-1:0]              dispatch_PC[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire [`INST_ALU_BITS-1:0]        dispatch_op_type[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output op_args_t                        dispatch_op_args[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire                             dispatch_wb[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire [`NR_BITS-1:0]              dispatch_rd[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire [`NT_WIDTH-1:0]             dispatch_tid[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs1_data[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs2_data[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs3_data[`NUM_EX_UNITS * `ISSUE_WIDTH],
    input wire                              dispatch_ready[`NUM_EX_UNITS * `ISSUE_WIDTH]
);

    // Interface instances bound to the wrapped VX_issue.
    VX_writeback_if writeback_if[`ISSUE_WIDTH]();
    VX_dispatch_if  dispatch_if[`NUM_EX_UNITS * `ISSUE_WIDTH]();
    VX_decode_if    decode_if();

    // decode: flat ports <-> interface
    assign decode_ready           = decode_if.ready;
    assign decode_if.valid        = decode_valid;
    assign decode_if.data.uuid    = decode_uuid;
    assign decode_if.data.wid     = decode_wid;
    assign decode_if.data.tmask   = decode_tmask;
    assign decode_if.data.PC      = decode_PC;
    assign decode_if.data.ex_type = decode_ex_type;
    assign decode_if.data.op_type = decode_op_type;
    assign decode_if.data.op_args = decode_op_args;
    assign decode_if.data.wb      = decode_wb;
    assign decode_if.data.rd      = decode_rd;
    assign decode_if.data.rs1     = decode_rs1;
    assign decode_if.data.rs2     = decode_rs2;
    assign decode_if.data.rs3     = decode_rs3;

    // writeback: one flat port set per issue slot
    for (genvar wb_idx = 0; wb_idx < `ISSUE_WIDTH; ++wb_idx) begin
        assign writeback_if[wb_idx].valid      = writeback_valid[wb_idx];
        assign writeback_if[wb_idx].data.sop   = writeback_sop[wb_idx];
        assign writeback_if[wb_idx].data.eop   = writeback_eop[wb_idx];
        assign writeback_if[wb_idx].data.uuid  = writeback_uuid[wb_idx];
        assign writeback_if[wb_idx].data.wis   = writeback_wis[wb_idx];
        assign writeback_if[wb_idx].data.tmask = writeback_tmask[wb_idx];
        assign writeback_if[wb_idx].data.PC    = writeback_PC[wb_idx];
        assign writeback_if[wb_idx].data.rd    = writeback_rd[wb_idx];
        assign writeback_if[wb_idx].data.data  = writeback_data[wb_idx];
    end

    // dispatch: one flat port set per (execution unit, issue slot) pair
    for (genvar dp_idx = 0; dp_idx < `NUM_EX_UNITS * `ISSUE_WIDTH; ++dp_idx) begin
        assign dispatch_if[dp_idx].ready = dispatch_ready[dp_idx];
        assign dispatch_valid[dp_idx]    = dispatch_if[dp_idx].valid;
        assign dispatch_uuid[dp_idx]     = dispatch_if[dp_idx].data.uuid;
        assign dispatch_wis[dp_idx]      = dispatch_if[dp_idx].data.wis;
        assign dispatch_tmask[dp_idx]    = dispatch_if[dp_idx].data.tmask;
        assign dispatch_PC[dp_idx]       = dispatch_if[dp_idx].data.PC;
        assign dispatch_op_type[dp_idx]  = dispatch_if[dp_idx].data.op_type;
        assign dispatch_op_args[dp_idx]  = dispatch_if[dp_idx].data.op_args;
        assign dispatch_wb[dp_idx]       = dispatch_if[dp_idx].data.wb;
        assign dispatch_rd[dp_idx]       = dispatch_if[dp_idx].data.rd;
        assign dispatch_tid[dp_idx]      = dispatch_if[dp_idx].data.tid;
        assign dispatch_rs1_data[dp_idx] = dispatch_if[dp_idx].data.rs1_data;
        assign dispatch_rs2_data[dp_idx] = dispatch_if[dp_idx].data.rs2_data;
        assign dispatch_rs3_data[dp_idx] = dispatch_if[dp_idx].data.rs3_data;
    end

`ifdef PERF_ENABLE
    // Perf counters are not exported by this wrapper; this variable only
    // terminates the issue_perf port of VX_issue.
    // NOTE(review): the declaration initializer coexists with the port binding
    // below - confirm the target tools accept this combination.
    issue_perf_t issue_perf = '0;
`endif

    VX_issue #(
        .INSTANCE_ID (INSTANCE_ID)
    ) issue (
        `SCOPE_IO_BIND (0)

        .clk          (clk),
        .reset        (reset),

        .decode_if    (decode_if),
        .writeback_if (writeback_if),

    `ifdef PERF_ENABLE
        .issue_perf   (issue_perf),
    `endif

        .dispatch_if  (dispatch_if)
    );

endmodule
|
201
hw/rtl/core/VX_lmem_unit.sv
Normal file
201
hw/rtl/core/VX_lmem_unit.sv
Normal file
|
@ -0,0 +1,201 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// VX_lmem_unit
//
// Splits every LSU block's memory request stream into a "global" path
// (forwarded to lsu_mem_out_if) and a "local" path (served by the local
// memory instantiated here), then merges both response streams back onto
// lsu_mem_in_if.
//
// A lane is local when its atype[`ADDR_TYPE_LOCAL] bit is set. A single
// request may carry both local and global lanes; each path then receives the
// request with only its own lanes left in the mask.
//
// Ports:
//   lsu_mem_in_if  - requests from / responses to the LSU, one per LSU block.
//   lsu_mem_out_if - global-memory side, one per LSU block.
//   cache_perf     - (PERF_ENABLE only) counters driven by the local memory.
module VX_lmem_unit import VX_gpu_pkg::*; #(
    parameter `STRING INSTANCE_ID = ""
) (
    input wire              clk,
    input wire              reset,

`ifdef PERF_ENABLE
    output cache_perf_t     cache_perf,
`endif

    VX_lsu_mem_if.slave     lsu_mem_in_if [`NUM_LSU_BLOCKS],
    VX_lsu_mem_if.master    lsu_mem_out_if [`NUM_LSU_BLOCKS]
);
    `STATIC_ASSERT(`IS_DIVISBLE((1 << `LMEM_LOG_SIZE), `MEM_BLOCK_SIZE), ("invalid parameter"))
    `STATIC_ASSERT(0 == (`LMEM_BASE_ADDR % (1 << `LMEM_LOG_SIZE)), ("invalid parameter"))

    // request payload: mask + rw + per-lane (byteen, addr, atype, data) + tag
    localparam REQ_DATAW = `NUM_LSU_LANES + 1 + `NUM_LSU_LANES * (LSU_WORD_SIZE + LSU_ADDR_WIDTH + `ADDR_TYPE_WIDTH + LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH;
    // response payload: mask + per-lane data + tag
    localparam RSP_DATAW = `NUM_LSU_LANES + `NUM_LSU_LANES * (LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH;
    // word address width inside the local memory
    localparam LMEM_ADDR_WIDTH = `LMEM_LOG_SIZE - `CLOG2(LSU_WORD_SIZE);

    // local-path request/response stream of each LSU block
    VX_lsu_mem_if #(
        .NUM_LANES (`NUM_LSU_LANES),
        .DATA_SIZE (LSU_WORD_SIZE),
        .TAG_WIDTH (LSU_TAG_WIDTH)
    ) lsu_switch_if[`NUM_LSU_BLOCKS]();

    `RESET_RELAY_EX (block_reset, reset, `NUM_LSU_BLOCKS, 1);

    for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin

        // per-lane "address is local" flag
        wire [`NUM_LSU_LANES-1:0] is_addr_local_mask;
        for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin
            assign is_addr_local_mask[j] = lsu_mem_in_if[i].req_data.atype[j][`ADDR_TYPE_LOCAL];
        end

        // does the request have at least one active lane on each side?
        wire is_addr_global = | (lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask);
        wire is_addr_local  = | (lsu_mem_in_if[i].req_data.mask & is_addr_local_mask);

        wire req_global_ready;
        wire req_local_ready;

        // A side is "clear" when the request has no lanes for it, or when its
        // buffer can accept this cycle.
        //
        // The previous logic acknowledged the upstream request as soon as
        // EITHER involved side was ready, and presented valid to each side
        // independently of the other. For a request mixing local and global
        // lanes this completed the upstream handshake while the stalled side
        // had not taken its lanes, silently dropping them. Both sides must
        // accept in the same cycle: each side's valid is gated by the other
        // side being clear, and the upstream ready requires both.
        // Requests with lanes on only one side (or with no active lane)
        // behave exactly as before.
        // NOTE(review): assumes neither buffer's ready_in depends
        // combinationally on its own valid_in - confirm against
        // VX_elastic_buffer / VX_stream_unpack.
        wire req_global_clear = ~is_addr_global || req_global_ready;
        wire req_local_clear  = ~is_addr_local  || req_local_ready;

        // global path: SIZE=2 buffer with OUT_REG=1 towards lsu_mem_out_if
        VX_elastic_buffer #(
            .DATAW   (REQ_DATAW),
            .SIZE    (2),
            .OUT_REG (1)
        ) req_global_buf (
            .clk       (clk),
            .reset     (block_reset[i]),
            .valid_in  (lsu_mem_in_if[i].req_valid && is_addr_global && req_local_clear),
            .data_in   ({
                lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask,
                lsu_mem_in_if[i].req_data.rw,
                lsu_mem_in_if[i].req_data.byteen,
                lsu_mem_in_if[i].req_data.addr,
                lsu_mem_in_if[i].req_data.atype,
                lsu_mem_in_if[i].req_data.data,
                lsu_mem_in_if[i].req_data.tag
            }),
            .ready_in  (req_global_ready),
            .valid_out (lsu_mem_out_if[i].req_valid),
            .data_out  ({
                lsu_mem_out_if[i].req_data.mask,
                lsu_mem_out_if[i].req_data.rw,
                lsu_mem_out_if[i].req_data.byteen,
                lsu_mem_out_if[i].req_data.addr,
                lsu_mem_out_if[i].req_data.atype,
                lsu_mem_out_if[i].req_data.data,
                lsu_mem_out_if[i].req_data.tag
            }),
            .ready_out (lsu_mem_out_if[i].req_ready)
        );

        // local path: SIZE=0 / OUT_REG=0 buffer towards lsu_switch_if
        VX_elastic_buffer #(
            .DATAW   (REQ_DATAW),
            .SIZE    (0),
            .OUT_REG (0)
        ) req_local_buf (
            .clk       (clk),
            .reset     (block_reset[i]),
            .valid_in  (lsu_mem_in_if[i].req_valid && is_addr_local && req_global_clear),
            .data_in   ({
                lsu_mem_in_if[i].req_data.mask & is_addr_local_mask,
                lsu_mem_in_if[i].req_data.rw,
                lsu_mem_in_if[i].req_data.byteen,
                lsu_mem_in_if[i].req_data.addr,
                lsu_mem_in_if[i].req_data.atype,
                lsu_mem_in_if[i].req_data.data,
                lsu_mem_in_if[i].req_data.tag
            }),
            .ready_in  (req_local_ready),
            .valid_out (lsu_switch_if[i].req_valid),
            .data_out  ({
                lsu_switch_if[i].req_data.mask,
                lsu_switch_if[i].req_data.rw,
                lsu_switch_if[i].req_data.byteen,
                lsu_switch_if[i].req_data.addr,
                lsu_switch_if[i].req_data.atype,
                lsu_switch_if[i].req_data.data,
                lsu_switch_if[i].req_data.tag
            }),
            .ready_out (lsu_switch_if[i].req_ready)
        );

        // Acknowledge upstream only when every involved side accepts. A
        // request with no active lane on either side is never acknowledged,
        // as before.
        assign lsu_mem_in_if[i].req_ready = (is_addr_global || is_addr_local)
                                         && req_global_clear
                                         && req_local_clear;

        // merge local and global responses back to the LSU
        VX_stream_arb #(
            .NUM_INPUTS (2),
            .DATAW      (RSP_DATAW),
            .ARBITER    ("R"),
            .OUT_BUF    (1)
        ) rsp_arb (
            .clk       (clk),
            .reset     (block_reset[i]),
            .valid_in  ({
                lsu_switch_if[i].rsp_valid,
                lsu_mem_out_if[i].rsp_valid
            }),
            .ready_in  ({
                lsu_switch_if[i].rsp_ready,
                lsu_mem_out_if[i].rsp_ready
            }),
            .data_in   ({
                lsu_switch_if[i].rsp_data,
                lsu_mem_out_if[i].rsp_data
            }),
            .data_out  (lsu_mem_in_if[i].rsp_data),
            .valid_out (lsu_mem_in_if[i].rsp_valid),
            .ready_out (lsu_mem_in_if[i].rsp_ready),
            `UNUSED_PIN (sel_out)
        );
    end

    // flattened per-lane buses of all LSU blocks, as consumed by the local memory
    VX_mem_bus_if #(
        .DATA_SIZE (LSU_WORD_SIZE),
        .TAG_WIDTH (LSU_TAG_WIDTH)
    ) lmem_bus_if[LSU_NUM_REQS]();

    for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
        VX_mem_bus_if #(
            .DATA_SIZE (LSU_WORD_SIZE),
            .TAG_WIDTH (LSU_TAG_WIDTH)
        ) lmem_bus_tmp_if[`NUM_LSU_LANES]();

        // unpack the block's multi-lane local stream into per-lane buses
        VX_lsu_adapter #(
            .NUM_LANES    (`NUM_LSU_LANES),
            .DATA_SIZE    (LSU_WORD_SIZE),
            .TAG_WIDTH    (LSU_TAG_WIDTH),
            .TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH),
            .ARBITER      ("P"),
            .REQ_OUT_BUF  (3),
            .RSP_OUT_BUF  (0)
        ) lsu_adapter (
            .clk        (clk),
            .reset      (block_reset[i]),
            .lsu_mem_if (lsu_switch_if[i]),
            .mem_bus_if (lmem_bus_tmp_if)
        );

        // block i, lane j -> flat request index i * NUM_LSU_LANES + j
        for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin
            `ASSIGN_VX_MEM_BUS_IF (lmem_bus_if[i * `NUM_LSU_LANES + j], lmem_bus_tmp_if[j]);
        end
    end

    `RESET_RELAY (lmem_reset, reset);

    VX_local_mem #(
        .INSTANCE_ID($sformatf("%s-lmem", INSTANCE_ID)),
        .SIZE       (1 << `LMEM_LOG_SIZE),
        .NUM_REQS   (LSU_NUM_REQS),
        .NUM_BANKS  (`LMEM_NUM_BANKS),
        .WORD_SIZE  (LSU_WORD_SIZE),
        .ADDR_WIDTH (LMEM_ADDR_WIDTH),
        .UUID_WIDTH (`UUID_WIDTH),
        .TAG_WIDTH  (LSU_TAG_WIDTH),
        .OUT_BUF    (3)
    ) local_mem (
        .clk        (clk),
        .reset      (lmem_reset),
    `ifdef PERF_ENABLE
        .cache_perf (cache_perf),
    `endif
        .mem_bus_if (lmem_bus_if)
    );

endmodule
|
121
hw/rtl/core/VX_lsu_adapter.sv
Normal file
121
hw/rtl/core/VX_lsu_adapter.sv
Normal file
|
@ -0,0 +1,121 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// VX_lsu_adapter
//
// Bridges one multi-lane LSU memory interface to NUM_LANES single-lane memory
// buses:
//   * requests  : lsu_mem_if.req_*  -> VX_stream_unpack -> mem_bus_if[lane].req_*
//   * responses : mem_bus_if[lane].rsp_* -> VX_stream_pack -> lsu_mem_if.rsp_*
//
// Parameters:
//   NUM_LANES    - number of lanes / single-lane buses.
//   DATA_SIZE    - lane data size in bytes.
//   TAG_WIDTH    - request/response tag width.
//   TAG_SEL_BITS - forwarded to VX_stream_pack.
//   ARBITER      - forwarded to VX_stream_pack.
//   REQ_OUT_BUF  - output buffering of the request unpacker.
//   RSP_OUT_BUF  - output buffering of the response packer.
module VX_lsu_adapter import VX_gpu_pkg::*; #(
    parameter NUM_LANES       = 1,
    parameter DATA_SIZE       = 1,
    parameter TAG_WIDTH       = 1,
    parameter TAG_SEL_BITS    = 0,
    parameter `STRING ARBITER = "P",
    parameter REQ_OUT_BUF     = 0,
    parameter RSP_OUT_BUF     = 0
) (
    input wire              clk,
    input wire              reset,

    VX_lsu_mem_if.slave     lsu_mem_if,
    VX_mem_bus_if.master    mem_bus_if [NUM_LANES]
);
    localparam LANE_DATA_BITS = DATA_SIZE * 8;
    localparam LANE_ADDR_BITS = `MEM_ADDR_WIDTH - `CLOG2(DATA_SIZE);
    // per-lane request payload: rw + byteen + addr + atype + data
    localparam LANE_REQ_BITS  = 1 + DATA_SIZE + LANE_ADDR_BITS + `ADDR_TYPE_WIDTH + LANE_DATA_BITS;

    // ---------------------------------------------------------------------
    // Request path: unpack one multi-lane request into per-lane requests
    // ---------------------------------------------------------------------

    wire [NUM_LANES-1:0][LANE_REQ_BITS-1:0] lane_req_packed; // unpacker input
    wire [NUM_LANES-1:0][LANE_REQ_BITS-1:0] lane_req_data;   // unpacker output
    wire [NUM_LANES-1:0][TAG_WIDTH-1:0]     lane_req_tag;
    wire [NUM_LANES-1:0]                    lane_req_valid;
    wire [NUM_LANES-1:0]                    lane_req_ready;

    for (genvar lane = 0; lane < NUM_LANES; ++lane) begin
        // Pack the lane's fields; rw is shared by all lanes.
        assign lane_req_packed[lane] = {
            lsu_mem_if.req_data.rw,
            lsu_mem_if.req_data.byteen[lane],
            lsu_mem_if.req_data.addr[lane],
            lsu_mem_if.req_data.atype[lane],
            lsu_mem_if.req_data.data[lane]
        };

        // Drive the lane's bus from the unpacker output (same field order).
        assign {
            mem_bus_if[lane].req_data.rw,
            mem_bus_if[lane].req_data.byteen,
            mem_bus_if[lane].req_data.addr,
            mem_bus_if[lane].req_data.atype,
            mem_bus_if[lane].req_data.data
        } = lane_req_data[lane];
        assign mem_bus_if[lane].req_data.tag = lane_req_tag[lane];
        assign mem_bus_if[lane].req_valid    = lane_req_valid[lane];
        assign lane_req_ready[lane]          = mem_bus_if[lane].req_ready;
    end

    VX_stream_unpack #(
        .NUM_REQS   (NUM_LANES),
        .DATA_WIDTH (LANE_REQ_BITS),
        .TAG_WIDTH  (TAG_WIDTH),
        .OUT_BUF    (REQ_OUT_BUF)
    ) stream_unpack (
        .clk       (clk),
        .reset     (reset),
        // multi-lane side
        .valid_in  (lsu_mem_if.req_valid),
        .ready_in  (lsu_mem_if.req_ready),
        .mask_in   (lsu_mem_if.req_data.mask),
        .tag_in    (lsu_mem_if.req_data.tag),
        .data_in   (lane_req_packed),
        // per-lane side
        .valid_out (lane_req_valid),
        .ready_out (lane_req_ready),
        .tag_out   (lane_req_tag),
        .data_out  (lane_req_data)
    );

    // ---------------------------------------------------------------------
    // Response path: pack per-lane responses into one multi-lane response
    // ---------------------------------------------------------------------

    wire [NUM_LANES-1:0][LANE_DATA_BITS-1:0] lane_rsp_data;
    wire [NUM_LANES-1:0][TAG_WIDTH-1:0]      lane_rsp_tag;
    wire [NUM_LANES-1:0]                     lane_rsp_valid;
    wire [NUM_LANES-1:0]                     lane_rsp_ready;

    for (genvar lane = 0; lane < NUM_LANES; ++lane) begin
        assign mem_bus_if[lane].rsp_ready = lane_rsp_ready[lane];
        assign lane_rsp_valid[lane]       = mem_bus_if[lane].rsp_valid;
        assign lane_rsp_tag[lane]         = mem_bus_if[lane].rsp_data.tag;
        assign lane_rsp_data[lane]        = mem_bus_if[lane].rsp_data.data;
    end

    VX_stream_pack #(
        .NUM_REQS     (NUM_LANES),
        .DATA_WIDTH   (LANE_DATA_BITS),
        .TAG_WIDTH    (TAG_WIDTH),
        .TAG_SEL_BITS (TAG_SEL_BITS),
        .ARBITER      (ARBITER),
        .OUT_BUF      (RSP_OUT_BUF)
    ) stream_pack (
        .clk       (clk),
        .reset     (reset),
        // per-lane side
        .valid_in  (lane_rsp_valid),
        .ready_in  (lane_rsp_ready),
        .tag_in    (lane_rsp_tag),
        .data_in   (lane_rsp_data),
        // multi-lane side
        .valid_out (lsu_mem_if.rsp_valid),
        .ready_out (lsu_mem_if.rsp_ready),
        .mask_out  (lsu_mem_if.rsp_data.mask),
        .tag_out   (lsu_mem_if.rsp_data.tag),
        .data_out  (lsu_mem_if.rsp_data.data)
    );

endmodule
|
557
hw/rtl/core/VX_lsu_slice.sv
Normal file
557
hw/rtl/core/VX_lsu_slice.sv
Normal file
|
@ -0,0 +1,557 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Inputs
|
||||
VX_execute_if.slave execute_if,
|
||||
|
||||
// Outputs
|
||||
VX_commit_if.master commit_if,
|
||||
VX_lsu_mem_if.master lsu_mem_if
|
||||
);
|
||||
localparam NUM_LANES = `NUM_LSU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam LSUQ_SIZEW = `LOG2UP(`LSUQ_IN_SIZE);
|
||||
localparam REQ_ASHIFT = `CLOG2(LSU_WORD_SIZE);
|
||||
localparam MEM_ASHIFT = `CLOG2(`MEM_BLOCK_SIZE);
|
||||
localparam MEM_ADDRW = `MEM_ADDR_WIDTH - MEM_ASHIFT;
|
||||
|
||||
// tag_id = wid + PC + wb + rd + op_type + align + pid + pkt_addr + fence
|
||||
localparam TAG_ID_WIDTH = `NW_WIDTH + `PC_BITS + 1 + `NR_BITS + `INST_LSU_BITS + (NUM_LANES * REQ_ASHIFT) + PID_WIDTH + LSUQ_SIZEW + 1;
|
||||
|
||||
// tag = uuid + tag_id
|
||||
localparam TAG_WIDTH = `UUID_WIDTH + TAG_ID_WIDTH;
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) commit_rsp_if();
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) commit_no_rsp_if();
|
||||
|
||||
`UNUSED_VAR (execute_if.data.rs3_data)
|
||||
`UNUSED_VAR (execute_if.data.tid)
|
||||
|
||||
// full address calculation
|
||||
|
||||
wire req_is_fence, rsp_is_fence;
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] full_addr;
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign full_addr[i] = execute_if.data.rs1_data[i] + `SEXT(`XLEN, execute_if.data.op_args.lsu.offset);
|
||||
end
|
||||
|
||||
// address type calculation
|
||||
|
||||
wire [NUM_LANES-1:0][`ADDR_TYPE_WIDTH-1:0] mem_req_atype;
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [MEM_ADDRW-1:0] block_addr = full_addr[i][MEM_ASHIFT +: MEM_ADDRW];
|
||||
// is I/O address
|
||||
wire [MEM_ADDRW-1:0] io_addr_start = MEM_ADDRW'(`XLEN'(`IO_BASE_ADDR) >> MEM_ASHIFT);
|
||||
wire [MEM_ADDRW-1:0] io_addr_end = MEM_ADDRW'(`XLEN'(`IO_END_ADDR) >> MEM_ASHIFT);
|
||||
assign mem_req_atype[i][`ADDR_TYPE_FLUSH] = req_is_fence;
|
||||
assign mem_req_atype[i][`ADDR_TYPE_IO] = (block_addr >= io_addr_start) && (block_addr < io_addr_end);
|
||||
`ifdef LMEM_ENABLE
|
||||
// is local memory address
|
||||
wire [MEM_ADDRW-1:0] lmem_addr_start = MEM_ADDRW'(`XLEN'(`LMEM_BASE_ADDR) >> MEM_ASHIFT);
|
||||
wire [MEM_ADDRW-1:0] lmem_addr_end = MEM_ADDRW'((`XLEN'(`LMEM_BASE_ADDR) + `XLEN'(1 << `LMEM_LOG_SIZE)) >> MEM_ASHIFT);
|
||||
assign mem_req_atype[i][`ADDR_TYPE_LOCAL] = (block_addr >= lmem_addr_start) && (block_addr < lmem_addr_end);
|
||||
`endif
|
||||
end
|
||||
|
||||
// schedule memory request
|
||||
|
||||
wire mem_req_valid;
|
||||
wire [NUM_LANES-1:0] mem_req_mask;
|
||||
wire mem_req_rw;
|
||||
wire [NUM_LANES-1:0][LSU_ADDR_WIDTH-1:0] mem_req_addr;
|
||||
wire [NUM_LANES-1:0][LSU_WORD_SIZE-1:0] mem_req_byteen;
|
||||
reg [NUM_LANES-1:0][LSU_WORD_SIZE*8-1:0] mem_req_data;
|
||||
wire [TAG_WIDTH-1:0] mem_req_tag;
|
||||
wire mem_req_ready;
|
||||
|
||||
wire mem_rsp_valid;
|
||||
wire [NUM_LANES-1:0] mem_rsp_mask;
|
||||
wire [NUM_LANES-1:0][LSU_WORD_SIZE*8-1:0] mem_rsp_data;
|
||||
wire [TAG_WIDTH-1:0] mem_rsp_tag;
|
||||
wire mem_rsp_sop;
|
||||
wire mem_rsp_eop;
|
||||
wire mem_rsp_ready;
|
||||
|
||||
wire mem_req_fire = mem_req_valid && mem_req_ready;
|
||||
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
|
||||
`UNUSED_VAR (mem_req_fire)
|
||||
`UNUSED_VAR (mem_rsp_fire)
|
||||
|
||||
wire mem_rsp_sop_pkt, mem_rsp_eop_pkt;
|
||||
wire no_rsp_buf_valid, no_rsp_buf_ready;
|
||||
|
||||
// fence handling
|
||||
|
||||
reg fence_lock;
|
||||
|
||||
assign req_is_fence = `INST_LSU_IS_FENCE(execute_if.data.op_type);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
fence_lock <= 0;
|
||||
end else begin
|
||||
if (mem_req_fire && req_is_fence && execute_if.data.eop) begin
|
||||
fence_lock <= 1;
|
||||
end
|
||||
if (mem_rsp_fire && rsp_is_fence && mem_rsp_eop_pkt) begin
|
||||
fence_lock <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire req_skip = req_is_fence && ~execute_if.data.eop;
|
||||
wire no_rsp_buf_enable = (mem_req_rw && ~execute_if.data.wb) || req_skip;
|
||||
|
||||
assign mem_req_valid = execute_if.valid
|
||||
&& ~req_skip
|
||||
&& ~(no_rsp_buf_enable && ~no_rsp_buf_ready)
|
||||
&& ~fence_lock;
|
||||
|
||||
assign no_rsp_buf_valid = execute_if.valid
|
||||
&& no_rsp_buf_enable
|
||||
&& (req_skip || mem_req_ready)
|
||||
&& ~fence_lock;
|
||||
|
||||
assign execute_if.ready = (mem_req_ready || req_skip)
|
||||
&& ~(no_rsp_buf_enable && ~no_rsp_buf_ready)
|
||||
&& ~fence_lock;
|
||||
|
||||
assign mem_req_mask = execute_if.data.tmask;
|
||||
assign mem_req_rw = execute_if.data.op_args.lsu.is_store;
|
||||
|
||||
// address formatting
|
||||
|
||||
wire [NUM_LANES-1:0][REQ_ASHIFT-1:0] req_align;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign req_align[i] = full_addr[i][REQ_ASHIFT-1:0];
|
||||
assign mem_req_addr[i] = full_addr[i][`MEM_ADDR_WIDTH-1:REQ_ASHIFT];
|
||||
end
|
||||
|
||||
// byte enable formatting
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
reg [LSU_WORD_SIZE-1:0] mem_req_byteen_r;
|
||||
always @(*) begin
|
||||
mem_req_byteen_r = '0;
|
||||
case (`INST_LSU_WSIZE(execute_if.data.op_type))
|
||||
0: begin // 8-bit
|
||||
mem_req_byteen_r[req_align[i]] = 1'b1;
|
||||
end
|
||||
1: begin // 16 bit
|
||||
mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:1], 1'b0}] = 1'b1;
|
||||
mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:1], 1'b1}] = 1'b1;
|
||||
end
|
||||
`ifdef XLEN_64
|
||||
2: begin // 32 bit
|
||||
mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b00}] = 1'b1;
|
||||
mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b01}] = 1'b1;
|
||||
mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b10}] = 1'b1;
|
||||
mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b11}] = 1'b1;
|
||||
end
|
||||
`endif
|
||||
// 3: 64 bit
|
||||
default : mem_req_byteen_r = {LSU_WORD_SIZE{1'b1}};
|
||||
endcase
|
||||
end
|
||||
assign mem_req_byteen[i] = mem_req_byteen_r;
|
||||
end
|
||||
|
||||
// memory misalignment not supported!
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire lsu_req_fire = execute_if.valid && execute_if.ready;
|
||||
`RUNTIME_ASSERT((~lsu_req_fire || ~execute_if.data.tmask[i] || req_is_fence || (full_addr[i] % (1 << `INST_LSU_WSIZE(execute_if.data.op_type))) == 0),
|
||||
("misaligned memory access, wid=%0d, PC=0x%0h, addr=0x%0h, wsize=%0d! (#%0d)",
|
||||
execute_if.data.wid, {execute_if.data.PC, 1'b0}, full_addr[i], `INST_LSU_WSIZE(execute_if.data.op_type), execute_if.data.uuid));
|
||||
end
|
||||
|
||||
// store data formatting
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
always @(*) begin
|
||||
mem_req_data[i] = execute_if.data.rs2_data[i];
|
||||
case (req_align[i])
|
||||
1: mem_req_data[i][`XLEN-1:8] = execute_if.data.rs2_data[i][`XLEN-9:0];
|
||||
2: mem_req_data[i][`XLEN-1:16] = execute_if.data.rs2_data[i][`XLEN-17:0];
|
||||
3: mem_req_data[i][`XLEN-1:24] = execute_if.data.rs2_data[i][`XLEN-25:0];
|
||||
`ifdef XLEN_64
|
||||
4: mem_req_data[i][`XLEN-1:32] = execute_if.data.rs2_data[i][`XLEN-33:0];
|
||||
5: mem_req_data[i][`XLEN-1:40] = execute_if.data.rs2_data[i][`XLEN-41:0];
|
||||
6: mem_req_data[i][`XLEN-1:48] = execute_if.data.rs2_data[i][`XLEN-49:0];
|
||||
7: mem_req_data[i][`XLEN-1:56] = execute_if.data.rs2_data[i][`XLEN-57:0];
|
||||
`endif
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// track SOP/EOP for out-of-order memory responses
|
||||
|
||||
wire [LSUQ_SIZEW-1:0] pkt_waddr, pkt_raddr;
|
||||
|
||||
if (PID_BITS != 0) begin
|
||||
reg [`LSUQ_IN_SIZE-1:0][PID_BITS:0] pkt_ctr;
|
||||
reg [`LSUQ_IN_SIZE-1:0] pkt_sop, pkt_eop;
|
||||
|
||||
wire mem_req_rd_fire = mem_req_fire && ~mem_req_rw;
|
||||
wire mem_req_rd_sop_fire = mem_req_rd_fire && execute_if.data.sop;
|
||||
wire mem_req_rd_eop_fire = mem_req_rd_fire && execute_if.data.eop;
|
||||
wire mem_rsp_eop_fire = mem_rsp_fire && mem_rsp_eop;
|
||||
wire full;
|
||||
|
||||
VX_allocator #(
|
||||
.SIZE (`LSUQ_IN_SIZE)
|
||||
) pkt_allocator (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.acquire_en (mem_req_rd_eop_fire),
|
||||
.acquire_addr(pkt_waddr),
|
||||
.release_en (mem_rsp_eop_pkt),
|
||||
.release_addr(pkt_raddr),
|
||||
`UNUSED_PIN (empty),
|
||||
.full (full)
|
||||
);
|
||||
|
||||
wire rd_during_wr = mem_req_rd_fire && mem_rsp_eop_fire && (pkt_raddr == pkt_waddr);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
pkt_ctr <= '0;
|
||||
pkt_sop <= '0;
|
||||
pkt_eop <= '0;
|
||||
end else begin
|
||||
if (mem_req_rd_sop_fire) begin
|
||||
pkt_sop[pkt_waddr] <= 1;
|
||||
end
|
||||
if (mem_req_rd_eop_fire) begin
|
||||
pkt_eop[pkt_waddr] <= 1;
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
pkt_sop[pkt_raddr] <= 0;
|
||||
end
|
||||
if (mem_rsp_eop_pkt) begin
|
||||
pkt_eop[pkt_raddr] <= 0;
|
||||
end
|
||||
if (~rd_during_wr) begin
|
||||
if (mem_req_rd_fire) begin
|
||||
pkt_ctr[pkt_waddr] <= pkt_ctr[pkt_waddr] + PID_BITS'(1);
|
||||
end
|
||||
if (mem_rsp_eop_fire) begin
|
||||
pkt_ctr[pkt_raddr] <= pkt_ctr[pkt_raddr] - PID_BITS'(1);
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign mem_rsp_sop_pkt = pkt_sop[pkt_raddr];
|
||||
assign mem_rsp_eop_pkt = mem_rsp_eop_fire && pkt_eop[pkt_raddr] && (pkt_ctr[pkt_raddr] == 1);
|
||||
`RUNTIME_ASSERT(~(mem_req_rd_fire && full), ("allocator full!"))
|
||||
`RUNTIME_ASSERT(~mem_req_rd_sop_fire || 0 == pkt_ctr[pkt_waddr], ("Oops!"))
|
||||
`UNUSED_VAR (mem_rsp_sop)
|
||||
end else begin
|
||||
assign pkt_waddr = 0;
|
||||
assign mem_rsp_sop_pkt = mem_rsp_sop;
|
||||
assign mem_rsp_eop_pkt = mem_rsp_eop;
|
||||
`UNUSED_VAR (pkt_raddr)
|
||||
end
|
||||
|
||||
// pack memory request tag
|
||||
assign mem_req_tag = {
|
||||
execute_if.data.uuid,
|
||||
execute_if.data.wid,
|
||||
execute_if.data.PC,
|
||||
execute_if.data.wb,
|
||||
execute_if.data.rd,
|
||||
execute_if.data.op_type,
|
||||
req_align,
|
||||
execute_if.data.pid,
|
||||
pkt_waddr,
|
||||
req_is_fence
|
||||
};
|
||||
|
||||
wire lsu_mem_req_valid;
|
||||
wire lsu_mem_req_rw;
|
||||
wire [NUM_LANES-1:0] lsu_mem_req_mask;
|
||||
wire [NUM_LANES-1:0][LSU_WORD_SIZE-1:0] lsu_mem_req_byteen;
|
||||
wire [NUM_LANES-1:0][LSU_ADDR_WIDTH-1:0] lsu_mem_req_addr;
|
||||
wire [NUM_LANES-1:0][`ADDR_TYPE_WIDTH-1:0] lsu_mem_req_atype;
|
||||
wire [NUM_LANES-1:0][(LSU_WORD_SIZE*8)-1:0] lsu_mem_req_data;
|
||||
wire [LSU_TAG_WIDTH-1:0] lsu_mem_req_tag;
|
||||
wire lsu_mem_req_ready;
|
||||
|
||||
wire lsu_mem_rsp_valid;
|
||||
wire [NUM_LANES-1:0] lsu_mem_rsp_mask;
|
||||
wire [NUM_LANES-1:0][(LSU_WORD_SIZE*8)-1:0] lsu_mem_rsp_data;
|
||||
wire [LSU_TAG_WIDTH-1:0] lsu_mem_rsp_tag;
|
||||
wire lsu_mem_rsp_ready;
|
||||
|
||||
`RESET_RELAY (mem_scheduler_reset, reset);
|
||||
|
||||
VX_mem_scheduler #(
|
||||
.INSTANCE_ID ($sformatf("%s-scheduler", INSTANCE_ID)),
|
||||
.CORE_REQS (NUM_LANES),
|
||||
.MEM_CHANNELS(NUM_LANES),
|
||||
.WORD_SIZE (LSU_WORD_SIZE),
|
||||
.LINE_SIZE (LSU_WORD_SIZE),
|
||||
.ADDR_WIDTH (LSU_ADDR_WIDTH),
|
||||
.ATYPE_WIDTH (`ADDR_TYPE_WIDTH),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.CORE_QUEUE_SIZE (`LSUQ_IN_SIZE),
|
||||
.MEM_QUEUE_SIZE (`LSUQ_OUT_SIZE),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.RSP_PARTIAL (1),
|
||||
.MEM_OUT_BUF (0),
|
||||
.CORE_OUT_BUF(0)
|
||||
) mem_scheduler (
|
||||
.clk (clk),
|
||||
.reset (mem_scheduler_reset),
|
||||
|
||||
// Input request
|
||||
.core_req_valid (mem_req_valid),
|
||||
.core_req_rw (mem_req_rw),
|
||||
.core_req_mask (mem_req_mask),
|
||||
.core_req_byteen(mem_req_byteen),
|
||||
.core_req_addr (mem_req_addr),
|
||||
.core_req_atype (mem_req_atype),
|
||||
.core_req_data (mem_req_data),
|
||||
.core_req_tag (mem_req_tag),
|
||||
.core_req_ready (mem_req_ready),
|
||||
`UNUSED_PIN (core_req_empty),
|
||||
`UNUSED_PIN (core_req_sent),
|
||||
|
||||
// Output response
|
||||
.core_rsp_valid (mem_rsp_valid),
|
||||
.core_rsp_mask (mem_rsp_mask),
|
||||
.core_rsp_data (mem_rsp_data),
|
||||
.core_rsp_tag (mem_rsp_tag),
|
||||
.core_rsp_sop (mem_rsp_sop),
|
||||
.core_rsp_eop (mem_rsp_eop),
|
||||
.core_rsp_ready (mem_rsp_ready),
|
||||
|
||||
// Memory request
|
||||
.mem_req_valid (lsu_mem_req_valid),
|
||||
.mem_req_rw (lsu_mem_req_rw),
|
||||
.mem_req_mask (lsu_mem_req_mask),
|
||||
.mem_req_byteen (lsu_mem_req_byteen),
|
||||
.mem_req_addr (lsu_mem_req_addr),
|
||||
.mem_req_atype (lsu_mem_req_atype),
|
||||
.mem_req_data (lsu_mem_req_data),
|
||||
.mem_req_tag (lsu_mem_req_tag),
|
||||
.mem_req_ready (lsu_mem_req_ready),
|
||||
|
||||
// Memory response
|
||||
.mem_rsp_valid (lsu_mem_rsp_valid),
|
||||
.mem_rsp_mask (lsu_mem_rsp_mask),
|
||||
.mem_rsp_data (lsu_mem_rsp_data),
|
||||
.mem_rsp_tag (lsu_mem_rsp_tag),
|
||||
.mem_rsp_ready (lsu_mem_rsp_ready)
|
||||
);
|
||||
|
||||
assign lsu_mem_if.req_valid = lsu_mem_req_valid;
|
||||
assign lsu_mem_if.req_data.mask = lsu_mem_req_mask;
|
||||
assign lsu_mem_if.req_data.rw = lsu_mem_req_rw;
|
||||
assign lsu_mem_if.req_data.byteen = lsu_mem_req_byteen;
|
||||
assign lsu_mem_if.req_data.addr = lsu_mem_req_addr;
|
||||
assign lsu_mem_if.req_data.atype = lsu_mem_req_atype;
|
||||
assign lsu_mem_if.req_data.data = lsu_mem_req_data;
|
||||
assign lsu_mem_if.req_data.tag = lsu_mem_req_tag;
|
||||
assign lsu_mem_req_ready = lsu_mem_if.req_ready;
|
||||
|
||||
assign lsu_mem_rsp_valid = lsu_mem_if.rsp_valid;
|
||||
assign lsu_mem_rsp_mask = lsu_mem_if.rsp_data.mask;
|
||||
assign lsu_mem_rsp_data = lsu_mem_if.rsp_data.data;
|
||||
assign lsu_mem_rsp_tag = lsu_mem_if.rsp_data.tag;
|
||||
assign lsu_mem_if.rsp_ready = lsu_mem_rsp_ready;
|
||||
|
||||
wire [`UUID_WIDTH-1:0] rsp_uuid;
|
||||
wire [`NW_WIDTH-1:0] rsp_wid;
|
||||
wire [`PC_BITS-1:0] rsp_pc;
|
||||
wire rsp_wb;
|
||||
wire [`NR_BITS-1:0] rsp_rd;
|
||||
wire [`INST_LSU_BITS-1:0] rsp_op_type;
|
||||
wire [NUM_LANES-1:0][REQ_ASHIFT-1:0] rsp_align;
|
||||
wire [PID_WIDTH-1:0] rsp_pid;
|
||||
`UNUSED_VAR (rsp_op_type)
|
||||
|
||||
// unpack memory response tag
|
||||
assign {
|
||||
rsp_uuid,
|
||||
rsp_wid,
|
||||
rsp_pc,
|
||||
rsp_wb,
|
||||
rsp_rd,
|
||||
rsp_op_type,
|
||||
rsp_align,
|
||||
rsp_pid,
|
||||
pkt_raddr,
|
||||
rsp_is_fence
|
||||
} = mem_rsp_tag;
|
||||
|
||||
// load response formatting
|
||||
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] rsp_data;
|
||||
|
||||
`ifdef XLEN_64
|
||||
`ifdef EXT_F_ENABLE
|
||||
// apply nan-boxing to flw outputs
|
||||
wire rsp_is_float = rsp_rd[5];
|
||||
`else
|
||||
wire rsp_is_float = 0;
|
||||
`endif
|
||||
`endif
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; i++) begin
|
||||
`ifdef XLEN_64
|
||||
wire [63:0] rsp_data64 = mem_rsp_data[i];
|
||||
wire [31:0] rsp_data32 = (rsp_align[i][2] ? mem_rsp_data[i][63:32] : mem_rsp_data[i][31:0]);
|
||||
`else
|
||||
wire [31:0] rsp_data32 = mem_rsp_data[i];
|
||||
`endif
|
||||
wire [15:0] rsp_data16 = rsp_align[i][1] ? rsp_data32[31:16] : rsp_data32[15:0];
|
||||
wire [7:0] rsp_data8 = rsp_align[i][0] ? rsp_data16[15:8] : rsp_data16[7:0];
|
||||
|
||||
always @(*) begin
|
||||
case (`INST_LSU_FMT(rsp_op_type))
|
||||
`INST_FMT_B: rsp_data[i] = `XLEN'(signed'(rsp_data8));
|
||||
`INST_FMT_H: rsp_data[i] = `XLEN'(signed'(rsp_data16));
|
||||
`INST_FMT_BU: rsp_data[i] = `XLEN'(unsigned'(rsp_data8));
|
||||
`INST_FMT_HU: rsp_data[i] = `XLEN'(unsigned'(rsp_data16));
|
||||
`ifdef XLEN_64
|
||||
`INST_FMT_W: rsp_data[i] = rsp_is_float ? (`XLEN'(rsp_data32) | 64'hffffffff00000000) : `XLEN'(signed'(rsp_data32));
|
||||
`INST_FMT_WU: rsp_data[i] = `XLEN'(unsigned'(rsp_data32));
|
||||
`INST_FMT_D: rsp_data[i] = `XLEN'(signed'(rsp_data64));
|
||||
`else
|
||||
`INST_FMT_W: rsp_data[i] = `XLEN'(signed'(rsp_data32));
|
||||
`endif
|
||||
default: rsp_data[i] = 'x;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// commit
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + 1 + `NR_BITS + (NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1),
|
||||
.SIZE (2)
|
||||
) rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mem_rsp_valid),
|
||||
.ready_in (mem_rsp_ready),
|
||||
.data_in ({rsp_uuid, rsp_wid, mem_rsp_mask, rsp_pc, rsp_wb, rsp_rd, rsp_data, rsp_pid, mem_rsp_sop_pkt, mem_rsp_eop_pkt}),
|
||||
.data_out ({commit_rsp_if.data.uuid, commit_rsp_if.data.wid, commit_rsp_if.data.tmask, commit_rsp_if.data.PC, commit_rsp_if.data.wb, commit_rsp_if.data.rd, commit_rsp_if.data.data, commit_rsp_if.data.pid, commit_rsp_if.data.sop, commit_rsp_if.data.eop}),
|
||||
.valid_out (commit_rsp_if.valid),
|
||||
.ready_out (commit_rsp_if.ready)
|
||||
);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + PID_WIDTH + 1 + 1),
|
||||
.SIZE (2)
|
||||
) no_rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (no_rsp_buf_valid),
|
||||
.ready_in (no_rsp_buf_ready),
|
||||
.data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop}),
|
||||
.data_out ({commit_no_rsp_if.data.uuid, commit_no_rsp_if.data.wid, commit_no_rsp_if.data.tmask, commit_no_rsp_if.data.PC, commit_no_rsp_if.data.pid, commit_no_rsp_if.data.sop, commit_no_rsp_if.data.eop}),
|
||||
.valid_out (commit_no_rsp_if.valid),
|
||||
.ready_out (commit_no_rsp_if.ready)
|
||||
);
|
||||
assign commit_no_rsp_if.data.rd = '0;
|
||||
assign commit_no_rsp_if.data.wb = 1'b0;
|
||||
assign commit_no_rsp_if.data.data = commit_rsp_if.data.data; // arbiter MUX optimization
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATAW (RSP_ARB_DATAW),
|
||||
.ARBITER ("P"), // prioritize commit_rsp_if
|
||||
.OUT_BUF (3)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in ({commit_no_rsp_if.valid, commit_rsp_if.valid}),
|
||||
.ready_in ({commit_no_rsp_if.ready, commit_rsp_if.ready}),
|
||||
.data_in ({commit_no_rsp_if.data, commit_rsp_if.data}),
|
||||
.data_out (commit_if.data),
|
||||
.valid_out (commit_if.valid),
|
||||
.ready_out (commit_if.ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
`ifdef DBG_TRACE_MEM
|
||||
always @(posedge clk) begin
|
||||
if (execute_if.valid && fence_lock) begin
|
||||
`TRACE(1, ("%d: *** %s fence wait\n", $time, INSTANCE_ID));
|
||||
end
|
||||
if (mem_req_fire) begin
|
||||
if (mem_req_rw) begin
|
||||
`TRACE(1, ("%d: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask));
|
||||
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES);
|
||||
`TRACE(1, (", atype="));
|
||||
`TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES);
|
||||
`TRACE(1, (", byteen=0x%0h, data=", mem_req_byteen));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES);
|
||||
`TRACE(1, (", tag=0x%0h (#%0d)\n", mem_req_tag, execute_if.data.uuid));
|
||||
end else begin
|
||||
`TRACE(1, ("%d: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask));
|
||||
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES);
|
||||
`TRACE(1, (", atype="));
|
||||
`TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES);
|
||||
`TRACE(1, (", byteen=0x%0h, rd=%0d, tag=0x%0h (#%0d)\n", mem_req_byteen, execute_if.data.rd, mem_req_tag, execute_if.data.uuid));
|
||||
end
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(1, ("%d: %s Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=",
|
||||
$time, INSTANCE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data, NUM_LANES);
|
||||
`TRACE(1, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
`ifdef DBG_SCOPE_LSU
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (3),
|
||||
.TRIGGERW (3),
|
||||
.PROBEW (1 + NUM_LANES*(`XLEN + LSU_WORD_SIZE + LSU_WORD_SIZE*8) + `UUID_WIDTH + NUM_LANES*LSU_WORD_SIZE*8 + `UUID_WIDTH)
|
||||
) scope_tap (
|
||||
.clk (clk),
|
||||
.reset (scope_reset),
|
||||
.start (1'b0),
|
||||
.stop (1'b0),
|
||||
.triggers({reset, mem_req_fire, mem_rsp_fire}),
|
||||
.probes ({mem_req_rw, full_addr, mem_req_byteen, mem_req_data, execute_if.data.uuid, rsp_data, rsp_uuid}),
|
||||
.bus_in (scope_bus_in),
|
||||
.bus_out(scope_bus_out)
|
||||
);
|
||||
`else
|
||||
`SCOPE_IO_UNUSED()
|
||||
`endif
|
||||
|
||||
endmodule
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,634 +14,71 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_lsu_unit import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
input wire clk,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Dcache interface
|
||||
VX_mem_bus_if.master cache_bus_if [DCACHE_NUM_REQS],
|
||||
|
||||
// inputs
|
||||
// Inputs
|
||||
VX_dispatch_if.slave dispatch_if [`ISSUE_WIDTH],
|
||||
|
||||
// outputs
|
||||
VX_commit_if.master commit_if [`ISSUE_WIDTH]
|
||||
// Outputs
|
||||
VX_commit_if.master commit_if [`ISSUE_WIDTH],
|
||||
VX_lsu_mem_if.master lsu_mem_if [`NUM_LSU_BLOCKS]
|
||||
);
|
||||
localparam BLOCK_SIZE = 1;
|
||||
localparam NUM_LANES = `NUM_LSU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam LSUQ_SIZEW = `LOG2UP(`LSUQ_SIZE);
|
||||
localparam MEM_ASHIFT = `CLOG2(`MEM_BLOCK_SIZE);
|
||||
localparam MEM_ADDRW = `XLEN - MEM_ASHIFT;
|
||||
localparam REQ_ASHIFT = `CLOG2(DCACHE_WORD_SIZE);
|
||||
localparam CACHE_TAG_WIDTH = `UUID_WIDTH + (NUM_LANES * `CACHE_ADDR_TYPE_BITS) + LSUQ_TAG_BITS;
|
||||
localparam BLOCK_SIZE = `NUM_LSU_BLOCKS;
|
||||
localparam NUM_LANES = `NUM_LSU_LANES;
|
||||
|
||||
`ifdef SCOPE
|
||||
`SCOPE_IO_SWITCH (BLOCK_SIZE);
|
||||
`endif
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) execute_if[BLOCK_SIZE]();
|
||||
|
||||
`RESET_RELAY (dispatch_reset, reset);
|
||||
) per_block_execute_if[BLOCK_SIZE]();
|
||||
|
||||
VX_dispatch_unit #(
|
||||
.BLOCK_SIZE (BLOCK_SIZE),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.OUT_REG (1)
|
||||
.OUT_BUF (1)
|
||||
) dispatch_unit (
|
||||
.clk (clk),
|
||||
.reset (dispatch_reset),
|
||||
.reset (reset),
|
||||
.dispatch_if(dispatch_if),
|
||||
.execute_if (execute_if)
|
||||
.execute_if (per_block_execute_if)
|
||||
);
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) commit_st_if();
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) commit_ld_if();
|
||||
|
||||
`UNUSED_VAR (execute_if[0].data.op_mod)
|
||||
`UNUSED_VAR (execute_if[0].data.use_PC)
|
||||
`UNUSED_VAR (execute_if[0].data.use_imm)
|
||||
`UNUSED_VAR (execute_if[0].data.rs3_data)
|
||||
`UNUSED_VAR (execute_if[0].data.tid)
|
||||
) per_block_commit_if[BLOCK_SIZE]();
|
||||
|
||||
`ifdef SM_ENABLE
|
||||
`STATIC_ASSERT((1 << `SMEM_LOG_SIZE) == `MEM_BLOCK_SIZE * ((1 << `SMEM_LOG_SIZE) / `MEM_BLOCK_SIZE), ("invalid parameter"))
|
||||
`STATIC_ASSERT(0 == (`SMEM_BASE_ADDR % (1 << `SMEM_LOG_SIZE)), ("invalid parameter"))
|
||||
localparam SMEM_START_B = MEM_ADDRW'(`XLEN'(`SMEM_BASE_ADDR) >> MEM_ASHIFT);
|
||||
localparam SMEM_END_B = MEM_ADDRW'((`XLEN'(`SMEM_BASE_ADDR) + (1 << `SMEM_LOG_SIZE)) >> MEM_ASHIFT);
|
||||
`endif
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : lsu_slices
|
||||
|
||||
// tag = uuid + addr_type + wid + tmask + PC + rd + op_type + align + pid + pkt_addr [+ is_dup when LSU_DUP_ENABLE] (packing order of mem_req_tag)
|
||||
localparam TAG_WIDTH = `UUID_WIDTH + (NUM_LANES * `CACHE_ADDR_TYPE_BITS) + `NW_WIDTH + `XLEN + NUM_LANES + `NR_BITS + `INST_LSU_BITS + (NUM_LANES * (REQ_ASHIFT)) + `LSU_DUP_ENABLED + PID_WIDTH + LSUQ_SIZEW;
|
||||
`RESET_RELAY (slice_reset, reset);
|
||||
|
||||
`STATIC_ASSERT(0 == (`IO_BASE_ADDR % `MEM_BLOCK_SIZE), ("invalid parameter"))
|
||||
|
||||
wire [NUM_LANES-1:0][`CACHE_ADDR_TYPE_BITS-1:0] lsu_addr_type;
|
||||
|
||||
// full address calculation
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] full_addr;
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign full_addr[i] = execute_if[0].data.rs1_data[i][`XLEN-1:0] + execute_if[0].data.imm;
|
||||
end
|
||||
|
||||
// detect duplicate addresses
|
||||
|
||||
wire lsu_is_dup;
|
||||
`ifdef LSU_DUP_ENABLE
|
||||
if (NUM_LANES > 1) begin
|
||||
wire [NUM_LANES-2:0] addr_matches;
|
||||
for (genvar i = 0; i < (NUM_LANES-1); ++i) begin
|
||||
assign addr_matches[i] = (execute_if[0].data.rs1_data[i+1] == execute_if[0].data.rs1_data[0]) || ~execute_if[0].data.tmask[i+1];
|
||||
end
|
||||
assign lsu_is_dup = execute_if[0].data.tmask[0] && (& addr_matches);
|
||||
end else begin
|
||||
assign lsu_is_dup = 0;
|
||||
end
|
||||
`else
|
||||
assign lsu_is_dup = 0;
|
||||
`endif
|
||||
|
||||
// detect address type
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [MEM_ADDRW-1:0] full_addr_b = full_addr[i][MEM_ASHIFT +: MEM_ADDRW];
|
||||
// is non-cacheable I/O address
|
||||
wire is_addr_io = (full_addr_b >= MEM_ADDRW'(`XLEN'(`IO_BASE_ADDR) >> MEM_ASHIFT));
|
||||
`ifdef SM_ENABLE
|
||||
// is shared memory address
|
||||
wire is_addr_sm = (full_addr_b >= SMEM_START_B) && (full_addr_b < SMEM_END_B);
|
||||
assign lsu_addr_type[i] = {is_addr_io, is_addr_sm};
|
||||
`else
|
||||
assign lsu_addr_type[i] = is_addr_io;
|
||||
`endif
|
||||
end
|
||||
|
||||
wire mem_req_empty;
|
||||
wire st_rsp_ready;
|
||||
wire lsu_valid, lsu_ready;
|
||||
|
||||
// fence: stall the pipeline until all pending requests are sent
|
||||
wire is_fence = `INST_LSU_IS_FENCE(execute_if[0].data.op_type);
|
||||
wire fence_wait = is_fence && ~mem_req_empty;
|
||||
|
||||
assign lsu_valid = execute_if[0].valid && ~fence_wait;
|
||||
assign execute_if[0].ready = lsu_ready && ~fence_wait;
|
||||
|
||||
// schedule memory request
|
||||
|
||||
wire mem_req_valid;
|
||||
wire [NUM_LANES-1:0] mem_req_mask;
|
||||
wire mem_req_rw;
|
||||
wire [NUM_LANES-1:0][`MEM_ADDR_WIDTH-REQ_ASHIFT-1:0] mem_req_addr;
|
||||
reg [NUM_LANES-1:0][DCACHE_WORD_SIZE-1:0] mem_req_byteen;
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] mem_req_data;
|
||||
wire [TAG_WIDTH-1:0] mem_req_tag;
|
||||
wire mem_req_ready;
|
||||
|
||||
wire mem_rsp_valid;
|
||||
wire [NUM_LANES-1:0] mem_rsp_mask;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] mem_rsp_data;
|
||||
wire [TAG_WIDTH-1:0] mem_rsp_tag;
|
||||
wire mem_rsp_sop;
|
||||
wire mem_rsp_eop;
|
||||
wire mem_rsp_ready;
|
||||
|
||||
assign mem_req_valid = lsu_valid;
|
||||
assign lsu_ready = mem_req_ready
|
||||
&& (~mem_req_rw || st_rsp_ready); // writes commit directly
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign mem_req_mask[i] = execute_if[0].data.tmask[i] && (~lsu_is_dup || (i == 0));
|
||||
end
|
||||
|
||||
assign mem_req_rw = ~execute_if[0].data.wb;
|
||||
|
||||
wire mem_req_fire = mem_req_valid && mem_req_ready;
|
||||
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
|
||||
`UNUSED_VAR (mem_req_fire)
|
||||
`UNUSED_VAR (mem_rsp_fire)
|
||||
|
||||
// address formatting
|
||||
|
||||
wire [NUM_LANES-1:0][REQ_ASHIFT-1:0] req_align;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign req_align[i] = full_addr[i][REQ_ASHIFT-1:0];
|
||||
assign mem_req_addr[i] = full_addr[i][`MEM_ADDR_WIDTH-1:REQ_ASHIFT];
|
||||
end
|
||||
|
||||
// byte enable formatting
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
always @(*) begin
|
||||
mem_req_byteen[i] = '0;
|
||||
case (`INST_LSU_WSIZE(execute_if[0].data.op_type))
|
||||
0: begin // 8-bit
|
||||
mem_req_byteen[i][req_align[i]] = 1'b1;
|
||||
end
|
||||
1: begin // 16 bit
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:1], 1'b0}] = 1'b1;
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:1], 1'b1}] = 1'b1;
|
||||
end
|
||||
`ifdef XLEN_64
|
||||
2: begin // 32 bit
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b00}] = 1'b1;
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b01}] = 1'b1;
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b10}] = 1'b1;
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b11}] = 1'b1;
|
||||
end
|
||||
`endif
|
||||
default : mem_req_byteen[i] = {DCACHE_WORD_SIZE{1'b1}};
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// misaligned accesses are not supported: assert that every active lane's address is a multiple of the access size (fences are exempt)
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire lsu_req_fire = execute_if[0].valid && execute_if[0].ready;
|
||||
`RUNTIME_ASSERT((~lsu_req_fire || ~execute_if[0].data.tmask[i] || is_fence || (full_addr[i] % (1 << `INST_LSU_WSIZE(execute_if[0].data.op_type))) == 0),
|
||||
("misaligned memory access, wid=%0d, PC=0x%0h, addr=0x%0h, wsize=%0d! (#%0d)",
|
||||
execute_if[0].data.wid, execute_if[0].data.PC, full_addr[i], `INST_LSU_WSIZE(execute_if[0].data.op_type), execute_if[0].data.uuid));
|
||||
end
|
||||
|
||||
// store data formatting
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
always @(*) begin
|
||||
mem_req_data[i] = execute_if[0].data.rs2_data[i];
|
||||
case (req_align[i])
|
||||
1: mem_req_data[i][`XLEN-1:8] = execute_if[0].data.rs2_data[i][`XLEN-9:0];
|
||||
2: mem_req_data[i][`XLEN-1:16] = execute_if[0].data.rs2_data[i][`XLEN-17:0];
|
||||
3: mem_req_data[i][`XLEN-1:24] = execute_if[0].data.rs2_data[i][`XLEN-25:0];
|
||||
`ifdef XLEN_64
|
||||
4: mem_req_data[i][`XLEN-1:32] = execute_if[0].data.rs2_data[i][`XLEN-33:0];
|
||||
5: mem_req_data[i][`XLEN-1:40] = execute_if[0].data.rs2_data[i][`XLEN-41:0];
|
||||
6: mem_req_data[i][`XLEN-1:48] = execute_if[0].data.rs2_data[i][`XLEN-49:0];
|
||||
7: mem_req_data[i][`XLEN-1:56] = execute_if[0].data.rs2_data[i][`XLEN-57:0];
|
||||
`endif
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// track SOP/EOP for out-of-order memory responses
|
||||
|
||||
wire [LSUQ_SIZEW-1:0] pkt_waddr, pkt_raddr;
|
||||
wire mem_rsp_sop_pkt, mem_rsp_eop_pkt;
|
||||
|
||||
if (PID_BITS != 0) begin
|
||||
reg [`LSUQ_SIZE-1:0][PID_BITS:0] pkt_ctr;
|
||||
reg [`LSUQ_SIZE-1:0] pkt_sop, pkt_eop;
|
||||
|
||||
wire mem_req_rd_fire = mem_req_fire && execute_if[0].data.wb;
|
||||
wire mem_req_rd_sop_fire = mem_req_rd_fire && execute_if[0].data.sop;
|
||||
wire mem_req_rd_eop_fire = mem_req_rd_fire && execute_if[0].data.eop;
|
||||
wire mem_rsp_eop_fire = mem_rsp_fire && mem_rsp_eop;
|
||||
wire full;
|
||||
|
||||
VX_allocator #(
|
||||
.SIZE (`LSUQ_SIZE)
|
||||
) pkt_allocator (
|
||||
VX_lsu_slice #(
|
||||
.INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, block_idx))
|
||||
) lsu_slice(
|
||||
`SCOPE_IO_BIND (block_idx)
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.acquire_en (mem_req_rd_eop_fire),
|
||||
.acquire_addr(pkt_waddr),
|
||||
.release_en (mem_rsp_eop_pkt),
|
||||
.release_addr(pkt_raddr),
|
||||
`UNUSED_PIN (empty),
|
||||
.full (full)
|
||||
.reset (slice_reset),
|
||||
.execute_if (per_block_execute_if[block_idx]),
|
||||
.commit_if (per_block_commit_if[block_idx]),
|
||||
.lsu_mem_if (lsu_mem_if[block_idx])
|
||||
);
|
||||
|
||||
wire rd_during_wr = mem_req_rd_fire && mem_rsp_eop_fire && (pkt_raddr == pkt_waddr);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
pkt_ctr <= '0;
|
||||
pkt_sop <= '0;
|
||||
pkt_eop <= '0;
|
||||
end else begin
|
||||
if (mem_req_rd_sop_fire) begin
|
||||
pkt_sop[pkt_waddr] <= 1;
|
||||
end
|
||||
if (mem_req_rd_eop_fire) begin
|
||||
pkt_eop[pkt_waddr] <= 1;
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
pkt_sop[pkt_raddr] <= 0;
|
||||
end
|
||||
if (mem_rsp_eop_pkt) begin
|
||||
pkt_eop[pkt_raddr] <= 0;
|
||||
end
|
||||
if (~rd_during_wr) begin
|
||||
if (mem_req_rd_fire) begin
|
||||
pkt_ctr[pkt_waddr] <= pkt_ctr[pkt_waddr] + PID_BITS'(1);
|
||||
end
|
||||
if (mem_rsp_eop_fire) begin
|
||||
pkt_ctr[pkt_raddr] <= pkt_ctr[pkt_raddr] - PID_BITS'(1);
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign mem_rsp_sop_pkt = pkt_sop[pkt_raddr];
|
||||
assign mem_rsp_eop_pkt = mem_rsp_eop_fire && pkt_eop[pkt_raddr] && (pkt_ctr[pkt_raddr] == 1);
|
||||
`RUNTIME_ASSERT(~(mem_req_rd_fire && full), ("allocator full!"))
|
||||
`RUNTIME_ASSERT(~mem_req_rd_sop_fire || 0 == pkt_ctr[pkt_waddr], ("Oops!"))
|
||||
`UNUSED_VAR (mem_rsp_sop)
|
||||
end else begin
|
||||
assign pkt_waddr = 0;
|
||||
assign mem_rsp_sop_pkt = mem_rsp_sop;
|
||||
assign mem_rsp_eop_pkt = mem_rsp_eop;
|
||||
`UNUSED_VAR (pkt_raddr)
|
||||
end
|
||||
|
||||
assign mem_req_tag = {
|
||||
execute_if[0].data.uuid, lsu_addr_type, execute_if[0].data.wid, execute_if[0].data.tmask, execute_if[0].data.PC, execute_if[0].data.rd, execute_if[0].data.op_type, req_align, execute_if[0].data.pid, pkt_waddr
|
||||
`ifdef LSU_DUP_ENABLE
|
||||
, lsu_is_dup
|
||||
`endif
|
||||
};
|
||||
|
||||
wire [DCACHE_NUM_REQS-1:0] cache_req_valid;
|
||||
wire [DCACHE_NUM_REQS-1:0] cache_req_rw;
|
||||
wire [DCACHE_NUM_REQS-1:0][(`XLEN/8)-1:0] cache_req_byteen;
|
||||
wire [DCACHE_NUM_REQS-1:0][DCACHE_ADDR_WIDTH-1:0] cache_req_addr;
|
||||
wire [DCACHE_NUM_REQS-1:0][`XLEN-1:0] cache_req_data;
|
||||
wire [DCACHE_NUM_REQS-1:0][CACHE_TAG_WIDTH-1:0] cache_req_tag;
|
||||
wire [DCACHE_NUM_REQS-1:0] cache_req_ready;
|
||||
wire [DCACHE_NUM_REQS-1:0] cache_rsp_valid;
|
||||
wire [DCACHE_NUM_REQS-1:0][`XLEN-1:0] cache_rsp_data;
|
||||
wire [DCACHE_NUM_REQS-1:0][CACHE_TAG_WIDTH-1:0] cache_rsp_tag;
|
||||
wire [DCACHE_NUM_REQS-1:0] cache_rsp_ready;
|
||||
|
||||
`RESET_RELAY (mem_scheduler_reset, reset);
|
||||
|
||||
VX_mem_scheduler #(
|
||||
.INSTANCE_ID ($sformatf("core%0d-lsu-memsched", CORE_ID)),
|
||||
.NUM_REQS (LSU_MEM_REQS),
|
||||
.NUM_BANKS (DCACHE_NUM_REQS),
|
||||
.ADDR_WIDTH (DCACHE_ADDR_WIDTH),
|
||||
.DATA_WIDTH (`XLEN),
|
||||
.QUEUE_SIZE (`LSUQ_SIZE),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.MEM_TAG_ID (`UUID_WIDTH + (NUM_LANES * `CACHE_ADDR_TYPE_BITS)),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.RSP_PARTIAL (1),
|
||||
.MEM_OUT_REG (2)
|
||||
) mem_scheduler (
|
||||
.clk (clk),
|
||||
.reset (mem_scheduler_reset),
|
||||
|
||||
// Input request
|
||||
.req_valid (mem_req_valid),
|
||||
.req_rw (mem_req_rw),
|
||||
.req_mask (mem_req_mask),
|
||||
.req_byteen (mem_req_byteen),
|
||||
.req_addr (mem_req_addr),
|
||||
.req_data (mem_req_data),
|
||||
.req_tag (mem_req_tag),
|
||||
.req_empty (mem_req_empty),
|
||||
.req_ready (mem_req_ready),
|
||||
`UNUSED_PIN (write_notify),
|
||||
|
||||
// Output response
|
||||
.rsp_valid (mem_rsp_valid),
|
||||
.rsp_mask (mem_rsp_mask),
|
||||
.rsp_data (mem_rsp_data),
|
||||
.rsp_tag (mem_rsp_tag),
|
||||
.rsp_sop (mem_rsp_sop),
|
||||
.rsp_eop (mem_rsp_eop),
|
||||
.rsp_ready (mem_rsp_ready),
|
||||
|
||||
// Memory request
|
||||
.mem_req_valid (cache_req_valid),
|
||||
.mem_req_rw (cache_req_rw),
|
||||
.mem_req_byteen (cache_req_byteen),
|
||||
.mem_req_addr (cache_req_addr),
|
||||
.mem_req_data (cache_req_data),
|
||||
.mem_req_tag (cache_req_tag),
|
||||
.mem_req_ready (cache_req_ready),
|
||||
|
||||
// Memory response
|
||||
.mem_rsp_valid (cache_rsp_valid),
|
||||
.mem_rsp_data (cache_rsp_data),
|
||||
.mem_rsp_tag (cache_rsp_tag),
|
||||
.mem_rsp_ready (cache_rsp_ready)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
|
||||
assign cache_bus_if[i].req_valid = cache_req_valid[i];
|
||||
assign cache_bus_if[i].req_data.rw = cache_req_rw[i];
|
||||
assign cache_bus_if[i].req_data.byteen = cache_req_byteen[i];
|
||||
assign cache_bus_if[i].req_data.addr = cache_req_addr[i];
|
||||
assign cache_bus_if[i].req_data.data = cache_req_data[i];
|
||||
assign cache_req_ready[i] = cache_bus_if[i].req_ready;
|
||||
|
||||
assign cache_rsp_valid[i] = cache_bus_if[i].rsp_valid;
|
||||
assign cache_rsp_data[i] = cache_bus_if[i].rsp_data.data;
|
||||
assign cache_bus_if[i].rsp_ready = cache_rsp_ready[i];
|
||||
end
|
||||
|
||||
// cache tag formatting: <uuid, tag, type>
|
||||
|
||||
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
|
||||
wire [`UUID_WIDTH-1:0] cache_req_uuid, cache_rsp_uuid;
|
||||
wire [NUM_LANES-1:0][`CACHE_ADDR_TYPE_BITS-1:0] cache_req_type, cache_rsp_type;
|
||||
wire [`CLOG2(`LSUQ_SIZE)-1:0] cache_req_tag_x, cache_rsp_tag_x;
|
||||
if (DCACHE_NUM_BATCHES > 1) begin
|
||||
|
||||
wire [DCACHE_NUM_BATCHES-1:0][`CACHE_ADDR_TYPE_BITS-1:0] cache_req_type_b, cache_rsp_type_b;
|
||||
wire [`CACHE_ADDR_TYPE_BITS-1:0] cache_req_type_bi, cache_rsp_type_bi;
|
||||
wire [DCACHE_BATCH_SEL_BITS-1:0] cache_req_bid, cache_rsp_bid;
|
||||
|
||||
assign {cache_req_uuid, cache_req_type, cache_req_bid, cache_req_tag_x} = cache_req_tag[i];
|
||||
assign cache_req_type_bi = cache_req_type_b[cache_req_bid];
|
||||
assign cache_bus_if[i].req_data.tag = {cache_req_uuid, cache_req_bid, cache_req_tag_x, cache_req_type_bi};
|
||||
|
||||
assign {cache_rsp_uuid, cache_rsp_bid, cache_rsp_tag_x, cache_rsp_type_bi} = cache_bus_if[i].rsp_data.tag;
|
||||
assign cache_rsp_type_b = {DCACHE_NUM_BATCHES{cache_rsp_type_bi}};
|
||||
assign cache_rsp_tag[i] = {cache_rsp_uuid, cache_rsp_type, cache_rsp_bid, cache_rsp_tag_x};
|
||||
|
||||
for (genvar j = 0; j < DCACHE_NUM_BATCHES; ++j) begin
|
||||
localparam k = j * DCACHE_NUM_REQS + i;
|
||||
if (k < NUM_LANES) begin
|
||||
assign cache_req_type_b[j] = cache_req_type[k];
|
||||
assign cache_rsp_type[k] = cache_rsp_type_b[j];
|
||||
end else begin
|
||||
assign cache_req_type_b[j] = '0;
|
||||
`UNUSED_VAR (cache_rsp_type_b[j])
|
||||
end
|
||||
end
|
||||
|
||||
end else begin
|
||||
|
||||
assign {cache_req_uuid, cache_req_type, cache_req_tag_x} = cache_req_tag[i];
|
||||
assign cache_bus_if[i].req_data.tag = {cache_req_uuid, cache_req_tag_x, cache_req_type[i]};
|
||||
|
||||
assign {cache_rsp_uuid, cache_rsp_tag_x, cache_rsp_type[i]} = cache_bus_if[i].rsp_data.tag;
|
||||
assign cache_rsp_tag[i] = {cache_rsp_uuid, cache_rsp_type, cache_rsp_tag_x};
|
||||
|
||||
for (genvar j = 0; j < DCACHE_NUM_REQS; ++j) begin
|
||||
if (i != j) begin
|
||||
`UNUSED_VAR (cache_req_type[j])
|
||||
assign cache_rsp_type[j] = '0;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire [`UUID_WIDTH-1:0] rsp_uuid;
|
||||
wire [NUM_LANES-1:0][`CACHE_ADDR_TYPE_BITS-1:0] rsp_addr_type;
|
||||
wire [`NW_WIDTH-1:0] rsp_wid;
|
||||
wire [NUM_LANES-1:0] rsp_tmask_uq;
|
||||
wire [`XLEN-1:0] rsp_pc;
|
||||
wire [`NR_BITS-1:0] rsp_rd;
|
||||
wire [`INST_LSU_BITS-1:0] rsp_op_type;
|
||||
wire [NUM_LANES-1:0][REQ_ASHIFT-1:0] rsp_align;
|
||||
wire [PID_WIDTH-1:0] rsp_pid;
|
||||
wire rsp_is_dup;
|
||||
|
||||
`ifndef LSU_DUP_ENABLE
|
||||
assign rsp_is_dup = 0;
|
||||
`endif
|
||||
|
||||
assign {
|
||||
rsp_uuid, rsp_addr_type, rsp_wid, rsp_tmask_uq, rsp_pc, rsp_rd, rsp_op_type, rsp_align, rsp_pid, pkt_raddr
|
||||
`ifdef LSU_DUP_ENABLE
|
||||
, rsp_is_dup
|
||||
`endif
|
||||
} = mem_rsp_tag;
|
||||
`UNUSED_VAR (rsp_addr_type)
|
||||
`UNUSED_VAR (rsp_op_type)
|
||||
|
||||
// load response formatting
|
||||
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] rsp_data;
|
||||
wire [NUM_LANES-1:0] rsp_tmask;
|
||||
|
||||
`ifdef XLEN_64
|
||||
`ifdef EXT_F_ENABLE
|
||||
// apply nan-boxing to flw outputs
|
||||
wire rsp_is_float = rsp_rd[5];
|
||||
`else
|
||||
wire rsp_is_float = 0;
|
||||
`endif
|
||||
`endif
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; i++) begin
|
||||
`ifdef XLEN_64
|
||||
wire [63:0] rsp_data64 = (i == 0 || rsp_is_dup) ? mem_rsp_data[0] : mem_rsp_data[i];
|
||||
wire [31:0] rsp_data32 = (i == 0 || rsp_is_dup) ? (rsp_align[0][2] ? mem_rsp_data[0][63:32] : mem_rsp_data[0][31:0]) :
|
||||
(rsp_align[i][2] ? mem_rsp_data[i][63:32] : mem_rsp_data[i][31:0]);
|
||||
`else
|
||||
wire [31:0] rsp_data32 = (i == 0 || rsp_is_dup) ? mem_rsp_data[0] : mem_rsp_data[i];
|
||||
`endif
|
||||
wire [15:0] rsp_data16 = rsp_align[i][1] ? rsp_data32[31:16] : rsp_data32[15:0];
|
||||
wire [7:0] rsp_data8 = rsp_align[i][0] ? rsp_data16[15:8] : rsp_data16[7:0];
|
||||
|
||||
always @(*) begin
|
||||
case (`INST_LSU_FMT(rsp_op_type))
|
||||
`INST_FMT_B: rsp_data[i] = `XLEN'(signed'(rsp_data8));
|
||||
`INST_FMT_H: rsp_data[i] = `XLEN'(signed'(rsp_data16));
|
||||
`INST_FMT_BU: rsp_data[i] = `XLEN'(unsigned'(rsp_data8));
|
||||
`INST_FMT_HU: rsp_data[i] = `XLEN'(unsigned'(rsp_data16));
|
||||
`ifdef XLEN_64
|
||||
`INST_FMT_W: rsp_data[i] = rsp_is_float ? (`XLEN'(rsp_data32) | 64'hffffffff00000000) : `XLEN'(signed'(rsp_data32));
|
||||
`INST_FMT_WU: rsp_data[i] = `XLEN'(unsigned'(rsp_data32));
|
||||
`INST_FMT_D: rsp_data[i] = `XLEN'(signed'(rsp_data64));
|
||||
`else
|
||||
`INST_FMT_W: rsp_data[i] = `XLEN'(signed'(rsp_data32));
|
||||
`endif
|
||||
default: rsp_data[i] = 'x;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
assign rsp_tmask = rsp_is_dup ? rsp_tmask_uq : mem_rsp_mask;
|
||||
|
||||
// load commit
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + (NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1),
|
||||
.SIZE (2)
|
||||
) ld_rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mem_rsp_valid),
|
||||
.ready_in (mem_rsp_ready),
|
||||
.data_in ({rsp_uuid, rsp_wid, rsp_tmask, rsp_pc, rsp_rd, rsp_data, rsp_pid, mem_rsp_sop_pkt, mem_rsp_eop_pkt}),
|
||||
.data_out ({commit_ld_if.data.uuid, commit_ld_if.data.wid, commit_ld_if.data.tmask, commit_ld_if.data.PC, commit_ld_if.data.rd, commit_ld_if.data.data, commit_ld_if.data.pid, commit_ld_if.data.sop, commit_ld_if.data.eop}),
|
||||
.valid_out (commit_ld_if.valid),
|
||||
.ready_out (commit_ld_if.ready)
|
||||
);
|
||||
|
||||
assign commit_ld_if.data.wb = 1'b1;
|
||||
|
||||
// store commit
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + PID_WIDTH + 1 + 1),
|
||||
.SIZE (2)
|
||||
) st_rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mem_req_fire && mem_req_rw),
|
||||
.ready_in (st_rsp_ready),
|
||||
.data_in ({execute_if[0].data.uuid, execute_if[0].data.wid, execute_if[0].data.tmask, execute_if[0].data.PC, execute_if[0].data.pid, execute_if[0].data.sop, execute_if[0].data.eop}),
|
||||
.data_out ({commit_st_if.data.uuid, commit_st_if.data.wid, commit_st_if.data.tmask, commit_st_if.data.PC, commit_st_if.data.pid, commit_st_if.data.sop, commit_st_if.data.eop}),
|
||||
.valid_out (commit_st_if.valid),
|
||||
.ready_out (commit_st_if.ready)
|
||||
);
|
||||
assign commit_st_if.data.rd = '0;
|
||||
assign commit_st_if.data.wb = 1'b0;
|
||||
assign commit_st_if.data.data = commit_ld_if.data.data; // force arbiter passthru
|
||||
|
||||
// lsu commit
|
||||
|
||||
`RESET_RELAY (commit_reset, reset);
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) commit_arb_if[1]();
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATAW (RSP_ARB_DATAW),
|
||||
.OUT_REG (3)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (commit_reset),
|
||||
.valid_in ({commit_st_if.valid, commit_ld_if.valid}),
|
||||
.ready_in ({commit_st_if.ready, commit_ld_if.ready}),
|
||||
.data_in ({commit_st_if.data, commit_ld_if.data}),
|
||||
.data_out (commit_arb_if[0].data),
|
||||
.valid_out (commit_arb_if[0].valid),
|
||||
.ready_out (commit_arb_if[0].ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
VX_gather_unit #(
|
||||
.BLOCK_SIZE (BLOCK_SIZE),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.OUT_REG (3)
|
||||
.OUT_BUF (3)
|
||||
) gather_unit (
|
||||
.clk (clk),
|
||||
.reset (commit_reset),
|
||||
.commit_in_if (commit_arb_if),
|
||||
.reset (reset),
|
||||
.commit_in_if (per_block_commit_if),
|
||||
.commit_out_if (commit_if)
|
||||
);
|
||||
|
||||
`ifdef DBG_SCOPE_LSU
|
||||
if (CORE_ID == 0) begin
|
||||
`ifdef SCOPE
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (3),
|
||||
.TRIGGERW (3),
|
||||
.PROBEW (`UUID_WIDTH+NUM_LANES*(`XLEN+4+`XLEN)+1+`UUID_WIDTH+NUM_LANES*`XLEN)
|
||||
) scope_tap (
|
||||
.clk(clk),
|
||||
.reset(scope_reset),
|
||||
.start(1'b0),
|
||||
.stop(1'b0),
|
||||
.triggers({reset, mem_req_fire, mem_rsp_fire}),
|
||||
.probes({execute_if[0].data.uuid, full_addr, mem_req_rw, mem_req_byteen, mem_req_data, rsp_uuid, rsp_data}),
|
||||
.bus_in(scope_bus_in),
|
||||
.bus_out(scope_bus_out)
|
||||
);
|
||||
`endif
|
||||
`ifdef CHIPSCOPE
|
||||
wire [31:0] full_addr_0 = full_addr[0];
|
||||
wire [31:0] mem_req_data_0 = mem_req_data[0];
|
||||
wire [31:0] rsp_data_0 = rsp_data[0];
|
||||
ila_lsu ila_lsu_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({mem_req_data_0, execute_if[0].data.uuid, execute_if[0].data.wid, execute_if[0].data.PC, mem_req_mask, full_addr_0, mem_req_byteen, mem_req_rw, mem_req_ready, mem_req_valid}),
|
||||
.probe1 ({rsp_data_0, rsp_uuid, mem_rsp_eop, rsp_pc, rsp_rd, rsp_tmask, rsp_wid, mem_rsp_ready, mem_rsp_valid}),
|
||||
.probe2 ({cache_bus_if.req_data.data, cache_bus_if.req_data.tag, cache_bus_if.req_data.byteen, cache_bus_if.req_data.addr, cache_bus_if.req_data.rw, cache_bus_if.req_ready, cache_bus_if.req_valid}),
|
||||
.probe3 ({cache_bus_if.rsp_data.data, cache_bus_if.rsp_data.tag, cache_bus_if.rsp_ready, cache_bus_if.rsp_valid})
|
||||
);
|
||||
`endif
|
||||
end
|
||||
`else
|
||||
`SCOPE_IO_UNUSED()
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_CORE_DCACHE
|
||||
always @(posedge clk) begin
|
||||
if (execute_if[0].valid && fence_wait) begin
|
||||
`TRACE(1, ("%d: *** D$%0d fence wait\n", $time, CORE_ID));
|
||||
end
|
||||
if (mem_req_fire) begin
|
||||
if (mem_req_rw) begin
|
||||
`TRACE(1, ("%d: D$%0d Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, CORE_ID, execute_if[0].data.wid, execute_if[0].data.PC, mem_req_mask));
|
||||
`TRACE_ARRAY1D(1, full_addr, NUM_LANES);
|
||||
`TRACE(1, (", tag=0x%0h, byteen=0x%0h, type=", mem_req_tag, mem_req_byteen));
|
||||
`TRACE_ARRAY1D(1, lsu_addr_type, NUM_LANES);
|
||||
`TRACE(1, (", data="));
|
||||
`TRACE_ARRAY1D(1, mem_req_data, NUM_LANES);
|
||||
`TRACE(1, (", is_dup=%b (#%0d)\n", lsu_is_dup, execute_if[0].data.uuid));
|
||||
end else begin
|
||||
`TRACE(1, ("%d: D$%0d Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, CORE_ID, execute_if[0].data.wid, execute_if[0].data.PC, mem_req_mask));
|
||||
`TRACE_ARRAY1D(1, full_addr, NUM_LANES);
|
||||
`TRACE(1, (", tag=0x%0h, byteen=0x%0h, type=", mem_req_tag, mem_req_byteen));
|
||||
`TRACE_ARRAY1D(1, lsu_addr_type, NUM_LANES);
|
||||
`TRACE(1, (", rd=%0d, is_dup=%b (#%0d)\n", execute_if[0].data.rd, lsu_is_dup, execute_if[0].data.uuid));
|
||||
end
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(1, ("%d: D$%0d Rsp: wid=%0d, PC=0x%0h, tmask=%b, tag=0x%0h, rd=%0d, sop=%b, eop=%b, data=",
|
||||
$time, CORE_ID, rsp_wid, rsp_pc, mem_rsp_mask, mem_rsp_tag, rsp_rd, mem_rsp_sop, mem_rsp_eop));
|
||||
`TRACE_ARRAY1D(1, mem_rsp_data, NUM_LANES);
|
||||
`TRACE(1, (", is_dup=%b (#%0d)\n", rsp_is_dup, rsp_uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -13,246 +13,242 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// reset all GPRs in debug mode
|
||||
`ifdef SIMULATION
|
||||
`ifndef NDEBUG
|
||||
`define GPR_RESET
|
||||
`endif
|
||||
`endif
|
||||
|
||||
module VX_operands import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0,
|
||||
parameter CACHE_ENABLE = 0
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter NUM_BANKS = 4,
|
||||
parameter OUT_BUF = 4 // using 2-cycle EB for area reduction
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_writeback_if.slave writeback_if [`ISSUE_WIDTH],
|
||||
VX_ibuffer_if.slave scoreboard_if [`ISSUE_WIDTH],
|
||||
VX_operands_if.master operands_if [`ISSUE_WIDTH]
|
||||
`ifdef PERF_ENABLE
|
||||
output wire [`PERF_CTR_BITS-1:0] perf_stalls,
|
||||
`endif
|
||||
|
||||
VX_writeback_if.slave writeback_if,
|
||||
VX_scoreboard_if.slave scoreboard_if,
|
||||
VX_operands_if.master operands_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `XLEN + 1 + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + `XLEN + `NR_BITS;
|
||||
localparam RAM_ADDRW = `LOG2UP(`NUM_REGS * ISSUE_RATIO);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam NUM_SRC_REGS = 3;
|
||||
localparam REQ_SEL_BITS = `CLOG2(NUM_SRC_REGS);
|
||||
localparam REQ_SEL_WIDTH = `UP(REQ_SEL_BITS);
|
||||
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
|
||||
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
|
||||
localparam PER_BANK_REGS = `NUM_REGS / NUM_BANKS;
|
||||
localparam META_DATAW = ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + `NR_BITS + `UUID_WIDTH;
|
||||
localparam REGS_DATAW = `XLEN * `NUM_THREADS;
|
||||
localparam DATAW = META_DATAW + NUM_SRC_REGS * REGS_DATAW;
|
||||
localparam RAM_ADDRW = `LOG2UP(`NUM_REGS * PER_ISSUE_WARPS);
|
||||
localparam PER_BANK_ADDRW = RAM_ADDRW - BANK_SEL_BITS;
|
||||
localparam XLEN_SIZE = `XLEN / 8;
|
||||
localparam BYTEENW = `NUM_THREADS * XLEN_SIZE;
|
||||
|
||||
localparam STATE_IDLE = 2'd0;
|
||||
localparam STATE_FETCH1 = 2'd1;
|
||||
localparam STATE_FETCH2 = 2'd2;
|
||||
localparam STATE_FETCH3 = 2'd3;
|
||||
localparam STATE_BITS = 2;
|
||||
`UNUSED_VAR (writeback_if.data.sop)
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data;
|
||||
reg [`NR_BITS-1:0] gpr_rd_rid, gpr_rd_rid_n;
|
||||
reg [ISSUE_WIS_W-1:0] gpr_rd_wis, gpr_rd_wis_n;
|
||||
wire [NUM_SRC_REGS-1:0] src_valid;
|
||||
wire [NUM_SRC_REGS-1:0] req_in_valid, req_in_ready;
|
||||
wire [NUM_SRC_REGS-1:0][PER_BANK_ADDRW-1:0] req_in_data;
|
||||
wire [NUM_SRC_REGS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx;
|
||||
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] cache_data [ISSUE_RATIO-1:0];
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] cache_data_n [ISSUE_RATIO-1:0];
|
||||
reg [`NR_BITS-1:0] cache_reg [ISSUE_RATIO-1:0];
|
||||
reg [`NR_BITS-1:0] cache_reg_n [ISSUE_RATIO-1:0];
|
||||
reg [`NUM_THREADS-1:0] cache_tmask [ISSUE_RATIO-1:0];
|
||||
reg [`NUM_THREADS-1:0] cache_tmask_n [ISSUE_RATIO-1:0];
|
||||
reg [ISSUE_RATIO-1:0] cache_eop, cache_eop_n;
|
||||
wire [NUM_BANKS-1:0] gpr_rd_valid, gpr_rd_ready;
|
||||
wire [NUM_BANKS-1:0] gpr_rd_valid_st1, gpr_rd_valid_st2;
|
||||
wire [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr, gpr_rd_addr_st1;
|
||||
wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st1, gpr_rd_data_st2;
|
||||
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx, gpr_rd_req_idx_st1, gpr_rd_req_idx_st2;
|
||||
|
||||
reg valid_out_r;
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data, rs1_data_n;
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data, rs2_data_n;
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data, rs3_data_n;
|
||||
wire pipe_valid_st1, pipe_ready_st1;
|
||||
wire pipe_valid_st2, pipe_ready_st2;
|
||||
wire [META_DATAW-1:0] pipe_data, pipe_data_st1, pipe_data_st2;
|
||||
|
||||
reg [STATE_BITS-1:0] state, state_n;
|
||||
reg [`NR_BITS-1:0] rs2, rs2_n;
|
||||
reg [`NR_BITS-1:0] rs3, rs3_n;
|
||||
reg rs2_ready, rs2_ready_n;
|
||||
reg rs3_ready, rs3_ready_n;
|
||||
reg data_ready, data_ready_n;
|
||||
reg [NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_n;
|
||||
wire [NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_st1, src_data_st2;
|
||||
|
||||
wire ready_out = operands_if[i].ready;
|
||||
|
||||
wire is_rs1_zero = (scoreboard_if[i].data.rs1 == 0);
|
||||
wire is_rs2_zero = (scoreboard_if[i].data.rs2 == 0);
|
||||
wire is_rs3_zero = (scoreboard_if[i].data.rs3 == 0);
|
||||
reg [NUM_SRC_REGS-1:0] data_fetched_n;
|
||||
wire [NUM_SRC_REGS-1:0] data_fetched_st1;
|
||||
|
||||
always @(*) begin
|
||||
state_n = state;
|
||||
rs2_n = rs2;
|
||||
rs3_n = rs3;
|
||||
rs2_ready_n = rs2_ready;
|
||||
rs3_ready_n = rs3_ready;
|
||||
rs1_data_n = rs1_data;
|
||||
rs2_data_n = rs2_data;
|
||||
rs3_data_n = rs3_data;
|
||||
cache_data_n = cache_data;
|
||||
cache_reg_n = cache_reg;
|
||||
cache_tmask_n= cache_tmask;
|
||||
cache_eop_n = cache_eop;
|
||||
gpr_rd_rid_n = gpr_rd_rid;
|
||||
gpr_rd_wis_n = gpr_rd_wis;
|
||||
data_ready_n = data_ready;
|
||||
reg has_collision_n;
|
||||
wire has_collision_st1;
|
||||
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
if (valid_out_r && ready_out) begin
|
||||
data_ready_n = 0;
|
||||
end
|
||||
if (scoreboard_if[i].valid && data_ready_n == 0) begin
|
||||
data_ready_n = 1;
|
||||
if (is_rs3_zero || (CACHE_ENABLE != 0 &&
|
||||
scoreboard_if[i].data.rs3 == cache_reg[scoreboard_if[i].data.wis] &&
|
||||
(scoreboard_if[i].data.tmask & cache_tmask[scoreboard_if[i].data.wis]) == scoreboard_if[i].data.tmask)) begin
|
||||
rs3_data_n = (is_rs3_zero || CACHE_ENABLE == 0) ? '0 : cache_data[scoreboard_if[i].data.wis];
|
||||
rs3_ready_n = 1;
|
||||
end else begin
|
||||
rs3_ready_n = 0;
|
||||
gpr_rd_rid_n = scoreboard_if[i].data.rs3;
|
||||
data_ready_n = 0;
|
||||
state_n = STATE_FETCH3;
|
||||
end
|
||||
if (is_rs2_zero || (CACHE_ENABLE != 0 &&
|
||||
scoreboard_if[i].data.rs2 == cache_reg[scoreboard_if[i].data.wis] &&
|
||||
(scoreboard_if[i].data.tmask & cache_tmask[scoreboard_if[i].data.wis]) == scoreboard_if[i].data.tmask)) begin
|
||||
rs2_data_n = (is_rs2_zero || CACHE_ENABLE == 0) ? '0 : cache_data[scoreboard_if[i].data.wis];
|
||||
rs2_ready_n = 1;
|
||||
end else begin
|
||||
rs2_ready_n = 0;
|
||||
gpr_rd_rid_n = scoreboard_if[i].data.rs2;
|
||||
data_ready_n = 0;
|
||||
state_n = STATE_FETCH2;
|
||||
end
|
||||
if (is_rs1_zero || (CACHE_ENABLE != 0 &&
|
||||
scoreboard_if[i].data.rs1 == cache_reg[scoreboard_if[i].data.wis] &&
|
||||
(scoreboard_if[i].data.tmask & cache_tmask[scoreboard_if[i].data.wis]) == scoreboard_if[i].data.tmask)) begin
|
||||
rs1_data_n = (is_rs1_zero || CACHE_ENABLE == 0) ? '0 : cache_data[scoreboard_if[i].data.wis];
|
||||
end else begin
|
||||
gpr_rd_rid_n = scoreboard_if[i].data.rs1;
|
||||
data_ready_n = 0;
|
||||
state_n = STATE_FETCH1;
|
||||
end
|
||||
end
|
||||
gpr_rd_wis_n = scoreboard_if[i].data.wis;
|
||||
rs2_n = scoreboard_if[i].data.rs2;
|
||||
rs3_n = scoreboard_if[i].data.rs3;
|
||||
end
|
||||
STATE_FETCH1: begin
|
||||
rs1_data_n = gpr_rd_data;
|
||||
if (~rs2_ready) begin
|
||||
gpr_rd_rid_n = rs2;
|
||||
state_n = STATE_FETCH2;
|
||||
end else if (~rs3_ready) begin
|
||||
gpr_rd_rid_n = rs3;
|
||||
state_n = STATE_FETCH3;
|
||||
end else begin
|
||||
data_ready_n = 1;
|
||||
state_n = STATE_IDLE;
|
||||
end
|
||||
end
|
||||
STATE_FETCH2: begin
|
||||
rs2_data_n = gpr_rd_data;
|
||||
if (~rs3_ready) begin
|
||||
gpr_rd_rid_n = rs3;
|
||||
state_n = STATE_FETCH3;
|
||||
end else begin
|
||||
data_ready_n = 1;
|
||||
state_n = STATE_IDLE;
|
||||
end
|
||||
end
|
||||
STATE_FETCH3: begin
|
||||
rs3_data_n = gpr_rd_data;
|
||||
data_ready_n = 1;
|
||||
state_n = STATE_IDLE;
|
||||
end
|
||||
endcase
|
||||
|
||||
if (CACHE_ENABLE != 0 && writeback_if[i].valid) begin
|
||||
if ((cache_reg[writeback_if[i].data.wis] == writeback_if[i].data.rd)
|
||||
|| (cache_eop[writeback_if[i].data.wis] && writeback_if[i].data.sop)) begin
|
||||
for (integer j = 0; j < `NUM_THREADS; ++j) begin
|
||||
if (writeback_if[i].data.tmask[j]) begin
|
||||
cache_data_n[writeback_if[i].data.wis][j] = writeback_if[i].data.data[j];
|
||||
end
|
||||
end
|
||||
cache_reg_n[writeback_if[i].data.wis] = writeback_if[i].data.rd;
|
||||
cache_eop_n[writeback_if[i].data.wis] = writeback_if[i].data.eop;
|
||||
cache_tmask_n[writeback_if[i].data.wis] = writeback_if[i].data.sop ? writeback_if[i].data.tmask :
|
||||
(cache_tmask_n[writeback_if[i].data.wis] | writeback_if[i].data.tmask);
|
||||
end
|
||||
end
|
||||
end
|
||||
wire [NUM_SRC_REGS-1:0][`NR_BITS-1:0] src_regs = {scoreboard_if.data.rs3,
|
||||
scoreboard_if.data.rs2,
|
||||
scoreboard_if.data.rs1};
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= STATE_IDLE;
|
||||
cache_eop <= {ISSUE_RATIO{1'b1}};
|
||||
data_ready <= 0;
|
||||
valid_out_r <= 0;
|
||||
end else begin
|
||||
state <= state_n;
|
||||
cache_eop <= cache_eop_n;
|
||||
data_ready <= data_ready_n;
|
||||
if (~valid_out_r) begin
|
||||
valid_out_r <= scoreboard_if[i].valid && data_ready;
|
||||
end else if (ready_out) begin
|
||||
valid_out_r <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
if (~valid_out_r) begin
|
||||
data_out_r <= {scoreboard_if[i].data.uuid,
|
||||
scoreboard_if[i].data.wis,
|
||||
scoreboard_if[i].data.tmask,
|
||||
scoreboard_if[i].data.PC,
|
||||
scoreboard_if[i].data.wb,
|
||||
scoreboard_if[i].data.ex_type,
|
||||
scoreboard_if[i].data.op_type,
|
||||
scoreboard_if[i].data.op_mod,
|
||||
scoreboard_if[i].data.use_PC,
|
||||
scoreboard_if[i].data.use_imm,
|
||||
scoreboard_if[i].data.imm,
|
||||
scoreboard_if[i].data.rd};
|
||||
end
|
||||
|
||||
gpr_rd_rid <= gpr_rd_rid_n;
|
||||
gpr_rd_wis <= gpr_rd_wis_n;
|
||||
rs2_ready <= rs2_ready_n;
|
||||
rs3_ready <= rs3_ready_n;
|
||||
rs2 <= rs2_n;
|
||||
rs3 <= rs3_n;
|
||||
rs1_data <= rs1_data_n;
|
||||
rs2_data <= rs2_data_n;
|
||||
rs3_data <= rs3_data_n;
|
||||
cache_data <= cache_data_n;
|
||||
cache_reg <= cache_reg_n;
|
||||
cache_tmask <= cache_tmask_n;
|
||||
end
|
||||
|
||||
assign operands_if[i].valid = valid_out_r;
|
||||
assign {operands_if[i].data.uuid,
|
||||
operands_if[i].data.wis,
|
||||
operands_if[i].data.tmask,
|
||||
operands_if[i].data.PC,
|
||||
operands_if[i].data.wb,
|
||||
operands_if[i].data.ex_type,
|
||||
operands_if[i].data.op_type,
|
||||
operands_if[i].data.op_mod,
|
||||
operands_if[i].data.use_PC,
|
||||
operands_if[i].data.use_imm,
|
||||
operands_if[i].data.imm,
|
||||
operands_if[i].data.rd} = data_out_r;
|
||||
assign operands_if[i].data.rs1_data = rs1_data;
|
||||
assign operands_if[i].data.rs2_data = rs2_data;
|
||||
assign operands_if[i].data.rs3_data = rs3_data;
|
||||
|
||||
assign scoreboard_if[i].ready = ~valid_out_r && data_ready;
|
||||
|
||||
// GPR banks
|
||||
|
||||
reg [RAM_ADDRW-1:0] gpr_rd_addr;
|
||||
wire [RAM_ADDRW-1:0] gpr_wr_addr;
|
||||
for (genvar i = 0; i < NUM_SRC_REGS; ++i) begin
|
||||
if (ISSUE_WIS != 0) begin
|
||||
assign gpr_wr_addr = {writeback_if[i].data.wis, writeback_if[i].data.rd};
|
||||
always @(posedge clk) begin
|
||||
gpr_rd_addr <= {gpr_rd_wis_n, gpr_rd_rid_n};
|
||||
end
|
||||
assign req_in_data[i] = {src_regs[i][`NR_BITS-1:BANK_SEL_BITS], scoreboard_if.data.wis};
|
||||
end else begin
|
||||
assign gpr_wr_addr = writeback_if[i].data.rd;
|
||||
always @(posedge clk) begin
|
||||
gpr_rd_addr <= gpr_rd_rid_n;
|
||||
assign req_in_data[i] = src_regs[i][`NR_BITS-1:BANK_SEL_BITS];
|
||||
end
|
||||
if (NUM_BANKS != 1) begin
|
||||
assign req_bank_idx[i] = src_regs[i][BANK_SEL_BITS-1:0];
|
||||
end else begin
|
||||
assign req_bank_idx[i] = '0;
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_SRC_REGS; ++i) begin
|
||||
assign src_valid[i] = (src_regs[i] != 0) && ~data_fetched_st1[i];
|
||||
end
|
||||
|
||||
assign req_in_valid = {NUM_SRC_REGS{scoreboard_if.valid}} & src_valid;
|
||||
|
||||
VX_stream_xbar #(
|
||||
.NUM_INPUTS (NUM_SRC_REGS),
|
||||
.NUM_OUTPUTS (NUM_BANKS),
|
||||
.DATAW (PER_BANK_ADDRW),
|
||||
.ARBITER ("P"), // use priority arbiter
|
||||
.PERF_CTR_BITS(`PERF_CTR_BITS),
|
||||
.OUT_BUF (0) // no output buffering
|
||||
) req_xbar (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
`UNUSED_PIN(collisions),
|
||||
.valid_in (req_in_valid),
|
||||
.data_in (req_in_data),
|
||||
.sel_in (req_bank_idx),
|
||||
.ready_in (req_in_ready),
|
||||
.valid_out (gpr_rd_valid),
|
||||
.data_out (gpr_rd_addr),
|
||||
.sel_out (gpr_rd_req_idx),
|
||||
.ready_out (gpr_rd_ready)
|
||||
);
|
||||
|
||||
wire pipe_in_ready = pipe_ready_st1 || ~pipe_valid_st1;
|
||||
|
||||
assign gpr_rd_ready = {NUM_BANKS{pipe_in_ready}};
|
||||
|
||||
assign scoreboard_if.ready = pipe_in_ready && ~has_collision_n;
|
||||
|
||||
wire pipe_fire_st1 = pipe_valid_st1 && pipe_ready_st1;
|
||||
wire pipe_fire_st2 = pipe_valid_st2 && pipe_ready_st2;
|
||||
|
||||
always @(*) begin
|
||||
has_collision_n = 0;
|
||||
for (integer i = 0; i < NUM_SRC_REGS; ++i) begin
|
||||
for (integer j = 1; j < (NUM_SRC_REGS-i); ++j) begin
|
||||
has_collision_n |= src_valid[i]
|
||||
&& src_valid[j+i]
|
||||
&& (req_bank_idx[i] == req_bank_idx[j+i]);
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
data_fetched_n = data_fetched_st1;
|
||||
if (scoreboard_if.ready) begin
|
||||
data_fetched_n = '0;
|
||||
end else begin
|
||||
data_fetched_n = data_fetched_st1 | req_in_ready;
|
||||
end
|
||||
end
|
||||
|
||||
assign pipe_data = {
|
||||
scoreboard_if.data.wis,
|
||||
scoreboard_if.data.tmask,
|
||||
scoreboard_if.data.PC,
|
||||
scoreboard_if.data.wb,
|
||||
scoreboard_if.data.ex_type,
|
||||
scoreboard_if.data.op_type,
|
||||
scoreboard_if.data.op_args,
|
||||
scoreboard_if.data.rd,
|
||||
scoreboard_if.data.uuid
|
||||
};
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + NUM_SRC_REGS + NUM_BANKS + META_DATAW + 1 + NUM_BANKS * (PER_BANK_ADDRW + REQ_SEL_WIDTH)),
|
||||
.RESETW (1 + NUM_SRC_REGS)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (pipe_in_ready),
|
||||
.data_in ({scoreboard_if.valid, data_fetched_n, gpr_rd_valid, pipe_data, has_collision_n, gpr_rd_addr, gpr_rd_req_idx}),
|
||||
.data_out ({pipe_valid_st1, data_fetched_st1, gpr_rd_valid_st1, pipe_data_st1, has_collision_st1, gpr_rd_addr_st1, gpr_rd_req_idx_st1})
|
||||
);
|
||||
|
||||
assign pipe_ready_st1 = pipe_ready_st2 || ~pipe_valid_st2;
|
||||
|
||||
assign src_data_st1 = pipe_fire_st2 ? '0 : src_data_n;
|
||||
|
||||
wire pipe_valid2_st1 = pipe_valid_st1 && ~has_collision_st1;
|
||||
|
||||
`RESET_RELAY (pipe2_reset, reset); // needed for pipe_reg2's wide RESETW
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + NUM_SRC_REGS * REGS_DATAW + NUM_BANKS + NUM_BANKS * REGS_DATAW + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH),
|
||||
.RESETW (1 + NUM_SRC_REGS * REGS_DATAW)
|
||||
) pipe_reg2 (
|
||||
.clk (clk),
|
||||
.reset (pipe2_reset),
|
||||
.enable (pipe_ready_st1),
|
||||
.data_in ({pipe_valid2_st1, src_data_st1, gpr_rd_valid_st1, gpr_rd_data_st1, pipe_data_st1, gpr_rd_req_idx_st1}),
|
||||
.data_out ({pipe_valid_st2, src_data_st2, gpr_rd_valid_st2, gpr_rd_data_st2, pipe_data_st2, gpr_rd_req_idx_st2})
|
||||
);
|
||||
|
||||
always @(*) begin
|
||||
src_data_n = src_data_st2;
|
||||
for (integer b = 0; b < NUM_BANKS; ++b) begin
|
||||
if (gpr_rd_valid_st2[b]) begin
|
||||
src_data_n[gpr_rd_req_idx_st2[b]] = gpr_rd_data_st2[b];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)),
|
||||
.LUTRAM (1)
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (pipe_valid_st2),
|
||||
.ready_in (pipe_ready_st2),
|
||||
.data_in ({
|
||||
pipe_data_st2,
|
||||
src_data_n[0],
|
||||
src_data_n[1],
|
||||
src_data_n[2]
|
||||
}),
|
||||
.data_out ({
|
||||
operands_if.data.wis,
|
||||
operands_if.data.tmask,
|
||||
operands_if.data.PC,
|
||||
operands_if.data.wb,
|
||||
operands_if.data.ex_type,
|
||||
operands_if.data.op_type,
|
||||
operands_if.data.op_args,
|
||||
operands_if.data.rd,
|
||||
operands_if.data.uuid,
|
||||
operands_if.data.rs1_data,
|
||||
operands_if.data.rs2_data,
|
||||
operands_if.data.rs3_data
|
||||
}),
|
||||
.valid_out (operands_if.valid),
|
||||
.ready_out (operands_if.ready)
|
||||
);
|
||||
|
||||
wire [PER_BANK_ADDRW-1:0] gpr_wr_addr;
|
||||
if (ISSUE_WIS != 0) begin
|
||||
assign gpr_wr_addr = {writeback_if.data.rd[`NR_BITS-1:BANK_SEL_BITS], writeback_if.data.wis};
|
||||
end else begin
|
||||
assign gpr_wr_addr = writeback_if.data.rd[`NR_BITS-1:BANK_SEL_BITS];
|
||||
end
|
||||
|
||||
wire [BANK_SEL_WIDTH-1:0] gpr_wr_bank_idx;
|
||||
if (NUM_BANKS != 1) begin
|
||||
assign gpr_wr_bank_idx = writeback_if.data.rd[BANK_SEL_BITS-1:0];
|
||||
end else begin
|
||||
assign gpr_wr_bank_idx = '0;
|
||||
end
|
||||
|
||||
`ifdef GPR_RESET
|
||||
reg wr_enabled = 0;
|
||||
always @(posedge clk) begin
|
||||
|
@ -260,32 +256,56 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
wr_enabled <= 1;
|
||||
end
|
||||
end
|
||||
`else
|
||||
wire wr_enabled = 1;
|
||||
`endif
|
||||
|
||||
for (genvar j = 0; j < `NUM_THREADS; ++j) begin
|
||||
VX_dp_ram #(
|
||||
.DATAW (`XLEN),
|
||||
.SIZE (`NUM_REGS * ISSUE_RATIO),
|
||||
`ifdef GPR_RESET
|
||||
.INIT_ENABLE (1),
|
||||
.INIT_VALUE (0),
|
||||
`endif
|
||||
.NO_RWCHECK (1)
|
||||
) gpr_ram (
|
||||
.clk (clk),
|
||||
.read (1'b1),
|
||||
`UNUSED_PIN (wren),
|
||||
`ifdef GPR_RESET
|
||||
.write (wr_enabled && writeback_if[i].valid && writeback_if[i].data.tmask[j]),
|
||||
`else
|
||||
.write (writeback_if[i].valid && writeback_if[i].data.tmask[j]),
|
||||
`endif
|
||||
.waddr (gpr_wr_addr),
|
||||
.wdata (writeback_if[i].data.data[j]),
|
||||
.raddr (gpr_rd_addr),
|
||||
.rdata (gpr_rd_data[j])
|
||||
);
|
||||
for (genvar b = 0; b < NUM_BANKS; ++b) begin
|
||||
wire gpr_wr_enabled;
|
||||
if (BANK_SEL_BITS != 0) begin
|
||||
assign gpr_wr_enabled = wr_enabled
|
||||
&& writeback_if.valid
|
||||
&& (gpr_wr_bank_idx == BANK_SEL_BITS'(b));
|
||||
end else begin
|
||||
assign gpr_wr_enabled = wr_enabled && writeback_if.valid;
|
||||
end
|
||||
|
||||
wire [BYTEENW-1:0] wren;
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign wren[i*XLEN_SIZE+:XLEN_SIZE] = {XLEN_SIZE{writeback_if.data.tmask[i]}};
|
||||
end
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (REGS_DATAW),
|
||||
.SIZE (PER_BANK_REGS * PER_ISSUE_WARPS),
|
||||
.WRENW (BYTEENW),
|
||||
`ifdef GPR_RESET
|
||||
.RESET_RAM (1),
|
||||
`endif
|
||||
.NO_RWCHECK (1)
|
||||
) gpr_ram (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (pipe_fire_st1),
|
||||
.wren (wren),
|
||||
.write (gpr_wr_enabled),
|
||||
.waddr (gpr_wr_addr),
|
||||
.wdata (writeback_if.data.data),
|
||||
.raddr (gpr_rd_addr_st1[b]),
|
||||
.rdata (gpr_rd_data_st1[b])
|
||||
);
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`PERF_CTR_BITS-1:0] collisions_r;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
collisions_r <= '0;
|
||||
end else begin
|
||||
collisions_r <= collisions_r + `PERF_CTR_BITS'(scoreboard_if.valid && pipe_in_ready && has_collision_n);
|
||||
end
|
||||
end
|
||||
assign perf_stalls = collisions_r;
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,79 +0,0 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Per-warp pending-instruction counters.
//
// One increment request (incr/incr_wid) and up to DECR_COUNT decrement
// requests (decr/decr_wid) are accepted per cycle. Requests are first captured
// into per-warp registers (incr_cnt/decr_cnt) and folded into the counter on
// the following cycle. The status flags are registered from the same
// next-counter value that is written into the counter register.
//
//   empty     : AND of the per-warp "counter == 0" flags
//   alm_empty : "counter == ALM_EMPTY" flag of the warp selected by alm_empty_wid
module VX_pending_instr #(
    parameter CTR_WIDTH  = 12,  // bits per per-warp counter
    parameter ALM_EMPTY  = 1,   // counter value compared against for alm_empty
    parameter DECR_COUNT = 1    // number of decrement request ports
) (
    input wire                                  clk,
    input wire                                  reset,

    // increment request
    input wire                                  incr,
    input wire [`NW_WIDTH-1:0]                  incr_wid,

    // decrement requests
    input wire [DECR_COUNT-1:0]                 decr,
    input wire [DECR_COUNT-1:0][`NW_WIDTH-1:0]  decr_wid,

    // status
    input wire [`NW_WIDTH-1:0]                  alm_empty_wid,
    output wire                                 empty,
    output wire                                 alm_empty
);
    // enough bits to hold a count of 0..DECR_COUNT
    localparam COUNTW = `CLOG2(DECR_COUNT+1);

    reg [`NUM_WARPS-1:0][CTR_WIDTH-1:0]  pending_r;     // per-warp counters
    reg [`NUM_WARPS-1:0]                 empty_r;       // registered (next count == 0)
    reg [`NUM_WARPS-1:0]                 alm_empty_r;   // registered (next count == ALM_EMPTY)
    reg [`NUM_WARPS-1:0]                 incr_cnt;      // registered increment, per warp
    reg [`NUM_WARPS-1:0]                 incr_sel;      // combinational one-hot of incr_wid
    reg [`NUM_WARPS-1:0][COUNTW-1:0]     decr_cnt;      // registered decrement amount, per warp
    reg [`NUM_WARPS-1:0][DECR_COUNT-1:0] decr_hits;     // [warp][port] decrement hit matrix

    // route the increment request to its warp
    always @(*) begin
        incr_sel = '0;
        if (incr) begin
            incr_sel[incr_wid] = 1'b1;
        end
    end

    // route every active decrement port to its warp
    always @(*) begin
        decr_hits = '0;
        for (integer p = 0; p < DECR_COUNT; ++p) begin
            if (decr[p]) begin
                decr_hits[decr_wid[p]][p] = 1'b1;
            end
        end
    end

    for (genvar w = 0; w < `NUM_WARPS; ++w) begin

        // number of decrement ports targeting this warp in the current cycle
        wire [COUNTW-1:0] decr_sum;
        `POP_COUNT(decr_sum, decr_hits[w]);

        // apply the requests captured in the previous cycle
        wire [CTR_WIDTH-1:0] pending_inc = pending_r[w] + CTR_WIDTH'(incr_cnt[w]);
        wire [CTR_WIDTH-1:0] pending_n   = pending_inc - CTR_WIDTH'(decr_cnt[w]);

        wire is_empty_n     = (pending_n == 0);
        wire is_alm_empty_n = (pending_n == ALM_EMPTY);

        always @(posedge clk) begin
            if (reset) begin
                pending_r[w]   <= '0;
                incr_cnt[w]    <= '0;
                decr_cnt[w]    <= '0;
                empty_r[w]     <= 1;
                alm_empty_r[w] <= 0;
            end else begin
                pending_r[w]   <= pending_n;
                incr_cnt[w]    <= incr_sel[w];
                decr_cnt[w]    <= decr_sum;
                empty_r[w]     <= is_empty_n;
                alm_empty_r[w] <= is_alm_empty_n;
            end
        end
    end

    assign empty     = &empty_r;
    assign alm_empty = alm_empty_r[alm_empty_wid];

endmodule
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,20 +14,21 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_schedule import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_pipeline_perf_if.schedule perf_schedule_if,
|
||||
output sched_perf_t sched_perf,
|
||||
`endif
|
||||
|
||||
// configuration
|
||||
input base_dcrs_t base_dcrs,
|
||||
|
||||
// inputs
|
||||
VX_warp_ctl_if.slave warp_ctl_if,
|
||||
VX_warp_ctl_if.slave warp_ctl_if,
|
||||
VX_branch_ctl_if.slave branch_ctl_if [`NUM_ALU_BLOCKS],
|
||||
VX_decode_sched_if.slave decode_sched_if,
|
||||
VX_commit_sched_if.slave commit_sched_if,
|
||||
|
@ -42,17 +43,18 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
// status
|
||||
output wire busy
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
reg [`NUM_WARPS-1:0] active_warps, active_warps_n; // updated when a warp is activated or disabled
|
||||
reg [`NUM_WARPS-1:0] stalled_warps, stalled_warps_n; // set when branch/gpgpu instructions are issued
|
||||
|
||||
|
||||
reg [`NUM_WARPS-1:0][`NUM_THREADS-1:0] thread_masks, thread_masks_n;
|
||||
reg [`NUM_WARPS-1:0][`XLEN-1:0] warp_pcs, warp_pcs_n;
|
||||
reg [`NUM_WARPS-1:0][`PC_BITS-1:0] warp_pcs, warp_pcs_n;
|
||||
|
||||
wire [`NW_WIDTH-1:0] schedule_wid;
|
||||
wire [`NUM_THREADS-1:0] schedule_tmask;
|
||||
wire [`XLEN-1:0] schedule_pc;
|
||||
wire [`PC_BITS-1:0] schedule_pc;
|
||||
wire schedule_valid;
|
||||
wire schedule_ready;
|
||||
|
||||
|
@ -60,9 +62,9 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
wire join_valid;
|
||||
wire join_is_dvg;
|
||||
wire join_is_else;
|
||||
wire [`NW_WIDTH-1:0] join_wid;
|
||||
wire [`NW_WIDTH-1:0] join_wid;
|
||||
wire [`NUM_THREADS-1:0] join_tmask;
|
||||
wire [`XLEN-1:0] join_pc;
|
||||
wire [`PC_BITS-1:0] join_pc;
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] cycles;
|
||||
|
||||
|
@ -72,10 +74,10 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
wire schedule_if_fire = schedule_if.valid && schedule_if.ready;
|
||||
|
||||
// branch
|
||||
wire [`NUM_ALU_BLOCKS-1:0] branch_valid;
|
||||
wire [`NUM_ALU_BLOCKS-1:0][`NW_WIDTH-1:0] branch_wid;
|
||||
wire [`NUM_ALU_BLOCKS-1:0] branch_valid;
|
||||
wire [`NUM_ALU_BLOCKS-1:0][`NW_WIDTH-1:0] branch_wid;
|
||||
wire [`NUM_ALU_BLOCKS-1:0] branch_taken;
|
||||
wire [`NUM_ALU_BLOCKS-1:0][`XLEN-1:0] branch_dest;
|
||||
wire [`NUM_ALU_BLOCKS-1:0][`PC_BITS-1:0] branch_dest;
|
||||
for (genvar i = 0; i < `NUM_ALU_BLOCKS; ++i) begin
|
||||
assign branch_valid[i] = branch_ctl_if[i].valid;
|
||||
assign branch_wid[i] = branch_ctl_if[i].wid;
|
||||
|
@ -85,47 +87,51 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
|
||||
// barriers
|
||||
reg [`NUM_BARRIERS-1:0][`NUM_WARPS-1:0] barrier_masks, barrier_masks_n;
|
||||
reg [`NUM_BARRIERS-1:0][`NW_WIDTH-1:0] barrier_ctrs, barrier_ctrs_n;
|
||||
reg [`NUM_WARPS-1:0] barrier_stalls, barrier_stalls_n;
|
||||
wire [`CLOG2(`NUM_WARPS+1)-1:0] active_barrier_count;
|
||||
wire [`NUM_WARPS-1:0] curr_barrier_mask;
|
||||
reg [`NUM_WARPS-1:0] curr_barrier_mask_p1;
|
||||
`ifdef GBAR_ENABLE
|
||||
reg [`NUM_WARPS-1:0] curr_barrier_mask_n;
|
||||
reg gbar_req_valid;
|
||||
reg [`NB_WIDTH-1:0] gbar_req_id;
|
||||
reg [`NC_WIDTH-1:0] gbar_req_size_m1;
|
||||
`endif
|
||||
|
||||
assign curr_barrier_mask = barrier_masks[warp_ctl_if.barrier.id];
|
||||
`POP_COUNT(active_barrier_count, curr_barrier_mask);
|
||||
`UNUSED_VAR (active_barrier_count)
|
||||
// wspawn
|
||||
wspawn_t wspawn;
|
||||
reg [`NW_WIDTH-1:0] wspawn_wid;
|
||||
reg is_single_warp;
|
||||
|
||||
wire [`CLOG2(`NUM_WARPS+1)-1:0] active_warps_cnt;
|
||||
`POP_COUNT(active_warps_cnt, active_warps);
|
||||
|
||||
always @(*) begin
|
||||
active_warps_n = active_warps;
|
||||
stalled_warps_n = stalled_warps;
|
||||
thread_masks_n = thread_masks;
|
||||
barrier_masks_n = barrier_masks;
|
||||
barrier_ctrs_n = barrier_ctrs;
|
||||
barrier_stalls_n= barrier_stalls;
|
||||
warp_pcs_n = warp_pcs;
|
||||
|
||||
// wspawn handling
|
||||
if (warp_ctl_if.valid && warp_ctl_if.wspawn.valid) begin
|
||||
active_warps_n |= warp_ctl_if.wspawn.wmask;
|
||||
if (wspawn.valid && is_single_warp) begin
|
||||
active_warps_n |= wspawn.wmask;
|
||||
for (integer i = 0; i < `NUM_WARPS; ++i) begin
|
||||
if (warp_ctl_if.wspawn.wmask[i]) begin
|
||||
if (wspawn.wmask[i]) begin
|
||||
thread_masks_n[i][0] = 1;
|
||||
warp_pcs_n[i] = warp_ctl_if.wspawn.pc;
|
||||
warp_pcs_n[i] = wspawn.pc;
|
||||
end
|
||||
end
|
||||
stalled_warps_n[warp_ctl_if.wid] = 0; // unlock warp
|
||||
stalled_warps_n[wspawn_wid] = 0; // unlock warp
|
||||
end
|
||||
|
||||
|
||||
// TMC handling
|
||||
if (warp_ctl_if.valid && warp_ctl_if.tmc.valid) begin
|
||||
active_warps_n[warp_ctl_if.wid] = (warp_ctl_if.tmc.tmask != 0);
|
||||
thread_masks_n[warp_ctl_if.wid] = warp_ctl_if.tmc.tmask;
|
||||
stalled_warps_n[warp_ctl_if.wid] = 0; // unlock warp
|
||||
end
|
||||
|
||||
|
||||
// split handling
|
||||
if (warp_ctl_if.valid && warp_ctl_if.split.valid) begin
|
||||
if (warp_ctl_if.split.is_dvg) begin
|
||||
|
@ -145,26 +151,30 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
stalled_warps_n[join_wid] = 0; // unlock warp
|
||||
end
|
||||
|
||||
// barrier handling
|
||||
`ifdef GBAR_ENABLE
|
||||
curr_barrier_mask_n = curr_barrier_mask;
|
||||
curr_barrier_mask_n[warp_ctl_if.wid] = 1;
|
||||
`endif
|
||||
// barrier handling
|
||||
curr_barrier_mask_p1 = barrier_masks[warp_ctl_if.barrier.id];
|
||||
curr_barrier_mask_p1[warp_ctl_if.wid] = 1;
|
||||
if (warp_ctl_if.valid && warp_ctl_if.barrier.valid) begin
|
||||
if (~warp_ctl_if.barrier.is_global
|
||||
&& (active_barrier_count[`NW_WIDTH-1:0] == warp_ctl_if.barrier.size_m1[`NW_WIDTH-1:0])) begin
|
||||
barrier_masks_n[warp_ctl_if.barrier.id] = '0;
|
||||
barrier_stalls_n &= ~barrier_masks[warp_ctl_if.barrier.id];
|
||||
if (~warp_ctl_if.barrier.is_noop) begin
|
||||
if (~warp_ctl_if.barrier.is_global
|
||||
&& (barrier_ctrs[warp_ctl_if.barrier.id] == `NW_WIDTH'(warp_ctl_if.barrier.size_m1))) begin
|
||||
barrier_ctrs_n[warp_ctl_if.barrier.id] = '0; // reset barrier counter
|
||||
barrier_masks_n[warp_ctl_if.barrier.id] = '0; // reset barrier mask
|
||||
stalled_warps_n &= ~barrier_masks[warp_ctl_if.barrier.id]; // unlock warps
|
||||
stalled_warps_n[warp_ctl_if.wid] = 0; // unlock warp
|
||||
end else begin
|
||||
barrier_ctrs_n[warp_ctl_if.barrier.id] = barrier_ctrs[warp_ctl_if.barrier.id] + `NW_WIDTH'(1);
|
||||
barrier_masks_n[warp_ctl_if.barrier.id] = curr_barrier_mask_p1;
|
||||
end
|
||||
end else begin
|
||||
barrier_masks_n[warp_ctl_if.barrier.id][warp_ctl_if.wid] = 1;
|
||||
barrier_stalls_n[warp_ctl_if.wid] = 1;
|
||||
stalled_warps_n[warp_ctl_if.wid] = 0; // unlock warp
|
||||
end
|
||||
stalled_warps_n[warp_ctl_if.wid] = 0; // unlock warp
|
||||
end
|
||||
`ifdef GBAR_ENABLE
|
||||
if (gbar_bus_if.rsp_valid && (gbar_req_id == gbar_bus_if.rsp_id)) begin
|
||||
barrier_masks_n[gbar_bus_if.rsp_id] = '0;
|
||||
barrier_stalls_n = '0; // unlock all warps
|
||||
barrier_ctrs_n[warp_ctl_if.barrier.id] = '0; // reset barrier counter
|
||||
barrier_masks_n[gbar_bus_if.rsp_id] = '0; // reset barrier mask
|
||||
stalled_warps_n = '0; // unlock all warps
|
||||
end
|
||||
`endif
|
||||
|
||||
|
@ -195,7 +205,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
|
||||
// advance PC
|
||||
if (schedule_if_fire) begin
|
||||
warp_pcs_n[schedule_if.data.wid] = schedule_if.data.PC + 4;
|
||||
warp_pcs_n[schedule_if.data.wid] = schedule_if.data.PC + `PC_BITS'(2);
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -204,6 +214,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
barrier_masks <= '0;
|
||||
barrier_ctrs <= '0;
|
||||
`ifdef GBAR_ENABLE
|
||||
gbar_req_valid <= 0;
|
||||
`endif
|
||||
|
@ -214,27 +225,43 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
barrier_stalls <= '0;
|
||||
issued_instrs <= '0;
|
||||
cycles <= '0;
|
||||
wspawn.valid <= 0;
|
||||
|
||||
// activate first warp
|
||||
warp_pcs[0] <= base_dcrs.startup_addr;
|
||||
warp_pcs[0] <= base_dcrs.startup_addr[1 +: `PC_BITS];
|
||||
active_warps[0] <= 1;
|
||||
thread_masks[0][0] <= 1;
|
||||
is_single_warp <= 1;
|
||||
end else begin
|
||||
active_warps <= active_warps_n;
|
||||
stalled_warps <= stalled_warps_n;
|
||||
thread_masks <= thread_masks_n;
|
||||
warp_pcs <= warp_pcs_n;
|
||||
barrier_masks <= barrier_masks_n;
|
||||
barrier_ctrs <= barrier_ctrs_n;
|
||||
barrier_stalls <= barrier_stalls_n;
|
||||
is_single_warp <= (active_warps_cnt == $bits(active_warps_cnt)'(1));
|
||||
|
||||
// wspawn handling
|
||||
if (warp_ctl_if.valid && warp_ctl_if.wspawn.valid) begin
|
||||
wspawn.valid <= 1;
|
||||
wspawn.wmask <= warp_ctl_if.wspawn.wmask;
|
||||
wspawn.pc <= warp_ctl_if.wspawn.pc;
|
||||
wspawn_wid <= warp_ctl_if.wid;
|
||||
end
|
||||
if (wspawn.valid && is_single_warp) begin
|
||||
wspawn.valid <= 0;
|
||||
end
|
||||
|
||||
// global barrier scheduling
|
||||
`ifdef GBAR_ENABLE
|
||||
if (warp_ctl_if.valid && warp_ctl_if.barrier.valid
|
||||
&& warp_ctl_if.barrier.is_global
|
||||
&& (curr_barrier_mask_n == active_warps)) begin
|
||||
&& warp_ctl_if.barrier.is_global
|
||||
&& !warp_ctl_if.barrier.is_noop
|
||||
&& (curr_barrier_mask_p1 == active_warps)) begin
|
||||
gbar_req_valid <= 1;
|
||||
gbar_req_id <= warp_ctl_if.barrier.id;
|
||||
gbar_req_size_m1 <= warp_ctl_if.barrier.size_m1[`NC_WIDTH-1:0];
|
||||
gbar_req_size_m1 <= `NC_WIDTH'(warp_ctl_if.barrier.size_m1);
|
||||
end
|
||||
if (gbar_bus_if.req_valid && gbar_bus_if.req_ready) begin
|
||||
gbar_req_valid <= 0;
|
||||
|
@ -247,7 +274,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
|
||||
if (busy) begin
|
||||
cycles <= cycles + 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -265,7 +292,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (split_join_reset, reset);
|
||||
|
||||
VX_split_join #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-splitjoin", INSTANCE_ID))
|
||||
) split_join (
|
||||
.clk (clk),
|
||||
.reset (split_join_reset),
|
||||
|
@ -274,19 +301,21 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
.split (warp_ctl_if.split),
|
||||
.sjoin (warp_ctl_if.sjoin),
|
||||
.join_valid (join_valid),
|
||||
.join_is_dvg (join_is_dvg),
|
||||
.join_is_else (join_is_else),
|
||||
.join_wid (join_wid),
|
||||
.join_is_dvg(join_is_dvg),
|
||||
.join_is_else(join_is_else),
|
||||
.join_wid (join_wid),
|
||||
.join_tmask (join_tmask),
|
||||
.join_pc (join_pc)
|
||||
.join_pc (join_pc),
|
||||
.stack_wid (warp_ctl_if.dvstack_wid),
|
||||
.stack_ptr (warp_ctl_if.dvstack_ptr)
|
||||
);
|
||||
|
||||
// schedule the next ready warp
|
||||
|
||||
wire [`NUM_WARPS-1:0] ready_warps = active_warps & ~(stalled_warps | barrier_stalls);
|
||||
wire [`NUM_WARPS-1:0] ready_warps = active_warps & ~stalled_warps;
|
||||
|
||||
VX_lzc #(
|
||||
.N (`NUM_WARPS),
|
||||
.N (`NUM_WARPS),
|
||||
.REVERSE (1)
|
||||
) wid_select (
|
||||
.data_in (ready_warps),
|
||||
|
@ -294,33 +323,40 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
.valid_out (schedule_valid)
|
||||
);
|
||||
|
||||
wire [`NUM_WARPS-1:0][(`NUM_THREADS + `XLEN)-1:0] schedule_data;
|
||||
wire [`NUM_WARPS-1:0][(`NUM_THREADS + `PC_BITS)-1:0] schedule_data;
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
assign schedule_data[i] = {thread_masks[i], warp_pcs[i]};
|
||||
end
|
||||
|
||||
assign {schedule_tmask, schedule_pc} = {
|
||||
schedule_data[schedule_wid][(`NUM_THREADS + `XLEN)-1:(`NUM_THREADS + `XLEN)-4],
|
||||
schedule_data[schedule_wid][(`NUM_THREADS + `XLEN)-5:0]
|
||||
schedule_data[schedule_wid][(`NUM_THREADS + `PC_BITS)-1:(`NUM_THREADS + `PC_BITS)-4],
|
||||
schedule_data[schedule_wid][(`NUM_THREADS + `PC_BITS)-5:0]
|
||||
};
|
||||
|
||||
`ifndef NDEBUG
|
||||
localparam GNW_WIDTH = `LOG2UP(`NUM_CLUSTERS * `NUM_CORES * `NUM_WARPS);
|
||||
reg [`UUID_WIDTH-1:0] instr_uuid;
|
||||
wire [GNW_WIDTH-1:0] g_wid = (GNW_WIDTH'(CORE_ID) << `NW_BITS) + GNW_WIDTH'(schedule_wid);
|
||||
`ifdef SV_DPI
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
instr_uuid <= `UUID_WIDTH'(dpi_uuid_gen(1, 0, 0));
|
||||
instr_uuid <= `UUID_WIDTH'(dpi_uuid_gen(1, 32'd0));
|
||||
end else if (schedule_fire) begin
|
||||
instr_uuid <= `UUID_WIDTH'(dpi_uuid_gen(0, 32'(g_wid), 64'(schedule_pc)));
|
||||
instr_uuid <= `UUID_WIDTH'(dpi_uuid_gen(0, 32'(g_wid)));
|
||||
end
|
||||
end
|
||||
`else
|
||||
wire [GNW_WIDTH+16-1:0] w_uuid = {g_wid, 16'(schedule_pc)};
|
||||
always @(*) begin
|
||||
instr_uuid = `UUID_WIDTH'(w_uuid);
|
||||
end
|
||||
`endif
|
||||
`else
|
||||
wire [`UUID_WIDTH-1:0] instr_uuid = '0;
|
||||
`endif
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`NUM_THREADS + `XLEN + `NW_WIDTH)
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`NUM_THREADS + `PC_BITS + `NW_WIDTH)
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -334,24 +370,42 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
|
||||
assign schedule_if.data.uuid = instr_uuid;
|
||||
|
||||
`RESET_RELAY (pending_instr_reset, reset);
|
||||
// Track pending instructions per warp
|
||||
|
||||
wire no_pending_instr;
|
||||
VX_pending_instr #(
|
||||
.CTR_WIDTH (12),
|
||||
.DECR_COUNT (`ISSUE_WIDTH),
|
||||
.ALM_EMPTY (1)
|
||||
) pending_instr(
|
||||
.clk (clk),
|
||||
.reset (pending_instr_reset),
|
||||
.incr (schedule_if_fire),
|
||||
.incr_wid (schedule_if.data.wid),
|
||||
.decr (commit_sched_if.committed),
|
||||
.decr_wid (commit_sched_if.committed_wid),
|
||||
.alm_empty_wid (sched_csr_if.alm_empty_wid),
|
||||
.alm_empty (sched_csr_if.alm_empty),
|
||||
.empty (no_pending_instr)
|
||||
);
|
||||
reg [`NUM_WARPS-1:0] per_warp_incr;
|
||||
always @(*) begin
|
||||
per_warp_incr = 0;
|
||||
if (schedule_if_fire) begin
|
||||
per_warp_incr[schedule_if.data.wid] = 1;
|
||||
end
|
||||
end
|
||||
|
||||
wire [`NUM_WARPS-1:0] pending_warp_empty;
|
||||
wire [`NUM_WARPS-1:0] pending_warp_alm_empty;
|
||||
|
||||
`RESET_RELAY_EX (pending_instr_reset, reset, `NUM_WARPS, `MAX_FANOUT);
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
|
||||
VX_pending_size #(
|
||||
.SIZE (4096),
|
||||
.ALM_EMPTY (1)
|
||||
) counter (
|
||||
.clk (clk),
|
||||
.reset (pending_instr_reset[i]),
|
||||
.incr (per_warp_incr[i]),
|
||||
.decr (commit_sched_if.committed_warps[i]),
|
||||
.empty (pending_warp_empty[i]),
|
||||
.alm_empty (pending_warp_alm_empty[i]),
|
||||
`UNUSED_PIN (full),
|
||||
`UNUSED_PIN (alm_full),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
end
|
||||
|
||||
assign sched_csr_if.alm_empty = pending_warp_alm_empty[sched_csr_if.alm_empty_wid];
|
||||
|
||||
wire no_pending_instr = (& pending_warp_empty);
|
||||
|
||||
`BUFFER_EX(busy, (active_warps != 0 || ~no_pending_instr), 1'b1, 1);
|
||||
|
||||
|
@ -359,7 +413,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
assign sched_csr_if.cycles = cycles;
|
||||
assign sched_csr_if.active_warps = active_warps;
|
||||
assign sched_csr_if.thread_masks = thread_masks;
|
||||
|
||||
|
||||
// timeout handling
|
||||
reg [31:0] timeout_ctr;
|
||||
reg timeout_enable;
|
||||
|
@ -378,9 +432,9 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
end
|
||||
`RUNTIME_ASSERT(timeout_ctr < `STALL_TIMEOUT, ("%t: *** core%0d-scheduler-timeout: stalled_warps=%b", $time, CORE_ID, stalled_warps));
|
||||
`RUNTIME_ASSERT(timeout_ctr < `STALL_TIMEOUT, ("%t: *** %s timeout: stalled_warps=%b", $time, INSTANCE_ID, stalled_warps))
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`PERF_CTR_BITS-1:0] perf_sched_idles;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_sched_stalls;
|
||||
|
||||
|
@ -390,15 +444,15 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_sched_idles <= '0;
|
||||
perf_sched_stalls <= '0;
|
||||
perf_sched_stalls <= '0;
|
||||
end else begin
|
||||
perf_sched_idles <= perf_sched_idles + `PERF_CTR_BITS'(schedule_idle);
|
||||
perf_sched_stalls <= perf_sched_stalls + `PERF_CTR_BITS'(schedule_stall);
|
||||
end
|
||||
end
|
||||
|
||||
assign perf_schedule_if.sched_idles = perf_sched_idles;
|
||||
assign perf_schedule_if.sched_stalls = perf_sched_stalls;
|
||||
assign sched_perf.idles = perf_sched_idles;
|
||||
assign sched_perf.stalls = perf_sched_stalls;
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,63 +14,67 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_scoreboard import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
output reg [`PERF_CTR_BITS-1:0] perf_scb_stalls,
|
||||
output reg [`PERF_CTR_BITS-1:0] perf_units_uses [`NUM_EX_UNITS],
|
||||
output reg [`PERF_CTR_BITS-1:0] perf_sfu_uses [`NUM_SFU_UNITS],
|
||||
output reg [`PERF_CTR_BITS-1:0] perf_stalls,
|
||||
output reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_units_uses,
|
||||
output reg [`NUM_SFU_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_sfu_uses,
|
||||
`endif
|
||||
|
||||
VX_writeback_if.slave writeback_if [`ISSUE_WIDTH],
|
||||
VX_ibuffer_if.slave ibuffer_if [`ISSUE_WIDTH],
|
||||
VX_ibuffer_if.master scoreboard_if [`ISSUE_WIDTH]
|
||||
VX_writeback_if.slave writeback_if,
|
||||
VX_ibuffer_if.slave ibuffer_if [PER_ISSUE_WARPS],
|
||||
VX_scoreboard_if.master scoreboard_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `XLEN + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + `XLEN + (`NR_BITS * 4) + 1;
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam DATAW = `UUID_WIDTH + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + (`NR_BITS * 4) + 1;
|
||||
|
||||
VX_ibuffer_if staging_if [PER_ISSUE_WARPS]();
|
||||
reg [PER_ISSUE_WARPS-1:0] operands_ready;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`ISSUE_WIDTH-1:0][`NUM_EX_UNITS-1:0] perf_issue_units_per_cycle;
|
||||
reg [PER_ISSUE_WARPS-1:0][`NUM_EX_UNITS-1:0] perf_inuse_units_per_cycle;
|
||||
wire [`NUM_EX_UNITS-1:0] perf_units_per_cycle, perf_units_per_cycle_r;
|
||||
|
||||
reg [`ISSUE_WIDTH-1:0][`NUM_SFU_UNITS-1:0] perf_issue_sfu_per_cycle;
|
||||
reg [PER_ISSUE_WARPS-1:0][`NUM_SFU_UNITS-1:0] perf_inuse_sfu_per_cycle;
|
||||
wire [`NUM_SFU_UNITS-1:0] perf_sfu_per_cycle, perf_sfu_per_cycle_r;
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0] perf_issue_stalls_per_cycle;
|
||||
wire [`CLOG2(`ISSUE_WIDTH+1)-1:0] perf_stalls_per_cycle, perf_stalls_per_cycle_r;
|
||||
|
||||
`POP_COUNT(perf_stalls_per_cycle, perf_issue_stalls_per_cycle);
|
||||
|
||||
VX_reduce #(
|
||||
.DATAW_IN (`NUM_EX_UNITS),
|
||||
.N (`ISSUE_WIDTH),
|
||||
.N (PER_ISSUE_WARPS),
|
||||
.OP ("|")
|
||||
) perf_units_reduce (
|
||||
.data_in (perf_issue_units_per_cycle),
|
||||
.data_in (perf_inuse_units_per_cycle),
|
||||
.data_out (perf_units_per_cycle)
|
||||
);
|
||||
);
|
||||
|
||||
VX_reduce #(
|
||||
.DATAW_IN (`NUM_SFU_UNITS),
|
||||
.N (`ISSUE_WIDTH),
|
||||
.N (PER_ISSUE_WARPS),
|
||||
.OP ("|")
|
||||
) perf_sfu_reduce (
|
||||
.data_in (perf_issue_sfu_per_cycle),
|
||||
.data_in (perf_inuse_sfu_per_cycle),
|
||||
.data_out (perf_sfu_per_cycle)
|
||||
);
|
||||
|
||||
`BUFFER(perf_stalls_per_cycle_r, perf_stalls_per_cycle);
|
||||
`BUFFER(perf_units_per_cycle_r, perf_units_per_cycle);
|
||||
`BUFFER(perf_sfu_per_cycle_r, perf_sfu_per_cycle);
|
||||
`BUFFER_EX(perf_units_per_cycle_r, perf_units_per_cycle, 1'b1, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT));
|
||||
`BUFFER_EX(perf_sfu_per_cycle_r, perf_sfu_per_cycle, 1'b1, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT));
|
||||
|
||||
wire [PER_ISSUE_WARPS-1:0] stg_valid_in;
|
||||
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin
|
||||
assign stg_valid_in[w] = staging_if[w].valid;
|
||||
end
|
||||
|
||||
wire perf_stall_per_cycle = (|stg_valid_in) && ~(|(stg_valid_in & operands_ready));
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_scb_stalls <= '0;
|
||||
perf_stalls <= '0;
|
||||
end else begin
|
||||
perf_scb_stalls <= perf_scb_stalls + `PERF_CTR_BITS'(perf_stalls_per_cycle_r);
|
||||
perf_stalls <= perf_stalls + `PERF_CTR_BITS'(perf_stall_per_cycle);
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -83,7 +87,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
for (genvar i = 0; i < `NUM_SFU_UNITS; ++i) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
@ -95,138 +99,221 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
end
|
||||
`endif
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
reg [`UP(ISSUE_RATIO)-1:0][`NUM_REGS-1:0] inuse_regs;
|
||||
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (1)
|
||||
) stanging_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (ibuffer_if[w].valid),
|
||||
.data_in (ibuffer_if[w].data),
|
||||
.ready_in (ibuffer_if[w].ready),
|
||||
.valid_out(staging_if[w].valid),
|
||||
.data_out (staging_if[w].data),
|
||||
.ready_out(staging_if[w].ready)
|
||||
);
|
||||
end
|
||||
|
||||
wire writeback_fire = writeback_if[i].valid && writeback_if[i].data.eop;
|
||||
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin
|
||||
reg [`NUM_REGS-1:0] inuse_regs;
|
||||
|
||||
wire inuse_rd = inuse_regs[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd];
|
||||
wire inuse_rs1 = inuse_regs[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs1];
|
||||
wire inuse_rs2 = inuse_regs[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs2];
|
||||
wire inuse_rs3 = inuse_regs[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs3];
|
||||
reg [3:0] operands_busy, operands_busy_n;
|
||||
|
||||
wire ibuffer_fire = ibuffer_if[w].valid && ibuffer_if[w].ready;
|
||||
|
||||
wire staging_fire = staging_if[w].valid && staging_if[w].ready;
|
||||
|
||||
wire writeback_fire = writeback_if.valid
|
||||
&& (writeback_if.data.wis == ISSUE_WIS_W'(w))
|
||||
&& writeback_if.data.eop;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`UP(ISSUE_RATIO)-1:0][`NUM_REGS-1:0][`EX_WIDTH-1:0] inuse_units;
|
||||
reg [`UP(ISSUE_RATIO)-1:0][`NUM_REGS-1:0][`SFU_WIDTH-1:0] inuse_sfu;
|
||||
|
||||
reg [`SFU_WIDTH-1:0] sfu_type;
|
||||
always @(*) begin
|
||||
case (scoreboard_if[i].data.op_type)
|
||||
`INST_SFU_CSRRW,
|
||||
`INST_SFU_CSRRS,
|
||||
`INST_SFU_CSRRC: sfu_type = `SFU_CSRS;
|
||||
default: sfu_type = `SFU_WCTL;
|
||||
endcase
|
||||
end
|
||||
reg [`NUM_REGS-1:0][`EX_WIDTH-1:0] inuse_units;
|
||||
reg [`NUM_REGS-1:0][`SFU_WIDTH-1:0] inuse_sfu;
|
||||
|
||||
always @(*) begin
|
||||
perf_issue_units_per_cycle[i] = '0;
|
||||
perf_issue_sfu_per_cycle[i] = '0;
|
||||
if (ibuffer_if[i].valid) begin
|
||||
if (inuse_rd) begin
|
||||
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd]] = 1;
|
||||
if (inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] == `EX_SFU) begin
|
||||
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd]] = 1;
|
||||
perf_inuse_units_per_cycle[w] = '0;
|
||||
perf_inuse_sfu_per_cycle[w] = '0;
|
||||
if (staging_if[w].valid) begin
|
||||
if (operands_busy[0]) begin
|
||||
perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rd]] = 1;
|
||||
if (inuse_units[staging_if[w].data.rd] == `EX_SFU) begin
|
||||
perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rd]] = 1;
|
||||
end
|
||||
end
|
||||
if (inuse_rs1) begin
|
||||
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs1]] = 1;
|
||||
if (inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs1] == `EX_SFU) begin
|
||||
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs1]] = 1;
|
||||
if (operands_busy[1]) begin
|
||||
perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rs1]] = 1;
|
||||
if (inuse_units[staging_if[w].data.rs1] == `EX_SFU) begin
|
||||
perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rs1]] = 1;
|
||||
end
|
||||
end
|
||||
if (inuse_rs2) begin
|
||||
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs2]] = 1;
|
||||
if (inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs2] == `EX_SFU) begin
|
||||
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs2]] = 1;
|
||||
if (operands_busy[2]) begin
|
||||
perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rs2]] = 1;
|
||||
if (inuse_units[staging_if[w].data.rs2] == `EX_SFU) begin
|
||||
perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rs2]] = 1;
|
||||
end
|
||||
end
|
||||
if (inuse_rs3) begin
|
||||
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs3]] = 1;
|
||||
if (inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs3] == `EX_SFU) begin
|
||||
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs3]] = 1;
|
||||
if (operands_busy[3]) begin
|
||||
perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rs3]] = 1;
|
||||
if (inuse_units[staging_if[w].data.rs3] == `EX_SFU) begin
|
||||
perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rs3]] = 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
assign perf_issue_stalls_per_cycle[i] = ibuffer_if[i].valid && ~ibuffer_if[i].ready;
|
||||
`endif
|
||||
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
reg valid_out_r;
|
||||
wire ready_out;
|
||||
|
||||
wire [3:0] ready_masks = ~{inuse_rd, inuse_rs1, inuse_rs2, inuse_rs3};
|
||||
wire deps_ready = (& ready_masks);
|
||||
|
||||
wire valid_in = ibuffer_if[i].valid && deps_ready;
|
||||
wire ready_in = ~valid_out_r && deps_ready;
|
||||
wire [DATAW-1:0] data_in = ibuffer_if[i].data;
|
||||
|
||||
assign ready_out = scoreboard_if[i].ready;
|
||||
always @(*) begin
|
||||
operands_busy_n = operands_busy;
|
||||
if (ibuffer_fire) begin
|
||||
operands_busy_n = {
|
||||
inuse_regs[ibuffer_if[w].data.rs3],
|
||||
inuse_regs[ibuffer_if[w].data.rs2],
|
||||
inuse_regs[ibuffer_if[w].data.rs1],
|
||||
inuse_regs[ibuffer_if[w].data.rd]
|
||||
};
|
||||
end
|
||||
if (writeback_fire) begin
|
||||
if (ibuffer_fire) begin
|
||||
if (writeback_if.data.rd == ibuffer_if[w].data.rd) begin
|
||||
operands_busy_n[0] = 0;
|
||||
end
|
||||
if (writeback_if.data.rd == ibuffer_if[w].data.rs1) begin
|
||||
operands_busy_n[1] = 0;
|
||||
end
|
||||
if (writeback_if.data.rd == ibuffer_if[w].data.rs2) begin
|
||||
operands_busy_n[2] = 0;
|
||||
end
|
||||
if (writeback_if.data.rd == ibuffer_if[w].data.rs3) begin
|
||||
operands_busy_n[3] = 0;
|
||||
end
|
||||
end else begin
|
||||
if (writeback_if.data.rd == staging_if[w].data.rd) begin
|
||||
operands_busy_n[0] = 0;
|
||||
end
|
||||
if (writeback_if.data.rd == staging_if[w].data.rs1) begin
|
||||
operands_busy_n[1] = 0;
|
||||
end
|
||||
if (writeback_if.data.rd == staging_if[w].data.rs2) begin
|
||||
operands_busy_n[2] = 0;
|
||||
end
|
||||
if (writeback_if.data.rd == staging_if[w].data.rs3) begin
|
||||
operands_busy_n[3] = 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
if (staging_fire && staging_if[w].data.wb) begin
|
||||
if (staging_if[w].data.rd == ibuffer_if[w].data.rd) begin
|
||||
operands_busy_n[0] = 1;
|
||||
end
|
||||
if (staging_if[w].data.rd == ibuffer_if[w].data.rs1) begin
|
||||
operands_busy_n[1] = 1;
|
||||
end
|
||||
if (staging_if[w].data.rd == ibuffer_if[w].data.rs2) begin
|
||||
operands_busy_n[2] = 1;
|
||||
end
|
||||
if (staging_if[w].data.rd == ibuffer_if[w].data.rs3) begin
|
||||
operands_busy_n[3] = 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
valid_out_r <= 0;
|
||||
inuse_regs <= '0;
|
||||
end else begin
|
||||
if (writeback_fire) begin
|
||||
inuse_regs[writeback_if[i].data.wis][writeback_if[i].data.rd] <= 0;
|
||||
inuse_regs[writeback_if.data.rd] <= 0;
|
||||
end
|
||||
if (~valid_out_r) begin
|
||||
valid_out_r <= valid_in;
|
||||
end else if (ready_out) begin
|
||||
if (scoreboard_if[i].data.wb) begin
|
||||
inuse_regs[scoreboard_if[i].data.wis][scoreboard_if[i].data.rd] <= 1;
|
||||
`ifdef PERF_ENABLE
|
||||
inuse_units[scoreboard_if[i].data.wis][scoreboard_if[i].data.rd] <= scoreboard_if[i].data.ex_type;
|
||||
if (scoreboard_if[i].data.ex_type == `EX_SFU) begin
|
||||
inuse_sfu[scoreboard_if[i].data.wis][scoreboard_if[i].data.rd] <= sfu_type;
|
||||
end
|
||||
`endif
|
||||
end
|
||||
valid_out_r <= 0;
|
||||
if (staging_fire && staging_if[w].data.wb) begin
|
||||
inuse_regs[staging_if[w].data.rd] <= 1;
|
||||
end
|
||||
end
|
||||
if (~valid_out_r) begin
|
||||
data_out_r <= data_in;
|
||||
operands_busy <= operands_busy_n;
|
||||
operands_ready[w] <= ~(| operands_busy_n);
|
||||
`ifdef PERF_ENABLE
|
||||
if (staging_fire && staging_if[w].data.wb) begin
|
||||
inuse_units[staging_if[w].data.rd] <= staging_if[w].data.ex_type;
|
||||
if (staging_if[w].data.ex_type == `EX_SFU) begin
|
||||
inuse_sfu[staging_if[w].data.rd] <= op_to_sfu_type(staging_if[w].data.op_type);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
end
|
||||
|
||||
assign ibuffer_if[i].ready = ready_in;
|
||||
assign scoreboard_if[i].valid = valid_out_r;
|
||||
assign scoreboard_if[i].data = data_out_r;
|
||||
|
||||
`ifdef SIMULATION
|
||||
reg [31:0] timeout_ctr;
|
||||
reg [31:0] timeout_ctr;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
timeout_ctr <= '0;
|
||||
end else begin
|
||||
if (ibuffer_if[i].valid && ~ibuffer_if[i].ready) begin
|
||||
`ifdef DBG_TRACE_CORE_PIPELINE
|
||||
`TRACE(3, ("%d: *** core%0d-scoreboard-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n",
|
||||
$time, CORE_ID, wis_to_wid(ibuffer_if[i].data.wis, i), ibuffer_if[i].data.PC, ibuffer_if[i].data.tmask, timeout_ctr,
|
||||
~ready_masks, ibuffer_if[i].data.uuid));
|
||||
end else begin
|
||||
if (staging_if[w].valid && ~staging_if[w].ready) begin
|
||||
`ifdef DBG_TRACE_PIPELINE
|
||||
`TRACE(3, ("%d: *** %s-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n",
|
||||
$time, INSTANCE_ID, w, {staging_if[w].data.PC, 1'b0}, staging_if[w].data.tmask, timeout_ctr,
|
||||
operands_busy, staging_if[w].data.uuid));
|
||||
`endif
|
||||
timeout_ctr <= timeout_ctr + 1;
|
||||
end else if (ibuffer_if[i].valid && ibuffer_if[i].ready) begin
|
||||
end else if (ibuffer_fire) begin
|
||||
timeout_ctr <= '0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
`RUNTIME_ASSERT((timeout_ctr < `STALL_TIMEOUT),
|
||||
("%t: *** core%0d-scoreboard-timeout: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)",
|
||||
$time, CORE_ID, wis_to_wid(ibuffer_if[i].data.wis, i), ibuffer_if[i].data.PC, ibuffer_if[i].data.tmask, timeout_ctr,
|
||||
~ready_masks, ibuffer_if[i].data.uuid));
|
||||
|
||||
`RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if[i].data.wis][writeback_if[i].data.rd] != 0,
|
||||
("%t: *** core%0d: invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)",
|
||||
$time, CORE_ID, wis_to_wid(writeback_if[i].data.wis, i), writeback_if[i].data.PC, writeback_if[i].data.tmask, writeback_if[i].data.rd, writeback_if[i].data.uuid));
|
||||
`RUNTIME_ASSERT((timeout_ctr < `STALL_TIMEOUT),
|
||||
("%t: *** %s timeout: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)",
|
||||
$time, INSTANCE_ID, w, {staging_if[w].data.PC, 1'b0}, staging_if[w].data.tmask, timeout_ctr,
|
||||
operands_busy, staging_if[w].data.uuid));
|
||||
|
||||
`RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if.data.rd] != 0,
|
||||
("%t: *** %s invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)",
|
||||
$time, INSTANCE_ID, w, {writeback_if.data.PC, 1'b0}, writeback_if.data.tmask, writeback_if.data.rd, writeback_if.data.uuid));
|
||||
`endif
|
||||
|
||||
|
||||
end
|
||||
|
||||
wire [PER_ISSUE_WARPS-1:0] arb_valid_in;
|
||||
wire [PER_ISSUE_WARPS-1:0][DATAW-1:0] arb_data_in;
|
||||
wire [PER_ISSUE_WARPS-1:0] arb_ready_in;
|
||||
|
||||
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin
|
||||
assign arb_valid_in[w] = staging_if[w].valid && operands_ready[w];
|
||||
assign arb_data_in[w] = staging_if[w].data;
|
||||
assign staging_if[w].ready = arb_ready_in[w] && operands_ready[w];
|
||||
end
|
||||
|
||||
`RESET_RELAY (arb_reset, reset);
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (PER_ISSUE_WARPS),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER ("F"),
|
||||
.LUTRAM (1),
|
||||
.OUT_BUF (4) // using 2-cycle EB for area reduction
|
||||
) out_arb (
|
||||
.clk (clk),
|
||||
.reset (arb_reset),
|
||||
.valid_in (arb_valid_in),
|
||||
.ready_in (arb_ready_in),
|
||||
.data_in (arb_data_in),
|
||||
.data_out ({
|
||||
scoreboard_if.data.uuid,
|
||||
scoreboard_if.data.tmask,
|
||||
scoreboard_if.data.PC,
|
||||
scoreboard_if.data.ex_type,
|
||||
scoreboard_if.data.op_type,
|
||||
scoreboard_if.data.op_args,
|
||||
scoreboard_if.data.wb,
|
||||
scoreboard_if.data.rd,
|
||||
scoreboard_if.data.rs1,
|
||||
scoreboard_if.data.rs2,
|
||||
scoreboard_if.data.rs3
|
||||
}),
|
||||
.valid_out (scoreboard_if.valid),
|
||||
.ready_out (scoreboard_if.ready),
|
||||
.sel_out (scoreboard_if.data.wis)
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,8 +14,9 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_sfu_unit import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
@ -28,71 +29,68 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
// Inputs
|
||||
VX_dispatch_if.slave dispatch_if [`ISSUE_WIDTH],
|
||||
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_fpu_to_csr_if.slave fpu_to_csr_if [`NUM_FPU_BLOCKS],
|
||||
VX_fpu_csr_if.slave fpu_csr_if [`NUM_FPU_BLOCKS],
|
||||
`endif
|
||||
VX_commit_csr_if.slave commit_csr_if,
|
||||
VX_sched_csr_if.slave sched_csr_if,
|
||||
|
||||
// Outputs
|
||||
VX_commit_if.master commit_if [`ISSUE_WIDTH],
|
||||
VX_commit_csr_if.slave commit_csr_if,
|
||||
VX_sched_csr_if.slave sched_csr_if,
|
||||
VX_warp_ctl_if.master warp_ctl_if
|
||||
VX_warp_ctl_if.master warp_ctl_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam BLOCK_SIZE = 1;
|
||||
localparam NUM_LANES = `NUM_SFU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam BLOCK_SIZE = 1;
|
||||
localparam NUM_LANES = `NUM_SFU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
|
||||
localparam RSP_ARB_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + (NUM_LANES * `XLEN) + `NR_BITS + 1 + `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam RSP_ARB_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + (NUM_LANES * `XLEN) + `NR_BITS + 1 + `PC_BITS + PID_WIDTH + 1 + 1;
|
||||
localparam RSP_ARB_SIZE = 1 + 1;
|
||||
localparam RSP_ARB_IDX_WCTL = 0;
|
||||
localparam RSP_ARB_IDX_CSRS = 1;
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) execute_if[BLOCK_SIZE]();
|
||||
|
||||
`RESET_RELAY (dispatch_reset, reset);
|
||||
) per_block_execute_if[BLOCK_SIZE]();
|
||||
|
||||
VX_dispatch_unit #(
|
||||
.BLOCK_SIZE (BLOCK_SIZE),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.OUT_REG (1)
|
||||
.OUT_BUF (1)
|
||||
) dispatch_unit (
|
||||
.clk (clk),
|
||||
.reset (dispatch_reset),
|
||||
.reset (reset),
|
||||
.dispatch_if(dispatch_if),
|
||||
.execute_if (execute_if)
|
||||
.execute_if (per_block_execute_if)
|
||||
);
|
||||
|
||||
wire [RSP_ARB_SIZE-1:0] rsp_arb_valid_in;
|
||||
wire [RSP_ARB_SIZE-1:0] rsp_arb_ready_in;
|
||||
wire [RSP_ARB_SIZE-1:0][RSP_ARB_DATAW-1:0] rsp_arb_data_in;
|
||||
|
||||
|
||||
// Warp control block
|
||||
// Warp control block
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) wctl_execute_if();
|
||||
VX_commit_if#(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) wctl_commit_if();
|
||||
|
||||
assign wctl_execute_if.valid = execute_if[0].valid && `INST_SFU_IS_WCTL(execute_if[0].data.op_type);
|
||||
assign wctl_execute_if.data = execute_if[0].data;
|
||||
|
||||
assign wctl_execute_if.valid = per_block_execute_if[0].valid && `INST_SFU_IS_WCTL(per_block_execute_if[0].data.op_type);
|
||||
assign wctl_execute_if.data = per_block_execute_if[0].data;
|
||||
|
||||
`RESET_RELAY (wctl_reset, reset);
|
||||
|
||||
|
||||
VX_wctl_unit #(
|
||||
.CORE_ID (CORE_ID),
|
||||
.INSTANCE_ID ($sformatf("%s-wctl", INSTANCE_ID)),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) wctl_unit (
|
||||
.clk (clk),
|
||||
.reset (wctl_reset),
|
||||
.execute_if (wctl_execute_if),
|
||||
.warp_ctl_if(warp_ctl_if),
|
||||
.execute_if (wctl_execute_if),
|
||||
.warp_ctl_if(warp_ctl_if),
|
||||
.commit_if (wctl_commit_if)
|
||||
);
|
||||
|
||||
|
@ -108,12 +106,13 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
.NUM_LANES (NUM_LANES)
|
||||
) csr_commit_if();
|
||||
|
||||
assign csr_execute_if.valid = execute_if[0].valid && `INST_SFU_IS_CSR(execute_if[0].data.op_type);
|
||||
assign csr_execute_if.data = execute_if[0].data;
|
||||
assign csr_execute_if.valid = per_block_execute_if[0].valid && `INST_SFU_IS_CSR(per_block_execute_if[0].data.op_type);
|
||||
assign csr_execute_if.data = per_block_execute_if[0].data;
|
||||
|
||||
`RESET_RELAY (csr_reset, reset);
|
||||
|
||||
VX_csr_unit #(
|
||||
.INSTANCE_ID ($sformatf("%s-csr", INSTANCE_ID)),
|
||||
.CORE_ID (CORE_ID),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) csr_unit (
|
||||
|
@ -122,20 +121,20 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
.base_dcrs (base_dcrs),
|
||||
.execute_if (csr_execute_if),
|
||||
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.mem_perf_if (mem_perf_if),
|
||||
.pipeline_perf_if(pipeline_perf_if),
|
||||
`endif
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_to_csr_if (fpu_to_csr_if),
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_csr_if (fpu_csr_if),
|
||||
`endif
|
||||
|
||||
.sched_csr_if (sched_csr_if),
|
||||
.commit_csr_if (commit_csr_if),
|
||||
.commit_if (csr_commit_if)
|
||||
);
|
||||
);
|
||||
|
||||
assign rsp_arb_valid_in[RSP_ARB_IDX_CSRS] = csr_commit_if.valid;
|
||||
assign rsp_arb_data_in[RSP_ARB_IDX_CSRS] = csr_commit_if.data;
|
||||
|
@ -145,18 +144,16 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
reg sfu_req_ready;
|
||||
always @(*) begin
|
||||
case (execute_if[0].data.op_type)
|
||||
case (per_block_execute_if[0].data.op_type)
|
||||
`INST_SFU_CSRRW,
|
||||
`INST_SFU_CSRRS,
|
||||
`INST_SFU_CSRRC: sfu_req_ready = csr_execute_if.ready;
|
||||
default: sfu_req_ready = wctl_execute_if.ready;
|
||||
endcase
|
||||
end
|
||||
assign execute_if[0].ready = sfu_req_ready;
|
||||
assign per_block_execute_if[0].ready = sfu_req_ready;
|
||||
|
||||
// response arbitration
|
||||
|
||||
`RESET_RELAY (commit_reset, reset);
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
|
@ -166,10 +163,10 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
.NUM_INPUTS (RSP_ARB_SIZE),
|
||||
.DATAW (RSP_ARB_DATAW),
|
||||
.ARBITER ("R"),
|
||||
.OUT_REG (3)
|
||||
.OUT_BUF (3)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (commit_reset),
|
||||
.reset (reset),
|
||||
.valid_in (rsp_arb_valid_in),
|
||||
.ready_in (rsp_arb_ready_in),
|
||||
.data_in (rsp_arb_data_in),
|
||||
|
@ -182,10 +179,10 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
VX_gather_unit #(
|
||||
.BLOCK_SIZE (BLOCK_SIZE),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.OUT_REG (1)
|
||||
.OUT_BUF (3)
|
||||
) gather_unit (
|
||||
.clk (clk),
|
||||
.reset (commit_reset),
|
||||
.reset (reset),
|
||||
.commit_in_if (arb_commit_if),
|
||||
.commit_out_if (commit_if)
|
||||
);
|
||||
|
|
|
@ -1,124 +0,0 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Shared-memory front end for the data cache ports.
//
// Every incoming dcache request port is passed through a two-output
// VX_smem_switch. Output (2*i) of the switch is forwarded unchanged to
// dcache_bus_out_if[i]; output (2*i + 1) is wired to request/response
// port i of the VX_shared_mem instance.
module VX_smem_unit import VX_gpu_pkg::*; #(
    parameter CORE_ID = 0
) (
    input wire              clk,
    input wire              reset,

`ifdef PERF_ENABLE
    output cache_perf_t     cache_perf,
`endif

    VX_mem_bus_if.slave     dcache_bus_in_if [DCACHE_NUM_REQS],
    VX_mem_bus_if.master    dcache_bus_out_if [DCACHE_NUM_REQS]
);
    `UNUSED_PARAM (CORE_ID)

    // Word-granular address width of the shared memory.
    localparam SMEM_ADDR_WIDTH = `SMEM_LOG_SIZE - `CLOG2(DCACHE_WORD_SIZE);

    // Request channel towards the shared memory (one lane per dcache port).
    wire [DCACHE_NUM_REQS-1:0]                              smem_req_valid;
    wire [DCACHE_NUM_REQS-1:0]                              smem_req_rw;
    wire [DCACHE_NUM_REQS-1:0][SMEM_ADDR_WIDTH-1:0]         smem_req_addr;
    wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE-1:0]        smem_req_byteen;
    wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0]      smem_req_data;
    wire [DCACHE_NUM_REQS-1:0][DCACHE_NOSM_TAG_WIDTH-1:0]   smem_req_tag;
    wire [DCACHE_NUM_REQS-1:0]                              smem_req_ready;

    // Response channel coming back from the shared memory.
    wire [DCACHE_NUM_REQS-1:0]                              smem_rsp_valid;
    wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0]      smem_rsp_data;
    wire [DCACHE_NUM_REQS-1:0][DCACHE_NOSM_TAG_WIDTH-1:0]   smem_rsp_tag;
    wire [DCACHE_NUM_REQS-1:0]                              smem_rsp_ready;

    // Two switch outputs per dcache port: even index -> dcache, odd index -> shared memory.
    VX_mem_bus_if #(
        .DATA_SIZE (DCACHE_WORD_SIZE),
        .TAG_WIDTH (DCACHE_NOSM_TAG_WIDTH)
    ) switch_out_bus_if[2 * DCACHE_NUM_REQS]();

    `RESET_RELAY (smem_reset, reset);
    `RESET_RELAY (switch_reset, reset);

    for (genvar r = 0; r < DCACHE_NUM_REQS; ++r) begin

        localparam DC_IDX = r * 2;      // switch output routed to the dcache
        localparam SM_IDX = r * 2 + 1;  // switch output routed to the shared memory

        VX_smem_switch #(
            .NUM_REQS    (2),
            .DATA_SIZE   (DCACHE_WORD_SIZE),
            .TAG_WIDTH   (DCACHE_TAG_WIDTH),
            .TAG_SEL_IDX (0),
            .ARBITER     ("P"),
            .OUT_REG_REQ (2),
            .OUT_REG_RSP (2)
        ) smem_switch (
            .clk        (clk),
            .reset      (switch_reset),
            .bus_in_if  (dcache_bus_in_if[r]),
            .bus_out_if (switch_out_bus_if[r * 2 +: 2])
        );

        // shared-memory request side
        assign smem_req_valid[r]  = switch_out_bus_if[SM_IDX].req_valid;
        assign smem_req_rw[r]     = switch_out_bus_if[SM_IDX].req_data.rw;
        assign smem_req_addr[r]   = switch_out_bus_if[SM_IDX].req_data.addr[SMEM_ADDR_WIDTH-1:0];
        assign smem_req_byteen[r] = switch_out_bus_if[SM_IDX].req_data.byteen;
        assign smem_req_data[r]   = switch_out_bus_if[SM_IDX].req_data.data;
        assign smem_req_tag[r]    = switch_out_bus_if[SM_IDX].req_data.tag;
        assign switch_out_bus_if[SM_IDX].req_ready = smem_req_ready[r];

        // shared-memory response side
        assign switch_out_bus_if[SM_IDX].rsp_valid     = smem_rsp_valid[r];
        assign switch_out_bus_if[SM_IDX].rsp_data.data = smem_rsp_data[r];
        assign switch_out_bus_if[SM_IDX].rsp_data.tag  = smem_rsp_tag[r];
        assign smem_rsp_ready[r] = switch_out_bus_if[SM_IDX].rsp_ready;

        // this bus goes to the dcache
        `ASSIGN_VX_MEM_BUS_IF (dcache_bus_out_if[r], switch_out_bus_if[DC_IDX]);
    end

    VX_shared_mem #(
        .INSTANCE_ID($sformatf("core%0d-smem", CORE_ID)),
        .SIZE       (1 << `SMEM_LOG_SIZE),
        .NUM_REQS   (DCACHE_NUM_REQS),
        .NUM_BANKS  (`SMEM_NUM_BANKS),
        .WORD_SIZE  (DCACHE_WORD_SIZE),
        .ADDR_WIDTH (SMEM_ADDR_WIDTH),
        .UUID_WIDTH (`UUID_WIDTH),
        .TAG_WIDTH  (DCACHE_NOSM_TAG_WIDTH)
    ) shared_mem (
        .clk        (clk),
        .reset      (smem_reset),

    `ifdef PERF_ENABLE
        .cache_perf (cache_perf),
    `endif

        // Core request
        .req_valid  (smem_req_valid),
        .req_rw     (smem_req_rw),
        .req_byteen (smem_req_byteen),
        .req_addr   (smem_req_addr),
        .req_data   (smem_req_data),
        .req_tag    (smem_req_tag),
        .req_ready  (smem_req_ready),

        // Core response
        .rsp_valid  (smem_rsp_valid),
        .rsp_data   (smem_rsp_data),
        .rsp_tag    (smem_rsp_tag),
        .rsp_ready  (smem_rsp_ready)
    );

endmodule
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,7 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_split_join import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -27,50 +27,58 @@ module VX_split_join import VX_gpu_pkg::*; #(
|
|||
output wire join_is_else,
|
||||
output wire [`NW_WIDTH-1:0] join_wid,
|
||||
output wire [`NUM_THREADS-1:0] join_tmask,
|
||||
output wire [`XLEN-1:0] join_pc
|
||||
output wire [`PC_BITS-1:0] join_pc,
|
||||
input wire [`NW_WIDTH-1:0] stack_wid,
|
||||
output wire [`DV_STACK_SIZEW-1:0] stack_ptr
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
wire [(`XLEN+`NUM_THREADS)-1:0] ipdom_data [`NUM_WARPS-1:0];
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
|
||||
wire [(`NUM_THREADS+`PC_BITS)-1:0] ipdom_data [`NUM_WARPS-1:0];
|
||||
wire [`DV_STACK_SIZEW-1:0] ipdom_q_ptr [`NUM_WARPS-1:0];
|
||||
wire ipdom_set [`NUM_WARPS-1:0];
|
||||
|
||||
wire [(`XLEN+`NUM_THREADS)-1:0] ipdom_q0 = {split.then_tmask | split.else_tmask, `XLEN'(0)};
|
||||
wire [(`XLEN+`NUM_THREADS)-1:0] ipdom_q1 = {split.else_tmask, split.next_pc};
|
||||
wire [(`NUM_THREADS+`PC_BITS)-1:0] ipdom_q0 = {split.then_tmask | split.else_tmask, `PC_BITS'(0)};
|
||||
wire [(`NUM_THREADS+`PC_BITS)-1:0] ipdom_q1 = {split.else_tmask, split.next_pc};
|
||||
|
||||
wire sjoin_is_dvg = (sjoin.stack_ptr != ipdom_q_ptr[wid]);
|
||||
|
||||
wire ipdom_push = valid && split.valid && split.is_dvg;
|
||||
wire ipdom_pop = valid && sjoin.valid && sjoin.is_dvg;
|
||||
wire ipdom_pop = valid && sjoin.valid && sjoin_is_dvg;
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
|
||||
`RESET_RELAY (ipdom_reset, reset);
|
||||
|
||||
VX_ipdom_stack #(
|
||||
.WIDTH (`XLEN+`NUM_THREADS),
|
||||
.DEPTH (`UP(`NUM_THREADS-1))
|
||||
.WIDTH (`NUM_THREADS+`PC_BITS),
|
||||
.DEPTH (`DV_STACK_SIZE)
|
||||
) ipdom_stack (
|
||||
.clk (clk),
|
||||
.reset (ipdom_reset),
|
||||
.push (ipdom_push && (i == wid)),
|
||||
.pop (ipdom_pop && (i == wid)),
|
||||
.q0 (ipdom_q0),
|
||||
.q1 (ipdom_q1),
|
||||
.d (ipdom_data[i]),
|
||||
.d_set (ipdom_set[i]),
|
||||
.q_ptr (ipdom_q_ptr[i]),
|
||||
.push (ipdom_push && (i == wid)),
|
||||
.pop (ipdom_pop && (i == wid)),
|
||||
`UNUSED_PIN (empty),
|
||||
`UNUSED_PIN (full)
|
||||
);
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + `NW_WIDTH + 1 + `XLEN + `NUM_THREADS),
|
||||
.DATAW (1 + 1 + 1 + `NW_WIDTH + `NUM_THREADS + `PC_BITS),
|
||||
.DEPTH (1),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in ({valid && sjoin.valid, sjoin.is_dvg, ipdom_set[wid], wid, ipdom_data[wid]}),
|
||||
.data_out ({join_valid, join_is_dvg, join_is_else, join_wid, join_tmask, join_pc})
|
||||
.data_in ({valid && sjoin.valid, sjoin_is_dvg, ipdom_set[wid], wid, ipdom_data[wid]}),
|
||||
.data_out ({join_valid, join_is_dvg, join_is_else, join_wid, {join_tmask, join_pc}})
|
||||
);
|
||||
|
||||
assign stack_ptr = ipdom_q_ptr[stack_wid];
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,379 +0,0 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`ifndef VX_TRACE_VH
|
||||
`define VX_TRACE_VH
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Emits the mnemonic of an execution-unit type ("ALU", "LSU", "FPU", "SFU")
// through `TRACE at the given verbosity level; any other value prints "?".
task trace_ex_type(input int level, input [`EX_BITS-1:0] ex_type);
    // Each `TRACE is wrapped in begin/end so the else-chain does not depend
    // on how the macro expands.
    if (ex_type == `EX_ALU) begin
        `TRACE(level, ("ALU"));
    end else if (ex_type == `EX_LSU) begin
        `TRACE(level, ("LSU"));
    end else if (ex_type == `EX_FPU) begin
        `TRACE(level, ("FPU"));
    end else if (ex_type == `EX_SFU) begin
        `TRACE(level, ("SFU"));
    end else begin
        `TRACE(level, ("?"));
    end
endtask
|
||||
|
||||
// Prints the assembly mnemonic of a decoded instruction through `TRACE.
//
//   level    - trace verbosity level forwarded to `TRACE
//   ex_type  - execution unit selector (`EX_ALU / `EX_LSU / `EX_FPU / `EX_SFU)
//   op_type  - unit-specific opcode, narrowed per unit with `INST_*_BITS casts
//   op_mod   - opcode modifier (branch / M-extension / W-variant flags for ALU,
//              sub-operation selector for FPU compare and misc ops)
//   rd, rs2  - register indices; only examined when EXT_F_ENABLE is defined,
//              to tell floating-point loads/stores from integer ones
//   use_imm  - selects the immediate form of ALU and CSR mnemonics
//   imm      - bit 0 selects double precision (FLEN_64), bit 1 selects the
//              64-bit integer conversion variant (XLEN_64)
//
// Unknown encodings print "?".
task trace_ex_op(input int level,
                 input [`EX_BITS-1:0] ex_type,
                 input [`INST_OP_BITS-1:0] op_type,
                 input [`INST_MOD_BITS-1:0] op_mod,
                 `UNUSED_ARG(input [`NR_BITS-1:0] rd),
                 `UNUSED_ARG(input [`NR_BITS-1:0] rs2),
                 input use_imm,
                 `UNUSED_ARG(input [`XLEN-1:0] imm)
);
    // These flags are assigned procedurally rather than in their declarations:
    // an initializer on a variable of a static task runs once at the start of
    // simulation (and requires an explicit 'static' keyword), so it would not
    // follow the arguments of each call.
    logic fdst_d;
    logic fcvt_l;
    logic rd_float;

`ifdef FLEN_64
    fdst_d = imm[0];
`else
    fdst_d = 0;
`endif

`ifdef XLEN_64
    fcvt_l = imm[1];
`else
    fcvt_l = 0;
`endif

`ifdef EXT_F_ENABLE
    rd_float = 1'(rd >> 5) || 1'(rs2 >> 5);
`else
    rd_float = 0;
`endif

    // NOTE: every `TRACE that sits directly under an if/else is wrapped in
    // begin/end so that the else cannot attach to an 'if' inside the macro
    // expansion (the macro definition is not visible from this file).
    case (ex_type)
    `EX_ALU: begin
        if (`INST_ALU_IS_BR(op_mod)) begin
            case (`INST_BR_BITS'(op_type))
                `INST_BR_EQ:    `TRACE(level, ("BEQ"));
                `INST_BR_NE:    `TRACE(level, ("BNE"));
                `INST_BR_LT:    `TRACE(level, ("BLT"));
                `INST_BR_GE:    `TRACE(level, ("BGE"));
                `INST_BR_LTU:   `TRACE(level, ("BLTU"));
                `INST_BR_GEU:   `TRACE(level, ("BGEU"));
                `INST_BR_JAL:   `TRACE(level, ("JAL"));
                `INST_BR_JALR:  `TRACE(level, ("JALR"));
                `INST_BR_ECALL: `TRACE(level, ("ECALL"));
                `INST_BR_EBREAK:`TRACE(level, ("EBREAK"));
                `INST_BR_URET:  `TRACE(level, ("URET"));
                `INST_BR_SRET:  `TRACE(level, ("SRET"));
                `INST_BR_MRET:  `TRACE(level, ("MRET"));
                default:        `TRACE(level, ("?"));
            endcase
        end else if (`INST_ALU_IS_M(op_mod)) begin
            if (`INST_ALU_IS_W(op_mod)) begin
                case (`INST_M_BITS'(op_type))
                    `INST_M_MUL:  `TRACE(level, ("MULW"));
                    `INST_M_DIV:  `TRACE(level, ("DIVW"));
                    `INST_M_DIVU: `TRACE(level, ("DIVUW"));
                    `INST_M_REM:  `TRACE(level, ("REMW"));
                    `INST_M_REMU: `TRACE(level, ("REMUW"));
                    default:      `TRACE(level, ("?"));
                endcase
            end else begin
                case (`INST_M_BITS'(op_type))
                    `INST_M_MUL:   `TRACE(level, ("MUL"));
                    `INST_M_MULH:  `TRACE(level, ("MULH"));
                    `INST_M_MULHSU:`TRACE(level, ("MULHSU"));
                    `INST_M_MULHU: `TRACE(level, ("MULHU"));
                    `INST_M_DIV:   `TRACE(level, ("DIV"));
                    `INST_M_DIVU:  `TRACE(level, ("DIVU"));
                    `INST_M_REM:   `TRACE(level, ("REM"));
                    `INST_M_REMU:  `TRACE(level, ("REMU"));
                    default:       `TRACE(level, ("?"));
                endcase
            end
        end else begin
            if (`INST_ALU_IS_W(op_mod)) begin
                if (use_imm) begin
                    case (`INST_ALU_BITS'(op_type))
                        `INST_ALU_ADD: `TRACE(level, ("ADDIW"));
                        `INST_ALU_SLL: `TRACE(level, ("SLLIW"));
                        `INST_ALU_SRL: `TRACE(level, ("SRLIW"));
                        `INST_ALU_SRA: `TRACE(level, ("SRAIW"));
                        default:       `TRACE(level, ("?"));
                    endcase
                end else begin
                    case (`INST_ALU_BITS'(op_type))
                        `INST_ALU_ADD: `TRACE(level, ("ADDW"));
                        `INST_ALU_SUB: `TRACE(level, ("SUBW"));
                        `INST_ALU_SLL: `TRACE(level, ("SLLW"));
                        `INST_ALU_SRL: `TRACE(level, ("SRLW"));
                        `INST_ALU_SRA: `TRACE(level, ("SRAW"));
                        default:       `TRACE(level, ("?"));
                    endcase
                end
            end else begin
                if (use_imm) begin
                    case (`INST_ALU_BITS'(op_type))
                        `INST_ALU_ADD:   `TRACE(level, ("ADDI"));
                        `INST_ALU_SLL:   `TRACE(level, ("SLLI"));
                        `INST_ALU_SRL:   `TRACE(level, ("SRLI"));
                        `INST_ALU_SRA:   `TRACE(level, ("SRAI"));
                        `INST_ALU_SLT:   `TRACE(level, ("SLTI"));
                        `INST_ALU_SLTU:  `TRACE(level, ("SLTIU"));
                        `INST_ALU_XOR:   `TRACE(level, ("XORI"));
                        `INST_ALU_OR:    `TRACE(level, ("ORI"));
                        `INST_ALU_AND:   `TRACE(level, ("ANDI"));
                        `INST_ALU_LUI:   `TRACE(level, ("LUI"));
                        `INST_ALU_AUIPC: `TRACE(level, ("AUIPC"));
                        default:         `TRACE(level, ("?"));
                    endcase
                end else begin
                    case (`INST_ALU_BITS'(op_type))
                        `INST_ALU_ADD:   `TRACE(level, ("ADD"));
                        `INST_ALU_SUB:   `TRACE(level, ("SUB"));
                        `INST_ALU_SLL:   `TRACE(level, ("SLL"));
                        `INST_ALU_SRL:   `TRACE(level, ("SRL"));
                        `INST_ALU_SRA:   `TRACE(level, ("SRA"));
                        `INST_ALU_SLT:   `TRACE(level, ("SLT"));
                        `INST_ALU_SLTU:  `TRACE(level, ("SLTU"));
                        `INST_ALU_XOR:   `TRACE(level, ("XOR"));
                        `INST_ALU_OR:    `TRACE(level, ("OR"));
                        `INST_ALU_AND:   `TRACE(level, ("AND"));
                        default:         `TRACE(level, ("?"));
                    endcase
                end
            end
        end
    end
    `EX_LSU: begin
        if (rd_float) begin
            // floating-point register involved: print the F-prefixed mnemonic
            case (`INST_LSU_BITS'(op_type))
                `INST_LSU_LW: `TRACE(level, ("FLW"));
                `INST_LSU_LD: `TRACE(level, ("FLD"));
                `INST_LSU_SW: `TRACE(level, ("FSW"));
                `INST_LSU_SD: `TRACE(level, ("FSD"));
                default:      `TRACE(level, ("?"));
            endcase
        end else begin
            case (`INST_LSU_BITS'(op_type))
                `INST_LSU_LB: `TRACE(level, ("LB"));
                `INST_LSU_LH: `TRACE(level, ("LH"));
                `INST_LSU_LW: `TRACE(level, ("LW"));
                `INST_LSU_LD: `TRACE(level, ("LD"));
                `INST_LSU_LBU:`TRACE(level, ("LBU"));
                `INST_LSU_LHU:`TRACE(level, ("LHU"));
                `INST_LSU_LWU:`TRACE(level, ("LWU"));
                `INST_LSU_SB: `TRACE(level, ("SB"));
                `INST_LSU_SH: `TRACE(level, ("SH"));
                `INST_LSU_SW: `TRACE(level, ("SW"));
                `INST_LSU_SD: `TRACE(level, ("SD"));
                `INST_LSU_FENCE:`TRACE(level,("FENCE"));
                default:      `TRACE(level, ("?"));
            endcase
        end
    end
    `EX_FPU: begin
        case (`INST_FPU_BITS'(op_type))
            `INST_FPU_ADD: begin
                if (fdst_d) begin
                    `TRACE(level, ("FADD.D"));
                end else begin
                    `TRACE(level, ("FADD.S"));
                end
            end
            `INST_FPU_SUB: begin
                if (fdst_d) begin
                    `TRACE(level, ("FSUB.D"));
                end else begin
                    `TRACE(level, ("FSUB.S"));
                end
            end
            `INST_FPU_MUL: begin
                if (fdst_d) begin
                    `TRACE(level, ("FMUL.D"));
                end else begin
                    `TRACE(level, ("FMUL.S"));
                end
            end
            `INST_FPU_DIV: begin
                if (fdst_d) begin
                    `TRACE(level, ("FDIV.D"));
                end else begin
                    `TRACE(level, ("FDIV.S"));
                end
            end
            `INST_FPU_SQRT: begin
                if (fdst_d) begin
                    `TRACE(level, ("FSQRT.D"));
                end else begin
                    `TRACE(level, ("FSQRT.S"));
                end
            end
            `INST_FPU_MADD: begin
                if (fdst_d) begin
                    `TRACE(level, ("FMADD.D"));
                end else begin
                    `TRACE(level, ("FMADD.S"));
                end
            end
            `INST_FPU_MSUB: begin
                if (fdst_d) begin
                    `TRACE(level, ("FMSUB.D"));
                end else begin
                    `TRACE(level, ("FMSUB.S"));
                end
            end
            `INST_FPU_NMADD: begin
                if (fdst_d) begin
                    `TRACE(level, ("FNMADD.D"));
                end else begin
                    `TRACE(level, ("FNMADD.S"));
                end
            end
            `INST_FPU_NMSUB: begin
                if (fdst_d) begin
                    `TRACE(level, ("FNMSUB.D"));
                end else begin
                    `TRACE(level, ("FNMSUB.S"));
                end
            end
            `INST_FPU_CMP: begin
                if (fdst_d) begin
                    case (op_mod[1:0])
                        0: `TRACE(level, ("FLE.D"));
                        1: `TRACE(level, ("FLT.D"));
                        2: `TRACE(level, ("FEQ.D"));
                        default: `TRACE(level, ("?"));
                    endcase
                end else begin
                    case (op_mod[1:0])
                        0: `TRACE(level, ("FLE.S"));
                        1: `TRACE(level, ("FLT.S"));
                        2: `TRACE(level, ("FEQ.S"));
                        default: `TRACE(level, ("?"));
                    endcase
                end
            end
            `INST_FPU_F2F: begin
                if (fdst_d) begin
                    `TRACE(level, ("FCVT.D.S"));
                end else begin
                    `TRACE(level, ("FCVT.S.D"));
                end
            end
            `INST_FPU_F2I: begin
                if (fdst_d) begin
                    if (fcvt_l) begin
                        `TRACE(level, ("FCVT.L.D"));
                    end else begin
                        `TRACE(level, ("FCVT.W.D"));
                    end
                end else begin
                    if (fcvt_l) begin
                        `TRACE(level, ("FCVT.L.S"));
                    end else begin
                        `TRACE(level, ("FCVT.W.S"));
                    end
                end
            end
            `INST_FPU_F2U: begin
                if (fdst_d) begin
                    if (fcvt_l) begin
                        `TRACE(level, ("FCVT.LU.D"));
                    end else begin
                        `TRACE(level, ("FCVT.WU.D"));
                    end
                end else begin
                    if (fcvt_l) begin
                        `TRACE(level, ("FCVT.LU.S"));
                    end else begin
                        `TRACE(level, ("FCVT.WU.S"));
                    end
                end
            end
            `INST_FPU_I2F: begin
                if (fdst_d) begin
                    if (fcvt_l) begin
                        `TRACE(level, ("FCVT.D.L"));
                    end else begin
                        `TRACE(level, ("FCVT.D.W"));
                    end
                end else begin
                    if (fcvt_l) begin
                        `TRACE(level, ("FCVT.S.L"));
                    end else begin
                        `TRACE(level, ("FCVT.S.W"));
                    end
                end
            end
            `INST_FPU_U2F: begin
                if (fdst_d) begin
                    if (fcvt_l) begin
                        `TRACE(level, ("FCVT.D.LU"));
                    end else begin
                        `TRACE(level, ("FCVT.D.WU"));
                    end
                end else begin
                    if (fcvt_l) begin
                        `TRACE(level, ("FCVT.S.LU"));
                    end else begin
                        `TRACE(level, ("FCVT.S.WU"));
                    end
                end
            end
            `INST_FPU_MISC: begin
                if (fdst_d) begin
                    case (op_mod)
                        0: `TRACE(level, ("FSGNJ.D"));
                        1: `TRACE(level, ("FSGNJN.D"));
                        2: `TRACE(level, ("FSGNJX.D"));
                        3: `TRACE(level, ("FCLASS.D"));
                        4: `TRACE(level, ("FMV.X.D"));
                        5: `TRACE(level, ("FMV.D.X"));
                        6: `TRACE(level, ("FMIN.D"));
                        7: `TRACE(level, ("FMAX.D"));
                        // catch-all, consistent with every other case in this task
                        default: `TRACE(level, ("?"));
                    endcase
                end else begin
                    case (op_mod)
                        0: `TRACE(level, ("FSGNJ.S"));
                        1: `TRACE(level, ("FSGNJN.S"));
                        2: `TRACE(level, ("FSGNJX.S"));
                        3: `TRACE(level, ("FCLASS.S"));
                        4: `TRACE(level, ("FMV.X.S"));
                        5: `TRACE(level, ("FMV.S.X"));
                        6: `TRACE(level, ("FMIN.S"));
                        7: `TRACE(level, ("FMAX.S"));
                        // catch-all, consistent with every other case in this task
                        default: `TRACE(level, ("?"));
                    endcase
                end
            end
            default: `TRACE(level, ("?"));
        endcase
    end
    `EX_SFU: begin
        case (`INST_SFU_BITS'(op_type))
            `INST_SFU_TMC:   `TRACE(level, ("TMC"));
            `INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN"));
            `INST_SFU_SPLIT: `TRACE(level, ("SPLIT"));
            `INST_SFU_JOIN:  `TRACE(level, ("JOIN"));
            `INST_SFU_BAR:   `TRACE(level, ("BAR"));
            `INST_SFU_PRED:  `TRACE(level, ("PRED"));
            `INST_SFU_CSRRW: begin
                if (use_imm) begin
                    `TRACE(level, ("CSRRWI"));
                end else begin
                    `TRACE(level, ("CSRRW"));
                end
            end
            `INST_SFU_CSRRS: begin
                if (use_imm) begin
                    `TRACE(level, ("CSRRSI"));
                end else begin
                    `TRACE(level, ("CSRRS"));
                end
            end
            `INST_SFU_CSRRC: begin
                if (use_imm) begin
                    `TRACE(level, ("CSRRCI"));
                end else begin
                    `TRACE(level, ("CSRRC"));
                end
            end
            default:         `TRACE(level, ("?"));
        endcase
    end
    default: `TRACE(level, ("?"));
    endcase
endtask
|
||||
|
||||
// Emits the symbolic name of a base device-configuration-register address
// through `TRACE at the given verbosity level; unknown addresses print "?".
task trace_base_dcr(input int level, input [`VX_DCR_ADDR_WIDTH-1:0] addr);
    // Each `TRACE is wrapped in begin/end so the else-chain does not depend
    // on how the macro expands.
    if (addr == `VX_DCR_BASE_STARTUP_ADDR0) begin
        `TRACE(level, ("STARTUP_ADDR0"));
    end else if (addr == `VX_DCR_BASE_STARTUP_ADDR1) begin
        `TRACE(level, ("STARTUP_ADDR1"));
    end else if (addr == `VX_DCR_BASE_MPM_CLASS) begin
        `TRACE(level, ("MPM_CLASS"));
    end else begin
        `TRACE(level, ("?"));
    end
endtask
|
||||
|
||||
`endif
|
||||
|
||||
`endif // VX_TRACE_VH
|
399
hw/rtl/core/VX_trace_pkg.sv
Normal file
399
hw/rtl/core/VX_trace_pkg.sv
Normal file
|
@ -0,0 +1,399 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`ifndef VX_TRACE_PKG_VH
|
||||
`define VX_TRACE_PKG_VH
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
package VX_trace_pkg;
|
||||
|
||||
`ifdef SIMULATION
|
||||
|
||||
`ifdef SV_DPI
|
||||
import "DPI-C" function void dpi_trace(input int level, input string format /*verilator sformat*/);
|
||||
`endif
|
||||
|
||||
import VX_gpu_pkg::*;
|
||||
|
||||
// Prints the short name of the execution unit selected by `ex_type`.
//   level   - verbosity level forwarded to `TRACE
//   ex_type - execution unit selector
// Selectors that match none of the listed units are printed as "?".
task trace_ex_type(input int level, input [`EX_BITS-1:0] ex_type);
    case (ex_type)
        `EX_ALU: begin
            `TRACE(level, ("ALU"));
        end
        `EX_LSU: begin
            `TRACE(level, ("LSU"));
        end
        `EX_FPU: begin
            `TRACE(level, ("FPU"));
        end
        `EX_SFU: begin
            `TRACE(level, ("SFU"));
        end
        default: begin
            `TRACE(level, ("?"));
        end
    endcase
endtask
|
||||
|
||||
// Prints the assembly mnemonic of a decoded instruction.
//
//   level   - verbosity level forwarded to `TRACE
//   ex_type - execution unit selector (`EX_ALU, `EX_LSU, `EX_FPU or `EX_SFU)
//   op_type - unit-specific opcode; it is cast to the selected unit's opcode
//             width before being matched
//   op_args - decoded operand modifiers; the alu, lsu, fpu, wctl and csr
//             views are read depending on ex_type / op_type
//
// Any encoding that matches none of the listed opcodes prints "?".
//
// Every `TRACE call placed in an if/else branch is wrapped in begin/end.
// NOTE(review): `TRACE is defined outside this file. If it ever expands to a
// bare `if (...) ...` statement, an unguarded `if (c) `TRACE(..); else ...`
// would attach the else to the macro's inner if; the explicit blocks make the
// branches independent of the macro's expansion.
task trace_ex_op(input int level,
                 input [`EX_BITS-1:0] ex_type,
                 input [`INST_OP_BITS-1:0] op_type,
                 input VX_gpu_pkg::op_args_t op_args
);
    case (ex_type)
    `EX_ALU: begin
        case (op_args.alu.xtype)
        `ALU_TYPE_ARITH: begin
            if (op_args.alu.is_w) begin
                // word-sized (*W) variants
                if (op_args.alu.use_imm) begin
                    case (`INST_ALU_BITS'(op_type))
                    `INST_ALU_ADD: `TRACE(level, ("ADDIW"));
                    `INST_ALU_SLL: `TRACE(level, ("SLLIW"));
                    `INST_ALU_SRL: `TRACE(level, ("SRLIW"));
                    `INST_ALU_SRA: `TRACE(level, ("SRAIW"));
                    default:       `TRACE(level, ("?"));
                    endcase
                end else begin
                    case (`INST_ALU_BITS'(op_type))
                    `INST_ALU_ADD: `TRACE(level, ("ADDW"));
                    `INST_ALU_SUB: `TRACE(level, ("SUBW"));
                    `INST_ALU_SLL: `TRACE(level, ("SLLW"));
                    `INST_ALU_SRL: `TRACE(level, ("SRLW"));
                    `INST_ALU_SRA: `TRACE(level, ("SRAW"));
                    default:       `TRACE(level, ("?"));
                    endcase
                end
            end else begin
                if (op_args.alu.use_imm) begin
                    case (`INST_ALU_BITS'(op_type))
                    `INST_ALU_ADD:   `TRACE(level, ("ADDI"));
                    `INST_ALU_SLL:   `TRACE(level, ("SLLI"));
                    `INST_ALU_SRL:   `TRACE(level, ("SRLI"));
                    `INST_ALU_SRA:   `TRACE(level, ("SRAI"));
                    `INST_ALU_SLT:   `TRACE(level, ("SLTI"));
                    `INST_ALU_SLTU:  `TRACE(level, ("SLTIU"));
                    `INST_ALU_XOR:   `TRACE(level, ("XORI"));
                    `INST_ALU_OR:    `TRACE(level, ("ORI"));
                    `INST_ALU_AND:   `TRACE(level, ("ANDI"));
                    `INST_ALU_LUI:   `TRACE(level, ("LUI"));
                    `INST_ALU_AUIPC: `TRACE(level, ("AUIPC"));
                    default:         `TRACE(level, ("?"));
                    endcase
                end else begin
                    case (`INST_ALU_BITS'(op_type))
                    `INST_ALU_ADD:   `TRACE(level, ("ADD"));
                    `INST_ALU_SUB:   `TRACE(level, ("SUB"));
                    `INST_ALU_SLL:   `TRACE(level, ("SLL"));
                    `INST_ALU_SRL:   `TRACE(level, ("SRL"));
                    `INST_ALU_SRA:   `TRACE(level, ("SRA"));
                    `INST_ALU_SLT:   `TRACE(level, ("SLT"));
                    `INST_ALU_SLTU:  `TRACE(level, ("SLTU"));
                    `INST_ALU_XOR:   `TRACE(level, ("XOR"));
                    `INST_ALU_OR:    `TRACE(level, ("OR"));
                    `INST_ALU_AND:   `TRACE(level, ("AND"));
                    `INST_ALU_CZEQ:  `TRACE(level, ("CZERO.EQZ"));
                    `INST_ALU_CZNE:  `TRACE(level, ("CZERO.NEZ"));
                    default:         `TRACE(level, ("?"));
                    endcase
                end
            end
        end
        `ALU_TYPE_BRANCH: begin
            case (`INST_BR_BITS'(op_type))
            `INST_BR_EQ:    `TRACE(level, ("BEQ"));
            `INST_BR_NE:    `TRACE(level, ("BNE"));
            `INST_BR_LT:    `TRACE(level, ("BLT"));
            `INST_BR_GE:    `TRACE(level, ("BGE"));
            `INST_BR_LTU:   `TRACE(level, ("BLTU"));
            `INST_BR_GEU:   `TRACE(level, ("BGEU"));
            `INST_BR_JAL:   `TRACE(level, ("JAL"));
            `INST_BR_JALR:  `TRACE(level, ("JALR"));
            `INST_BR_ECALL: `TRACE(level, ("ECALL"));
            `INST_BR_EBREAK:`TRACE(level, ("EBREAK"));
            `INST_BR_URET:  `TRACE(level, ("URET"));
            `INST_BR_SRET:  `TRACE(level, ("SRET"));
            `INST_BR_MRET:  `TRACE(level, ("MRET"));
            default:        `TRACE(level, ("?"));
            endcase
        end
        `ALU_TYPE_MULDIV: begin
            if (op_args.alu.is_w) begin
                case (`INST_M_BITS'(op_type))
                `INST_M_MUL:  `TRACE(level, ("MULW"));
                `INST_M_DIV:  `TRACE(level, ("DIVW"));
                `INST_M_DIVU: `TRACE(level, ("DIVUW"));
                `INST_M_REM:  `TRACE(level, ("REMW"));
                `INST_M_REMU: `TRACE(level, ("REMUW"));
                default:      `TRACE(level, ("?"));
                endcase
            end else begin
                case (`INST_M_BITS'(op_type))
                `INST_M_MUL:   `TRACE(level, ("MUL"));
                `INST_M_MULH:  `TRACE(level, ("MULH"));
                `INST_M_MULHSU:`TRACE(level, ("MULHSU"));
                `INST_M_MULHU: `TRACE(level, ("MULHU"));
                `INST_M_DIV:   `TRACE(level, ("DIV"));
                `INST_M_DIVU:  `TRACE(level, ("DIVU"));
                `INST_M_REM:   `TRACE(level, ("REM"));
                `INST_M_REMU:  `TRACE(level, ("REMU"));
                default:       `TRACE(level, ("?"));
                endcase
            end
        end
        default: `TRACE(level, ("?"));
        endcase
    end
    `EX_LSU: begin
        if (op_args.lsu.is_float) begin
            // floating-point loads/stores reuse the integer LSU opcodes
            case (`INST_LSU_BITS'(op_type))
            `INST_LSU_LW: `TRACE(level, ("FLW"));
            `INST_LSU_LD: `TRACE(level, ("FLD"));
            `INST_LSU_SW: `TRACE(level, ("FSW"));
            `INST_LSU_SD: `TRACE(level, ("FSD"));
            default:      `TRACE(level, ("?"));
            endcase
        end else begin
            case (`INST_LSU_BITS'(op_type))
            `INST_LSU_LB: `TRACE(level, ("LB"));
            `INST_LSU_LH: `TRACE(level, ("LH"));
            `INST_LSU_LW: `TRACE(level, ("LW"));
            `INST_LSU_LD: `TRACE(level, ("LD"));
            `INST_LSU_LBU:`TRACE(level, ("LBU"));
            `INST_LSU_LHU:`TRACE(level, ("LHU"));
            `INST_LSU_LWU:`TRACE(level, ("LWU"));
            `INST_LSU_SB: `TRACE(level, ("SB"));
            `INST_LSU_SH: `TRACE(level, ("SH"));
            `INST_LSU_SW: `TRACE(level, ("SW"));
            `INST_LSU_SD: `TRACE(level, ("SD"));
            `INST_LSU_FENCE:`TRACE(level,("FENCE"));
            default:      `TRACE(level, ("?"));
            endcase
        end
    end
    `EX_FPU: begin
        // fmt[0] selects the ".D" spelling over ".S"; for conversions fmt[1]
        // selects the "L"/"LU" integer spelling over "W"/"WU".
        case (`INST_FPU_BITS'(op_type))
        `INST_FPU_ADD: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FADD.D"));
            end else begin
                `TRACE(level, ("FADD.S"));
            end
        end
        `INST_FPU_SUB: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FSUB.D"));
            end else begin
                `TRACE(level, ("FSUB.S"));
            end
        end
        `INST_FPU_MUL: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FMUL.D"));
            end else begin
                `TRACE(level, ("FMUL.S"));
            end
        end
        `INST_FPU_DIV: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FDIV.D"));
            end else begin
                `TRACE(level, ("FDIV.S"));
            end
        end
        `INST_FPU_SQRT: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FSQRT.D"));
            end else begin
                `TRACE(level, ("FSQRT.S"));
            end
        end
        `INST_FPU_MADD: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FMADD.D"));
            end else begin
                `TRACE(level, ("FMADD.S"));
            end
        end
        `INST_FPU_MSUB: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FMSUB.D"));
            end else begin
                `TRACE(level, ("FMSUB.S"));
            end
        end
        `INST_FPU_NMADD: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FNMADD.D"));
            end else begin
                `TRACE(level, ("FNMADD.S"));
            end
        end
        `INST_FPU_NMSUB: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FNMSUB.D"));
            end else begin
                `TRACE(level, ("FNMSUB.S"));
            end
        end
        `INST_FPU_CMP: begin
            // the comparison kind is taken from the low two bits of frm
            if (op_args.fpu.fmt[0]) begin
                case (op_args.fpu.frm[1:0])
                0:       `TRACE(level, ("FLE.D"));
                1:       `TRACE(level, ("FLT.D"));
                2:       `TRACE(level, ("FEQ.D"));
                default: `TRACE(level, ("?"));
                endcase
            end else begin
                case (op_args.fpu.frm[1:0])
                0:       `TRACE(level, ("FLE.S"));
                1:       `TRACE(level, ("FLT.S"));
                2:       `TRACE(level, ("FEQ.S"));
                default: `TRACE(level, ("?"));
                endcase
            end
        end
        `INST_FPU_F2F: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FCVT.D.S"));
            end else begin
                `TRACE(level, ("FCVT.S.D"));
            end
        end
        `INST_FPU_F2I: begin
            if (op_args.fpu.fmt[0]) begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.L.D"));
                end else begin
                    `TRACE(level, ("FCVT.W.D"));
                end
            end else begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.L.S"));
                end else begin
                    `TRACE(level, ("FCVT.W.S"));
                end
            end
        end
        `INST_FPU_F2U: begin
            if (op_args.fpu.fmt[0]) begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.LU.D"));
                end else begin
                    `TRACE(level, ("FCVT.WU.D"));
                end
            end else begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.LU.S"));
                end else begin
                    `TRACE(level, ("FCVT.WU.S"));
                end
            end
        end
        `INST_FPU_I2F: begin
            if (op_args.fpu.fmt[0]) begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.D.L"));
                end else begin
                    `TRACE(level, ("FCVT.D.W"));
                end
            end else begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.S.L"));
                end else begin
                    `TRACE(level, ("FCVT.S.W"));
                end
            end
        end
        `INST_FPU_U2F: begin
            if (op_args.fpu.fmt[0]) begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.D.LU"));
                end else begin
                    `TRACE(level, ("FCVT.D.WU"));
                end
            end else begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.S.LU"));
                end else begin
                    `TRACE(level, ("FCVT.S.WU"));
                end
            end
        end
        `INST_FPU_MISC: begin
            // the miscellaneous operation is selected by the full frm field
            if (op_args.fpu.fmt[0]) begin
                case (op_args.fpu.frm)
                0: `TRACE(level, ("FSGNJ.D"));
                1: `TRACE(level, ("FSGNJN.D"));
                2: `TRACE(level, ("FSGNJX.D"));
                3: `TRACE(level, ("FCLASS.D"));
                4: `TRACE(level, ("FMV.X.D"));
                5: `TRACE(level, ("FMV.D.X"));
                6: `TRACE(level, ("FMIN.D"));
                7: `TRACE(level, ("FMAX.D"));
                // same fallback as every other case in this task
                default: `TRACE(level, ("?"));
                endcase
            end else begin
                case (op_args.fpu.frm)
                0: `TRACE(level, ("FSGNJ.S"));
                1: `TRACE(level, ("FSGNJN.S"));
                2: `TRACE(level, ("FSGNJX.S"));
                3: `TRACE(level, ("FCLASS.S"));
                4: `TRACE(level, ("FMV.X.S"));
                5: `TRACE(level, ("FMV.S.X"));
                6: `TRACE(level, ("FMIN.S"));
                7: `TRACE(level, ("FMAX.S"));
                // same fallback as every other case in this task
                default: `TRACE(level, ("?"));
                endcase
            end
        end
        default: `TRACE(level, ("?"));
        endcase
    end
    `EX_SFU: begin
        case (`INST_SFU_BITS'(op_type))
        `INST_SFU_TMC:   `TRACE(level, ("TMC"));
        `INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN"));
        `INST_SFU_SPLIT: begin
            if (op_args.wctl.is_neg) begin
                `TRACE(level, ("SPLIT.N"));
            end else begin
                `TRACE(level, ("SPLIT"));
            end
        end
        `INST_SFU_JOIN:  `TRACE(level, ("JOIN"));
        `INST_SFU_BAR:   `TRACE(level, ("BAR"));
        `INST_SFU_PRED: begin
            if (op_args.wctl.is_neg) begin
                `TRACE(level, ("PRED.N"));
            end else begin
                `TRACE(level, ("PRED"));
            end
        end
        `INST_SFU_CSRRW: begin
            if (op_args.csr.use_imm) begin
                `TRACE(level, ("CSRRWI"));
            end else begin
                `TRACE(level, ("CSRRW"));
            end
        end
        `INST_SFU_CSRRS: begin
            if (op_args.csr.use_imm) begin
                `TRACE(level, ("CSRRSI"));
            end else begin
                `TRACE(level, ("CSRRS"));
            end
        end
        `INST_SFU_CSRRC: begin
            if (op_args.csr.use_imm) begin
                `TRACE(level, ("CSRRCI"));
            end else begin
                `TRACE(level, ("CSRRC"));
            end
        end
        default:         `TRACE(level, ("?"));
        endcase
    end
    default: `TRACE(level, ("?"));
    endcase
endtask
|
||||
|
||||
// Prints the unit-specific operand modifiers of a decoded instruction as a
// ", name=value" list.
//
//   level   - verbosity level forwarded to `TRACE
//   ex_type - execution unit selector; chooses which op_args view is printed
//   op_type - unit-specific opcode; only consulted for the SFU, where the
//             CSR fields are printed when `INST_SFU_IS_CSR(op_type) holds
//   op_args - decoded operand modifiers
//
// Nothing is printed for non-CSR SFU opcodes or for unlisted ex_type values.
task trace_op_args(input int level,
                   input [`EX_BITS-1:0] ex_type,
                   input [`INST_OP_BITS-1:0] op_type,
                   input VX_gpu_pkg::op_args_t op_args
);
    case (ex_type)
        `EX_ALU:
            `TRACE(level, (", use_PC=%b, use_imm=%b, imm=0x%0h",
                           op_args.alu.use_PC,
                           op_args.alu.use_imm,
                           op_args.alu.imm));
        `EX_LSU:
            `TRACE(level, (", offset=0x%0h",
                           op_args.lsu.offset));
        `EX_FPU:
            `TRACE(level, (", fmt=0x%0h, frm=0x%0h",
                           op_args.fpu.fmt,
                           op_args.fpu.frm));
        `EX_SFU: begin
            if (`INST_SFU_IS_CSR(op_type)) begin
                `TRACE(level, (", addr=0x%0h, use_imm=%b, imm=0x%0h",
                               op_args.csr.addr,
                               op_args.csr.use_imm,
                               op_args.csr.imm));
            end
        end
        default: begin
            // no operand details for other execution units
        end
    endcase
endtask
|
||||
|
||||
// Prints the symbolic name of a base device-configuration register.
//   level - verbosity level forwarded to `TRACE
//   addr  - DCR address to decode
// Addresses other than the ones listed below are printed as "?".
task trace_base_dcr(input int level, input [`VX_DCR_ADDR_WIDTH-1:0] addr);
    case (addr)
        `VX_DCR_BASE_STARTUP_ADDR0: begin
            `TRACE(level, ("STARTUP_ADDR0"));
        end
        `VX_DCR_BASE_STARTUP_ADDR1: begin
            `TRACE(level, ("STARTUP_ADDR1"));
        end
        `VX_DCR_BASE_STARTUP_ARG0: begin
            `TRACE(level, ("STARTUP_ARG0"));
        end
        `VX_DCR_BASE_STARTUP_ARG1: begin
            `TRACE(level, ("STARTUP_ARG1"));
        end
        `VX_DCR_BASE_MPM_CLASS: begin
            `TRACE(level, ("MPM_CLASS"));
        end
        default: begin
            `TRACE(level, ("?"));
        end
    endcase
endtask
|
||||
|
||||
`endif
|
||||
|
||||
endpackage
|
||||
|
||||
`endif // VX_TRACE_PKG_VH
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,7 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_wctl_unit import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0,
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter NUM_LANES = 1
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -22,22 +22,22 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
// Inputs
|
||||
VX_execute_if.slave execute_if,
|
||||
|
||||
|
||||
// Outputs
|
||||
VX_warp_ctl_if.master warp_ctl_if,
|
||||
VX_commit_if.master commit_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam LANE_BITS = `CLOG2(NUM_LANES);
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam WCTL_WIDTH = $bits(tmc_t) + $bits(wspawn_t) + $bits(split_t) + $bits(join_t) + $bits(barrier_t);
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + WCTL_WIDTH + PID_WIDTH + 1 + 1;
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + WCTL_WIDTH + PID_WIDTH + 1 + 1 + `DV_STACK_SIZEW;
|
||||
|
||||
`UNUSED_VAR (execute_if.data.rs3_data)
|
||||
|
||||
|
||||
tmc_t tmc, tmc_r;
|
||||
wspawn_t wspawn, wspawn_r;
|
||||
wspawn_t wspawn, wspawn_r;
|
||||
split_t split, split_r;
|
||||
join_t sjoin, sjoin_r;
|
||||
barrier_t barrier, barrier_r;
|
||||
|
@ -55,14 +55,16 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
end else begin
|
||||
assign tid = 0;
|
||||
end
|
||||
|
||||
|
||||
wire [`XLEN-1:0] rs1_data = execute_if.data.rs1_data[tid];
|
||||
wire [`XLEN-1:0] rs2_data = execute_if.data.rs2_data[tid];
|
||||
`UNUSED_VAR (rs1_data)
|
||||
|
||||
|
||||
wire not_pred = execute_if.data.op_args.wctl.is_neg;
|
||||
|
||||
wire [NUM_LANES-1:0] taken;
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign taken[i] = execute_if.data.rs1_data[i][0];
|
||||
assign taken[i] = (execute_if.data.rs1_data[i][0] ^ not_pred);
|
||||
end
|
||||
|
||||
reg [`NUM_THREADS-1:0] then_tmask_r, then_tmask_n;
|
||||
|
@ -93,17 +95,27 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
assign tmc.tmask = is_pred ? pred_mask : rs1_data[`NUM_THREADS-1:0];
|
||||
|
||||
// split
|
||||
|
||||
|
||||
wire [`CLOG2(`NUM_THREADS+1)-1:0] then_tmask_cnt, else_tmask_cnt;
|
||||
`POP_COUNT(then_tmask_cnt, then_tmask_n);
|
||||
`POP_COUNT(else_tmask_cnt, else_tmask_n);
|
||||
wire then_first = (then_tmask_cnt >= else_tmask_cnt);
|
||||
wire [`NUM_THREADS-1:0] taken_tmask = then_first ? then_tmask_n : else_tmask_n;
|
||||
wire [`NUM_THREADS-1:0] ntaken_tmask = then_first ? else_tmask_n : then_tmask_n;
|
||||
|
||||
assign split.valid = is_split;
|
||||
assign split.is_dvg = has_then && has_else;
|
||||
assign split.then_tmask = then_tmask_n;
|
||||
assign split.else_tmask = else_tmask_n;
|
||||
assign split.next_pc = execute_if.data.PC + 4;
|
||||
assign split.then_tmask = taken_tmask;
|
||||
assign split.else_tmask = ntaken_tmask;
|
||||
assign split.next_pc = execute_if.data.PC + `PC_BITS'(2);
|
||||
|
||||
assign warp_ctl_if.dvstack_wid = execute_if.data.wid;
|
||||
wire [`DV_STACK_SIZEW-1:0] dvstack_ptr;
|
||||
|
||||
// join
|
||||
|
||||
assign sjoin.valid = is_join;
|
||||
assign sjoin.is_dvg = rs1_data[0];
|
||||
assign sjoin.valid = is_join;
|
||||
assign sjoin.stack_ptr = rs1_data[`DV_STACK_SIZEW-1:0];
|
||||
|
||||
// barrier
|
||||
assign barrier.valid = is_bar;
|
||||
|
@ -114,6 +126,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
assign barrier.is_global = 1'b0;
|
||||
`endif
|
||||
assign barrier.size_m1 = rs2_data[$bits(barrier.size_m1)-1:0] - $bits(barrier.size_m1)'(1);
|
||||
assign barrier.is_noop = (rs2_data[$bits(barrier.size_m1)-1:0] == $bits(barrier.size_m1)'(1));
|
||||
|
||||
// wspawn
|
||||
|
||||
|
@ -123,10 +136,10 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
end
|
||||
assign wspawn.valid = is_wspawn;
|
||||
assign wspawn.wmask = wspawn_wmask;
|
||||
assign wspawn.pc = rs2_data;
|
||||
assign wspawn.pc = rs2_data[1 +: `PC_BITS];
|
||||
|
||||
// response
|
||||
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2)
|
||||
|
@ -135,8 +148,8 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
.reset (reset),
|
||||
.valid_in (execute_if.valid),
|
||||
.ready_in (execute_if.ready),
|
||||
.data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, {tmc, wspawn, split, sjoin, barrier}}),
|
||||
.data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.rd, commit_if.data.wb, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop, {tmc_r, wspawn_r, split_r, sjoin_r, barrier_r}}),
|
||||
.data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, {tmc, wspawn, split, sjoin, barrier}, warp_ctl_if.dvstack_ptr}),
|
||||
.data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.rd, commit_if.data.wb, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop, {tmc_r, wspawn_r, split_r, sjoin_r, barrier_r}, dvstack_ptr}),
|
||||
.valid_out (commit_if.valid),
|
||||
.ready_out (commit_if.ready)
|
||||
);
|
||||
|
@ -148,9 +161,9 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
assign warp_ctl_if.split = split_r;
|
||||
assign warp_ctl_if.sjoin = sjoin_r;
|
||||
assign warp_ctl_if.barrier = barrier_r;
|
||||
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign commit_if.data.data[i] = `XLEN'(split_r.is_dvg);
|
||||
assign commit_if.data.data[i] = `XLEN'(dvstack_ptr);
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue