mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
Merge branch 'mranduril-vortex_vm_rebased' into vortex_vm
This commit is contained in:
commit
35c15f554d
186 changed files with 36003 additions and 4008 deletions
270
.github/workflows/ci.yml
vendored
Normal file
270
.github/workflows/ci.yml
vendored
Normal file
|
@ -0,0 +1,270 @@
|
|||
# Copyright © 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
name: CI
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
jobs:
|
||||
setup:
|
||||
runs-on: ubuntu-20.04
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
- name: Cache Toolchain Directory
|
||||
id: cache-toolchain
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: tools
|
||||
key: ${{ runner.os }}-toolchain-v0.1
|
||||
restore-keys: |
|
||||
${{ runner.os }}-toolchain-
|
||||
|
||||
- name: Cache Third Party Directory
|
||||
id: cache-thirdparty
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: third_party
|
||||
key: ${{ runner.os }}-thirdparty-v0.1
|
||||
restore-keys: |
|
||||
${{ runner.os }}-thirdparty-
|
||||
|
||||
- name: Install Dependencies
|
||||
if: steps.cache-toolchain.outputs.cache-hit != 'true' || steps.cache-thirdparty.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
sudo bash ./ci/system_updates.sh
|
||||
|
||||
- name: Setup Toolchain
|
||||
if: steps.cache-toolchain.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
TOOLDIR=$PWD/tools
|
||||
mkdir -p build
|
||||
cd build
|
||||
../configure --tooldir=$TOOLDIR
|
||||
ci/toolchain_install.sh --all
|
||||
|
||||
- name: Setup Third Party
|
||||
if: steps.cache-thirdparty.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
make -C third_party > /dev/null
|
||||
|
||||
# build:
|
||||
# runs-on: ubuntu-20.04
|
||||
# needs: setup
|
||||
# strategy:
|
||||
# matrix:
|
||||
# xlen: [32, 64]
|
||||
|
||||
# steps:
|
||||
# - name: Checkout code
|
||||
# uses: actions/checkout@v2
|
||||
|
||||
# - name: Install Dependencies
|
||||
# run: |
|
||||
# sudo bash ./ci/system_updates.sh
|
||||
|
||||
# - name: Cache Toolchain Directory
|
||||
# id: cache-toolchain
|
||||
# uses: actions/cache@v2
|
||||
# with:
|
||||
# path: tools
|
||||
# key: ${{ runner.os }}-toolchain-v0.1
|
||||
# restore-keys: |
|
||||
# ${{ runner.os }}-toolchain-
|
||||
|
||||
# - name: Cache Third Party Directory
|
||||
# id: cache-thirdparty
|
||||
# uses: actions/cache@v2
|
||||
# with:
|
||||
# path: third_party
|
||||
# key: ${{ runner.os }}-thirdparty-v0.1
|
||||
# restore-keys: |
|
||||
# ${{ runner.os }}-thirdparty-
|
||||
|
||||
# - name: Run Build
|
||||
# run: |
|
||||
# TOOLDIR=$PWD/tools
|
||||
# mkdir -p build${{ matrix.xlen }}
|
||||
# cd build${{ matrix.xlen }}
|
||||
# ../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }}
|
||||
# source ci/toolchain_env.sh
|
||||
# make software -s > /dev/null
|
||||
# make tests -s > /dev/null
|
||||
|
||||
# - name: Upload Build Artifact
|
||||
# uses: actions/upload-artifact@v2
|
||||
# with:
|
||||
# name: build-${{ matrix.xlen }}
|
||||
# path: build${{ matrix.xlen }}
|
||||
|
||||
# tests:
|
||||
# runs-on: ubuntu-20.04
|
||||
# needs: build
|
||||
# strategy:
|
||||
# matrix:
|
||||
# name: [regression, opencl, config1, config2, debug, stress]
|
||||
# xlen: [32, 64]
|
||||
|
||||
# steps:
|
||||
# - name: Checkout code
|
||||
# uses: actions/checkout@v2
|
||||
|
||||
# - name: Install Dependencies
|
||||
# run: |
|
||||
# sudo bash ./ci/system_updates.sh
|
||||
|
||||
# - name: Cache Toolchain Directory
|
||||
# id: cache-toolchain
|
||||
# uses: actions/cache@v2
|
||||
# with:
|
||||
# path: tools
|
||||
# key: ${{ runner.os }}-toolchain-v0.1
|
||||
# restore-keys: |
|
||||
# ${{ runner.os }}-toolchain-
|
||||
|
||||
# - name: Cache Third Party Directory
|
||||
# id: cache-thirdparty
|
||||
# uses: actions/cache@v2
|
||||
# with:
|
||||
# path: third_party
|
||||
# key: ${{ runner.os }}-thirdparty-v0.1
|
||||
# restore-keys: |
|
||||
# ${{ runner.os }}-thirdparty-
|
||||
|
||||
# - name: Download Build Artifact
|
||||
# uses: actions/download-artifact@v2
|
||||
# with:
|
||||
# name: build-${{ matrix.xlen }}
|
||||
# path: build${{ matrix.xlen }}
|
||||
|
||||
# - name: Run tests
|
||||
# run: |
|
||||
# cd build${{ matrix.xlen }}
|
||||
# source ci/toolchain_env.sh
|
||||
# chmod -R +x . # Ensure all files have executable permissions
|
||||
# if [ "${{ matrix.name }}" == "regression" ]; then
|
||||
# ./ci/regression.sh --unittest
|
||||
# ./ci/regression.sh --isa
|
||||
# ./ci/regression.sh --kernel
|
||||
# ./ci/regression.sh --synthesis
|
||||
# ./ci/regression.sh --regression
|
||||
# else
|
||||
# ./ci/regression.sh --${{ matrix.name }}
|
||||
# fi
|
||||
|
||||
build_vm:
|
||||
runs-on: ubuntu-20.04
|
||||
needs: setup
|
||||
strategy:
|
||||
matrix:
|
||||
xlen: [32, 64]
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
sudo bash ./ci/system_updates.sh
|
||||
|
||||
- name: Cache Toolchain Directory
|
||||
id: cache-toolchain
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: tools
|
||||
key: ${{ runner.os }}-toolchain-v0.1
|
||||
restore-keys: |
|
||||
${{ runner.os }}-toolchain-
|
||||
|
||||
- name: Cache Third Party Directory
|
||||
id: cache-thirdparty
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: third_party
|
||||
key: ${{ runner.os }}-thirdparty-v0.1
|
||||
restore-keys: |
|
||||
${{ runner.os }}-thirdparty-
|
||||
|
||||
- name: Run Build
|
||||
run: |
|
||||
TOOLDIR=$PWD/tools
|
||||
mkdir -p build${{ matrix.xlen }}-vm
|
||||
cd build${{ matrix.xlen }}-vm
|
||||
../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }} --vm_enable=1
|
||||
source ci/toolchain_env.sh
|
||||
make software -s > /dev/null
|
||||
make tests -s > /dev/null
|
||||
|
||||
- name: Upload Build Artifact
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: build-${{ matrix.xlen }}-vm
|
||||
path: build${{ matrix.xlen }}-vm
|
||||
|
||||
test_vm:
|
||||
runs-on: ubuntu-20.04
|
||||
needs: build_vm
|
||||
strategy:
|
||||
matrix:
|
||||
xlen: [32, 64]
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
sudo bash ./ci/system_updates.sh
|
||||
|
||||
- name: Cache Toolchain Directory
|
||||
id: cache-toolchain
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: tools
|
||||
key: ${{ runner.os }}-toolchain-v0.1
|
||||
restore-keys: |
|
||||
${{ runner.os }}-toolchain-
|
||||
|
||||
- name: Cache Third Party Directory
|
||||
id: cache-thirdparty
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: third_party
|
||||
key: ${{ runner.os }}-thirdparty-v0.1
|
||||
restore-keys: |
|
||||
${{ runner.os }}-thirdparty-
|
||||
|
||||
- name: Download Build Artifact
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: build-${{ matrix.xlen }}-vm
|
||||
path: build${{ matrix.xlen }}-vm
|
||||
|
||||
- name: Run tests
|
||||
run: |
|
||||
cd build${{ matrix.xlen }}-vm
|
||||
source ci/toolchain_env.sh
|
||||
chmod -R +x . # Ensure all files have executable permissions
|
||||
./ci/regression.sh --vm
|
||||
|
||||
complete:
|
||||
runs-on: ubuntu-20.04
|
||||
needs: test_vm
|
||||
|
||||
steps:
|
||||
- name: Check Completion
|
||||
run: echo "All matrix jobs passed"
|
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1,3 +1,4 @@
|
|||
/build*
|
||||
/.vscode
|
||||
*.cache
|
||||
*.code-workspace
|
||||
|
|
3
.gitmodules
vendored
3
.gitmodules
vendored
|
@ -6,5 +6,4 @@
|
|||
url = https://github.com/ucb-bar/berkeley-softfloat-3.git
|
||||
[submodule "third_party/ramulator"]
|
||||
path = third_party/ramulator
|
||||
url = https://github.com/CMU-SAFARI/ramulator.git
|
||||
ignore = dirty
|
||||
url = https://github.com/CMU-SAFARI/ramulator2.git
|
||||
|
|
118
.travis.yml
118
.travis.yml
|
@ -1,118 +0,0 @@
|
|||
language: cpp
|
||||
dist: focal
|
||||
os: linux
|
||||
compiler: gcc
|
||||
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- build-essential
|
||||
- valgrind
|
||||
- libstdc++6
|
||||
- binutils
|
||||
- python
|
||||
- uuid-dev
|
||||
|
||||
env:
|
||||
global:
|
||||
- TOOLDIR=$HOME/tools
|
||||
|
||||
cache:
|
||||
directories:
|
||||
- $TOOLDIR
|
||||
- $HOME/third_party
|
||||
- $HOME/build32
|
||||
- $HOME/build64
|
||||
|
||||
before_install:
|
||||
- if [ ! -d "$TOOLDIR" ] || [ -z "$(ls -A $TOOLDIR)" ] || [ "$(cat "$TOOLDIR/version.txt")" != "v0.4" ]; then
|
||||
rm -rf $TOOLDIR;
|
||||
mkdir -p $TRAVIS_BUILD_DIR/build && cd $TRAVIS_BUILD_DIR/build;
|
||||
../configure --tooldir=$TOOLDIR;
|
||||
ci/toolchain_install.sh --all;
|
||||
echo "v0.3" > "$TOOLDIR/version.txt";
|
||||
else
|
||||
echo "using existing tooldir build";
|
||||
fi
|
||||
- if [ ! -d "$HOME/third_party" ] || [ -z "$(ls -A $HOME/third_party)" ] || [ "$(cat "$HOME/third_party/version.txt")" != "v0.2" ]; then
|
||||
cd $TRAVIS_BUILD_DIR;
|
||||
make -C third_party > /dev/null;
|
||||
echo "v0.2" > "third_party/version.txt";
|
||||
cp -rf third_party $HOME;
|
||||
else
|
||||
echo "using existing third_party build";
|
||||
cp -rf $HOME/third_party $TRAVIS_BUILD_DIR;
|
||||
fi
|
||||
|
||||
install:
|
||||
- if [ ! -d "$HOME/build$XLEN" ] || [ -z "$(ls -A $HOME/build$XLEN)" ] || [ "$(cat "$HOME/build$XLEN/version.txt")" != "$TRAVIS_COMMIT" ]; then
|
||||
mkdir -p $TRAVIS_BUILD_DIR/build$XLEN && cd $TRAVIS_BUILD_DIR/build$XLEN;
|
||||
../configure --tooldir=$TOOLDIR --xlen=$XLEN;
|
||||
source ci/toolchain_env.sh;
|
||||
make build -s > /dev/null;
|
||||
echo "$TRAVIS_COMMIT" > version.txt;
|
||||
cp -rf $TRAVIS_BUILD_DIR/build$XLEN $HOME;
|
||||
else
|
||||
echo "using existing build for commit $TRAVIS_COMMIT";
|
||||
cp -rf $HOME/build$XLEN $TRAVIS_BUILD_DIR;
|
||||
fi
|
||||
|
||||
before_script:
|
||||
- cd $TRAVIS_BUILD_DIR/build$XLEN
|
||||
- source ci/toolchain_env.sh
|
||||
|
||||
stages:
|
||||
- test
|
||||
|
||||
jobs:
|
||||
include:
|
||||
- stage: test
|
||||
name: regression32
|
||||
env: XLEN=32
|
||||
script:
|
||||
- ./ci/travis_run.py ./ci/regression.sh --unittest
|
||||
- ./ci/travis_run.py ./ci/regression.sh --isa
|
||||
- ./ci/travis_run.py ./ci/regression.sh --kernel
|
||||
- ./ci/travis_run.py ./ci/regression.sh --synthesis
|
||||
- ./ci/travis_run.py ./ci/regression.sh --regression
|
||||
- ./ci/travis_run.py ./ci/regression.sh --opencl
|
||||
|
||||
- stage: test
|
||||
name: regression64
|
||||
env: XLEN=64
|
||||
script:
|
||||
- ./ci/travis_run.py ./ci/regression.sh --isa
|
||||
- ./ci/travis_run.py ./ci/regression.sh --kernel
|
||||
- ./ci/travis_run.py ./ci/regression.sh --synthesis
|
||||
- ./ci/travis_run.py ./ci/regression.sh --regression
|
||||
- ./ci/travis_run.py ./ci/regression.sh --opencl
|
||||
|
||||
- stage: test
|
||||
name: config
|
||||
env: XLEN=32
|
||||
script:
|
||||
- ./ci/travis_run.py ./ci/regression.sh --cluster
|
||||
- ./ci/travis_run.py ./ci/regression.sh --config
|
||||
|
||||
- stage: test
|
||||
name: debug
|
||||
env: XLEN=32
|
||||
script:
|
||||
- ./ci/travis_run.py ./ci/regression.sh --debug
|
||||
- ./ci/travis_run.py ./ci/regression.sh --stress
|
||||
|
||||
- stage: test
|
||||
name: virtual_memory
|
||||
env: XLEN=32
|
||||
env: VM_DISABLE=1
|
||||
script:
|
||||
- ./ci/travis_run.py ./ci/regression.sh --regression
|
||||
- ./ci/travis_run.py ./ci/regression.sh --opencl
|
||||
|
||||
- stage: test
|
||||
name: virtual_memory
|
||||
env: XLEN=64
|
||||
env: VM_DISABLE=1
|
||||
script:
|
||||
- ./ci/travis_run.py ./ci/regression.sh --regression
|
||||
- ./ci/travis_run.py ./ci/regression.sh --opencl
|
23
Makefile.in
23
Makefile.in
|
@ -1,5 +1,15 @@
|
|||
include config.mk
|
||||
|
||||
.PHONY: build software tests
|
||||
|
||||
vm:
|
||||
$(MAKE) -C $(VORTEX_HOME)/third_party
|
||||
$(MAKE) -C hw
|
||||
$(MAKE) -C sim simx
|
||||
$(MAKE) -C kernel
|
||||
$(MAKE) -C runtime vm
|
||||
$(MAKE) -C tests
|
||||
|
||||
all:
|
||||
$(MAKE) -C $(VORTEX_HOME)/third_party
|
||||
$(MAKE) -C hw
|
||||
|
@ -15,13 +25,24 @@ build:
|
|||
$(MAKE) -C runtime
|
||||
$(MAKE) -C tests
|
||||
|
||||
clean:
|
||||
software:
|
||||
$(MAKE) -C hw
|
||||
$(MAKE) -C kernel
|
||||
$(MAKE) -C runtime/stub
|
||||
|
||||
tests:
|
||||
$(MAKE) -C tests
|
||||
|
||||
clean-build:
|
||||
$(MAKE) -C hw clean
|
||||
$(MAKE) -C sim clean
|
||||
$(MAKE) -C kernel clean
|
||||
$(MAKE) -C runtime clean
|
||||
$(MAKE) -C tests clean
|
||||
|
||||
clean: clean-build
|
||||
$(MAKE) -C $(VORTEX_HOME)/third_party clean
|
||||
|
||||
# Install setup
|
||||
KERNEL_INC_DST = $(PREFIX)/kernel/include
|
||||
KERNEL_LIB_DST = $(PREFIX)/kernel/lib$(XLEN)
|
||||
|
|
10
README.md
10
README.md
|
@ -56,7 +56,7 @@ More detailed build instructions can be found [here](docs/install_vortex.md).
|
|||
```
|
||||
### Install Vortex codebase
|
||||
```
|
||||
git clone --depth=1 --recursive git@github.com:vortexgpgpu/vortex.git -b vortex_vm
|
||||
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git -b vortex_vm
|
||||
cd vortex
|
||||
```
|
||||
|
||||
|
@ -68,18 +68,18 @@ More detailed build instructions can be found [here](docs/install_vortex.md).
|
|||
mkdir out
|
||||
export OUT_DIR=`pwd`/out
|
||||
cd build
|
||||
../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-6-14 --prefix=$OUT_DIR
|
||||
# Run the following to disble virtual memory feature in compilation
|
||||
../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-2024-08-09 --prefix=$OUT_DIR
|
||||
# Run the following instead to enable virtual memory feature in compilation
|
||||
../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-2024-08-09 --prefix=$OUT_DIR --vm_enable=1
|
||||
|
||||
### Install prebuilt toolchain
|
||||
# We will use the precomipled tools in volvo toolchanin directory
|
||||
|
||||
### set environment variables
|
||||
# should always run before using the toolchain!
|
||||
source ./ci/toolchain_env.sh
|
||||
|
||||
### Building Vortex
|
||||
make -s
|
||||
|
||||
### Quick demo running vecadd OpenCL kernel on 2 cores
|
||||
$ ./ci/blackbox.sh --cores=2 --app=vecadd
|
||||
|
||||
|
|
|
@ -25,37 +25,6 @@ XLEN=${XLEN:=@XLEN@}
|
|||
|
||||
echo "Vortex Regression Test: XLEN=$XLEN"
|
||||
|
||||
split_file() {
|
||||
if [[ $# -ne 2 ]]; then
|
||||
echo "Usage: $0 <filename> <start_with>"
|
||||
return 1
|
||||
fi
|
||||
input_file="$1"
|
||||
start_with="$2"
|
||||
if [[ ! -r "$input_file" ]]; then
|
||||
echo "Error: File '$input_file' is not readable or does not exist."
|
||||
return 1
|
||||
fi
|
||||
count=0
|
||||
output_file=""
|
||||
while IFS= read -r line; do
|
||||
if [[ $line == $start_with* ]]; then
|
||||
count=$((count + 1))
|
||||
output_file="$input_file.part$count"
|
||||
> "$output_file" # ensure empty
|
||||
fi
|
||||
if [[ -n "$output_file" ]]; then
|
||||
echo "$line" >> "$output_file"
|
||||
fi
|
||||
done < "$input_file"
|
||||
|
||||
if [[ $count -eq 0 ]]; then
|
||||
echo "No lines starting with '$start_with' were found in '$input_file'."
|
||||
fi
|
||||
}
|
||||
|
||||
###############################################################################
|
||||
|
||||
unittest()
|
||||
{
|
||||
make -C tests/unittest run
|
||||
|
@ -66,6 +35,9 @@ isa()
|
|||
{
|
||||
echo "begin isa tests..."
|
||||
|
||||
make -C sim/simx
|
||||
make -C sim/rtlsim
|
||||
|
||||
make -C tests/riscv/isa run-simx
|
||||
make -C tests/riscv/isa run-rtlsim
|
||||
|
||||
|
@ -96,8 +68,8 @@ isa()
|
|||
make -C tests/riscv/isa run-rtlsim-64fx
|
||||
fi
|
||||
|
||||
# restore default prebuilt configuration
|
||||
make -C sim/rtlsim clean && make -C sim/rtlsim > /dev/null
|
||||
# clean build
|
||||
make -C sim/rtlsim clean
|
||||
|
||||
echo "isa tests done!"
|
||||
}
|
||||
|
@ -106,6 +78,9 @@ kernel()
|
|||
{
|
||||
echo "begin kernel tests..."
|
||||
|
||||
make -C sim/simx
|
||||
make -C sim/rtlsim
|
||||
|
||||
make -C tests/kernel run-simx
|
||||
make -C tests/kernel run-rtlsim
|
||||
|
||||
|
@ -116,6 +91,9 @@ regression()
|
|||
{
|
||||
echo "begin regression tests..."
|
||||
|
||||
make -C runtime/simx
|
||||
make -C runtime/rtlsim
|
||||
|
||||
make -C tests/regression run-simx
|
||||
make -C tests/regression run-rtlsim
|
||||
|
||||
|
@ -134,6 +112,9 @@ opencl()
|
|||
{
|
||||
echo "begin opencl tests..."
|
||||
|
||||
make -C runtime/simx
|
||||
make -C runtime/rtlsim
|
||||
|
||||
make -C tests/opencl run-simx
|
||||
make -C tests/opencl run-rtlsim
|
||||
|
||||
|
@ -143,24 +124,28 @@ opencl()
|
|||
echo "opencl tests done!"
|
||||
}
|
||||
|
||||
cluster()
|
||||
{
|
||||
echo "begin clustering tests..."
|
||||
vm(){
|
||||
echo "begin vm tests..."
|
||||
|
||||
# cores clustering
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=4 --clusters=1 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=1 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --app=diverge --args="-n1"
|
||||
make -C sim/simx
|
||||
make -C runtime/simx
|
||||
|
||||
# L2/L3
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l3cache --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --l2cache --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=4 --l2cache --l3cache --app=diverge --args="-n1"
|
||||
make -C tests/kernel run-simx
|
||||
|
||||
# Regression tests
|
||||
make -C tests/regression run-simx
|
||||
|
||||
echo "clustering tests done!"
|
||||
# test global barrier
|
||||
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tgbar" --cores=2
|
||||
|
||||
# test local barrier
|
||||
./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar"
|
||||
|
||||
# OpenCL tests
|
||||
make -C tests/opencl run-simx
|
||||
./ci/blackbox.sh --driver=simx --app=lbm --warps=8
|
||||
|
||||
echo "vm tests done!"
|
||||
}
|
||||
|
||||
test_csv_trace()
|
||||
|
@ -170,29 +155,20 @@ test_csv_trace()
|
|||
make -C sim/rtlsim clean && DEBUG=3 CONFIGS="-DGPR_RESET" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-simx-32im > run_simx.log
|
||||
make -C tests/riscv/isa run-rtlsim-32im > run_rtlsim.log
|
||||
split_file run_simx.log "Running "
|
||||
split_file run_rtlsim.log "Running "
|
||||
for file in ./run_simx.log.part*; do
|
||||
if [[ -f "$file" ]]; then
|
||||
file2="${file//simx/rtlsim}"
|
||||
if [[ -f "$file2" ]]; then
|
||||
./ci/trace_csv.py -tsimx $file -otrace_simx.csv
|
||||
./ci/trace_csv.py -trtlsim $file2 -otrace_rtlsim.csv
|
||||
diff trace_rtlsim.csv trace_simx.csv
|
||||
else
|
||||
echo "File $file2 not found."
|
||||
fi
|
||||
fi
|
||||
done
|
||||
# restore default prebuilt configuration
|
||||
make -C sim/simx clean && make -C sim/simx > /dev/null
|
||||
make -C sim/rtlsim clean && make -C sim/rtlsim > /dev/null
|
||||
./ci/trace_csv.py -tsimx run_simx.log -otrace_simx.csv
|
||||
./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv
|
||||
diff trace_rtlsim.csv trace_simx.csv
|
||||
# clean build
|
||||
make -C sim/simx clean
|
||||
make -C sim/rtlsim clean
|
||||
}
|
||||
|
||||
debug()
|
||||
{
|
||||
echo "begin debugging tests..."
|
||||
|
||||
test_csv_trace
|
||||
|
||||
./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=opae --cores=1 --scope --app=demo --args="-n1"
|
||||
|
@ -200,21 +176,23 @@ debug()
|
|||
echo "debugging tests done!"
|
||||
}
|
||||
|
||||
config()
|
||||
config1()
|
||||
{
|
||||
echo "begin configuration tests..."
|
||||
echo "begin configuration-1 tests..."
|
||||
|
||||
# warp/threads configurations
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=1 --threads=1 --app=diverge
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=2 --threads=2 --app=diverge
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=2 --threads=8 --app=diverge
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=8 --threads=2 --app=diverge
|
||||
./ci/blackbox.sh --driver=simx --cores=1 --warps=1 --threads=1 --app=diverge
|
||||
./ci/blackbox.sh --driver=simx --cores=1 --warps=8 --threads=16 --app=diverge
|
||||
# warp/threads
|
||||
./ci/blackbox.sh --driver=rtlsim --warps=1 --threads=1 --app=diverge
|
||||
./ci/blackbox.sh --driver=rtlsim --warps=2 --threads=2 --app=diverge
|
||||
./ci/blackbox.sh --driver=rtlsim --warps=2 --threads=8 --app=diverge
|
||||
./ci/blackbox.sh --driver=rtlsim --warps=8 --threads=2 --app=diverge
|
||||
./ci/blackbox.sh --driver=simx --warps=1 --threads=1 --app=diverge
|
||||
./ci/blackbox.sh --driver=simx --warps=8 --threads=16 --app=diverge
|
||||
|
||||
# disable DPI
|
||||
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood
|
||||
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood
|
||||
# cores clustering
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=4 --clusters=1 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=1 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --app=diverge --args="-n1"
|
||||
|
||||
# issue width
|
||||
CONFIGS="-DISSUE_WIDTH=2" ./ci/blackbox.sh --driver=rtlsim --app=diverge
|
||||
|
@ -240,6 +218,31 @@ config()
|
|||
CONFIGS="-DISSUE_WIDTH=2 -DNUM_LSU_BLOCK=1 -DNUM_LSU_LANES=2" ./ci/blackbox.sh --driver=simx --app=vecaddx
|
||||
CONFIGS="-DISSUE_WIDTH=4 -DNUM_LSU_BLOCK=4 -DNUM_LSU_LANES=4" ./ci/blackbox.sh --driver=simx --app=vecaddx
|
||||
|
||||
# L2/L3
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l3cache --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --l2cache --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=4 --l2cache --l3cache --app=diverge --args="-n1"
|
||||
|
||||
# multiple L1 caches per socket
|
||||
CONFIGS="-DSOCKET_SIZE=4 -DNUM_DCACHES=2 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --cores=8 --warps=1 --threads=2
|
||||
|
||||
echo "configuration-1 tests done!"
|
||||
}
|
||||
|
||||
config2()
|
||||
{
|
||||
echo "begin configuration-2 tests..."
|
||||
|
||||
# test opaesim
|
||||
./ci/blackbox.sh --driver=opae --app=printf
|
||||
./ci/blackbox.sh --driver=opae --app=diverge
|
||||
|
||||
# disable DPI
|
||||
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood
|
||||
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood
|
||||
|
||||
# custom program startup address
|
||||
make -C tests/regression/dogfood clean-kernel
|
||||
if [ "$XLEN" == "64" ]; then
|
||||
|
@ -249,55 +252,57 @@ config()
|
|||
fi
|
||||
./ci/blackbox.sh --driver=simx --app=dogfood
|
||||
./ci/blackbox.sh --driver=rtlsim --app=dogfood
|
||||
make -C tests/regression/dogfood clean-kernel
|
||||
|
||||
# disabling M & F extensions
|
||||
make -C sim/rtlsim clean && CONFIGS="-DEXT_M_DISABLE -DEXT_F_DISABLE" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-32i
|
||||
make -C sim/rtlsim clean && make -C sim/rtlsim > /dev/null
|
||||
make -C sim/rtlsim clean
|
||||
|
||||
# disabling ZICOND extension
|
||||
CONFIGS="-DEXT_ZICOND_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo
|
||||
|
||||
# disable local memory
|
||||
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo --perf=1
|
||||
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --cores=1 --app=demo --perf=1
|
||||
|
||||
# disable L1 cache
|
||||
CONFIGS="-DL1_DISABLE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx
|
||||
CONFIGS="-DL1_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx
|
||||
CONFIGS="-DDCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx
|
||||
CONFIGS="-DICACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx
|
||||
|
||||
# multiple L1 caches per socket
|
||||
CONFIGS="-DSOCKET_SIZE=4 -DNUM_DCACHES=2 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --cores=8 --warps=1 --threads=2
|
||||
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo --perf=1
|
||||
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=demo --perf=1
|
||||
|
||||
# test AXI bus
|
||||
AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
|
||||
AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --app=demo
|
||||
|
||||
# disable L1 cache
|
||||
CONFIGS="-DL1_DISABLE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DL1_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DDCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DICACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
|
||||
# reduce l1 line size
|
||||
CONFIGS="-DL1_LINE_SIZE=4" ./ci/blackbox.sh --driver=rtlsim --app=io_addr
|
||||
CONFIGS="-DL1_LINE_SIZE=4" ./ci/blackbox.sh --driver=simx --app=io_addr
|
||||
CONFIGS="-DL1_LINE_SIZE=4 -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DL1_LINE_SIZE=4 -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
CONFIGS="-DL1_LINE_SIZE=$XLEN/8" ./ci/blackbox.sh --driver=rtlsim --app=io_addr
|
||||
CONFIGS="-DL1_LINE_SIZE=$XLEN/8" ./ci/blackbox.sh --driver=simx --app=io_addr
|
||||
CONFIGS="-DL1_LINE_SIZE=$XLEN/8 -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DL1_LINE_SIZE=$XLEN/8 -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
|
||||
# test cache ways
|
||||
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
|
||||
# test cache banking
|
||||
CONFIGS="-DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --cores=1 --app=sgemmx
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
|
||||
# test 128-bit MEM block
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --cores=1 --app=demo
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --app=demo
|
||||
|
||||
# test single-bank DRAM
|
||||
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --cores=1 --app=demo
|
||||
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --app=demo
|
||||
|
||||
# test 27-bit DRAM address
|
||||
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --cores=1 --app=demo
|
||||
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --app=demo
|
||||
|
||||
echo "configuration tests done!"
|
||||
echo "configuration-2 tests done!"
|
||||
}
|
||||
|
||||
stress()
|
||||
|
@ -306,9 +311,7 @@ stress()
|
|||
|
||||
# test verilator reset values
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=opae --app=printf
|
||||
./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n128" --l2cache
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --args="-n128" --l2cache
|
||||
|
||||
echo "stress tests done!"
|
||||
}
|
||||
|
@ -318,7 +321,7 @@ synthesis()
|
|||
echo "begin synthesis tests..."
|
||||
|
||||
PREFIX=build_base make -C hw/syn/yosys clean
|
||||
PREFIX=build_base CONFIGS="-DDPI_DISABLE -DEXT_F_DISABLE" make -C hw/syn/yosys elaborate
|
||||
PREFIX=build_base CONFIGS="-DDPI_DISABLE -DEXT_F_DISABLE" make -C hw/syn/yosys synthesis
|
||||
|
||||
echo "synthesis tests done!"
|
||||
}
|
||||
|
@ -326,7 +329,7 @@ synthesis()
|
|||
show_usage()
|
||||
{
|
||||
echo "Vortex Regression Test"
|
||||
echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cluster] [--debug] [--config] [--stress] [--synthesis] [--all] [--h|--help]"
|
||||
echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--config1] [--config2] [--debug] [--stress] [--synthesis] [--all] [--h|--help]"
|
||||
}
|
||||
|
||||
start=$SECONDS
|
||||
|
@ -336,6 +339,9 @@ clean=0
|
|||
|
||||
while [ "$1" != "" ]; do
|
||||
case $1 in
|
||||
--vm )
|
||||
tests+=("vm")
|
||||
;;
|
||||
--clean )
|
||||
clean=1
|
||||
;;
|
||||
|
@ -354,15 +360,15 @@ while [ "$1" != "" ]; do
|
|||
--opencl )
|
||||
tests+=("opencl")
|
||||
;;
|
||||
--cluster )
|
||||
tests+=("cluster")
|
||||
--config1 )
|
||||
tests+=("config1")
|
||||
;;
|
||||
--config2 )
|
||||
tests+=("config2")
|
||||
;;
|
||||
--debug )
|
||||
tests+=("debug")
|
||||
;;
|
||||
--config )
|
||||
tests+=("config")
|
||||
;;
|
||||
--stress )
|
||||
tests+=("stress")
|
||||
;;
|
||||
|
@ -376,9 +382,9 @@ while [ "$1" != "" ]; do
|
|||
tests+=("kernel")
|
||||
tests+=("regression")
|
||||
tests+=("opencl")
|
||||
tests+=("cluster")
|
||||
tests+=("config1")
|
||||
tests+=("config2")
|
||||
tests+=("debug")
|
||||
tests+=("config")
|
||||
tests+=("stress")
|
||||
tests+=("synthesis")
|
||||
;;
|
||||
|
|
27
ci/system_updates.sh
Executable file
27
ci/system_updates.sh
Executable file
|
@ -0,0 +1,27 @@
|
|||
#!/bin/sh
|
||||
|
||||
# Copyright 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
set -e
|
||||
|
||||
apt-get update -y
|
||||
|
||||
add-apt-repository -y ppa:ubuntu-toolchain-r/test
|
||||
apt-get update
|
||||
apt-get install -y g++-11 gcc-11
|
||||
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 100
|
||||
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100
|
||||
|
||||
apt-get install -y build-essential valgrind libstdc++6 binutils python uuid-dev ccache
|
|
@ -16,8 +16,8 @@
|
|||
|
||||
TOOLDIR=${TOOLDIR:=@TOOLDIR@}
|
||||
|
||||
export VERILATOR_ROOT=$TOOLDIR/verilator
|
||||
export PATH=$VERILATOR_ROOT/bin:$PATH
|
||||
# export VERILATOR_ROOT=$TOOLDIR/verilator
|
||||
# export PATH=$VERILATOR_ROOT/bin:$PATH
|
||||
|
||||
export SV2V_PATH=$TOOLDIR/sv2v
|
||||
export PATH=$SV2V_PATH/bin:$PATH
|
||||
|
|
310
ci/trace_csv.py
310
ci/trace_csv.py
|
@ -26,7 +26,7 @@ def parse_args():
|
|||
parser.add_argument('log', help='Input log file')
|
||||
return parser.parse_args()
|
||||
|
||||
def parse_simx(log_filename):
|
||||
def parse_simx(log_lines):
|
||||
pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
|
||||
instr_pattern = r"Instr (0x[0-9a-fA-F]+):"
|
||||
opcode_pattern = r"Instr 0x[0-9a-fA-F]+: ([0-9a-zA-Z_\.]+)"
|
||||
|
@ -37,32 +37,31 @@ def parse_simx(log_filename):
|
|||
destination_pattern = r"Dest Reg: (.+)"
|
||||
uuid_pattern = r"#(\d+)"
|
||||
entries = []
|
||||
with open(log_filename, 'r') as log_file:
|
||||
instr_data = None
|
||||
for lineno, line in enumerate(log_file, start=1):
|
||||
try:
|
||||
if line.startswith("DEBUG Fetch:"):
|
||||
if instr_data:
|
||||
entries.append(instr_data)
|
||||
instr_data = {}
|
||||
instr_data["lineno"] = lineno
|
||||
instr_data["PC"] = re.search(pc_pattern, line).group(1)
|
||||
instr_data["core_id"] = re.search(core_id_pattern, line).group(1)
|
||||
instr_data["warp_id"] = re.search(warp_id_pattern, line).group(1)
|
||||
instr_data["tmask"] = re.search(tmask_pattern, line).group(1)
|
||||
instr_data["uuid"] = re.search(uuid_pattern, line).group(1)
|
||||
elif line.startswith("DEBUG Instr"):
|
||||
instr_data["instr"] = re.search(instr_pattern, line).group(1)
|
||||
instr_data["opcode"] = re.search(opcode_pattern, line).group(1)
|
||||
elif line.startswith("DEBUG Src"):
|
||||
src_reg = re.search(operands_pattern, line).group(1)
|
||||
instr_data["operands"] = (instr_data["operands"] + ', ' + src_reg) if 'operands' in instr_data else src_reg
|
||||
elif line.startswith("DEBUG Dest"):
|
||||
instr_data["destination"] = re.search(destination_pattern, line).group(1)
|
||||
except Exception as e:
|
||||
print("Error at line {}: {}".format(lineno, e))
|
||||
if instr_data:
|
||||
entries.append(instr_data)
|
||||
instr_data = None
|
||||
for lineno, line in enumerate(log_lines, start=1):
|
||||
try:
|
||||
if line.startswith("DEBUG Fetch:"):
|
||||
if instr_data:
|
||||
entries.append(instr_data)
|
||||
instr_data = {}
|
||||
instr_data["lineno"] = lineno
|
||||
instr_data["PC"] = re.search(pc_pattern, line).group(1)
|
||||
instr_data["core_id"] = re.search(core_id_pattern, line).group(1)
|
||||
instr_data["warp_id"] = re.search(warp_id_pattern, line).group(1)
|
||||
instr_data["tmask"] = re.search(tmask_pattern, line).group(1)
|
||||
instr_data["uuid"] = re.search(uuid_pattern, line).group(1)
|
||||
elif line.startswith("DEBUG Instr"):
|
||||
instr_data["instr"] = re.search(instr_pattern, line).group(1)
|
||||
instr_data["opcode"] = re.search(opcode_pattern, line).group(1)
|
||||
elif line.startswith("DEBUG Src"):
|
||||
src_reg = re.search(operands_pattern, line).group(1)
|
||||
instr_data["operands"] = (instr_data["operands"] + ', ' + src_reg) if 'operands' in instr_data else src_reg
|
||||
elif line.startswith("DEBUG Dest"):
|
||||
instr_data["destination"] = re.search(destination_pattern, line).group(1)
|
||||
except Exception as e:
|
||||
print("Error at line {}: {}".format(lineno, e))
|
||||
if instr_data:
|
||||
entries.append(instr_data)
|
||||
return entries
|
||||
|
||||
def reverse_binary(bin_str):
|
||||
|
@ -95,8 +94,9 @@ def append_value(text, reg, value, tmask_arr, sep):
|
|||
text += "}"
|
||||
return text, sep
|
||||
|
||||
def parse_rtlsim(log_filename):
|
||||
line_pattern = r"\d+: core(\d+)-(decode|issue|commit)"
|
||||
def parse_rtlsim(log_lines):
|
||||
config_pattern = r"CONFIGS: num_threads=(\d+), num_warps=(\d+), num_cores=(\d+), num_clusters=(\d+), socket_size=(\d+), local_mem_base=(\d+), num_barriers=(\d+)"
|
||||
line_pattern = r"\d+: cluster(\d+)-socket(\d+)-core(\d+)-(decode|issue|commit)"
|
||||
pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
|
||||
instr_pattern = r"instr=(0x[0-9a-fA-F]+)"
|
||||
ex_pattern = r"ex=([a-zA-Z]+)"
|
||||
|
@ -116,124 +116,166 @@ def parse_rtlsim(log_filename):
|
|||
eop_pattern = r"eop=(\d)"
|
||||
uuid_pattern = r"#(\d+)"
|
||||
entries = []
|
||||
with open(log_filename, 'r') as log_file:
|
||||
instr_data = {}
|
||||
for lineno, line in enumerate(log_file, start=1):
|
||||
try:
|
||||
line_match = re.search(line_pattern, line)
|
||||
if line_match:
|
||||
PC = re.search(pc_pattern, line).group(1)
|
||||
warp_id = re.search(warp_id_pattern, line).group(1)
|
||||
tmask = re.search(tmask_pattern, line).group(1)
|
||||
uuid = re.search(uuid_pattern, line).group(1)
|
||||
core_id = line_match.group(1)
|
||||
stage = line_match.group(2)
|
||||
if stage == "decode":
|
||||
trace = {}
|
||||
trace["uuid"] = uuid
|
||||
trace["PC"] = PC
|
||||
trace["core_id"] = core_id
|
||||
trace["warp_id"] = warp_id
|
||||
trace["tmask"] = reverse_binary(tmask)
|
||||
trace["instr"] = re.search(instr_pattern, line).group(1)
|
||||
trace["opcode"] = re.search(op_pattern, line).group(1)
|
||||
trace["opds"] = bin_to_array(re.search(opds_pattern, line).group(1))
|
||||
trace["rd"] = re.search(rd_pattern, line).group(1)
|
||||
trace["rs1"] = re.search(rs1_pattern, line).group(1)
|
||||
trace["rs2"] = re.search(rs2_pattern, line).group(1)
|
||||
trace["rs3"] = re.search(rs3_pattern, line).group(1)
|
||||
instr_data = {}
|
||||
num_threads = 0
|
||||
num_warps = 0
|
||||
num_cores = 0
|
||||
num_clusters = 0
|
||||
socket_size = 0
|
||||
local_mem_base = 0
|
||||
num_barriers = 0
|
||||
num_sockets = 0
|
||||
for lineno, line in enumerate(log_lines, start=1):
|
||||
try:
|
||||
config_match = re.search(config_pattern, line)
|
||||
if config_match:
|
||||
num_threads = int(config_match.group(1))
|
||||
num_warps = int(config_match.group(2))
|
||||
num_cores = int(config_match.group(3))
|
||||
num_clusters = int(config_match.group(4))
|
||||
socket_size = int(config_match.group(5))
|
||||
local_mem_base = int(config_match.group(6))
|
||||
num_barriers = int(config_match.group(7))
|
||||
num_sockets = (num_cores + socket_size - 1) // socket_size
|
||||
continue
|
||||
line_match = re.search(line_pattern, line)
|
||||
if line_match:
|
||||
PC = re.search(pc_pattern, line).group(1)
|
||||
warp_id = re.search(warp_id_pattern, line).group(1)
|
||||
tmask = re.search(tmask_pattern, line).group(1)
|
||||
uuid = re.search(uuid_pattern, line).group(1)
|
||||
cluster_id = line_match.group(1)
|
||||
socket_id = line_match.group(2)
|
||||
core_id = line_match.group(3)
|
||||
stage = line_match.group(4)
|
||||
if stage == "decode":
|
||||
trace = {}
|
||||
trace["uuid"] = uuid
|
||||
trace["PC"] = PC
|
||||
trace["core_id"] = ((((cluster_id * num_sockets) + socket_id) * socket_size) + core_id)
|
||||
trace["warp_id"] = warp_id
|
||||
trace["tmask"] = reverse_binary(tmask)
|
||||
trace["instr"] = re.search(instr_pattern, line).group(1)
|
||||
trace["opcode"] = re.search(op_pattern, line).group(1)
|
||||
trace["opds"] = bin_to_array(re.search(opds_pattern, line).group(1))
|
||||
trace["rd"] = re.search(rd_pattern, line).group(1)
|
||||
trace["rs1"] = re.search(rs1_pattern, line).group(1)
|
||||
trace["rs2"] = re.search(rs2_pattern, line).group(1)
|
||||
trace["rs3"] = re.search(rs3_pattern, line).group(1)
|
||||
instr_data[uuid] = trace
|
||||
elif stage == "issue":
|
||||
if uuid in instr_data:
|
||||
trace = instr_data[uuid]
|
||||
trace["lineno"] = lineno
|
||||
opds = trace["opds"]
|
||||
if opds[1]:
|
||||
trace["rs1_data"] = re.search(rs1_data_pattern, line).group(1).split(', ')[::-1]
|
||||
if opds[2]:
|
||||
trace["rs2_data"] = re.search(rs2_data_pattern, line).group(1).split(', ')[::-1]
|
||||
if opds[3]:
|
||||
trace["rs3_data"] = re.search(rs3_data_pattern, line).group(1).split(', ')[::-1]
|
||||
trace["issued"] = True
|
||||
instr_data[uuid] = trace
|
||||
elif stage == "issue":
|
||||
if uuid in instr_data:
|
||||
trace = instr_data[uuid]
|
||||
trace["lineno"] = lineno
|
||||
elif stage == "commit":
|
||||
if uuid in instr_data:
|
||||
trace = instr_data[uuid]
|
||||
if "issued" in trace:
|
||||
opds = trace["opds"]
|
||||
if opds[1]:
|
||||
trace["rs1_data"] = re.search(rs1_data_pattern, line).group(1).split(', ')[::-1]
|
||||
if opds[2]:
|
||||
trace["rs2_data"] = re.search(rs2_data_pattern, line).group(1).split(', ')[::-1]
|
||||
if opds[3]:
|
||||
trace["rs3_data"] = re.search(rs3_data_pattern, line).group(1).split(', ')[::-1]
|
||||
trace["issued"] = True
|
||||
dst_tmask_arr = bin_to_array(tmask)[::-1]
|
||||
wb = re.search(wb_pattern, line).group(1) == "1"
|
||||
if wb:
|
||||
rd_data = re.search(rd_data_pattern, line).group(1).split(', ')[::-1]
|
||||
if 'rd_data' in trace:
|
||||
merged_rd_data = trace['rd_data']
|
||||
for i in range(len(dst_tmask_arr)):
|
||||
if dst_tmask_arr[i] == 1:
|
||||
merged_rd_data[i] = rd_data[i]
|
||||
trace['rd_data'] = merged_rd_data
|
||||
else:
|
||||
trace['rd_data'] = rd_data
|
||||
instr_data[uuid] = trace
|
||||
elif stage == "commit":
|
||||
if uuid in instr_data:
|
||||
trace = instr_data[uuid]
|
||||
if "issued" in trace:
|
||||
opds = trace["opds"]
|
||||
dst_tmask_arr = bin_to_array(tmask)[::-1]
|
||||
wb = re.search(wb_pattern, line).group(1) == "1"
|
||||
eop = re.search(eop_pattern, line).group(1) == "1"
|
||||
if eop:
|
||||
tmask_arr = bin_to_array(trace["tmask"])
|
||||
destination = ''
|
||||
if wb:
|
||||
rd_data = re.search(rd_data_pattern, line).group(1).split(', ')[::-1]
|
||||
if 'rd_data' in trace:
|
||||
merged_rd_data = trace['rd_data']
|
||||
for i in range(len(dst_tmask_arr)):
|
||||
if dst_tmask_arr[i] == 1:
|
||||
merged_rd_data[i] = rd_data[i]
|
||||
trace['rd_data'] = merged_rd_data
|
||||
else:
|
||||
trace['rd_data'] = rd_data
|
||||
instr_data[uuid] = trace
|
||||
eop = re.search(eop_pattern, line).group(1) == "1"
|
||||
if eop:
|
||||
tmask_arr = bin_to_array(trace["tmask"])
|
||||
destination = ''
|
||||
if wb:
|
||||
destination, sep = append_value(destination, trace["rd"], trace['rd_data'], tmask_arr, False)
|
||||
del trace['rd_data']
|
||||
trace["destination"] = destination
|
||||
operands = ''
|
||||
sep = False
|
||||
if opds[1]:
|
||||
operands, sep = append_value(operands, trace["rs1"], trace["rs1_data"], tmask_arr, sep)
|
||||
del trace["rs1_data"]
|
||||
if opds[2]:
|
||||
operands, sep = append_value(operands, trace["rs2"], trace["rs2_data"], tmask_arr, sep)
|
||||
del trace["rs2_data"]
|
||||
if opds[3]:
|
||||
operands, sep = append_value(operands, trace["rs3"], trace["rs3_data"], tmask_arr, sep)
|
||||
del trace["rs3_data"]
|
||||
trace["operands"] = operands
|
||||
del trace["opds"]
|
||||
del trace["rd"]
|
||||
del trace["rs1"]
|
||||
del trace["rs2"]
|
||||
del trace["rs3"]
|
||||
del trace["issued"]
|
||||
del instr_data[uuid]
|
||||
entries.append(trace)
|
||||
except Exception as e:
|
||||
print("Error at line {}: {}".format(lineno, e))
|
||||
destination, sep = append_value(destination, trace["rd"], trace['rd_data'], tmask_arr, False)
|
||||
del trace['rd_data']
|
||||
trace["destination"] = destination
|
||||
operands = ''
|
||||
sep = False
|
||||
if opds[1]:
|
||||
operands, sep = append_value(operands, trace["rs1"], trace["rs1_data"], tmask_arr, sep)
|
||||
del trace["rs1_data"]
|
||||
if opds[2]:
|
||||
operands, sep = append_value(operands, trace["rs2"], trace["rs2_data"], tmask_arr, sep)
|
||||
del trace["rs2_data"]
|
||||
if opds[3]:
|
||||
operands, sep = append_value(operands, trace["rs3"], trace["rs3_data"], tmask_arr, sep)
|
||||
del trace["rs3_data"]
|
||||
trace["operands"] = operands
|
||||
del trace["opds"]
|
||||
del trace["rd"]
|
||||
del trace["rs1"]
|
||||
del trace["rs2"]
|
||||
del trace["rs3"]
|
||||
del trace["issued"]
|
||||
del instr_data[uuid]
|
||||
entries.append(trace)
|
||||
except Exception as e:
|
||||
print("Error at line {}: {}".format(lineno, e))
|
||||
return entries
|
||||
|
||||
def write_csv(log_filename, csv_filename, log_type):
|
||||
entries = None
|
||||
|
||||
# parse log file
|
||||
if log_type == "rtlsim":
|
||||
entries = parse_rtlsim(log_filename)
|
||||
elif log_type == "simx":
|
||||
entries = parse_simx(log_filename)
|
||||
else:
|
||||
print('Error: invalid log type')
|
||||
sys.exit()
|
||||
|
||||
# sort entries by uuid
|
||||
entries.sort(key=lambda x: (int(x['uuid'])))
|
||||
for entry in entries:
|
||||
del entry['lineno']
|
||||
|
||||
# write to CSV
|
||||
def write_csv(sublogs, csv_filename, log_type):
|
||||
with open(csv_filename, 'w', newline='') as csv_file:
|
||||
fieldnames = ["uuid", "PC", "opcode", "instr", "core_id", "warp_id", "tmask", "destination", "operands"]
|
||||
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
|
||||
writer.writeheader()
|
||||
for entry in entries:
|
||||
writer.writerow(entry)
|
||||
|
||||
for sublog in sublogs:
|
||||
entries = None
|
||||
|
||||
# parse sublog
|
||||
if log_type == "rtlsim":
|
||||
entries = parse_rtlsim(sublog)
|
||||
elif log_type == "simx":
|
||||
entries = parse_simx(sublog)
|
||||
else:
|
||||
print('Error: invalid log type')
|
||||
sys.exit()
|
||||
|
||||
# sort entries by uuid
|
||||
entries.sort(key=lambda x: (int(x['uuid'])))
|
||||
for entry in entries:
|
||||
del entry['lineno']
|
||||
|
||||
for entry in entries:
|
||||
writer.writerow(entry)
|
||||
|
||||
def split_log_file(log_filename):
|
||||
with open(log_filename, 'r') as log_file:
|
||||
log_lines = log_file.readlines()
|
||||
|
||||
sublogs = []
|
||||
current_sublog = None
|
||||
|
||||
for line in log_lines:
|
||||
if line.startswith("[VXDRV] START"):
|
||||
if current_sublog is not None:
|
||||
sublogs.append(current_sublog)
|
||||
current_sublog = [line]
|
||||
elif current_sublog is not None:
|
||||
current_sublog.append(line)
|
||||
|
||||
if current_sublog is not None:
|
||||
sublogs.append(current_sublog)
|
||||
|
||||
return sublogs
|
||||
|
||||
def main():
|
||||
args = parse_args()
|
||||
write_csv(args.log, args.csv, args.type)
|
||||
sublogs = split_log_file(args.log)
|
||||
write_csv(sublogs, args.csv, args.type)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
@ -32,4 +32,8 @@ RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf
|
|||
RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX)
|
||||
|
||||
VORTEX_RT_PATH ?= $(VORTEX_HOME)/runtime
|
||||
VORTEX_KN_PATH ?= $(VORTEX_HOME)/kernel
|
||||
VORTEX_KN_PATH ?= $(VORTEX_HOME)/kernel
|
||||
|
||||
THIRD_PARTY_DIR ?= $(VORTEX_HOME)/third_party
|
||||
|
||||
VM_ENABLE ?= @VM_ENABLE@
|
15
configure
vendored
15
configure
vendored
|
@ -63,7 +63,7 @@ copy_files() {
|
|||
filename_no_ext="${filename%.in}"
|
||||
dest_file="$dest_dir/$filename_no_ext"
|
||||
mkdir -p "$dest_dir"
|
||||
sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@PREFIX@|$PREFIX|g" "$file" > "$dest_file"
|
||||
sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@PREFIX@|$PREFIX|g; s|@VM_ENABLE@|$VM_ENABLE|g" "$file" > "$dest_file"
|
||||
# apply permissions to bash scripts
|
||||
read -r firstline < "$dest_file"
|
||||
if [[ "$firstline" =~ ^#!.*bash ]]; then
|
||||
|
@ -111,9 +111,10 @@ copy_files() {
|
|||
|
||||
# default configuration parameters
|
||||
default_xlen=32
|
||||
default_tooldir=/opt
|
||||
default_tooldir=$HOME/tools
|
||||
default_osversion=$(detect_osversion)
|
||||
default_prefix=$CURRENT_DIR
|
||||
default_vm=0
|
||||
|
||||
# load default configuration parameters from existing config.mk
|
||||
if [ -f "config.mk" ]; then
|
||||
|
@ -126,6 +127,7 @@ if [ -f "config.mk" ]; then
|
|||
TOOLDIR\ ?*) default_tooldir=${value//\?=/} ;;
|
||||
OSVERSION\ ?*) default_osversion=${value//\?=/} ;;
|
||||
PREFIX\ ?*) default_prefix=${value//\?=/} ;;
|
||||
VM_ENABLE\ ?*) default_vm=${value//\?=/} ;;
|
||||
esac
|
||||
done < config.mk
|
||||
fi
|
||||
|
@ -135,14 +137,16 @@ XLEN=${XLEN:=$default_xlen}
|
|||
TOOLDIR=${TOOLDIR:=$default_tooldir}
|
||||
OSVERSION=${OSVERSION:=$default_osversion}
|
||||
PREFIX=${PREFIX:=$default_prefix}
|
||||
VM_ENABLE=${VM_ENABLE:=$default_vm}
|
||||
|
||||
# parse command line arguments
|
||||
usage() {
|
||||
echo "Usage: $0 [--xlen=<value>] [--tooldir=<path>] [--osversion=<version>]"
|
||||
echo " --xlen=<value> Set the XLEN value (default: 32)"
|
||||
echo " --tooldir=<path> Set the TOOLDIR path (default: /opt)"
|
||||
echo " --osversion=<version> Set the OS Version (default: $(detect_os))"
|
||||
echo " --tooldir=<path> Set the TOOLDIR path (default: $HOME/tools)"
|
||||
echo " --osversion=<version> Set the OS Version (default: $(detect_osversion))"
|
||||
echo " --prefix=<path> Set installation directory"
|
||||
echo " --vm_enable=<value> Enable Virtual Memory support (default: 0)"
|
||||
exit 1
|
||||
}
|
||||
while [[ "$#" -gt 0 ]]; do
|
||||
|
@ -151,6 +155,7 @@ while [[ "$#" -gt 0 ]]; do
|
|||
--tooldir=*) TOOLDIR="${1#*=}" ;;
|
||||
--osversion=*) OSVERSION="${1#*=}" ;;
|
||||
--prefix=*) PREFIX="${1#*=}" ;;
|
||||
--vm_enable=*) VM_ENABLE="${1#*=}" ;;
|
||||
-h|--help) usage ;;
|
||||
*) echo "Unknown parameter passed: $1"; usage ;;
|
||||
esac
|
||||
|
@ -172,3 +177,5 @@ SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
|||
THIRD_PARTY_DIR=$SCRIPT_DIR/third_party
|
||||
|
||||
copy_files "$SCRIPT_DIR" "$CURRENT_DIR"
|
||||
|
||||
echo "VM Enable: "$VM_ENABLE
|
79
docs/altera_fpga_guide.md
Normal file
79
docs/altera_fpga_guide.md
Normal file
|
@ -0,0 +1,79 @@
|
|||
# FPGA Startup and Configuration Guide
|
||||
|
||||
OPAE Environment Setup
|
||||
----------------------
|
||||
|
||||
$ source /opt/inteldevstack/init_env_user.sh
|
||||
$ export OPAE_HOME=/opt/opae/1.1.2
|
||||
$ export PATH=$OPAE_HOME/bin:$PATH
|
||||
$ export C_INCLUDE_PATH=$OPAE_HOME/include:$C_INCLUDE_PATH
|
||||
$ export LIBRARY_PATH=$OPAE_HOME/lib:$LIBRARY_PATH
|
||||
$ export LD_LIBRARY_PATH=$OPAE_HOME/lib:$LD_LIBRARY_PATH
|
||||
|
||||
OPAE Build
|
||||
------------------
|
||||
|
||||
The FPGA has to following configuration options:
|
||||
- DEVICE_FAMILY=arria10 | stratix10
|
||||
- NUM_CORES=#n
|
||||
|
||||
Command line:
|
||||
|
||||
$ cd hw/syn/altera/opae
|
||||
$ PREFIX=test1 TARGET=fpga NUM_CORES=4 make
|
||||
|
||||
A new folder (ex: `test1_xxx_4c`) will be created and the build will start and take ~30-480 min to complete.
|
||||
Setting TARGET=ase will build the project for simulation using Intel ASE.
|
||||
|
||||
|
||||
OPAE Build Configuration
|
||||
------------------------
|
||||
|
||||
The hardware configuration file `/hw/rtl/VX_config.vh` defines all the hardware parameters that can be modified when build the processor.For example, have the following parameters that can be configured:
|
||||
- `NUM_WARPS`: Number of warps per cores
|
||||
- `NUM_THREADS`: Number of threads per warps
|
||||
- `PERF_ENABLE`: enable the use of all profile counters
|
||||
|
||||
You configure the syntesis build from the command line:
|
||||
|
||||
$ CONFIGS="-DPERF_ENABLE -DNUM_THREADS=8" make
|
||||
|
||||
OPAE Build Progress
|
||||
-------------------
|
||||
|
||||
You could check the last 10 lines in the build log for possible errors until build completion.
|
||||
|
||||
$ tail -n 10 <build_dir>/build.log
|
||||
|
||||
Check if the build is still running by looking for quartus_sh, quartus_syn, or quartus_fit programs.
|
||||
|
||||
$ ps -u <username>
|
||||
|
||||
If the build fails and you need to restart it, clean up the build folder using the following command:
|
||||
|
||||
$ make clean
|
||||
|
||||
The bitstream file `vortex_afu.gbs` should exist when the build is done:
|
||||
|
||||
$ ls -lsa <build_dir>/synth/vortex_afu.gbs
|
||||
|
||||
|
||||
Signing the bitstream and Programming the FPGA
|
||||
----------------------------------------------
|
||||
|
||||
$ cd <build_dir>
|
||||
$ PACSign PR -t UPDATE -H openssl_manager -i vortex_afu.gbs -o vortex_afu_unsigned_ssl.gbs
|
||||
$ fpgasupdate vortex_afu_unsigned_ssl.gbs
|
||||
|
||||
Sample FPGA Run Test
|
||||
--------------------
|
||||
|
||||
Ensure you have the correct opae runtime for the FPGA target
|
||||
|
||||
$ make -C runtime/opae clean
|
||||
$ TARGET=FPGA make -C runtime/opae
|
||||
|
||||
Run the following from your Vortex build directory
|
||||
|
||||
$ TARGET=fpga ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n128"
|
||||
|
|
@ -7,7 +7,8 @@
|
|||
- [Cache Subsystem](cache_subsystem.md)
|
||||
- [Software](software.md)
|
||||
- [Simulation](simulation.md)
|
||||
- [FPGA Setup Guide](fpga_setup.md)
|
||||
- [Altera FPGA Setup Guide](altera_fpga_guide.md)
|
||||
- [Xilinx FPGA Setup Guide](xilinx_fpga_guide.md)
|
||||
- [Debugging](debugging.md)
|
||||
- [Useful Links](references.md)
|
||||
|
||||
|
@ -27,6 +28,6 @@ Running Vortex simulators with different configurations:
|
|||
|
||||
$ ./ci/blackbox.sh --driver=opae --clusters=1 --cores=4 --warps=4 --threads=2 --app=demo
|
||||
|
||||
- Run dogfood driver test with simx driver and Vortex config of 4 cluster, 4 cores, 8 warps, 6 threads
|
||||
- Run dogfood driver test with simx driver and Vortex config of 4 cluster, 4 cores, 8 warps, 6 threads
|
||||
|
||||
$ ./ci/blackbox.sh --driver=simx --clusters=4 --cores=4 --warps=8 --threads=6 --app=dogfood
|
||||
|
|
36
docs/xilinx_fpga_guide.md
Normal file
36
docs/xilinx_fpga_guide.md
Normal file
|
@ -0,0 +1,36 @@
|
|||
# FPGA Startup and Configuration Guide
|
||||
|
||||
XRT Environment Setup
|
||||
----------------------
|
||||
|
||||
$ source /opt/xilinx/Vitis/2023.1/settings64.sh
|
||||
$ source /opt/xilinx/xrt/setup.sh
|
||||
|
||||
|
||||
Check Installed FPGA Platforms
|
||||
------------------------------
|
||||
|
||||
$ platforminfo -l
|
||||
|
||||
|
||||
Build FPGA image
|
||||
----------------
|
||||
|
||||
$ cd hw/syn/xilinx/xrt
|
||||
$ PREFIX=test1 PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 TARGET=hw NUM_CORES=4 make
|
||||
|
||||
Will run the synthesis under new build directory: BUILD_DIR := "\<PREFIX>\_\<PLATFORM>\_\<TARGET>"
|
||||
|
||||
The generated bitstream will be located under <BUILD_DIR>/bin/vortex_afu.xclbin
|
||||
|
||||
Sample FPGA Run Test
|
||||
--------------------
|
||||
|
||||
Ensure you have the correct opae runtime for the FPGA target
|
||||
|
||||
$ make -C runtime/xrt clean
|
||||
$ TARGET=hw make -C runtime/xrt
|
||||
|
||||
Run the following from your Vortex build directory
|
||||
|
||||
$ TARGET=hw FPGA_BIN_DIR=<BUILD_DIR>/bin ./ci/blackbox.sh --driver=xrt --app=sgemm --args="-n128"
|
|
@ -9,13 +9,14 @@ all: config
|
|||
|
||||
config: VX_config.h VX_types.h
|
||||
|
||||
VX_config.h: $(RTL_DIR)/VX_config.vh
|
||||
VX_config.h: $(RTL_DIR)/VX_config.vh
|
||||
$(SCRIPT_DIR)/gen_config.py -i $(RTL_DIR)/VX_config.vh -o VX_config.h
|
||||
|
||||
VX_types.h: $(RTL_DIR)/VX_types.vh
|
||||
VX_types.h: $(RTL_DIR)/VX_types.vh
|
||||
$(SCRIPT_DIR)/gen_config.py -i $(RTL_DIR)/VX_types.vh -o VX_types.h
|
||||
|
||||
clean:
|
||||
$(MAKE) -C unittest clean
|
||||
rm -f VX_config.h VX_types.h
|
||||
|
||||
.PHONY: VX_config.h VX_types.h
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,8 +14,6 @@
|
|||
`ifndef FLOAT_DPI_VH
|
||||
`define FLOAT_DPI_VH
|
||||
|
||||
`include "VX_config.vh"
|
||||
|
||||
import "DPI-C" function void dpi_fadd(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fsub(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fmul(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
|
|
|
@ -14,8 +14,6 @@
|
|||
`ifndef UTIL_DPI_VH
|
||||
`define UTIL_DPI_VH
|
||||
|
||||
`include "VX_config.vh"
|
||||
|
||||
`ifdef XLEN_64
|
||||
`define INT_TYPE longint
|
||||
`else
|
||||
|
|
|
@ -14,7 +14,8 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_cluster import VX_gpu_pkg::*; #(
|
||||
parameter CLUSTER_ID = 0
|
||||
parameter CLUSTER_ID = 0,
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
|
@ -85,7 +86,7 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (l2_reset, reset);
|
||||
|
||||
VX_cache_wrap #(
|
||||
.INSTANCE_ID ("l2cache"),
|
||||
.INSTANCE_ID ($sformatf("%s-l2cache", INSTANCE_ID)),
|
||||
.CACHE_SIZE (`L2_CACHE_SIZE),
|
||||
.LINE_SIZE (`L2_LINE_SIZE),
|
||||
.NUM_BANKS (`L2_NUM_BANKS),
|
||||
|
@ -98,6 +99,7 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
.MREQ_SIZE (`L2_MREQ_SIZE),
|
||||
.TAG_WIDTH (L2_TAG_WIDTH),
|
||||
.WRITE_ENABLE (1),
|
||||
.WRITEBACK (`L2_WRITEBACK),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.CORE_OUT_BUF (2),
|
||||
.MEM_OUT_BUF (2),
|
||||
|
@ -122,17 +124,19 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
|
||||
wire [`NUM_SOCKETS-1:0] per_socket_busy;
|
||||
|
||||
VX_dcr_bus_if socket_dcr_bus_if();
|
||||
`BUFFER_DCR_BUS_IF (socket_dcr_bus_if, socket_dcr_bus_tmp_if, (`NUM_SOCKETS > 1));
|
||||
|
||||
// Generate all sockets
|
||||
for (genvar i = 0; i < `NUM_SOCKETS; ++i) begin
|
||||
for (genvar socket_id = 0; socket_id < `NUM_SOCKETS; ++socket_id) begin : sockets
|
||||
|
||||
`RESET_RELAY (socket_reset, reset);
|
||||
|
||||
VX_socket #(
|
||||
.SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + i)
|
||||
.SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + socket_id),
|
||||
.INSTANCE_ID ($sformatf("%s-socket%0d", INSTANCE_ID, socket_id))
|
||||
) socket (
|
||||
`SCOPE_IO_BIND (scope_socket+i)
|
||||
`SCOPE_IO_BIND (scope_socket+socket_id)
|
||||
|
||||
.clk (clk),
|
||||
.reset (socket_reset),
|
||||
|
@ -143,13 +147,13 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
|
||||
.dcr_bus_if (socket_dcr_bus_if),
|
||||
|
||||
.mem_bus_if (per_socket_mem_bus_if[i]),
|
||||
.mem_bus_if (per_socket_mem_bus_if[socket_id]),
|
||||
|
||||
`ifdef GBAR_ENABLE
|
||||
.gbar_bus_if (per_socket_gbar_bus_if[i]),
|
||||
.gbar_bus_if (per_socket_gbar_bus_if[socket_id]),
|
||||
`endif
|
||||
|
||||
.busy (per_socket_busy[i])
|
||||
.busy (per_socket_busy[socket_id])
|
||||
);
|
||||
end
|
||||
|
||||
|
|
|
@ -33,10 +33,6 @@
|
|||
`endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
`ifndef VM_DISABLE
|
||||
`define VM_ENABLE
|
||||
`endif
|
||||
|
||||
`ifndef EXT_M_DISABLE
|
||||
`define EXT_M_ENABLE
|
||||
`endif
|
||||
|
@ -114,7 +110,6 @@
|
|||
`ifndef SOCKET_SIZE
|
||||
`define SOCKET_SIZE `MIN(4, `NUM_CORES)
|
||||
`endif
|
||||
`define NUM_SOCKETS `UP(`NUM_CORES / `SOCKET_SIZE)
|
||||
|
||||
`ifdef L2_ENABLE
|
||||
`define L2_ENABLED 1
|
||||
|
@ -357,7 +352,7 @@
|
|||
|
||||
// Number of SFU units
|
||||
`ifndef NUM_SFU_LANES
|
||||
`define NUM_SFU_LANES `MIN(`NUM_THREADS, 4)
|
||||
`define NUM_SFU_LANES `NUM_THREADS
|
||||
`endif
|
||||
`ifndef NUM_SFU_BLOCKS
|
||||
`define NUM_SFU_BLOCKS 1
|
||||
|
@ -481,22 +476,27 @@
|
|||
`define LATENCY_FCVT 5
|
||||
`endif
|
||||
|
||||
// FMA Bandwidth ratio
|
||||
`ifndef FMA_PE_RATIO
|
||||
`define FMA_PE_RATIO 1
|
||||
`endif
|
||||
|
||||
// FDIV Bandwidth ratio
|
||||
`ifndef FDIV_PE_RATIO
|
||||
`define FDIV_PE_RATIO 8
|
||||
`endif
|
||||
|
||||
// FSQRT Bandwidth ratio
|
||||
`ifndef FSQRT_PE_RATIO
|
||||
`define FSQRT_PE_RATIO 8
|
||||
`endif
|
||||
|
||||
// FCVT Bandwidth ratio
|
||||
`ifndef FCVT_PE_RATIO
|
||||
`define FCVT_PE_RATIO 8
|
||||
`endif
|
||||
|
||||
// FNCP Bandwidth ratio
|
||||
`ifndef FNCP_PE_RATIO
|
||||
`define FNCP_PE_RATIO 2
|
||||
`endif
|
||||
|
@ -603,7 +603,12 @@
|
|||
`define DCACHE_NUM_WAYS 1
|
||||
`endif
|
||||
|
||||
// SM Configurable Knobs //////////////////////////////////////////////////////
|
||||
// Enable Cache Writeback
|
||||
`ifndef DCACHE_WRITEBACK
|
||||
`define DCACHE_WRITEBACK 0
|
||||
`endif
|
||||
|
||||
// LMEM Configurable Knobs ////////////////////////////////////////////////////
|
||||
|
||||
`ifndef LMEM_DISABLE
|
||||
`define LMEM_ENABLE
|
||||
|
@ -662,6 +667,11 @@
|
|||
`define L2_NUM_WAYS 2
|
||||
`endif
|
||||
|
||||
// Enable Cache Writeback
|
||||
`ifndef L2_WRITEBACK
|
||||
`define L2_WRITEBACK 0
|
||||
`endif
|
||||
|
||||
// L3cache Configurable Knobs /////////////////////////////////////////////////
|
||||
|
||||
// Cache Size
|
||||
|
@ -703,6 +713,11 @@
|
|||
`define L3_NUM_WAYS 4
|
||||
`endif
|
||||
|
||||
// Enable Cache Writeback
|
||||
`ifndef L3_WRITEBACK
|
||||
`define L3_WRITEBACK 0
|
||||
`endif
|
||||
|
||||
// ISA Extensions /////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef EXT_A_ENABLE
|
||||
|
|
|
@ -59,6 +59,8 @@
|
|||
`define OFFSET_BITS 12
|
||||
`define IMM_BITS `XLEN
|
||||
|
||||
`define NUM_SOCKETS `UP(`NUM_CORES / `SOCKET_SIZE)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define EX_ALU 0
|
||||
|
@ -296,6 +298,7 @@
|
|||
`ifdef ICACHE_ENABLE
|
||||
`define L1_ENABLE
|
||||
`endif
|
||||
|
||||
`ifdef DCACHE_ENABLE
|
||||
`define L1_ENABLE
|
||||
`endif
|
||||
|
@ -322,7 +325,7 @@
|
|||
.DATAW ($bits(dst)), \
|
||||
.RESETW ($bits(dst)), \
|
||||
.DEPTH (latency) \
|
||||
) __``dst ( \
|
||||
) __``dst``__ ( \
|
||||
.clk (clk), \
|
||||
.reset (reset), \
|
||||
.enable (ena), \
|
||||
|
@ -336,13 +339,18 @@
|
|||
VX_popcount #( \
|
||||
.N ($bits(in)), \
|
||||
.MODEL (model) \
|
||||
) __``out ( \
|
||||
) __``out``__ ( \
|
||||
.data_in (in), \
|
||||
.data_out (out) \
|
||||
)
|
||||
|
||||
`define POP_COUNT(out, in) `POP_COUNT_EX(out, in, 1)
|
||||
|
||||
`define ASSIGN_VX_IF(dst, src) \
|
||||
assign dst.valid = src.valid; \
|
||||
assign dst.data = src.data; \
|
||||
assign src.ready = dst.ready
|
||||
|
||||
`define ASSIGN_VX_MEM_BUS_IF(dst, src) \
|
||||
assign dst.req_valid = src.req_valid; \
|
||||
assign dst.req_data = src.req_data; \
|
||||
|
@ -377,42 +385,42 @@
|
|||
assign dst.rsp_ready = src.rsp_ready
|
||||
|
||||
`define BUFFER_DCR_BUS_IF(dst, src, enable) \
|
||||
logic [(1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH)-1:0] __``dst; \
|
||||
if (enable) begin \
|
||||
reg [(1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH)-1:0] __dst; \
|
||||
always @(posedge clk) begin \
|
||||
__``dst <= {src.write_valid, src.write_addr, src.write_data}; \
|
||||
__dst <= {src.write_valid, src.write_addr, src.write_data}; \
|
||||
end \
|
||||
assign {dst.write_valid, dst.write_addr, dst.write_data} = __dst; \
|
||||
end else begin \
|
||||
assign __``dst = {src.write_valid, src.write_addr, src.write_data}; \
|
||||
end \
|
||||
VX_dcr_bus_if dst(); \
|
||||
assign {dst.write_valid, dst.write_addr, dst.write_data} = __``dst
|
||||
assign {dst.write_valid, dst.write_addr, dst.write_data} = {src.write_valid, src.write_addr, src.write_data}; \
|
||||
end
|
||||
|
||||
`define PERF_COUNTER_ADD(dst, src, field, width, dst_count, src_count, reg_enable) \
|
||||
for (genvar __d = 0; __d < dst_count; ++__d) begin \
|
||||
localparam __count = ((src_count > dst_count) ? `CDIV(src_count, dst_count) : 1); \
|
||||
wire [__count-1:0][width-1:0] __reduce_add_i_``src``field; \
|
||||
wire [width-1:0] __reduce_add_o_``dst``field; \
|
||||
for (genvar __i = 0; __i < __count; ++__i) begin \
|
||||
assign __reduce_add_i_``src``field[__i] = ``src[__d * __count + __i].``field; \
|
||||
`define PERF_COUNTER_ADD(dst, src, field, width, count, reg_enable) \
|
||||
if (count > 1) begin \
|
||||
wire [count-1:0][width-1:0] __reduce_add_i_field; \
|
||||
wire [width-1:0] __reduce_add_o_field; \
|
||||
for (genvar __i = 0; __i < count; ++__i) begin \
|
||||
assign __reduce_add_i_field[__i] = src[__i].``field; \
|
||||
end \
|
||||
VX_reduce #(.DATAW_IN(width), .N(__count), .OP("+")) __reduce_add_``dst``field ( \
|
||||
__reduce_add_i_``src``field, \
|
||||
__reduce_add_o_``dst``field \
|
||||
VX_reduce #(.DATAW_IN(width), .N(count), .OP("+")) __reduce_add_field ( \
|
||||
__reduce_add_i_field, \
|
||||
__reduce_add_o_field \
|
||||
); \
|
||||
if (reg_enable) begin \
|
||||
reg [width-1:0] __reduce_add_r_``dst``field; \
|
||||
reg [width-1:0] __reduce_add_r_field; \
|
||||
always @(posedge clk) begin \
|
||||
if (reset) begin \
|
||||
__reduce_add_r_``dst``field <= '0; \
|
||||
__reduce_add_r_field <= '0; \
|
||||
end else begin \
|
||||
__reduce_add_r_``dst``field <= __reduce_add_o_``dst``field; \
|
||||
__reduce_add_r_field <= __reduce_add_o_field; \
|
||||
end \
|
||||
end \
|
||||
assign ``dst[__d].``field = __reduce_add_r_``dst``field; \
|
||||
assign dst.``field = __reduce_add_r_field; \
|
||||
end else begin \
|
||||
assign ``dst[__d].``field = __reduce_add_o_``dst``field; \
|
||||
assign dst.``field = __reduce_add_o_field; \
|
||||
end \
|
||||
end else begin \
|
||||
assign dst.``field = src[0].``field; \
|
||||
end
|
||||
|
||||
`define ASSIGN_BLOCKED_WID(dst, src, block_idx, block_size) \
|
||||
|
@ -426,20 +434,4 @@
|
|||
assign dst = src; \
|
||||
end
|
||||
|
||||
`define TO_DISPATCH_DATA(data, tid) { \
|
||||
data.uuid, \
|
||||
data.wis, \
|
||||
data.tmask, \
|
||||
data.PC, \
|
||||
data.op_type, \
|
||||
data.op_args, \
|
||||
data.wb, \
|
||||
data.rd, \
|
||||
tid, \
|
||||
data.rs1_data, \
|
||||
data.rs2_data, \
|
||||
data.rs3_data}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`endif // VX_DEFINE_VH
|
||||
|
|
|
@ -60,6 +60,8 @@ package VX_gpu_pkg;
|
|||
logic [7:0] mpm_class;
|
||||
} base_dcrs_t;
|
||||
|
||||
//////////////////////////// Perf counter types ///////////////////////////
|
||||
|
||||
typedef struct packed {
|
||||
logic [`PERF_CTR_BITS-1:0] reads;
|
||||
logic [`PERF_CTR_BITS-1:0] writes;
|
||||
|
@ -77,48 +79,63 @@ package VX_gpu_pkg;
|
|||
logic [`PERF_CTR_BITS-1:0] latency;
|
||||
} mem_perf_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [`PERF_CTR_BITS-1:0] idles;
|
||||
logic [`PERF_CTR_BITS-1:0] stalls;
|
||||
} sched_perf_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [`PERF_CTR_BITS-1:0] ibf_stalls;
|
||||
logic [`PERF_CTR_BITS-1:0] scb_stalls;
|
||||
logic [`PERF_CTR_BITS-1:0] opd_stalls;
|
||||
logic [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] units_uses;
|
||||
logic [`NUM_SFU_UNITS-1:0][`PERF_CTR_BITS-1:0] sfu_uses;
|
||||
} issue_perf_t;
|
||||
|
||||
//////////////////////// instruction arguments ////////////////////////////
|
||||
|
||||
typedef struct packed {
|
||||
logic use_PC;
|
||||
logic use_imm;
|
||||
logic is_w;
|
||||
logic [`ALU_TYPE_BITS-1:0] xtype;
|
||||
logic [`IMM_BITS-1:0] imm;
|
||||
} alu_mod_t;
|
||||
} alu_args_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [($bits(alu_mod_t)-`INST_FRM_BITS-`INST_FMT_BITS)-1:0] __padding;
|
||||
logic [($bits(alu_args_t)-`INST_FRM_BITS-`INST_FMT_BITS)-1:0] __padding;
|
||||
logic [`INST_FRM_BITS-1:0] frm;
|
||||
logic [`INST_FMT_BITS-1:0] fmt;
|
||||
} fpu_mod_t;
|
||||
} fpu_args_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [($bits(alu_mod_t)-1-1-`OFFSET_BITS)-1:0] __padding;
|
||||
logic [($bits(alu_args_t)-1-1-`OFFSET_BITS)-1:0] __padding;
|
||||
logic is_store;
|
||||
logic is_float;
|
||||
logic [`OFFSET_BITS-1:0] offset;
|
||||
} lsu_mod_t;
|
||||
} lsu_args_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [($bits(alu_mod_t)-1-`VX_CSR_ADDR_BITS-5)-1:0] __padding;
|
||||
logic [($bits(alu_args_t)-1-`VX_CSR_ADDR_BITS-5)-1:0] __padding;
|
||||
logic use_imm;
|
||||
logic [`VX_CSR_ADDR_BITS-1:0] addr;
|
||||
logic [4:0] imm;
|
||||
} csr_mod_t;
|
||||
} csr_args_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [($bits(alu_mod_t)-1)-1:0] __padding;
|
||||
logic [($bits(alu_args_t)-1)-1:0] __padding;
|
||||
logic is_neg;
|
||||
} wctl_mod_t;
|
||||
} wctl_args_t;
|
||||
|
||||
typedef union packed {
|
||||
alu_mod_t alu;
|
||||
fpu_mod_t fpu;
|
||||
lsu_mod_t lsu;
|
||||
csr_mod_t csr;
|
||||
wctl_mod_t wctl;
|
||||
alu_args_t alu;
|
||||
fpu_args_t fpu;
|
||||
lsu_args_t lsu;
|
||||
csr_args_t csr;
|
||||
wctl_args_t wctl;
|
||||
} op_args_t;
|
||||
|
||||
/* verilator lint_off UNUSED */
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
|
||||
///////////////////////// LSU memory Parameters ///////////////////////////
|
||||
|
||||
|
@ -129,6 +146,31 @@ package VX_gpu_pkg;
|
|||
localparam LSU_TAG_WIDTH = (`UUID_WIDTH + LSU_TAG_ID_BITS);
|
||||
localparam LSU_NUM_REQS = `NUM_LSU_BLOCKS * `NUM_LSU_LANES;
|
||||
|
||||
////////////////////////// Icache Parameters //////////////////////////////
|
||||
|
||||
// Word size in bytes
|
||||
localparam ICACHE_WORD_SIZE = 4;
|
||||
localparam ICACHE_ADDR_WIDTH = (`MEM_ADDR_WIDTH - `CLOG2(ICACHE_WORD_SIZE));
|
||||
|
||||
// Block size in bytes
|
||||
localparam ICACHE_LINE_SIZE = `L1_LINE_SIZE;
|
||||
|
||||
// Core request tag Id bits
|
||||
localparam ICACHE_TAG_ID_BITS = `NW_WIDTH;
|
||||
|
||||
// Core request tag bits
|
||||
localparam ICACHE_TAG_WIDTH = (`UUID_WIDTH + ICACHE_TAG_ID_BITS);
|
||||
|
||||
// Memory request data bits
|
||||
localparam ICACHE_MEM_DATA_WIDTH = (ICACHE_LINE_SIZE * 8);
|
||||
|
||||
// Memory request tag bits
|
||||
`ifdef ICACHE_ENABLE
|
||||
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_MEM_TAG_WIDTH(`ICACHE_MSHR_SIZE, 1, `NUM_ICACHES);
|
||||
`else
|
||||
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(1, ICACHE_LINE_SIZE, ICACHE_WORD_SIZE, ICACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_ICACHES);
|
||||
`endif
|
||||
|
||||
////////////////////////// Dcache Parameters //////////////////////////////
|
||||
|
||||
// Word size in bytes
|
||||
|
@ -154,36 +196,11 @@ package VX_gpu_pkg;
|
|||
localparam DCACHE_MEM_DATA_WIDTH = (DCACHE_LINE_SIZE * 8);
|
||||
|
||||
// Memory request tag bits
|
||||
`ifdef DCACHE_ENABLE
|
||||
`ifdef DCACHE_ENABLE
|
||||
localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_NC_MEM_TAG_WIDTH(`DCACHE_MSHR_SIZE, `DCACHE_NUM_BANKS, DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES);
|
||||
`else
|
||||
`else
|
||||
localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES);
|
||||
`endif
|
||||
|
||||
////////////////////////// Icache Parameters //////////////////////////////
|
||||
|
||||
// Word size in bytes
|
||||
localparam ICACHE_WORD_SIZE = 4;
|
||||
localparam ICACHE_ADDR_WIDTH = (`MEM_ADDR_WIDTH - `CLOG2(ICACHE_WORD_SIZE));
|
||||
|
||||
// Block size in bytes
|
||||
localparam ICACHE_LINE_SIZE = `L1_LINE_SIZE;
|
||||
|
||||
// Core request tag Id bits
|
||||
localparam ICACHE_TAG_ID_BITS = `NW_WIDTH;
|
||||
|
||||
// Core request tag bits
|
||||
localparam ICACHE_TAG_WIDTH = (`UUID_WIDTH + ICACHE_TAG_ID_BITS);
|
||||
|
||||
// Memory request data bits
|
||||
localparam ICACHE_MEM_DATA_WIDTH = (ICACHE_LINE_SIZE * 8);
|
||||
|
||||
// Memory request tag bits
|
||||
`ifdef ICACHE_ENABLE
|
||||
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_MEM_TAG_WIDTH(`ICACHE_MSHR_SIZE, 1, `NUM_ICACHES);
|
||||
`else
|
||||
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(1, ICACHE_LINE_SIZE, ICACHE_WORD_SIZE, ICACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_ICACHES);
|
||||
`endif
|
||||
`endif
|
||||
|
||||
/////////////////////////////// L1 Parameters /////////////////////////////
|
||||
|
||||
|
@ -208,11 +225,11 @@ package VX_gpu_pkg;
|
|||
localparam L2_MEM_DATA_WIDTH = (`L2_LINE_SIZE * 8);
|
||||
|
||||
// Memory request tag bits
|
||||
`ifdef L2_ENABLE
|
||||
`ifdef L2_ENABLE
|
||||
localparam L2_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L2_MSHR_SIZE, `L2_NUM_BANKS, L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH);
|
||||
`else
|
||||
`else
|
||||
localparam L2_MEM_TAG_WIDTH = `CACHE_BYPASS_TAG_WIDTH(L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH);
|
||||
`endif
|
||||
`endif
|
||||
|
||||
/////////////////////////////// L3 Parameters /////////////////////////////
|
||||
|
||||
|
@ -229,23 +246,20 @@ package VX_gpu_pkg;
|
|||
localparam L3_MEM_DATA_WIDTH = (`L3_LINE_SIZE * 8);
|
||||
|
||||
// Memory request tag bits
|
||||
`ifdef L3_ENABLE
|
||||
`ifdef L3_ENABLE
|
||||
localparam L3_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L3_MSHR_SIZE, `L3_NUM_BANKS, L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH);
|
||||
`else
|
||||
`else
|
||||
localparam L3_MEM_TAG_WIDTH = `CACHE_BYPASS_TAG_WIDTH(L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH);
|
||||
`endif
|
||||
|
||||
/* verilator lint_on UNUSED */
|
||||
`endif
|
||||
|
||||
/////////////////////////////// Issue parameters //////////////////////////
|
||||
|
||||
localparam ISSUE_ISW = `CLOG2(`ISSUE_WIDTH);
|
||||
localparam ISSUE_ISW_W = `UP(ISSUE_ISW);
|
||||
localparam ISSUE_RATIO = `NUM_WARPS / `ISSUE_WIDTH;
|
||||
localparam ISSUE_WIS = `CLOG2(ISSUE_RATIO);
|
||||
localparam PER_ISSUE_WARPS = `NUM_WARPS / `ISSUE_WIDTH;
|
||||
localparam ISSUE_WIS = `CLOG2(PER_ISSUE_WARPS);
|
||||
localparam ISSUE_WIS_W = `UP(ISSUE_WIS);
|
||||
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
function logic [`NW_WIDTH-1:0] wis_to_wid(
|
||||
input logic [ISSUE_WIS_W-1:0] wis,
|
||||
input logic [ISSUE_ISW_W-1:0] isw
|
||||
|
@ -278,6 +292,20 @@ package VX_gpu_pkg;
|
|||
wid_to_wis = 0;
|
||||
end
|
||||
endfunction
|
||||
|
||||
///////////////////////// Miscaellaneous functions ////////////////////////
|
||||
|
||||
function logic [`SFU_WIDTH-1:0] op_to_sfu_type(
|
||||
input logic [`INST_OP_BITS-1:0] op_type
|
||||
);
|
||||
case (op_type)
|
||||
`INST_SFU_CSRRW,
|
||||
`INST_SFU_CSRRS,
|
||||
`INST_SFU_CSRRC: op_to_sfu_type = `SFU_CSRS;
|
||||
default: op_to_sfu_type = `SFU_WCTL;
|
||||
endcase
|
||||
endfunction
|
||||
|
||||
`IGNORE_UNUSED_END
|
||||
|
||||
endpackage
|
||||
|
|
|
@ -47,7 +47,7 @@
|
|||
`define UNUSED_VAR(x)
|
||||
`define UNUSED_PIN(x) . x ()
|
||||
`define UNUSED_ARG(x) x
|
||||
`define TRACE(level, args) $write args
|
||||
`define TRACE(level, args) if (level <= `DEBUG_LEVEL) $write args
|
||||
`else
|
||||
`ifdef VERILATOR
|
||||
`define TRACING_ON /* verilator tracing_on */
|
||||
|
@ -112,8 +112,14 @@
|
|||
`define UNUSED_ARG(x) /* verilator lint_off UNUSED */ \
|
||||
x \
|
||||
/* verilator lint_on UNUSED */
|
||||
`define TRACE(level, args) dpi_trace(level, $sformatf args)
|
||||
`endif
|
||||
|
||||
`ifdef SV_DPI
|
||||
`define TRACE(level, args) dpi_trace(level, $sformatf args)
|
||||
`else
|
||||
`define TRACE(level, args) if (level <= `DEBUG_LEVEL) $write args
|
||||
`endif
|
||||
|
||||
`endif
|
||||
|
||||
`ifdef SIMULATION
|
||||
|
|
|
@ -14,7 +14,8 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_socket import VX_gpu_pkg::*; #(
|
||||
parameter SOCKET_ID = 0
|
||||
parameter SOCKET_ID = 0,
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
|
@ -40,6 +41,11 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
output wire busy
|
||||
);
|
||||
|
||||
`ifdef SCOPE
|
||||
localparam scope_core = 0;
|
||||
`SCOPE_IO_SWITCH (`SOCKET_SIZE);
|
||||
`endif
|
||||
|
||||
`ifdef GBAR_ENABLE
|
||||
VX_gbar_bus_if per_core_gbar_bus_if[`SOCKET_SIZE]();
|
||||
|
||||
|
@ -81,7 +87,7 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (icache_reset, reset);
|
||||
|
||||
VX_cache_cluster #(
|
||||
.INSTANCE_ID ($sformatf("socket%0d-icache", SOCKET_ID)),
|
||||
.INSTANCE_ID ($sformatf("%s-icache", INSTANCE_ID)),
|
||||
.NUM_UNITS (`NUM_ICACHES),
|
||||
.NUM_INPUTS (`SOCKET_SIZE),
|
||||
.TAG_SEL_IDX (0),
|
||||
|
@ -126,7 +132,7 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (dcache_reset, reset);
|
||||
|
||||
VX_cache_cluster #(
|
||||
.INSTANCE_ID ($sformatf("socket%0d-dcache", SOCKET_ID)),
|
||||
.INSTANCE_ID ($sformatf("%s-dcache", INSTANCE_ID)),
|
||||
.NUM_UNITS (`NUM_DCACHES),
|
||||
.NUM_INPUTS (`SOCKET_SIZE),
|
||||
.TAG_SEL_IDX (0),
|
||||
|
@ -143,8 +149,9 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH (DCACHE_TAG_WIDTH),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.WRITE_ENABLE (1),
|
||||
.WRITEBACK (`DCACHE_WRITEBACK),
|
||||
.NC_ENABLE (1),
|
||||
.CORE_OUT_BUF (`LMEM_ENABLED ? 2 : 1),
|
||||
.CORE_OUT_BUF (2),
|
||||
.MEM_OUT_BUF (2)
|
||||
) dcache (
|
||||
`ifdef PERF_ENABLE
|
||||
|
@ -194,19 +201,19 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
|
||||
wire [`SOCKET_SIZE-1:0] per_core_busy;
|
||||
|
||||
VX_dcr_bus_if core_dcr_bus_if();
|
||||
`BUFFER_DCR_BUS_IF (core_dcr_bus_if, dcr_bus_if, (`SOCKET_SIZE > 1));
|
||||
|
||||
`SCOPE_IO_SWITCH (`SOCKET_SIZE)
|
||||
|
||||
// Generate all cores
|
||||
for (genvar i = 0; i < `SOCKET_SIZE; ++i) begin
|
||||
for (genvar core_id = 0; core_id < `SOCKET_SIZE; ++core_id) begin : cores
|
||||
|
||||
`RESET_RELAY (core_reset, reset);
|
||||
|
||||
VX_core #(
|
||||
.CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + i)
|
||||
.CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + core_id),
|
||||
.INSTANCE_ID ($sformatf("%s-core%0d", INSTANCE_ID, core_id))
|
||||
) core (
|
||||
`SCOPE_IO_BIND (i)
|
||||
`SCOPE_IO_BIND (scope_core + core_id)
|
||||
|
||||
.clk (clk),
|
||||
.reset (core_reset),
|
||||
|
@ -217,15 +224,15 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
|
||||
.dcr_bus_if (core_dcr_bus_if),
|
||||
|
||||
.dcache_bus_if (per_core_dcache_bus_if[i * DCACHE_NUM_REQS +: DCACHE_NUM_REQS]),
|
||||
.dcache_bus_if (per_core_dcache_bus_if[core_id * DCACHE_NUM_REQS +: DCACHE_NUM_REQS]),
|
||||
|
||||
.icache_bus_if (per_core_icache_bus_if[i]),
|
||||
.icache_bus_if (per_core_icache_bus_if[core_id]),
|
||||
|
||||
`ifdef GBAR_ENABLE
|
||||
.gbar_bus_if (per_core_gbar_bus_if[i]),
|
||||
.gbar_bus_if (per_core_gbar_bus_if[core_id]),
|
||||
`endif
|
||||
|
||||
.busy (per_core_busy[i])
|
||||
.busy (per_core_busy[core_id])
|
||||
);
|
||||
end
|
||||
|
||||
|
|
|
@ -85,30 +85,31 @@
|
|||
`define VX_CSR_MPM_IBUF_ST_H 12'hB85
|
||||
`define VX_CSR_MPM_SCRB_ST 12'hB06
|
||||
`define VX_CSR_MPM_SCRB_ST_H 12'hB86
|
||||
`define VX_CSR_MPM_SCRB_ALU 12'hB07
|
||||
`define VX_CSR_MPM_SCRB_ALU_H 12'hB87
|
||||
`define VX_CSR_MPM_SCRB_FPU 12'hB08
|
||||
`define VX_CSR_MPM_SCRB_FPU_H 12'hB88
|
||||
`define VX_CSR_MPM_SCRB_LSU 12'hB09
|
||||
`define VX_CSR_MPM_SCRB_LSU_H 12'hB89
|
||||
`define VX_CSR_MPM_SCRB_SFU 12'hB0A
|
||||
`define VX_CSR_MPM_SCRB_SFU_H 12'hB8A
|
||||
`define VX_CSR_MPM_OPDS_ST 12'hB07
|
||||
`define VX_CSR_MPM_OPDS_ST_H 12'hB87
|
||||
`define VX_CSR_MPM_SCRB_ALU 12'hB08
|
||||
`define VX_CSR_MPM_SCRB_ALU_H 12'hB88
|
||||
`define VX_CSR_MPM_SCRB_FPU 12'hB09
|
||||
`define VX_CSR_MPM_SCRB_FPU_H 12'hB89
|
||||
`define VX_CSR_MPM_SCRB_LSU 12'hB0A
|
||||
`define VX_CSR_MPM_SCRB_LSU_H 12'hB8A
|
||||
`define VX_CSR_MPM_SCRB_SFU 12'hB0B
|
||||
`define VX_CSR_MPM_SCRB_SFU_H 12'hB8B
|
||||
`define VX_CSR_MPM_SCRB_CSRS 12'hB0C
|
||||
`define VX_CSR_MPM_SCRB_CSRS_H 12'hB8C
|
||||
`define VX_CSR_MPM_SCRB_WCTL 12'hB0D
|
||||
`define VX_CSR_MPM_SCRB_WCTL_H 12'hB8D
|
||||
// PERF: memory
|
||||
`define VX_CSR_MPM_IFETCHES 12'hB0B
|
||||
`define VX_CSR_MPM_IFETCHES_H 12'hB8B
|
||||
`define VX_CSR_MPM_LOADS 12'hB0C
|
||||
`define VX_CSR_MPM_LOADS_H 12'hB8C
|
||||
`define VX_CSR_MPM_STORES 12'hB0D
|
||||
`define VX_CSR_MPM_STORES_H 12'hB8D
|
||||
`define VX_CSR_MPM_IFETCH_LT 12'hB0E
|
||||
`define VX_CSR_MPM_IFETCH_LT_H 12'hB8E
|
||||
`define VX_CSR_MPM_LOAD_LT 12'hB0F
|
||||
`define VX_CSR_MPM_LOAD_LT_H 12'hB8F
|
||||
// SFU: scoreboard
|
||||
`define VX_CSR_MPM_SCRB_WCTL 12'hB10
|
||||
`define VX_CSR_MPM_SCRB_WCTL_H 12'hB90
|
||||
`define VX_CSR_MPM_SCRB_CSRS 12'hB11
|
||||
`define VX_CSR_MPM_SCRB_CSRS_H 12'hB91
|
||||
`define VX_CSR_MPM_IFETCHES 12'hB0E
|
||||
`define VX_CSR_MPM_IFETCHES_H 12'hB8E
|
||||
`define VX_CSR_MPM_LOADS 12'hB0F
|
||||
`define VX_CSR_MPM_LOADS_H 12'hB8F
|
||||
`define VX_CSR_MPM_STORES 12'hB10
|
||||
`define VX_CSR_MPM_STORES_H 12'hB90
|
||||
`define VX_CSR_MPM_IFETCH_LT 12'hB11
|
||||
`define VX_CSR_MPM_IFETCH_LT_H 12'hB91
|
||||
`define VX_CSR_MPM_LOAD_LT 12'hB12
|
||||
`define VX_CSR_MPM_LOAD_LT_H 12'hB92
|
||||
|
||||
// Machine Performance-monitoring memory counters (class 2) ///////////////////
|
||||
|
||||
|
|
|
@ -44,6 +44,11 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
output wire busy
|
||||
);
|
||||
|
||||
`ifdef SCOPE
|
||||
localparam scope_cluster = 0;
|
||||
`SCOPE_IO_SWITCH (`NUM_CLUSTERS);
|
||||
`endif
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_mem_perf_if mem_perf_if();
|
||||
assign mem_perf_if.icache = 'x;
|
||||
|
@ -78,6 +83,7 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
.MREQ_SIZE (`L3_MREQ_SIZE),
|
||||
.TAG_WIDTH (L2_MEM_TAG_WIDTH),
|
||||
.WRITE_ENABLE (1),
|
||||
.WRITEBACK (`L3_WRITEBACK),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.CORE_OUT_BUF (2),
|
||||
.MEM_OUT_BUF (2),
|
||||
|
@ -121,19 +127,19 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
|
||||
|
||||
`SCOPE_IO_SWITCH (`NUM_CLUSTERS)
|
||||
|
||||
// Generate all clusters
|
||||
for (genvar i = 0; i < `NUM_CLUSTERS; ++i) begin
|
||||
for (genvar cluster_id = 0; cluster_id < `NUM_CLUSTERS; ++cluster_id) begin : clusters
|
||||
|
||||
`RESET_RELAY (cluster_reset, reset);
|
||||
|
||||
VX_dcr_bus_if cluster_dcr_bus_if();
|
||||
`BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, (`NUM_CLUSTERS > 1));
|
||||
|
||||
VX_cluster #(
|
||||
.CLUSTER_ID (i)
|
||||
.CLUSTER_ID (cluster_id),
|
||||
.INSTANCE_ID ($sformatf("cluster%0d", cluster_id))
|
||||
) cluster (
|
||||
`SCOPE_IO_BIND (i)
|
||||
`SCOPE_IO_BIND (scope_cluster + cluster_id)
|
||||
|
||||
.clk (clk),
|
||||
.reset (cluster_reset),
|
||||
|
@ -144,9 +150,9 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
|
||||
.dcr_bus_if (cluster_dcr_bus_if),
|
||||
|
||||
.mem_bus_if (per_cluster_mem_bus_if[i]),
|
||||
.mem_bus_if (per_cluster_mem_bus_if[cluster_id]),
|
||||
|
||||
.busy (per_cluster_busy[i])
|
||||
.busy (per_cluster_busy[cluster_id])
|
||||
);
|
||||
end
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
// To be done:
|
||||
// Check how to run this with OPAE. Looks like setup issue
|
||||
|
||||
`ifndef NOPAE
|
||||
|
||||
`include "platform_if.vh"
|
||||
|
||||
|
@ -85,7 +86,7 @@ module ccip_std_afu #(
|
|||
t_local_mem_data avs_writedata [NUM_LOCAL_MEM_BANKS];
|
||||
t_local_mem_addr avs_address [NUM_LOCAL_MEM_BANKS];
|
||||
logic avs_write [NUM_LOCAL_MEM_BANKS];
|
||||
logic avs_read [NUM_LOCAL_MEM_BANKS];
|
||||
logic avs_read [NUM_LOCAL_MEM_BANKS];
|
||||
|
||||
for (genvar b = 0; b < NUM_LOCAL_MEM_BANKS; b++) begin
|
||||
assign local_mem[b].burstcount = avs_burstcount[b];
|
||||
|
@ -94,7 +95,7 @@ module ccip_std_afu #(
|
|||
assign local_mem[b].byteenable = avs_byteenable[b];
|
||||
assign local_mem[b].write = avs_write[b];
|
||||
assign local_mem[b].read = avs_read[b];
|
||||
|
||||
|
||||
assign avs_waitrequest[b] = local_mem[b].waitrequest;
|
||||
assign avs_readdata[b] = local_mem[b].readdata;
|
||||
assign avs_readdatavalid[b] = local_mem[b].readdatavalid;
|
||||
|
@ -107,7 +108,7 @@ module ccip_std_afu #(
|
|||
.reset (reset_T1),
|
||||
|
||||
.cp2af_sRxPort (cp2af_sRx_T1),
|
||||
.af2cp_sTxPort (af2cp_sTx_T0),
|
||||
.af2cp_sTxPort (af2cp_sTx_T0),
|
||||
|
||||
.avs_writedata (avs_writedata),
|
||||
.avs_readdata (avs_readdata),
|
||||
|
@ -121,3 +122,5 @@ module ccip_std_afu #(
|
|||
);
|
||||
|
||||
endmodule
|
||||
|
||||
`endif
|
||||
|
|
|
@ -587,7 +587,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.DATA_SIZE (LMEM_DATA_SIZE),
|
||||
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
|
||||
.TAG_WIDTH (AVS_REQ_TAGW),
|
||||
.ARBITER ("P"),
|
||||
.ARBITER ("P"), // prioritize VX requests
|
||||
.REQ_OUT_BUF (0),
|
||||
.RSP_OUT_BUF (0)
|
||||
) mem_arb (
|
||||
|
@ -692,9 +692,11 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.reset (reset),
|
||||
.incr (cci_rd_req_fire),
|
||||
.decr (cci_rdq_pop),
|
||||
`UNUSED_PIN (empty),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
.full (cci_pending_reads_full),
|
||||
.size (cci_pending_reads),
|
||||
`UNUSED_PIN (empty)
|
||||
`UNUSED_PIN (alm_full),
|
||||
.size (cci_pending_reads)
|
||||
);
|
||||
|
||||
`UNUSED_VAR (cci_pending_reads)
|
||||
|
@ -852,7 +854,9 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.incr (cci_mem_rd_rsp_fire),
|
||||
.decr (cci_wr_rsp_fire),
|
||||
.empty (cci_pending_writes_empty),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
.full (cci_pending_writes_full),
|
||||
`UNUSED_PIN (alm_full),
|
||||
.size (cci_pending_writes)
|
||||
);
|
||||
|
||||
|
@ -1010,7 +1014,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
// SCOPE //////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef DBG_SCOPE_AFU
|
||||
`ifdef SCOPE
|
||||
wire mem_req_fire = mem_bus_if[0].req_valid && mem_bus_if[0].req_ready;
|
||||
wire mem_rsp_fire = mem_bus_if[0].rsp_valid && mem_bus_if[0].rsp_ready;
|
||||
wire avs_write_fire = avs_write[0] && ~avs_waitrequest[0];
|
||||
|
@ -1080,7 +1083,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.bus_in(scope_bus_in_w[0]),
|
||||
.bus_out(scope_bus_out_w[0])
|
||||
);
|
||||
`endif
|
||||
`else
|
||||
`SCOPE_IO_UNUSED_W(0)
|
||||
`endif
|
||||
|
|
|
@ -311,7 +311,6 @@ module VX_afu_wrap #(
|
|||
// SCOPE //////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef DBG_SCOPE_AFU
|
||||
`ifdef SCOPE
|
||||
`define TRIGGERS { \
|
||||
reset, \
|
||||
ap_start, \
|
||||
|
@ -330,35 +329,17 @@ module VX_afu_wrap #(
|
|||
VX_scope_tap #(
|
||||
.SCOPE_ID (0),
|
||||
.TRIGGERW ($bits(`TRIGGERS)),
|
||||
.PROBEW ($bits(`PROBES))
|
||||
.PROBEW ($bits(`PROBES))
|
||||
) scope_tap (
|
||||
.clk(clk),
|
||||
.reset(scope_reset_w[0]),
|
||||
.start(1'b0),
|
||||
.stop(1'b0),
|
||||
.triggers(`TRIGGERS),
|
||||
.probes(`PROBES),
|
||||
.bus_in(scope_bus_in_w[0]),
|
||||
.bus_out(scope_bus_out_w[0])
|
||||
.clk (clk),
|
||||
.reset (scope_reset_w[0]),
|
||||
.start (1'b0),
|
||||
.stop (1'b0),
|
||||
.triggers (`TRIGGERS),
|
||||
.probes (`PROBES),
|
||||
.bus_in (scope_bus_in_w[0]),
|
||||
.bus_out (scope_bus_out_w[0])
|
||||
);
|
||||
`endif
|
||||
`ifdef CHIPSCOPE
|
||||
ila_afu ila_afu_inst (
|
||||
.clk (ap_clk),
|
||||
.probe0 ({
|
||||
ap_start,
|
||||
ap_done,
|
||||
ap_idle,
|
||||
interrupt
|
||||
}),
|
||||
.probe1 ({
|
||||
vx_pending_writes,
|
||||
vx_busy_wait,
|
||||
vx_busy,
|
||||
vx_running
|
||||
})
|
||||
);
|
||||
`endif
|
||||
`else
|
||||
`SCOPE_IO_UNUSED_W(0)
|
||||
`endif
|
||||
|
|
109
hw/rtl/cache/VX_bank_flush.sv
vendored
Normal file
109
hw/rtl/cache/VX_bank_flush.sv
vendored
Normal file
|
@ -0,0 +1,109 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_bank_flush #(
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 64,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 1,
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire flush_in_valid,
|
||||
output wire flush_in_ready,
|
||||
output wire flush_out_init,
|
||||
output wire flush_out_valid,
|
||||
output wire [`CS_LINE_SEL_BITS-1:0] flush_out_line,
|
||||
output wire [NUM_WAYS-1:0] flush_out_way,
|
||||
input wire flush_out_ready,
|
||||
input wire mshr_empty
|
||||
);
|
||||
parameter CTR_WIDTH = `CS_LINE_SEL_BITS + (WRITEBACK ? `CS_WAY_SEL_BITS : 0);
|
||||
|
||||
parameter STATE_IDLE = 2'd0;
|
||||
parameter STATE_INIT = 2'd1;
|
||||
parameter STATE_FLUSH = 2'd2;
|
||||
|
||||
reg [CTR_WIDTH-1:0] counter_r;
|
||||
reg [1:0] state_r, state_n;
|
||||
reg flush_in_ready_r, flush_in_ready_n;
|
||||
|
||||
always @(*) begin
|
||||
state_n = state_r;
|
||||
flush_in_ready_n = 0;
|
||||
case (state_r)
|
||||
// STATE_IDLE
|
||||
default: begin
|
||||
if (flush_in_valid && mshr_empty) begin
|
||||
state_n = STATE_FLUSH;
|
||||
end
|
||||
end
|
||||
STATE_INIT: begin
|
||||
if (counter_r == ((2 ** `CS_LINE_SEL_BITS)-1)) begin
|
||||
state_n = STATE_IDLE;
|
||||
end
|
||||
end
|
||||
STATE_FLUSH: begin
|
||||
if (counter_r == ((2 ** CTR_WIDTH)-1)) begin
|
||||
state_n = STATE_IDLE;
|
||||
flush_in_ready_n = 1;
|
||||
end
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state_r <= STATE_INIT;
|
||||
counter_r <= '0;
|
||||
flush_in_ready_r <= '0;
|
||||
end else begin
|
||||
state_r <= state_n;
|
||||
flush_in_ready_r <= flush_in_ready_n;
|
||||
if (state_r != STATE_IDLE) begin
|
||||
if ((state_r == STATE_INIT) || flush_out_ready) begin
|
||||
counter_r <= counter_r + CTR_WIDTH'(1);
|
||||
end
|
||||
end else begin
|
||||
counter_r <= '0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign flush_in_ready = flush_in_ready_r;
|
||||
|
||||
assign flush_out_init = (state_r == STATE_INIT);
|
||||
|
||||
assign flush_out_valid = (state_r == STATE_FLUSH);
|
||||
assign flush_out_line = counter_r[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin
|
||||
reg [NUM_WAYS-1:0] flush_out_way_r;
|
||||
always @(*) begin
|
||||
flush_out_way_r = '0;
|
||||
flush_out_way_r[counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]] = 1;
|
||||
end
|
||||
assign flush_out_way = flush_out_way_r;
|
||||
end else begin
|
||||
assign flush_out_way = {NUM_WAYS{1'b1}};
|
||||
end
|
||||
|
||||
endmodule
|
348
hw/rtl/cache/VX_cache.sv
vendored
348
hw/rtl/cache/VX_cache.sv
vendored
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,15 +14,15 @@
|
|||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_cache import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
|
||||
// Number of Word requests per cycle
|
||||
parameter NUM_REQS = 4,
|
||||
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 4096,
|
||||
parameter CACHE_SIZE = 4096,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 64,
|
||||
parameter LINE_SIZE = 64,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of associative ways
|
||||
|
@ -33,7 +33,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 2,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MSHR_SIZE = 8,
|
||||
parameter MSHR_SIZE = 8,
|
||||
// Memory Response Queue Size
|
||||
parameter MRSQ_SIZE = 0,
|
||||
// Memory Request Queue Size
|
||||
|
@ -42,6 +42,9 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
|
@ -53,12 +56,12 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
|
||||
// Memory request output register
|
||||
parameter MEM_OUT_BUF = 0
|
||||
) (
|
||||
) (
|
||||
// PERF
|
||||
`ifdef PERF_ENABLE
|
||||
output cache_perf_t cache_perf,
|
||||
`endif
|
||||
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
@ -67,6 +70,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
`STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter"))
|
||||
`STATIC_ASSERT(WRITE_ENABLE || !WRITEBACK, ("invalid parameter"))
|
||||
|
||||
localparam REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS);
|
||||
localparam WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS);
|
||||
|
@ -78,36 +82,46 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
|
||||
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
|
||||
localparam LINE_ADDR_WIDTH = (`CS_WORD_ADDR_WIDTH - BANK_SEL_BITS - WORD_SEL_BITS);
|
||||
localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH;
|
||||
localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + 1;
|
||||
localparam CORE_RSP_DATAW = WORD_WIDTH + TAG_WIDTH;
|
||||
|
||||
localparam CORE_REQ_BUF_ENABLE = (NUM_BANKS != 1) || (NUM_REQS != 1);
|
||||
localparam MEM_REQ_BUF_ENABLE = (NUM_BANKS != 1);
|
||||
|
||||
localparam REQ_XBAR_BUF = (NUM_REQS > 4) ? 2 : 0;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
wire [NUM_BANKS-1:0] perf_read_miss_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_write_miss_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_mshr_stall_per_bank;
|
||||
`endif
|
||||
|
||||
wire [NUM_REQS-1:0] core_req_valid;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr;
|
||||
wire [NUM_REQS-1:0] core_req_rw;
|
||||
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data;
|
||||
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag;
|
||||
wire [NUM_REQS-1:0] core_req_ready;
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (WORD_SIZE),
|
||||
.TAG_WIDTH (TAG_WIDTH)
|
||||
) core_bus2_if[NUM_REQS]();
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_valid[i] = core_bus_if[i].req_valid;
|
||||
assign core_req_rw[i] = core_bus_if[i].req_data.rw;
|
||||
assign core_req_byteen[i] = core_bus_if[i].req_data.byteen;
|
||||
assign core_req_addr[i] = core_bus_if[i].req_data.addr;
|
||||
assign core_req_data[i] = core_bus_if[i].req_data.data;
|
||||
assign core_req_tag[i] = core_bus_if[i].req_data.tag;
|
||||
assign core_bus_if[i].req_ready = core_req_ready[i];
|
||||
`UNUSED_VAR (core_bus_if[i].req_data.atype)
|
||||
end
|
||||
wire [NUM_BANKS-1:0] per_bank_flush_valid;
|
||||
wire [NUM_BANKS-1:0] per_bank_flush_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_fire;
|
||||
|
||||
// this reset relay is required to sync with bank initialization
|
||||
`RESET_RELAY (flush_reset, reset);
|
||||
|
||||
VX_cache_flush #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.BANK_SEL_LATENCY (`TO_OUT_BUF_REG(REQ_XBAR_BUF)) // bank xbar latency
|
||||
) flush_unit (
|
||||
.clk (clk),
|
||||
.reset (flush_reset),
|
||||
.core_bus_in_if (core_bus_if),
|
||||
.core_bus_out_if (core_bus2_if),
|
||||
.bank_req_fire (per_bank_core_req_fire),
|
||||
.flush_valid (per_bank_flush_valid),
|
||||
.flush_ready (per_bank_flush_ready)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
@ -117,10 +131,10 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s;
|
||||
wire [NUM_REQS-1:0] core_rsp_ready_s;
|
||||
|
||||
`RESET_RELAY (core_rsp_reset, reset);
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
|
||||
`RESET_RELAY (core_rsp_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`CS_WORD_WIDTH + TAG_WIDTH),
|
||||
.SIZE (CORE_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0),
|
||||
|
@ -131,9 +145,9 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.valid_in (core_rsp_valid_s[i]),
|
||||
.ready_in (core_rsp_ready_s[i]),
|
||||
.data_in ({core_rsp_data_s[i], core_rsp_tag_s[i]}),
|
||||
.data_out ({core_bus_if[i].rsp_data.data, core_bus_if[i].rsp_data.tag}),
|
||||
.valid_out (core_bus_if[i].rsp_valid),
|
||||
.ready_out (core_bus_if[i].rsp_ready)
|
||||
.data_out ({core_bus2_if[i].rsp_data.data, core_bus2_if[i].rsp_data.tag}),
|
||||
.valid_out (core_bus2_if[i].rsp_valid),
|
||||
.ready_out (core_bus2_if[i].rsp_ready)
|
||||
);
|
||||
end
|
||||
|
||||
|
@ -146,24 +160,29 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [LINE_SIZE-1:0] mem_req_byteen_s;
|
||||
wire [`CS_LINE_WIDTH-1:0] mem_req_data_s;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_s;
|
||||
wire mem_req_flush_s;
|
||||
wire mem_req_ready_s;
|
||||
|
||||
wire mem_bus_if_flush;
|
||||
|
||||
`RESET_RELAY (mem_req_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH),
|
||||
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1),
|
||||
.SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
) mem_req_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mem_req_valid_s),
|
||||
.ready_in (mem_req_ready_s),
|
||||
.data_in ({mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_data_s, mem_req_tag_s}),
|
||||
.data_out ({mem_bus_if.req_data.rw, mem_bus_if.req_data.byteen, mem_bus_if.req_data.addr, mem_bus_if.req_data.data, mem_bus_if.req_data.tag}),
|
||||
.valid_out (mem_bus_if.req_valid),
|
||||
.reset (mem_req_reset),
|
||||
.valid_in (mem_req_valid_s),
|
||||
.ready_in (mem_req_ready_s),
|
||||
.data_in ({mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_data_s, mem_req_tag_s, mem_req_flush_s}),
|
||||
.data_out ({mem_bus_if.req_data.rw, mem_bus_if.req_data.byteen, mem_bus_if.req_data.addr, mem_bus_if.req_data.data, mem_bus_if.req_data.tag, mem_bus_if_flush}),
|
||||
.valid_out (mem_bus_if.req_valid),
|
||||
.ready_out (mem_bus_if.req_ready)
|
||||
);
|
||||
|
||||
assign mem_bus_if.req_data.atype = '0;
|
||||
|
||||
assign mem_bus_if.req_data.atype = mem_bus_if_flush ? `ADDR_TYPE_WIDTH'(1 << `ADDR_TYPE_FLUSH) : '0;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
@ -172,44 +191,26 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [`CS_LINE_WIDTH-1:0] mem_rsp_data_s;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_s;
|
||||
wire mem_rsp_ready_s;
|
||||
|
||||
|
||||
`RESET_RELAY (mem_rsp_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (MEM_TAG_WIDTH + `CS_LINE_WIDTH),
|
||||
.DATAW (MEM_TAG_WIDTH + `CS_LINE_WIDTH),
|
||||
.SIZE (MRSQ_SIZE),
|
||||
.OUT_REG (MRSQ_SIZE > 2)
|
||||
) mem_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (mem_rsp_reset),
|
||||
.valid_in (mem_bus_if.rsp_valid),
|
||||
.ready_in (mem_bus_if.rsp_ready),
|
||||
.data_in ({mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data}),
|
||||
.data_out ({mem_rsp_tag_s, mem_rsp_data_s}),
|
||||
.data_in ({mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data}),
|
||||
.data_out ({mem_rsp_tag_s, mem_rsp_data_s}),
|
||||
.valid_out (mem_rsp_valid_s),
|
||||
.ready_out (mem_rsp_ready_s)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [`CS_LINE_SEL_BITS-1:0] init_line_sel;
|
||||
wire init_enable;
|
||||
|
||||
// this reset relay is required to sync with bank initialization
|
||||
`RESET_RELAY (init_reset, reset);
|
||||
|
||||
VX_cache_init #(
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_WAYS (NUM_WAYS)
|
||||
) cache_init (
|
||||
.clk (clk),
|
||||
.reset (init_reset),
|
||||
.addr_out (init_line_sel),
|
||||
.valid_out (init_enable)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_valid;
|
||||
wire [NUM_BANKS-1:0][`CS_LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_rw;
|
||||
|
@ -218,25 +219,28 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_req_data;
|
||||
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_req_tag;
|
||||
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_req_idx;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_flush;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_ready;
|
||||
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_rsp_valid;
|
||||
wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_rsp_data;
|
||||
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_rsp_tag;
|
||||
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_rsp_idx;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_rsp_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_valid;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_valid;
|
||||
wire [NUM_BANKS-1:0][`CS_MEM_ADDR_WIDTH-1:0] per_bank_mem_req_addr;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_rw;
|
||||
wire [NUM_BANKS-1:0][WORD_SEL_WIDTH-1:0] per_bank_mem_req_wsel;
|
||||
wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_mem_req_byteen;
|
||||
wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_mem_req_data;
|
||||
wire [NUM_BANKS-1:0][LINE_SIZE-1:0] per_bank_mem_req_byteen;
|
||||
wire [NUM_BANKS-1:0][`CS_LINE_WIDTH-1:0] per_bank_mem_req_data;
|
||||
wire [NUM_BANKS-1:0][MSHR_ADDR_WIDTH-1:0] per_bank_mem_req_id;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_flush;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready;
|
||||
|
||||
|
||||
assign per_bank_core_req_fire = per_bank_core_req_valid & per_bank_mem_req_ready;
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign mem_rsp_ready_s = per_bank_mem_rsp_ready;
|
||||
end else begin
|
||||
|
@ -245,12 +249,33 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
|
||||
// Bank requests dispatch
|
||||
|
||||
wire [NUM_REQS-1:0][CORE_REQ_DATAW-1:0] core_req_data_in;
|
||||
wire [NUM_BANKS-1:0][CORE_REQ_DATAW-1:0] core_req_data_out;
|
||||
wire [NUM_REQS-1:0] core_req_valid;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr;
|
||||
wire [NUM_REQS-1:0] core_req_rw;
|
||||
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data;
|
||||
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag;
|
||||
wire [NUM_REQS-1:0] core_req_flush;
|
||||
wire [NUM_REQS-1:0] core_req_ready;
|
||||
|
||||
wire [NUM_REQS-1:0][LINE_ADDR_WIDTH-1:0] core_req_line_addr;
|
||||
wire [NUM_REQS-1:0][BANK_SEL_WIDTH-1:0] core_req_bid;
|
||||
wire [NUM_REQS-1:0][WORD_SEL_WIDTH-1:0] core_req_wsel;
|
||||
|
||||
wire [NUM_REQS-1:0][CORE_REQ_DATAW-1:0] core_req_data_in;
|
||||
wire [NUM_BANKS-1:0][CORE_REQ_DATAW-1:0] core_req_data_out;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_valid[i] = core_bus2_if[i].req_valid;
|
||||
assign core_req_rw[i] = core_bus2_if[i].req_data.rw;
|
||||
assign core_req_byteen[i] = core_bus2_if[i].req_data.byteen;
|
||||
assign core_req_addr[i] = core_bus2_if[i].req_data.addr;
|
||||
assign core_req_data[i] = core_bus2_if[i].req_data.data;
|
||||
assign core_req_tag[i] = core_bus2_if[i].req_data.tag;
|
||||
assign core_req_flush[i] = core_bus2_if[i].req_data.atype[`ADDR_TYPE_FLUSH];
|
||||
assign core_bus2_if[i].req_ready = core_req_ready[i];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
if (WORDS_PER_LINE > 1) begin
|
||||
assign core_req_wsel[i] = core_req_addr[i][0 +: WORD_SEL_BITS];
|
||||
|
@ -273,9 +298,11 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
core_req_line_addr[i],
|
||||
core_req_rw[i],
|
||||
core_req_wsel[i],
|
||||
core_req_byteen[i],
|
||||
core_req_byteen[i],
|
||||
core_req_data[i],
|
||||
core_req_tag[i]};
|
||||
core_req_tag[i],
|
||||
core_req_flush[i]
|
||||
};
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
|
@ -284,12 +311,12 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
|
||||
`RESET_RELAY (req_xbar_reset, reset);
|
||||
|
||||
VX_stream_xbar #(
|
||||
VX_stream_xbar #(
|
||||
.NUM_INPUTS (NUM_REQS),
|
||||
.NUM_OUTPUTS (NUM_BANKS),
|
||||
.DATAW (CORE_REQ_DATAW),
|
||||
.PERF_CTR_BITS (`PERF_CTR_BITS),
|
||||
.OUT_BUF ((NUM_REQS > 4) ? 2 : 0)
|
||||
.OUT_BUF (REQ_XBAR_BUF)
|
||||
) req_xbar (
|
||||
.clk (clk),
|
||||
.reset (req_xbar_reset),
|
||||
|
@ -313,27 +340,29 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
per_bank_core_req_addr[i],
|
||||
per_bank_core_req_rw[i],
|
||||
per_bank_core_req_wsel[i],
|
||||
per_bank_core_req_byteen[i],
|
||||
per_bank_core_req_byteen[i],
|
||||
per_bank_core_req_data[i],
|
||||
per_bank_core_req_tag[i]} = core_req_data_out[i];
|
||||
per_bank_core_req_tag[i],
|
||||
per_bank_core_req_flush[i]
|
||||
} = core_req_data_out[i];
|
||||
end
|
||||
|
||||
|
||||
// Banks access
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
for (genvar bank_id = 0; bank_id < NUM_BANKS; ++bank_id) begin : banks
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr;
|
||||
wire curr_bank_mem_rsp_valid;
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign curr_bank_mem_rsp_valid = mem_rsp_valid_s;
|
||||
end else begin
|
||||
assign curr_bank_mem_rsp_valid = mem_rsp_valid_s && (`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s) == i);
|
||||
assign curr_bank_mem_rsp_valid = mem_rsp_valid_s && (`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s) == bank_id);
|
||||
end
|
||||
|
||||
`RESET_RELAY (bank_reset, reset);
|
||||
|
||||
VX_cache_bank #(
|
||||
.BANK_ID (i),
|
||||
.INSTANCE_ID (INSTANCE_ID),
|
||||
|
||||
VX_cache_bank #(
|
||||
.BANK_ID (bank_id),
|
||||
.INSTANCE_ID ($sformatf("%s-bank%0d", INSTANCE_ID, bank_id)),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
|
@ -344,65 +373,66 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.MSHR_SIZE (MSHR_SIZE),
|
||||
.MREQ_SIZE (MREQ_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.CORE_OUT_BUF (CORE_REQ_BUF_ENABLE ? 0 : CORE_OUT_BUF),
|
||||
.MEM_OUT_BUF (MEM_REQ_BUF_ENABLE ? 0 : MEM_OUT_BUF)
|
||||
) bank (
|
||||
) bank (
|
||||
.clk (clk),
|
||||
.reset (bank_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_read_misses (perf_read_miss_per_bank[i]),
|
||||
.perf_write_misses (perf_write_miss_per_bank[i]),
|
||||
.perf_mshr_stalls (perf_mshr_stall_per_bank[i]),
|
||||
.perf_read_misses (perf_read_miss_per_bank[bank_id]),
|
||||
.perf_write_misses (perf_write_miss_per_bank[bank_id]),
|
||||
.perf_mshr_stalls (perf_mshr_stall_per_bank[bank_id]),
|
||||
`endif
|
||||
|
||||
// Core request
|
||||
.core_req_valid (per_bank_core_req_valid[i]),
|
||||
.core_req_addr (per_bank_core_req_addr[i]),
|
||||
.core_req_rw (per_bank_core_req_rw[i]),
|
||||
.core_req_wsel (per_bank_core_req_wsel[i]),
|
||||
.core_req_byteen (per_bank_core_req_byteen[i]),
|
||||
.core_req_data (per_bank_core_req_data[i]),
|
||||
.core_req_tag (per_bank_core_req_tag[i]),
|
||||
.core_req_idx (per_bank_core_req_idx[i]),
|
||||
.core_req_ready (per_bank_core_req_ready[i]),
|
||||
|
||||
// Core response
|
||||
.core_rsp_valid (per_bank_core_rsp_valid[i]),
|
||||
.core_rsp_data (per_bank_core_rsp_data[i]),
|
||||
.core_rsp_tag (per_bank_core_rsp_tag[i]),
|
||||
.core_rsp_idx (per_bank_core_rsp_idx[i]),
|
||||
.core_rsp_ready (per_bank_core_rsp_ready[i]),
|
||||
// Core request
|
||||
.core_req_valid (per_bank_core_req_valid[bank_id]),
|
||||
.core_req_addr (per_bank_core_req_addr[bank_id]),
|
||||
.core_req_rw (per_bank_core_req_rw[bank_id]),
|
||||
.core_req_wsel (per_bank_core_req_wsel[bank_id]),
|
||||
.core_req_byteen (per_bank_core_req_byteen[bank_id]),
|
||||
.core_req_data (per_bank_core_req_data[bank_id]),
|
||||
.core_req_tag (per_bank_core_req_tag[bank_id]),
|
||||
.core_req_idx (per_bank_core_req_idx[bank_id]),
|
||||
.core_req_flush (per_bank_core_req_flush[bank_id]),
|
||||
.core_req_ready (per_bank_core_req_ready[bank_id]),
|
||||
|
||||
// Core response
|
||||
.core_rsp_valid (per_bank_core_rsp_valid[bank_id]),
|
||||
.core_rsp_data (per_bank_core_rsp_data[bank_id]),
|
||||
.core_rsp_tag (per_bank_core_rsp_tag[bank_id]),
|
||||
.core_rsp_idx (per_bank_core_rsp_idx[bank_id]),
|
||||
.core_rsp_ready (per_bank_core_rsp_ready[bank_id]),
|
||||
|
||||
// Memory request
|
||||
.mem_req_valid (per_bank_mem_req_valid[i]),
|
||||
.mem_req_valid (per_bank_mem_req_valid[bank_id]),
|
||||
.mem_req_addr (curr_bank_mem_req_addr),
|
||||
.mem_req_rw (per_bank_mem_req_rw[i]),
|
||||
.mem_req_wsel (per_bank_mem_req_wsel[i]),
|
||||
.mem_req_byteen (per_bank_mem_req_byteen[i]),
|
||||
.mem_req_data (per_bank_mem_req_data[i]),
|
||||
.mem_req_id (per_bank_mem_req_id[i]),
|
||||
.mem_req_ready (per_bank_mem_req_ready[i]),
|
||||
.mem_req_rw (per_bank_mem_req_rw[bank_id]),
|
||||
.mem_req_byteen (per_bank_mem_req_byteen[bank_id]),
|
||||
.mem_req_data (per_bank_mem_req_data[bank_id]),
|
||||
.mem_req_id (per_bank_mem_req_id[bank_id]),
|
||||
.mem_req_flush (per_bank_mem_req_flush[bank_id]),
|
||||
.mem_req_ready (per_bank_mem_req_ready[bank_id]),
|
||||
|
||||
// Memory response
|
||||
.mem_rsp_valid (curr_bank_mem_rsp_valid),
|
||||
.mem_rsp_data (mem_rsp_data_s),
|
||||
.mem_rsp_id (`CS_MEM_TAG_TO_REQ_ID(mem_rsp_tag_s)),
|
||||
.mem_rsp_ready (per_bank_mem_rsp_ready[i]),
|
||||
.mem_rsp_ready (per_bank_mem_rsp_ready[bank_id]),
|
||||
|
||||
// initialization
|
||||
.init_enable (init_enable),
|
||||
.init_line_sel (init_line_sel)
|
||||
.flush_valid (per_bank_flush_valid[bank_id]),
|
||||
.flush_ready (per_bank_flush_ready[bank_id])
|
||||
);
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign per_bank_mem_req_addr[i] = curr_bank_mem_req_addr;
|
||||
assign per_bank_mem_req_addr[bank_id] = curr_bank_mem_req_addr;
|
||||
end else begin
|
||||
assign per_bank_mem_req_addr[i] = `CS_LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, i);
|
||||
assign per_bank_mem_req_addr[bank_id] = `CS_LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, bank_id);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// Bank responses gather
|
||||
|
||||
|
@ -442,37 +472,41 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire mem_req_valid_p;
|
||||
wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_p;
|
||||
wire mem_req_rw_p;
|
||||
wire [WORD_SEL_WIDTH-1:0] mem_req_wsel_p;
|
||||
wire [WORD_SIZE-1:0] mem_req_byteen_p;
|
||||
wire [`CS_WORD_WIDTH-1:0] mem_req_data_p;
|
||||
wire [LINE_SIZE-1:0] mem_req_byteen_p;
|
||||
wire [`CS_LINE_WIDTH-1:0] mem_req_data_p;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_p;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mem_req_id_p;
|
||||
wire mem_req_flush_p;
|
||||
wire mem_req_ready_p;
|
||||
|
||||
// Memory request arbitration
|
||||
|
||||
wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + `CS_WORD_WIDTH)-1:0] data_in;
|
||||
wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + 1)-1:0] data_in;
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign data_in[i] = {per_bank_mem_req_addr[i],
|
||||
per_bank_mem_req_rw[i],
|
||||
per_bank_mem_req_wsel[i],
|
||||
per_bank_mem_req_byteen[i],
|
||||
per_bank_mem_req_data[i],
|
||||
per_bank_mem_req_id[i]};
|
||||
assign data_in[i] = {
|
||||
per_bank_mem_req_addr[i],
|
||||
per_bank_mem_req_rw[i],
|
||||
per_bank_mem_req_byteen[i],
|
||||
per_bank_mem_req_data[i],
|
||||
per_bank_mem_req_id[i],
|
||||
per_bank_mem_req_flush[i]
|
||||
};
|
||||
end
|
||||
|
||||
`RESET_RELAY (mem_arb_reset, reset);
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (NUM_BANKS),
|
||||
.DATAW (`CS_MEM_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + `CS_WORD_WIDTH + MSHR_ADDR_WIDTH),
|
||||
.ARBITER ("R")
|
||||
.DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + MSHR_ADDR_WIDTH + 1),
|
||||
.ARBITER ("F")
|
||||
) mem_req_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (mem_arb_reset),
|
||||
.valid_in (per_bank_mem_req_valid),
|
||||
.ready_in (per_bank_mem_req_ready),
|
||||
.data_in (data_in),
|
||||
.data_out ({mem_req_addr_p, mem_req_rw_p, mem_req_wsel_p, mem_req_byteen_p, mem_req_data_p, mem_req_id_p}),
|
||||
.data_out ({mem_req_addr_p, mem_req_rw_p, mem_req_byteen_p, mem_req_data_p, mem_req_id_p, mem_req_flush_p}),
|
||||
.valid_out (mem_req_valid_p),
|
||||
.ready_out (mem_req_ready_p),
|
||||
`UNUSED_PIN (sel_out)
|
||||
|
@ -480,44 +514,28 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
|
||||
if (NUM_BANKS > 1) begin
|
||||
wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr_p);
|
||||
assign mem_req_tag_p = MEM_TAG_WIDTH'({mem_req_bank_id, mem_req_id_p});
|
||||
assign mem_req_tag_p = MEM_TAG_WIDTH'({mem_req_bank_id, mem_req_id_p});
|
||||
end else begin
|
||||
assign mem_req_tag_p = MEM_TAG_WIDTH'(mem_req_id_p);
|
||||
end
|
||||
end
|
||||
|
||||
// Memory request multi-port handling
|
||||
|
||||
assign mem_req_valid_s = mem_req_valid_p;
|
||||
assign mem_req_addr_s = mem_req_addr_p;
|
||||
assign mem_req_tag_s = mem_req_tag_p;
|
||||
assign mem_req_flush_s = mem_req_flush_p;
|
||||
assign mem_req_ready_p = mem_req_ready_s;
|
||||
|
||||
if (WRITE_ENABLE != 0) begin
|
||||
if (`CS_WORDS_PER_LINE > 1) begin
|
||||
reg [LINE_SIZE-1:0] mem_req_byteen_r;
|
||||
reg [`CS_LINE_WIDTH-1:0] mem_req_data_r;
|
||||
|
||||
always @(*) begin
|
||||
mem_req_byteen_r = '0;
|
||||
mem_req_data_r = 'x;
|
||||
mem_req_byteen_r[mem_req_wsel_p * WORD_SIZE +: WORD_SIZE] = mem_req_byteen_p;
|
||||
mem_req_data_r[mem_req_wsel_p * `CS_WORD_WIDTH +: `CS_WORD_WIDTH] = mem_req_data_p;
|
||||
end
|
||||
assign mem_req_rw_s = mem_req_rw_p;
|
||||
assign mem_req_byteen_s = mem_req_byteen_r;
|
||||
assign mem_req_data_s = mem_req_data_r;
|
||||
end else begin
|
||||
`UNUSED_VAR (mem_req_wsel_p)
|
||||
assign mem_req_rw_s = mem_req_rw_p;
|
||||
assign mem_req_byteen_s = mem_req_byteen_p;
|
||||
assign mem_req_data_s = mem_req_data_p;
|
||||
end
|
||||
assign mem_req_rw_s = mem_req_rw_p;
|
||||
assign mem_req_byteen_s = mem_req_byteen_p;
|
||||
assign mem_req_data_s = mem_req_data_p;
|
||||
end else begin
|
||||
`UNUSED_VAR (mem_req_byteen_p)
|
||||
`UNUSED_VAR (mem_req_wsel_p)
|
||||
`UNUSED_VAR (mem_req_data_p)
|
||||
`UNUSED_VAR (mem_req_rw_p)
|
||||
|
||||
|
||||
assign mem_req_rw_s = 0;
|
||||
assign mem_req_byteen_s = {LINE_SIZE{1'b1}};
|
||||
assign mem_req_data_s = '0;
|
||||
|
@ -527,10 +545,10 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
// per cycle: core_reads, core_writes
|
||||
wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle;
|
||||
wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle;
|
||||
|
||||
|
||||
wire [NUM_REQS-1:0] perf_core_reads_per_req;
|
||||
wire [NUM_REQS-1:0] perf_core_writes_per_req;
|
||||
|
||||
|
||||
// per cycle: read misses, write misses, msrq stalls, pipeline stalls
|
||||
wire [`CLOG2(NUM_BANKS+1)-1:0] perf_read_miss_per_cycle;
|
||||
wire [`CLOG2(NUM_BANKS+1)-1:0] perf_write_miss_per_cycle;
|
||||
|
@ -539,16 +557,16 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
|
||||
`BUFFER(perf_core_reads_per_req, core_req_valid & core_req_ready & ~core_req_rw);
|
||||
`BUFFER(perf_core_writes_per_req, core_req_valid & core_req_ready & core_req_rw);
|
||||
|
||||
|
||||
`POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_req);
|
||||
`POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_req);
|
||||
`POP_COUNT(perf_read_miss_per_cycle, perf_read_miss_per_bank);
|
||||
`POP_COUNT(perf_write_miss_per_cycle, perf_write_miss_per_bank);
|
||||
`POP_COUNT(perf_mshr_stall_per_cycle, perf_mshr_stall_per_bank);
|
||||
|
||||
|
||||
wire [NUM_REQS-1:0] perf_crsp_stall_per_req;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign perf_crsp_stall_per_req[i] = core_bus_if[i].rsp_valid && ~core_bus_if[i].rsp_ready;
|
||||
assign perf_crsp_stall_per_req[i] = core_bus2_if[i].rsp_valid && ~core_bus2_if[i].rsp_ready;
|
||||
end
|
||||
|
||||
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_req);
|
||||
|
@ -561,7 +579,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
reg [`PERF_CTR_BITS-1:0] perf_write_misses;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mshr_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
|
355
hw/rtl/cache/VX_cache_bank.sv
vendored
355
hw/rtl/cache/VX_cache_bank.sv
vendored
|
@ -41,6 +41,9 @@ module VX_cache_bank #(
|
|||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
|
@ -69,12 +72,13 @@ module VX_cache_bank #(
|
|||
// Core Request
|
||||
input wire core_req_valid,
|
||||
input wire [`CS_LINE_ADDR_WIDTH-1:0] core_req_addr,
|
||||
input wire core_req_rw,
|
||||
input wire [WORD_SEL_WIDTH-1:0] core_req_wsel,
|
||||
input wire [WORD_SIZE-1:0] core_req_byteen,
|
||||
input wire [`CS_WORD_WIDTH-1:0] core_req_data,
|
||||
input wire [TAG_WIDTH-1:0] core_req_tag,
|
||||
input wire [REQ_SEL_WIDTH-1:0] core_req_idx,
|
||||
input wire core_req_rw, // write enable
|
||||
input wire [WORD_SEL_WIDTH-1:0] core_req_wsel, // select the word in a cacheline, e.g. word size = 4 bytes, cacheline size = 64 bytes, it should have log(64/4)= 4 bits
|
||||
input wire [WORD_SIZE-1:0] core_req_byteen,// which bytes in data to write
|
||||
input wire [`CS_WORD_WIDTH-1:0] core_req_data, // data to be written
|
||||
input wire [TAG_WIDTH-1:0] core_req_tag, // identifier of the request (request id)
|
||||
input wire [REQ_SEL_WIDTH-1:0] core_req_idx, // index of the request in the core request array
|
||||
input wire core_req_flush, // flush enable
|
||||
output wire core_req_ready,
|
||||
|
||||
// Core Response
|
||||
|
@ -88,10 +92,10 @@ module VX_cache_bank #(
|
|||
output wire mem_req_valid,
|
||||
output wire [`CS_LINE_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire mem_req_rw,
|
||||
output wire [WORD_SEL_WIDTH-1:0] mem_req_wsel,
|
||||
output wire [WORD_SIZE-1:0] mem_req_byteen,
|
||||
output wire [`CS_WORD_WIDTH-1:0] mem_req_data,
|
||||
output wire [MSHR_ADDR_WIDTH-1:0] mem_req_id,
|
||||
output wire [LINE_SIZE-1:0] mem_req_byteen,
|
||||
output wire [`CS_LINE_WIDTH-1:0] mem_req_data,
|
||||
output wire [MSHR_ADDR_WIDTH-1:0] mem_req_id, // index of the head entry in the mshr
|
||||
output wire mem_req_flush,
|
||||
input wire mem_req_ready,
|
||||
|
||||
// Memory response
|
||||
|
@ -100,9 +104,9 @@ module VX_cache_bank #(
|
|||
input wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id,
|
||||
output wire mem_rsp_ready,
|
||||
|
||||
// initialization
|
||||
input wire init_enable,
|
||||
input wire [`CS_LINE_SEL_BITS-1:0] init_line_sel
|
||||
// flush
|
||||
input wire flush_valid,
|
||||
output wire flush_ready
|
||||
);
|
||||
|
||||
localparam PIPELINE_STAGES = 2;
|
||||
|
@ -128,23 +132,56 @@ module VX_cache_bank #(
|
|||
wire [MSHR_ADDR_WIDTH-1:0] replay_id;
|
||||
wire replay_ready;
|
||||
|
||||
wire is_init_st0;
|
||||
wire is_flush_st0, is_flush_st1;
|
||||
wire [NUM_WAYS-1:0] flush_way_st0;
|
||||
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1;
|
||||
wire rw_st0, rw_st1;
|
||||
wire [WORD_SEL_WIDTH-1:0] wsel_st0, wsel_st1;
|
||||
wire [WORD_SIZE-1:0] byteen_st0, byteen_st1;
|
||||
wire [REQ_SEL_WIDTH-1:0] req_idx_st0, req_idx_st1;
|
||||
wire [TAG_WIDTH-1:0] tag_st0, tag_st1;
|
||||
wire rw_sel, rw_st0, rw_st1;
|
||||
wire [WORD_SEL_WIDTH-1:0] wsel_sel, wsel_st0, wsel_st1;
|
||||
wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1;
|
||||
wire [REQ_SEL_WIDTH-1:0] req_idx_sel, req_idx_st0, req_idx_st1;
|
||||
wire [TAG_WIDTH-1:0] tag_sel, tag_st0, tag_st1;
|
||||
wire [`CS_WORD_WIDTH-1:0] read_data_st1;
|
||||
wire [`CS_LINE_WIDTH-1:0] data_sel, data_st0, data_st1;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0, mshr_id_st0, mshr_id_st1;
|
||||
wire valid_sel, valid_st0, valid_st1;
|
||||
wire is_init_st0;
|
||||
wire is_creq_st0, is_creq_st1;
|
||||
wire is_fill_st0, is_fill_st1;
|
||||
wire is_replay_st0, is_replay_st1;
|
||||
wire creq_flush_st0, creq_flush_st1;
|
||||
wire [NUM_WAYS-1:0] way_sel_st0, way_sel_st1;
|
||||
wire [NUM_WAYS-1:0] tag_matches_st0;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_prev_st0, mshr_prev_st1;
|
||||
wire mshr_pending_st0, mshr_pending_st1;
|
||||
wire mshr_empty;
|
||||
|
||||
wire line_flush_valid;
|
||||
wire line_flush_init;
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_flush_sel;
|
||||
wire [NUM_WAYS-1:0] line_flush_way;
|
||||
wire line_flush_ready;
|
||||
|
||||
// flush unit
|
||||
VX_bank_flush #(
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_WAYS (NUM_WAYS),
|
||||
.WRITEBACK (WRITEBACK)
|
||||
) flush_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.flush_in_valid (flush_valid),
|
||||
.flush_in_ready (flush_ready),
|
||||
.flush_out_init (line_flush_init),
|
||||
.flush_out_valid (line_flush_valid),
|
||||
.flush_out_line (line_flush_sel),
|
||||
.flush_out_way (line_flush_way),
|
||||
.flush_out_ready (line_flush_ready),
|
||||
.mshr_empty (mshr_empty)
|
||||
);
|
||||
|
||||
wire rdw_hazard_st0;
|
||||
reg rdw_hazard_st1;
|
||||
|
@ -154,76 +191,77 @@ module VX_cache_bank #(
|
|||
// inputs arbitration:
|
||||
// mshr replay has highest priority to maximize utilization since there is no miss.
|
||||
// handle memory responses next to prevent deadlock with potential memory request from a miss.
|
||||
wire replay_grant = ~init_enable;
|
||||
// flush has precedence over core requests to ensure that the cache is in a consistent state.
|
||||
wire replay_grant = ~line_flush_init;
|
||||
wire replay_enable = replay_grant && replay_valid;
|
||||
|
||||
wire fill_grant = ~init_enable && ~replay_enable;
|
||||
wire fill_grant = ~line_flush_init && ~replay_enable;
|
||||
wire fill_enable = fill_grant && mem_rsp_valid;
|
||||
|
||||
wire creq_grant = ~init_enable && ~replay_enable && ~fill_enable;
|
||||
wire flush_grant = ~line_flush_init && ~replay_enable && ~fill_enable;
|
||||
wire flush_enable = flush_grant && line_flush_valid;
|
||||
|
||||
wire creq_grant = ~line_flush_init && ~replay_enable && ~fill_enable && ~flush_enable;
|
||||
wire creq_enable = creq_grant && core_req_valid;
|
||||
|
||||
assign replay_ready = replay_grant
|
||||
&& ~rdw_hazard_st0
|
||||
&& ~pipe_stall;
|
||||
&& ~rdw_hazard_st0
|
||||
&& ~pipe_stall;
|
||||
|
||||
assign mem_rsp_ready = fill_grant
|
||||
&& ~pipe_stall;
|
||||
|
||||
assign core_req_ready = creq_grant
|
||||
&& ~mreq_queue_alm_full
|
||||
&& ~mshr_alm_full
|
||||
&& ~pipe_stall;
|
||||
assign line_flush_ready = flush_grant
|
||||
&& ~mreq_queue_alm_full
|
||||
&& ~pipe_stall;
|
||||
|
||||
wire init_fire = init_enable;
|
||||
assign core_req_ready = creq_grant
|
||||
&& ~mreq_queue_alm_full
|
||||
&& ~mshr_alm_full
|
||||
&& ~pipe_stall;
|
||||
|
||||
wire init_fire = line_flush_init;
|
||||
wire replay_fire = replay_valid && replay_ready;
|
||||
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
|
||||
wire flush_fire = line_flush_valid && line_flush_ready;
|
||||
wire core_req_fire = core_req_valid && core_req_ready;
|
||||
|
||||
wire [TAG_WIDTH-1:0] mshr_creq_tag = replay_enable ? replay_tag : core_req_tag;
|
||||
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || flush_fire || core_req_fire;
|
||||
assign rw_sel = replay_valid ? replay_rw : core_req_rw;
|
||||
assign byteen_sel = replay_valid ? replay_byteen : core_req_byteen;
|
||||
assign wsel_sel = replay_valid ? replay_wsel : core_req_wsel;
|
||||
assign req_idx_sel = replay_valid ? replay_idx : core_req_idx;
|
||||
assign tag_sel = replay_valid ? replay_tag : core_req_tag;
|
||||
|
||||
assign addr_sel = (line_flush_init | line_flush_valid) ? `CS_LINE_ADDR_WIDTH'(line_flush_sel) :
|
||||
(replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr));
|
||||
|
||||
if (WRITE_ENABLE) begin
|
||||
assign data_sel[`CS_WORD_WIDTH-1:0] = replay_valid ? replay_data : (mem_rsp_valid ? mem_rsp_data[`CS_WORD_WIDTH-1:0] : core_req_data);
|
||||
end else begin
|
||||
assign data_sel[`CS_WORD_WIDTH-1:0] = mem_rsp_data[`CS_WORD_WIDTH-1:0];
|
||||
`UNUSED_VAR (core_req_data)
|
||||
`UNUSED_VAR (replay_data)
|
||||
end
|
||||
for (genvar i = `CS_WORD_WIDTH; i < `CS_LINE_WIDTH; ++i) begin
|
||||
assign data_sel[i] = mem_rsp_data[i]; // only the memory response fills the upper words of data_sel
|
||||
end
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
assign req_uuid_sel = mshr_creq_tag[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
assign req_uuid_sel = tag_sel[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
end else begin
|
||||
assign req_uuid_sel = 0;
|
||||
end
|
||||
|
||||
`UNUSED_VAR (mshr_creq_tag)
|
||||
|
||||
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || core_req_fire;
|
||||
|
||||
assign addr_sel = init_enable ? `CS_LINE_ADDR_WIDTH'(init_line_sel) :
|
||||
(replay_valid ? replay_addr :
|
||||
(mem_rsp_valid ? mem_rsp_addr : core_req_addr));
|
||||
|
||||
assign data_sel[`CS_WORD_WIDTH-1:0] = (mem_rsp_valid || !WRITE_ENABLE) ? mem_rsp_data[`CS_WORD_WIDTH-1:0] : (replay_valid ? replay_data : core_req_data);
|
||||
for (genvar i = `CS_WORD_WIDTH; i < `CS_LINE_WIDTH; ++i) begin
|
||||
assign data_sel[i] = mem_rsp_data[i];
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH),
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + NUM_WAYS + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH),
|
||||
.RESETW (1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~pipe_stall),
|
||||
.data_in ({
|
||||
valid_sel,
|
||||
init_enable,
|
||||
replay_enable,
|
||||
fill_enable,
|
||||
creq_enable,
|
||||
addr_sel,
|
||||
data_sel,
|
||||
replay_valid ? replay_rw : core_req_rw,
|
||||
replay_valid ? replay_byteen : core_req_byteen,
|
||||
replay_valid ? replay_wsel : core_req_wsel,
|
||||
replay_valid ? replay_idx : core_req_idx,
|
||||
replay_valid ? replay_tag : core_req_tag,
|
||||
replay_id
|
||||
}),
|
||||
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_creq_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0})
|
||||
.data_in ({valid_sel, line_flush_init, replay_enable, fill_enable, flush_enable, creq_enable, core_req_flush, line_flush_way, addr_sel, data_sel, rw_sel, byteen_sel, wsel_sel, req_idx_sel, tag_sel, replay_id}),
|
||||
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, creq_flush_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0})
|
||||
);
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
|
@ -232,20 +270,24 @@ module VX_cache_bank #(
|
|||
assign req_uuid_st0 = 0;
|
||||
end
|
||||
|
||||
wire do_creq_rd_st0 = valid_st0 && is_creq_st0 && ~rw_st0;
|
||||
wire do_fill_st0 = valid_st0 && is_fill_st0;
|
||||
wire do_init_st0 = valid_st0 && is_init_st0;
|
||||
wire do_flush_st0 = valid_st0 && is_flush_st0;
|
||||
wire do_creq_rd_st0 = valid_st0 && is_creq_st0 && ~rw_st0;
|
||||
wire do_replay_rd_st0 = valid_st0 && is_replay_st0 && ~rw_st0;
|
||||
wire do_fill_st0 = valid_st0 && is_fill_st0;
|
||||
wire do_lookup_st0 = valid_st0 && ~(is_fill_st0 || is_init_st0);
|
||||
|
||||
wire do_cache_rd_st0 = do_creq_rd_st0 || do_replay_rd_st0;
|
||||
|
||||
wire [`CS_WORD_WIDTH-1:0] write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0];
|
||||
|
||||
wire [NUM_WAYS-1:0] tag_matches_st0, tag_matches_st1;
|
||||
wire [NUM_WAYS-1:0] way_sel_st0, way_sel_st1;
|
||||
wire [NUM_WAYS-1:0] repl_way_st0;
|
||||
wire [`CS_TAG_SEL_BITS-1:0] repl_tag_st0;
|
||||
|
||||
`RESET_RELAY (tag_reset, reset);
|
||||
|
||||
VX_cache_tags #(
|
||||
.INSTANCE_ID(INSTANCE_ID),
|
||||
.INSTANCE_ID($sformatf("%s-tags", INSTANCE_ID)),
|
||||
.BANK_ID (BANK_ID),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
|
@ -261,30 +303,37 @@ module VX_cache_bank #(
|
|||
|
||||
.stall (pipe_stall),
|
||||
|
||||
// read/Fill
|
||||
// init/fill/lookup/flush
|
||||
.init (do_init_st0 || do_flush_st0),
|
||||
.fill (do_fill_st0),
|
||||
.lookup (do_lookup_st0),
|
||||
.line_addr (addr_st0),
|
||||
.fill (do_fill_st0),
|
||||
.init (do_init_st0),
|
||||
.way_sel (way_sel_st0),
|
||||
.tag_matches(tag_matches_st0)
|
||||
.tag_matches(tag_matches_st0),
|
||||
|
||||
// replacement
|
||||
.repl_way (repl_way_st0),
|
||||
.repl_tag (repl_tag_st0)
|
||||
);
|
||||
|
||||
assign mshr_id_st0 = is_creq_st0 ? mshr_alloc_id_st0 : replay_id_st0;
|
||||
|
||||
assign way_sel_st0 = is_fill_st0 ? repl_way_st0 : (is_flush_st0 ? flush_way_st0 : tag_matches_st0);
|
||||
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] addr_r_st0 = (is_fill_st0 || is_flush_st0) ? {repl_tag_st0, addr_st0[`CS_LINE_SEL_BITS-1:0]} : addr_st0;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + NUM_WAYS + 1),
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~pipe_stall),
|
||||
.data_in ({valid_st0, is_replay_st0, is_fill_st0, is_creq_st0, rw_st0, addr_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, tag_matches_st0, way_sel_st0, mshr_pending_st0}),
|
||||
.data_out ({valid_st1, is_replay_st1, is_fill_st1, is_creq_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, tag_matches_st1, way_sel_st1, mshr_pending_st1})
|
||||
.data_in ({valid_st0, is_flush_st0, is_replay_st0, is_fill_st0, is_creq_st0, creq_flush_st0, rw_st0, addr_r_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, way_sel_st0, mshr_pending_st0}),
|
||||
.data_out ({valid_st1, is_flush_st1, is_replay_st1, is_fill_st1, is_creq_st1, creq_flush_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, way_sel_st1, mshr_pending_st1})
|
||||
);
|
||||
|
||||
// we have a tag hit
|
||||
wire is_hit_st1 = (| tag_matches_st1);
|
||||
wire is_hit_st1 = (| way_sel_st1);
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
assign req_uuid_st1 = tag_st1[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
|
@ -292,37 +341,62 @@ module VX_cache_bank #(
|
|||
assign req_uuid_st1 = 0;
|
||||
end
|
||||
|
||||
wire do_creq_rd_st1 = valid_st1 && is_creq_st1 && ~rw_st1;
|
||||
wire do_creq_wr_st1 = valid_st1 && is_creq_st1 && rw_st1;
|
||||
wire is_read_st1 = is_creq_st1 && ~rw_st1;
|
||||
wire is_write_st1 = is_creq_st1 && rw_st1;
|
||||
wire do_creq_rd_st1 = valid_st1 && is_read_st1;
|
||||
wire do_creq_wr_st1 = valid_st1 && is_write_st1;
|
||||
wire do_fill_st1 = valid_st1 && is_fill_st1;
|
||||
wire do_replay_rd_st1 = valid_st1 && is_replay_st1 && ~rw_st1;
|
||||
wire do_replay_wr_st1 = valid_st1 && is_replay_st1 && rw_st1;
|
||||
|
||||
wire do_cache_rd_st1 = do_read_hit_st1 || do_replay_rd_st1;
|
||||
wire do_cache_wr_st1 = do_write_hit_st1 || do_replay_wr_st1;
|
||||
|
||||
wire do_read_hit_st1 = do_creq_rd_st1 && is_hit_st1;
|
||||
wire do_read_miss_st1 = do_creq_rd_st1 && ~is_hit_st1;
|
||||
|
||||
wire do_write_hit_st1 = do_creq_wr_st1 && is_hit_st1;
|
||||
wire do_write_miss_st1= do_creq_wr_st1 && ~is_hit_st1;
|
||||
|
||||
wire do_flush_st1 = valid_st1 && is_flush_st1;
|
||||
|
||||
`UNUSED_VAR (do_write_miss_st1)
|
||||
|
||||
// ensure mshr replay always get a hit
|
||||
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("runtime error: invalid mshr replay"));
|
||||
|
||||
// detect BRAM's read-during-write hazard
|
||||
assign rdw_hazard_st0 = do_fill_st0; // after a fill
|
||||
always @(posedge clk) begin
|
||||
rdw_hazard_st1 <= (do_creq_rd_st0 && do_write_hit_st1 && (addr_st0 == addr_st1))
|
||||
&& ~rdw_hazard_st1; // after a write to same address
|
||||
assign rdw_hazard_st0 = do_fill_st0; // stall cycle after a fill
|
||||
wire rdw_case1 = do_cache_rd_st0 && do_cache_wr_st1 && (addr_st0 == addr_st1); // standard cache access
|
||||
wire rdw_case2 = WRITEBACK && (do_flush_st0 || do_fill_st0) && do_cache_wr_st1; // a writeback can evict preceeding write
|
||||
always @(posedge clk) begin // after a write to same address
|
||||
rdw_hazard_st1 <= (rdw_case1 || rdw_case2)
|
||||
&& ~rdw_hazard_st1; // invalidate if pipeline stalled to avoid repeats
|
||||
end
|
||||
|
||||
wire [`CS_WORD_WIDTH-1:0] write_data_st1 = data_st1[`CS_WORD_WIDTH-1:0];
|
||||
wire [`CS_LINE_WIDTH-1:0] write_data_st1 = {`CS_WORDS_PER_LINE{data_st1[`CS_WORD_WIDTH-1:0]}};
|
||||
wire [`CS_LINE_WIDTH-1:0] fill_data_st1 = data_st1;
|
||||
wire [LINE_SIZE-1:0] write_byteen_st1;
|
||||
|
||||
wire [`CS_LINE_WIDTH-1:0] dirty_data_st1;
|
||||
wire [LINE_SIZE-1:0] dirty_byteen_st1;
|
||||
wire dirty_valid_st1;
|
||||
|
||||
if (`CS_WORDS_PER_LINE > 1) begin
|
||||
reg [LINE_SIZE-1:0] write_byteen_r;
|
||||
always @(*) begin
|
||||
write_byteen_r = '0;
|
||||
write_byteen_r[wsel_st1 * WORD_SIZE +: WORD_SIZE] = byteen_st1;
|
||||
end
|
||||
assign write_byteen_st1 = write_byteen_r;
|
||||
end else begin
|
||||
assign write_byteen_st1 = byteen_st1;
|
||||
end
|
||||
|
||||
`RESET_RELAY (data_reset, reset);
|
||||
|
||||
VX_cache_data #(
|
||||
.INSTANCE_ID (INSTANCE_ID),
|
||||
.INSTANCE_ID ($sformatf("%s-data", INSTANCE_ID)),
|
||||
.BANK_ID (BANK_ID),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
|
@ -330,6 +404,7 @@ module VX_cache_bank #(
|
|||
.NUM_WAYS (NUM_WAYS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.UUID_WIDTH (UUID_WIDTH)
|
||||
) cache_data (
|
||||
.clk (clk),
|
||||
|
@ -339,23 +414,38 @@ module VX_cache_bank #(
|
|||
|
||||
.stall (pipe_stall),
|
||||
|
||||
.read (do_read_hit_st1 || do_replay_rd_st1),
|
||||
.fill (do_fill_st1),
|
||||
.write (do_write_hit_st1 || do_replay_wr_st1),
|
||||
.way_sel (way_sel_st1 | tag_matches_st1),
|
||||
.read (do_cache_rd_st1),
|
||||
.fill (do_fill_st1 && ~rdw_hazard_st1),
|
||||
.flush (do_flush_st1),
|
||||
.write (do_cache_wr_st1),
|
||||
.way_sel (way_sel_st1),
|
||||
.line_addr (addr_st1),
|
||||
.wsel (wsel_st1),
|
||||
.byteen (byteen_st1),
|
||||
.fill_data (fill_data_st1),
|
||||
.write_data (write_data_st1),
|
||||
.read_data (read_data_st1)
|
||||
.write_byteen(write_byteen_st1),
|
||||
.read_data (read_data_st1),
|
||||
.dirty_valid(dirty_valid_st1),
|
||||
.dirty_data (dirty_data_st1),
|
||||
.dirty_byteen(dirty_byteen_st1)
|
||||
);
|
||||
|
||||
wire [MSHR_SIZE-1:0] mshr_matches_st0;
|
||||
wire [MSHR_SIZE-1:0] mshr_lookup_pending_st0;
|
||||
wire [MSHR_SIZE-1:0] mshr_lookup_rw_st0;
|
||||
wire mshr_allocate_st0 = valid_st0 && is_creq_st0 && ~pipe_stall;
|
||||
wire mshr_lookup_st0 = mshr_allocate_st0;
|
||||
wire mshr_finalize_st1 = valid_st1 && is_creq_st1 && ~pipe_stall;
|
||||
wire mshr_release_st1 = is_hit_st1 || (rw_st1 && ~mshr_pending_st1);
|
||||
|
||||
// release allocated mshr entry if we had a hit
|
||||
wire mshr_release_st1;
|
||||
if (WRITEBACK) begin
|
||||
assign mshr_release_st1 = is_hit_st1;
|
||||
end else begin
|
||||
// we need to keep missed write requests in MSHR if there is already a pending entry to the same address
|
||||
// this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content
|
||||
// this can happen when writes are sent late, when the fill was already in flight.
|
||||
assign mshr_release_st1 = is_hit_st1 || (rw_st1 && ~mshr_pending_st1);
|
||||
end
|
||||
|
||||
VX_pending_size #(
|
||||
.SIZE (MSHR_SIZE)
|
||||
|
@ -364,15 +454,17 @@ module VX_cache_bank #(
|
|||
.reset (reset),
|
||||
.incr (core_req_fire),
|
||||
.decr (replay_fire || (mshr_finalize_st1 && mshr_release_st1)),
|
||||
.empty (mshr_empty),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
.full (mshr_alm_full),
|
||||
`UNUSED_PIN (size),
|
||||
`UNUSED_PIN (empty)
|
||||
`UNUSED_PIN (alm_full),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
`RESET_RELAY (mshr_reset, reset);
|
||||
|
||||
VX_cache_mshr #(
|
||||
.INSTANCE_ID (INSTANCE_ID),
|
||||
.INSTANCE_ID ($sformatf("%s-mshr", INSTANCE_ID)),
|
||||
.BANK_ID (BANK_ID),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
|
@ -412,7 +504,8 @@ module VX_cache_bank #(
|
|||
// lookup
|
||||
.lookup_valid (mshr_lookup_st0),
|
||||
.lookup_addr (addr_st0),
|
||||
.lookup_matches (mshr_matches_st0),
|
||||
.lookup_pending (mshr_lookup_pending_st0),
|
||||
.lookup_rw (mshr_lookup_rw_st0),
|
||||
|
||||
// finalize
|
||||
.finalize_valid (mshr_finalize_st1),
|
||||
|
@ -422,10 +515,12 @@ module VX_cache_bank #(
|
|||
.finalize_prev (mshr_prev_st1)
|
||||
);
|
||||
|
||||
// ignore allocated id from mshr matches
|
||||
// check if there are pending requests to same line in the MSHR
|
||||
wire [MSHR_SIZE-1:0] lookup_matches;
|
||||
for (genvar i = 0; i < MSHR_SIZE; ++i) begin
|
||||
assign lookup_matches[i] = (i != mshr_alloc_id_st0) && mshr_matches_st0[i];
|
||||
assign lookup_matches[i] = mshr_lookup_pending_st0[i]
|
||||
&& (i != mshr_alloc_id_st0) // exclude current mshr id
|
||||
&& (WRITEBACK || ~mshr_lookup_rw_st0[i]); // exclude write requests if writethrough
|
||||
end
|
||||
assign mshr_pending_st0 = (| lookup_matches);
|
||||
|
||||
|
@ -436,7 +531,7 @@ module VX_cache_bank #(
|
|||
wire [REQ_SEL_WIDTH-1:0] crsp_queue_idx;
|
||||
wire [TAG_WIDTH-1:0] crsp_queue_tag;
|
||||
|
||||
assign crsp_queue_valid = do_read_hit_st1 || do_replay_rd_st1;
|
||||
assign crsp_queue_valid = do_cache_rd_st1;
|
||||
assign crsp_queue_idx = req_idx_st1;
|
||||
assign crsp_queue_data = read_data_st1;
|
||||
assign crsp_queue_tag = tag_st1;
|
||||
|
@ -463,29 +558,40 @@ module VX_cache_bank #(
|
|||
// schedule memory request
|
||||
|
||||
wire mreq_queue_push, mreq_queue_pop, mreq_queue_empty;
|
||||
wire [`CS_WORD_WIDTH-1:0] mreq_queue_data;
|
||||
wire [WORD_SIZE-1:0] mreq_queue_byteen;
|
||||
wire [WORD_SEL_WIDTH-1:0] mreq_queue_wsel;
|
||||
wire [`CS_LINE_WIDTH-1:0] mreq_queue_data;
|
||||
wire [LINE_SIZE-1:0] mreq_queue_byteen;
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] mreq_queue_addr;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mreq_queue_id;
|
||||
wire mreq_queue_rw;
|
||||
wire mreq_queue_flush;
|
||||
|
||||
assign mreq_queue_push = (do_read_miss_st1 && ~mshr_pending_st1)
|
||||
|| do_creq_wr_st1;
|
||||
wire is_evict_st1 = (is_fill_st1 || is_flush_st1) && dirty_valid_st1;
|
||||
wire do_writeback_st1 = valid_st1 && is_evict_st1;
|
||||
`UNUSED_VAR (do_writeback_st1)
|
||||
|
||||
if (WRITEBACK) begin
|
||||
assign mreq_queue_push = (((do_read_miss_st1 || do_write_miss_st1) && ~mshr_pending_st1)
|
||||
|| do_writeback_st1)
|
||||
&& ~rdw_hazard_st1;
|
||||
end else begin
|
||||
`UNUSED_VAR (dirty_valid_st1)
|
||||
assign mreq_queue_push = ((do_read_miss_st1 && ~mshr_pending_st1)
|
||||
|| do_creq_wr_st1)
|
||||
&& ~rdw_hazard_st1;
|
||||
end
|
||||
|
||||
assign mreq_queue_pop = mem_req_valid && mem_req_ready;
|
||||
|
||||
assign mreq_queue_rw = WRITE_ENABLE && rw_st1;
|
||||
assign mreq_queue_rw = WRITE_ENABLE && (WRITEBACK ? is_evict_st1 : rw_st1);
|
||||
assign mreq_queue_addr = addr_st1;
|
||||
assign mreq_queue_id = mshr_id_st1;
|
||||
assign mreq_queue_wsel = wsel_st1;
|
||||
assign mreq_queue_byteen = byteen_st1;
|
||||
assign mreq_queue_data = write_data_st1;
|
||||
assign mreq_queue_data = is_write_st1 ? write_data_st1 : dirty_data_st1;
|
||||
assign mreq_queue_byteen = is_write_st1 ? write_byteen_st1 : dirty_byteen_st1;
|
||||
assign mreq_queue_flush = creq_flush_st1;
|
||||
|
||||
`RESET_RELAY (mreq_queue_reset, reset);
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (1 + `CS_LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + `CS_WORD_WIDTH),
|
||||
.DATAW (1 + `CS_LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + 1),
|
||||
.DEPTH (MREQ_SIZE),
|
||||
.ALM_FULL (MREQ_SIZE-PIPELINE_STAGES),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
|
@ -494,8 +600,8 @@ module VX_cache_bank #(
|
|||
.reset (mreq_queue_reset),
|
||||
.push (mreq_queue_push),
|
||||
.pop (mreq_queue_pop),
|
||||
.data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_id, mreq_queue_byteen, mreq_queue_wsel, mreq_queue_data}),
|
||||
.data_out ({mem_req_rw, mem_req_addr, mem_req_id, mem_req_byteen, mem_req_wsel, mem_req_data}),
|
||||
.data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_id, mreq_queue_byteen, mreq_queue_data, mreq_queue_flush}),
|
||||
.data_out ({mem_req_rw, mem_req_addr, mem_req_id, mem_req_byteen, mem_req_data, mem_req_flush}),
|
||||
.empty (mreq_queue_empty),
|
||||
.alm_full (mreq_queue_alm_full),
|
||||
`UNUSED_PIN (full),
|
||||
|
@ -515,35 +621,34 @@ module VX_cache_bank #(
|
|||
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
wire crsp_queue_fire = crsp_queue_valid && crsp_queue_ready;
|
||||
wire pipeline_stall = (replay_valid || mem_rsp_valid || core_req_valid)
|
||||
&& ~(replay_fire || mem_rsp_fire || core_req_fire);
|
||||
wire pipeline_stall = (replay_valid || mem_rsp_valid || core_req_valid || line_flush_valid)
|
||||
&& ~(replay_fire || mem_rsp_fire || core_req_fire || line_flush_valid);
|
||||
always @(posedge clk) begin
|
||||
if (pipeline_stall) begin
|
||||
`TRACE(3, ("%d: *** %s-bank%0d stall: crsq=%b, mreq=%b, mshr=%b\n", $time, INSTANCE_ID, BANK_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full));
|
||||
end
|
||||
if (init_enable) begin
|
||||
`TRACE(2, ("%d: %s-bank%0d init: addr=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(init_line_sel, BANK_ID)));
|
||||
`TRACE(3, ("%d: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw_st0=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard_st0));
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(2, ("%d: %s-bank%0d fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data));
|
||||
`TRACE(2, ("%d: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data));
|
||||
end
|
||||
if (replay_fire) begin
|
||||
`TRACE(2, ("%d: %s-bank%0d mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel));
|
||||
`TRACE(2, ("%d: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel));
|
||||
end
|
||||
if (core_req_fire) begin
|
||||
if (core_req_rw)
|
||||
`TRACE(2, ("%d: %s-bank%0d core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel));
|
||||
`TRACE(2, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel));
|
||||
else
|
||||
`TRACE(2, ("%d: %s-bank%0d core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel));
|
||||
`TRACE(2, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel));
|
||||
end
|
||||
if (crsp_queue_fire) begin
|
||||
`TRACE(2, ("%d: %s-bank%0d core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1));
|
||||
`TRACE(2, ("%d: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1));
|
||||
end
|
||||
if (mreq_queue_push) begin
|
||||
if (do_creq_wr_st1)
|
||||
`TRACE(2, ("%d: %s-bank%0d writethrough: addr=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1));
|
||||
if (do_creq_wr_st1 && !WRITEBACK)
|
||||
`TRACE(2, ("%d: %s writethrough: addr=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1));
|
||||
else if (do_writeback_st1)
|
||||
`TRACE(2, ("%d: %s writeback: addr=0x%0h, byteen=%b, data=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data));
|
||||
else
|
||||
`TRACE(2, ("%d: %s-bank%0d fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1));
|
||||
`TRACE(2, ("%d: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
91
hw/rtl/cache/VX_cache_bypass.sv
vendored
91
hw/rtl/cache/VX_cache_bypass.sv
vendored
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -18,16 +18,16 @@ module VX_cache_bypass #(
|
|||
parameter TAG_SEL_IDX = 0,
|
||||
|
||||
parameter PASSTHRU = 0,
|
||||
parameter NC_ENABLE = 0,
|
||||
parameter NC_ENABLE = 0,
|
||||
|
||||
parameter WORD_SIZE = 1,
|
||||
parameter LINE_SIZE = 1,
|
||||
parameter LINE_SIZE = 1,
|
||||
|
||||
parameter CORE_ADDR_WIDTH = 1,
|
||||
|
||||
|
||||
parameter CORE_TAG_WIDTH = 1,
|
||||
|
||||
parameter MEM_ADDR_WIDTH = 1,
|
||||
|
||||
parameter MEM_ADDR_WIDTH = 1,
|
||||
parameter MEM_TAG_IN_WIDTH = 1,
|
||||
parameter MEM_TAG_OUT_WIDTH = 1,
|
||||
|
||||
|
@ -35,9 +35,9 @@ module VX_cache_bypass #(
|
|||
|
||||
parameter CORE_OUT_BUF = 0,
|
||||
parameter MEM_OUT_BUF = 0,
|
||||
|
||||
|
||||
parameter CORE_DATA_WIDTH = WORD_SIZE * 8
|
||||
) (
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
@ -71,40 +71,39 @@ module VX_cache_bypass #(
|
|||
|
||||
wire core_req_nc_valid;
|
||||
wire [NUM_REQS-1:0] core_req_nc_valids;
|
||||
wire [NUM_REQS-1:0] core_req_nc_idxs;
|
||||
wire [NUM_REQS-1:0] core_req_nc_idxs;
|
||||
wire [`UP(REQ_SEL_BITS)-1:0] core_req_nc_idx;
|
||||
wire [NUM_REQS-1:0] core_req_nc_sel;
|
||||
wire [NUM_REQS-1:0] core_req_nc_sel;
|
||||
wire core_req_nc_ready;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
if (PASSTHRU != 0) begin
|
||||
assign core_req_nc_idxs[i] = 1'b1;
|
||||
end else if (NC_ENABLE) begin
|
||||
assign core_req_nc_idxs[i] = core_bus_in_if[i].req_data.atype[`ADDR_TYPE_IO];
|
||||
end else begin
|
||||
assign core_req_nc_idxs[i] = 1'b0;
|
||||
end
|
||||
end
|
||||
assign core_req_nc_valids[i] = core_bus_in_if[i].req_valid && core_req_nc_idxs[i];
|
||||
end
|
||||
end
|
||||
|
||||
VX_generic_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.TYPE (PASSTHRU ? "R" : "P"),
|
||||
.LOCK_ENABLE (1)
|
||||
.TYPE (PASSTHRU ? "R" : "P")
|
||||
) core_req_nc_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (core_req_nc_valids),
|
||||
.reset (reset),
|
||||
.requests (core_req_nc_valids),
|
||||
.grant_index (core_req_nc_idx),
|
||||
.grant_onehot (core_req_nc_sel),
|
||||
.grant_valid (core_req_nc_valid),
|
||||
.grant_unlock (core_req_nc_ready)
|
||||
.grant_ready (core_req_nc_ready)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_bus_out_if[i].req_valid = core_bus_in_if[i].req_valid && ~core_req_nc_idxs[i];
|
||||
assign core_bus_out_if[i].req_data = core_bus_in_if[i].req_data;
|
||||
assign core_bus_in_if[i].req_ready = core_req_nc_valids[i] ? (core_req_nc_ready && core_req_nc_sel[i])
|
||||
assign core_bus_in_if[i].req_ready = core_req_nc_valids[i] ? (core_req_nc_ready && core_req_nc_sel[i])
|
||||
: core_bus_out_if[i].req_ready;
|
||||
end
|
||||
|
||||
|
@ -118,7 +117,7 @@ module VX_cache_bypass #(
|
|||
wire [`CS_LINE_WIDTH-1:0] mem_req_out_data;
|
||||
wire [MEM_TAG_OUT_WIDTH-1:0] mem_req_out_tag;
|
||||
wire mem_req_out_ready;
|
||||
|
||||
|
||||
wire core_req_nc_sel_rw;
|
||||
wire [WORD_SIZE-1:0] core_req_nc_sel_byteen;
|
||||
wire [CORE_ADDR_WIDTH-1:0] core_req_nc_sel_addr;
|
||||
|
@ -129,22 +128,22 @@ module VX_cache_bypass #(
|
|||
wire [NUM_REQS-1:0][MUX_DATAW-1:0] core_req_nc_mux_in;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_nc_mux_in[i] = {
|
||||
core_bus_in_if[i].req_data.rw,
|
||||
core_bus_in_if[i].req_data.rw,
|
||||
core_bus_in_if[i].req_data.byteen,
|
||||
core_bus_in_if[i].req_data.addr,
|
||||
core_bus_in_if[i].req_data.atype,
|
||||
core_bus_in_if[i].req_data.data,
|
||||
core_bus_in_if[i].req_data.tag
|
||||
core_bus_in_if[i].req_data.tag
|
||||
};
|
||||
end
|
||||
|
||||
|
||||
assign {
|
||||
core_req_nc_sel_rw,
|
||||
core_req_nc_sel_byteen,
|
||||
core_req_nc_sel_addr,
|
||||
core_req_nc_sel_atype,
|
||||
core_req_nc_sel_data,
|
||||
core_req_nc_sel_tag
|
||||
core_req_nc_sel_tag
|
||||
} = core_req_nc_mux_in[core_req_nc_idx];
|
||||
|
||||
assign core_req_nc_ready = ~mem_bus_in_if.req_valid && mem_req_out_ready;
|
||||
|
@ -157,11 +156,11 @@ module VX_cache_bypass #(
|
|||
wire [MEM_TAG_ID_BITS-1:0] mem_req_tag_id_bypass;
|
||||
|
||||
wire [CORE_TAG_ID_BITS-1:0] core_req_in_id = core_req_nc_sel_tag[CORE_TAG_ID_BITS-1:0];
|
||||
|
||||
|
||||
if (WORDS_PER_LINE > 1) begin
|
||||
reg [WORDS_PER_LINE-1:0][WORD_SIZE-1:0] mem_req_byteen_in_r;
|
||||
reg [WORDS_PER_LINE-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_r;
|
||||
|
||||
|
||||
wire [WSEL_BITS-1:0] req_wsel = core_req_nc_sel_addr[WSEL_BITS-1:0];
|
||||
|
||||
always @(*) begin
|
||||
|
@ -176,7 +175,7 @@ module VX_cache_bypass #(
|
|||
assign mem_req_out_data = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.data : mem_req_data_in_r;
|
||||
if (NUM_REQS > 1) begin
|
||||
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, req_wsel, core_req_in_id});
|
||||
end else begin
|
||||
end else begin
|
||||
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({req_wsel, core_req_in_id});
|
||||
end
|
||||
end else begin
|
||||
|
@ -189,7 +188,7 @@ module VX_cache_bypass #(
|
|||
end
|
||||
end
|
||||
|
||||
wire [MEM_TAG_BYPASS_BITS-1:0] mem_req_tag_bypass;
|
||||
wire [MEM_TAG_BYPASS_BITS-1:0] mem_req_tag_bypass;
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
assign mem_req_tag_bypass = {core_req_nc_sel_tag[CORE_TAG_ID_BITS +: UUID_WIDTH], mem_req_tag_id_bypass};
|
||||
|
@ -202,7 +201,7 @@ module VX_cache_bypass #(
|
|||
`UNUSED_VAR (mem_bus_in_if.req_data.tag)
|
||||
end else begin
|
||||
if (NC_ENABLE) begin
|
||||
VX_bits_insert #(
|
||||
VX_bits_insert #(
|
||||
.N (MEM_TAG_OUT_WIDTH-1),
|
||||
.S (1),
|
||||
.POS (TAG_SEL_IDX)
|
||||
|
@ -213,8 +212,8 @@ module VX_cache_bypass #(
|
|||
);
|
||||
end else begin
|
||||
assign mem_req_out_tag = mem_bus_in_if.req_data.tag;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign mem_bus_in_if.req_ready = mem_req_out_ready;
|
||||
|
||||
|
@ -225,11 +224,11 @@ module VX_cache_bypass #(
|
|||
) mem_req_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mem_req_out_valid),
|
||||
.ready_in (mem_req_out_ready),
|
||||
.valid_in (mem_req_out_valid),
|
||||
.ready_in (mem_req_out_ready),
|
||||
.data_in ({mem_req_out_rw, mem_req_out_byteen, mem_req_out_addr, mem_req_out_atype, mem_req_out_data, mem_req_out_tag}),
|
||||
.data_out ({mem_bus_out_if.req_data.rw, mem_bus_out_if.req_data.byteen, mem_bus_out_if.req_data.addr, mem_bus_out_if.req_data.atype, mem_bus_out_if.req_data.data, mem_bus_out_if.req_data.tag}),
|
||||
.valid_out (mem_bus_out_if.req_valid),
|
||||
.valid_out (mem_bus_out_if.req_valid),
|
||||
.ready_out (mem_bus_out_if.req_ready)
|
||||
);
|
||||
|
||||
|
@ -253,7 +252,7 @@ module VX_cache_bypass #(
|
|||
|
||||
wire [(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1:0] mem_rsp_tag_id_nc;
|
||||
|
||||
VX_bits_remove #(
|
||||
VX_bits_remove #(
|
||||
.N (MEM_TAG_OUT_WIDTH),
|
||||
.S (NC_ENABLE),
|
||||
.POS (TAG_SEL_IDX)
|
||||
|
@ -265,10 +264,10 @@ module VX_cache_bypass #(
|
|||
wire [`UP(REQ_SEL_BITS)-1:0] rsp_idx;
|
||||
if (NUM_REQS > 1) begin
|
||||
assign rsp_idx = mem_rsp_tag_id_nc[(CORE_TAG_ID_BITS + WSEL_BITS) +: REQ_SEL_BITS];
|
||||
end else begin
|
||||
end else begin
|
||||
assign rsp_idx = 1'b0;
|
||||
end
|
||||
|
||||
|
||||
reg [NUM_REQS-1:0] rsp_nc_valid_r;
|
||||
always @(*) begin
|
||||
rsp_nc_valid_r = '0;
|
||||
|
@ -277,13 +276,13 @@ module VX_cache_bypass #(
|
|||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || rsp_nc_valid_r[i];
|
||||
assign core_bus_out_if[i].rsp_ready = core_rsp_in_ready[i];
|
||||
assign core_bus_out_if[i].rsp_ready = core_rsp_in_ready[i];
|
||||
end
|
||||
|
||||
|
||||
if (WORDS_PER_LINE > 1) begin
|
||||
wire [WSEL_BITS-1:0] rsp_wsel = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS +: WSEL_BITS];
|
||||
wire [WSEL_BITS-1:0] rsp_wsel = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS +: WSEL_BITS];
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ?
|
||||
assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ?
|
||||
core_bus_out_if[i].rsp_data.data : mem_bus_out_if.rsp_data.data[rsp_wsel * CORE_DATA_WIDTH +: CORE_DATA_WIDTH];
|
||||
end
|
||||
end else begin
|
||||
|
@ -306,7 +305,7 @@ module VX_cache_bypass #(
|
|||
assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_valid ? core_bus_out_if[i].rsp_data.tag : mem_rsp_tag_in_nc2;
|
||||
end else begin
|
||||
assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_data.tag;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
|
@ -320,7 +319,7 @@ module VX_cache_bypass #(
|
|||
.valid_in (core_rsp_in_valid[i]),
|
||||
.ready_in (core_rsp_in_ready[i]),
|
||||
.data_in ({core_rsp_in_data[i], core_rsp_in_tag[i]}),
|
||||
.data_out ({core_bus_in_if[i].rsp_data.data, core_bus_in_if[i].rsp_data.tag}),
|
||||
.data_out ({core_bus_in_if[i].rsp_data.data, core_bus_in_if[i].rsp_data.tag}),
|
||||
.valid_out (core_bus_in_if[i].rsp_valid),
|
||||
.ready_out (core_bus_in_if[i].rsp_ready)
|
||||
);
|
||||
|
@ -341,7 +340,7 @@ module VX_cache_bypass #(
|
|||
assign mem_bus_in_if.rsp_data.data = mem_bus_out_if.rsp_data.data;
|
||||
assign mem_bus_in_if.rsp_data.tag = mem_rsp_tag_id_nc;
|
||||
end
|
||||
|
||||
|
||||
wire [NUM_REQS-1:0] core_rsp_out_valid;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_out_valid[i] = core_bus_out_if[i].rsp_valid;
|
||||
|
|
35
hw/rtl/cache/VX_cache_cluster.sv
vendored
35
hw/rtl/cache/VX_cache_cluster.sv
vendored
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -24,20 +24,20 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
parameter NUM_REQS = 4,
|
||||
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 16384,
|
||||
parameter CACHE_SIZE = 16384,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 64,
|
||||
parameter LINE_SIZE = 64,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 4,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
parameter WORD_SIZE = 4,
|
||||
|
||||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 2,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MSHR_SIZE = 8,
|
||||
parameter MSHR_SIZE = 8,
|
||||
// Memory Response Queue Size
|
||||
parameter MRSQ_SIZE = 0,
|
||||
// Memory Request Queue Size
|
||||
|
@ -46,6 +46,9 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
|
@ -60,7 +63,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
|
||||
// Memory request output buffer
|
||||
parameter MEM_OUT_BUF = 0
|
||||
) (
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
@ -74,17 +77,16 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
);
|
||||
localparam NUM_CACHES = `UP(NUM_UNITS);
|
||||
localparam PASSTHRU = (NUM_UNITS == 0);
|
||||
localparam ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES);
|
||||
localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
|
||||
localparam ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES);
|
||||
localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
|
||||
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
|
||||
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS));
|
||||
|
||||
`STATIC_ASSERT(NUM_INPUTS >= NUM_CACHES, ("invalid parameter"))
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
cache_perf_t perf_cache_tmp[1], perf_cache_unit[NUM_CACHES];
|
||||
`PERF_CACHE_ADD (perf_cache_tmp, perf_cache_unit, 1, NUM_CACHES)
|
||||
assign cache_perf = perf_cache_tmp[0];
|
||||
cache_perf_t perf_cache_unit[NUM_CACHES];
|
||||
`PERF_CACHE_ADD (cache_perf, perf_cache_unit, NUM_CACHES)
|
||||
`endif
|
||||
|
||||
VX_mem_bus_if #(
|
||||
|
@ -97,8 +99,6 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH (ARB_TAG_WIDTH)
|
||||
) arb_core_bus_if[NUM_CACHES * NUM_REQS]();
|
||||
|
||||
`RESET_RELAY (arb_reset, reset);
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (WORD_SIZE),
|
||||
|
@ -114,6 +114,8 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
`ASSIGN_VX_MEM_BUS_IF (core_bus_tmp_if[j], core_bus_if[j * NUM_REQS + i]);
|
||||
end
|
||||
|
||||
`RESET_RELAY (arb_reset, reset);
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_INPUTS (NUM_INPUTS),
|
||||
.NUM_OUTPUTS (NUM_CACHES),
|
||||
|
@ -135,9 +137,9 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
|
||||
`RESET_RELAY (cache_reset, reset);
|
||||
for (genvar i = 0; i < NUM_CACHES; ++i) begin : caches
|
||||
|
||||
for (genvar i = 0; i < NUM_CACHES; ++i) begin
|
||||
`RESET_RELAY (cache_reset, reset);
|
||||
|
||||
VX_cache_wrap #(
|
||||
.INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, i)),
|
||||
|
@ -152,6 +154,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
.MRSQ_SIZE (MRSQ_SIZE),
|
||||
.MREQ_SIZE (MREQ_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.TAG_WIDTH (ARB_TAG_WIDTH),
|
||||
.TAG_SEL_IDX (TAG_SEL_IDX),
|
||||
|
|
128
hw/rtl/cache/VX_cache_data.sv
vendored
128
hw/rtl/cache/VX_cache_data.sv
vendored
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -17,17 +17,19 @@ module VX_cache_data #(
|
|||
parameter `STRING INSTANCE_ID= "",
|
||||
parameter BANK_ID = 0,
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
parameter CACHE_SIZE = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 16,
|
||||
parameter LINE_SIZE = 16,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 1,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 1,
|
||||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0
|
||||
) (
|
||||
|
@ -41,59 +43,100 @@ module VX_cache_data #(
|
|||
input wire stall,
|
||||
|
||||
input wire read,
|
||||
input wire fill,
|
||||
input wire fill,
|
||||
input wire flush,
|
||||
input wire write,
|
||||
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
|
||||
input wire [`UP(`CS_WORD_SEL_BITS)-1:0] wsel,
|
||||
input wire [WORD_SIZE-1:0] byteen,
|
||||
input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data,
|
||||
input wire [`CS_WORD_WIDTH-1:0] write_data,
|
||||
input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] write_data,
|
||||
input wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen,
|
||||
input wire [NUM_WAYS-1:0] way_sel,
|
||||
|
||||
output wire [`CS_WORD_WIDTH-1:0] read_data
|
||||
output wire [`CS_WORD_WIDTH-1:0] read_data,
|
||||
output wire dirty_valid,
|
||||
output wire [`CS_LINE_WIDTH-1:0] dirty_data,
|
||||
output wire [LINE_SIZE-1:0] dirty_byteen
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_PARAM (BANK_ID)
|
||||
`UNUSED_PARAM (WORD_SIZE)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (stall)
|
||||
`UNUSED_VAR (line_addr)
|
||||
`UNUSED_VAR (read)
|
||||
`UNUSED_VAR (flush)
|
||||
|
||||
localparam BYTEENW = (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) ? (LINE_SIZE * NUM_WAYS) : 1;
|
||||
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
|
||||
wire [`LOG2UP(NUM_WAYS)-1:0] way_idx;
|
||||
|
||||
if (WRITEBACK) begin
|
||||
reg [`CS_LINES_PER_BANK * NUM_WAYS-1:0][LINE_SIZE-1:0] dirty_bytes_r;
|
||||
reg [`CS_LINES_PER_BANK * NUM_WAYS-1:0] dirty_blocks_r;
|
||||
|
||||
wire [`CLOG2(`CS_LINES_PER_BANK * NUM_WAYS)-1:0] way_addr;
|
||||
if (NUM_WAYS > 1) begin
|
||||
assign way_addr = {line_sel, way_idx};
|
||||
end else begin
|
||||
assign way_addr = line_sel;
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (fill) begin
|
||||
dirty_bytes_r[way_addr] <= '0;
|
||||
end else if (write) begin
|
||||
dirty_bytes_r[way_addr] <= dirty_bytes_r[way_addr] | write_byteen;
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
for (integer i = 0; i < `CS_LINES_PER_BANK * NUM_WAYS; ++i) begin
|
||||
dirty_blocks_r[i] <= 0;
|
||||
end
|
||||
end else begin
|
||||
if (fill) begin
|
||||
dirty_blocks_r[way_addr] <= 0;
|
||||
end else if (write) begin
|
||||
dirty_blocks_r[way_addr] <= 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign dirty_byteen = dirty_bytes_r[way_addr];
|
||||
assign dirty_valid = dirty_blocks_r[way_addr];
|
||||
end else begin
|
||||
assign dirty_byteen = '0;
|
||||
assign dirty_valid = 0;
|
||||
end
|
||||
|
||||
// order the data layout to perform ways multiplexing last.
|
||||
// this allows converting way index to binary in parallel with BRAM read.
|
||||
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] wdata;
|
||||
wire [BYTEENW-1:0] wren;
|
||||
|
||||
if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin
|
||||
reg [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] wdata_r;
|
||||
reg [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wren_r;
|
||||
|
||||
always @(*) begin
|
||||
wdata_r = {`CS_WORDS_PER_LINE{write_data}};
|
||||
wren_r = '0;
|
||||
wren_r[wsel] = byteen;
|
||||
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
|
||||
assign wdata[i] = (fill || !WRITE_ENABLE) ? {NUM_WAYS{fill_data[i]}} : {NUM_WAYS{write_data[i]}};
|
||||
end
|
||||
|
||||
// order the data layout to perform ways multiplexing last
|
||||
// this allows performing onehot encoding of the way index in parallel with BRAM read.
|
||||
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w;
|
||||
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
|
||||
assign wdata[i] = fill ? {NUM_WAYS{fill_data[i]}} : {NUM_WAYS{wdata_r[i]}};
|
||||
for (genvar j = 0; j < NUM_WAYS; ++j) begin
|
||||
assign wren_w[i][j] = (fill ? {WORD_SIZE{1'b1}} : wren_r[i])
|
||||
& {WORD_SIZE{((NUM_WAYS == 1) || way_sel[j])}};
|
||||
assign wren_w[i][j] = ((fill || !WRITE_ENABLE) ? {WORD_SIZE{1'b1}} : write_byteen[i])
|
||||
& {WORD_SIZE{(way_sel[j] || (NUM_WAYS == 1))}};
|
||||
end
|
||||
end
|
||||
assign wren = wren_w;
|
||||
end else begin
|
||||
`UNUSED_VAR (write)
|
||||
`UNUSED_VAR (byteen)
|
||||
`UNUSED_VAR (write_byteen)
|
||||
`UNUSED_VAR (write_data)
|
||||
assign wdata = fill_data;
|
||||
assign wren = fill;
|
||||
end
|
||||
|
||||
wire [`LOG2UP(NUM_WAYS)-1:0] way_idx;
|
||||
|
||||
VX_onehot_encoder #(
|
||||
.N (NUM_WAYS)
|
||||
|
@ -105,8 +148,6 @@ module VX_cache_data #(
|
|||
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] rdata;
|
||||
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (`CS_LINE_WIDTH * NUM_WAYS),
|
||||
.SIZE (`CS_LINES_PER_BANK),
|
||||
|
@ -119,34 +160,41 @@ module VX_cache_data #(
|
|||
.wren (wren),
|
||||
.addr (line_sel),
|
||||
.wdata (wdata),
|
||||
.rdata (rdata)
|
||||
.rdata (rdata)
|
||||
);
|
||||
|
||||
wire [NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata;
|
||||
|
||||
if (`CS_WORDS_PER_LINE > 1) begin
|
||||
assign per_way_rdata = rdata[wsel];
|
||||
end else begin
|
||||
`UNUSED_VAR (wsel)
|
||||
assign per_way_rdata = rdata;
|
||||
end
|
||||
|
||||
end
|
||||
assign read_data = per_way_rdata[way_idx];
|
||||
|
||||
`UNUSED_VAR (stall)
|
||||
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] dirty_data_w;
|
||||
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
|
||||
for (genvar j = 0; j < NUM_WAYS; ++j) begin
|
||||
assign dirty_data_w[j][i] = rdata[i][j];
|
||||
end
|
||||
end
|
||||
assign dirty_data = dirty_data_w[way_idx];
|
||||
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
always @(posedge clk) begin
|
||||
always @(posedge clk) begin
|
||||
if (fill && ~stall) begin
|
||||
`TRACE(3, ("%d: %s-bank%0d data-fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data));
|
||||
`TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data));
|
||||
end
|
||||
if (flush && ~stall) begin
|
||||
`TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b, byteen=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_valid, dirty_byteen));
|
||||
end
|
||||
if (read && ~stall) begin
|
||||
`TRACE(3, ("%d: %s-bank%0d data-read: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, read_data, req_uuid));
|
||||
end
|
||||
`TRACE(3, ("%d: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid));
|
||||
end
|
||||
if (write && ~stall) begin
|
||||
`TRACE(3, ("%d: %s-bank%0d data-write: addr=0x%0h, way=%b, blk_addr=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, byteen, write_data, req_uuid));
|
||||
end
|
||||
end
|
||||
`TRACE(3, ("%d: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
26
hw/rtl/cache/VX_cache_define.vh
vendored
26
hw/rtl/cache/VX_cache_define.vh
vendored
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,7 +14,7 @@
|
|||
`ifndef VX_CACHE_DEFINE_VH
|
||||
`define VX_CACHE_DEFINE_VH
|
||||
|
||||
`include "VX_define.vh"
|
||||
`include "VX_define.vh"
|
||||
|
||||
`define CS_REQ_SEL_BITS `CLOG2(NUM_REQS)
|
||||
|
||||
|
@ -50,7 +50,7 @@
|
|||
`define CS_TAG_SEL_ADDR_START (1+`CS_LINE_SEL_ADDR_END)
|
||||
`define CS_TAG_SEL_ADDR_END (`CS_WORD_ADDR_WIDTH-1)
|
||||
|
||||
`define CS_LINE_TAG_ADDR(x) x[`CS_LINE_ADDR_WIDTH-1 : `CS_LINE_SEL_BITS]
|
||||
`define CS_LINE_ADDR_TAG(x) x[`CS_LINE_ADDR_WIDTH-1 : `CS_LINE_SEL_BITS]
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
@ -64,14 +64,14 @@
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define PERF_CACHE_ADD(dst, src, dcount, scount) \
|
||||
`PERF_COUNTER_ADD (dst, src, reads, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, writes, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, read_misses, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, write_misses, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, bank_stalls, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, mshr_stalls, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, mem_stalls, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, crsp_stalls, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1))
|
||||
`define PERF_CACHE_ADD(dst, src, count) \
|
||||
`PERF_COUNTER_ADD (dst, src, reads, `PERF_CTR_BITS, count, (count > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, writes, `PERF_CTR_BITS, count, (count > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, read_misses, `PERF_CTR_BITS, count, (count > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, write_misses, `PERF_CTR_BITS, count, (count > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, bank_stalls, `PERF_CTR_BITS, count, (count > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, mshr_stalls, `PERF_CTR_BITS, count, (count > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, mem_stalls, `PERF_CTR_BITS, count, (count > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, crsp_stalls, `PERF_CTR_BITS, count, (count > 1))
|
||||
|
||||
`endif // VX_CACHE_DEFINE_VH
|
||||
|
|
154
hw/rtl/cache/VX_cache_flush.sv
vendored
Normal file
154
hw/rtl/cache/VX_cache_flush.sv
vendored
Normal file
|
@ -0,0 +1,154 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_cache_flush #(
|
||||
// Number of Word requests per cycle
|
||||
parameter NUM_REQS = 4,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Bank select latency
|
||||
parameter BANK_SEL_LATENCY = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
VX_mem_bus_if.slave core_bus_in_if [NUM_REQS],
|
||||
VX_mem_bus_if.master core_bus_out_if [NUM_REQS],
|
||||
input wire [NUM_BANKS-1:0] bank_req_fire,
|
||||
output wire [NUM_BANKS-1:0] flush_valid,
|
||||
input wire [NUM_BANKS-1:0] flush_ready
|
||||
);
|
||||
localparam STATE_IDLE = 0;
|
||||
localparam STATE_WAIT = 1;
|
||||
localparam STATE_FLUSH = 2;
|
||||
localparam STATE_DONE = 3;
|
||||
|
||||
// track in-flight core requests
|
||||
|
||||
wire no_inflight_reqs;
|
||||
|
||||
if (BANK_SEL_LATENCY != 0) begin
|
||||
|
||||
localparam NUM_REQS_W = `CLOG2(NUM_REQS+1);
|
||||
localparam NUM_BANKS_W = `CLOG2(NUM_BANKS+1);
|
||||
|
||||
wire [NUM_REQS-1:0] core_bus_out_fire;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_bus_out_fire[i] = core_bus_out_if[i].req_valid && core_bus_out_if[i].req_ready;
|
||||
end
|
||||
|
||||
wire [NUM_REQS_W-1:0] core_bus_out_cnt;
|
||||
wire [NUM_BANKS_W-1:0] bank_req_cnt;
|
||||
|
||||
`POP_COUNT(core_bus_out_cnt, core_bus_out_fire);
|
||||
`POP_COUNT(bank_req_cnt, bank_req_fire);
|
||||
`UNUSED_VAR (core_bus_out_cnt)
|
||||
|
||||
VX_pending_size #(
|
||||
.SIZE (BANK_SEL_LATENCY * NUM_BANKS),
|
||||
.INCRW (NUM_BANKS_W),
|
||||
.DECRW (NUM_BANKS_W)
|
||||
) pending_size (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.incr (NUM_BANKS_W'(core_bus_out_cnt)),
|
||||
.decr (bank_req_cnt),
|
||||
.empty (no_inflight_reqs),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
`UNUSED_PIN (full),
|
||||
`UNUSED_PIN (alm_full),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
end else begin
|
||||
assign no_inflight_reqs = 0;
|
||||
`UNUSED_VAR (bank_req_fire)
|
||||
end
|
||||
|
||||
|
||||
reg [1:0] state, state_n;
|
||||
reg [NUM_BANKS-1:0] flush_done, flush_done_n;
|
||||
|
||||
wire [NUM_REQS-1:0] flush_req_mask;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign flush_req_mask[i] = core_bus_in_if[i].req_valid && core_bus_in_if[i].req_data.atype[`ADDR_TYPE_FLUSH];
|
||||
end
|
||||
wire flush_req_enable = (| flush_req_mask);
|
||||
|
||||
reg [NUM_REQS-1:0] lock_released, lock_released_n;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
wire input_enable = ~flush_req_enable || lock_released[i];
|
||||
assign core_bus_out_if[i].req_valid = core_bus_in_if[i].req_valid && input_enable;
|
||||
assign core_bus_out_if[i].req_data = core_bus_in_if[i].req_data;
|
||||
assign core_bus_in_if[i].req_ready = core_bus_out_if[i].req_ready && input_enable;
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_bus_in_if[i].rsp_valid = core_bus_out_if[i].rsp_valid;
|
||||
assign core_bus_in_if[i].rsp_data = core_bus_out_if[i].rsp_data;
|
||||
assign core_bus_out_if[i].rsp_ready = core_bus_in_if[i].rsp_ready;
|
||||
end
|
||||
|
||||
wire [NUM_REQS-1:0] core_bus_out_ready;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_bus_out_ready[i] = core_bus_out_if[i].req_ready;
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
state_n = state;
|
||||
flush_done_n = flush_done;
|
||||
lock_released_n = lock_released;
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
if (flush_req_enable) begin
|
||||
state_n = (BANK_SEL_LATENCY != 0) ? STATE_WAIT : STATE_FLUSH;
|
||||
end
|
||||
end
|
||||
STATE_WAIT: begin
|
||||
if (no_inflight_reqs) begin
|
||||
state_n = STATE_FLUSH;
|
||||
end
|
||||
end
|
||||
STATE_FLUSH: begin
|
||||
flush_done_n = flush_done | flush_ready;
|
||||
if (flush_done_n == 0) begin
|
||||
state_n = STATE_DONE;
|
||||
lock_released_n = flush_req_mask;
|
||||
end
|
||||
end
|
||||
STATE_DONE: begin
|
||||
lock_released_n = lock_released & ~core_bus_out_ready;
|
||||
if (lock_released_n == 0) begin
|
||||
state_n = STATE_IDLE;
|
||||
end
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= STATE_IDLE;
|
||||
flush_done <= '0;
|
||||
lock_released <= '0;
|
||||
end else begin
|
||||
state <= state_n;
|
||||
flush_done <= flush_done_n;
|
||||
lock_released <= lock_released_n;
|
||||
end
|
||||
end
|
||||
|
||||
assign flush_valid = {NUM_BANKS{state == STATE_FLUSH}};
|
||||
|
||||
endmodule
|
1
hw/rtl/cache/VX_cache_init.sv
vendored
1
hw/rtl/cache/VX_cache_init.sv
vendored
|
@ -13,6 +13,7 @@
|
|||
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
// cache flush unit
|
||||
module VX_cache_init #(
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
|
|
27
hw/rtl/cache/VX_cache_mshr.sv
vendored
27
hw/rtl/cache/VX_cache_mshr.sv
vendored
|
@ -104,7 +104,8 @@ module VX_cache_mshr #(
|
|||
// lookup
|
||||
input wire lookup_valid,
|
||||
input wire [`CS_LINE_ADDR_WIDTH-1:0] lookup_addr,
|
||||
output wire [MSHR_SIZE-1:0] lookup_matches,
|
||||
output wire [MSHR_SIZE-1:0] lookup_pending,
|
||||
output wire [MSHR_SIZE-1:0] lookup_rw,
|
||||
|
||||
// finalize
|
||||
input wire finalize_valid,
|
||||
|
@ -216,13 +217,13 @@ module VX_cache_mshr #(
|
|||
next_table <= next_table_n;
|
||||
end
|
||||
|
||||
`RUNTIME_ASSERT((~allocate_fire || ~valid_table[allocate_id_r]), ("%t: *** %s-bank%0d inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, BANK_ID,
|
||||
`RUNTIME_ASSERT((~allocate_fire || ~valid_table[allocate_id_r]), ("%t: *** %s inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id_r, lkp_req_uuid))
|
||||
|
||||
`RUNTIME_ASSERT((~finalize_valid || valid_table[finalize_id]), ("%t: *** %s-bank%0d invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, BANK_ID,
|
||||
`RUNTIME_ASSERT((~finalize_valid || valid_table[finalize_id]), ("%t: *** %s invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_table[finalize_id], BANK_ID), finalize_id, fin_req_uuid))
|
||||
|
||||
`RUNTIME_ASSERT((~fill_valid || valid_table[fill_id]), ("%t: *** %s-bank%0d invalid fill: addr=0x%0h, id=%0d", $time, INSTANCE_ID, BANK_ID,
|
||||
`RUNTIME_ASSERT((~fill_valid || valid_table[fill_id]), ("%t: *** %s invalid fill: addr=0x%0h, id=%0d", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), fill_id))
|
||||
|
||||
VX_dp_ram #(
|
||||
|
@ -251,7 +252,9 @@ module VX_cache_mshr #(
|
|||
assign dequeue_rw = write_table[dequeue_id_r];
|
||||
assign dequeue_id = dequeue_id_r;
|
||||
|
||||
assign lookup_matches = addr_matches & ~write_table;
|
||||
// return pending entries for the given cache line
|
||||
assign lookup_pending = addr_matches;
|
||||
assign lookup_rw = write_table;
|
||||
|
||||
`UNUSED_VAR (lookup_valid)
|
||||
|
||||
|
@ -264,22 +267,22 @@ module VX_cache_mshr #(
|
|||
show_table <= allocate_fire || lookup_valid || finalize_valid || fill_valid || dequeue_fire;
|
||||
end
|
||||
if (allocate_fire)
|
||||
`TRACE(3, ("%d: %s-bank%0d mshr-allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID,
|
||||
`TRACE(3, ("%d: %s allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_prev, allocate_id, lkp_req_uuid));
|
||||
if (lookup_valid)
|
||||
`TRACE(3, ("%d: %s-bank%0d mshr-lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID, BANK_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_matches, lkp_req_uuid));
|
||||
`TRACE(3, ("%d: %s lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_pending, lkp_req_uuid));
|
||||
if (finalize_valid)
|
||||
`TRACE(3, ("%d: %s-bank%0d mshr-finalize release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID,
|
||||
`TRACE(3, ("%d: %s finalize release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
finalize_release, finalize_pending, finalize_prev, finalize_id, fin_req_uuid));
|
||||
if (fill_valid)
|
||||
`TRACE(3, ("%d: %s-bank%0d mshr-fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID, BANK_ID,
|
||||
`TRACE(3, ("%d: %s fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), `CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id));
|
||||
if (dequeue_fire)
|
||||
`TRACE(3, ("%d: %s-bank%0d mshr-dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID,
|
||||
`TRACE(3, ("%d: %s dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_uuid));
|
||||
if (show_table) begin
|
||||
`TRACE(3, ("%d: %s-bank%0d mshr-table", $time, INSTANCE_ID, BANK_ID));
|
||||
`TRACE(3, ("%d: %s table", $time, INSTANCE_ID));
|
||||
for (integer i = 0; i < MSHR_SIZE; ++i) begin
|
||||
if (valid_table[i]) begin
|
||||
`TRACE(3, (" %0d=0x%0h", i, `CS_LINE_TO_FULL_ADDR(addr_table[i], BANK_ID)));
|
||||
|
|
90
hw/rtl/cache/VX_cache_tags.sv
vendored
90
hw/rtl/cache/VX_cache_tags.sv
vendored
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -17,15 +17,15 @@ module VX_cache_tags #(
|
|||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter BANK_ID = 0,
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
parameter CACHE_SIZE = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 16,
|
||||
parameter LINE_SIZE = 16,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 1,
|
||||
parameter NUM_WAYS = 1,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 1,
|
||||
parameter WORD_SIZE = 1,
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0
|
||||
) (
|
||||
|
@ -38,45 +38,63 @@ module VX_cache_tags #(
|
|||
|
||||
input wire stall,
|
||||
|
||||
// read/fill
|
||||
// init/fill/lookup
|
||||
input wire init,
|
||||
input wire fill,
|
||||
input wire lookup,
|
||||
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
|
||||
input wire fill,
|
||||
input wire init,
|
||||
output wire [NUM_WAYS-1:0] way_sel,
|
||||
output wire [NUM_WAYS-1:0] tag_matches
|
||||
output wire [NUM_WAYS-1:0] tag_matches,
|
||||
|
||||
// replacement
|
||||
output wire [NUM_WAYS-1:0] repl_way,
|
||||
output wire [`CS_TAG_SEL_BITS-1:0] repl_tag
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_PARAM (BANK_ID)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (lookup)
|
||||
|
||||
// valid, tag
|
||||
localparam TAG_WIDTH = 1 + `CS_TAG_SEL_BITS;
|
||||
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
|
||||
wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_TAG_ADDR(line_addr);
|
||||
wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_ADDR_TAG(line_addr);
|
||||
|
||||
wire [NUM_WAYS-1:0][`CS_TAG_SEL_BITS-1:0] read_tag;
|
||||
wire [NUM_WAYS-1:0] read_valid;
|
||||
|
||||
if (NUM_WAYS > 1) begin
|
||||
reg [NUM_WAYS-1:0] repl_way;
|
||||
reg [NUM_WAYS-1:0] repl_way_r;
|
||||
// cyclic assignment of replacement way
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
repl_way <= 1;
|
||||
repl_way_r <= 1;
|
||||
end else if (~stall) begin // hold the value on stalls prevent filling different slots twice
|
||||
repl_way <= {repl_way[NUM_WAYS-2:0], repl_way[NUM_WAYS-1]};
|
||||
repl_way_r <= {repl_way_r[NUM_WAYS-2:0], repl_way_r[NUM_WAYS-1]};
|
||||
end
|
||||
end
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin
|
||||
assign way_sel[i] = fill && repl_way[i];
|
||||
end
|
||||
|
||||
assign repl_way = repl_way_r;
|
||||
|
||||
VX_onehot_mux #(
|
||||
.DATAW (`CS_TAG_SEL_BITS),
|
||||
.N (NUM_WAYS)
|
||||
) repl_tag_sel (
|
||||
.data_in (read_tag),
|
||||
.sel_in (repl_way_r),
|
||||
.data_out (repl_tag)
|
||||
);
|
||||
end else begin
|
||||
`UNUSED_VAR (stall)
|
||||
assign way_sel = fill;
|
||||
assign repl_way = 1'b1;
|
||||
assign repl_tag = read_tag;
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin
|
||||
wire [`CS_TAG_SEL_BITS-1:0] read_tag;
|
||||
wire read_valid;
|
||||
|
||||
wire do_fill = fill && repl_way[i];
|
||||
wire do_write = init || do_fill;
|
||||
wire line_valid = ~init;
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (TAG_WIDTH),
|
||||
|
@ -85,32 +103,34 @@ module VX_cache_tags #(
|
|||
) tag_store (
|
||||
.clk (clk),
|
||||
.read (1'b1),
|
||||
.write (way_sel[i] || init),
|
||||
`UNUSED_PIN (wren),
|
||||
.write (do_write),
|
||||
`UNUSED_PIN (wren),
|
||||
.addr (line_sel),
|
||||
.wdata ({~init, line_tag}),
|
||||
.rdata ({read_valid, read_tag})
|
||||
.wdata ({line_valid, line_tag}),
|
||||
.rdata ({read_valid[i], read_tag[i]})
|
||||
);
|
||||
|
||||
assign tag_matches[i] = read_valid && (line_tag == read_tag);
|
||||
end
|
||||
|
||||
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin
|
||||
assign tag_matches[i] = read_valid[i] && (line_tag == read_tag[i]);
|
||||
end
|
||||
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
always @(posedge clk) begin
|
||||
if (fill && ~stall) begin
|
||||
`TRACE(3, ("%d: %s-bank%0d tag-fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, line_tag));
|
||||
`TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), repl_way, line_sel, line_tag));
|
||||
end
|
||||
if (init) begin
|
||||
`TRACE(3, ("%d: %s-bank%0d tag-init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel));
|
||||
`TRACE(3, ("%d: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel));
|
||||
end
|
||||
if (lookup && ~stall) begin
|
||||
if (tag_matches != 0) begin
|
||||
`TRACE(3, ("%d: %s-bank%0d tag-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, line_tag, req_uuid));
|
||||
`TRACE(3, ("%d: %s hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid));
|
||||
end else begin
|
||||
`TRACE(3, ("%d: %s-bank%0d tag-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid));
|
||||
`TRACE(3, ("%d: %s miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid));
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
54
hw/rtl/cache/VX_cache_wrap.sv
vendored
54
hw/rtl/cache/VX_cache_wrap.sv
vendored
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -23,20 +23,20 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
|
||||
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 4096,
|
||||
parameter CACHE_SIZE = 4096,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 64,
|
||||
parameter LINE_SIZE = 64,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 1,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
parameter WORD_SIZE = 4,
|
||||
|
||||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 2,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MSHR_SIZE = 8,
|
||||
parameter MSHR_SIZE = 8,
|
||||
// Memory Response Queue Size
|
||||
parameter MRSQ_SIZE = 0,
|
||||
// Memory Request Queue Size
|
||||
|
@ -45,6 +45,9 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
|
@ -63,7 +66,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
// Memory request output buffer
|
||||
parameter MEM_OUT_BUF = 0
|
||||
) (
|
||||
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
@ -80,7 +83,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
|
||||
localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE);
|
||||
localparam CACHE_MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS;
|
||||
|
||||
|
||||
localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
|
||||
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
|
||||
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS));
|
||||
|
@ -98,7 +101,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
) mem_bus_cache_if();
|
||||
|
||||
if (NC_OR_BYPASS) begin
|
||||
|
||||
|
||||
`RESET_RELAY (nc_bypass_reset, reset);
|
||||
|
||||
VX_cache_bypass #(
|
||||
|
@ -108,13 +111,13 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
.PASSTHRU (PASSTHRU),
|
||||
.NC_ENABLE (PASSTHRU ? 0 : NC_ENABLE),
|
||||
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
|
||||
.CORE_ADDR_WIDTH (`CS_WORD_ADDR_WIDTH),
|
||||
.CORE_ADDR_WIDTH (`CS_WORD_ADDR_WIDTH),
|
||||
.CORE_TAG_WIDTH (TAG_WIDTH),
|
||||
|
||||
.MEM_ADDR_WIDTH (`CS_MEM_ADDR_WIDTH),
|
||||
|
||||
.MEM_ADDR_WIDTH (`CS_MEM_ADDR_WIDTH),
|
||||
.MEM_TAG_IN_WIDTH (CACHE_MEM_TAG_WIDTH),
|
||||
.MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH),
|
||||
|
||||
|
@ -132,15 +135,15 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
.mem_bus_in_if (mem_bus_cache_if),
|
||||
.mem_bus_out_if (mem_bus_if)
|
||||
);
|
||||
|
||||
|
||||
end else begin
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
`ASSIGN_VX_MEM_BUS_IF (core_bus_cache_if[i], core_bus_if[i]);
|
||||
`ASSIGN_VX_MEM_BUS_IF (core_bus_cache_if[i], core_bus_if[i]);
|
||||
end
|
||||
|
||||
`ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_cache_if);
|
||||
end
|
||||
end
|
||||
|
||||
if (PASSTHRU != 0) begin
|
||||
|
||||
|
@ -152,7 +155,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
assign core_bus_cache_if[i].rsp_valid = 0;
|
||||
assign core_bus_cache_if[i].rsp_data = '0;
|
||||
`UNUSED_VAR (core_bus_cache_if[i].rsp_ready)
|
||||
end
|
||||
end
|
||||
|
||||
assign mem_bus_cache_if.req_valid = 0;
|
||||
assign mem_bus_cache_if.req_data = '0;
|
||||
|
@ -183,6 +186,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
.MRSQ_SIZE (MRSQ_SIZE),
|
||||
.MREQ_SIZE (MREQ_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.CORE_OUT_BUF (NC_OR_BYPASS ? 1 : CORE_OUT_BUF),
|
||||
|
@ -195,8 +199,8 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
`endif
|
||||
.core_bus_if (core_bus_cache_if),
|
||||
.mem_bus_if (mem_bus_cache_if)
|
||||
);
|
||||
|
||||
);
|
||||
|
||||
end
|
||||
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
|
@ -225,9 +229,9 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
end
|
||||
if (core_rsp_fire) begin
|
||||
`TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid));
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire [`UP(UUID_WIDTH)-1:0] mem_req_uuid;
|
||||
wire [`UP(UUID_WIDTH)-1:0] mem_rsp_uuid;
|
||||
|
@ -246,17 +250,17 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
always @(posedge clk) begin
|
||||
if (mem_req_fire) begin
|
||||
if (mem_bus_if.req_data.rw)
|
||||
`TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
|
||||
`TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid));
|
||||
else
|
||||
`TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
|
||||
`TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid));
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n",
|
||||
`TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid));
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_alu_int #(
|
||||
parameter CORE_ID = 0,
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter BLOCK_IDX = 0,
|
||||
parameter NUM_LANES = 1
|
||||
) (
|
||||
|
@ -29,7 +29,7 @@ module VX_alu_int #(
|
|||
VX_branch_ctl_if.master branch_ctl_if
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam LANE_BITS = `CLOG2(NUM_LANES);
|
||||
localparam LANE_WIDTH = `UP(LANE_BITS);
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
|
@ -121,7 +121,7 @@ module VX_alu_int #(
|
|||
case ({is_alu_w, op_class})
|
||||
3'b000: alu_result[i] = add_result[i]; // ADD, LUI, AUIPC
|
||||
3'b001: alu_result[i] = sub_slt_br_result; // SUB, SLTU, SLTI, BR*
|
||||
3'b010: alu_result[i] = shr_zic_result[i]; // SRL, SRA, SRLI, SRAI, CZERO*
|
||||
3'b010: alu_result[i] = shr_zic_result[i]; // SRL, SRA, SRLI, SRAI, CZERO*
|
||||
3'b011: alu_result[i] = msc_result[i]; // AND, OR, XOR, SLL, SLLI
|
||||
3'b100: alu_result[i] = add_result_w[i]; // ADDIW, ADDW
|
||||
3'b101: alu_result[i] = sub_result_w[i]; // SUBW
|
||||
|
@ -181,7 +181,7 @@ module VX_alu_int #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in ({br_enable, br_wid, br_taken, br_dest}),
|
||||
.data_in ({br_enable, br_wid, br_taken, br_dest}),
|
||||
.data_out ({branch_ctl_if.valid, branch_ctl_if.wid, branch_ctl_if.taken, branch_ctl_if.dest})
|
||||
);
|
||||
|
||||
|
@ -193,9 +193,9 @@ module VX_alu_int #(
|
|||
|
||||
`ifdef DBG_TRACE_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (branch_ctl_if.valid) begin
|
||||
`TRACE(1, ("%d: core%0d-branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n",
|
||||
$time, CORE_ID, branch_ctl_if.wid, {commit_if.data.PC, 1'b0}, branch_ctl_if.taken, {branch_ctl_if.dest, 1'b0}, commit_if.data.uuid));
|
||||
if (br_enable) begin
|
||||
`TRACE(1, ("%d: %s-branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, br_wid, {commit_if.data.PC, 1'b0}, br_taken, {br_dest, 1'b0}, commit_if.data.uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_alu_muldiv #(
|
||||
parameter CORE_ID = 0,
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter NUM_LANES = 1
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -26,7 +26,7 @@ module VX_alu_muldiv #(
|
|||
// Outputs
|
||||
VX_commit_if.master commit_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam TAG_WIDTH = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + PID_WIDTH + 1 + 1;
|
||||
|
@ -69,7 +69,7 @@ module VX_alu_muldiv #(
|
|||
wire mul_fire_in = mul_valid_in && mul_ready_in;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [`XLEN-1:0] mul_resultl, mul_resulth;
|
||||
reg [`XLEN-1:0] mul_resultl, mul_resulth;
|
||||
wire [`XLEN-1:0] mul_in1 = is_alu_w ? (execute_if.data.rs1_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs1_data[i];
|
||||
wire [`XLEN-1:0] mul_in2 = is_alu_w ? (execute_if.data.rs2_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs2_data[i];
|
||||
always @(*) begin
|
||||
|
@ -235,7 +235,7 @@ module VX_alu_muldiv #(
|
|||
wire div_fire_in = div_valid_in && div_ready_in;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [`XLEN-1:0] div_quotient, div_remainder;
|
||||
reg [`XLEN-1:0] div_quotient, div_remainder;
|
||||
always @(*) begin
|
||||
dpi_idiv (div_fire_in, is_signed_op, div_in1[i], div_in2[i], div_quotient, div_remainder);
|
||||
end
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_alu_unit #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -27,7 +27,7 @@ module VX_alu_unit #(
|
|||
VX_branch_ctl_if.master branch_ctl_if [`NUM_ALU_BLOCKS]
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam BLOCK_SIZE = `NUM_ALU_BLOCKS;
|
||||
localparam NUM_LANES = `NUM_ALU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
|
@ -75,7 +75,7 @@ module VX_alu_unit #(
|
|||
`RESET_RELAY (int_reset, block_reset);
|
||||
|
||||
VX_alu_int #(
|
||||
.CORE_ID (CORE_ID),
|
||||
.INSTANCE_ID ($sformatf("%s-int%0d", INSTANCE_ID, block_idx)),
|
||||
.BLOCK_IDX (block_idx),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) alu_int (
|
||||
|
@ -90,59 +90,61 @@ module VX_alu_unit #(
|
|||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) mdv_execute_if();
|
||||
) muldiv_execute_if();
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) mdv_commit_if();
|
||||
) muldiv_commit_if();
|
||||
|
||||
assign mdv_execute_if.valid = per_block_execute_if[block_idx].valid && is_muldiv_op;
|
||||
assign mdv_execute_if.data = per_block_execute_if[block_idx].data;
|
||||
assign muldiv_execute_if.valid = per_block_execute_if[block_idx].valid && is_muldiv_op;
|
||||
assign muldiv_execute_if.data = per_block_execute_if[block_idx].data;
|
||||
|
||||
`RESET_RELAY (mdv_reset, block_reset);
|
||||
`RESET_RELAY (muldiv_reset, block_reset);
|
||||
|
||||
VX_alu_muldiv #(
|
||||
.CORE_ID (CORE_ID),
|
||||
.INSTANCE_ID ($sformatf("%s-muldiv%0d", INSTANCE_ID, block_idx)),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) mdv_unit (
|
||||
) muldiv_unit (
|
||||
.clk (clk),
|
||||
.reset (mdv_reset),
|
||||
.execute_if (mdv_execute_if),
|
||||
.commit_if (mdv_commit_if)
|
||||
.reset (muldiv_reset),
|
||||
.execute_if (muldiv_execute_if),
|
||||
.commit_if (muldiv_commit_if)
|
||||
);
|
||||
|
||||
`endif
|
||||
|
||||
assign per_block_execute_if[block_idx].ready =
|
||||
`ifdef EXT_M_ENABLE
|
||||
is_muldiv_op ? mdv_execute_if.ready :
|
||||
is_muldiv_op ? muldiv_execute_if.ready :
|
||||
`endif
|
||||
int_execute_if.ready;
|
||||
|
||||
// send response
|
||||
|
||||
`RESET_RELAY (arb_reset, block_reset);
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (RSP_ARB_SIZE),
|
||||
.DATAW (RSP_ARB_DATAW),
|
||||
.OUT_BUF (PARTIAL_BW ? 1 : 3)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (block_reset),
|
||||
.reset (arb_reset),
|
||||
.valid_in ({
|
||||
`ifdef EXT_M_ENABLE
|
||||
mdv_commit_if.valid,
|
||||
muldiv_commit_if.valid,
|
||||
`endif
|
||||
int_commit_if.valid
|
||||
}),
|
||||
.ready_in ({
|
||||
`ifdef EXT_M_ENABLE
|
||||
mdv_commit_if.ready,
|
||||
muldiv_commit_if.ready,
|
||||
`endif
|
||||
int_commit_if.ready
|
||||
}),
|
||||
.data_in ({
|
||||
`ifdef EXT_M_ENABLE
|
||||
mdv_commit_if.data,
|
||||
muldiv_commit_if.data,
|
||||
`endif
|
||||
int_commit_if.data
|
||||
}),
|
||||
|
|
|
@ -13,8 +13,8 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_commit import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
module VX_commit import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -27,7 +27,7 @@ module VX_commit import VX_gpu_pkg::*; #(
|
|||
VX_commit_csr_if.master commit_csr_if,
|
||||
VX_commit_sched_if.master commit_sched_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + 1 + `NR_BITS + `NUM_THREADS * `XLEN + 1 + 1 + 1;
|
||||
localparam COMMIT_SIZEW = `CLOG2(`NUM_THREADS + 1);
|
||||
localparam COMMIT_ALL_SIZEW = COMMIT_SIZEW + `ISSUE_WIDTH - 1;
|
||||
|
@ -36,12 +36,10 @@ module VX_commit import VX_gpu_pkg::*; #(
|
|||
|
||||
VX_commit_if commit_arb_if[`ISSUE_WIDTH]();
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0] commit_fire;
|
||||
wire [`ISSUE_WIDTH-1:0][`NW_WIDTH-1:0] commit_wid;
|
||||
wire [`ISSUE_WIDTH-1:0][`NUM_THREADS-1:0] commit_tmask;
|
||||
wire [`ISSUE_WIDTH-1:0] commit_eop;
|
||||
|
||||
`RESET_RELAY (arb_reset, reset);
|
||||
wire [`ISSUE_WIDTH-1:0] per_issue_commit_fire;
|
||||
wire [`ISSUE_WIDTH-1:0][`NW_WIDTH-1:0] per_issue_commit_wid;
|
||||
wire [`ISSUE_WIDTH-1:0][`NUM_THREADS-1:0] per_issue_commit_tmask;
|
||||
wire [`ISSUE_WIDTH-1:0] per_issue_commit_eop;
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
|
||||
|
@ -55,6 +53,8 @@ module VX_commit import VX_gpu_pkg::*; #(
|
|||
assign commit_if[j * `ISSUE_WIDTH + i].ready = ready_in[j];
|
||||
end
|
||||
|
||||
`RESET_RELAY (arb_reset, reset);
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (`NUM_EX_UNITS),
|
||||
.DATAW (DATAW),
|
||||
|
@ -72,10 +72,10 @@ module VX_commit import VX_gpu_pkg::*; #(
|
|||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
assign commit_fire[i] = commit_arb_if[i].valid && commit_arb_if[i].ready;
|
||||
assign commit_tmask[i]= {`NUM_THREADS{commit_fire[i]}} & commit_arb_if[i].data.tmask;
|
||||
assign commit_wid[i] = commit_arb_if[i].data.wid;
|
||||
assign commit_eop[i] = commit_arb_if[i].data.eop;
|
||||
assign per_issue_commit_fire[i] = commit_arb_if[i].valid && commit_arb_if[i].ready;
|
||||
assign per_issue_commit_tmask[i]= {`NUM_THREADS{per_issue_commit_fire[i]}} & commit_arb_if[i].data.tmask;
|
||||
assign per_issue_commit_wid[i] = commit_arb_if[i].data.wid;
|
||||
assign per_issue_commit_eop[i] = commit_arb_if[i].data.eop;
|
||||
end
|
||||
|
||||
// CSRs update
|
||||
|
@ -84,11 +84,11 @@ module VX_commit import VX_gpu_pkg::*; #(
|
|||
wire [COMMIT_ALL_SIZEW-1:0] commit_size_all_r, commit_size_all_rr;
|
||||
wire commit_fire_any, commit_fire_any_r, commit_fire_any_rr;
|
||||
|
||||
assign commit_fire_any = (| commit_fire);
|
||||
assign commit_fire_any = (| per_issue_commit_fire);
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
wire [COMMIT_SIZEW-1:0] count;
|
||||
`POP_COUNT(count, commit_tmask[i]);
|
||||
`POP_COUNT(count, per_issue_commit_tmask[i]);
|
||||
assign commit_size[i] = count;
|
||||
end
|
||||
|
||||
|
@ -136,19 +136,28 @@ module VX_commit import VX_gpu_pkg::*; #(
|
|||
end
|
||||
assign commit_csr_if.instret = instret;
|
||||
|
||||
// Committed instructions
|
||||
// Track committed instructions
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0] committed = commit_fire & commit_eop;
|
||||
reg [`NUM_WARPS-1:0] committed_warps;
|
||||
|
||||
always @(*) begin
|
||||
committed_warps = 0;
|
||||
for (integer i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
if (per_issue_commit_fire[i] && per_issue_commit_eop[i]) begin
|
||||
committed_warps[per_issue_commit_wid[i]] = 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (`ISSUE_WIDTH * (1 + `NW_WIDTH)),
|
||||
.RESETW (`ISSUE_WIDTH)
|
||||
.DATAW (`NUM_WARPS),
|
||||
.RESETW (`NUM_WARPS)
|
||||
) committed_pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in ({committed, commit_wid}),
|
||||
.data_out ({commit_sched_if.committed, commit_sched_if.committed_wid})
|
||||
.data_in (committed_warps),
|
||||
.data_out ({commit_sched_if.committed_warps})
|
||||
);
|
||||
|
||||
// Writeback
|
||||
|
@ -171,7 +180,7 @@ module VX_commit import VX_gpu_pkg::*; #(
|
|||
for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin
|
||||
always @(posedge clk) begin
|
||||
if (commit_if[j * `ISSUE_WIDTH + i].valid && commit_if[j * `ISSUE_WIDTH + i].ready) begin
|
||||
`TRACE(1, ("%d: core%0d-commit: wid=%0d, PC=0x%0h, ex=", $time, CORE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0}));
|
||||
`TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0}));
|
||||
trace_ex_type(1, j);
|
||||
`TRACE(1, (", tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", commit_if[j * `ISSUE_WIDTH + i].data.tmask, commit_if[j * `ISSUE_WIDTH + i].data.wb, commit_if[j * `ISSUE_WIDTH + i].data.rd, commit_if[j * `ISSUE_WIDTH + i].data.sop, commit_if[j * `ISSUE_WIDTH + i].data.eop));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", commit_if[j * `ISSUE_WIDTH + i].data.data, `NUM_THREADS);
|
||||
|
|
|
@ -18,7 +18,8 @@
|
|||
`endif
|
||||
|
||||
module VX_core import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter CORE_ID = 0,
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
|
@ -94,13 +95,14 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
`SCOPE_IO_SWITCH (3)
|
||||
|
||||
VX_schedule #(
|
||||
.INSTANCE_ID ($sformatf("%s-schedule", INSTANCE_ID)),
|
||||
.CORE_ID (CORE_ID)
|
||||
) schedule (
|
||||
.clk (clk),
|
||||
.reset (schedule_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_schedule_if (pipeline_perf_if.schedule),
|
||||
.sched_perf (pipeline_perf_if.sched),
|
||||
`endif
|
||||
|
||||
.base_dcrs (base_dcrs),
|
||||
|
@ -121,7 +123,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_fetch #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-fetch", INSTANCE_ID))
|
||||
) fetch (
|
||||
`SCOPE_IO_BIND (0)
|
||||
.clk (clk),
|
||||
|
@ -132,7 +134,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_decode #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-decode", INSTANCE_ID))
|
||||
) decode (
|
||||
.clk (clk),
|
||||
.reset (decode_reset),
|
||||
|
@ -142,7 +144,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_issue #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-issue", INSTANCE_ID))
|
||||
) issue (
|
||||
`SCOPE_IO_BIND (1)
|
||||
|
||||
|
@ -150,7 +152,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
.reset (issue_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_issue_if (pipeline_perf_if.issue),
|
||||
.issue_perf (pipeline_perf_if.issue),
|
||||
`endif
|
||||
|
||||
.decode_if (decode_if),
|
||||
|
@ -159,6 +161,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_execute #(
|
||||
.INSTANCE_ID ($sformatf("%s-execute", INSTANCE_ID)),
|
||||
.CORE_ID (CORE_ID)
|
||||
) execute (
|
||||
`SCOPE_IO_BIND (2)
|
||||
|
@ -186,7 +189,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_commit #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-commit", INSTANCE_ID))
|
||||
) commit (
|
||||
.clk (clk),
|
||||
.reset (commit_reset),
|
||||
|
@ -210,7 +213,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (lmem_unit_reset, reset);
|
||||
|
||||
VX_lmem_unit #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID (INSTANCE_ID)
|
||||
) lmem_unit (
|
||||
.clk (clk),
|
||||
.reset (lmem_unit_reset),
|
||||
|
@ -229,20 +232,20 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
|
||||
`endif
|
||||
|
||||
VX_lsu_mem_if #(
|
||||
.NUM_LANES (DCACHE_CHANNELS),
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH)
|
||||
) dcache_coalesced_if[`NUM_LSU_BLOCKS]();
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||
|
||||
if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin
|
||||
VX_lsu_mem_if #(
|
||||
.NUM_LANES (DCACHE_CHANNELS),
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH)
|
||||
) dcache_coalesced_if();
|
||||
|
||||
`RESET_RELAY (coalescer_reset, reset);
|
||||
if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin
|
||||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||
`RESET_RELAY (mem_coalescer_reset, reset);
|
||||
|
||||
VX_mem_coalescer #(
|
||||
.INSTANCE_ID ($sformatf("core%0d-coalescer", CORE_ID)),
|
||||
.INSTANCE_ID ($sformatf("%s-coalescer%0d", INSTANCE_ID, i)),
|
||||
.NUM_REQS (`NUM_LSU_LANES),
|
||||
.DATA_IN_SIZE (LSU_WORD_SIZE),
|
||||
.DATA_OUT_SIZE (DCACHE_WORD_SIZE),
|
||||
|
@ -251,9 +254,9 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH (LSU_TAG_WIDTH),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.QUEUE_SIZE (`LSUQ_OUT_SIZE)
|
||||
) coalescer (
|
||||
) mem_coalescer (
|
||||
.clk (clk),
|
||||
.reset (coalescer_reset),
|
||||
.reset (mem_coalescer_reset),
|
||||
|
||||
// Input request
|
||||
.in_req_valid (lsu_dcache_if[i].req_valid),
|
||||
|
@ -274,42 +277,37 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
.in_rsp_ready (lsu_dcache_if[i].rsp_ready),
|
||||
|
||||
// Output request
|
||||
.out_req_valid (dcache_coalesced_if[i].req_valid),
|
||||
.out_req_mask (dcache_coalesced_if[i].req_data.mask),
|
||||
.out_req_rw (dcache_coalesced_if[i].req_data.rw),
|
||||
.out_req_byteen (dcache_coalesced_if[i].req_data.byteen),
|
||||
.out_req_addr (dcache_coalesced_if[i].req_data.addr),
|
||||
.out_req_atype (dcache_coalesced_if[i].req_data.atype),
|
||||
.out_req_data (dcache_coalesced_if[i].req_data.data),
|
||||
.out_req_tag (dcache_coalesced_if[i].req_data.tag),
|
||||
.out_req_ready (dcache_coalesced_if[i].req_ready),
|
||||
.out_req_valid (dcache_coalesced_if.req_valid),
|
||||
.out_req_mask (dcache_coalesced_if.req_data.mask),
|
||||
.out_req_rw (dcache_coalesced_if.req_data.rw),
|
||||
.out_req_byteen (dcache_coalesced_if.req_data.byteen),
|
||||
.out_req_addr (dcache_coalesced_if.req_data.addr),
|
||||
.out_req_atype (dcache_coalesced_if.req_data.atype),
|
||||
.out_req_data (dcache_coalesced_if.req_data.data),
|
||||
.out_req_tag (dcache_coalesced_if.req_data.tag),
|
||||
.out_req_ready (dcache_coalesced_if.req_ready),
|
||||
|
||||
// Output response
|
||||
.out_rsp_valid (dcache_coalesced_if[i].rsp_valid),
|
||||
.out_rsp_mask (dcache_coalesced_if[i].rsp_data.mask),
|
||||
.out_rsp_data (dcache_coalesced_if[i].rsp_data.data),
|
||||
.out_rsp_tag (dcache_coalesced_if[i].rsp_data.tag),
|
||||
.out_rsp_ready (dcache_coalesced_if[i].rsp_ready)
|
||||
.out_rsp_valid (dcache_coalesced_if.rsp_valid),
|
||||
.out_rsp_mask (dcache_coalesced_if.rsp_data.mask),
|
||||
.out_rsp_data (dcache_coalesced_if.rsp_data.data),
|
||||
.out_rsp_tag (dcache_coalesced_if.rsp_data.tag),
|
||||
.out_rsp_ready (dcache_coalesced_if.rsp_ready)
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
||||
`ASSIGN_VX_LSU_MEM_IF (dcache_coalesced_if, lsu_dcache_if[i]);
|
||||
|
||||
end
|
||||
|
||||
end else begin
|
||||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||
`ASSIGN_VX_LSU_MEM_IF (dcache_coalesced_if[i], lsu_dcache_if[i]);
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
`RESET_RELAY (lsu_adapter_reset, reset);
|
||||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH)
|
||||
) dcache_bus_tmp_if[DCACHE_CHANNELS]();
|
||||
|
||||
`RESET_RELAY (lsu_adapter_reset, reset);
|
||||
|
||||
VX_lsu_adapter #(
|
||||
.NUM_LANES (DCACHE_CHANNELS),
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
|
@ -320,15 +318,17 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
) lsu_adapter (
|
||||
.clk (clk),
|
||||
.reset (lsu_adapter_reset),
|
||||
.lsu_mem_if (dcache_coalesced_if[i]),
|
||||
.lsu_mem_if (dcache_coalesced_if),
|
||||
.mem_bus_if (dcache_bus_tmp_if)
|
||||
);
|
||||
|
||||
for (genvar j = 0; j < DCACHE_CHANNELS; ++j) begin
|
||||
`ASSIGN_VX_MEM_BUS_IF (dcache_bus_if[i * DCACHE_CHANNELS + j], dcache_bus_tmp_if[j]);
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
|
||||
wire [`CLOG2(LSU_NUM_REQS+1)-1:0] perf_dcache_rd_req_per_cycle;
|
||||
|
|
|
@ -144,6 +144,7 @@ module VX_core_top import VX_gpu_pkg::*; #(
|
|||
`endif
|
||||
|
||||
VX_core #(
|
||||
.INSTANCE_ID ($sformatf("core")),
|
||||
.CORE_ID (CORE_ID)
|
||||
) core (
|
||||
`SCOPE_IO_BIND (0)
|
||||
|
|
|
@ -26,13 +26,13 @@
|
|||
addr+12'h80 : dst = 32'(src[$bits(src)-1:32])
|
||||
`endif
|
||||
|
||||
|
||||
module VX_csr_data
|
||||
import VX_gpu_pkg::*;
|
||||
`ifdef EXT_F_ENABLE
|
||||
import VX_fpu_pkg::*;
|
||||
`endif
|
||||
#(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -147,7 +147,7 @@ import VX_fpu_pkg::*;
|
|||
mscratch <= write_data;
|
||||
end
|
||||
default: begin
|
||||
`ASSERT(0, ("%t: *** invalid CSR write address: %0h (#%0d)", $time, write_addr, write_uuid));
|
||||
`ASSERT(0, ("%t: *** %s invalid CSR write address: %0h (#%0d)", $time, INSTANCE_ID, write_addr, write_uuid));
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
@ -212,21 +212,21 @@ import VX_fpu_pkg::*;
|
|||
`VX_DCR_MPM_CLASS_CORE: begin
|
||||
case (read_addr)
|
||||
// PERF: pipeline
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCHED_ID, read_data_ro_r, pipeline_perf_if.sched_idles);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCHED_ST, read_data_ro_r, pipeline_perf_if.sched_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_IBUF_ST, read_data_ro_r, pipeline_perf_if.ibf_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_ST, read_data_ro_r, pipeline_perf_if.scb_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_ALU, read_data_ro_r, pipeline_perf_if.units_uses[`EX_ALU]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCHED_ID, read_data_ro_r, pipeline_perf_if.sched.idles);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCHED_ST, read_data_ro_r, pipeline_perf_if.sched.stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_IBUF_ST, read_data_ro_r, pipeline_perf_if.issue.ibf_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_ST, read_data_ro_r, pipeline_perf_if.issue.scb_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_OPDS_ST, read_data_ro_r, pipeline_perf_if.issue.opd_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_ALU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_ALU]);
|
||||
`ifdef EXT_F_ENABLE
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_r, pipeline_perf_if.units_uses[`EX_FPU]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_FPU]);
|
||||
`else
|
||||
`VX_CSR_MPM_SCRB_FPU : read_data_ro_r = '0;
|
||||
`VX_CSR_MPM_SCRB_FPU_H : read_data_ro_r = '0;
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_r, `PERF_CTR_BITS'(0));
|
||||
`endif
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_LSU, read_data_ro_r, pipeline_perf_if.units_uses[`EX_LSU]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_SFU, read_data_ro_r, pipeline_perf_if.units_uses[`EX_SFU]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_CSRS, read_data_ro_r, pipeline_perf_if.sfu_uses[`SFU_CSRS]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_WCTL, read_data_ro_r, pipeline_perf_if.sfu_uses[`SFU_WCTL]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_LSU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_LSU]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_SFU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_SFU]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_CSRS, read_data_ro_r, pipeline_perf_if.issue.sfu_uses[`SFU_CSRS]);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCRB_WCTL, read_data_ro_r, pipeline_perf_if.issue.sfu_uses[`SFU_WCTL]);
|
||||
// PERF: memory
|
||||
`CSR_READ_64(`VX_CSR_MPM_IFETCHES, read_data_ro_r, pipeline_perf_if.ifetches);
|
||||
`CSR_READ_64(`VX_CSR_MPM_LOADS, read_data_ro_r, pipeline_perf_if.loads);
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_csr_unit import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter CORE_ID = 0,
|
||||
parameter NUM_LANES = 1
|
||||
) (
|
||||
|
@ -36,7 +37,7 @@ module VX_csr_unit import VX_gpu_pkg::*; #(
|
|||
VX_execute_if.slave execute_if,
|
||||
VX_commit_if.master commit_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
|
@ -72,7 +73,8 @@ module VX_csr_unit import VX_gpu_pkg::*; #(
|
|||
wire csr_write_enable = (execute_if.data.op_type == `INST_SFU_CSRRW);
|
||||
|
||||
VX_csr_data #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID (INSTANCE_ID),
|
||||
.CORE_ID (CORE_ID)
|
||||
) csr_data (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -12,9 +12,8 @@
|
|||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
`include "VX_trace.vh"
|
||||
|
||||
module VX_dcr_data import VX_gpu_pkg::*; (
|
||||
module VX_dcr_data import VX_gpu_pkg::*, VX_trace_pkg::*; (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
|
|
@ -12,7 +12,6 @@
|
|||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
`include "VX_trace.vh"
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`define USED_IREG(x) \
|
||||
|
@ -28,8 +27,8 @@
|
|||
use_``x = 1
|
||||
`endif
|
||||
|
||||
module VX_decode import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -44,7 +43,7 @@ module VX_decode import VX_gpu_pkg::*; #(
|
|||
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + 1 + (`NR_BITS * 4);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
|
@ -145,6 +144,12 @@ module VX_decode import VX_gpu_pkg::*; #(
|
|||
end
|
||||
`endif
|
||||
|
||||
`STATIC_ASSERT($bits(alu_args_t) == $bits(op_args_t), ("alu_args_t size mismatch: current=%0d, expected=%0d", $bits(alu_args_t), $bits(op_args_t)));
|
||||
`STATIC_ASSERT($bits(fpu_args_t) == $bits(op_args_t), ("fpu_args_t size mismatch: current=%0d, expected=%0d", $bits(fpu_args_t), $bits(op_args_t)));
|
||||
`STATIC_ASSERT($bits(lsu_args_t) == $bits(op_args_t), ("lsu_args_t size mismatch: current=%0d, expected=%0d", $bits(lsu_args_t), $bits(op_args_t)));
|
||||
`STATIC_ASSERT($bits(csr_args_t) == $bits(op_args_t), ("csr_args_t size mismatch: current=%0d, expected=%0d", $bits(csr_args_t), $bits(op_args_t)));
|
||||
`STATIC_ASSERT($bits(wctl_args_t) == $bits(op_args_t), ("wctl_args_t size mismatch: current=%0d, expected=%0d", $bits(wctl_args_t), $bits(op_args_t)));
|
||||
|
||||
always @(*) begin
|
||||
|
||||
ex_type = '0;
|
||||
|
@ -552,7 +557,7 @@ module VX_decode import VX_gpu_pkg::*; #(
|
|||
`ifdef DBG_TRACE_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (decode_if.valid && decode_if.ready) begin
|
||||
`TRACE(1, ("%d: core%0d-decode: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, CORE_ID, decode_if.data.wid, {decode_if.data.PC, 1'd0}, instr));
|
||||
`TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, INSTANCE_ID, decode_if.data.wid, {decode_if.data.PC, 1'd0}, instr));
|
||||
trace_ex_type(1, decode_if.data.ex_type);
|
||||
`TRACE(1, (", op="));
|
||||
trace_ex_op(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args);
|
||||
|
|
|
@ -12,10 +12,9 @@
|
|||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
`include "VX_trace.vh"
|
||||
|
||||
module VX_dispatch import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -24,12 +23,12 @@ module VX_dispatch import VX_gpu_pkg::*; #(
|
|||
output wire [`PERF_CTR_BITS-1:0] perf_stalls [`NUM_EX_UNITS],
|
||||
`endif
|
||||
// inputs
|
||||
VX_operands_if.slave operands_if [`ISSUE_WIDTH],
|
||||
VX_operands_if.slave operands_if,
|
||||
|
||||
// outputs
|
||||
VX_dispatch_if.master dispatch_if [`NUM_EX_UNITS * `ISSUE_WIDTH]
|
||||
VX_dispatch_if.master dispatch_if [`NUM_EX_UNITS]
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
|
||||
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + `INST_OP_BITS + `INST_ARGS_BITS + 1 + `NR_BITS + (3 * `NUM_THREADS * `XLEN) + `NT_WIDTH;
|
||||
|
||||
|
@ -38,104 +37,71 @@ module VX_dispatch import VX_gpu_pkg::*; #(
|
|||
assign tids[i] = `NT_WIDTH'(i);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
wire [`NT_WIDTH-1:0] last_active_tid;
|
||||
|
||||
wire [`NT_WIDTH-1:0] last_active_tid;
|
||||
VX_find_first #(
|
||||
.N (`NUM_THREADS),
|
||||
.DATAW (`NT_WIDTH),
|
||||
.REVERSE (1)
|
||||
) last_tid_select (
|
||||
.valid_in (operands_if.data.tmask),
|
||||
.data_in (tids),
|
||||
.data_out (last_active_tid),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
VX_find_first #(
|
||||
.N (`NUM_THREADS),
|
||||
.DATAW (`NT_WIDTH),
|
||||
.REVERSE (1)
|
||||
) last_tid_select (
|
||||
.valid_in (operands_if[i].data.tmask),
|
||||
.data_in (tids),
|
||||
.data_out (last_active_tid),
|
||||
`UNUSED_PIN (valid_out)
|
||||
wire [`NUM_EX_UNITS-1:0] operands_reset;
|
||||
assign operands_if.ready = operands_reset[operands_if.data.ex_type];
|
||||
|
||||
for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin
|
||||
|
||||
`RESET_RELAY (buffer_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2),
|
||||
.OUT_REG (2), // 2-cycle EB for area reduction
|
||||
.LUTRAM (1)
|
||||
) buffer (
|
||||
.clk (clk),
|
||||
.reset (buffer_reset),
|
||||
.valid_in (operands_if.valid && (operands_if.data.ex_type == `EX_BITS'(i))),
|
||||
.ready_in (operands_reset[i]),
|
||||
.data_in ({
|
||||
operands_if.data.uuid,
|
||||
operands_if.data.wis,
|
||||
operands_if.data.tmask,
|
||||
operands_if.data.PC,
|
||||
operands_if.data.op_type,
|
||||
operands_if.data.op_args,
|
||||
operands_if.data.wb,
|
||||
operands_if.data.rd,
|
||||
last_active_tid,
|
||||
operands_if.data.rs1_data,
|
||||
operands_if.data.rs2_data,
|
||||
operands_if.data.rs3_data
|
||||
}),
|
||||
.data_out (dispatch_if[i].data),
|
||||
.valid_out (dispatch_if[i].valid),
|
||||
.ready_out (dispatch_if[i].ready)
|
||||
);
|
||||
|
||||
wire [`NUM_EX_UNITS-1:0] operands_reset;
|
||||
|
||||
`RESET_RELAY (buf_reset, reset);
|
||||
|
||||
for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2),
|
||||
.OUT_REG (2)
|
||||
) buffer (
|
||||
.clk (clk),
|
||||
.reset (buf_reset),
|
||||
.valid_in (operands_if[i].valid && (operands_if[i].data.ex_type == j)),
|
||||
.ready_in (operands_reset[j]),
|
||||
.data_in (`TO_DISPATCH_DATA(operands_if[i].data, last_active_tid)),
|
||||
.data_out (dispatch_if[j * `ISSUE_WIDTH + i].data),
|
||||
.valid_out (dispatch_if[j * `ISSUE_WIDTH + i].valid),
|
||||
.ready_out (dispatch_if[j * `ISSUE_WIDTH + i].ready)
|
||||
);
|
||||
end
|
||||
|
||||
assign operands_if[i].ready = operands_reset[operands_if[i].data.ex_type];
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
wire [`NUM_EX_UNITS-1:0] perf_unit_stalls_per_cycle, perf_unit_stalls_per_cycle_r;
|
||||
reg [`ISSUE_WIDTH-1:0][`NUM_EX_UNITS-1:0] perf_issue_unit_stalls_per_cycle;
|
||||
reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_stalls_r;
|
||||
|
||||
for (genvar i=0; i < `ISSUE_WIDTH; ++i) begin
|
||||
always @(*) begin
|
||||
perf_issue_unit_stalls_per_cycle[i] = '0;
|
||||
if (operands_if[i].valid && ~operands_if[i].ready) begin
|
||||
perf_issue_unit_stalls_per_cycle[i][operands_if[i].data.ex_type] = 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
VX_reduce #(
|
||||
.DATAW_IN (`NUM_EX_UNITS),
|
||||
.N (`ISSUE_WIDTH),
|
||||
.OP ("|")
|
||||
) reduce (
|
||||
.data_in (perf_issue_unit_stalls_per_cycle),
|
||||
.data_out (perf_unit_stalls_per_cycle)
|
||||
);
|
||||
|
||||
`BUFFER(perf_unit_stalls_per_cycle_r, perf_unit_stalls_per_cycle);
|
||||
wire operands_if_stall = operands_if.valid && ~operands_if.ready;
|
||||
|
||||
for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_stalls_r[i] <= '0;
|
||||
end else begin
|
||||
perf_stalls_r[i] <= perf_stalls_r[i] + `PERF_CTR_BITS'(perf_unit_stalls_per_cycle_r[i]);
|
||||
perf_stalls_r[i] <= perf_stalls_r[i] + `PERF_CTR_BITS'(operands_if_stall && operands_if.data.ex_type == `EX_BITS'(i));
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i=0; i < `NUM_EX_UNITS; ++i) begin
|
||||
assign perf_stalls[i] = perf_stalls_r[i];
|
||||
end
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_PIPELINE
|
||||
for (genvar i=0; i < `ISSUE_WIDTH; ++i) begin
|
||||
always @(posedge clk) begin
|
||||
if (operands_if[i].valid && operands_if[i].ready) begin
|
||||
`TRACE(1, ("%d: core%0d-issue: wid=%0d, PC=0x%0h, ex=", $time, CORE_ID, wis_to_wid(operands_if[i].data.wis, i), {operands_if[i].data.PC, 1'b0}));
|
||||
trace_ex_type(1, operands_if[i].data.ex_type);
|
||||
`TRACE(1, (", op="));
|
||||
trace_ex_op(1, operands_if[i].data.ex_type, operands_if[i].data.op_type, operands_if[i].data.op_args);
|
||||
`TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if[i].data.tmask, operands_if[i].data.wb, operands_if[i].data.rd));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", operands_if[i].data.rs1_data, `NUM_THREADS);
|
||||
`TRACE(1, (", rs2_data="));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", operands_if[i].data.rs2_data, `NUM_THREADS);
|
||||
`TRACE(1, (", rs3_data="));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", operands_if[i].data.rs3_data, `NUM_THREADS);
|
||||
trace_op_args(1, operands_if[i].data.ex_type, operands_if[i].data.op_type, operands_if[i].data.op_args);
|
||||
`TRACE(1, (" (#%0d)\n", operands_if[i].data.uuid));
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_execute import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
@ -55,7 +56,7 @@ module VX_execute import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (sfu_reset, reset);
|
||||
|
||||
VX_alu_unit #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-alu", INSTANCE_ID))
|
||||
) alu_unit (
|
||||
.clk (clk),
|
||||
.reset (alu_reset),
|
||||
|
@ -67,7 +68,7 @@ module VX_execute import VX_gpu_pkg::*; #(
|
|||
`SCOPE_IO_SWITCH (1)
|
||||
|
||||
VX_lsu_unit #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-lsu", INSTANCE_ID))
|
||||
) lsu_unit (
|
||||
`SCOPE_IO_BIND (0)
|
||||
.clk (clk),
|
||||
|
@ -81,7 +82,7 @@ module VX_execute import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (fpu_reset, reset);
|
||||
|
||||
VX_fpu_unit #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-fpu", INSTANCE_ID))
|
||||
) fpu_unit (
|
||||
.clk (clk),
|
||||
.reset (fpu_reset),
|
||||
|
@ -92,6 +93,7 @@ module VX_execute import VX_gpu_pkg::*; #(
|
|||
`endif
|
||||
|
||||
VX_sfu_unit #(
|
||||
.INSTANCE_ID ($sformatf("%s-sfu", INSTANCE_ID)),
|
||||
.CORE_ID (CORE_ID)
|
||||
) sfu_unit (
|
||||
.clk (clk),
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_fetch import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
|
@ -30,7 +30,7 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
// outputs
|
||||
VX_fetch_if.master fetch_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
wire icache_req_valid;
|
||||
|
@ -78,9 +78,11 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
.reset (reset),
|
||||
.incr (icache_req_fire && schedule_if.data.wid == i),
|
||||
.decr (fetch_if.ibuf_pop[i]),
|
||||
`UNUSED_PIN (empty),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
.full (pending_ibuf_full[i]),
|
||||
`UNUSED_PIN (size),
|
||||
`UNUSED_PIN (empty)
|
||||
`UNUSED_PIN (alm_full),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
end
|
||||
wire ibuf_ready = ~pending_ibuf_full[schedule_if.data.wid];
|
||||
|
@ -89,7 +91,7 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
`endif
|
||||
|
||||
`RUNTIME_ASSERT((!schedule_if.valid || schedule_if.data.PC != 0),
|
||||
("%t: *** invalid PC=0x%0h, wid=%0d, tmask=%b (#%0d)", $time, {schedule_if.data.PC, 1'b0}, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.uuid))
|
||||
("%t: *** %s invalid PC=0x%0h, wid=%0d, tmask=%b (#%0d)", $time, INSTANCE_ID, {schedule_if.data.PC, 1'b0}, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.uuid))
|
||||
|
||||
// Icache Request
|
||||
|
||||
|
@ -129,45 +131,33 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
assign icache_bus_if.rsp_ready = fetch_if.ready;
|
||||
|
||||
`ifdef DBG_SCOPE_FETCH
|
||||
if (CORE_ID == 0) begin
|
||||
`ifdef SCOPE
|
||||
wire schedule_fire = schedule_if.valid && schedule_if.ready;
|
||||
wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready;
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (1),
|
||||
.TRIGGERW (4),
|
||||
.PROBEW (`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS +
|
||||
ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH +
|
||||
(ICACHE_WORD_SIZE*8) + ICACHE_TAG_WIDTH)
|
||||
) scope_tap (
|
||||
.clk(clk),
|
||||
.reset(scope_reset),
|
||||
.start(1'b0),
|
||||
.stop(1'b0),
|
||||
.triggers({
|
||||
reset,
|
||||
schedule_fire,
|
||||
icache_req_fire,
|
||||
icache_rsp_fire
|
||||
}),
|
||||
.probes({
|
||||
schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC,
|
||||
icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr,
|
||||
icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag
|
||||
}),
|
||||
.bus_in(scope_bus_in),
|
||||
.bus_out(scope_bus_out)
|
||||
);
|
||||
`endif
|
||||
`ifdef CHIPSCOPE
|
||||
ila_fetch ila_fetch_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({reset, schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC, schedule_if.ready, schedule_if.valid}),
|
||||
.probe1 ({icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr, icache_bus_if.req_ready, icache_bus_if.req_valid}),
|
||||
.probe2 ({icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag, icache_bus_if.rsp_ready, icache_bus_if.rsp_valid})
|
||||
);
|
||||
`endif
|
||||
end
|
||||
wire schedule_fire = schedule_if.valid && schedule_if.ready;
|
||||
wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready;
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (1),
|
||||
.TRIGGERW (4),
|
||||
.PROBEW (`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS +
|
||||
ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH +
|
||||
(ICACHE_WORD_SIZE*8) + ICACHE_TAG_WIDTH)
|
||||
) scope_tap (
|
||||
.clk (clk),
|
||||
.reset (scope_reset),
|
||||
.start (1'b0),
|
||||
.stop (1'b0),
|
||||
.triggers ({
|
||||
reset,
|
||||
schedule_fire,
|
||||
icache_req_fire,
|
||||
icache_rsp_fire
|
||||
}),
|
||||
.probes ({
|
||||
schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC,
|
||||
icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr,
|
||||
icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag
|
||||
}),
|
||||
.bus_in (scope_bus_in),
|
||||
.bus_out (scope_bus_out)
|
||||
);
|
||||
`else
|
||||
`SCOPE_IO_UNUSED()
|
||||
`endif
|
||||
|
@ -177,10 +167,10 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
wire fetch_fire = fetch_if.valid && fetch_if.ready;
|
||||
always @(posedge clk) begin
|
||||
if (schedule_fire) begin
|
||||
`TRACE(1, ("%d: I$%0d req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, CORE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid));
|
||||
`TRACE(1, ("%d: %s req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, INSTANCE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid));
|
||||
end
|
||||
if (fetch_fire) begin
|
||||
`TRACE(1, ("%d: I$%0d rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, CORE_ID, fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid));
|
||||
`TRACE(1, ("%d: %s rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, INSTANCE_ID, fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fpu_unit import VX_fpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -26,7 +26,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
VX_commit_if.master commit_if [`ISSUE_WIDTH],
|
||||
VX_fpu_csr_if.master fpu_csr_if[`NUM_FPU_BLOCKS]
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam BLOCK_SIZE = `NUM_FPU_BLOCKS;
|
||||
localparam NUM_LANES = `NUM_FPU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
|
@ -84,12 +84,14 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
wire execute_fire = per_block_execute_if[block_idx].valid && per_block_execute_if[block_idx].ready;
|
||||
wire fpu_rsp_fire = fpu_rsp_valid && fpu_rsp_ready;
|
||||
|
||||
`RESET_RELAY (ibuf_reset, block_reset);
|
||||
|
||||
VX_index_buffer #(
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + PID_WIDTH + 1 + 1),
|
||||
.SIZE (`FPUQ_SIZE)
|
||||
) tag_store (
|
||||
.clk (clk),
|
||||
.reset (block_reset),
|
||||
.reset (ibuf_reset),
|
||||
.acquire_en (execute_fire),
|
||||
.write_addr (fpu_req_tag),
|
||||
.write_data ({per_block_execute_if[block_idx].data.uuid, per_block_execute_if[block_idx].data.wid, per_block_execute_if[block_idx].data.tmask, per_block_execute_if[block_idx].data.PC, per_block_execute_if[block_idx].data.rd, per_block_execute_if[block_idx].data.pid, per_block_execute_if[block_idx].data.sop, per_block_execute_if[block_idx].data.eop}),
|
||||
|
@ -226,12 +228,14 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
|
||||
// send response
|
||||
|
||||
`RESET_RELAY (rsp_reset, block_reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + (NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1),
|
||||
.SIZE (0)
|
||||
) rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (block_reset),
|
||||
.reset (rsp_reset),
|
||||
.valid_in (fpu_rsp_valid),
|
||||
.ready_in (fpu_rsp_ready),
|
||||
.data_in ({fpu_rsp_uuid, fpu_rsp_wid, fpu_rsp_tmask, fpu_rsp_PC, fpu_rsp_rd, fpu_rsp_result, fpu_rsp_pid, fpu_rsp_sop, fpu_rsp_eop}),
|
||||
|
|
|
@ -14,33 +14,36 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_ibuffer import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
output wire [`PERF_CTR_BITS-1:0] perf_stalls,
|
||||
`endif
|
||||
|
||||
// inputs
|
||||
VX_decode_if.slave decode_if,
|
||||
|
||||
// outputs
|
||||
VX_ibuffer_if.master ibuffer_if [`NUM_WARPS]
|
||||
VX_ibuffer_if.master ibuffer_if [PER_ISSUE_WARPS]
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam DATAW = `UUID_WIDTH + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + (`NR_BITS * 4);
|
||||
|
||||
wire [`NUM_WARPS-1:0] ibuf_ready_in;
|
||||
|
||||
wire [PER_ISSUE_WARPS-1:0] ibuf_ready_in;
|
||||
assign decode_if.ready = ibuf_ready_in[decode_if.data.wid];
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (`IBUF_SIZE),
|
||||
.OUT_REG (2) // use a 2-cycle FIFO
|
||||
.OUT_REG (2) // 2-cycle EB for area reduction
|
||||
) instr_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (decode_if.valid && decode_if.data.wid == i),
|
||||
.valid_in (decode_if.valid && decode_if.data.wid == ISSUE_WIS_W'(w)),
|
||||
.data_in ({
|
||||
decode_if.data.uuid,
|
||||
decode_if.data.tmask,
|
||||
|
@ -52,15 +55,32 @@ module VX_ibuffer import VX_gpu_pkg::*; #(
|
|||
decode_if.data.rd,
|
||||
decode_if.data.rs1,
|
||||
decode_if.data.rs2,
|
||||
decode_if.data.rs3}),
|
||||
.ready_in (ibuf_ready_in[i]),
|
||||
.valid_out(ibuffer_if[i].valid),
|
||||
.data_out (ibuffer_if[i].data),
|
||||
.ready_out(ibuffer_if[i].ready)
|
||||
decode_if.data.rs3
|
||||
}),
|
||||
.ready_in (ibuf_ready_in[w]),
|
||||
.valid_out(ibuffer_if[w].valid),
|
||||
.data_out (ibuffer_if[w].data),
|
||||
.ready_out(ibuffer_if[w].ready)
|
||||
);
|
||||
`ifndef L1_ENABLE
|
||||
assign decode_if.ibuf_pop[i] = ibuffer_if[i].valid && ibuffer_if[i].ready;
|
||||
assign decode_if.ibuf_pop[w] = ibuffer_if[w].valid && ibuffer_if[w].ready;
|
||||
`endif
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`PERF_CTR_BITS-1:0] perf_ibf_stalls;
|
||||
|
||||
wire decode_if_stall = decode_if.valid && ~decode_if.ready;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_ibf_stalls <= '0;
|
||||
end else begin
|
||||
perf_ibf_stalls <= perf_ibf_stalls + `PERF_CTR_BITS'(decode_if_stall);
|
||||
end
|
||||
end
|
||||
|
||||
assign perf_stalls = perf_ibf_stalls;
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -12,10 +12,9 @@
|
|||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
`include "VX_trace.vh"
|
||||
|
||||
module VX_issue #(
|
||||
parameter CORE_ID = 0
|
||||
module VX_issue import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
|
@ -23,137 +22,81 @@ module VX_issue #(
|
|||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_pipeline_perf_if.issue perf_issue_if,
|
||||
output issue_perf_t issue_perf,
|
||||
`endif
|
||||
|
||||
VX_decode_if.slave decode_if,
|
||||
VX_writeback_if.slave writeback_if [`ISSUE_WIDTH],
|
||||
VX_dispatch_if.master dispatch_if [`NUM_EX_UNITS * `ISSUE_WIDTH]
|
||||
);
|
||||
VX_ibuffer_if ibuffer_if [`NUM_WARPS]();
|
||||
VX_scoreboard_if scoreboard_if [`ISSUE_WIDTH]();
|
||||
VX_operands_if operands_if [`ISSUE_WIDTH]();
|
||||
|
||||
`RESET_RELAY (ibuf_reset, reset);
|
||||
`RESET_RELAY (scoreboard_reset, reset);
|
||||
`RESET_RELAY (operands_reset, reset);
|
||||
`RESET_RELAY (dispatch_reset, reset);
|
||||
|
||||
VX_ibuffer #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) ibuffer (
|
||||
.clk (clk),
|
||||
.reset (ibuf_reset),
|
||||
.decode_if (decode_if),
|
||||
.ibuffer_if (ibuffer_if)
|
||||
);
|
||||
|
||||
VX_scoreboard #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) scoreboard (
|
||||
.clk (clk),
|
||||
.reset (scoreboard_reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_scb_stalls(perf_issue_if.scb_stalls),
|
||||
.perf_units_uses(perf_issue_if.units_uses),
|
||||
.perf_sfu_uses (perf_issue_if.sfu_uses),
|
||||
`endif
|
||||
.writeback_if (writeback_if),
|
||||
.ibuffer_if (ibuffer_if),
|
||||
.scoreboard_if (scoreboard_if)
|
||||
);
|
||||
|
||||
VX_operands #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) operands (
|
||||
.clk (clk),
|
||||
.reset (operands_reset),
|
||||
.writeback_if (writeback_if),
|
||||
.scoreboard_if (scoreboard_if),
|
||||
.operands_if (operands_if)
|
||||
);
|
||||
|
||||
VX_dispatch #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) dispatch (
|
||||
.clk (clk),
|
||||
.reset (dispatch_reset),
|
||||
`ifdef PERF_ENABLE
|
||||
`UNUSED_PIN (perf_stalls),
|
||||
`endif
|
||||
.operands_if (operands_if),
|
||||
.dispatch_if (dispatch_if)
|
||||
);
|
||||
|
||||
`ifdef DBG_SCOPE_ISSUE
|
||||
if (CORE_ID == 0) begin
|
||||
`ifdef SCOPE
|
||||
wire operands_if_fire = operands_if[0].valid && operands_if[0].ready;
|
||||
wire operands_if_not_ready = ~operands_if[0].ready;
|
||||
wire writeback_if_valid = writeback_if[0].valid;
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (2),
|
||||
.TRIGGERW (4),
|
||||
.PROBEW (`UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS +
|
||||
1 + `NR_BITS + (`NUM_THREADS * 3 * `XLEN) +
|
||||
`UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1)
|
||||
) scope_tap (
|
||||
.clk(clk),
|
||||
.reset(scope_reset),
|
||||
.start(1'b0),
|
||||
.stop(1'b0),
|
||||
.triggers({
|
||||
reset,
|
||||
operands_if_fire,
|
||||
operands_if_not_ready,
|
||||
writeback_if_valid
|
||||
}),
|
||||
.probes({
|
||||
operands_if[0].data.uuid,
|
||||
operands_if[0].data.tmask,
|
||||
operands_if[0].data.ex_type,
|
||||
operands_if[0].data.op_type,
|
||||
operands_if[0].data.wb,
|
||||
operands_if[0].data.rd,
|
||||
operands_if[0].data.rs1_data,
|
||||
operands_if[0].data.rs2_data,
|
||||
operands_if[0].data.rs3_data,
|
||||
writeback_if[0].data.uuid,
|
||||
writeback_if[0].data.tmask,
|
||||
writeback_if[0].data.rd,
|
||||
writeback_if[0].data.data,
|
||||
writeback_if[0].data.eop
|
||||
}),
|
||||
.bus_in(scope_bus_in),
|
||||
.bus_out(scope_bus_out)
|
||||
);
|
||||
`endif
|
||||
`ifdef CHIPSCOPE
|
||||
ila_issue ila_issue_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({operands_if.uuid, ibuffer.rs3, ibuffer.rs2, ibuffer.rs1, operands_if.PC, operands_if.tmask, operands_if.wid, operands_if.ex_type, operands_if.op_type, operands_if.ready, operands_if.valid}),
|
||||
.probe1 ({writeback_if.uuid, writeback_if.data[0], writeback_if.PC, writeback_if.tmask, writeback_if.wid, writeback_if.eop, writeback_if.valid})
|
||||
);
|
||||
`endif
|
||||
end
|
||||
`else
|
||||
`SCOPE_IO_UNUSED()
|
||||
`endif
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`PERF_CTR_BITS-1:0] perf_ibf_stalls;
|
||||
|
||||
wire decode_stall = decode_if.valid && ~decode_if.ready;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_ibf_stalls <= '0;
|
||||
end else begin
|
||||
perf_ibf_stalls <= perf_ibf_stalls + `PERF_CTR_BITS'(decode_stall);
|
||||
end
|
||||
issue_perf_t per_issue_perf [`ISSUE_WIDTH];
|
||||
`PERF_COUNTER_ADD (issue_perf, per_issue_perf, ibf_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
|
||||
`PERF_COUNTER_ADD (issue_perf, per_issue_perf, scb_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
|
||||
`PERF_COUNTER_ADD (issue_perf, per_issue_perf, opd_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
|
||||
for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin
|
||||
`PERF_COUNTER_ADD (issue_perf, per_issue_perf, units_uses[i], `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
|
||||
end
|
||||
for (genvar i = 0; i < `NUM_SFU_UNITS; ++i) begin
|
||||
`PERF_COUNTER_ADD (issue_perf, per_issue_perf, sfu_uses[i], `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
|
||||
end
|
||||
|
||||
assign perf_issue_if.ibf_stalls = perf_ibf_stalls;
|
||||
`endif
|
||||
|
||||
wire [ISSUE_ISW_W-1:0] decode_isw = wid_to_isw(decode_if.data.wid);
|
||||
wire [ISSUE_WIS_W-1:0] decode_wis = wid_to_wis(decode_if.data.wid);
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0] decode_ready_in;
|
||||
assign decode_if.ready = decode_ready_in[decode_isw];
|
||||
|
||||
`SCOPE_IO_SWITCH (`ISSUE_WIDTH)
|
||||
|
||||
for (genvar issue_id = 0; issue_id < `ISSUE_WIDTH; ++issue_id) begin : issue_slices
|
||||
VX_decode_if #(
|
||||
.NUM_WARPS (PER_ISSUE_WARPS)
|
||||
) per_issue_decode_if();
|
||||
|
||||
VX_dispatch_if per_issue_dispatch_if[`NUM_EX_UNITS]();
|
||||
|
||||
assign per_issue_decode_if.valid = decode_if.valid && (decode_isw == ISSUE_ISW_W'(issue_id));
|
||||
assign per_issue_decode_if.data.uuid = decode_if.data.uuid;
|
||||
assign per_issue_decode_if.data.wid = decode_wis;
|
||||
assign per_issue_decode_if.data.tmask = decode_if.data.tmask;
|
||||
assign per_issue_decode_if.data.PC = decode_if.data.PC;
|
||||
assign per_issue_decode_if.data.ex_type = decode_if.data.ex_type;
|
||||
assign per_issue_decode_if.data.op_type = decode_if.data.op_type;
|
||||
assign per_issue_decode_if.data.op_args = decode_if.data.op_args;
|
||||
assign per_issue_decode_if.data.wb = decode_if.data.wb;
|
||||
assign per_issue_decode_if.data.rd = decode_if.data.rd;
|
||||
assign per_issue_decode_if.data.rs1 = decode_if.data.rs1;
|
||||
assign per_issue_decode_if.data.rs2 = decode_if.data.rs2;
|
||||
assign per_issue_decode_if.data.rs3 = decode_if.data.rs3;
|
||||
assign decode_ready_in[issue_id] = per_issue_decode_if.ready;
|
||||
`ifndef L1_ENABLE
|
||||
assign decode_if.ibuf_pop[issue_id * PER_ISSUE_WARPS +: PER_ISSUE_WARPS] = per_issue_decode_if.ibuf_pop;
|
||||
`endif
|
||||
|
||||
`RESET_RELAY (slice_reset, reset);
|
||||
|
||||
VX_issue_slice #(
|
||||
.INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, issue_id)),
|
||||
.ISSUE_ID (issue_id)
|
||||
) issue_slice (
|
||||
`SCOPE_IO_BIND(issue_id)
|
||||
.clk (clk),
|
||||
.reset (slice_reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.issue_perf (per_issue_perf[issue_id]),
|
||||
`endif
|
||||
.decode_if (per_issue_decode_if),
|
||||
.writeback_if (writeback_if[issue_id]),
|
||||
.dispatch_if (per_issue_dispatch_if)
|
||||
);
|
||||
|
||||
// Assign transposed dispatch_if
|
||||
for (genvar ex_id = 0; ex_id < `NUM_EX_UNITS; ++ex_id) begin
|
||||
`ASSIGN_VX_IF(dispatch_if[ex_id * `ISSUE_WIDTH + issue_id], per_issue_dispatch_if[ex_id]);
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
159
hw/rtl/core/VX_issue_slice.sv
Normal file
159
hw/rtl/core/VX_issue_slice.sv
Normal file
|
@ -0,0 +1,159 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_issue_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter ISSUE_ID = 0
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
output issue_perf_t issue_perf,
|
||||
`endif
|
||||
|
||||
VX_decode_if.slave decode_if,
|
||||
VX_writeback_if.slave writeback_if,
|
||||
VX_dispatch_if.master dispatch_if [`NUM_EX_UNITS]
|
||||
);
|
||||
`UNUSED_PARAM (ISSUE_ID)
|
||||
|
||||
VX_ibuffer_if ibuffer_if [PER_ISSUE_WARPS]();
|
||||
VX_scoreboard_if scoreboard_if();
|
||||
VX_operands_if operands_if();
|
||||
|
||||
`RESET_RELAY (ibuf_reset, reset);
|
||||
`RESET_RELAY (scoreboard_reset, reset);
|
||||
`RESET_RELAY (operands_reset, reset);
|
||||
`RESET_RELAY (dispatch_reset, reset);
|
||||
|
||||
VX_ibuffer #(
|
||||
.INSTANCE_ID ($sformatf("%s-ibuffer", INSTANCE_ID))
|
||||
) ibuffer (
|
||||
.clk (clk),
|
||||
.reset (ibuf_reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_stalls (issue_perf.ibf_stalls),
|
||||
`endif
|
||||
.decode_if (decode_if),
|
||||
.ibuffer_if (ibuffer_if)
|
||||
);
|
||||
|
||||
VX_scoreboard #(
|
||||
.INSTANCE_ID ($sformatf("%s-scoreboard", INSTANCE_ID))
|
||||
) scoreboard (
|
||||
.clk (clk),
|
||||
.reset (scoreboard_reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_stalls (issue_perf.scb_stalls),
|
||||
.perf_units_uses(issue_perf.units_uses),
|
||||
.perf_sfu_uses (issue_perf.sfu_uses),
|
||||
`endif
|
||||
.writeback_if (writeback_if),
|
||||
.ibuffer_if (ibuffer_if),
|
||||
.scoreboard_if (scoreboard_if)
|
||||
);
|
||||
|
||||
VX_operands #(
|
||||
.INSTANCE_ID ($sformatf("%s-operands", INSTANCE_ID))
|
||||
) operands (
|
||||
.clk (clk),
|
||||
.reset (operands_reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_stalls (issue_perf.opd_stalls),
|
||||
`endif
|
||||
.writeback_if (writeback_if),
|
||||
.scoreboard_if (scoreboard_if),
|
||||
.operands_if (operands_if)
|
||||
);
|
||||
|
||||
VX_dispatch #(
|
||||
.INSTANCE_ID ($sformatf("%s-dispatch", INSTANCE_ID))
|
||||
) dispatch (
|
||||
.clk (clk),
|
||||
.reset (dispatch_reset),
|
||||
`ifdef PERF_ENABLE
|
||||
`UNUSED_PIN (perf_stalls),
|
||||
`endif
|
||||
.operands_if (operands_if),
|
||||
.dispatch_if (dispatch_if)
|
||||
);
|
||||
|
||||
`ifdef DBG_SCOPE_ISSUE
|
||||
wire operands_if_fire = operands_if.valid && operands_if.ready;
|
||||
wire operands_if_not_ready = ~operands_if.ready;
|
||||
wire writeback_if_valid = writeback_if.valid;
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (2),
|
||||
.TRIGGERW (4),
|
||||
.PROBEW (`UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS +
|
||||
1 + `NR_BITS + (`NUM_THREADS * 3 * `XLEN) +
|
||||
`UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1)
|
||||
) scope_tap (
|
||||
.clk (clk),
|
||||
.reset (scope_reset),
|
||||
.start (1'b0),
|
||||
.stop (1'b0),
|
||||
.triggers ({
|
||||
reset,
|
||||
operands_if_fire,
|
||||
operands_if_not_ready,
|
||||
writeback_if_valid
|
||||
}),
|
||||
.probes ({
|
||||
operands_if.data.uuid,
|
||||
operands_if.data.tmask,
|
||||
operands_if.data.ex_type,
|
||||
operands_if.data.op_type,
|
||||
operands_if.data.wb,
|
||||
operands_if.data.rd,
|
||||
operands_if.data.rs1_data,
|
||||
operands_if.data.rs2_data,
|
||||
operands_if.data.rs3_data,
|
||||
writeback_if.data.uuid,
|
||||
writeback_if.data.tmask,
|
||||
writeback_if.data.rd,
|
||||
writeback_if.data.data,
|
||||
writeback_if.data.eop
|
||||
}),
|
||||
.bus_in (scope_bus_in),
|
||||
.bus_out (scope_bus_out)
|
||||
);
|
||||
`else
|
||||
`SCOPE_IO_UNUSED()
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (operands_if.valid && operands_if.ready) begin
|
||||
`TRACE(1, ("%d: %s wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), {operands_if.data.PC, 1'b0}));
|
||||
trace_ex_type(1, operands_if.data.ex_type);
|
||||
`TRACE(1, (", op="));
|
||||
trace_ex_op(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
|
||||
`TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if.data.tmask, operands_if.data.wb, operands_if.data.rd));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs1_data, `NUM_THREADS);
|
||||
`TRACE(1, (", rs2_data="));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs2_data, `NUM_THREADS);
|
||||
`TRACE(1, (", rs3_data="));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs3_data, `NUM_THREADS);
|
||||
trace_op_args(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
|
||||
`TRACE(1, (" (#%0d)\n", operands_if.data.uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
132
hw/rtl/core/VX_issue_top.sv
Normal file
132
hw/rtl/core/VX_issue_top.sv
Normal file
|
@ -0,0 +1,132 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_issue_top import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "issue"
|
||||
) (
|
||||
// Clock
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
input wire decode_valid,
|
||||
input wire [`UUID_WIDTH-1:0] decode_uuid,
|
||||
input wire [`NW_WIDTH-1:0] decode_wid,
|
||||
input wire [`NUM_THREADS-1:0] decode_tmask,
|
||||
input wire [`PC_BITS-1:0] decode_PC,
|
||||
input wire [`EX_BITS-1:0] decode_ex_type,
|
||||
input wire [`INST_OP_BITS-1:0] decode_op_type,
|
||||
input op_args_t decode_op_args,
|
||||
input wire decode_wb,
|
||||
input wire [`NR_BITS-1:0] decode_rd,
|
||||
input wire [`NR_BITS-1:0] decode_rs1,
|
||||
input wire [`NR_BITS-1:0] decode_rs2,
|
||||
input wire [`NR_BITS-1:0] decode_rs3,
|
||||
output wire decode_ready,
|
||||
|
||||
input wire writeback_valid[`ISSUE_WIDTH],
|
||||
input wire [`UUID_WIDTH-1:0] writeback_uuid[`ISSUE_WIDTH],
|
||||
input wire [ISSUE_WIS_W-1:0] writeback_wis[`ISSUE_WIDTH],
|
||||
input wire [`NUM_THREADS-1:0] writeback_tmask[`ISSUE_WIDTH],
|
||||
input wire [`PC_BITS-1:0] writeback_PC[`ISSUE_WIDTH],
|
||||
input wire [`NR_BITS-1:0] writeback_rd[`ISSUE_WIDTH],
|
||||
input wire [`NUM_THREADS-1:0][`XLEN-1:0] writeback_data[`ISSUE_WIDTH],
|
||||
input wire writeback_sop[`ISSUE_WIDTH],
|
||||
input wire writeback_eop[`ISSUE_WIDTH],
|
||||
|
||||
output wire dispatch_valid[`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [`UUID_WIDTH-1:0] dispatch_uuid[`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [ISSUE_WIS_W-1:0] dispatch_wis[`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [`NUM_THREADS-1:0] dispatch_tmask[`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [`PC_BITS-1:0] dispatch_PC[`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [`INST_ALU_BITS-1:0] dispatch_op_type[`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output op_args_t dispatch_op_args[`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire dispatch_wb[`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [`NR_BITS-1:0] dispatch_rd[`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [`NT_WIDTH-1:0] dispatch_tid[`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs1_data[`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs2_data[`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs3_data[`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
input wire dispatch_ready[`NUM_EX_UNITS * `ISSUE_WIDTH]
|
||||
);
|
||||
|
||||
VX_decode_if decode_if();
|
||||
VX_dispatch_if dispatch_if[`NUM_EX_UNITS * `ISSUE_WIDTH]();
|
||||
VX_writeback_if writeback_if[`ISSUE_WIDTH]();
|
||||
|
||||
assign decode_if.valid = decode_valid;
|
||||
assign decode_if.data.uuid = decode_uuid;
|
||||
assign decode_if.data.wid = decode_wid;
|
||||
assign decode_if.data.tmask = decode_tmask;
|
||||
assign decode_if.data.PC = decode_PC;
|
||||
assign decode_if.data.ex_type = decode_ex_type;
|
||||
assign decode_if.data.op_type = decode_op_type;
|
||||
assign decode_if.data.op_args = decode_op_args;
|
||||
assign decode_if.data.wb = decode_wb;
|
||||
assign decode_if.data.rd = decode_rd;
|
||||
assign decode_if.data.rs1 = decode_rs1;
|
||||
assign decode_if.data.rs2 = decode_rs2;
|
||||
assign decode_if.data.rs3 = decode_rs3;
|
||||
assign decode_ready = decode_if.ready;
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign writeback_if[i].valid = writeback_valid[i];
|
||||
assign writeback_if[i].data.uuid = writeback_uuid[i];
|
||||
assign writeback_if[i].data.wis = writeback_wis[i];
|
||||
assign writeback_if[i].data.tmask = writeback_tmask[i];
|
||||
assign writeback_if[i].data.PC = writeback_PC[i];
|
||||
assign writeback_if[i].data.rd = writeback_rd[i];
|
||||
assign writeback_if[i].data.data = writeback_data[i];
|
||||
assign writeback_if[i].data.sop = writeback_sop[i];
|
||||
assign writeback_if[i].data.eop = writeback_eop[i];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_EX_UNITS * `ISSUE_WIDTH; ++i) begin
|
||||
assign dispatch_valid[i] = dispatch_if[i].valid;
|
||||
assign dispatch_uuid[i] = dispatch_if[i].data.uuid;
|
||||
assign dispatch_wis[i] = dispatch_if[i].data.wis;
|
||||
assign dispatch_tmask[i] = dispatch_if[i].data.tmask;
|
||||
assign dispatch_PC[i] = dispatch_if[i].data.PC;
|
||||
assign dispatch_op_type[i] = dispatch_if[i].data.op_type;
|
||||
assign dispatch_op_args[i] = dispatch_if[i].data.op_args;
|
||||
assign dispatch_wb[i] = dispatch_if[i].data.wb;
|
||||
assign dispatch_rd[i] = dispatch_if[i].data.rd;
|
||||
assign dispatch_tid[i] = dispatch_if[i].data.tid;
|
||||
assign dispatch_rs1_data[i] = dispatch_if[i].data.rs1_data;
|
||||
assign dispatch_rs2_data[i] = dispatch_if[i].data.rs2_data;
|
||||
assign dispatch_rs3_data[i] = dispatch_if[i].data.rs3_data;
|
||||
assign dispatch_if[i].ready = dispatch_ready[i];
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
issue_perf_t issue_perf = '0;
|
||||
`endif
|
||||
|
||||
VX_issue #(
|
||||
.INSTANCE_ID (INSTANCE_ID)
|
||||
) issue (
|
||||
`SCOPE_IO_BIND (0)
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.issue_perf (issue_perf),
|
||||
`endif
|
||||
|
||||
.decode_if (decode_if),
|
||||
.writeback_if (writeback_if),
|
||||
.dispatch_if (dispatch_if)
|
||||
);
|
||||
|
||||
endmodule
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,11 +14,11 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_lmem_unit import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
output cache_perf_t cache_perf,
|
||||
`endif
|
||||
|
@ -37,31 +37,31 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
|||
.NUM_LANES (`NUM_LSU_LANES),
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||
) lmem_lsu_if[`NUM_LSU_BLOCKS]();
|
||||
|
||||
`RESET_RELAY (req_reset, reset);
|
||||
) lsu_switch_if[`NUM_LSU_BLOCKS]();
|
||||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||
|
||||
|
||||
wire [`NUM_LSU_LANES-1:0] is_addr_local_mask;
|
||||
for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin
|
||||
assign is_addr_local_mask[j] = lsu_mem_in_if[i].req_data.atype[j][`ADDR_TYPE_LOCAL];
|
||||
end
|
||||
|
||||
|
||||
wire is_addr_global = | (lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask);
|
||||
wire is_addr_local = | (lsu_mem_in_if[i].req_data.mask & is_addr_local_mask);
|
||||
wire is_addr_local = | (lsu_mem_in_if[i].req_data.mask & is_addr_local_mask);
|
||||
|
||||
wire req_global_ready;
|
||||
wire req_local_ready;
|
||||
|
||||
`RESET_RELAY (switch_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (REQ_DATAW),
|
||||
.SIZE (2),
|
||||
.OUT_REG (1)
|
||||
) req_global_buf (
|
||||
.clk (clk),
|
||||
.reset (req_reset),
|
||||
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_global),
|
||||
.reset (switch_reset),
|
||||
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_global),
|
||||
.data_in ({
|
||||
lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask,
|
||||
lsu_mem_in_if[i].req_data.rw,
|
||||
|
@ -81,7 +81,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
|||
lsu_mem_out_if[i].req_data.atype,
|
||||
lsu_mem_out_if[i].req_data.data,
|
||||
lsu_mem_out_if[i].req_data.tag
|
||||
}),
|
||||
}),
|
||||
.ready_out (lsu_mem_out_if[i].req_ready)
|
||||
);
|
||||
|
||||
|
@ -91,8 +91,8 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
|||
.OUT_REG (0)
|
||||
) req_local_buf (
|
||||
.clk (clk),
|
||||
.reset (req_reset),
|
||||
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_local),
|
||||
.reset (switch_reset),
|
||||
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_local),
|
||||
.data_in ({
|
||||
lsu_mem_in_if[i].req_data.mask & is_addr_local_mask,
|
||||
lsu_mem_in_if[i].req_data.rw,
|
||||
|
@ -103,73 +103,47 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
|||
lsu_mem_in_if[i].req_data.tag
|
||||
}),
|
||||
.ready_in (req_local_ready),
|
||||
.valid_out (lmem_lsu_if[i].req_valid),
|
||||
.valid_out (lsu_switch_if[i].req_valid),
|
||||
.data_out ({
|
||||
lmem_lsu_if[i].req_data.mask,
|
||||
lmem_lsu_if[i].req_data.rw,
|
||||
lmem_lsu_if[i].req_data.byteen,
|
||||
lmem_lsu_if[i].req_data.addr,
|
||||
lmem_lsu_if[i].req_data.atype,
|
||||
lmem_lsu_if[i].req_data.data,
|
||||
lmem_lsu_if[i].req_data.tag
|
||||
}),
|
||||
.ready_out (lmem_lsu_if[i].req_ready)
|
||||
lsu_switch_if[i].req_data.mask,
|
||||
lsu_switch_if[i].req_data.rw,
|
||||
lsu_switch_if[i].req_data.byteen,
|
||||
lsu_switch_if[i].req_data.addr,
|
||||
lsu_switch_if[i].req_data.atype,
|
||||
lsu_switch_if[i].req_data.data,
|
||||
lsu_switch_if[i].req_data.tag
|
||||
}),
|
||||
.ready_out (lsu_switch_if[i].req_ready)
|
||||
);
|
||||
|
||||
assign lsu_mem_in_if[i].req_ready = (req_global_ready && is_addr_global)
|
||||
assign lsu_mem_in_if[i].req_ready = (req_global_ready && is_addr_global)
|
||||
|| (req_local_ready && is_addr_local);
|
||||
end
|
||||
|
||||
`RESET_RELAY (rsp_reset, reset);
|
||||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||
|
||||
wire rsp_arb_valid;
|
||||
wire rsp_arb_index;
|
||||
wire rsp_arb_ready;
|
||||
|
||||
VX_generic_arbiter #(
|
||||
.NUM_REQS (2),
|
||||
.LOCK_ENABLE (1),
|
||||
.TYPE ("R")
|
||||
) arbiter (
|
||||
.clk (clk),
|
||||
.reset (rsp_reset),
|
||||
.requests ({
|
||||
lmem_lsu_if[i].rsp_valid,
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATAW (RSP_DATAW),
|
||||
.ARBITER ("R"),
|
||||
.OUT_BUF (1)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (switch_reset),
|
||||
.valid_in ({
|
||||
lsu_switch_if[i].rsp_valid,
|
||||
lsu_mem_out_if[i].rsp_valid
|
||||
}),
|
||||
.grant_valid (rsp_arb_valid),
|
||||
.grant_index (rsp_arb_index),
|
||||
`UNUSED_PIN (grant_onehot),
|
||||
.grant_unlock(rsp_arb_ready)
|
||||
);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (RSP_DATAW),
|
||||
.SIZE (2),
|
||||
.OUT_REG (0)
|
||||
) rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (rsp_reset),
|
||||
.valid_in (rsp_arb_valid),
|
||||
.data_in ({
|
||||
rsp_arb_index ? lmem_lsu_if[i].rsp_data.mask : lsu_mem_out_if[i].rsp_data.mask,
|
||||
rsp_arb_index ? lmem_lsu_if[i].rsp_data.data : lsu_mem_out_if[i].rsp_data.data,
|
||||
rsp_arb_index ? lmem_lsu_if[i].rsp_data.tag : lsu_mem_out_if[i].rsp_data.tag
|
||||
.ready_in ({
|
||||
lsu_switch_if[i].rsp_ready,
|
||||
lsu_mem_out_if[i].rsp_ready
|
||||
}),
|
||||
.ready_in (rsp_arb_ready),
|
||||
.data_in ({
|
||||
lsu_switch_if[i].rsp_data,
|
||||
lsu_mem_out_if[i].rsp_data
|
||||
}),
|
||||
.data_out (lsu_mem_in_if[i].rsp_data),
|
||||
.valid_out (lsu_mem_in_if[i].rsp_valid),
|
||||
.data_out ({
|
||||
lsu_mem_in_if[i].rsp_data.mask,
|
||||
lsu_mem_in_if[i].rsp_data.data,
|
||||
lsu_mem_in_if[i].rsp_data.tag
|
||||
}),
|
||||
.ready_out (lsu_mem_in_if[i].rsp_ready)
|
||||
.ready_out (lsu_mem_in_if[i].rsp_ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
assign lsu_mem_out_if[i].rsp_ready = rsp_arb_ready && ~rsp_arb_index;
|
||||
assign lmem_lsu_if[i].rsp_ready = rsp_arb_ready && rsp_arb_index;
|
||||
end
|
||||
|
||||
VX_mem_bus_if #(
|
||||
|
@ -177,25 +151,25 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||
) lmem_bus_if[LSU_NUM_REQS]();
|
||||
|
||||
`RESET_RELAY (adapter_reset, reset);
|
||||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||
) lmem_bus_tmp_if[`NUM_LSU_LANES]();
|
||||
|
||||
`RESET_RELAY (adapter_reset, reset);
|
||||
|
||||
VX_lsu_adapter #(
|
||||
.NUM_LANES (`NUM_LSU_LANES),
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH),
|
||||
.TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH),
|
||||
.REQ_OUT_BUF (2),
|
||||
.RSP_OUT_BUF (1)
|
||||
.REQ_OUT_BUF (3),
|
||||
.RSP_OUT_BUF (0)
|
||||
) lsu_adapter (
|
||||
.clk (clk),
|
||||
.reset (adapter_reset),
|
||||
.lsu_mem_if (lmem_lsu_if[i]),
|
||||
.lsu_mem_if (lsu_switch_if[i]),
|
||||
.mem_bus_if (lmem_bus_tmp_if)
|
||||
);
|
||||
|
||||
|
@ -205,17 +179,18 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
|||
end
|
||||
|
||||
`RESET_RELAY (lmem_reset, reset);
|
||||
|
||||
|
||||
VX_local_mem #(
|
||||
.INSTANCE_ID($sformatf("core%0d-lmem", CORE_ID)),
|
||||
.INSTANCE_ID($sformatf("%s-lmem", INSTANCE_ID)),
|
||||
.SIZE (1 << `LMEM_LOG_SIZE),
|
||||
.NUM_REQS (LSU_NUM_REQS),
|
||||
.NUM_BANKS (`LMEM_NUM_BANKS),
|
||||
.WORD_SIZE (LSU_WORD_SIZE),
|
||||
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||
) local_mem (
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH),
|
||||
.OUT_BUF (3)
|
||||
) local_mem (
|
||||
.clk (clk),
|
||||
.reset (lmem_reset),
|
||||
`ifdef PERF_ENABLE
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,10 +14,10 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_lsu_adapter import VX_gpu_pkg::*; #(
|
||||
parameter NUM_LANES = 1,
|
||||
parameter DATA_SIZE = 1,
|
||||
parameter TAG_WIDTH = 1,
|
||||
parameter TAG_SEL_BITS = 0,
|
||||
parameter NUM_LANES = 1,
|
||||
parameter DATA_SIZE = 1,
|
||||
parameter TAG_WIDTH = 1,
|
||||
parameter TAG_SEL_BITS = 0,
|
||||
parameter `STRING ARBITER = "P",
|
||||
parameter REQ_OUT_BUF = 0,
|
||||
parameter RSP_OUT_BUF = 0
|
||||
|
@ -63,12 +63,12 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #(
|
|||
assign mem_bus_if[i].req_data.tag = req_tag_out[i];
|
||||
assign req_ready_out[i] = mem_bus_if[i].req_ready;
|
||||
end
|
||||
|
||||
|
||||
VX_stream_unpack #(
|
||||
.NUM_REQS (NUM_LANES),
|
||||
.DATA_WIDTH (REQ_DATA_WIDTH),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.OUT_BUF (REQ_OUT_BUF)
|
||||
.NUM_REQS (NUM_LANES),
|
||||
.DATA_WIDTH (REQ_DATA_WIDTH),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.OUT_BUF (REQ_OUT_BUF)
|
||||
) stream_unpack (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -77,7 +77,7 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #(
|
|||
.data_in (req_data_in),
|
||||
.tag_in (lsu_mem_if.req_data.tag),
|
||||
.ready_in (lsu_mem_if.req_ready),
|
||||
.valid_out (req_valid_out),
|
||||
.valid_out (req_valid_out),
|
||||
.data_out (req_data_out),
|
||||
.tag_out (req_tag_out),
|
||||
.ready_out (req_ready_out)
|
||||
|
|
|
@ -13,9 +13,8 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_lsu_slice import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0,
|
||||
parameter BLOCK_ID = 0
|
||||
module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
|
@ -88,7 +87,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
wire [NUM_LANES-1:0] mem_req_mask;
|
||||
wire mem_req_rw;
|
||||
wire [NUM_LANES-1:0][LSU_ADDR_WIDTH-1:0] mem_req_addr;
|
||||
reg [NUM_LANES-1:0][LSU_WORD_SIZE-1:0] mem_req_byteen;
|
||||
wire [NUM_LANES-1:0][LSU_WORD_SIZE-1:0] mem_req_byteen;
|
||||
reg [NUM_LANES-1:0][LSU_WORD_SIZE*8-1:0] mem_req_data;
|
||||
wire [TAG_WIDTH-1:0] mem_req_tag;
|
||||
wire mem_req_ready;
|
||||
|
@ -159,27 +158,30 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
|
||||
// byte enable formatting
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
reg [LSU_WORD_SIZE-1:0] mem_req_byteen_r;
|
||||
always @(*) begin
|
||||
mem_req_byteen[i] = '0;
|
||||
mem_req_byteen_r = '0;
|
||||
case (`INST_LSU_WSIZE(execute_if.data.op_type))
|
||||
0: begin // 8-bit
|
||||
mem_req_byteen[i][req_align[i]] = 1'b1;
|
||||
mem_req_byteen_r[req_align[i]] = 1'b1;
|
||||
end
|
||||
1: begin // 16 bit
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:1], 1'b0}] = 1'b1;
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:1], 1'b1}] = 1'b1;
|
||||
mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:1], 1'b0}] = 1'b1;
|
||||
mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:1], 1'b1}] = 1'b1;
|
||||
end
|
||||
`ifdef XLEN_64
|
||||
2: begin // 32 bit
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b00}] = 1'b1;
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b01}] = 1'b1;
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b10}] = 1'b1;
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b11}] = 1'b1;
|
||||
mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b00}] = 1'b1;
|
||||
mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b01}] = 1'b1;
|
||||
mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b10}] = 1'b1;
|
||||
mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b11}] = 1'b1;
|
||||
end
|
||||
`endif
|
||||
default : mem_req_byteen[i] = {LSU_WORD_SIZE{1'b1}};
|
||||
// 3: 64 bit
|
||||
default : mem_req_byteen_r = {LSU_WORD_SIZE{1'b1}};
|
||||
endcase
|
||||
end
|
||||
assign mem_req_byteen[i] = mem_req_byteen_r;
|
||||
end
|
||||
|
||||
// memory misalignment not supported!
|
||||
|
@ -312,7 +314,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (mem_scheduler_reset, reset);
|
||||
|
||||
VX_mem_scheduler #(
|
||||
.INSTANCE_ID ($sformatf("core%0d-lsu-memsched%0d", CORE_ID, BLOCK_ID)),
|
||||
.INSTANCE_ID ($sformatf("%s-scheduler", INSTANCE_ID)),
|
||||
.CORE_REQS (NUM_LANES),
|
||||
.MEM_CHANNELS(NUM_LANES),
|
||||
.WORD_SIZE (LSU_WORD_SIZE),
|
||||
|
@ -504,11 +506,11 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
`ifdef DBG_TRACE_MEM
|
||||
always @(posedge clk) begin
|
||||
if (execute_if.valid && fence_lock) begin
|
||||
`TRACE(1, ("%d: *** D$%0d fence wait\n", $time, CORE_ID));
|
||||
`TRACE(1, ("%d: *** %s fence wait\n", $time, INSTANCE_ID));
|
||||
end
|
||||
if (mem_req_fire) begin
|
||||
if (mem_req_rw) begin
|
||||
`TRACE(1, ("%d: D$%0d Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, CORE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask));
|
||||
`TRACE(1, ("%d: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask));
|
||||
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES);
|
||||
`TRACE(1, (", atype="));
|
||||
`TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES);
|
||||
|
@ -516,7 +518,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
`TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES);
|
||||
`TRACE(1, (", tag=0x%0h (#%0d)\n", mem_req_tag, execute_if.data.uuid));
|
||||
end else begin
|
||||
`TRACE(1, ("%d: D$%0d Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, CORE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask));
|
||||
`TRACE(1, ("%d: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask));
|
||||
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES);
|
||||
`TRACE(1, (", atype="));
|
||||
`TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES);
|
||||
|
@ -524,8 +526,8 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(1, ("%d: D$%0d Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=",
|
||||
$time, CORE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop));
|
||||
`TRACE(1, ("%d: %s Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=",
|
||||
$time, INSTANCE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data, NUM_LANES);
|
||||
`TRACE(1, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid));
|
||||
end
|
||||
|
@ -533,36 +535,20 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
`endif
|
||||
|
||||
`ifdef DBG_SCOPE_LSU
|
||||
if (CORE_ID == 0 && BLOCK_ID == 0) begin
|
||||
`ifdef SCOPE
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (3),
|
||||
.TRIGGERW (3),
|
||||
.PROBEW (`UUID_WIDTH+NUM_LANES*(`XLEN+4+`XLEN)+1+`UUID_WIDTH+NUM_LANES*`XLEN)
|
||||
) scope_tap (
|
||||
.clk(clk),
|
||||
.reset(scope_reset),
|
||||
.start(1'b0),
|
||||
.stop(1'b0),
|
||||
.triggers({reset, mem_req_fire, mem_rsp_fire}),
|
||||
.probes({execute_if.data.uuid, full_addr, mem_req_rw, mem_req_byteen, mem_req_data, rsp_uuid, rsp_data}),
|
||||
.bus_in(scope_bus_in),
|
||||
.bus_out(scope_bus_out)
|
||||
);
|
||||
`endif
|
||||
`ifdef CHIPSCOPE
|
||||
wire [31:0] full_addr_0 = full_addr[0];
|
||||
wire [31:0] mem_req_data_0 = mem_req_data[0];
|
||||
wire [31:0] rsp_data_0 = rsp_data[0];
|
||||
ila_lsu ila_lsu_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({mem_req_data_0, execute_if.data.uuid, execute_if.data.wid, execute_if.data.PC, mem_req_mask, full_addr_0, mem_req_byteen, mem_req_rw, mem_req_ready, mem_req_valid}),
|
||||
.probe1 ({rsp_data_0, rsp_uuid, mem_rsp_eop, rsp_pc, rsp_rd, mem_rsp_mask, rsp_wid, mem_rsp_ready, mem_rsp_valid}),
|
||||
.probe2 ({lsu_mem_if.req_data.data, lsu_mem_if.req_data.tag, lsu_mem_if.req_data.byteen, lsu_mem_if.req_data.addr, lsu_mem_if.req_data.rw, lsu_mem_if.req_ready, lsu_mem_if.req_valid}),
|
||||
.probe3 ({lsu_mem_if.rsp_data.data, lsu_mem_if.rsp_data.tag, lsu_mem_if.rsp_ready, lsu_mem_if.rsp_valid})
|
||||
);
|
||||
`endif
|
||||
end
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (3),
|
||||
.TRIGGERW (3),
|
||||
.PROBEW (1 + NUM_LANES*(`XLEN + LSU_WORD_SIZE + LSU_WORD_SIZE*8) + `UUID_WIDTH + NUM_LANES*LSU_WORD_SIZE*8 + `UUID_WIDTH)
|
||||
) scope_tap (
|
||||
.clk (clk),
|
||||
.reset (scope_reset),
|
||||
.start (1'b0),
|
||||
.stop (1'b0),
|
||||
.triggers({reset, mem_req_fire, mem_rsp_fire}),
|
||||
.probes ({mem_req_rw, full_addr, mem_req_byteen, mem_req_data, execute_if.data.uuid, rsp_data, rsp_uuid}),
|
||||
.bus_in (scope_bus_in),
|
||||
.bus_out(scope_bus_out)
|
||||
);
|
||||
`else
|
||||
`SCOPE_IO_UNUSED()
|
||||
`endif
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,8 +14,8 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_lsu_unit import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
input wire clk,
|
||||
|
@ -24,7 +24,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
// Inputs
|
||||
VX_dispatch_if.slave dispatch_if [`ISSUE_WIDTH],
|
||||
|
||||
// Outputs
|
||||
// Outputs
|
||||
VX_commit_if.master commit_if [`ISSUE_WIDTH],
|
||||
VX_lsu_mem_if.master lsu_mem_if [`NUM_LSU_BLOCKS]
|
||||
);
|
||||
|
@ -32,10 +32,9 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
localparam NUM_LANES = `NUM_LSU_LANES;
|
||||
|
||||
`ifdef SCOPE
|
||||
localparam scope_lsu = 0;
|
||||
`SCOPE_IO_SWITCH (BLOCK_SIZE);
|
||||
`endif
|
||||
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) per_block_execute_if[BLOCK_SIZE]();
|
||||
|
@ -55,17 +54,16 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
.NUM_LANES (NUM_LANES)
|
||||
) per_block_commit_if[BLOCK_SIZE]();
|
||||
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : lsu_slices
|
||||
|
||||
`RESET_RELAY (block_reset, reset);
|
||||
`RESET_RELAY (slice_reset, reset);
|
||||
|
||||
VX_lsu_slice #(
|
||||
.CORE_ID (CORE_ID),
|
||||
.BLOCK_ID (block_idx)
|
||||
.INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, block_idx))
|
||||
) lsu_slice(
|
||||
`SCOPE_IO_BIND (scope_lsu+block_idx)
|
||||
`SCOPE_IO_BIND (block_idx)
|
||||
.clk (clk),
|
||||
.reset (block_reset),
|
||||
.reset (slice_reset),
|
||||
.execute_if (per_block_execute_if[block_idx]),
|
||||
.commit_if (per_block_commit_if[block_idx]),
|
||||
.lsu_mem_if (lsu_mem_if[block_idx])
|
||||
|
@ -82,5 +80,5 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
.commit_in_if (per_block_commit_if),
|
||||
.commit_out_if (commit_if)
|
||||
);
|
||||
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,29 +14,288 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_operands import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter NUM_BANKS = 4,
|
||||
parameter OUT_BUF = 4 // using 2-cycle EB for area reduction
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_writeback_if.slave writeback_if [`ISSUE_WIDTH],
|
||||
VX_scoreboard_if.slave scoreboard_if [`ISSUE_WIDTH],
|
||||
VX_operands_if.master operands_if [`ISSUE_WIDTH]
|
||||
`ifdef PERF_ENABLE
|
||||
output wire [`PERF_CTR_BITS-1:0] perf_stalls,
|
||||
`endif
|
||||
|
||||
VX_writeback_if.slave writeback_if,
|
||||
VX_scoreboard_if.slave scoreboard_if,
|
||||
VX_operands_if.master operands_if
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam NUM_SRC_REGS = 3;
|
||||
localparam REQ_SEL_BITS = `CLOG2(NUM_SRC_REGS);
|
||||
localparam REQ_SEL_WIDTH = `UP(REQ_SEL_BITS);
|
||||
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
|
||||
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
|
||||
localparam PER_BANK_REGS = `NUM_REGS / NUM_BANKS;
|
||||
localparam METADATAW = ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + `NR_BITS;
|
||||
localparam DATAW = `UUID_WIDTH + METADATAW + 3 * `NUM_THREADS * `XLEN;
|
||||
localparam RAM_ADDRW = `LOG2UP(`NUM_REGS * PER_ISSUE_WARPS);
|
||||
localparam PER_BANK_ADDRW = RAM_ADDRW - BANK_SEL_BITS;
|
||||
localparam XLEN_SIZE = `XLEN / 8;
|
||||
localparam BYTEENW = `NUM_THREADS * XLEN_SIZE;
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
|
||||
`RESET_RELAY (slice_reset, reset);
|
||||
`UNUSED_VAR (writeback_if.data.sop)
|
||||
|
||||
VX_gpr_slice #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) gpr_slice (
|
||||
.clk (clk),
|
||||
.reset (slice_reset),
|
||||
.writeback_if (writeback_if[i]),
|
||||
.scoreboard_if(scoreboard_if[i]),
|
||||
.operands_if (operands_if[i])
|
||||
wire [NUM_SRC_REGS-1:0] src_valid;
|
||||
wire [NUM_SRC_REGS-1:0] req_in_valid;
|
||||
wire [NUM_SRC_REGS-1:0] req_in_ready;
|
||||
wire [NUM_SRC_REGS-1:0][PER_BANK_ADDRW-1:0] req_in_data;
|
||||
wire [NUM_SRC_REGS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx;
|
||||
|
||||
wire [NUM_BANKS-1:0] gpr_rd_valid_n, gpr_rd_ready;
|
||||
reg [NUM_BANKS-1:0] gpr_rd_valid;
|
||||
wire [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr_n;
|
||||
reg [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr;
|
||||
wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data;
|
||||
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx_n;
|
||||
reg [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx;
|
||||
|
||||
wire pipe_in_ready;
|
||||
reg pipe_out_valid;
|
||||
wire pipe_out_ready;
|
||||
reg [`UUID_WIDTH-1:0] pipe_out_uuid;
|
||||
reg [METADATAW-1:0] pipe_out_data;
|
||||
|
||||
reg [NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data, src_data_n;
|
||||
reg [NUM_SRC_REGS-1:0] data_fetched;
|
||||
reg has_collision, has_collision_n;
|
||||
|
||||
wire stg_in_valid, stg_in_ready;
|
||||
|
||||
wire [NUM_SRC_REGS-1:0][`NR_BITS-1:0] src_regs = {scoreboard_if.data.rs3,
|
||||
scoreboard_if.data.rs2,
|
||||
scoreboard_if.data.rs1};
|
||||
|
||||
for (genvar i = 0; i < NUM_SRC_REGS; ++i) begin
|
||||
if (ISSUE_WIS != 0) begin
|
||||
assign req_in_data[i] = {src_regs[i][`NR_BITS-1:BANK_SEL_BITS], scoreboard_if.data.wis};
|
||||
end else begin
|
||||
assign req_in_data[i] = src_regs[i][`NR_BITS-1:BANK_SEL_BITS];
|
||||
end
|
||||
if (NUM_BANKS != 1) begin
|
||||
assign req_bank_idx[i] = src_regs[i][BANK_SEL_BITS-1:0];
|
||||
end else begin
|
||||
assign req_bank_idx[i] = '0;
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_SRC_REGS; ++i) begin
|
||||
assign src_valid[i] = (src_regs[i] != 0) && ~data_fetched[i];
|
||||
end
|
||||
|
||||
assign req_in_valid = {NUM_SRC_REGS{scoreboard_if.valid}} & src_valid;
|
||||
|
||||
VX_stream_xbar #(
|
||||
.NUM_INPUTS (NUM_SRC_REGS),
|
||||
.NUM_OUTPUTS (NUM_BANKS),
|
||||
.DATAW (PER_BANK_ADDRW),
|
||||
.ARBITER ("P"), // use priority arbiter
|
||||
.PERF_CTR_BITS(`PERF_CTR_BITS),
|
||||
.OUT_BUF (0) // no output buffering
|
||||
) req_xbar (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
`UNUSED_PIN(collisions),
|
||||
.valid_in (req_in_valid),
|
||||
.data_in (req_in_data),
|
||||
.sel_in (req_bank_idx),
|
||||
.ready_in (req_in_ready),
|
||||
.valid_out (gpr_rd_valid_n),
|
||||
.data_out (gpr_rd_addr_n),
|
||||
.sel_out (gpr_rd_req_idx_n),
|
||||
.ready_out (gpr_rd_ready)
|
||||
);
|
||||
|
||||
assign gpr_rd_ready = {NUM_BANKS{stg_in_ready}};
|
||||
|
||||
always @(*) begin
|
||||
has_collision_n = 0;
|
||||
for (integer i = 0; i < NUM_SRC_REGS; ++i) begin
|
||||
for (integer j = 1; j < (NUM_SRC_REGS-i); ++j) begin
|
||||
has_collision_n |= src_valid[i]
|
||||
&& src_valid[j+i]
|
||||
&& (req_bank_idx[i] == req_bank_idx[j+i]);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
src_data_n = src_data;
|
||||
for (integer b = 0; b < NUM_BANKS; ++b) begin
|
||||
if (gpr_rd_valid[b]) begin
|
||||
src_data_n[gpr_rd_req_idx[b]] = gpr_rd_data[b];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire pipe_stall = pipe_out_valid && ~pipe_out_ready;
|
||||
assign pipe_in_ready = ~pipe_stall;
|
||||
|
||||
assign scoreboard_if.ready = pipe_in_ready && ~has_collision_n;
|
||||
|
||||
wire stg_in_fire = stg_in_valid && stg_in_ready;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
pipe_out_valid <= 0;
|
||||
gpr_rd_valid <= '0;
|
||||
data_fetched <= '0;
|
||||
src_data <= '0;
|
||||
end else begin
|
||||
if (~pipe_stall) begin
|
||||
pipe_out_valid <= scoreboard_if.valid;
|
||||
gpr_rd_valid <= gpr_rd_valid_n;
|
||||
if (scoreboard_if.ready) begin
|
||||
data_fetched <= '0;
|
||||
end else begin
|
||||
data_fetched <= data_fetched | req_in_ready;
|
||||
end
|
||||
if (stg_in_fire) begin
|
||||
src_data <= '0;
|
||||
end else begin
|
||||
src_data <= src_data_n;
|
||||
end
|
||||
end
|
||||
end
|
||||
if (~pipe_stall) begin
|
||||
pipe_out_uuid <= scoreboard_if.data.uuid;
|
||||
pipe_out_data <= {
|
||||
scoreboard_if.data.wis,
|
||||
scoreboard_if.data.tmask,
|
||||
scoreboard_if.data.PC,
|
||||
scoreboard_if.data.wb,
|
||||
scoreboard_if.data.ex_type,
|
||||
scoreboard_if.data.op_type,
|
||||
scoreboard_if.data.op_args,
|
||||
scoreboard_if.data.rd
|
||||
};
|
||||
has_collision <= has_collision_n;
|
||||
gpr_rd_addr <= gpr_rd_addr_n;
|
||||
gpr_rd_req_idx <= gpr_rd_req_idx_n;
|
||||
end
|
||||
end
|
||||
|
||||
assign pipe_out_ready = stg_in_ready;
|
||||
assign stg_in_valid = pipe_out_valid && ~has_collision;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)),
|
||||
.LUTRAM (1)
|
||||
) out_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (stg_in_valid),
|
||||
.ready_in (stg_in_ready),
|
||||
.data_in ({
|
||||
pipe_out_uuid,
|
||||
pipe_out_data,
|
||||
src_data_n[0],
|
||||
src_data_n[1],
|
||||
src_data_n[2]
|
||||
}),
|
||||
.data_out ({
|
||||
operands_if.data.uuid,
|
||||
operands_if.data.wis,
|
||||
operands_if.data.tmask,
|
||||
operands_if.data.PC,
|
||||
operands_if.data.wb,
|
||||
operands_if.data.ex_type,
|
||||
operands_if.data.op_type,
|
||||
operands_if.data.op_args,
|
||||
operands_if.data.rd,
|
||||
operands_if.data.rs1_data,
|
||||
operands_if.data.rs2_data,
|
||||
operands_if.data.rs3_data
|
||||
}),
|
||||
.valid_out (operands_if.valid),
|
||||
.ready_out (operands_if.ready)
|
||||
);
|
||||
|
||||
wire [PER_BANK_ADDRW-1:0] gpr_wr_addr;
|
||||
if (ISSUE_WIS != 0) begin
|
||||
assign gpr_wr_addr = {writeback_if.data.rd[`NR_BITS-1:BANK_SEL_BITS], writeback_if.data.wis};
|
||||
end else begin
|
||||
assign gpr_wr_addr = writeback_if.data.rd[`NR_BITS-1:BANK_SEL_BITS];
|
||||
end
|
||||
|
||||
wire [BANK_SEL_WIDTH-1:0] gpr_wr_bank_idx;
|
||||
if (NUM_BANKS != 1) begin
|
||||
assign gpr_wr_bank_idx = writeback_if.data.rd[BANK_SEL_BITS-1:0];
|
||||
end else begin
|
||||
assign gpr_wr_bank_idx = '0;
|
||||
end
|
||||
|
||||
`ifdef GPR_RESET
|
||||
reg wr_enabled = 0;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
wr_enabled <= 1;
|
||||
end
|
||||
end
|
||||
`else
|
||||
wire wr_enabled = 1;
|
||||
`endif
|
||||
|
||||
for (genvar b = 0; b < NUM_BANKS; ++b) begin
|
||||
wire gpr_wr_enabled;
|
||||
if (BANK_SEL_BITS != 0) begin
|
||||
assign gpr_wr_enabled = wr_enabled
|
||||
&& writeback_if.valid
|
||||
&& (gpr_wr_bank_idx == BANK_SEL_BITS'(b));
|
||||
end else begin
|
||||
assign gpr_wr_enabled = wr_enabled && writeback_if.valid;
|
||||
end
|
||||
|
||||
wire [BYTEENW-1:0] wren;
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign wren[i*XLEN_SIZE+:XLEN_SIZE] = {XLEN_SIZE{writeback_if.data.tmask[i]}};
|
||||
end
|
||||
|
||||
`ifdef GPR_RESET
|
||||
VX_dp_ram_rst #(
|
||||
`else
|
||||
VX_dp_ram #(
|
||||
`endif
|
||||
.DATAW (`XLEN * `NUM_THREADS),
|
||||
.SIZE (PER_BANK_REGS * PER_ISSUE_WARPS),
|
||||
.WRENW (BYTEENW),
|
||||
.NO_RWCHECK (1)
|
||||
) gpr_ram (
|
||||
.clk (clk),
|
||||
`ifdef GPR_RESET
|
||||
.reset (reset),
|
||||
`endif
|
||||
.read (1'b1),
|
||||
.wren (wren),
|
||||
.write (gpr_wr_enabled),
|
||||
.waddr (gpr_wr_addr),
|
||||
.wdata (writeback_if.data.data),
|
||||
.raddr (gpr_rd_addr[b]),
|
||||
.rdata (gpr_rd_data[b])
|
||||
);
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`PERF_CTR_BITS-1:0] collisions_r;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
collisions_r <= '0;
|
||||
end else begin
|
||||
collisions_r <= collisions_r + `PERF_CTR_BITS'(scoreboard_if.valid && pipe_in_ready && has_collision_n);
|
||||
end
|
||||
end
|
||||
assign perf_stalls = collisions_r;
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -14,13 +14,14 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_schedule import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_pipeline_perf_if.schedule perf_schedule_if,
|
||||
output sched_perf_t sched_perf,
|
||||
`endif
|
||||
|
||||
// configuration
|
||||
|
@ -42,6 +43,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
// status
|
||||
output wire busy
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
reg [`NUM_WARPS-1:0] active_warps, active_warps_n; // updated when a warp is activated or disabled
|
||||
|
@ -290,7 +292,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (split_join_reset, reset);
|
||||
|
||||
VX_split_join #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-splitjoin", INSTANCE_ID))
|
||||
) split_join (
|
||||
.clk (clk),
|
||||
.reset (split_join_reset),
|
||||
|
@ -368,24 +370,42 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
|
||||
assign schedule_if.data.uuid = instr_uuid;
|
||||
|
||||
`RESET_RELAY (pending_instr_reset, reset);
|
||||
// Track pending instructions per warp
|
||||
|
||||
wire no_pending_instr;
|
||||
VX_pending_instr #(
|
||||
.CTR_WIDTH (12),
|
||||
.DECR_COUNT (`ISSUE_WIDTH),
|
||||
.ALM_EMPTY (1)
|
||||
) pending_instr(
|
||||
.clk (clk),
|
||||
.reset (pending_instr_reset),
|
||||
.incr (schedule_if_fire),
|
||||
.incr_wid (schedule_if.data.wid),
|
||||
.decr (commit_sched_if.committed),
|
||||
.decr_wid (commit_sched_if.committed_wid),
|
||||
.alm_empty_wid (sched_csr_if.alm_empty_wid),
|
||||
.alm_empty (sched_csr_if.alm_empty),
|
||||
.empty (no_pending_instr)
|
||||
);
|
||||
reg [`NUM_WARPS-1:0] per_warp_incr;
|
||||
always @(*) begin
|
||||
per_warp_incr = 0;
|
||||
if (schedule_if_fire) begin
|
||||
per_warp_incr[schedule_if.data.wid] = 1;
|
||||
end
|
||||
end
|
||||
|
||||
wire [`NUM_WARPS-1:0] pending_warp_empty;
|
||||
wire [`NUM_WARPS-1:0] pending_warp_alm_empty;
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
|
||||
`RESET_RELAY (pending_instr_reset, reset);
|
||||
|
||||
VX_pending_size #(
|
||||
.SIZE (4096),
|
||||
.ALM_EMPTY (1)
|
||||
) counter (
|
||||
.clk (clk),
|
||||
.reset (pending_instr_reset),
|
||||
.incr (per_warp_incr[i]),
|
||||
.decr (commit_sched_if.committed_warps[i]),
|
||||
.empty (pending_warp_empty[i]),
|
||||
.alm_empty (pending_warp_alm_empty[i]),
|
||||
`UNUSED_PIN (full),
|
||||
`UNUSED_PIN (alm_full),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
end
|
||||
|
||||
assign sched_csr_if.alm_empty = pending_warp_alm_empty[sched_csr_if.alm_empty_wid];
|
||||
|
||||
wire no_pending_instr = (& pending_warp_empty);
|
||||
|
||||
`BUFFER_EX(busy, (active_warps != 0 || ~no_pending_instr), 1'b1, 1);
|
||||
|
||||
|
@ -412,7 +432,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
end
|
||||
`RUNTIME_ASSERT(timeout_ctr < `STALL_TIMEOUT, ("%t: *** core%0d-scheduler-timeout: stalled_warps=%b", $time, CORE_ID, stalled_warps))
|
||||
`RUNTIME_ASSERT(timeout_ctr < `STALL_TIMEOUT, ("%t: *** %s timeout: stalled_warps=%b", $time, INSTANCE_ID, stalled_warps))
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`PERF_CTR_BITS-1:0] perf_sched_idles;
|
||||
|
@ -431,8 +451,8 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
|
||||
assign perf_schedule_if.sched_idles = perf_sched_idles;
|
||||
assign perf_schedule_if.sched_stalls = perf_sched_stalls;
|
||||
assign sched_perf.idles = perf_sched_idles;
|
||||
assign sched_perf.stalls = perf_sched_stalls;
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -14,39 +14,37 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_scoreboard import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
output reg [`PERF_CTR_BITS-1:0] perf_scb_stalls,
|
||||
output reg [`PERF_CTR_BITS-1:0] perf_units_uses [`NUM_EX_UNITS],
|
||||
output reg [`PERF_CTR_BITS-1:0] perf_sfu_uses [`NUM_SFU_UNITS],
|
||||
output reg [`PERF_CTR_BITS-1:0] perf_stalls,
|
||||
output reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_units_uses,
|
||||
output reg [`NUM_SFU_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_sfu_uses,
|
||||
`endif
|
||||
|
||||
VX_writeback_if.slave writeback_if [`ISSUE_WIDTH],
|
||||
VX_ibuffer_if.slave ibuffer_if [`NUM_WARPS],
|
||||
VX_scoreboard_if.master scoreboard_if [`ISSUE_WIDTH]
|
||||
VX_writeback_if.slave writeback_if,
|
||||
VX_ibuffer_if.slave ibuffer_if [PER_ISSUE_WARPS],
|
||||
VX_scoreboard_if.master scoreboard_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam DATAW = `UUID_WIDTH + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + (`NR_BITS * 4) + 1;
|
||||
|
||||
VX_ibuffer_if staging_if [PER_ISSUE_WARPS]();
|
||||
reg [PER_ISSUE_WARPS-1:0] operands_ready;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`NUM_WARPS-1:0][`NUM_EX_UNITS-1:0] perf_inuse_units_per_cycle;
|
||||
reg [PER_ISSUE_WARPS-1:0][`NUM_EX_UNITS-1:0] perf_inuse_units_per_cycle;
|
||||
wire [`NUM_EX_UNITS-1:0] perf_units_per_cycle, perf_units_per_cycle_r;
|
||||
|
||||
reg [`NUM_WARPS-1:0][`NUM_SFU_UNITS-1:0] perf_inuse_sfu_per_cycle;
|
||||
reg [PER_ISSUE_WARPS-1:0][`NUM_SFU_UNITS-1:0] perf_inuse_sfu_per_cycle;
|
||||
wire [`NUM_SFU_UNITS-1:0] perf_sfu_per_cycle, perf_sfu_per_cycle_r;
|
||||
|
||||
wire [`NUM_WARPS-1:0] perf_issue_stalls_per_cycle;
|
||||
wire [`CLOG2(`NUM_WARPS+1)-1:0] perf_stalls_per_cycle, perf_stalls_per_cycle_r;
|
||||
|
||||
`POP_COUNT(perf_stalls_per_cycle, perf_issue_stalls_per_cycle);
|
||||
|
||||
VX_reduce #(
|
||||
.DATAW_IN (`NUM_EX_UNITS),
|
||||
.N (`NUM_WARPS),
|
||||
.N (PER_ISSUE_WARPS),
|
||||
.OP ("|")
|
||||
) perf_units_reduce (
|
||||
.data_in (perf_inuse_units_per_cycle),
|
||||
|
@ -55,22 +53,28 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
|
||||
VX_reduce #(
|
||||
.DATAW_IN (`NUM_SFU_UNITS),
|
||||
.N (`NUM_WARPS),
|
||||
.N (PER_ISSUE_WARPS),
|
||||
.OP ("|")
|
||||
) perf_sfu_reduce (
|
||||
.data_in (perf_inuse_sfu_per_cycle),
|
||||
.data_out (perf_sfu_per_cycle)
|
||||
);
|
||||
|
||||
`BUFFER(perf_stalls_per_cycle_r, perf_stalls_per_cycle);
|
||||
`BUFFER_EX(perf_units_per_cycle_r, perf_units_per_cycle, 1'b1, `CDIV(`NUM_WARPS, `MAX_FANOUT));
|
||||
`BUFFER_EX(perf_sfu_per_cycle_r, perf_sfu_per_cycle, 1'b1, `CDIV(`NUM_WARPS, `MAX_FANOUT));
|
||||
`BUFFER_EX(perf_units_per_cycle_r, perf_units_per_cycle, 1'b1, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT));
|
||||
`BUFFER_EX(perf_sfu_per_cycle_r, perf_sfu_per_cycle, 1'b1, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT));
|
||||
|
||||
wire [PER_ISSUE_WARPS-1:0] stg_valid_in;
|
||||
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin
|
||||
assign stg_valid_in[w] = staging_if[w].valid;
|
||||
end
|
||||
|
||||
wire perf_stall_per_cycle = (|stg_valid_in) && ~(|(stg_valid_in & operands_ready));
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_scb_stalls <= '0;
|
||||
perf_stalls <= '0;
|
||||
end else begin
|
||||
perf_scb_stalls <= perf_scb_stalls + `PERF_CTR_BITS'(perf_stalls_per_cycle_r);
|
||||
perf_stalls <= perf_stalls + `PERF_CTR_BITS'(perf_stall_per_cycle);
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -95,138 +99,121 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
end
|
||||
`endif
|
||||
|
||||
VX_ibuffer_if staging_if [`NUM_WARPS]();
|
||||
wire [`NUM_WARPS-1:0][3:0] staging_opds_busy;
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (1)
|
||||
) stanging_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (ibuffer_if[i].valid),
|
||||
.data_in (ibuffer_if[i].data),
|
||||
.ready_in (ibuffer_if[i].ready),
|
||||
.valid_out(staging_if[i].valid),
|
||||
.data_out (staging_if[i].data),
|
||||
.ready_out(staging_if[i].ready)
|
||||
.valid_in (ibuffer_if[w].valid),
|
||||
.data_in (ibuffer_if[w].data),
|
||||
.ready_in (ibuffer_if[w].ready),
|
||||
.valid_out(staging_if[w].valid),
|
||||
.data_out (staging_if[w].data),
|
||||
.ready_out(staging_if[w].ready)
|
||||
);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin
|
||||
reg [`NUM_REGS-1:0] inuse_regs;
|
||||
|
||||
reg [3:0] operands_busy_r, operands_busy_n;
|
||||
reg [3:0] operands_busy, operands_busy_n;
|
||||
|
||||
localparam iw = i % `ISSUE_WIDTH;
|
||||
localparam wis = i / `ISSUE_WIDTH;
|
||||
wire ibuffer_fire = ibuffer_if[w].valid && ibuffer_if[w].ready;
|
||||
|
||||
wire ibuffer_fire = ibuffer_if[i].valid && ibuffer_if[i].ready;
|
||||
wire staging_fire = staging_if[w].valid && staging_if[w].ready;
|
||||
|
||||
wire staging_fire = staging_if[i].valid && staging_if[i].ready;
|
||||
|
||||
wire writeback_fire = writeback_if[iw].valid
|
||||
&& (writeback_if[iw].data.wis == ISSUE_WIS_W'(wis))
|
||||
&& writeback_if[iw].data.eop;
|
||||
wire writeback_fire = writeback_if.valid
|
||||
&& (writeback_if.data.wis == ISSUE_WIS_W'(w))
|
||||
&& writeback_if.data.eop;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`NUM_REGS-1:0][`EX_WIDTH-1:0] inuse_units;
|
||||
reg [`NUM_REGS-1:0][`SFU_WIDTH-1:0] inuse_sfu;
|
||||
|
||||
reg [`SFU_WIDTH-1:0] sfu_type;
|
||||
always @(*) begin
|
||||
case (staging_if[i].data.op_type)
|
||||
`INST_SFU_CSRRW,
|
||||
`INST_SFU_CSRRS,
|
||||
`INST_SFU_CSRRC: sfu_type = `SFU_CSRS;
|
||||
default: sfu_type = `SFU_WCTL;
|
||||
endcase
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
perf_inuse_units_per_cycle[i] = '0;
|
||||
perf_inuse_sfu_per_cycle[i] = '0;
|
||||
if (staging_if[i].valid) begin
|
||||
if (operands_busy_r[0]) begin
|
||||
perf_inuse_units_per_cycle[i][inuse_units[staging_if[i].data.rd]] = 1;
|
||||
if (inuse_units[staging_if[i].data.rd] == `EX_SFU) begin
|
||||
perf_inuse_sfu_per_cycle[i][inuse_sfu[staging_if[i].data.rd]] = 1;
|
||||
perf_inuse_units_per_cycle[w] = '0;
|
||||
perf_inuse_sfu_per_cycle[w] = '0;
|
||||
if (staging_if[w].valid) begin
|
||||
if (operands_busy[0]) begin
|
||||
perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rd]] = 1;
|
||||
if (inuse_units[staging_if[w].data.rd] == `EX_SFU) begin
|
||||
perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rd]] = 1;
|
||||
end
|
||||
end
|
||||
if (operands_busy_r[1]) begin
|
||||
perf_inuse_units_per_cycle[i][inuse_units[staging_if[i].data.rs1]] = 1;
|
||||
if (inuse_units[staging_if[i].data.rs1] == `EX_SFU) begin
|
||||
perf_inuse_sfu_per_cycle[i][inuse_sfu[staging_if[i].data.rs1]] = 1;
|
||||
if (operands_busy[1]) begin
|
||||
perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rs1]] = 1;
|
||||
if (inuse_units[staging_if[w].data.rs1] == `EX_SFU) begin
|
||||
perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rs1]] = 1;
|
||||
end
|
||||
end
|
||||
if (operands_busy_r[2]) begin
|
||||
perf_inuse_units_per_cycle[i][inuse_units[staging_if[i].data.rs2]] = 1;
|
||||
if (inuse_units[staging_if[i].data.rs2] == `EX_SFU) begin
|
||||
perf_inuse_sfu_per_cycle[i][inuse_sfu[staging_if[i].data.rs2]] = 1;
|
||||
if (operands_busy[2]) begin
|
||||
perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rs2]] = 1;
|
||||
if (inuse_units[staging_if[w].data.rs2] == `EX_SFU) begin
|
||||
perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rs2]] = 1;
|
||||
end
|
||||
end
|
||||
if (operands_busy_r[3]) begin
|
||||
perf_inuse_units_per_cycle[i][inuse_units[staging_if[i].data.rs3]] = 1;
|
||||
if (inuse_units[staging_if[i].data.rs3] == `EX_SFU) begin
|
||||
perf_inuse_sfu_per_cycle[i][inuse_sfu[staging_if[i].data.rs3]] = 1;
|
||||
if (operands_busy[3]) begin
|
||||
perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rs3]] = 1;
|
||||
if (inuse_units[staging_if[w].data.rs3] == `EX_SFU) begin
|
||||
perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rs3]] = 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
assign perf_issue_stalls_per_cycle[i] = staging_if[i].valid && ~staging_if[i].ready;
|
||||
`endif
|
||||
|
||||
always @(*) begin
|
||||
operands_busy_n = operands_busy_r;
|
||||
operands_busy_n = operands_busy;
|
||||
if (ibuffer_fire) begin
|
||||
operands_busy_n = {
|
||||
inuse_regs[ibuffer_if[i].data.rs3],
|
||||
inuse_regs[ibuffer_if[i].data.rs2],
|
||||
inuse_regs[ibuffer_if[i].data.rs1],
|
||||
inuse_regs[ibuffer_if[i].data.rd]
|
||||
inuse_regs[ibuffer_if[w].data.rs3],
|
||||
inuse_regs[ibuffer_if[w].data.rs2],
|
||||
inuse_regs[ibuffer_if[w].data.rs1],
|
||||
inuse_regs[ibuffer_if[w].data.rd]
|
||||
};
|
||||
end
|
||||
if (writeback_fire) begin
|
||||
if (ibuffer_fire) begin
|
||||
if (writeback_if[iw].data.rd == ibuffer_if[i].data.rd) begin
|
||||
if (writeback_if.data.rd == ibuffer_if[w].data.rd) begin
|
||||
operands_busy_n[0] = 0;
|
||||
end
|
||||
if (writeback_if[iw].data.rd == ibuffer_if[i].data.rs1) begin
|
||||
if (writeback_if.data.rd == ibuffer_if[w].data.rs1) begin
|
||||
operands_busy_n[1] = 0;
|
||||
end
|
||||
if (writeback_if[iw].data.rd == ibuffer_if[i].data.rs2) begin
|
||||
if (writeback_if.data.rd == ibuffer_if[w].data.rs2) begin
|
||||
operands_busy_n[2] = 0;
|
||||
end
|
||||
if (writeback_if[iw].data.rd == ibuffer_if[i].data.rs3) begin
|
||||
if (writeback_if.data.rd == ibuffer_if[w].data.rs3) begin
|
||||
operands_busy_n[3] = 0;
|
||||
end
|
||||
end else begin
|
||||
if (writeback_if[iw].data.rd == staging_if[i].data.rd) begin
|
||||
if (writeback_if.data.rd == staging_if[w].data.rd) begin
|
||||
operands_busy_n[0] = 0;
|
||||
end
|
||||
if (writeback_if[iw].data.rd == staging_if[i].data.rs1) begin
|
||||
if (writeback_if.data.rd == staging_if[w].data.rs1) begin
|
||||
operands_busy_n[1] = 0;
|
||||
end
|
||||
if (writeback_if[iw].data.rd == staging_if[i].data.rs2) begin
|
||||
if (writeback_if.data.rd == staging_if[w].data.rs2) begin
|
||||
operands_busy_n[2] = 0;
|
||||
end
|
||||
if (writeback_if[iw].data.rd == staging_if[i].data.rs3) begin
|
||||
if (writeback_if.data.rd == staging_if[w].data.rs3) begin
|
||||
operands_busy_n[3] = 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
if (staging_fire && staging_if[i].data.wb) begin
|
||||
if (staging_if[i].data.rd == ibuffer_if[i].data.rd) begin
|
||||
if (staging_fire && staging_if[w].data.wb) begin
|
||||
if (staging_if[w].data.rd == ibuffer_if[w].data.rd) begin
|
||||
operands_busy_n[0] = 1;
|
||||
end
|
||||
if (staging_if[i].data.rd == ibuffer_if[i].data.rs1) begin
|
||||
if (staging_if[w].data.rd == ibuffer_if[w].data.rs1) begin
|
||||
operands_busy_n[1] = 1;
|
||||
end
|
||||
if (staging_if[i].data.rd == ibuffer_if[i].data.rs2) begin
|
||||
if (staging_if[w].data.rd == ibuffer_if[w].data.rs2) begin
|
||||
operands_busy_n[2] = 1;
|
||||
end
|
||||
if (staging_if[i].data.rd == ibuffer_if[i].data.rs3) begin
|
||||
if (staging_if[w].data.rd == ibuffer_if[w].data.rs3) begin
|
||||
operands_busy_n[3] = 1;
|
||||
end
|
||||
end
|
||||
|
@ -237,25 +224,24 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
inuse_regs <= '0;
|
||||
end else begin
|
||||
if (writeback_fire) begin
|
||||
inuse_regs[writeback_if[iw].data.rd] <= 0;
|
||||
inuse_regs[writeback_if.data.rd] <= 0;
|
||||
end
|
||||
if (staging_fire && staging_if[i].data.wb) begin
|
||||
inuse_regs[staging_if[i].data.rd] <= 1;
|
||||
if (staging_fire && staging_if[w].data.wb) begin
|
||||
inuse_regs[staging_if[w].data.rd] <= 1;
|
||||
end
|
||||
end
|
||||
operands_busy_r <= operands_busy_n;
|
||||
operands_busy <= operands_busy_n;
|
||||
operands_ready[w] <= ~(| operands_busy_n);
|
||||
`ifdef PERF_ENABLE
|
||||
if (staging_fire && staging_if[i].data.wb) begin
|
||||
inuse_units[staging_if[i].data.rd] <= staging_if[i].data.ex_type;
|
||||
if (staging_if[i].data.ex_type == `EX_SFU) begin
|
||||
inuse_sfu[staging_if[i].data.rd] <= sfu_type;
|
||||
if (staging_fire && staging_if[w].data.wb) begin
|
||||
inuse_units[staging_if[w].data.rd] <= staging_if[w].data.ex_type;
|
||||
if (staging_if[w].data.ex_type == `EX_SFU) begin
|
||||
inuse_sfu[staging_if[w].data.rd] <= op_to_sfu_type(staging_if[w].data.op_type);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
end
|
||||
|
||||
assign staging_opds_busy[i] = operands_busy_r;
|
||||
|
||||
`ifdef SIMULATION
|
||||
reg [31:0] timeout_ctr;
|
||||
|
||||
|
@ -263,11 +249,11 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
if (reset) begin
|
||||
timeout_ctr <= '0;
|
||||
end else begin
|
||||
if (staging_if[i].valid && ~staging_if[i].ready) begin
|
||||
if (staging_if[w].valid && ~staging_if[w].ready) begin
|
||||
`ifdef DBG_TRACE_PIPELINE
|
||||
`TRACE(3, ("%d: *** core%0d-scoreboard-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n",
|
||||
$time, CORE_ID, i, {staging_if[i].data.PC, 1'b0}, staging_if[i].data.tmask, timeout_ctr,
|
||||
operands_busy_r, staging_if[i].data.uuid));
|
||||
`TRACE(3, ("%d: *** %s-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n",
|
||||
$time, INSTANCE_ID, w, {staging_if[w].data.PC, 1'b0}, staging_if[w].data.tmask, timeout_ctr,
|
||||
operands_busy, staging_if[w].data.uuid));
|
||||
`endif
|
||||
timeout_ctr <= timeout_ctr + 1;
|
||||
end else if (ibuffer_fire) begin
|
||||
|
@ -277,59 +263,57 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
end
|
||||
|
||||
`RUNTIME_ASSERT((timeout_ctr < `STALL_TIMEOUT),
|
||||
("%t: *** core%0d-scoreboard-timeout: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)",
|
||||
$time, CORE_ID, i, {staging_if[i].data.PC, 1'b0}, staging_if[i].data.tmask, timeout_ctr,
|
||||
operands_busy_r, staging_if[i].data.uuid));
|
||||
("%t: *** %s timeout: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)",
|
||||
$time, INSTANCE_ID, w, {staging_if[w].data.PC, 1'b0}, staging_if[w].data.tmask, timeout_ctr,
|
||||
operands_busy, staging_if[w].data.uuid));
|
||||
|
||||
`RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if[iw].data.rd] != 0,
|
||||
("%t: *** core%0d: invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)",
|
||||
$time, CORE_ID, i, {writeback_if[iw].data.PC, 1'b0}, writeback_if[iw].data.tmask, writeback_if[iw].data.rd, writeback_if[iw].data.uuid));
|
||||
`RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if.data.rd] != 0,
|
||||
("%t: *** %s invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)",
|
||||
$time, INSTANCE_ID, w, {writeback_if.data.PC, 1'b0}, writeback_if.data.tmask, writeback_if.data.rd, writeback_if.data.uuid));
|
||||
`endif
|
||||
|
||||
end
|
||||
|
||||
`RESET_RELAY (arb_reset, reset);
|
||||
wire [PER_ISSUE_WARPS-1:0] arb_valid_in;
|
||||
wire [PER_ISSUE_WARPS-1:0][DATAW-1:0] arb_data_in;
|
||||
wire [PER_ISSUE_WARPS-1:0] arb_ready_in;
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
wire [ISSUE_RATIO-1:0] valid_in;
|
||||
wire [ISSUE_RATIO-1:0][DATAW-1:0] data_in;
|
||||
wire [ISSUE_RATIO-1:0] ready_in;
|
||||
|
||||
for (genvar j = 0; j < ISSUE_RATIO; ++j) begin
|
||||
wire operands_ready = ~(| staging_opds_busy[j * `ISSUE_WIDTH + i]);
|
||||
assign valid_in[j] = staging_if[j * `ISSUE_WIDTH + i].valid && operands_ready;
|
||||
assign data_in[j] = staging_if[j * `ISSUE_WIDTH + i].data;
|
||||
assign staging_if[j * `ISSUE_WIDTH + i].ready = ready_in[j] && operands_ready;
|
||||
end
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (ISSUE_RATIO),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER ("R"),
|
||||
.OUT_BUF (2)
|
||||
) out_arb (
|
||||
.clk (clk),
|
||||
.reset (arb_reset),
|
||||
.valid_in (valid_in),
|
||||
.ready_in (ready_in),
|
||||
.data_in (data_in),
|
||||
.data_out ({
|
||||
scoreboard_if[i].data.uuid,
|
||||
scoreboard_if[i].data.tmask,
|
||||
scoreboard_if[i].data.PC,
|
||||
scoreboard_if[i].data.ex_type,
|
||||
scoreboard_if[i].data.op_type,
|
||||
scoreboard_if[i].data.op_args,
|
||||
scoreboard_if[i].data.wb,
|
||||
scoreboard_if[i].data.rd,
|
||||
scoreboard_if[i].data.rs1,
|
||||
scoreboard_if[i].data.rs2,
|
||||
scoreboard_if[i].data.rs3
|
||||
}),
|
||||
.valid_out (scoreboard_if[i].valid),
|
||||
.ready_out (scoreboard_if[i].ready),
|
||||
.sel_out (scoreboard_if[i].data.wis)
|
||||
);
|
||||
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin
|
||||
assign arb_valid_in[w] = staging_if[w].valid && operands_ready[w];
|
||||
assign arb_data_in[w] = staging_if[w].data;
|
||||
assign staging_if[w].ready = arb_ready_in[w] && operands_ready[w];
|
||||
end
|
||||
|
||||
`RESET_RELAY (arb_reset, reset);
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (PER_ISSUE_WARPS),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER ("F"),
|
||||
.LUTRAM (1),
|
||||
.OUT_BUF (4) // using 2-cycle EB for area reduction
|
||||
) out_arb (
|
||||
.clk (clk),
|
||||
.reset (arb_reset),
|
||||
.valid_in (arb_valid_in),
|
||||
.ready_in (arb_ready_in),
|
||||
.data_in (arb_data_in),
|
||||
.data_out ({
|
||||
scoreboard_if.data.uuid,
|
||||
scoreboard_if.data.tmask,
|
||||
scoreboard_if.data.PC,
|
||||
scoreboard_if.data.ex_type,
|
||||
scoreboard_if.data.op_type,
|
||||
scoreboard_if.data.op_args,
|
||||
scoreboard_if.data.wb,
|
||||
scoreboard_if.data.rd,
|
||||
scoreboard_if.data.rs1,
|
||||
scoreboard_if.data.rs2,
|
||||
scoreboard_if.data.rs3
|
||||
}),
|
||||
.valid_out (scoreboard_if.valid),
|
||||
.ready_out (scoreboard_if.ready),
|
||||
.sel_out (scoreboard_if.data.wis)
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_sfu_unit import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -39,7 +40,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
VX_commit_if.master commit_if [`ISSUE_WIDTH],
|
||||
VX_warp_ctl_if.master warp_ctl_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam BLOCK_SIZE = 1;
|
||||
localparam NUM_LANES = `NUM_SFU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
|
@ -83,7 +84,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (wctl_reset, reset);
|
||||
|
||||
VX_wctl_unit #(
|
||||
.CORE_ID (CORE_ID),
|
||||
.INSTANCE_ID ($sformatf("%s-wctl", INSTANCE_ID)),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) wctl_unit (
|
||||
.clk (clk),
|
||||
|
@ -111,6 +112,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (csr_reset, reset);
|
||||
|
||||
VX_csr_unit #(
|
||||
.INSTANCE_ID ($sformatf("%s-csr", INSTANCE_ID)),
|
||||
.CORE_ID (CORE_ID),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) csr_unit (
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_split_join import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -31,7 +31,7 @@ module VX_split_join import VX_gpu_pkg::*; #(
|
|||
input wire [`NW_WIDTH-1:0] stack_wid,
|
||||
output wire [`DV_STACK_SIZEW-1:0] stack_ptr
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
|
||||
wire [(`NUM_THREADS+`PC_BITS)-1:0] ipdom_data [`NUM_WARPS-1:0];
|
||||
wire [`DV_STACK_SIZEW-1:0] ipdom_q_ptr [`NUM_WARPS-1:0];
|
||||
|
|
399
hw/rtl/core/VX_trace_pkg.sv
Normal file
399
hw/rtl/core/VX_trace_pkg.sv
Normal file
|
@ -0,0 +1,399 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`ifndef VX_TRACE_PKG_VH
|
||||
`define VX_TRACE_PKG_VH
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
package VX_trace_pkg;
|
||||
|
||||
`ifdef SIMULATION
|
||||
|
||||
`ifdef SV_DPI
|
||||
import "DPI-C" function void dpi_trace(input int level, input string format /*verilator sformat*/);
|
||||
`endif
|
||||
|
||||
import VX_gpu_pkg::*;
|
||||
|
||||
task trace_ex_type(input int level, input [`EX_BITS-1:0] ex_type);
|
||||
case (ex_type)
|
||||
`EX_ALU: `TRACE(level, ("ALU"));
|
||||
`EX_LSU: `TRACE(level, ("LSU"));
|
||||
`EX_FPU: `TRACE(level, ("FPU"));
|
||||
`EX_SFU: `TRACE(level, ("SFU"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
endtask
|
||||
|
||||
task trace_ex_op(input int level,
|
||||
input [`EX_BITS-1:0] ex_type,
|
||||
input [`INST_OP_BITS-1:0] op_type,
|
||||
input VX_gpu_pkg::op_args_t op_args
|
||||
);
|
||||
case (ex_type)
|
||||
`EX_ALU: begin
|
||||
case (op_args.alu.xtype)
|
||||
`ALU_TYPE_ARITH: begin
|
||||
if (op_args.alu.is_w) begin
|
||||
if (op_args.alu.use_imm) begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADDIW"));
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLLIW"));
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRLIW"));
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRAIW"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADDW"));
|
||||
`INST_ALU_SUB: `TRACE(level, ("SUBW"));
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLLW"));
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRLW"));
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRAW"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end else begin
|
||||
if (op_args.alu.use_imm) begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADDI"));
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLLI"));
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRLI"));
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRAI"));
|
||||
`INST_ALU_SLT: `TRACE(level, ("SLTI"));
|
||||
`INST_ALU_SLTU: `TRACE(level, ("SLTIU"));
|
||||
`INST_ALU_XOR: `TRACE(level, ("XORI"));
|
||||
`INST_ALU_OR: `TRACE(level, ("ORI"));
|
||||
`INST_ALU_AND: `TRACE(level, ("ANDI"));
|
||||
`INST_ALU_LUI: `TRACE(level, ("LUI"));
|
||||
`INST_ALU_AUIPC: `TRACE(level, ("AUIPC"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADD"));
|
||||
`INST_ALU_SUB: `TRACE(level, ("SUB"));
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLL"));
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRL"));
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRA"));
|
||||
`INST_ALU_SLT: `TRACE(level, ("SLT"));
|
||||
`INST_ALU_SLTU: `TRACE(level, ("SLTU"));
|
||||
`INST_ALU_XOR: `TRACE(level, ("XOR"));
|
||||
`INST_ALU_OR: `TRACE(level, ("OR"));
|
||||
`INST_ALU_AND: `TRACE(level, ("AND"));
|
||||
`INST_ALU_CZEQ: `TRACE(level, ("CZERO.EQZ"));
|
||||
`INST_ALU_CZNE: `TRACE(level, ("CZERO.NEZ"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end
|
||||
end
|
||||
`ALU_TYPE_BRANCH: begin
|
||||
case (`INST_BR_BITS'(op_type))
|
||||
`INST_BR_EQ: `TRACE(level, ("BEQ"));
|
||||
`INST_BR_NE: `TRACE(level, ("BNE"));
|
||||
`INST_BR_LT: `TRACE(level, ("BLT"));
|
||||
`INST_BR_GE: `TRACE(level, ("BGE"));
|
||||
`INST_BR_LTU: `TRACE(level, ("BLTU"));
|
||||
`INST_BR_GEU: `TRACE(level, ("BGEU"));
|
||||
`INST_BR_JAL: `TRACE(level, ("JAL"));
|
||||
`INST_BR_JALR: `TRACE(level, ("JALR"));
|
||||
`INST_BR_ECALL: `TRACE(level, ("ECALL"));
|
||||
`INST_BR_EBREAK:`TRACE(level, ("EBREAK"));
|
||||
`INST_BR_URET: `TRACE(level, ("URET"));
|
||||
`INST_BR_SRET: `TRACE(level, ("SRET"));
|
||||
`INST_BR_MRET: `TRACE(level, ("MRET"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
`ALU_TYPE_MULDIV: begin
|
||||
if (op_args.alu.is_w) begin
|
||||
case (`INST_M_BITS'(op_type))
|
||||
`INST_M_MUL: `TRACE(level, ("MULW"));
|
||||
`INST_M_DIV: `TRACE(level, ("DIVW"));
|
||||
`INST_M_DIVU: `TRACE(level, ("DIVUW"));
|
||||
`INST_M_REM: `TRACE(level, ("REMW"));
|
||||
`INST_M_REMU: `TRACE(level, ("REMUW"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_M_BITS'(op_type))
|
||||
`INST_M_MUL: `TRACE(level, ("MUL"));
|
||||
`INST_M_MULH: `TRACE(level, ("MULH"));
|
||||
`INST_M_MULHSU:`TRACE(level, ("MULHSU"));
|
||||
`INST_M_MULHU: `TRACE(level, ("MULHU"));
|
||||
`INST_M_DIV: `TRACE(level, ("DIV"));
|
||||
`INST_M_DIVU: `TRACE(level, ("DIVU"));
|
||||
`INST_M_REM: `TRACE(level, ("REM"));
|
||||
`INST_M_REMU: `TRACE(level, ("REMU"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
`EX_LSU: begin
|
||||
if (op_args.lsu.is_float) begin
|
||||
case (`INST_LSU_BITS'(op_type))
|
||||
`INST_LSU_LW: `TRACE(level, ("FLW"));
|
||||
`INST_LSU_LD: `TRACE(level, ("FLD"));
|
||||
`INST_LSU_SW: `TRACE(level, ("FSW"));
|
||||
`INST_LSU_SD: `TRACE(level, ("FSD"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_LSU_BITS'(op_type))
|
||||
`INST_LSU_LB: `TRACE(level, ("LB"));
|
||||
`INST_LSU_LH: `TRACE(level, ("LH"));
|
||||
`INST_LSU_LW: `TRACE(level, ("LW"));
|
||||
`INST_LSU_LD: `TRACE(level, ("LD"));
|
||||
`INST_LSU_LBU:`TRACE(level, ("LBU"));
|
||||
`INST_LSU_LHU:`TRACE(level, ("LHU"));
|
||||
`INST_LSU_LWU:`TRACE(level, ("LWU"));
|
||||
`INST_LSU_SB: `TRACE(level, ("SB"));
|
||||
`INST_LSU_SH: `TRACE(level, ("SH"));
|
||||
`INST_LSU_SW: `TRACE(level, ("SW"));
|
||||
`INST_LSU_SD: `TRACE(level, ("SD"));
|
||||
`INST_LSU_FENCE:`TRACE(level,("FENCE"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end
|
||||
`EX_FPU: begin
|
||||
case (`INST_FPU_BITS'(op_type))
|
||||
`INST_FPU_ADD: begin
|
||||
if (op_args.fpu.fmt[0])
|
||||
`TRACE(level, ("FADD.D"));
|
||||
else
|
||||
`TRACE(level, ("FADD.S"));
|
||||
end
|
||||
`INST_FPU_SUB: begin
|
||||
if (op_args.fpu.fmt[0])
|
||||
`TRACE(level, ("FSUB.D"));
|
||||
else
|
||||
`TRACE(level, ("FSUB.S"));
|
||||
end
|
||||
`INST_FPU_MUL: begin
|
||||
if (op_args.fpu.fmt[0])
|
||||
`TRACE(level, ("FMUL.D"));
|
||||
else
|
||||
`TRACE(level, ("FMUL.S"));
|
||||
end
|
||||
`INST_FPU_DIV: begin
|
||||
if (op_args.fpu.fmt[0])
|
||||
`TRACE(level, ("FDIV.D"));
|
||||
else
|
||||
`TRACE(level, ("FDIV.S"));
|
||||
end
|
||||
`INST_FPU_SQRT: begin
|
||||
if (op_args.fpu.fmt[0])
|
||||
`TRACE(level, ("FSQRT.D"));
|
||||
else
|
||||
`TRACE(level, ("FSQRT.S"));
|
||||
end
|
||||
`INST_FPU_MADD: begin
|
||||
if (op_args.fpu.fmt[0])
|
||||
`TRACE(level, ("FMADD.D"));
|
||||
else
|
||||
`TRACE(level, ("FMADD.S"));
|
||||
end
|
||||
`INST_FPU_MSUB: begin
|
||||
if (op_args.fpu.fmt[0])
|
||||
`TRACE(level, ("FMSUB.D"));
|
||||
else
|
||||
`TRACE(level, ("FMSUB.S"));
|
||||
end
|
||||
`INST_FPU_NMADD: begin
|
||||
if (op_args.fpu.fmt[0])
|
||||
`TRACE(level, ("FNMADD.D"));
|
||||
else
|
||||
`TRACE(level, ("FNMADD.S"));
|
||||
end
|
||||
`INST_FPU_NMSUB: begin
|
||||
if (op_args.fpu.fmt[0])
|
||||
`TRACE(level, ("FNMSUB.D"));
|
||||
else
|
||||
`TRACE(level, ("FNMSUB.S"));
|
||||
end
|
||||
`INST_FPU_CMP: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
case (op_args.fpu.frm[1:0])
|
||||
0: `TRACE(level, ("FLE.D"));
|
||||
1: `TRACE(level, ("FLT.D"));
|
||||
2: `TRACE(level, ("FEQ.D"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (op_args.fpu.frm[1:0])
|
||||
0: `TRACE(level, ("FLE.S"));
|
||||
1: `TRACE(level, ("FLT.S"));
|
||||
2: `TRACE(level, ("FEQ.S"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end
|
||||
`INST_FPU_F2F: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
`TRACE(level, ("FCVT.D.S"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.S.D"));
|
||||
end
|
||||
end
|
||||
`INST_FPU_F2I: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.L.D"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.W.D"));
|
||||
end
|
||||
end else begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.L.S"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.W.S"));
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_F2U: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.LU.D"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.WU.D"));
|
||||
end
|
||||
end else begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.LU.S"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.WU.S"));
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_I2F: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.D.L"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.D.W"));
|
||||
end
|
||||
end else begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.S.L"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.S.W"));
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_U2F: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.D.LU"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.D.WU"));
|
||||
end
|
||||
end else begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.S.LU"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.S.WU"));
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_MISC: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
case (op_args.fpu.frm)
|
||||
0: `TRACE(level, ("FSGNJ.D"));
|
||||
1: `TRACE(level, ("FSGNJN.D"));
|
||||
2: `TRACE(level, ("FSGNJX.D"));
|
||||
3: `TRACE(level, ("FCLASS.D"));
|
||||
4: `TRACE(level, ("FMV.X.D"));
|
||||
5: `TRACE(level, ("FMV.D.X"));
|
||||
6: `TRACE(level, ("FMIN.D"));
|
||||
7: `TRACE(level, ("FMAX.D"));
|
||||
endcase
|
||||
end else begin
|
||||
case (op_args.fpu.frm)
|
||||
0: `TRACE(level, ("FSGNJ.S"));
|
||||
1: `TRACE(level, ("FSGNJN.S"));
|
||||
2: `TRACE(level, ("FSGNJX.S"));
|
||||
3: `TRACE(level, ("FCLASS.S"));
|
||||
4: `TRACE(level, ("FMV.X.S"));
|
||||
5: `TRACE(level, ("FMV.S.X"));
|
||||
6: `TRACE(level, ("FMIN.S"));
|
||||
7: `TRACE(level, ("FMAX.S"));
|
||||
endcase
|
||||
end
|
||||
end
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
`EX_SFU: begin
|
||||
case (`INST_SFU_BITS'(op_type))
|
||||
`INST_SFU_TMC: `TRACE(level, ("TMC"));
|
||||
`INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN"));
|
||||
`INST_SFU_SPLIT: begin if (op_args.wctl.is_neg) `TRACE(level, ("SPLIT.N")); else `TRACE(level, ("SPLIT")); end
|
||||
`INST_SFU_JOIN: `TRACE(level, ("JOIN"));
|
||||
`INST_SFU_BAR: `TRACE(level, ("BAR"));
|
||||
`INST_SFU_PRED: begin if (op_args.wctl.is_neg) `TRACE(level, ("PRED.N")); else `TRACE(level, ("PRED")); end
|
||||
`INST_SFU_CSRRW: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRWI")); else `TRACE(level, ("CSRRW")); end
|
||||
`INST_SFU_CSRRS: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRSI")); else `TRACE(level, ("CSRRS")); end
|
||||
`INST_SFU_CSRRC: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRCI")); else `TRACE(level, ("CSRRC")); end
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
endtask
|
||||
|
||||
task trace_op_args(input int level,
|
||||
input [`EX_BITS-1:0] ex_type,
|
||||
input [`INST_OP_BITS-1:0] op_type,
|
||||
input VX_gpu_pkg::op_args_t op_args
|
||||
);
|
||||
case (ex_type)
|
||||
`EX_ALU: begin
|
||||
`TRACE(level, (", use_PC=%b, use_imm=%b, imm=0x%0h", op_args.alu.use_PC, op_args.alu.use_imm, op_args.alu.imm));
|
||||
end
|
||||
`EX_LSU: begin
|
||||
`TRACE(level, (", offset=0x%0h", op_args.lsu.offset));
|
||||
end
|
||||
`EX_FPU: begin
|
||||
`TRACE(level, (", fmt=0x%0h, frm=0x%0h", op_args.fpu.fmt, op_args.fpu.frm));
|
||||
end
|
||||
`EX_SFU: begin
|
||||
if (`INST_SFU_IS_CSR(op_type)) begin
|
||||
`TRACE(level, (", addr=0x%0h, use_imm=%b, imm=0x%0h", op_args.csr.addr, op_args.csr.use_imm, op_args.csr.imm));
|
||||
end
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
endtask
|
||||
|
||||
task trace_base_dcr(input int level, input [`VX_DCR_ADDR_WIDTH-1:0] addr);
|
||||
case (addr)
|
||||
`VX_DCR_BASE_STARTUP_ADDR0: `TRACE(level, ("STARTUP_ADDR0"));
|
||||
`VX_DCR_BASE_STARTUP_ADDR1: `TRACE(level, ("STARTUP_ADDR1"));
|
||||
`VX_DCR_BASE_STARTUP_ARG0: `TRACE(level, ("STARTUP_ARG0"));
|
||||
`VX_DCR_BASE_STARTUP_ARG1: `TRACE(level, ("STARTUP_ARG1"));
|
||||
`VX_DCR_BASE_MPM_CLASS: `TRACE(level, ("MPM_CLASS"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
endtask
|
||||
|
||||
`endif
|
||||
|
||||
endpackage
|
||||
|
||||
`endif // VX_TRACE_PKG_VH
|
|
@ -14,7 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_wctl_unit import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0,
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter NUM_LANES = 1
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -27,7 +27,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
VX_warp_ctl_if.master warp_ctl_if,
|
||||
VX_commit_if.master commit_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam LANE_BITS = `CLOG2(NUM_LANES);
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -15,7 +15,7 @@
|
|||
|
||||
`ifdef FPU_DPI
|
||||
|
||||
module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
||||
module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
||||
parameter NUM_LANES = 1,
|
||||
parameter TAG_WIDTH = 1,
|
||||
parameter OUT_BUF = 0
|
||||
|
@ -29,7 +29,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
input wire [NUM_LANES-1:0] mask_in,
|
||||
|
||||
input wire [TAG_WIDTH-1:0] tag_in,
|
||||
|
||||
|
||||
input wire [`INST_FPU_BITS-1:0] op_type,
|
||||
input wire [`INST_FMT_BITS-1:0] fmt,
|
||||
input wire [`INST_FRM_BITS-1:0] frm,
|
||||
|
@ -37,7 +37,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
input wire [NUM_LANES-1:0][`XLEN-1:0] dataa,
|
||||
input wire [NUM_LANES-1:0][`XLEN-1:0] datab,
|
||||
input wire [NUM_LANES-1:0][`XLEN-1:0] datac,
|
||||
output wire [NUM_LANES-1:0][`XLEN-1:0] result,
|
||||
output wire [NUM_LANES-1:0][`XLEN-1:0] result,
|
||||
|
||||
output wire has_fflags,
|
||||
output wire [`FP_FLAGS_BITS-1:0] fflags,
|
||||
|
@ -55,31 +55,31 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
localparam FPC_BITS = `LOG2UP(NUM_FPC);
|
||||
|
||||
localparam RSP_DATAW = (NUM_LANES * `XLEN) + 1 + $bits(fflags_t) + TAG_WIDTH;
|
||||
|
||||
|
||||
wire [NUM_FPC-1:0] per_core_ready_in;
|
||||
wire [NUM_FPC-1:0][NUM_LANES-1:0][`XLEN-1:0] per_core_result;
|
||||
wire [NUM_FPC-1:0][TAG_WIDTH-1:0] per_core_tag_out;
|
||||
reg [NUM_FPC-1:0] per_core_ready_out;
|
||||
wire [NUM_FPC-1:0] per_core_valid_out;
|
||||
wire [NUM_FPC-1:0] per_core_has_fflags;
|
||||
fflags_t [NUM_FPC-1:0] per_core_fflags;
|
||||
wire [NUM_FPC-1:0] per_core_valid_out;
|
||||
wire [NUM_FPC-1:0] per_core_has_fflags;
|
||||
fflags_t [NUM_FPC-1:0] per_core_fflags;
|
||||
|
||||
wire div_ready_in, sqrt_ready_in;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] div_result, sqrt_result;
|
||||
wire [TAG_WIDTH-1:0] div_tag_out, sqrt_tag_out;
|
||||
wire div_ready_out, sqrt_ready_out;
|
||||
wire div_valid_out, sqrt_valid_out;
|
||||
wire div_has_fflags, sqrt_has_fflags;
|
||||
wire div_valid_out, sqrt_valid_out;
|
||||
wire div_has_fflags, sqrt_has_fflags;
|
||||
fflags_t div_fflags, sqrt_fflags;
|
||||
|
||||
reg [FPC_BITS-1:0] core_select;
|
||||
|
||||
reg is_fadd, is_fsub, is_fmul, is_fmadd, is_fmsub, is_fnmadd, is_fnmsub;
|
||||
reg is_div, is_fcmp, is_itof, is_utof, is_ftoi, is_ftou, is_f2f;
|
||||
reg is_div, is_fcmp, is_itof, is_utof, is_ftoi, is_ftou, is_f2f;
|
||||
reg dst_fmt, int_fmt;
|
||||
|
||||
reg [NUM_LANES-1:0][63:0] operands [3];
|
||||
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_LANES; ++i) begin
|
||||
operands[0][i] = 64'(dataa[i]);
|
||||
|
@ -92,23 +92,23 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
|
||||
always @(*) begin
|
||||
is_fadd = 0;
|
||||
is_fsub = 0;
|
||||
is_fmul = 0;
|
||||
is_fsub = 0;
|
||||
is_fmul = 0;
|
||||
is_fmadd = 0;
|
||||
is_fmsub = 0;
|
||||
is_fnmadd = 0;
|
||||
is_fnmsub = 0;
|
||||
is_div = 0;
|
||||
is_fnmadd = 0;
|
||||
is_fnmsub = 0;
|
||||
is_div = 0;
|
||||
is_fcmp = 0;
|
||||
is_itof = 0;
|
||||
is_utof = 0;
|
||||
is_ftoi = 0;
|
||||
is_ftou = 0;
|
||||
is_f2f = 0;
|
||||
|
||||
|
||||
dst_fmt = 0;
|
||||
int_fmt = 0;
|
||||
|
||||
|
||||
`ifdef FLEN_64
|
||||
dst_fmt = fmt[0];
|
||||
`endif
|
||||
|
@ -132,23 +132,23 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
`INST_FPU_F2U: begin core_select = FPU_CVT; is_ftou = 1; end
|
||||
`INST_FPU_I2F: begin core_select = FPU_CVT; is_itof = 1; end
|
||||
`INST_FPU_U2F: begin core_select = FPU_CVT; is_utof = 1; end
|
||||
`INST_FPU_F2F: begin core_select = FPU_CVT; is_f2f = 1; end
|
||||
`INST_FPU_F2F: begin core_select = FPU_CVT; is_f2f = 1; end
|
||||
default: begin core_select = FPU_NCP; end
|
||||
endcase
|
||||
end
|
||||
|
||||
generate
|
||||
generate
|
||||
begin : fma
|
||||
|
||||
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] result_fma;
|
||||
wire [NUM_LANES-1:0][63:0] result_fadd;
|
||||
wire [NUM_LANES-1:0][63:0] result_fsub;
|
||||
wire [NUM_LANES-1:0][63:0] result_fmul;
|
||||
wire [NUM_LANES-1:0][63:0] result_fmadd;
|
||||
wire [NUM_LANES-1:0][63:0] result_fmsub;
|
||||
wire [NUM_LANES-1:0][63:0] result_fnmadd;
|
||||
wire [NUM_LANES-1:0][63:0] result_fnmsub;
|
||||
|
||||
reg [NUM_LANES-1:0][63:0] result_fadd;
|
||||
reg [NUM_LANES-1:0][63:0] result_fsub;
|
||||
reg [NUM_LANES-1:0][63:0] result_fmul;
|
||||
reg [NUM_LANES-1:0][63:0] result_fmadd;
|
||||
reg [NUM_LANES-1:0][63:0] result_fmsub;
|
||||
reg [NUM_LANES-1:0][63:0] result_fnmadd;
|
||||
reg [NUM_LANES-1:0][63:0] result_fnmsub;
|
||||
|
||||
fflags_t [NUM_LANES-1:0] fflags_fma;
|
||||
fflags_t [NUM_LANES-1:0] fflags_fadd;
|
||||
fflags_t [NUM_LANES-1:0] fflags_fsub;
|
||||
|
@ -162,7 +162,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
wire fma_ready = per_core_ready_out[FPU_FMA] || ~per_core_valid_out[FPU_FMA];
|
||||
wire fma_fire = fma_valid && fma_ready;
|
||||
|
||||
always @(*) begin
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_LANES; ++i) begin
|
||||
dpi_fadd (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fadd[i], fflags_fadd[i]);
|
||||
dpi_fsub (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fsub[i], fflags_fsub[i]);
|
||||
|
@ -175,20 +175,20 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
result_fma[i] = is_fadd ? result_fadd[i][`XLEN-1:0] :
|
||||
is_fsub ? result_fsub[i][`XLEN-1:0] :
|
||||
is_fmul ? result_fmul[i][`XLEN-1:0] :
|
||||
is_fmadd ? result_fmadd[i][`XLEN-1:0] :
|
||||
is_fmadd ? result_fmadd[i][`XLEN-1:0] :
|
||||
is_fmsub ? result_fmsub[i][`XLEN-1:0] :
|
||||
is_fnmadd ? result_fnmadd[i][`XLEN-1:0] :
|
||||
is_fnmadd ? result_fnmadd[i][`XLEN-1:0] :
|
||||
is_fnmsub ? result_fnmsub[i][`XLEN-1:0] :
|
||||
'0;
|
||||
|
||||
fflags_fma[i] = is_fadd ? fflags_fadd[i] :
|
||||
is_fsub ? fflags_fsub[i] :
|
||||
is_fmul ? fflags_fmul[i] :
|
||||
is_fmadd ? fflags_fmadd[i] :
|
||||
is_fmadd ? fflags_fmadd[i] :
|
||||
is_fmsub ? fflags_fmsub[i] :
|
||||
is_fnmadd ? fflags_fnmadd[i] :
|
||||
is_fnmsub ? fflags_fnmsub[i] :
|
||||
'0;
|
||||
is_fnmadd ? fflags_fnmadd[i] :
|
||||
is_fnmsub ? fflags_fnmsub[i] :
|
||||
'0;
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -213,19 +213,19 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
end
|
||||
endgenerate
|
||||
|
||||
generate
|
||||
generate
|
||||
begin : fdiv
|
||||
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] result_fdiv_r;
|
||||
wire [NUM_LANES-1:0][63:0] result_fdiv;
|
||||
reg [NUM_LANES-1:0][63:0] result_fdiv;
|
||||
fflags_t [NUM_LANES-1:0] fflags_fdiv;
|
||||
|
||||
wire fdiv_valid = (valid_in && core_select == FPU_DIVSQRT) && is_div;
|
||||
wire fdiv_ready = div_ready_out || ~div_valid_out;
|
||||
wire fdiv_fire = fdiv_valid && fdiv_ready;
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_LANES; ++i) begin
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_LANES; ++i) begin
|
||||
dpi_fdiv (fdiv_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fdiv[i], fflags_fdiv[i]);
|
||||
result_fdiv_r[i] = result_fdiv[i][`XLEN-1:0];
|
||||
end
|
||||
|
@ -252,18 +252,18 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
end
|
||||
endgenerate
|
||||
|
||||
generate
|
||||
generate
|
||||
begin : fsqrt
|
||||
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] result_fsqrt_r;
|
||||
wire [NUM_LANES-1:0][63:0] result_fsqrt;
|
||||
reg [NUM_LANES-1:0][63:0] result_fsqrt;
|
||||
fflags_t [NUM_LANES-1:0] fflags_fsqrt;
|
||||
|
||||
wire fsqrt_valid = (valid_in && core_select == FPU_DIVSQRT) && ~is_div;
|
||||
wire fsqrt_ready = sqrt_ready_out || ~sqrt_valid_out;
|
||||
wire fsqrt_ready = sqrt_ready_out || ~sqrt_valid_out;
|
||||
wire fsqrt_fire = fsqrt_valid && fsqrt_ready;
|
||||
|
||||
always @(*) begin
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_LANES; ++i) begin
|
||||
dpi_fsqrt (fsqrt_fire, int'(dst_fmt), operands[0][i], frm, result_fsqrt[i], fflags_fsqrt[i]);
|
||||
result_fsqrt_r[i] = result_fsqrt[i][`XLEN-1:0];
|
||||
|
@ -295,12 +295,12 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
begin : fcvt
|
||||
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] result_fcvt;
|
||||
wire [NUM_LANES-1:0][63:0] result_itof;
|
||||
wire [NUM_LANES-1:0][63:0] result_utof;
|
||||
wire [NUM_LANES-1:0][63:0] result_ftoi;
|
||||
wire [NUM_LANES-1:0][63:0] result_ftou;
|
||||
wire [NUM_LANES-1:0][63:0] result_f2f;
|
||||
|
||||
reg [NUM_LANES-1:0][63:0] result_itof;
|
||||
reg [NUM_LANES-1:0][63:0] result_utof;
|
||||
reg [NUM_LANES-1:0][63:0] result_ftoi;
|
||||
reg [NUM_LANES-1:0][63:0] result_ftou;
|
||||
reg [NUM_LANES-1:0][63:0] result_f2f;
|
||||
|
||||
fflags_t [NUM_LANES-1:0] fflags_fcvt;
|
||||
fflags_t [NUM_LANES-1:0] fflags_itof;
|
||||
fflags_t [NUM_LANES-1:0] fflags_utof;
|
||||
|
@ -310,20 +310,20 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
wire fcvt_valid = (valid_in && core_select == FPU_CVT);
|
||||
wire fcvt_ready = per_core_ready_out[FPU_CVT] || ~per_core_valid_out[FPU_CVT];
|
||||
wire fcvt_fire = fcvt_valid && fcvt_ready;
|
||||
|
||||
always @(*) begin
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_LANES; ++i) begin
|
||||
dpi_itof (fcvt_fire, int'(dst_fmt), int'(int_fmt), operands[0][i], frm, result_itof[i], fflags_itof[i]);
|
||||
dpi_utof (fcvt_fire, int'(dst_fmt), int'(int_fmt), operands[0][i], frm, result_utof[i], fflags_utof[i]);
|
||||
dpi_ftoi (fcvt_fire, int'(int_fmt), int'(dst_fmt), operands[0][i], frm, result_ftoi[i], fflags_ftoi[i]);
|
||||
dpi_ftou (fcvt_fire, int'(int_fmt), int'(dst_fmt), operands[0][i], frm, result_ftou[i], fflags_ftou[i]);
|
||||
dpi_f2f (fcvt_fire, int'(dst_fmt), operands[0][i], result_f2f[i]);
|
||||
dpi_f2f (fcvt_fire, int'(dst_fmt), operands[0][i], result_f2f[i]);
|
||||
|
||||
result_fcvt[i] = is_itof ? result_itof[i][`XLEN-1:0] :
|
||||
is_utof ? result_utof[i][`XLEN-1:0] :
|
||||
is_ftoi ? result_ftoi[i][`XLEN-1:0] :
|
||||
is_ftou ? result_ftou[i][`XLEN-1:0] :
|
||||
is_f2f ? result_f2f[i][`XLEN-1:0] :
|
||||
is_ftou ? result_ftou[i][`XLEN-1:0] :
|
||||
is_f2f ? result_f2f[i][`XLEN-1:0] :
|
||||
'0;
|
||||
|
||||
fflags_fcvt[i] = is_itof ? fflags_itof[i] :
|
||||
|
@ -355,19 +355,19 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
end
|
||||
endgenerate
|
||||
|
||||
generate
|
||||
generate
|
||||
begin : fncp
|
||||
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] result_fncp;
|
||||
wire [NUM_LANES-1:0][63:0] result_fclss;
|
||||
wire [NUM_LANES-1:0][63:0] result_flt;
|
||||
wire [NUM_LANES-1:0][63:0] result_fle;
|
||||
wire [NUM_LANES-1:0][63:0] result_feq;
|
||||
wire [NUM_LANES-1:0][63:0] result_fmin;
|
||||
wire [NUM_LANES-1:0][63:0] result_fmax;
|
||||
wire [NUM_LANES-1:0][63:0] result_fsgnj;
|
||||
wire [NUM_LANES-1:0][63:0] result_fsgnjn;
|
||||
wire [NUM_LANES-1:0][63:0] result_fsgnjx;
|
||||
reg [NUM_LANES-1:0][63:0] result_fclss;
|
||||
reg [NUM_LANES-1:0][63:0] result_flt;
|
||||
reg [NUM_LANES-1:0][63:0] result_fle;
|
||||
reg [NUM_LANES-1:0][63:0] result_feq;
|
||||
reg [NUM_LANES-1:0][63:0] result_fmin;
|
||||
reg [NUM_LANES-1:0][63:0] result_fmax;
|
||||
reg [NUM_LANES-1:0][63:0] result_fsgnj;
|
||||
reg [NUM_LANES-1:0][63:0] result_fsgnjn;
|
||||
reg [NUM_LANES-1:0][63:0] result_fsgnjx;
|
||||
reg [NUM_LANES-1:0][63:0] result_fmvx;
|
||||
reg [NUM_LANES-1:0][63:0] result_fmvf;
|
||||
|
||||
|
@ -381,15 +381,15 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
wire fncp_valid = (valid_in && core_select == FPU_NCP);
|
||||
wire fncp_ready = per_core_ready_out[FPU_NCP] || ~per_core_valid_out[FPU_NCP];
|
||||
wire fncp_fire = fncp_valid && fncp_ready;
|
||||
|
||||
always @(*) begin
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_LANES; ++i) begin
|
||||
dpi_fclss (fncp_fire, int'(dst_fmt), operands[0][i], result_fclss[i]);
|
||||
dpi_fle (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fle[i], fflags_fle[i]);
|
||||
dpi_flt (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_flt[i], fflags_flt[i]);
|
||||
dpi_flt (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_flt[i], fflags_flt[i]);
|
||||
dpi_feq (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_feq[i], fflags_feq[i]);
|
||||
dpi_fmin (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fmin[i], fflags_fmin[i]);
|
||||
dpi_fmax (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fmax[i], fflags_fmax[i]);
|
||||
dpi_fmax (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fmax[i], fflags_fmax[i]);
|
||||
dpi_fsgnj (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnj[i]);
|
||||
dpi_fsgnjn (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnjn[i]);
|
||||
dpi_fsgnjx (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnjx[i]);
|
||||
|
@ -431,7 +431,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
.data_in ({fncp_valid, tag_in, has_fflags_fncp, result_fncp, fflags_merged}),
|
||||
.data_out ({per_core_valid_out[FPU_NCP], per_core_tag_out[FPU_NCP], per_core_has_fflags[FPU_NCP], per_core_result[FPU_NCP], per_core_fflags[FPU_NCP]})
|
||||
);
|
||||
|
||||
|
||||
assign per_core_ready_in[FPU_NCP] = fncp_ready;
|
||||
|
||||
end
|
||||
|
@ -443,15 +443,15 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATAW (RSP_DATAW),
|
||||
.DATAW (RSP_DATAW),
|
||||
.ARBITER ("R"),
|
||||
.OUT_BUF (0)
|
||||
) div_sqrt_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in ({sqrt_valid_out, div_valid_out}),
|
||||
.valid_in ({sqrt_valid_out, div_valid_out}),
|
||||
.ready_in ({sqrt_ready_out, div_ready_out}),
|
||||
.data_in ({{sqrt_result, sqrt_has_fflags, sqrt_fflags, sqrt_tag_out},
|
||||
.data_in ({{sqrt_result, sqrt_has_fflags, sqrt_fflags, sqrt_tag_out},
|
||||
{div_result, div_has_fflags, div_fflags, div_tag_out}}),
|
||||
.data_out ({per_core_result[FPU_DIVSQRT], per_core_has_fflags[FPU_DIVSQRT], per_core_fflags[FPU_DIVSQRT], per_core_tag_out[FPU_DIVSQRT]}),
|
||||
.valid_out (per_core_valid_out[FPU_DIVSQRT]),
|
||||
|
@ -469,13 +469,13 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (NUM_FPC),
|
||||
.DATAW (RSP_DATAW),
|
||||
.ARBITER ("R"),
|
||||
.DATAW (RSP_DATAW),
|
||||
.ARBITER ("F"),
|
||||
.OUT_BUF (OUT_BUF)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (per_core_valid_out),
|
||||
.valid_in (per_core_valid_out),
|
||||
.ready_in (per_core_ready_out),
|
||||
.data_in (per_core_data_out),
|
||||
.data_out ({result, has_fflags, fflags, tag_out}),
|
||||
|
|
|
@ -289,14 +289,14 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
end
|
||||
|
||||
wire [NUM_LANES-1:0][31:0] result_s;
|
||||
|
||||
|
||||
wire [1:0] op_ret_int_out;
|
||||
`UNUSED_VAR (op_ret_int_out)
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (NUM_FPC),
|
||||
.DATAW (RSP_DATAW + 2),
|
||||
.ARBITER ("R"),
|
||||
.ARBITER ("F"),
|
||||
.OUT_BUF (OUT_BUF)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -21,7 +21,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
parameter TAG_WIDTH = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire reset,
|
||||
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
@ -29,7 +29,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
input wire [NUM_LANES-1:0] mask_in,
|
||||
|
||||
input wire [TAG_WIDTH-1:0] tag_in,
|
||||
|
||||
|
||||
input wire [`INST_FRM_BITS-1:0] frm,
|
||||
|
||||
input wire is_madd,
|
||||
|
@ -39,7 +39,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
input wire [NUM_LANES-1:0][31:0] dataa,
|
||||
input wire [NUM_LANES-1:0][31:0] datab,
|
||||
input wire [NUM_LANES-1:0][31:0] datac,
|
||||
output wire [NUM_LANES-1:0][31:0] result,
|
||||
output wire [NUM_LANES-1:0][31:0] result,
|
||||
|
||||
output wire has_fflags,
|
||||
output wire [`FP_FLAGS_BITS-1:0] fflags,
|
||||
|
@ -52,11 +52,11 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
`UNUSED_VAR (frm)
|
||||
|
||||
wire [NUM_LANES-1:0][3*32-1:0] data_in;
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
|
||||
wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out;
|
||||
|
||||
wire pe_enable;
|
||||
wire pe_enable;
|
||||
wire [NUM_PES-1:0][3*32-1:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
|
||||
|
||||
|
@ -66,7 +66,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
always @(*) begin
|
||||
if (is_madd) begin
|
||||
// MADD / MSUB / NMADD / NMSUB
|
||||
a[i] = is_neg ? {~dataa[i][31], dataa[i][30:0]} : dataa[i];
|
||||
a[i] = is_neg ? {~dataa[i][31], dataa[i][30:0]} : dataa[i];
|
||||
b[i] = datab[i];
|
||||
c[i] = (is_neg ^ is_sub) ? {~datac[i][31], datac[i][30:0]} : datac[i];
|
||||
end else begin
|
||||
|
@ -81,7 +81,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
b[i] = dataa[i];
|
||||
c[i] = is_sub ? {~datab[i][31], datab[i][30:0]} : datab[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -90,15 +90,15 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
assign data_in[i][32 +: 32] = b[i];
|
||||
assign data_in[i][64 +: 32] = c[i];
|
||||
end
|
||||
|
||||
|
||||
VX_pe_serializer #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.LATENCY (`LATENCY_FMA),
|
||||
.DATA_IN_WIDTH(3*32),
|
||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG (1)
|
||||
.PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0)
|
||||
) pe_serializer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -123,7 +123,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
fflags_t [NUM_LANES-1:0] per_lane_fflags;
|
||||
|
||||
`ifdef QUARTUS
|
||||
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
acl_fmadd fmadd (
|
||||
.clk (clk),
|
||||
|
@ -136,7 +136,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
);
|
||||
assign pe_data_out[i][32 +: `FP_FLAGS_BITS] = 'x;
|
||||
end
|
||||
|
||||
|
||||
assign has_fflags = 0;
|
||||
assign per_lane_fflags = 'x;
|
||||
|
||||
|
@ -144,7 +144,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
wire [2:0] tuser;
|
||||
|
||||
|
||||
xil_fma fma (
|
||||
.aclk (clk),
|
||||
.aclken (pe_enable),
|
||||
|
@ -172,15 +172,15 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
`UNUSED_VAR (r)
|
||||
fflags_t f;
|
||||
|
||||
always @(*) begin
|
||||
always @(*) begin
|
||||
dpi_fmadd (
|
||||
pe_enable,
|
||||
int'(0),
|
||||
{32'hffffffff, pe_data_in[i][0 +: 32]},
|
||||
{32'hffffffff, pe_data_in[i][32 +: 32]},
|
||||
{32'hffffffff, pe_data_in[i][64 +: 32]},
|
||||
frm,
|
||||
r,
|
||||
pe_enable,
|
||||
int'(0),
|
||||
{32'hffffffff, pe_data_in[i][0 +: 32]},
|
||||
{32'hffffffff, pe_data_in[i][32 +: 32]},
|
||||
{32'hffffffff, pe_data_in[i][64 +: 32]},
|
||||
frm,
|
||||
r,
|
||||
f
|
||||
);
|
||||
end
|
||||
|
|
|
@ -105,7 +105,7 @@ module VX_fpu_fpnew
|
|||
`UNUSED_VAR (fmt)
|
||||
|
||||
always @(*) begin
|
||||
fpu_op = 'x;
|
||||
fpu_op = fpnew_pkg::operation_e'('x);
|
||||
fpu_rnd = frm;
|
||||
fpu_op_mod = 0;
|
||||
fpu_has_fflags = 1;
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -15,17 +15,14 @@
|
|||
|
||||
interface VX_commit_sched_if ();
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0] committed;
|
||||
wire [`ISSUE_WIDTH-1:0][`NW_WIDTH-1:0] committed_wid;
|
||||
wire [`NUM_WARPS-1:0] committed_warps;
|
||||
|
||||
modport master (
|
||||
output committed,
|
||||
output committed_wid
|
||||
output committed_warps
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input committed,
|
||||
input committed_wid
|
||||
input committed_warps
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -13,11 +13,14 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_decode_if import VX_gpu_pkg::*; ();
|
||||
interface VX_decode_if import VX_gpu_pkg::*; #(
|
||||
parameter NUM_WARPS = `NUM_WARPS,
|
||||
parameter NW_WIDTH = `LOG2UP(NUM_WARPS)
|
||||
);
|
||||
|
||||
typedef struct packed {
|
||||
logic [`UUID_WIDTH-1:0] uuid;
|
||||
logic [`NW_WIDTH-1:0] wid;
|
||||
logic [NW_WIDTH-1:0] wid;
|
||||
logic [`NUM_THREADS-1:0] tmask;
|
||||
logic [`PC_BITS-1:0] PC;
|
||||
logic [`EX_BITS-1:0] ex_type;
|
||||
|
@ -34,7 +37,7 @@ interface VX_decode_if import VX_gpu_pkg::*; ();
|
|||
data_t data;
|
||||
logic ready;
|
||||
`ifndef L1_ENABLE
|
||||
wire [`NUM_WARPS-1:0] ibuf_pop;
|
||||
wire [NUM_WARPS-1:0] ibuf_pop;
|
||||
`endif
|
||||
|
||||
modport master (
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -13,39 +13,29 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_pipeline_perf_if ();
|
||||
wire [`PERF_CTR_BITS-1:0] sched_idles;
|
||||
wire [`PERF_CTR_BITS-1:0] sched_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] ibf_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] scb_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] units_uses [`NUM_EX_UNITS];
|
||||
wire [`PERF_CTR_BITS-1:0] sfu_uses [`NUM_SFU_UNITS];
|
||||
interface VX_pipeline_perf_if import VX_gpu_pkg::*; ();
|
||||
sched_perf_t sched;
|
||||
issue_perf_t issue;
|
||||
|
||||
wire [`PERF_CTR_BITS-1:0] ifetches;
|
||||
wire [`PERF_CTR_BITS-1:0] loads;
|
||||
wire [`PERF_CTR_BITS-1:0] stores;
|
||||
wire [`PERF_CTR_BITS-1:0] stores;
|
||||
wire [`PERF_CTR_BITS-1:0] ifetch_latency;
|
||||
wire [`PERF_CTR_BITS-1:0] load_latency;
|
||||
|
||||
modport schedule (
|
||||
output sched_idles,
|
||||
output sched_stalls
|
||||
);
|
||||
|
||||
modport issue (
|
||||
output ibf_stalls,
|
||||
output scb_stalls,
|
||||
output units_uses,
|
||||
output sfu_uses
|
||||
modport master (
|
||||
output sched,
|
||||
output issue,
|
||||
output ifetches,
|
||||
output loads,
|
||||
output stores,
|
||||
output ifetch_latency,
|
||||
output load_latency
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input sched_idles,
|
||||
input sched_stalls,
|
||||
input ibf_stalls,
|
||||
input scb_stalls,
|
||||
input units_uses,
|
||||
input sfu_uses,
|
||||
input sched,
|
||||
input issue,
|
||||
input ifetches,
|
||||
input loads,
|
||||
input stores,
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,11 +14,11 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
`TRACING_OFF
|
||||
module VX_avs_adapter #(
|
||||
parameter DATA_WIDTH = 1,
|
||||
parameter ADDR_WIDTH = 1,
|
||||
module VX_avs_adapter #(
|
||||
parameter DATA_WIDTH = 1,
|
||||
parameter ADDR_WIDTH = 1,
|
||||
parameter BURST_WIDTH = 1,
|
||||
parameter NUM_BANKS = 1,
|
||||
parameter NUM_BANKS = 1,
|
||||
parameter TAG_WIDTH = 1,
|
||||
parameter RD_QUEUE_SIZE = 1,
|
||||
parameter REQ_OUT_BUF = 0,
|
||||
|
@ -29,15 +29,15 @@ module VX_avs_adapter #(
|
|||
|
||||
// Memory request
|
||||
input wire mem_req_valid,
|
||||
input wire mem_req_rw,
|
||||
input wire [DATA_WIDTH/8-1:0] mem_req_byteen,
|
||||
input wire mem_req_rw,
|
||||
input wire [DATA_WIDTH/8-1:0] mem_req_byteen,
|
||||
input wire [ADDR_WIDTH-1:0] mem_req_addr,
|
||||
input wire [DATA_WIDTH-1:0] mem_req_data,
|
||||
input wire [TAG_WIDTH-1:0] mem_req_tag,
|
||||
output wire mem_req_ready,
|
||||
|
||||
// Memory response
|
||||
output wire mem_rsp_valid,
|
||||
// Memory response
|
||||
output wire mem_rsp_valid,
|
||||
output wire [DATA_WIDTH-1:0] mem_rsp_data,
|
||||
output wire [TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
input wire mem_rsp_ready,
|
||||
|
@ -60,7 +60,7 @@ module VX_avs_adapter #(
|
|||
localparam BANK_OFFSETW = ADDR_WIDTH - LOG2_NUM_BANKS;
|
||||
|
||||
// Requests handling //////////////////////////////////////////////////////
|
||||
|
||||
|
||||
wire [NUM_BANKS-1:0] req_queue_push, req_queue_pop;
|
||||
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] req_queue_tag_out;
|
||||
wire [NUM_BANKS-1:0] req_queue_going_full;
|
||||
|
@ -70,38 +70,40 @@ module VX_avs_adapter #(
|
|||
wire [NUM_BANKS-1:0] bank_req_ready;
|
||||
|
||||
if (NUM_BANKS > 1) begin
|
||||
assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0];
|
||||
assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0];
|
||||
end else begin
|
||||
assign req_bank_sel = '0;
|
||||
end
|
||||
|
||||
assign req_bank_off = mem_req_addr[ADDR_WIDTH-1:LOG2_NUM_BANKS];
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign req_queue_push[i] = mem_req_valid && ~mem_req_rw && bank_req_ready[i] && (req_bank_sel == i);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
VX_pending_size #(
|
||||
VX_pending_size #(
|
||||
.SIZE (RD_QUEUE_SIZE)
|
||||
) pending_size (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.incr (req_queue_push[i]),
|
||||
.decr (req_queue_pop[i]),
|
||||
.decr (req_queue_pop[i]),
|
||||
`UNUSED_PIN (empty),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
.full (req_queue_going_full[i]),
|
||||
.size (req_queue_size[i]),
|
||||
`UNUSED_PIN (empty)
|
||||
);
|
||||
`UNUSED_PIN (alm_full),
|
||||
.size (req_queue_size[i])
|
||||
);
|
||||
`UNUSED_VAR (req_queue_size)
|
||||
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (TAG_WIDTH),
|
||||
.DEPTH (RD_QUEUE_SIZE)
|
||||
) rd_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (req_queue_push[i]),
|
||||
.push (req_queue_push[i]),
|
||||
.pop (req_queue_pop[i]),
|
||||
.data_in (mem_req_tag),
|
||||
.data_out (req_queue_tag_out[i]),
|
||||
|
@ -111,9 +113,9 @@ module VX_avs_adapter #(
|
|||
`UNUSED_PIN (alm_full),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
wire valid_out;
|
||||
wire rw_out;
|
||||
wire [DATA_SIZE-1:0] byteen_out;
|
||||
|
@ -174,7 +176,7 @@ module VX_avs_adapter #(
|
|||
.reset (reset),
|
||||
.push (avs_readdatavalid[i]),
|
||||
.pop (req_queue_pop[i]),
|
||||
.data_in (avs_readdata[i]),
|
||||
.data_in (avs_readdata[i]),
|
||||
.data_out (rsp_queue_data_out[i]),
|
||||
.empty (rsp_queue_empty[i]),
|
||||
`UNUSED_PIN (full),
|
||||
|
@ -183,7 +185,7 @@ module VX_avs_adapter #(
|
|||
`UNUSED_PIN (size)
|
||||
);
|
||||
end
|
||||
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign rsp_arb_valid_in[i] = !rsp_queue_empty[i];
|
||||
assign rsp_arb_data_in[i] = {rsp_queue_data_out[i], req_queue_tag_out[i]};
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -16,22 +16,21 @@
|
|||
`TRACING_OFF
|
||||
module VX_cyclic_arbiter #(
|
||||
parameter NUM_REQS = 1,
|
||||
parameter LOCK_ENABLE = 0,
|
||||
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire [NUM_REQS-1:0] requests,
|
||||
input wire [NUM_REQS-1:0] requests,
|
||||
output wire [LOG_NUM_REQS-1:0] grant_index,
|
||||
output wire [NUM_REQS-1:0] grant_onehot,
|
||||
output wire [NUM_REQS-1:0] grant_onehot,
|
||||
output wire grant_valid,
|
||||
input wire grant_unlock
|
||||
input wire grant_ready
|
||||
);
|
||||
if (NUM_REQS == 1) begin
|
||||
if (NUM_REQS == 1) begin
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
assign grant_index = '0;
|
||||
assign grant_onehot = requests;
|
||||
assign grant_valid = requests[0];
|
||||
|
@ -45,10 +44,10 @@ module VX_cyclic_arbiter #(
|
|||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
grant_index_r <= '0;
|
||||
end else begin
|
||||
end else begin
|
||||
if (!IS_POW2 && grant_index_r == LOG_NUM_REQS'(NUM_REQS-1)) begin
|
||||
grant_index_r <= '0;
|
||||
end else if (!LOCK_ENABLE || ~grant_valid || grant_unlock) begin
|
||||
end else if (~grant_valid || grant_ready) begin
|
||||
grant_index_r <= grant_index_r + LOG_NUM_REQS'(1);
|
||||
end
|
||||
end
|
||||
|
@ -60,11 +59,11 @@ module VX_cyclic_arbiter #(
|
|||
grant_onehot_r[grant_index_r] = 1'b1;
|
||||
end
|
||||
|
||||
assign grant_index = grant_index_r;
|
||||
assign grant_index = grant_index_r;
|
||||
assign grant_onehot = grant_onehot_r;
|
||||
assign grant_valid = requests[grant_index_r];
|
||||
|
||||
end
|
||||
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -17,20 +17,21 @@
|
|||
module VX_dp_ram #(
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 1,
|
||||
parameter ADDR_MIN = 0,
|
||||
parameter WRENW = 1,
|
||||
parameter OUT_REG = 0,
|
||||
parameter NO_RWCHECK = 0,
|
||||
parameter LUTRAM = 0,
|
||||
parameter LUTRAM = 0,
|
||||
parameter INIT_ENABLE = 0,
|
||||
parameter INIT_FILE = "",
|
||||
parameter [DATAW-1:0] INIT_VALUE = 0,
|
||||
parameter ADDRW = `LOG2UP(SIZE)
|
||||
) (
|
||||
) (
|
||||
input wire clk,
|
||||
input wire read,
|
||||
input wire write,
|
||||
input wire [WRENW-1:0] wren,
|
||||
input wire [ADDRW-1:0] waddr,
|
||||
input wire [ADDRW-1:0] waddr,
|
||||
input wire [DATAW-1:0] wdata,
|
||||
input wire [ADDRW-1:0] raddr,
|
||||
output wire [DATAW-1:0] rdata
|
||||
|
@ -48,16 +49,16 @@ module VX_dp_ram #(
|
|||
ram[i] = INIT_VALUE; \
|
||||
end \
|
||||
end
|
||||
|
||||
|
||||
`UNUSED_VAR (read)
|
||||
|
||||
`ifdef SYNTHESIS
|
||||
if (WRENW > 1) begin
|
||||
`ifdef QUARTUS
|
||||
if (LUTRAM != 0) begin
|
||||
if (OUT_REG != 0) begin
|
||||
if (OUT_REG != 0) begin
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
`USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [SIZE-1:0];
|
||||
`USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
|
@ -72,7 +73,7 @@ module VX_dp_ram #(
|
|||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin
|
||||
`USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [SIZE-1:0];
|
||||
`USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
|
@ -87,7 +88,7 @@ module VX_dp_ram #(
|
|||
end else begin
|
||||
if (OUT_REG != 0) begin
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
reg [WRENW-1:0][WSELW-1:0] ram [SIZE-1:0];
|
||||
reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
|
@ -103,7 +104,7 @@ module VX_dp_ram #(
|
|||
assign rdata = rdata_r;
|
||||
end else begin
|
||||
if (NO_RWCHECK != 0) begin
|
||||
`NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [SIZE-1:0];
|
||||
`NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
|
@ -115,7 +116,7 @@ module VX_dp_ram #(
|
|||
end
|
||||
assign rdata = ram[raddr];
|
||||
end else begin
|
||||
reg [WRENW-1:0][WSELW-1:0] ram [SIZE-1:0];
|
||||
reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
|
@ -132,9 +133,9 @@ module VX_dp_ram #(
|
|||
`else
|
||||
// default synthesis
|
||||
if (LUTRAM != 0) begin
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] ram [SIZE-1:0];
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
if (OUT_REG != 0) begin
|
||||
if (OUT_REG != 0) begin
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
|
@ -161,7 +162,7 @@ module VX_dp_ram #(
|
|||
end
|
||||
end else begin
|
||||
if (OUT_REG != 0) begin
|
||||
reg [DATAW-1:0] ram [SIZE-1:0];
|
||||
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
|
@ -178,7 +179,7 @@ module VX_dp_ram #(
|
|||
assign rdata = rdata_r;
|
||||
end else begin
|
||||
if (NO_RWCHECK != 0) begin
|
||||
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [SIZE-1:0];
|
||||
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
|
@ -190,7 +191,7 @@ module VX_dp_ram #(
|
|||
end
|
||||
assign rdata = ram[raddr];
|
||||
end else begin
|
||||
reg [DATAW-1:0] ram [SIZE-1:0];
|
||||
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
|
@ -208,9 +209,9 @@ module VX_dp_ram #(
|
|||
end else begin
|
||||
// (WRENW == 1)
|
||||
if (LUTRAM != 0) begin
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] ram [SIZE-1:0];
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
if (OUT_REG != 0) begin
|
||||
if (OUT_REG != 0) begin
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
|
@ -231,7 +232,7 @@ module VX_dp_ram #(
|
|||
end
|
||||
end else begin
|
||||
if (OUT_REG != 0) begin
|
||||
reg [DATAW-1:0] ram [SIZE-1:0];
|
||||
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
|
@ -245,7 +246,7 @@ module VX_dp_ram #(
|
|||
assign rdata = rdata_r;
|
||||
end else begin
|
||||
if (NO_RWCHECK != 0) begin
|
||||
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [SIZE-1:0];
|
||||
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
|
@ -254,7 +255,7 @@ module VX_dp_ram #(
|
|||
end
|
||||
assign rdata = ram[raddr];
|
||||
end else begin
|
||||
reg [DATAW-1:0] ram [SIZE-1:0];
|
||||
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
|
@ -265,10 +266,10 @@ module VX_dp_ram #(
|
|||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
`else
|
||||
// RAM emulation
|
||||
reg [DATAW-1:0] ram [SIZE-1:0];
|
||||
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
wire [DATAW-1:0] ram_n;
|
||||
|
@ -276,8 +277,8 @@ module VX_dp_ram #(
|
|||
assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW];
|
||||
end
|
||||
|
||||
if (OUT_REG != 0) begin
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
if (OUT_REG != 0) begin
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= ram_n;
|
||||
|
@ -287,7 +288,7 @@ module VX_dp_ram #(
|
|||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin
|
||||
end else begin
|
||||
reg [DATAW-1:0] prev_data;
|
||||
reg [ADDRW-1:0] prev_waddr;
|
||||
reg prev_write;
|
||||
|
@ -298,7 +299,7 @@ module VX_dp_ram #(
|
|||
prev_write <= (| wren);
|
||||
prev_data <= ram[waddr];
|
||||
prev_waddr <= waddr;
|
||||
end
|
||||
end
|
||||
if (LUTRAM || !NO_RWCHECK) begin
|
||||
`UNUSED_VAR (prev_write)
|
||||
`UNUSED_VAR (prev_data)
|
||||
|
|
115
hw/rtl/libs/VX_dp_ram_rst.sv
Normal file
115
hw/rtl/libs/VX_dp_ram_rst.sv
Normal file
|
@ -0,0 +1,115 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
`TRACING_OFF
|
||||
module VX_dp_ram_rst #(
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 1,
|
||||
parameter ADDR_MIN = 0,
|
||||
parameter WRENW = 1,
|
||||
parameter OUT_REG = 0,
|
||||
parameter NO_RWCHECK = 0,
|
||||
parameter LUTRAM = 0,
|
||||
parameter INIT_ENABLE = 0,
|
||||
parameter INIT_FILE = "",
|
||||
parameter [DATAW-1:0] INIT_VALUE = 0,
|
||||
parameter ADDRW = `LOG2UP(SIZE)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire read,
|
||||
input wire write,
|
||||
input wire [WRENW-1:0] wren,
|
||||
input wire [ADDRW-1:0] waddr,
|
||||
input wire [DATAW-1:0] wdata,
|
||||
input wire [ADDRW-1:0] raddr,
|
||||
output wire [DATAW-1:0] rdata
|
||||
);
|
||||
localparam WSELW = DATAW / WRENW;
|
||||
`STATIC_ASSERT((WRENW * WSELW == DATAW), ("invalid parameter"))
|
||||
|
||||
`define RAM_INITIALIZATION \
|
||||
if (INIT_ENABLE != 0) begin \
|
||||
if (INIT_FILE != "") begin \
|
||||
initial $readmemh(INIT_FILE, ram); \
|
||||
end else begin \
|
||||
initial \
|
||||
for (integer i = 0; i < SIZE; ++i) \
|
||||
ram[i] = INIT_VALUE; \
|
||||
end \
|
||||
end
|
||||
|
||||
`UNUSED_VAR (read)
|
||||
|
||||
// RAM emulation
|
||||
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
wire [DATAW-1:0] ram_n;
|
||||
for (genvar i = 0; i < WRENW; ++i) begin
|
||||
assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW];
|
||||
end
|
||||
|
||||
if (OUT_REG != 0) begin
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
for (integer i = 0; i < SIZE; ++i) begin
|
||||
ram[i] <= DATAW'(INIT_VALUE);
|
||||
end
|
||||
rdata_r <= '0;
|
||||
end else begin
|
||||
if (write) begin
|
||||
ram[waddr] <= ram_n;
|
||||
end
|
||||
if (read) begin
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin
|
||||
reg [DATAW-1:0] prev_data;
|
||||
reg [ADDRW-1:0] prev_waddr;
|
||||
reg prev_write;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
for (integer i = 0; i < SIZE; ++i) begin
|
||||
ram[i] <= DATAW'(INIT_VALUE);
|
||||
end
|
||||
prev_write <= 0;
|
||||
prev_data <= '0;
|
||||
prev_waddr <= '0;
|
||||
end else begin
|
||||
if (write) begin
|
||||
ram[waddr] <= ram_n;
|
||||
end
|
||||
prev_write <= (| wren);
|
||||
prev_data <= ram[waddr];
|
||||
prev_waddr <= waddr;
|
||||
end
|
||||
end
|
||||
if (LUTRAM || !NO_RWCHECK) begin
|
||||
`UNUSED_VAR (prev_write)
|
||||
`UNUSED_VAR (prev_data)
|
||||
`UNUSED_VAR (prev_waddr)
|
||||
assign rdata = ram[raddr];
|
||||
end else begin
|
||||
assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -19,14 +19,14 @@ module VX_elastic_buffer #(
|
|||
parameter SIZE = 1,
|
||||
parameter OUT_REG = 0,
|
||||
parameter LUTRAM = 0
|
||||
) (
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
input wire valid_in,
|
||||
output wire ready_in,
|
||||
output wire ready_in,
|
||||
input wire [DATAW-1:0] data_in,
|
||||
|
||||
|
||||
output wire [DATAW-1:0] data_out,
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
|
@ -55,7 +55,7 @@ module VX_elastic_buffer #(
|
|||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
end else if (SIZE == 2) begin
|
||||
end else if (SIZE == 2 && LUTRAM == 0) begin
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (DATAW),
|
||||
|
@ -71,9 +71,9 @@ module VX_elastic_buffer #(
|
|||
.data_out (data_out),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
|
||||
end else begin
|
||||
|
||||
|
||||
wire empty, full;
|
||||
|
||||
wire [DATAW-1:0] data_out_t;
|
||||
|
@ -93,7 +93,7 @@ module VX_elastic_buffer #(
|
|||
.push (push),
|
||||
.pop (pop),
|
||||
.data_in(data_in),
|
||||
.data_out(data_out_t),
|
||||
.data_out(data_out_t),
|
||||
.empty (empty),
|
||||
.full (full),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
|
@ -105,15 +105,15 @@ module VX_elastic_buffer #(
|
|||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (OUT_REG == 2)
|
||||
.SIZE ((OUT_REG == 2) ? 1 : 0)
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (~empty),
|
||||
.data_in (data_out_t),
|
||||
.ready_in (ready_out_t),
|
||||
.ready_in (ready_out_t),
|
||||
.valid_out (valid_out),
|
||||
.data_out (data_out),
|
||||
.data_out (data_out),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -16,53 +16,52 @@
|
|||
`TRACING_OFF
|
||||
module VX_fair_arbiter #(
|
||||
parameter NUM_REQS = 1,
|
||||
parameter LOCK_ENABLE = 0,
|
||||
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire [NUM_REQS-1:0] requests,
|
||||
input wire [NUM_REQS-1:0] requests,
|
||||
output wire [LOG_NUM_REQS-1:0] grant_index,
|
||||
output wire [NUM_REQS-1:0] grant_onehot,
|
||||
output wire [NUM_REQS-1:0] grant_onehot,
|
||||
output wire grant_valid,
|
||||
input wire grant_unlock
|
||||
input wire grant_ready
|
||||
);
|
||||
if (NUM_REQS == 1) begin
|
||||
|
||||
if (NUM_REQS == 1) begin
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (grant_unlock)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (grant_ready)
|
||||
|
||||
assign grant_index = '0;
|
||||
assign grant_onehot = requests;
|
||||
assign grant_valid = requests[0];
|
||||
|
||||
end else begin
|
||||
end else begin
|
||||
|
||||
reg [NUM_REQS-1:0] buffer;
|
||||
reg [NUM_REQS-1:0] grant_mask;
|
||||
|
||||
wire [NUM_REQS-1:0] buffer_qual = buffer & requests;
|
||||
wire [NUM_REQS-1:0] requests_qual = (| buffer) ? buffer_qual : requests;
|
||||
wire [NUM_REQS-1:0] buffer_n = requests_qual & ~grant_onehot;
|
||||
wire [NUM_REQS-1:0] requests_rem = requests & ~grant_mask;
|
||||
wire rem_valid = (| requests_rem);
|
||||
wire [NUM_REQS-1:0] requests_qual = rem_valid ? requests_rem : requests;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
buffer <= '0;
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
buffer <= buffer_n;
|
||||
grant_mask <= '0;
|
||||
end else if (grant_ready) begin
|
||||
grant_mask <= rem_valid ? (grant_mask | grant_onehot) : grant_onehot;
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
VX_priority_arbiter #(
|
||||
.NUM_REQS (NUM_REQS)
|
||||
) priority_arbiter (
|
||||
.requests (requests_qual),
|
||||
.requests (requests_qual),
|
||||
.grant_index (grant_index),
|
||||
.grant_onehot (grant_onehot),
|
||||
.grant_valid (grant_valid)
|
||||
);
|
||||
|
||||
end
|
||||
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -22,28 +22,28 @@ module VX_fifo_queue #(
|
|||
parameter OUT_REG = 0,
|
||||
parameter LUTRAM = 1,
|
||||
parameter SIZEW = `CLOG2(DEPTH+1)
|
||||
) (
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire reset,
|
||||
input wire push,
|
||||
input wire pop,
|
||||
input wire pop,
|
||||
input wire [DATAW-1:0] data_in,
|
||||
output wire [DATAW-1:0] data_out,
|
||||
output wire empty,
|
||||
output wire empty,
|
||||
output wire alm_empty,
|
||||
output wire full,
|
||||
output wire full,
|
||||
output wire alm_full,
|
||||
output wire [SIZEW-1:0] size
|
||||
);
|
||||
|
||||
localparam ADDRW = `CLOG2(DEPTH);
|
||||
);
|
||||
|
||||
localparam ADDRW = `CLOG2(DEPTH);
|
||||
|
||||
`STATIC_ASSERT(ALM_FULL > 0, ("alm_full must be greater than 0!"))
|
||||
`STATIC_ASSERT(ALM_FULL < DEPTH, ("alm_full must be smaller than size!"))
|
||||
`STATIC_ASSERT(ALM_EMPTY > 0, ("alm_empty must be greater than 0!"))
|
||||
`STATIC_ASSERT(ALM_EMPTY < DEPTH, ("alm_empty must be smaller than size!"))
|
||||
`STATIC_ASSERT(`IS_POW2(DEPTH), ("size must be a power of 2!"))
|
||||
|
||||
|
||||
if (DEPTH == 1) begin
|
||||
|
||||
reg [DATAW-1:0] head_r;
|
||||
|
@ -52,7 +52,7 @@ module VX_fifo_queue #(
|
|||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
head_r <= '0;
|
||||
size_r <= '0;
|
||||
size_r <= '0;
|
||||
end else begin
|
||||
`ASSERT(~push || ~full, ("runtime error: writing to a full queue"));
|
||||
`ASSERT(~pop || ~empty, ("runtime error: reading an empty queue"));
|
||||
|
@ -63,11 +63,11 @@ module VX_fifo_queue #(
|
|||
end else if (pop) begin
|
||||
size_r <= '0;
|
||||
end
|
||||
if (push) begin
|
||||
if (push) begin
|
||||
head_r <= data_in;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign data_out = head_r;
|
||||
assign empty = (size_r == 0);
|
||||
|
@ -77,7 +77,7 @@ module VX_fifo_queue #(
|
|||
assign size = size_r;
|
||||
|
||||
end else begin
|
||||
|
||||
|
||||
reg empty_r, alm_empty_r;
|
||||
reg full_r, alm_full_r;
|
||||
reg [ADDRW-1:0] used_r;
|
||||
|
@ -86,8 +86,8 @@ module VX_fifo_queue #(
|
|||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
empty_r <= 1;
|
||||
alm_empty_r <= 1;
|
||||
full_r <= 0;
|
||||
alm_empty_r <= 1;
|
||||
full_r <= 0;
|
||||
alm_full_r <= 0;
|
||||
used_r <= '0;
|
||||
end else begin
|
||||
|
@ -106,21 +106,21 @@ module VX_fifo_queue #(
|
|||
end else if (pop) begin
|
||||
full_r <= 0;
|
||||
if (used_r == ADDRW'(ALM_FULL))
|
||||
alm_full_r <= 0;
|
||||
alm_full_r <= 0;
|
||||
if (used_r == ADDRW'(1))
|
||||
empty_r <= 1;
|
||||
if (used_r == ADDRW'(ALM_EMPTY+1))
|
||||
alm_empty_r <= 1;
|
||||
end
|
||||
used_r <= used_n;
|
||||
end
|
||||
end
|
||||
used_r <= used_n;
|
||||
end
|
||||
end
|
||||
|
||||
if (DEPTH == 2) begin
|
||||
if (DEPTH == 2 && LUTRAM == 0) begin
|
||||
|
||||
assign used_n = used_r ^ (push ^ pop);
|
||||
|
||||
if (0 == OUT_REG) begin
|
||||
if (0 == OUT_REG) begin
|
||||
|
||||
reg [1:0][DATAW-1:0] shift_reg;
|
||||
|
||||
|
@ -131,8 +131,8 @@ module VX_fifo_queue #(
|
|||
end
|
||||
end
|
||||
|
||||
assign data_out = shift_reg[!used_r[0]];
|
||||
|
||||
assign data_out = shift_reg[!used_r[0]];
|
||||
|
||||
end else begin
|
||||
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
|
@ -152,16 +152,16 @@ module VX_fifo_queue #(
|
|||
assign data_out = data_out_r;
|
||||
|
||||
end
|
||||
|
||||
|
||||
end else begin
|
||||
|
||||
|
||||
assign used_n = $signed(used_r) + ADDRW'($signed(2'(push) - 2'(pop)));
|
||||
|
||||
if (0 == OUT_REG) begin
|
||||
if (0 == OUT_REG) begin
|
||||
|
||||
reg [ADDRW-1:0] rd_ptr_r;
|
||||
reg [ADDRW-1:0] wr_ptr_r;
|
||||
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
rd_ptr_r <= '0;
|
||||
|
@ -169,7 +169,7 @@ module VX_fifo_queue #(
|
|||
end else begin
|
||||
wr_ptr_r <= wr_ptr_r + ADDRW'(push);
|
||||
rd_ptr_r <= rd_ptr_r + ADDRW'(pop);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
VX_dp_ram #(
|
||||
|
@ -179,8 +179,8 @@ module VX_fifo_queue #(
|
|||
) dp_ram (
|
||||
.clk(clk),
|
||||
.read (1'b1),
|
||||
.write (push),
|
||||
`UNUSED_PIN (wren),
|
||||
.write (push),
|
||||
`UNUSED_PIN (wren),
|
||||
.waddr (wr_ptr_r),
|
||||
.wdata (data_in),
|
||||
.raddr (rd_ptr_r),
|
||||
|
@ -196,18 +196,18 @@ module VX_fifo_queue #(
|
|||
reg [ADDRW-1:0] rd_ptr_n_r;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
if (reset) begin
|
||||
wr_ptr_r <= '0;
|
||||
rd_ptr_r <= '0;
|
||||
rd_ptr_n_r <= 1;
|
||||
end else begin
|
||||
wr_ptr_r <= wr_ptr_r + ADDRW'(push);
|
||||
if (pop) begin
|
||||
rd_ptr_r <= rd_ptr_n_r;
|
||||
if (DEPTH > 2) begin
|
||||
rd_ptr_r <= rd_ptr_n_r;
|
||||
if (DEPTH > 2) begin
|
||||
rd_ptr_n_r <= rd_ptr_r + ADDRW'(2);
|
||||
end else begin // (DEPTH == 2);
|
||||
rd_ptr_n_r <= ~rd_ptr_n_r;
|
||||
rd_ptr_n_r <= ~rd_ptr_n_r;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -227,13 +227,13 @@ module VX_fifo_queue #(
|
|||
) dp_ram (
|
||||
.clk (clk),
|
||||
.read (1'b1),
|
||||
.write (push),
|
||||
`UNUSED_PIN (wren),
|
||||
.write (push),
|
||||
`UNUSED_PIN (wren),
|
||||
.waddr (wr_ptr_r),
|
||||
.wdata (data_in),
|
||||
.raddr (rd_ptr_n_r),
|
||||
.rdata (dout)
|
||||
);
|
||||
);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (push && (empty_r || (going_empty && pop))) begin
|
||||
|
@ -246,12 +246,12 @@ module VX_fifo_queue #(
|
|||
assign data_out = dout_r;
|
||||
end
|
||||
end
|
||||
|
||||
assign empty = empty_r;
|
||||
|
||||
assign empty = empty_r;
|
||||
assign alm_empty = alm_empty_r;
|
||||
assign full = full_r;
|
||||
assign alm_full = alm_full_r;
|
||||
assign size = {full_r, used_r};
|
||||
assign size = {full_r, used_r};
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -16,29 +16,27 @@
|
|||
`TRACING_OFF
|
||||
module VX_generic_arbiter #(
|
||||
parameter NUM_REQS = 1,
|
||||
parameter LOCK_ENABLE = 0,
|
||||
parameter `STRING TYPE = "P",
|
||||
parameter `STRING TYPE = "P",
|
||||
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire reset,
|
||||
input wire [NUM_REQS-1:0] requests,
|
||||
output wire [LOG_NUM_REQS-1:0] grant_index,
|
||||
output wire [NUM_REQS-1:0] grant_onehot,
|
||||
output wire [NUM_REQS-1:0] grant_onehot,
|
||||
output wire grant_valid,
|
||||
input wire grant_unlock
|
||||
input wire grant_ready
|
||||
);
|
||||
if (TYPE == "P") begin
|
||||
|
||||
`UNUSED_PARAM (LOCK_ENABLE)
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (grant_unlock)
|
||||
`UNUSED_VAR (grant_ready)
|
||||
|
||||
VX_priority_arbiter #(
|
||||
.NUM_REQS (NUM_REQS)
|
||||
) priority_arbiter (
|
||||
.requests (requests),
|
||||
.requests (requests),
|
||||
.grant_valid (grant_valid),
|
||||
.grant_index (grant_index),
|
||||
.grant_onehot (grant_onehot)
|
||||
|
@ -47,68 +45,64 @@ module VX_generic_arbiter #(
|
|||
end else if (TYPE == "R") begin
|
||||
|
||||
VX_rr_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.LOCK_ENABLE (LOCK_ENABLE)
|
||||
.NUM_REQS (NUM_REQS)
|
||||
) rr_arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (requests),
|
||||
.reset (reset),
|
||||
.requests (requests),
|
||||
.grant_valid (grant_valid),
|
||||
.grant_index (grant_index),
|
||||
.grant_onehot (grant_onehot),
|
||||
.grant_unlock (grant_unlock)
|
||||
.grant_ready (grant_ready)
|
||||
);
|
||||
|
||||
end else if (TYPE == "F") begin
|
||||
|
||||
VX_fair_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.LOCK_ENABLE (LOCK_ENABLE)
|
||||
.NUM_REQS (NUM_REQS)
|
||||
) fair_arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (requests),
|
||||
.requests (requests),
|
||||
.grant_valid (grant_valid),
|
||||
.grant_index (grant_index),
|
||||
.grant_onehot (grant_onehot),
|
||||
.grant_unlock (grant_unlock)
|
||||
.grant_ready (grant_ready)
|
||||
);
|
||||
|
||||
end else if (TYPE == "M") begin
|
||||
|
||||
VX_matrix_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.LOCK_ENABLE (LOCK_ENABLE)
|
||||
.NUM_REQS (NUM_REQS)
|
||||
) matrix_arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (requests),
|
||||
.requests (requests),
|
||||
.grant_valid (grant_valid),
|
||||
.grant_index (grant_index),
|
||||
.grant_onehot (grant_onehot),
|
||||
.grant_unlock (grant_unlock)
|
||||
.grant_ready (grant_ready)
|
||||
);
|
||||
|
||||
end else if (TYPE == "C") begin
|
||||
|
||||
VX_cyclic_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.LOCK_ENABLE (LOCK_ENABLE)
|
||||
.NUM_REQS (NUM_REQS)
|
||||
) cyclic_arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (requests),
|
||||
.requests (requests),
|
||||
.grant_valid (grant_valid),
|
||||
.grant_index (grant_index),
|
||||
.grant_onehot (grant_onehot),
|
||||
.grant_unlock (grant_unlock)
|
||||
.grant_ready (grant_ready)
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
||||
`ERROR(("invalid parameter"));
|
||||
|
||||
|
||||
end
|
||||
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -16,52 +16,51 @@
|
|||
`TRACING_OFF
|
||||
module VX_matrix_arbiter #(
|
||||
parameter NUM_REQS = 1,
|
||||
parameter LOCK_ENABLE = 0,
|
||||
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire reset,
|
||||
input wire [NUM_REQS-1:0] requests,
|
||||
output wire [LOG_NUM_REQS-1:0] grant_index,
|
||||
output wire [NUM_REQS-1:0] grant_onehot,
|
||||
output wire [NUM_REQS-1:0] grant_onehot,
|
||||
output wire grant_valid,
|
||||
input wire grant_unlock
|
||||
input wire grant_ready
|
||||
);
|
||||
if (NUM_REQS == 1) begin
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (grant_unlock)
|
||||
|
||||
`UNUSED_VAR (grant_ready)
|
||||
|
||||
assign grant_index = '0;
|
||||
assign grant_onehot = requests;
|
||||
assign grant_valid = requests[0];
|
||||
|
||||
end else begin
|
||||
|
||||
reg [NUM_REQS-1:1] state [NUM_REQS-1:0];
|
||||
reg [NUM_REQS-1:1] state [NUM_REQS-1:0];
|
||||
wire [NUM_REQS-1:0] pri [NUM_REQS-1:0];
|
||||
wire [NUM_REQS-1:0] grant_unqual;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar j = 0; j < NUM_REQS; ++j) begin
|
||||
if (j > i) begin
|
||||
assign pri[j][i] = requests[i] && state[i][j];
|
||||
end
|
||||
end
|
||||
else if (j < i) begin
|
||||
assign pri[j][i] = requests[i] && !state[j][i];
|
||||
end
|
||||
end
|
||||
else begin
|
||||
assign pri[j][i] = 0;
|
||||
assign pri[j][i] = 0;
|
||||
end
|
||||
end
|
||||
assign grant_unqual[i] = requests[i] && !(| pri[i]);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar j = i + 1; j < NUM_REQS; ++j) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state[i][j] <= '0;
|
||||
end else begin
|
||||
state[i][j] <= (state[i][j] || grant_unqual[j]) && !grant_unqual[i];
|
||||
|
@ -70,20 +69,15 @@ module VX_matrix_arbiter #(
|
|||
end
|
||||
end
|
||||
|
||||
if (LOCK_ENABLE == 0) begin
|
||||
`UNUSED_VAR (grant_unlock)
|
||||
assign grant_onehot = grant_unqual;
|
||||
end else begin
|
||||
reg [NUM_REQS-1:0] grant_unqual_prev;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
grant_unqual_prev <= '0;
|
||||
end else if (grant_unlock) begin
|
||||
grant_unqual_prev <= grant_unqual;
|
||||
end
|
||||
reg [NUM_REQS-1:0] grant_unqual_prev;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
grant_unqual_prev <= '0;
|
||||
end else if (grant_ready) begin
|
||||
grant_unqual_prev <= grant_unqual;
|
||||
end
|
||||
assign grant_onehot = grant_unlock ? grant_unqual : grant_unqual_prev;
|
||||
end
|
||||
assign grant_onehot = grant_ready ? grant_unqual : grant_unqual_prev;
|
||||
|
||||
VX_onehot_encoder #(
|
||||
.N (NUM_REQS)
|
||||
|
@ -96,6 +90,6 @@ module VX_matrix_arbiter #(
|
|||
assign grant_valid = (| requests);
|
||||
|
||||
end
|
||||
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -24,13 +24,13 @@ module VX_mem_coalescer #(
|
|||
parameter TAG_WIDTH = 8,
|
||||
parameter UUID_WIDTH = 0, // upper section of the request tag contains the UUID
|
||||
parameter QUEUE_SIZE = 8,
|
||||
|
||||
|
||||
parameter DATA_IN_WIDTH = DATA_IN_SIZE * 8,
|
||||
parameter DATA_OUT_WIDTH= DATA_OUT_SIZE * 8,
|
||||
parameter OUT_REQS = (NUM_REQS * DATA_IN_WIDTH) / DATA_OUT_WIDTH,
|
||||
parameter BATCH_SIZE = DATA_OUT_SIZE / DATA_IN_SIZE,
|
||||
parameter BATCH_SIZE_W = `LOG2UP(BATCH_SIZE),
|
||||
parameter OUT_ADDR_WIDTH= ADDR_WIDTH - BATCH_SIZE_W,
|
||||
parameter DATA_RATIO = DATA_OUT_SIZE / DATA_IN_SIZE,
|
||||
parameter DATA_RATIO_W = `LOG2UP(DATA_RATIO),
|
||||
parameter OUT_REQS = NUM_REQS / DATA_RATIO,
|
||||
parameter OUT_ADDR_WIDTH= ADDR_WIDTH - DATA_RATIO_W,
|
||||
parameter QUEUE_ADDRW = `CLOG2(QUEUE_SIZE),
|
||||
parameter OUT_TAG_WIDTH = UUID_WIDTH + QUEUE_ADDRW
|
||||
) (
|
||||
|
@ -45,7 +45,7 @@ module VX_mem_coalescer #(
|
|||
input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] in_req_addr,
|
||||
input wire [NUM_REQS-1:0][ATYPE_WIDTH-1:0] in_req_atype,
|
||||
input wire [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_req_data,
|
||||
input wire [TAG_WIDTH-1:0] in_req_tag,
|
||||
input wire [TAG_WIDTH-1:0] in_req_tag,
|
||||
output wire in_req_ready,
|
||||
|
||||
// Input response
|
||||
|
@ -58,7 +58,7 @@ module VX_mem_coalescer #(
|
|||
// Output request
|
||||
output wire out_req_valid,
|
||||
output wire out_req_rw,
|
||||
output wire [OUT_REQS-1:0] out_req_mask,
|
||||
output wire [OUT_REQS-1:0] out_req_mask,
|
||||
output wire [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] out_req_byteen,
|
||||
output wire [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr,
|
||||
output wire [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype,
|
||||
|
@ -78,27 +78,27 @@ module VX_mem_coalescer #(
|
|||
`STATIC_ASSERT ((NUM_REQS * DATA_IN_WIDTH >= DATA_OUT_WIDTH), ("invalid parameter"))
|
||||
`RUNTIME_ASSERT ((~in_req_valid || in_req_mask != 0), ("invalid request mask"));
|
||||
`RUNTIME_ASSERT ((~out_rsp_valid || out_rsp_mask != 0), ("invalid request mask"));
|
||||
|
||||
localparam TAG_ID_WIDTH = TAG_WIDTH - UUID_WIDTH;
|
||||
localparam NUM_REQS_W = `LOG2UP(NUM_REQS);
|
||||
|
||||
localparam TAG_ID_WIDTH = TAG_WIDTH - UUID_WIDTH;
|
||||
localparam NUM_REQS_W = `LOG2UP(NUM_REQS);
|
||||
// tag + mask + offest
|
||||
localparam IBUF_DATA_WIDTH = TAG_ID_WIDTH + NUM_REQS + (NUM_REQS * BATCH_SIZE_W);
|
||||
localparam IBUF_DATA_WIDTH = TAG_ID_WIDTH + NUM_REQS + (NUM_REQS * DATA_RATIO_W);
|
||||
|
||||
localparam STATE_SETUP = 0;
|
||||
localparam STATE_SEND = 1;
|
||||
|
||||
logic state_r, state_n;
|
||||
|
||||
logic out_req_valid_r, out_req_valid_n;
|
||||
logic out_req_rw_r, out_req_rw_n;
|
||||
logic [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n;
|
||||
logic [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] out_req_byteen_r, out_req_byteen_n;
|
||||
logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n;
|
||||
logic [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype_r, out_req_atype_n;
|
||||
logic [OUT_REQS-1:0][DATA_OUT_WIDTH-1:0] out_req_data_r, out_req_data_n;
|
||||
logic [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n;
|
||||
|
||||
logic in_req_ready_n;
|
||||
reg state_r, state_n;
|
||||
|
||||
reg out_req_valid_r, out_req_valid_n;
|
||||
reg out_req_rw_r, out_req_rw_n;
|
||||
reg [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n;
|
||||
reg [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n;
|
||||
reg [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype_r, out_req_atype_n;
|
||||
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] out_req_byteen_r, out_req_byteen_n;
|
||||
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] out_req_data_r, out_req_data_n;
|
||||
reg [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n;
|
||||
|
||||
reg in_req_ready_n;
|
||||
|
||||
wire ibuf_push;
|
||||
wire ibuf_pop;
|
||||
|
@ -108,33 +108,45 @@ module VX_mem_coalescer #(
|
|||
wire ibuf_empty;
|
||||
wire [IBUF_DATA_WIDTH-1:0] ibuf_din;
|
||||
wire [IBUF_DATA_WIDTH-1:0] ibuf_dout;
|
||||
|
||||
|
||||
logic [OUT_REQS-1:0] batch_valid_r, batch_valid_n;
|
||||
logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] seed_addr_r, seed_addr_n;
|
||||
logic [OUT_REQS-1:0][ATYPE_WIDTH-1:0] seed_atype_r, seed_atype_n;
|
||||
logic [NUM_REQS-1:0] addr_matches_r, addr_matches_n;
|
||||
logic [NUM_REQS-1:0] processed_mask_r, processed_mask_n;
|
||||
|
||||
wire [OUT_REQS-1:0][NUM_REQS_W-1:0] seed_idx;
|
||||
|
||||
wire [NUM_REQS-1:0][OUT_ADDR_WIDTH-1:0] in_addr_base;
|
||||
wire [NUM_REQS-1:0][BATCH_SIZE_W-1:0] in_addr_offset;
|
||||
wire [NUM_REQS-1:0][DATA_RATIO_W-1:0] in_addr_offset;
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
assign in_addr_base[i] = in_req_addr[i][ADDR_WIDTH-1:BATCH_SIZE_W];
|
||||
assign in_addr_offset[i] = in_req_addr[i][BATCH_SIZE_W-1:0];
|
||||
assign in_addr_base[i] = in_req_addr[i][ADDR_WIDTH-1:DATA_RATIO_W];
|
||||
assign in_addr_offset[i] = in_req_addr[i][DATA_RATIO_W-1:0];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < OUT_REQS; ++i) begin
|
||||
wire [BATCH_SIZE-1:0] batch_mask = in_req_mask[BATCH_SIZE * i +: BATCH_SIZE] & ~processed_mask_r[BATCH_SIZE * i +: BATCH_SIZE];
|
||||
wire [BATCH_SIZE_W-1:0] batch_idx;
|
||||
wire [DATA_RATIO-1:0] batch_mask = in_req_mask[i * DATA_RATIO +: DATA_RATIO] & ~processed_mask_r[i * DATA_RATIO +: DATA_RATIO];
|
||||
wire [DATA_RATIO_W-1:0] batch_idx;
|
||||
VX_priority_encoder #(
|
||||
.N (BATCH_SIZE)
|
||||
.N (DATA_RATIO)
|
||||
) priority_encoder (
|
||||
.data_in (batch_mask),
|
||||
.index (batch_idx),
|
||||
.index (batch_idx),
|
||||
`UNUSED_PIN (onehot),
|
||||
.valid_out (batch_valid_n[i])
|
||||
);
|
||||
assign seed_idx[i] = NUM_REQS_W'(BATCH_SIZE * i) + NUM_REQS_W'(batch_idx);
|
||||
assign seed_idx[i] = NUM_REQS_W'(i * DATA_RATIO) + NUM_REQS_W'(batch_idx);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < OUT_REQS; ++i) begin
|
||||
assign seed_addr_n[i] = in_addr_base[seed_idx[i]];
|
||||
assign seed_atype_n[i] = in_req_atype[seed_idx[i]];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < OUT_REQS; ++i) begin
|
||||
for (genvar j = 0; j < DATA_RATIO; ++j) begin
|
||||
assign addr_matches_n[i * DATA_RATIO + j] = (in_addr_base[i * DATA_RATIO + j] == seed_addr_n[i]);
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
|
@ -144,12 +156,13 @@ module VX_mem_coalescer #(
|
|||
out_req_valid_r <= 0;
|
||||
end else begin
|
||||
state_r <= state_n;
|
||||
out_req_valid_r <= out_req_valid_n;
|
||||
batch_valid_r <= batch_valid_n;
|
||||
seed_addr_r <= seed_addr_n;
|
||||
seed_atype_r <= seed_atype_n;
|
||||
out_req_rw_r <= out_req_rw_n;
|
||||
out_req_mask_r <= out_req_mask_n;
|
||||
seed_atype_r <= seed_atype_n;
|
||||
addr_matches_r <= addr_matches_n;
|
||||
out_req_valid_r <= out_req_valid_n;
|
||||
out_req_mask_r <= out_req_mask_n;
|
||||
out_req_rw_r <= out_req_rw_n;
|
||||
out_req_addr_r <= out_req_addr_n;
|
||||
out_req_atype_r <= out_req_atype_n;
|
||||
out_req_byteen_r <= out_req_byteen_n;
|
||||
|
@ -159,84 +172,77 @@ module VX_mem_coalescer #(
|
|||
end
|
||||
end
|
||||
|
||||
logic [NUM_REQS-1:0] addr_matches;
|
||||
wire [NUM_REQS-1:0] current_pmask = in_req_mask & addr_matches_r;
|
||||
|
||||
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] req_byteen_merged;
|
||||
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] req_data_merged;
|
||||
|
||||
always @(*) begin
|
||||
addr_matches = '0;
|
||||
req_byteen_merged = '0;
|
||||
req_data_merged = 'x;
|
||||
for (integer i = 0; i < OUT_REQS; ++i) begin
|
||||
for (integer j = 0; j < BATCH_SIZE; j++) begin
|
||||
if (in_addr_base[BATCH_SIZE * i + j] == seed_addr_r[i]) begin
|
||||
addr_matches[BATCH_SIZE * i + j] = 1;
|
||||
for (integer j = 0; j < DATA_RATIO; ++j) begin
|
||||
if (current_pmask[i * DATA_RATIO + j]) begin
|
||||
for (integer k = 0; k < DATA_IN_SIZE; ++k) begin
|
||||
if (in_req_byteen[DATA_RATIO * i + j][k]) begin
|
||||
req_byteen_merged[i][in_addr_offset[DATA_RATIO * i + j]][k] = 1'b1;
|
||||
req_data_merged[i][in_addr_offset[DATA_RATIO * i + j]][k * 8 +: 8] = in_req_data[DATA_RATIO * i + j][k * 8 +: 8];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire [NUM_REQS-1:0] current_pmask = in_req_mask & addr_matches;
|
||||
wire [OUT_REQS * DATA_RATIO - 1:0] pending_mask;
|
||||
for (genvar i = 0; i < OUT_REQS * DATA_RATIO; ++i) begin
|
||||
assign pending_mask[i] = in_req_mask[i] && ~addr_matches_r[i] && ~processed_mask_r[i];
|
||||
end
|
||||
wire batch_completed = ~(| pending_mask);
|
||||
|
||||
always @(*) begin
|
||||
state_n = state_r;
|
||||
|
||||
out_req_valid_n = out_req_valid_r;
|
||||
seed_addr_n = seed_addr_r;
|
||||
seed_atype_n = seed_atype_r;
|
||||
out_req_rw_n = out_req_rw_r;
|
||||
out_req_mask_n = out_req_mask_r;
|
||||
out_req_mask_n = out_req_mask_r;
|
||||
out_req_rw_n = out_req_rw_r;
|
||||
out_req_addr_n = out_req_addr_r;
|
||||
out_req_atype_n = out_req_atype_r;
|
||||
out_req_byteen_n = out_req_byteen_r;
|
||||
out_req_data_n = out_req_data_r;
|
||||
out_req_tag_n = out_req_tag_r;
|
||||
|
||||
processed_mask_n = processed_mask_r;
|
||||
in_req_ready_n = 0;
|
||||
|
||||
case (state_r)
|
||||
STATE_SETUP: begin
|
||||
// find the next seed address
|
||||
for (integer i = 0; i < OUT_REQS; ++i) begin
|
||||
seed_addr_n[i] = in_addr_base[seed_idx[i]];
|
||||
seed_atype_n[i] = in_req_atype[seed_idx[i]];
|
||||
end
|
||||
STATE_SETUP: begin
|
||||
// wait for pending outgoing request to submit
|
||||
if (out_req_valid && out_req_ready) begin
|
||||
out_req_valid_n = 0;
|
||||
end
|
||||
if (in_req_valid && ~out_req_valid_n && ~ibuf_full) begin
|
||||
if (in_req_valid && ~out_req_valid_n && ~ibuf_full) begin
|
||||
state_n = STATE_SEND;
|
||||
end
|
||||
end
|
||||
default/*STATE_SEND*/: begin
|
||||
out_req_valid_n = 1;
|
||||
out_req_rw_n = in_req_rw;
|
||||
out_req_tag_n = {in_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr};
|
||||
in_req_ready_n = 1;
|
||||
out_req_byteen_n = '0;
|
||||
out_req_data_n = 'x;
|
||||
for (integer i = 0; i < OUT_REQS; ++i) begin
|
||||
for (integer j = 0; j < BATCH_SIZE; j++) begin
|
||||
if (in_req_mask[BATCH_SIZE * i + j]) begin
|
||||
if (addr_matches[BATCH_SIZE * i + j]) begin
|
||||
for (integer k = 0; k < DATA_IN_SIZE; ++k) begin
|
||||
if (in_req_byteen[BATCH_SIZE * i + j][k]) begin
|
||||
out_req_byteen_n[i][in_addr_offset[BATCH_SIZE * i + j] * DATA_IN_SIZE + k +: 1] = 1'b1;
|
||||
out_req_data_n[i][in_addr_offset[BATCH_SIZE * i + j] * DATA_IN_WIDTH + k * 8 +: 8] = in_req_data[BATCH_SIZE * i + j][k * 8 +: 8];
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
if (!processed_mask_r[BATCH_SIZE * i + j]) begin
|
||||
in_req_ready_n = 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
out_req_mask_n[i] = batch_valid_r[i];
|
||||
out_req_addr_n[i] = seed_addr_r[i];
|
||||
out_req_atype_n[i]= seed_atype_r[i];
|
||||
end
|
||||
if (in_req_ready_n) begin
|
||||
out_req_mask_n = batch_valid_r;
|
||||
out_req_rw_n = in_req_rw;
|
||||
out_req_addr_n = seed_addr_r;
|
||||
out_req_atype_n = seed_atype_r;
|
||||
out_req_byteen_n= req_byteen_merged;
|
||||
out_req_data_n = req_data_merged;
|
||||
out_req_tag_n = {in_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr};
|
||||
|
||||
in_req_ready_n = batch_completed;
|
||||
|
||||
if (batch_completed) begin
|
||||
processed_mask_n = '0;
|
||||
end else begin
|
||||
processed_mask_n = processed_mask_r | current_pmask;
|
||||
end
|
||||
|
||||
state_n = STATE_SETUP;
|
||||
end
|
||||
endcase
|
||||
|
@ -246,13 +252,15 @@ module VX_mem_coalescer #(
|
|||
|
||||
wire out_rsp_eop;
|
||||
|
||||
assign ibuf_push = (state_r == STATE_SEND) && ~in_req_rw;
|
||||
wire req_sent = (state_r == STATE_SEND);
|
||||
|
||||
assign ibuf_push = req_sent && ~in_req_rw;
|
||||
assign ibuf_pop = out_rsp_fire && out_rsp_eop;
|
||||
assign ibuf_raddr = out_rsp_tag[QUEUE_ADDRW-1:0];
|
||||
assign ibuf_raddr = out_rsp_tag[QUEUE_ADDRW-1:0];
|
||||
|
||||
wire [TAG_ID_WIDTH-1:0] ibuf_din_tag = in_req_tag[TAG_ID_WIDTH-1:0];
|
||||
wire [NUM_REQS-1:0][BATCH_SIZE_W-1:0] ibuf_din_offset = in_addr_offset;
|
||||
wire [NUM_REQS-1:0] ibuf_din_pmask = current_pmask;
|
||||
wire [NUM_REQS-1:0][DATA_RATIO_W-1:0] ibuf_din_offset = in_addr_offset;
|
||||
wire [NUM_REQS-1:0] ibuf_din_pmask = current_pmask;
|
||||
|
||||
assign ibuf_din = {ibuf_din_tag, ibuf_din_pmask, ibuf_din_offset};
|
||||
|
||||
|
@ -286,7 +294,7 @@ module VX_mem_coalescer #(
|
|||
|
||||
// unmerge responses
|
||||
|
||||
reg [QUEUE_SIZE-1:0][OUT_REQS-1:0] rsp_rem_mask;
|
||||
reg [QUEUE_SIZE-1:0][OUT_REQS-1:0] rsp_rem_mask;
|
||||
wire [OUT_REQS-1:0] rsp_rem_mask_n = rsp_rem_mask[ibuf_raddr] & ~out_rsp_mask;
|
||||
assign out_rsp_eop = ~(| rsp_rem_mask_n);
|
||||
|
||||
|
@ -299,21 +307,19 @@ module VX_mem_coalescer #(
|
|||
end
|
||||
end
|
||||
|
||||
wire [NUM_REQS-1:0][BATCH_SIZE_W-1:0] ibuf_dout_offset;
|
||||
reg [NUM_REQS-1:0] ibuf_dout_pmask;
|
||||
wire [NUM_REQS-1:0][DATA_RATIO_W-1:0] ibuf_dout_offset;
|
||||
wire [NUM_REQS-1:0] ibuf_dout_pmask;
|
||||
wire [TAG_ID_WIDTH-1:0] ibuf_dout_tag;
|
||||
|
||||
assign {ibuf_dout_tag, ibuf_dout_pmask, ibuf_dout_offset} = ibuf_dout;
|
||||
|
||||
logic [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_rsp_data_n;
|
||||
logic [NUM_REQS-1:0] in_rsp_mask_n;
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < OUT_REQS; ++i) begin
|
||||
for (integer j = 0; j < BATCH_SIZE; j++) begin
|
||||
in_rsp_mask_n[BATCH_SIZE * i + j] = out_rsp_mask[i] && ibuf_dout_pmask[BATCH_SIZE * i + j];
|
||||
in_rsp_data_n[BATCH_SIZE * i + j] = out_rsp_data[i][ibuf_dout_offset[BATCH_SIZE * i + j] * DATA_IN_WIDTH +: DATA_IN_WIDTH];
|
||||
end
|
||||
wire [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_rsp_data_n;
|
||||
wire [NUM_REQS-1:0] in_rsp_mask_n;
|
||||
|
||||
for (genvar i = 0; i < OUT_REQS; ++i) begin
|
||||
for (genvar j = 0; j < DATA_RATIO; ++j) begin
|
||||
assign in_rsp_mask_n[i * DATA_RATIO + j] = out_rsp_mask[i] && ibuf_dout_pmask[i * DATA_RATIO + j];
|
||||
assign in_rsp_data_n[i * DATA_RATIO + j] = out_rsp_data[i][ibuf_dout_offset[i * DATA_RATIO + j] * DATA_IN_WIDTH +: DATA_IN_WIDTH];
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -335,11 +341,11 @@ module VX_mem_coalescer #(
|
|||
assign out_rsp_uuid = '0;
|
||||
end
|
||||
|
||||
reg [NUM_REQS-1:0][BATCH_SIZE_W-1:0] out_req_offset;
|
||||
reg [NUM_REQS-1:0][DATA_RATIO_W-1:0] out_req_offset;
|
||||
reg [NUM_REQS-1:0] out_req_pmask;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (ibuf_push) begin
|
||||
if (req_sent) begin
|
||||
out_req_offset <= ibuf_din_offset;
|
||||
out_req_pmask <= ibuf_din_pmask;
|
||||
end
|
||||
|
@ -351,30 +357,30 @@ module VX_mem_coalescer #(
|
|||
if (out_req_fire) begin
|
||||
if (out_req_rw) begin
|
||||
`TRACE(1, ("%d: %s-out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask));
|
||||
`TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS);
|
||||
`TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS);
|
||||
`TRACE(1, (", atype="));
|
||||
`TRACE_ARRAY1D(1, "%b", out_req_atype, OUT_REQS);
|
||||
`TRACE_ARRAY1D(1, "%b", out_req_atype, OUT_REQS);
|
||||
`TRACE(1, (", byteen="));
|
||||
`TRACE_ARRAY1D(1, "0x%h", out_req_byteen, OUT_REQS);
|
||||
`TRACE(1, (", data="));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", out_req_data, OUT_REQS);
|
||||
`TRACE_ARRAY1D(1, "0x%0h", out_req_data, OUT_REQS);
|
||||
end else begin
|
||||
`TRACE(1, ("%d: %s-out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask));
|
||||
`TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS);
|
||||
`TRACE(1, (", atype="));
|
||||
`TRACE_ARRAY1D(1, "%b", out_req_atype, OUT_REQS);
|
||||
end
|
||||
`TRACE(1, (", offset="));
|
||||
`TRACE(1, (", offset="));
|
||||
`TRACE_ARRAY1D(1, "%0d", out_req_offset, NUM_REQS);
|
||||
`TRACE(1, (", pmask=%b, tag=0x%0h (#%0d)\n", out_req_pmask, out_req_tag, out_req_uuid));
|
||||
`TRACE(1, (", pmask=%b, tag=0x%0h (#%0d)\n", out_req_pmask, out_req_tag, out_req_uuid));
|
||||
if ($countones(out_req_pmask) > 1) begin
|
||||
`TRACE(1, ("%t: *** %s: coalescing=%b (#%0d)\n", $time, INSTANCE_ID, out_req_pmask, out_req_uuid));
|
||||
end
|
||||
`TRACE(1, ("%t: *** %s: coalesced=%d (#%0d)\n", $time, INSTANCE_ID, $countones(out_req_pmask), out_req_uuid));
|
||||
end
|
||||
end
|
||||
if (out_rsp_fire) begin
|
||||
`TRACE(1, ("%d: %s-out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", out_rsp_data, OUT_REQS);
|
||||
`TRACE(1, (", offset="));
|
||||
`TRACE(1, (", offset="));
|
||||
`TRACE_ARRAY1D(1, "%0d", ibuf_dout_offset, NUM_REQS);
|
||||
`TRACE(1, (", eop=%b, pmask=%b, tag=0x%0h (#%0d)\n", out_rsp_eop, ibuf_dout_pmask, out_rsp_tag, out_rsp_uuid));
|
||||
end
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -23,7 +23,7 @@ module VX_mem_scheduler #(
|
|||
parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE),
|
||||
parameter ATYPE_WIDTH = 1,
|
||||
parameter TAG_WIDTH = 8,
|
||||
parameter UUID_WIDTH = 0, // upper section of the request tag contains the UUID
|
||||
parameter UUID_WIDTH = 0, // upper section of the request tag contains the UUID
|
||||
parameter CORE_QUEUE_SIZE= 8,
|
||||
parameter MEM_QUEUE_SIZE= CORE_QUEUE_SIZE,
|
||||
parameter RSP_PARTIAL = 0,
|
||||
|
@ -54,7 +54,7 @@ module VX_mem_scheduler #(
|
|||
input wire [CORE_REQS-1:0][WORD_WIDTH-1:0] core_req_data,
|
||||
input wire [TAG_WIDTH-1:0] core_req_tag,
|
||||
output wire core_req_ready,
|
||||
output wire core_req_empty,
|
||||
output wire core_req_empty,
|
||||
output wire core_req_sent,
|
||||
|
||||
// Core response
|
||||
|
@ -81,7 +81,7 @@ module VX_mem_scheduler #(
|
|||
input wire mem_rsp_valid,
|
||||
input wire [MEM_CHANNELS-1:0] mem_rsp_mask,
|
||||
input wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_rsp_data,
|
||||
input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
output wire mem_rsp_ready
|
||||
);
|
||||
localparam BATCH_SEL_WIDTH = `UP(MEM_BATCH_BITS);
|
||||
|
@ -110,7 +110,7 @@ module VX_mem_scheduler #(
|
|||
|
||||
wire reqq_valid;
|
||||
wire [CORE_REQS-1:0] reqq_mask;
|
||||
wire reqq_rw;
|
||||
wire reqq_rw;
|
||||
wire [CORE_REQS-1:0][WORD_SIZE-1:0] reqq_byteen;
|
||||
wire [CORE_REQS-1:0][ADDR_WIDTH-1:0] reqq_addr;
|
||||
wire [CORE_REQS-1:0][ATYPE_WIDTH-1:0] reqq_atype;
|
||||
|
@ -118,7 +118,7 @@ module VX_mem_scheduler #(
|
|||
wire [REQQ_TAG_WIDTH-1:0] reqq_tag;
|
||||
wire reqq_ready;
|
||||
|
||||
wire reqq_valid_s;
|
||||
wire reqq_valid_s;
|
||||
wire [MERGED_REQS-1:0] reqq_mask_s;
|
||||
wire reqq_rw_s;
|
||||
wire [MERGED_REQS-1:0][LINE_SIZE-1:0] reqq_byteen_s;
|
||||
|
@ -139,9 +139,9 @@ module VX_mem_scheduler #(
|
|||
wire mem_req_ready_s;
|
||||
|
||||
wire mem_rsp_valid_s;
|
||||
wire [CORE_REQS-1:0] mem_rsp_mask_s;
|
||||
wire [CORE_REQS-1:0][WORD_WIDTH-1:0] mem_rsp_data_s;
|
||||
wire [REQQ_TAG_WIDTH-1:0] mem_rsp_tag_s;
|
||||
wire [CORE_CHANNELS-1:0] mem_rsp_mask_s;
|
||||
wire [CORE_CHANNELS-1:0][WORD_WIDTH-1:0] mem_rsp_data_s;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_s;
|
||||
wire mem_rsp_ready_s;
|
||||
|
||||
wire crsp_valid;
|
||||
|
@ -159,7 +159,7 @@ module VX_mem_scheduler #(
|
|||
wire ibuf_ready = (core_req_rw || ~ibuf_full);
|
||||
wire reqq_valid_in = core_req_valid && ibuf_ready;
|
||||
wire reqq_ready_in;
|
||||
|
||||
|
||||
wire [REQQ_TAG_WIDTH-1:0] reqq_tag_u;
|
||||
if (UUID_WIDTH != 0) begin
|
||||
assign reqq_tag_u = {core_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr};
|
||||
|
@ -169,7 +169,7 @@ module VX_mem_scheduler #(
|
|||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + ATYPE_WIDTH + WORD_WIDTH) + REQQ_TAG_WIDTH),
|
||||
.SIZE (CORE_QUEUE_SIZE),
|
||||
.SIZE (CORE_QUEUE_SIZE),
|
||||
.OUT_REG (1)
|
||||
) req_queue (
|
||||
.clk (clk),
|
||||
|
@ -188,7 +188,7 @@ module VX_mem_scheduler #(
|
|||
// no pending requests
|
||||
assign core_req_empty = !reqq_valid && ibuf_empty;
|
||||
|
||||
// notify request submisison
|
||||
// notify request submisison
|
||||
assign core_req_sent = reqq_valid && reqq_ready;
|
||||
|
||||
// Index buffer ///////////////////////////////////////////////////////////
|
||||
|
@ -219,15 +219,15 @@ module VX_mem_scheduler #(
|
|||
|
||||
`UNUSED_VAR (ibuf_empty)
|
||||
|
||||
// Handle memory coalescing ///////////////////////////////////////////////
|
||||
// Handle memory coalescing ///////////////////////////////////////////////
|
||||
|
||||
if (COALESCE_ENABLE) begin
|
||||
|
||||
|
||||
`RESET_RELAY (coalescer_reset, reset);
|
||||
|
||||
VX_mem_coalescer #(
|
||||
.INSTANCE_ID ($sformatf("%s-coalescer", INSTANCE_ID)),
|
||||
.NUM_REQS (CORE_REQS),
|
||||
.NUM_REQS (CORE_REQS),
|
||||
.DATA_IN_SIZE (WORD_SIZE),
|
||||
.DATA_OUT_SIZE (LINE_SIZE),
|
||||
.ADDR_WIDTH (ADDR_WIDTH),
|
||||
|
@ -238,7 +238,7 @@ module VX_mem_scheduler #(
|
|||
) coalescer (
|
||||
.clk (clk),
|
||||
.reset (coalescer_reset),
|
||||
|
||||
|
||||
// Input request
|
||||
.in_req_valid (reqq_valid),
|
||||
.in_req_mask (reqq_mask),
|
||||
|
@ -280,7 +280,7 @@ module VX_mem_scheduler #(
|
|||
|
||||
assign reqq_valid_s = reqq_valid;
|
||||
assign reqq_mask_s = reqq_mask;
|
||||
assign reqq_rw_s = reqq_rw;
|
||||
assign reqq_rw_s = reqq_rw;
|
||||
assign reqq_byteen_s= reqq_byteen;
|
||||
assign reqq_addr_s = reqq_addr;
|
||||
assign reqq_atype_s = reqq_atype;
|
||||
|
@ -292,18 +292,18 @@ module VX_mem_scheduler #(
|
|||
assign mem_rsp_mask_s = mem_rsp_mask;
|
||||
assign mem_rsp_data_s = mem_rsp_data;
|
||||
assign mem_rsp_tag_s = mem_rsp_tag;
|
||||
assign mem_rsp_ready = mem_rsp_ready_s;
|
||||
assign mem_rsp_ready = mem_rsp_ready_s;
|
||||
|
||||
end
|
||||
|
||||
// Handle memory requests /////////////////////////////////////////////////
|
||||
|
||||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0] mem_req_mask_b;
|
||||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_b;
|
||||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_b;
|
||||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_b;
|
||||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][ATYPE_WIDTH-1:0] mem_req_atype_b;
|
||||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_b;
|
||||
|
||||
|
||||
wire [BATCH_SEL_WIDTH-1:0] req_batch_idx;
|
||||
|
||||
for (genvar i = 0; i < MEM_BATCHES; ++i) begin
|
||||
|
@ -331,14 +331,19 @@ module VX_mem_scheduler #(
|
|||
assign mem_req_addr_s = mem_req_addr_b[req_batch_idx];
|
||||
assign mem_req_atype_s = mem_req_atype_b[req_batch_idx];
|
||||
assign mem_req_data_s = mem_req_data_b[req_batch_idx];
|
||||
|
||||
|
||||
if (MEM_BATCHES != 1) begin
|
||||
reg [MEM_BATCH_BITS-1:0] req_batch_idx_r;
|
||||
|
||||
wire is_degenerate_batch = ~(| mem_req_mask_s);
|
||||
wire mem_req_valid_b = reqq_valid_s && ~is_degenerate_batch;
|
||||
wire mem_req_ready_b = mem_req_ready_s || is_degenerate_batch;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
req_batch_idx_r <= '0;
|
||||
end else begin
|
||||
if (reqq_valid_s && mem_req_ready_s) begin
|
||||
if (reqq_valid_s && mem_req_ready_b) begin
|
||||
if (req_sent_all) begin
|
||||
req_batch_idx_r <= '0;
|
||||
end else begin
|
||||
|
@ -352,10 +357,10 @@ module VX_mem_scheduler #(
|
|||
wire [MEM_BATCHES-1:0][MEM_BATCH_BITS-1:0] req_batch_idxs;
|
||||
wire [MEM_BATCH_BITS-1:0] req_batch_idx_last;
|
||||
|
||||
for (genvar i = 0; i < MEM_BATCHES; ++i) begin
|
||||
for (genvar i = 0; i < MEM_BATCHES; ++i) begin
|
||||
assign req_batch_valids[i] = (| mem_req_mask_b[i]);
|
||||
assign req_batch_idxs[i] = MEM_BATCH_BITS'(i);
|
||||
end
|
||||
end
|
||||
|
||||
VX_find_first #(
|
||||
.N (MEM_BATCHES),
|
||||
|
@ -368,21 +373,22 @@ module VX_mem_scheduler #(
|
|||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
assign req_batch_idx = req_batch_idx_r;
|
||||
assign req_sent_all = mem_req_ready_s && (req_batch_idx_r == req_batch_idx_last);
|
||||
assign mem_req_valid_s = mem_req_valid_b;
|
||||
assign req_batch_idx = req_batch_idx_r;
|
||||
assign req_sent_all = mem_req_ready_b && (req_batch_idx_r == req_batch_idx_last);
|
||||
assign mem_req_tag_s = {reqq_tag_s, req_batch_idx};
|
||||
|
||||
end else begin
|
||||
|
||||
assign mem_req_valid_s = reqq_valid_s;
|
||||
assign req_batch_idx = '0;
|
||||
assign req_sent_all = mem_req_ready_s;
|
||||
assign mem_req_tag_s = reqq_tag_s;
|
||||
|
||||
end
|
||||
|
||||
assign mem_req_valid_s = reqq_valid_s;
|
||||
assign reqq_ready_s = req_sent_all;
|
||||
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (MEM_CHANNELS + 1 + MEM_CHANNELS * (LINE_SIZE + MEM_ADDR_WIDTH + ATYPE_WIDTH + LINE_WIDTH) + MEM_TAG_WIDTH),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(MEM_OUT_BUF)),
|
||||
|
@ -415,7 +421,7 @@ module VX_mem_scheduler #(
|
|||
localparam j = r % CORE_CHANNELS;
|
||||
assign curr_mask[r] = (BATCH_SEL_WIDTH'(i) == rsp_batch_idx) && mem_rsp_mask_s[j];
|
||||
end
|
||||
|
||||
|
||||
assign rsp_rem_mask_n = rsp_rem_mask[ibuf_raddr] & ~curr_mask;
|
||||
|
||||
wire rsp_complete = ~(| rsp_rem_mask_n);
|
||||
|
@ -457,19 +463,19 @@ module VX_mem_scheduler #(
|
|||
|
||||
end else begin
|
||||
|
||||
reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0];
|
||||
reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store_n;
|
||||
reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0];
|
||||
reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0];
|
||||
reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store_n;
|
||||
reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0];
|
||||
|
||||
always @(*) begin
|
||||
rsp_store_n = rsp_store[ibuf_raddr];
|
||||
rsp_store_n = rsp_store[ibuf_raddr];
|
||||
for (integer i = 0; i < CORE_CHANNELS; ++i) begin
|
||||
if ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]) begin
|
||||
rsp_store_n[(rsp_batch_idx * CORE_CHANNELS + i) * WORD_WIDTH +: WORD_WIDTH] = mem_rsp_data_s[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (ibuf_push) begin
|
||||
rsp_orig_mask[ibuf_waddr] <= core_req_mask;
|
||||
|
@ -490,10 +496,11 @@ module VX_mem_scheduler #(
|
|||
end
|
||||
|
||||
assign mem_rsp_ready_s = crsp_ready || ~rsp_complete;
|
||||
|
||||
end
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
assign crsp_tag = {mem_rsp_tag_s[REQQ_TAG_WIDTH-1 -: UUID_WIDTH], ibuf_dout};
|
||||
assign crsp_tag = {mem_rsp_tag_s[MEM_TAG_WIDTH-1 -: UUID_WIDTH], ibuf_dout};
|
||||
end else begin
|
||||
assign crsp_tag = ibuf_dout;
|
||||
end
|
||||
|
@ -509,11 +516,11 @@ module VX_mem_scheduler #(
|
|||
) rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (crsp_valid),
|
||||
.valid_in (crsp_valid),
|
||||
.ready_in (crsp_ready),
|
||||
.data_in ({crsp_mask, crsp_sop, crsp_eop, crsp_data, crsp_tag}),
|
||||
.data_out ({core_rsp_mask, core_rsp_sop, core_rsp_eop, core_rsp_data, core_rsp_tag}),
|
||||
.valid_out (core_rsp_valid),
|
||||
.valid_out (core_rsp_valid),
|
||||
.ready_out (core_rsp_ready)
|
||||
);
|
||||
|
||||
|
@ -541,14 +548,14 @@ module VX_mem_scheduler #(
|
|||
end
|
||||
end
|
||||
|
||||
if (ibuf_push) begin
|
||||
if (ibuf_push) begin
|
||||
pending_reqs_time[ibuf_waddr] <= {req_dbg_uuid, ibuf_din, $time};
|
||||
end
|
||||
|
||||
for (integer i = 0; i < CORE_QUEUE_SIZE; ++i) begin
|
||||
if (pending_reqs_valid[i]) begin
|
||||
`ASSERT(($time - pending_reqs_time[i][63:0]) < STALL_TIMEOUT,
|
||||
("%t: *** %s response timeout: tag=0x%0h (#%0d)",
|
||||
("%t: *** %s response timeout: tag=0x%0h (#%0d)",
|
||||
$time, INSTANCE_ID, pending_reqs_time[i][64 +: TAG_ID_WIDTH], pending_reqs_time[i][64+TAG_ID_WIDTH +: `UP(UUID_WIDTH)]));
|
||||
end
|
||||
end
|
||||
|
@ -563,8 +570,8 @@ module VX_mem_scheduler #(
|
|||
wire [`UP(UUID_WIDTH)-1:0] rsp_dbg_uuid;
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
assign mem_req_dbg_uuid = mem_req_tag_s[REQQ_TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
assign mem_rsp_dbg_uuid = mem_rsp_tag_s[REQQ_TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
assign mem_req_dbg_uuid = mem_req_tag_s[MEM_TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
assign mem_rsp_dbg_uuid = mem_rsp_tag_s[MEM_TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
assign rsp_dbg_uuid = core_rsp_tag[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
end else begin
|
||||
assign mem_req_dbg_uuid = '0;
|
||||
|
@ -572,25 +579,27 @@ module VX_mem_scheduler #(
|
|||
assign rsp_dbg_uuid = '0;
|
||||
end
|
||||
|
||||
wire [CORE_QUEUE_ADDRW-1:0] ibuf_waddr_s = mem_req_tag_s[MEM_BATCH_BITS +: CORE_QUEUE_ADDRW];
|
||||
|
||||
wire mem_req_fire_s = mem_req_valid_s && mem_req_ready_s;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (core_req_fire) begin
|
||||
if (core_req_rw) begin
|
||||
`TRACE(1, ("%d: %s-core-req-wr: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask));
|
||||
`TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS);
|
||||
`TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS);
|
||||
`TRACE(1, (", byteen="));
|
||||
`TRACE_ARRAY1D(1, "0x%h", core_req_byteen, CORE_REQS);
|
||||
`TRACE(1, (", data="));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", core_req_data, CORE_REQS);
|
||||
`TRACE_ARRAY1D(1, "0x%0h", core_req_data, CORE_REQS);
|
||||
end else begin
|
||||
`TRACE(1, ("%d: %s-core-req-rd: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask));
|
||||
`TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS);
|
||||
end
|
||||
`TRACE(1, (", tag=0x%0h (#%0d)\n", core_req_tag, req_dbg_uuid));
|
||||
`TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS);
|
||||
end
|
||||
`TRACE(1, (", tag=0x%0h (#%0d)\n", core_req_tag, req_dbg_uuid));
|
||||
end
|
||||
if (core_rsp_valid && core_rsp_ready) begin
|
||||
`TRACE(1, ("%d: %s-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop));
|
||||
`TRACE(1, ("%d: %s-core-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", core_rsp_data, CORE_REQS);
|
||||
`TRACE(1, (", tag=0x%0h (#%0d)\n", core_rsp_tag, rsp_dbg_uuid));
|
||||
end
|
||||
|
@ -601,20 +610,20 @@ module VX_mem_scheduler #(
|
|||
`TRACE(1, (", byteen="));
|
||||
`TRACE_ARRAY1D(1, "0x%h", mem_req_byteen_s, CORE_CHANNELS);
|
||||
`TRACE(1, (", data="));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", mem_req_data_s, CORE_CHANNELS);
|
||||
`TRACE_ARRAY1D(1, "0x%0h", mem_req_data_s, CORE_CHANNELS);
|
||||
end else begin
|
||||
`TRACE(1, ("%d: %s-mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s));
|
||||
`TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS);
|
||||
`TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS);
|
||||
end
|
||||
`TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr, req_batch_idx, mem_req_dbg_uuid));
|
||||
end
|
||||
`TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr_s, req_batch_idx, mem_req_dbg_uuid));
|
||||
end
|
||||
if (mem_rsp_fire_s) begin
|
||||
`TRACE(1, ("%d: %s-mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s));
|
||||
`TRACE(1, ("%d: %s-mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data_s, CORE_CHANNELS);
|
||||
`TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_raddr, rsp_batch_idx, mem_rsp_dbg_uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -19,131 +19,36 @@ module VX_onehot_mux #(
|
|||
parameter N = 1,
|
||||
parameter MODEL = 1
|
||||
) (
|
||||
input wire [N-1:0][DATAW-1:0] data_in,
|
||||
input wire [N-1:0] sel_in,
|
||||
input wire [N-1:0][DATAW-1:0] data_in,
|
||||
input wire [N-1:0] sel_in,
|
||||
output wire [DATAW-1:0] data_out
|
||||
);
|
||||
);
|
||||
if (N == 1) begin
|
||||
`UNUSED_VAR (sel_in)
|
||||
assign data_out = data_in;
|
||||
end else if (N == 2) begin
|
||||
`UNUSED_VAR (sel_in)
|
||||
assign data_out = sel_in[0] ? data_in[0] : data_in[1];
|
||||
end else if (N == 3) begin
|
||||
end else if (MODEL == 1) begin
|
||||
wire [N-1:0][DATAW-1:0] mask;
|
||||
for (genvar i = 0; i < N; ++i) begin
|
||||
assign mask[i] = {DATAW{sel_in[i]}} & data_in[i];
|
||||
end
|
||||
for (genvar i = 0; i < DATAW; ++i) begin
|
||||
wire [N-1:0] gather;
|
||||
for (genvar j = 0; j < N; ++j) begin
|
||||
assign gather[j] = mask[j][i];
|
||||
end
|
||||
assign data_out[i] = (| gather);
|
||||
end
|
||||
end else if (MODEL == 2) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
3'b001: data_out_r = data_in[0];
|
||||
3'b010: data_out_r = data_in[1];
|
||||
3'b100: data_out_r = data_in[2];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (N == 4) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
4'b0001: data_out_r = data_in[0];
|
||||
4'b0010: data_out_r = data_in[1];
|
||||
4'b0100: data_out_r = data_in[2];
|
||||
4'b1000: data_out_r = data_in[3];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (N == 5) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
5'b00001: data_out_r = data_in[0];
|
||||
5'b00010: data_out_r = data_in[1];
|
||||
5'b00100: data_out_r = data_in[2];
|
||||
5'b01000: data_out_r = data_in[3];
|
||||
5'b10000: data_out_r = data_in[4];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (N == 6) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
6'b000001: data_out_r = data_in[0];
|
||||
6'b000010: data_out_r = data_in[1];
|
||||
6'b000100: data_out_r = data_in[2];
|
||||
6'b001000: data_out_r = data_in[3];
|
||||
6'b010000: data_out_r = data_in[4];
|
||||
6'b100000: data_out_r = data_in[5];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (N == 7) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
7'b0000001: data_out_r = data_in[0];
|
||||
7'b0000010: data_out_r = data_in[1];
|
||||
7'b0000100: data_out_r = data_in[2];
|
||||
7'b0001000: data_out_r = data_in[3];
|
||||
7'b0010000: data_out_r = data_in[4];
|
||||
7'b0100000: data_out_r = data_in[5];
|
||||
7'b1000000: data_out_r = data_in[6];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (N == 8) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
8'b00000001: data_out_r = data_in[0];
|
||||
8'b00000010: data_out_r = data_in[1];
|
||||
8'b00000100: data_out_r = data_in[2];
|
||||
8'b00001000: data_out_r = data_in[3];
|
||||
8'b00010000: data_out_r = data_in[4];
|
||||
8'b00100000: data_out_r = data_in[5];
|
||||
8'b01000000: data_out_r = data_in[6];
|
||||
8'b10000000: data_out_r = data_in[7];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else begin
|
||||
if (MODEL == 1) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
data_out_r = 'x;
|
||||
for (integer i = 0; i < N; ++i) begin
|
||||
if (sel_in[i]) begin
|
||||
data_out_r = data_in[i];
|
||||
end
|
||||
data_out_r = 'x;
|
||||
for (integer i = 0; i < N; ++i) begin
|
||||
if (sel_in[i]) begin
|
||||
data_out_r = data_in[i];
|
||||
end
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (MODEL == 2) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
data_out_r = '0;
|
||||
for (integer i = 0; i < N; ++i) begin
|
||||
data_out_r |= {DATAW{sel_in[i]}} & data_in[i];
|
||||
end
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (MODEL == 3) begin
|
||||
wire [N-1:0][DATAW-1:0] mask;
|
||||
for (genvar i = 0; i < N; ++i) begin
|
||||
assign mask[i] = {DATAW{sel_in[i]}} & data_in[i];
|
||||
end
|
||||
for (genvar i = 0; i < DATAW; ++i) begin
|
||||
wire [N-1:0] gather;
|
||||
for (genvar j = 0; j < N; ++j) begin
|
||||
assign gather[j] = mask[j][i];
|
||||
end
|
||||
assign data_out[i] = (| gather);
|
||||
end
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -15,8 +15,8 @@
|
|||
|
||||
`TRACING_OFF
|
||||
module VX_pe_serializer #(
|
||||
parameter NUM_LANES = 1,
|
||||
parameter NUM_PES = 1,
|
||||
parameter NUM_LANES = 1,
|
||||
parameter NUM_PES = 1,
|
||||
parameter LATENCY = 1,
|
||||
parameter DATA_IN_WIDTH = 1,
|
||||
parameter DATA_OUT_WIDTH = 1,
|
||||
|
@ -28,12 +28,12 @@ module VX_pe_serializer #(
|
|||
|
||||
// input
|
||||
input wire valid_in,
|
||||
input wire [NUM_LANES-1:0][DATA_IN_WIDTH-1:0] data_in,
|
||||
input wire [NUM_LANES-1:0][DATA_IN_WIDTH-1:0] data_in,
|
||||
input wire [TAG_WIDTH-1:0] tag_in,
|
||||
output wire ready_in,
|
||||
|
||||
// PE
|
||||
output wire pe_enable,
|
||||
output wire pe_enable,
|
||||
output wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_in,
|
||||
input wire [NUM_PES-1:0][DATA_OUT_WIDTH-1:0] pe_data_out,
|
||||
|
||||
|
@ -43,6 +43,7 @@ module VX_pe_serializer #(
|
|||
output wire [TAG_WIDTH-1:0] tag_out,
|
||||
input wire ready_out
|
||||
);
|
||||
wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_in_s;
|
||||
wire valid_out_s;
|
||||
wire [TAG_WIDTH-1:0] tag_out_s;
|
||||
wire enable;
|
||||
|
@ -59,6 +60,17 @@ module VX_pe_serializer #(
|
|||
.data_out ({valid_out_s, tag_out_s})
|
||||
);
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (NUM_PES * DATA_IN_WIDTH),
|
||||
.DEPTH (PE_REG)
|
||||
) pe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (enable),
|
||||
.data_in (pe_data_in_s),
|
||||
.data_out (pe_data_in)
|
||||
);
|
||||
|
||||
if (NUM_LANES != NUM_PES) begin
|
||||
|
||||
localparam BATCH_SIZE = NUM_LANES / NUM_PES;
|
||||
|
@ -67,6 +79,10 @@ module VX_pe_serializer #(
|
|||
reg [BATCH_SIZEW-1:0] batch_in_idx;
|
||||
reg [BATCH_SIZEW-1:0] batch_out_idx;
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
assign pe_data_in_s[i] = data_in[batch_in_idx * NUM_PES + i];
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
batch_in_idx <= '0;
|
||||
|
@ -81,45 +97,29 @@ module VX_pe_serializer #(
|
|||
end
|
||||
end
|
||||
|
||||
wire batch_in_done = (batch_in_idx == BATCH_SIZEW'(BATCH_SIZE-1));
|
||||
wire batch_in_done = (batch_in_idx == BATCH_SIZEW'(BATCH_SIZE-1));
|
||||
wire batch_out_done = (batch_out_idx == BATCH_SIZEW'(BATCH_SIZE-1));
|
||||
|
||||
wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_in_s;
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
assign pe_data_in_s[i] = data_in[batch_in_idx * NUM_PES + i];
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (NUM_PES * DATA_IN_WIDTH),
|
||||
.DEPTH (PE_REG)
|
||||
) pe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (enable),
|
||||
.data_in (pe_data_in_s),
|
||||
.data_out (pe_data_in)
|
||||
);
|
||||
|
||||
reg valid_out_r;
|
||||
reg [BATCH_SIZE-1:0][NUM_PES-1:0][DATA_OUT_WIDTH-1:0] data_out_r;
|
||||
reg [TAG_WIDTH-1:0] tag_out_r;
|
||||
|
||||
wire valid_out_b = valid_out_s && batch_out_done;
|
||||
wire enable_r = ready_out || ~valid_out;
|
||||
wire valid_out_b = valid_out_s && batch_out_done;
|
||||
wire ready_out_b = ready_out || ~valid_out;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
valid_out_r <= 1'b0;
|
||||
end else if (enable_r) begin
|
||||
end else if (ready_out_b) begin
|
||||
valid_out_r <= valid_out_b;
|
||||
end
|
||||
if (enable_r) begin
|
||||
if (ready_out_b) begin
|
||||
data_out_r[batch_out_idx] <= pe_data_out;
|
||||
tag_out_r <= tag_out_s;
|
||||
end
|
||||
end
|
||||
|
||||
assign enable = (enable_r || ~valid_out_b);
|
||||
|
||||
assign enable = ready_out_b || ~valid_out_b;
|
||||
assign ready_in = enable && batch_in_done;
|
||||
|
||||
assign pe_enable = enable;
|
||||
|
@ -130,16 +130,17 @@ module VX_pe_serializer #(
|
|||
|
||||
end else begin
|
||||
|
||||
assign pe_data_in_s = data_in;
|
||||
|
||||
assign enable = ready_out || ~valid_out;
|
||||
assign ready_in = enable;
|
||||
|
||||
assign pe_enable = enable;
|
||||
assign pe_data_in= data_in;
|
||||
|
||||
assign valid_out = valid_out_s;
|
||||
assign valid_out = valid_out_s;
|
||||
assign data_out = pe_data_out;
|
||||
assign tag_out = tag_out_s;
|
||||
|
||||
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -13,44 +13,53 @@
|
|||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
`TRACING_OFF
|
||||
//`TRACING_OFF
|
||||
module VX_pending_size #(
|
||||
parameter SIZE = 1,
|
||||
parameter INCRW = 1,
|
||||
parameter DECRW = 1,
|
||||
parameter SIZEW = `CLOG2(SIZE+1)
|
||||
parameter SIZE = 1,
|
||||
parameter INCRW = 1,
|
||||
parameter DECRW = 1,
|
||||
parameter ALM_FULL = (SIZE - 1),
|
||||
parameter ALM_EMPTY = 1,
|
||||
parameter SIZEW = `CLOG2(SIZE+1)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire [INCRW-1:0] incr,
|
||||
input wire [DECRW-1:0] decr,
|
||||
output wire empty,
|
||||
output wire alm_empty,
|
||||
output wire full,
|
||||
output wire alm_full,
|
||||
output wire [SIZEW-1:0] size
|
||||
);
|
||||
`STATIC_ASSERT(INCRW <= SIZEW, ("invalid parameter"))
|
||||
`STATIC_ASSERT(DECRW <= SIZEW, ("invalid parameter"))
|
||||
`STATIC_ASSERT(INCRW <= SIZEW, ("invalid parameter: %d vs %d", INCRW, SIZEW))
|
||||
`STATIC_ASSERT(DECRW <= SIZEW, ("invalid parameter: %d vs %d", DECRW, SIZEW))
|
||||
localparam ADDRW = `LOG2UP(SIZE);
|
||||
|
||||
reg empty_r;
|
||||
reg full_r;
|
||||
reg empty_r, alm_empty_r;
|
||||
reg full_r, alm_full_r;
|
||||
|
||||
if (INCRW != 1 || DECRW != 1) begin
|
||||
|
||||
reg [SIZEW-1:0] size_r;
|
||||
wire [SIZEW-1:0] size_n;
|
||||
|
||||
assign size_n = size_r + SIZEW'(incr) - SIZEW'(decr);
|
||||
wire [SIZEW-1:0] size_n = size_r + SIZEW'(incr) - SIZEW'(decr);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
size_r <= '0;
|
||||
empty_r <= 1;
|
||||
full_r <= 0;
|
||||
if (reset) begin
|
||||
empty_r <= 1;
|
||||
alm_empty_r <= 1;
|
||||
alm_full_r <= 0;
|
||||
full_r <= 0;
|
||||
size_r <= '0;
|
||||
end else begin
|
||||
size_r <= size_n;
|
||||
empty_r <= (size_n == SIZEW'(0));
|
||||
full_r <= (size_n == SIZEW'(SIZE));
|
||||
`ASSERT((SIZEW'(incr) >= SIZEW'(decr)) || (size_n >= size_r), ("runtime error: counter overflow"));
|
||||
`ASSERT((SIZEW'(incr) <= SIZEW'(decr)) || (size_n <= size_r), ("runtime error: counter underflow"));
|
||||
size_r <= size_n;
|
||||
empty_r <= (size_n == SIZEW'(0));
|
||||
alm_empty_r <= (size_n == SIZEW'(ALM_EMPTY));
|
||||
full_r <= (size_n == SIZEW'(SIZE));
|
||||
alm_full_r <= (size_n == SIZEW'(ALM_FULL));
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -59,30 +68,47 @@ module VX_pending_size #(
|
|||
end else begin
|
||||
|
||||
reg [ADDRW-1:0] used_r;
|
||||
wire [ADDRW-1:0] used_n;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
used_r <= '0;
|
||||
empty_r <= 1;
|
||||
full_r <= 0;
|
||||
end else begin
|
||||
`ASSERT(~(incr && ~decr) || ~full, ("runtime error: incrementing full counter"));
|
||||
`ASSERT(~(decr && ~incr) || ~empty, ("runtime error: decrementing empty counter"));
|
||||
if (reset) begin
|
||||
empty_r <= 1;
|
||||
alm_empty_r <= 1;
|
||||
full_r <= 0;
|
||||
alm_full_r <= 0;
|
||||
used_r <= '0;
|
||||
end else begin
|
||||
`ASSERT(~(incr && ~decr) || ~full, ("runtime error: counter overflow"));
|
||||
`ASSERT(~(decr && ~incr) || ~empty, ("runtime error: counter underflow"));
|
||||
if (incr) begin
|
||||
if (~decr) begin
|
||||
empty_r <= 0;
|
||||
if (used_r == ADDRW'(ALM_EMPTY))
|
||||
alm_empty_r <= 0;
|
||||
if (used_r == ADDRW'(SIZE-1))
|
||||
full_r <= 1;
|
||||
if (used_r == ADDRW'(ALM_FULL-1))
|
||||
alm_full_r <= 1;
|
||||
end
|
||||
end else if (decr) begin
|
||||
full_r <= 0;
|
||||
if (used_r == ADDRW'(1))
|
||||
empty_r <= 1;
|
||||
empty_r <= 1;
|
||||
if (used_r == ADDRW'(ALM_EMPTY+1))
|
||||
alm_empty_r <= 1;
|
||||
full_r <= 0;
|
||||
if (used_r == ADDRW'(ALM_FULL))
|
||||
alm_full_r <= 0;
|
||||
end
|
||||
used_r <= $signed(used_r) + ADDRW'($signed(2'(incr) - 2'(decr)));
|
||||
used_r <= used_n;
|
||||
end
|
||||
end
|
||||
|
||||
if (SIZE == 2) begin
|
||||
assign used_n = used_r ^ (incr ^ decr);
|
||||
end else begin
|
||||
assign used_n = $signed(used_r) + ADDRW'($signed(2'(incr) - 2'(decr)));
|
||||
end
|
||||
|
||||
if (SIZE > 1) begin
|
||||
if (SIZEW > ADDRW) begin
|
||||
assign size = {full_r, used_r};
|
||||
|
@ -95,8 +121,10 @@ module VX_pending_size #(
|
|||
|
||||
end
|
||||
|
||||
assign empty = empty_r;
|
||||
assign full = full_r;
|
||||
|
||||
assign empty = empty_r;
|
||||
assign alm_empty = alm_empty_r;
|
||||
assign alm_full = alm_full_r;
|
||||
assign full = full_r;
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
//`TRACING_ON
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -23,8 +23,8 @@ module VX_priority_arbiter #(
|
|||
output wire [NUM_REQS-1:0] grant_onehot,
|
||||
output wire grant_valid
|
||||
);
|
||||
if (NUM_REQS == 1) begin
|
||||
|
||||
if (NUM_REQS == 1) begin
|
||||
|
||||
assign grant_index = '0;
|
||||
assign grant_onehot = requests;
|
||||
assign grant_valid = requests[0];
|
||||
|
@ -41,6 +41,6 @@ module VX_priority_arbiter #(
|
|||
);
|
||||
|
||||
end
|
||||
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -16,24 +16,23 @@
|
|||
`TRACING_OFF
|
||||
module VX_rr_arbiter #(
|
||||
parameter NUM_REQS = 1,
|
||||
parameter LOCK_ENABLE = 0,
|
||||
parameter MODEL = 1,
|
||||
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire [NUM_REQS-1:0] requests,
|
||||
input wire reset,
|
||||
input wire [NUM_REQS-1:0] requests,
|
||||
output wire [LOG_NUM_REQS-1:0] grant_index,
|
||||
output wire [NUM_REQS-1:0] grant_onehot,
|
||||
output wire [NUM_REQS-1:0] grant_onehot,
|
||||
output wire grant_valid,
|
||||
input wire grant_unlock
|
||||
input wire grant_ready
|
||||
);
|
||||
if (NUM_REQS == 1) begin
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (grant_unlock)
|
||||
|
||||
`UNUSED_VAR (grant_ready)
|
||||
|
||||
assign grant_index = '0;
|
||||
assign grant_onehot = requests;
|
||||
assign grant_valid = requests[0];
|
||||
|
@ -41,7 +40,7 @@ module VX_rr_arbiter #(
|
|||
end else if (NUM_REQS == 2) begin
|
||||
|
||||
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||
reg [LOG_NUM_REQS-1:0] state;
|
||||
|
||||
always @(*) begin
|
||||
|
@ -52,279 +51,279 @@ module VX_rr_arbiter #(
|
|||
endcase
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= '0;
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
end else if (grant_ready) begin
|
||||
state <= grant_index_r;
|
||||
end
|
||||
end
|
||||
|
||||
assign grant_index = grant_index_r;
|
||||
assign grant_onehot = grant_onehot_r;
|
||||
assign grant_valid = (| requests);
|
||||
assign grant_valid = (| requests);
|
||||
|
||||
end /*else if (NUM_REQS == 3) begin
|
||||
|
||||
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||
reg [LOG_NUM_REQS-1:0] state;
|
||||
|
||||
always @(*) begin
|
||||
casez ({state, requests})
|
||||
5'b00_001,
|
||||
5'b01_0?1,
|
||||
5'b01_0?1,
|
||||
5'b10_??1: begin grant_onehot_r = 3'b001; grant_index_r = LOG_NUM_REQS'(0); end
|
||||
5'b00_?1?,
|
||||
5'b01_010,
|
||||
5'b00_?1?,
|
||||
5'b01_010,
|
||||
5'b10_?10: begin grant_onehot_r = 3'b010; grant_index_r = LOG_NUM_REQS'(1); end
|
||||
default: begin grant_onehot_r = 3'b100; grant_index_r = LOG_NUM_REQS'(2); end
|
||||
endcase
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= '0;
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
end else if (grant_ready) begin
|
||||
state <= grant_index_r;
|
||||
end
|
||||
end
|
||||
|
||||
assign grant_index = grant_index_r;
|
||||
assign grant_onehot = grant_onehot_r;
|
||||
assign grant_valid = (| requests);
|
||||
assign grant_valid = (| requests);
|
||||
|
||||
end */else if (NUM_REQS == 4) begin
|
||||
|
||||
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||
reg [LOG_NUM_REQS-1:0] state;
|
||||
|
||||
always @(*) begin
|
||||
casez ({state, requests})
|
||||
6'b00_0001,
|
||||
6'b01_00?1,
|
||||
6'b00_0001,
|
||||
6'b01_00?1,
|
||||
6'b10_0??1,
|
||||
6'b11_???1: begin grant_onehot_r = 4'b0001; grant_index_r = LOG_NUM_REQS'(0); end
|
||||
6'b00_??1?,
|
||||
6'b01_0010,
|
||||
6'b10_0?10,
|
||||
6'b00_??1?,
|
||||
6'b01_0010,
|
||||
6'b10_0?10,
|
||||
6'b11_??10: begin grant_onehot_r = 4'b0010; grant_index_r = LOG_NUM_REQS'(1); end
|
||||
6'b00_?10?,
|
||||
6'b01_?1??,
|
||||
6'b10_0100,
|
||||
6'b00_?10?,
|
||||
6'b01_?1??,
|
||||
6'b10_0100,
|
||||
6'b11_?100: begin grant_onehot_r = 4'b0100; grant_index_r = LOG_NUM_REQS'(2); end
|
||||
default: begin grant_onehot_r = 4'b1000; grant_index_r = LOG_NUM_REQS'(3); end
|
||||
endcase
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= '0;
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
end else if (grant_ready) begin
|
||||
state <= grant_index_r;
|
||||
end
|
||||
end
|
||||
|
||||
assign grant_index = grant_index_r;
|
||||
assign grant_onehot = grant_onehot_r;
|
||||
assign grant_valid = (| requests);
|
||||
assign grant_valid = (| requests);
|
||||
|
||||
end /*else if (NUM_REQS == 5) begin
|
||||
|
||||
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||
reg [LOG_NUM_REQS-1:0] state;
|
||||
|
||||
always @(*) begin
|
||||
casez ({state, requests})
|
||||
8'b000_00001,
|
||||
8'b001_000?1,
|
||||
8'b010_00??1,
|
||||
8'b000_00001,
|
||||
8'b001_000?1,
|
||||
8'b010_00??1,
|
||||
8'b011_0???1,
|
||||
8'b100_????1: begin grant_onehot_r = 5'b00001; grant_index_r = LOG_NUM_REQS'(0); end
|
||||
8'b000_???1?,
|
||||
8'b001_00010,
|
||||
8'b010_00?10,
|
||||
8'b011_0??10,
|
||||
8'b000_???1?,
|
||||
8'b001_00010,
|
||||
8'b010_00?10,
|
||||
8'b011_0??10,
|
||||
8'b100_???10: begin grant_onehot_r = 5'b00010; grant_index_r = LOG_NUM_REQS'(1); end
|
||||
8'b000_??10?,
|
||||
8'b001_??1??,
|
||||
8'b010_00100,
|
||||
8'b000_??10?,
|
||||
8'b001_??1??,
|
||||
8'b010_00100,
|
||||
8'b011_0?100,
|
||||
8'b100_??100: begin grant_onehot_r = 5'b00100; grant_index_r = LOG_NUM_REQS'(2); end
|
||||
8'b000_?100?,
|
||||
8'b001_?10??,
|
||||
8'b000_?100?,
|
||||
8'b001_?10??,
|
||||
8'b010_?1???,
|
||||
8'b011_01000,
|
||||
8'b011_01000,
|
||||
8'b100_?1000: begin grant_onehot_r = 5'b01000; grant_index_r = LOG_NUM_REQS'(3); end
|
||||
default: begin grant_onehot_r = 5'b10000; grant_index_r = LOG_NUM_REQS'(4); end
|
||||
endcase
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= '0;
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
end else if (grant_ready) begin
|
||||
state <= grant_index_r;
|
||||
end
|
||||
end
|
||||
|
||||
assign grant_index = grant_index_r;
|
||||
assign grant_onehot = grant_onehot_r;
|
||||
assign grant_valid = (| requests);
|
||||
assign grant_valid = (| requests);
|
||||
|
||||
end else if (NUM_REQS == 6) begin
|
||||
|
||||
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||
reg [LOG_NUM_REQS-1:0] state;
|
||||
|
||||
always @(*) begin
|
||||
casez ({state, requests})
|
||||
9'b000_000001,
|
||||
9'b001_0000?1,
|
||||
9'b010_000??1,
|
||||
9'b011_00???1,
|
||||
9'b100_0????1,
|
||||
9'b000_000001,
|
||||
9'b001_0000?1,
|
||||
9'b010_000??1,
|
||||
9'b011_00???1,
|
||||
9'b100_0????1,
|
||||
9'b101_?????1: begin grant_onehot_r = 6'b000001; grant_index_r = LOG_NUM_REQS'(0); end
|
||||
9'b000_????1?,
|
||||
9'b001_000010,
|
||||
9'b010_000?10,
|
||||
9'b011_00??10,
|
||||
9'b100_0???10,
|
||||
9'b000_????1?,
|
||||
9'b001_000010,
|
||||
9'b010_000?10,
|
||||
9'b011_00??10,
|
||||
9'b100_0???10,
|
||||
9'b101_????10: begin grant_onehot_r = 6'b000010; grant_index_r = LOG_NUM_REQS'(1); end
|
||||
9'b000_???10?,
|
||||
9'b001_???1??,
|
||||
9'b010_000100,
|
||||
9'b000_???10?,
|
||||
9'b001_???1??,
|
||||
9'b010_000100,
|
||||
9'b011_00?100,
|
||||
9'b100_0??100,
|
||||
9'b100_0??100,
|
||||
9'b101_???100: begin grant_onehot_r = 6'b000100; grant_index_r = LOG_NUM_REQS'(2); end
|
||||
9'b000_??100?,
|
||||
9'b001_??10??,
|
||||
9'b000_??100?,
|
||||
9'b001_??10??,
|
||||
9'b010_??1???,
|
||||
9'b011_001000,
|
||||
9'b100_0?1000,
|
||||
9'b011_001000,
|
||||
9'b100_0?1000,
|
||||
9'b101_??1000: begin grant_onehot_r = 6'b001000; grant_index_r = LOG_NUM_REQS'(3); end
|
||||
9'b000_?1000?,
|
||||
9'b001_?100??,
|
||||
9'b000_?1000?,
|
||||
9'b001_?100??,
|
||||
9'b010_?10???,
|
||||
9'b011_?1????,
|
||||
9'b100_010000,
|
||||
9'b011_?1????,
|
||||
9'b100_010000,
|
||||
9'b101_?10000: begin grant_onehot_r = 6'b010000; grant_index_r = LOG_NUM_REQS'(4); end
|
||||
default: begin grant_onehot_r = 6'b100000; grant_index_r = LOG_NUM_REQS'(5); end
|
||||
endcase
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= '0;
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
end else if (grant_ready) begin
|
||||
state <= grant_index_r;
|
||||
end
|
||||
end
|
||||
|
||||
assign grant_index = grant_index_r;
|
||||
assign grant_onehot = grant_onehot_r;
|
||||
assign grant_valid = (| requests);
|
||||
assign grant_valid = (| requests);
|
||||
|
||||
end else if (NUM_REQS == 7) begin
|
||||
|
||||
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||
reg [LOG_NUM_REQS-1:0] state;
|
||||
|
||||
always @(*) begin
|
||||
casez ({state, requests})
|
||||
10'b000_000001,
|
||||
10'b001_0000?1,
|
||||
10'b010_000??1,
|
||||
10'b011_00???1,
|
||||
10'b100_00???1,
|
||||
10'b101_0????1,
|
||||
10'b000_000001,
|
||||
10'b001_0000?1,
|
||||
10'b010_000??1,
|
||||
10'b011_00???1,
|
||||
10'b100_00???1,
|
||||
10'b101_0????1,
|
||||
10'b110_?????1: begin grant_onehot_r = 7'b0000001; grant_index_r = LOG_NUM_REQS'(0); end
|
||||
10'b000_?????1?,
|
||||
10'b001_0000010,
|
||||
10'b010_0000?10,
|
||||
10'b011_000??10,
|
||||
10'b100_00???10,
|
||||
10'b101_0????10,
|
||||
10'b000_?????1?,
|
||||
10'b001_0000010,
|
||||
10'b010_0000?10,
|
||||
10'b011_000??10,
|
||||
10'b100_00???10,
|
||||
10'b101_0????10,
|
||||
10'b110_?????10: begin grant_onehot_r = 7'b0000010; grant_index_r = LOG_NUM_REQS'(1); end
|
||||
10'b000_????10?,
|
||||
10'b001_????1??,
|
||||
10'b010_0000100,
|
||||
10'b000_????10?,
|
||||
10'b001_????1??,
|
||||
10'b010_0000100,
|
||||
10'b011_000?100,
|
||||
10'b100_00??100,
|
||||
10'b101_0???100,
|
||||
10'b100_00??100,
|
||||
10'b101_0???100,
|
||||
10'b110_????100: begin grant_onehot_r = 7'b0000100; grant_index_r = LOG_NUM_REQS'(2); end
|
||||
10'b000_???100?,
|
||||
10'b001_???10??,
|
||||
10'b000_???100?,
|
||||
10'b001_???10??,
|
||||
10'b010_???1???,
|
||||
10'b011_0001000,
|
||||
10'b100_00?1000,
|
||||
10'b101_0??1000,
|
||||
10'b011_0001000,
|
||||
10'b100_00?1000,
|
||||
10'b101_0??1000,
|
||||
10'b110_???1000: begin grant_onehot_r = 7'b0001000; grant_index_r = LOG_NUM_REQS'(3); end
|
||||
10'b000_??1000?,
|
||||
10'b001_??100??,
|
||||
10'b000_??1000?,
|
||||
10'b001_??100??,
|
||||
10'b010_??10???,
|
||||
10'b011_??1????,
|
||||
10'b100_0010000,
|
||||
10'b101_0?10000,
|
||||
10'b011_??1????,
|
||||
10'b100_0010000,
|
||||
10'b101_0?10000,
|
||||
10'b110_??10000: begin grant_onehot_r = 7'b0010000; grant_index_r = LOG_NUM_REQS'(4); end
|
||||
10'b000_?10000?,
|
||||
10'b001_?1000??,
|
||||
10'b000_?10000?,
|
||||
10'b001_?1000??,
|
||||
10'b010_?100???,
|
||||
10'b011_?10????,
|
||||
10'b100_?1?????,
|
||||
10'b101_0100000,
|
||||
10'b011_?10????,
|
||||
10'b100_?1?????,
|
||||
10'b101_0100000,
|
||||
10'b110_?100000: begin grant_onehot_r = 7'b0100000; grant_index_r = LOG_NUM_REQS'(5); end
|
||||
default: begin grant_onehot_r = 7'b1000000; grant_index_r = LOG_NUM_REQS'(6); end
|
||||
endcase
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= '0;
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
end else if (grant_ready) begin
|
||||
state <= grant_index_r;
|
||||
end
|
||||
end
|
||||
|
||||
assign grant_index = grant_index_r;
|
||||
assign grant_onehot = grant_onehot_r;
|
||||
assign grant_valid = (| requests);
|
||||
assign grant_valid = (| requests);
|
||||
|
||||
end */else if (NUM_REQS == 8) begin
|
||||
|
||||
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||
reg [LOG_NUM_REQS-1:0] state;
|
||||
|
||||
always @(*) begin
|
||||
casez ({state, requests})
|
||||
11'b000_00000001,
|
||||
11'b001_000000?1,
|
||||
11'b010_00000??1,
|
||||
11'b000_00000001,
|
||||
11'b001_000000?1,
|
||||
11'b010_00000??1,
|
||||
11'b011_0000???1,
|
||||
11'b100_000????1,
|
||||
11'b101_00?????1,
|
||||
11'b110_0??????1,
|
||||
11'b100_000????1,
|
||||
11'b101_00?????1,
|
||||
11'b110_0??????1,
|
||||
11'b111_???????1: begin grant_onehot_r = 8'b00000001; grant_index_r = LOG_NUM_REQS'(0); end
|
||||
11'b000_??????1?,
|
||||
11'b001_00000010,
|
||||
11'b010_00000?10,
|
||||
11'b000_??????1?,
|
||||
11'b001_00000010,
|
||||
11'b010_00000?10,
|
||||
11'b011_0000??10,
|
||||
11'b100_000???10,
|
||||
11'b101_00????10,
|
||||
11'b110_0?????10,
|
||||
11'b100_000???10,
|
||||
11'b101_00????10,
|
||||
11'b110_0?????10,
|
||||
11'b111_??????10: begin grant_onehot_r = 8'b00000010; grant_index_r = LOG_NUM_REQS'(1); end
|
||||
11'b000_?????10?,
|
||||
11'b001_?????1??,
|
||||
11'b010_00000100,
|
||||
11'b000_?????10?,
|
||||
11'b001_?????1??,
|
||||
11'b010_00000100,
|
||||
11'b011_0000?100,
|
||||
11'b100_000??100,
|
||||
11'b101_00???100,
|
||||
11'b110_0????100,
|
||||
11'b100_000??100,
|
||||
11'b101_00???100,
|
||||
11'b110_0????100,
|
||||
11'b111_?????100: begin grant_onehot_r = 8'b00000100; grant_index_r = LOG_NUM_REQS'(2); end
|
||||
11'b000_????100?,
|
||||
11'b001_????10??,
|
||||
|
@ -362,20 +361,20 @@ module VX_rr_arbiter #(
|
|||
endcase
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= '0;
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
end else if (grant_ready) begin
|
||||
state <= grant_index_r;
|
||||
end
|
||||
end
|
||||
|
||||
assign grant_index = grant_index_r;
|
||||
assign grant_onehot = grant_onehot_r;
|
||||
assign grant_valid = (| requests);
|
||||
|
||||
assign grant_valid = (| requests);
|
||||
|
||||
end else if (MODEL == 1) begin
|
||||
|
||||
|
||||
`IGNORE_UNOPTFLAT_BEGIN
|
||||
wire [NUM_REQS-1:0] mask_higher_pri_regs, unmask_higher_pri_regs;
|
||||
`IGNORE_UNOPTFLAT_END
|
||||
|
@ -385,12 +384,18 @@ module VX_rr_arbiter #(
|
|||
|
||||
wire [NUM_REQS-1:0] req_masked = requests & pointer_reg;
|
||||
|
||||
assign mask_higher_pri_regs[NUM_REQS-1:1] = mask_higher_pri_regs[NUM_REQS-2:0] | req_masked[NUM_REQS-2:0];
|
||||
assign mask_higher_pri_regs[0] = 1'b0;
|
||||
for (genvar i = 1; i < NUM_REQS; ++i) begin
|
||||
assign mask_higher_pri_regs[i] = mask_higher_pri_regs[i-1] | req_masked[i-1];
|
||||
end
|
||||
|
||||
assign grant_masked[NUM_REQS-1:0] = req_masked[NUM_REQS-1:0] & ~mask_higher_pri_regs[NUM_REQS-1:0];
|
||||
|
||||
assign unmask_higher_pri_regs[NUM_REQS-1:1] = unmask_higher_pri_regs[NUM_REQS-2:0] | requests[NUM_REQS-2:0];
|
||||
assign unmask_higher_pri_regs[0] = 1'b0;
|
||||
for (genvar i = 1; i < NUM_REQS; ++i) begin
|
||||
assign unmask_higher_pri_regs[i] = unmask_higher_pri_regs[i-1] | requests[i-1];
|
||||
end
|
||||
|
||||
assign grant_unmasked[NUM_REQS-1:0] = requests[NUM_REQS-1:0] & ~unmask_higher_pri_regs[NUM_REQS-1:0];
|
||||
|
||||
wire no_req_masked = ~(|req_masked);
|
||||
|
@ -399,7 +404,7 @@ module VX_rr_arbiter #(
|
|||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
pointer_reg <= {NUM_REQS{1'b1}};
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
end else if (grant_ready) begin
|
||||
if (|req_masked) begin
|
||||
pointer_reg <= mask_higher_pri_regs;
|
||||
end else if (|requests) begin
|
||||
|
@ -410,22 +415,22 @@ module VX_rr_arbiter #(
|
|||
end
|
||||
end
|
||||
|
||||
assign grant_valid = (| requests);
|
||||
assign grant_valid = (| requests);
|
||||
|
||||
VX_onehot_encoder #(
|
||||
.N (NUM_REQS)
|
||||
) onehot_encoder (
|
||||
.data_in (grant_onehot),
|
||||
.data_out (grant_index),
|
||||
.data_out (grant_index),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
|
||||
end else begin
|
||||
|
||||
|
||||
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||
reg [NUM_REQS-1:0] state;
|
||||
|
||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||
reg [NUM_REQS-1:0] state;
|
||||
|
||||
always @(*) begin
|
||||
grant_index_r = 'x;
|
||||
grant_onehot_r = 'x;
|
||||
|
@ -440,18 +445,18 @@ module VX_rr_arbiter #(
|
|||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= '0;
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
end else if (grant_ready) begin
|
||||
state <= grant_index_r;
|
||||
end
|
||||
end
|
||||
|
||||
assign grant_index = grant_index_r;
|
||||
assign grant_onehot = grant_onehot_r;
|
||||
assign grant_valid = (| requests);
|
||||
assign grant_valid = (| requests);
|
||||
end
|
||||
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -17,17 +17,18 @@
|
|||
module VX_sp_ram #(
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 1,
|
||||
parameter ADDR_MIN = 0,
|
||||
parameter WRENW = 1,
|
||||
parameter OUT_REG = 0,
|
||||
parameter NO_RWCHECK = 0,
|
||||
parameter LUTRAM = 0,
|
||||
parameter LUTRAM = 0,
|
||||
parameter INIT_ENABLE = 0,
|
||||
parameter INIT_FILE = "",
|
||||
parameter [DATAW-1:0] INIT_VALUE = 0,
|
||||
parameter ADDRW = `LOG2UP(SIZE)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire read,
|
||||
) (
|
||||
input wire clk,
|
||||
input wire read,
|
||||
input wire write,
|
||||
input wire [WRENW-1:0] wren,
|
||||
input wire [ADDRW-1:0] addr,
|
||||
|
@ -37,6 +38,7 @@ module VX_sp_ram #(
|
|||
VX_dp_ram #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (SIZE),
|
||||
.ADDR_MIN (ADDR_MIN),
|
||||
.WRENW (WRENW),
|
||||
.OUT_REG (OUT_REG),
|
||||
.NO_RWCHECK (NO_RWCHECK),
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -20,7 +20,8 @@ module VX_stream_arb #(
|
|||
parameter DATAW = 1,
|
||||
parameter `STRING ARBITER = "P",
|
||||
parameter MAX_FANOUT = `MAX_FANOUT,
|
||||
parameter OUT_BUF = 0 ,
|
||||
parameter OUT_BUF = 0,
|
||||
parameter LUTRAM = 0,
|
||||
parameter NUM_REQS = `CDIV(NUM_INPUTS, NUM_OUTPUTS),
|
||||
parameter LOG_NUM_REQS = `CLOG2(NUM_REQS),
|
||||
parameter NUM_REQS_W = `UP(LOG_NUM_REQS)
|
||||
|
@ -42,7 +43,7 @@ module VX_stream_arb #(
|
|||
if (NUM_OUTPUTS > 1) begin
|
||||
|
||||
// (#inputs > #outputs) and (#outputs > 1)
|
||||
|
||||
|
||||
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
|
||||
|
||||
localparam BATCH_BEGIN = i * NUM_REQS;
|
||||
|
@ -57,7 +58,8 @@ module VX_stream_arb #(
|
|||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
.OUT_BUF (OUT_BUF)
|
||||
.OUT_BUF (OUT_BUF),
|
||||
.LUTRAM (LUTRAM)
|
||||
) arb_slice (
|
||||
.clk (clk),
|
||||
.reset (slice_reset),
|
||||
|
@ -81,8 +83,8 @@ module VX_stream_arb #(
|
|||
|
||||
wire [NUM_BATCHES-1:0] valid_tmp;
|
||||
wire [NUM_BATCHES-1:0][DATAW+LOG_NUM_REQS2-1:0] data_tmp;
|
||||
wire [NUM_BATCHES-1:0] ready_tmp;
|
||||
|
||||
wire [NUM_BATCHES-1:0] ready_tmp;
|
||||
|
||||
for (genvar i = 0; i < NUM_BATCHES; ++i) begin
|
||||
|
||||
localparam BATCH_BEGIN = i * MAX_FANOUT;
|
||||
|
@ -97,18 +99,19 @@ module VX_stream_arb #(
|
|||
if (MAX_FANOUT != 1) begin
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (BATCH_SIZE),
|
||||
.NUM_OUTPUTS (1),
|
||||
.NUM_OUTPUTS (1),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
.OUT_BUF (OUT_BUF)
|
||||
.OUT_BUF (3), // registered output
|
||||
.LUTRAM (LUTRAM)
|
||||
) fanout_slice_arb (
|
||||
.clk (clk),
|
||||
.reset (slice_reset),
|
||||
.valid_in (valid_in[BATCH_END-1: BATCH_BEGIN]),
|
||||
.data_in (data_in[BATCH_END-1: BATCH_BEGIN]),
|
||||
.ready_in (ready_in[BATCH_END-1: BATCH_BEGIN]),
|
||||
.valid_out (valid_tmp[i]),
|
||||
.ready_in (ready_in[BATCH_END-1: BATCH_BEGIN]),
|
||||
.valid_out (valid_tmp[i]),
|
||||
.data_out (data_tmp_u),
|
||||
.sel_out (sel_tmp_u),
|
||||
.ready_out (ready_tmp[i])
|
||||
|
@ -123,11 +126,12 @@ module VX_stream_arb #(
|
|||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (NUM_BATCHES),
|
||||
.NUM_OUTPUTS (1),
|
||||
.NUM_OUTPUTS (1),
|
||||
.DATAW (DATAW + LOG_NUM_REQS2),
|
||||
.ARBITER (ARBITER),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
.OUT_BUF (OUT_BUF)
|
||||
.OUT_BUF (OUT_BUF),
|
||||
.LUTRAM (LUTRAM)
|
||||
) fanout_join_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -150,16 +154,15 @@ module VX_stream_arb #(
|
|||
wire valid_in_r;
|
||||
wire [DATAW-1:0] data_in_r;
|
||||
wire ready_in_r;
|
||||
|
||||
|
||||
wire arb_valid;
|
||||
wire [NUM_REQS_W-1:0] arb_index;
|
||||
wire [NUM_REQS-1:0] arb_onehot;
|
||||
wire arb_ready;
|
||||
|
||||
VX_generic_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.LOCK_ENABLE (1),
|
||||
.TYPE (ARBITER)
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.TYPE (ARBITER)
|
||||
) arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -167,21 +170,30 @@ module VX_stream_arb #(
|
|||
.grant_valid (arb_valid),
|
||||
.grant_index (arb_index),
|
||||
.grant_onehot (arb_onehot),
|
||||
.grant_unlock (arb_ready)
|
||||
.grant_ready (arb_ready)
|
||||
);
|
||||
|
||||
assign valid_in_r = arb_valid;
|
||||
assign data_in_r = data_in[arb_index];
|
||||
assign arb_ready = ready_in_r;
|
||||
|
||||
VX_onehot_mux #(
|
||||
.DATAW (DATAW),
|
||||
.N (NUM_REQS)
|
||||
) onehot_mux (
|
||||
.data_in (data_in),
|
||||
.sel_in (arb_onehot),
|
||||
.data_out (data_in_r)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign ready_in[i] = ready_in_r & arb_onehot[i];
|
||||
assign ready_in[i] = ready_in_r && arb_onehot[i];
|
||||
end
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (LOG_NUM_REQS + DATAW),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)),
|
||||
.LUTRAM (LUTRAM)
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -214,7 +226,8 @@ module VX_stream_arb #(
|
|||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
.OUT_BUF (OUT_BUF)
|
||||
.OUT_BUF (OUT_BUF),
|
||||
.LUTRAM (LUTRAM)
|
||||
) arb_slice (
|
||||
.clk (clk),
|
||||
.reset (slice_reset),
|
||||
|
@ -248,19 +261,20 @@ module VX_stream_arb #(
|
|||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
.OUT_BUF (OUT_BUF)
|
||||
.OUT_BUF (3), // registered output
|
||||
.LUTRAM (LUTRAM)
|
||||
) fanout_fork_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in),
|
||||
.ready_in (ready_in),
|
||||
.data_in (data_in),
|
||||
.data_in (data_in),
|
||||
.data_out (data_tmp),
|
||||
.valid_out (valid_tmp),
|
||||
.ready_out (ready_tmp),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
|
||||
for (genvar i = 0; i < NUM_BATCHES; ++i) begin
|
||||
|
||||
localparam BATCH_BEGIN = i * MAX_FANOUT;
|
||||
|
@ -271,11 +285,12 @@ module VX_stream_arb #(
|
|||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (1),
|
||||
.NUM_OUTPUTS (BATCH_SIZE),
|
||||
.NUM_OUTPUTS (BATCH_SIZE),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
.OUT_BUF (OUT_BUF)
|
||||
.OUT_BUF (OUT_BUF),
|
||||
.LUTRAM (LUTRAM)
|
||||
) fanout_slice_arb (
|
||||
.clk (clk),
|
||||
.reset (slice_reset),
|
||||
|
@ -293,25 +308,24 @@ module VX_stream_arb #(
|
|||
|
||||
// (#inputs == 1) and (#outputs <= max_fanout)
|
||||
|
||||
wire [NUM_OUTPUTS-1:0] ready_in_r;
|
||||
|
||||
wire [NUM_OUTPUTS-1:0] ready_in_r;
|
||||
|
||||
wire [NUM_OUTPUTS-1:0] arb_requests;
|
||||
wire arb_valid;
|
||||
wire [NUM_OUTPUTS-1:0] arb_onehot;
|
||||
wire arb_ready;
|
||||
|
||||
VX_generic_arbiter #(
|
||||
.NUM_REQS (NUM_OUTPUTS),
|
||||
.LOCK_ENABLE (1),
|
||||
.TYPE (ARBITER)
|
||||
.NUM_REQS (NUM_OUTPUTS),
|
||||
.TYPE (ARBITER)
|
||||
) arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (arb_requests),
|
||||
.grant_valid (arb_valid),
|
||||
`UNUSED_PIN (grant_index),
|
||||
`UNUSED_PIN (grant_index),
|
||||
.grant_onehot (arb_onehot),
|
||||
.grant_unlock (arb_ready)
|
||||
.grant_ready (arb_ready)
|
||||
);
|
||||
|
||||
assign arb_requests = ready_in_r;
|
||||
|
@ -320,9 +334,10 @@ module VX_stream_arb #(
|
|||
|
||||
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
||||
.DATAW (DATAW),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)),
|
||||
.LUTRAM (LUTRAM)
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -337,7 +352,7 @@ module VX_stream_arb #(
|
|||
end
|
||||
|
||||
assign sel_out = 0;
|
||||
|
||||
|
||||
end else begin
|
||||
|
||||
// #Inputs == #Outputs
|
||||
|
@ -349,7 +364,8 @@ module VX_stream_arb #(
|
|||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)),
|
||||
.LUTRAM (LUTRAM)
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (out_buf_reset),
|
||||
|
@ -363,6 +379,6 @@ module VX_stream_arb #(
|
|||
assign sel_out[i] = NUM_REQS_W'(i);
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -15,9 +15,9 @@
|
|||
|
||||
`TRACING_OFF
|
||||
module VX_stream_pack #(
|
||||
parameter NUM_REQS = 1,
|
||||
parameter DATA_WIDTH = 1,
|
||||
parameter TAG_WIDTH = 1,
|
||||
parameter NUM_REQS = 1,
|
||||
parameter DATA_WIDTH = 1,
|
||||
parameter TAG_WIDTH = 1,
|
||||
parameter TAG_SEL_BITS = 0,
|
||||
parameter `STRING ARBITER = "P",
|
||||
parameter OUT_BUF = 0
|
||||
|
@ -38,47 +38,48 @@ module VX_stream_pack #(
|
|||
output wire [TAG_WIDTH-1:0] tag_out,
|
||||
input wire ready_out
|
||||
);
|
||||
localparam LOG_NUM_REQS = `CLOG2(NUM_REQS);
|
||||
|
||||
if (NUM_REQS > 1) begin
|
||||
|
||||
wire [LOG_NUM_REQS-1:0] grant_index;
|
||||
wire [NUM_REQS-1:0] grant_onehot;
|
||||
wire grant_valid;
|
||||
wire grant_ready;
|
||||
|
||||
VX_generic_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.LOCK_ENABLE (1),
|
||||
.TYPE (ARBITER)
|
||||
.TYPE (ARBITER)
|
||||
) arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (valid_in),
|
||||
.requests (valid_in),
|
||||
.grant_valid (grant_valid),
|
||||
.grant_index (grant_index),
|
||||
`UNUSED_PIN (grant_onehot),
|
||||
.grant_unlock(grant_ready)
|
||||
`UNUSED_PIN (grant_index),
|
||||
.grant_onehot(grant_onehot),
|
||||
.grant_ready (grant_ready)
|
||||
);
|
||||
|
||||
reg [NUM_REQS-1:0] valid_sel;
|
||||
reg [NUM_REQS-1:0] ready_sel;
|
||||
wire ready_unqual;
|
||||
wire [TAG_WIDTH-1:0] tag_sel;
|
||||
|
||||
wire [TAG_WIDTH-1:0] tag_sel = tag_in[grant_index];
|
||||
|
||||
always @(*) begin
|
||||
valid_sel = '0;
|
||||
ready_sel = '0;
|
||||
for (integer i = 0; i < NUM_REQS; ++i) begin
|
||||
if (tag_in[i][TAG_SEL_BITS-1:0] == tag_sel[TAG_SEL_BITS-1:0]) begin
|
||||
valid_sel[i] = valid_in[i];
|
||||
ready_sel[i] = ready_unqual;
|
||||
end
|
||||
end
|
||||
end
|
||||
VX_onehot_mux #(
|
||||
.DATAW (TAG_WIDTH),
|
||||
.N (NUM_REQS)
|
||||
) onehot_mux (
|
||||
.data_in (tag_in),
|
||||
.sel_in (grant_onehot),
|
||||
.data_out (tag_sel)
|
||||
);
|
||||
|
||||
wire [NUM_REQS-1:0] tag_matches;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign tag_matches[i] = (tag_in[i][TAG_SEL_BITS-1:0] == tag_sel[TAG_SEL_BITS-1:0]);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign ready_in[i] = grant_ready & tag_matches[i];
|
||||
end
|
||||
|
||||
wire [NUM_REQS-1:0] mask_sel = valid_in & tag_matches;
|
||||
|
||||
assign grant_ready = ready_unqual;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (NUM_REQS + TAG_WIDTH + (NUM_REQS * DATA_WIDTH)),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||
|
@ -86,16 +87,14 @@ module VX_stream_pack #(
|
|||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (grant_valid),
|
||||
.data_in ({valid_sel, tag_sel, data_in}),
|
||||
.ready_in (ready_unqual),
|
||||
.valid_in (grant_valid),
|
||||
.data_in ({mask_sel, tag_sel, data_in}),
|
||||
.ready_in (grant_ready),
|
||||
.valid_out (valid_out),
|
||||
.data_out ({mask_out, tag_out, data_out}),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
);
|
||||
|
||||
assign ready_in = ready_sel;
|
||||
|
||||
end else begin
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -22,6 +22,7 @@ module VX_stream_xbar #(
|
|||
parameter OUT_WIDTH = `LOG2UP(NUM_OUTPUTS),
|
||||
parameter ARBITER = "P",
|
||||
parameter OUT_BUF = 0,
|
||||
parameter LUTRAM = 0,
|
||||
parameter MAX_FANOUT = `MAX_FANOUT,
|
||||
parameter PERF_CTR_BITS = `CLOG2(NUM_INPUTS+1)
|
||||
) (
|
||||
|
@ -36,7 +37,7 @@ module VX_stream_xbar #(
|
|||
output wire [NUM_INPUTS-1:0] ready_in,
|
||||
|
||||
output wire [NUM_OUTPUTS-1:0] valid_out,
|
||||
output wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out,
|
||||
output wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out,
|
||||
output wire [NUM_OUTPUTS-1:0][IN_WIDTH-1:0] sel_out,
|
||||
input wire [NUM_OUTPUTS-1:0] ready_out
|
||||
);
|
||||
|
@ -66,7 +67,8 @@ module VX_stream_xbar #(
|
|||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
.OUT_BUF (OUT_BUF)
|
||||
.OUT_BUF (OUT_BUF),
|
||||
.LUTRAM (LUTRAM)
|
||||
) xbar_arb (
|
||||
.clk (clk),
|
||||
.reset (slice_reset),
|
||||
|
@ -94,7 +96,8 @@ module VX_stream_xbar #(
|
|||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
.OUT_BUF (OUT_BUF)
|
||||
.OUT_BUF (OUT_BUF),
|
||||
.LUTRAM (LUTRAM)
|
||||
) xbar_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -124,13 +127,14 @@ module VX_stream_xbar #(
|
|||
assign ready_in = ready_out_r[sel_in];
|
||||
|
||||
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
|
||||
|
||||
|
||||
`RESET_RELAY (out_buf_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)),
|
||||
.LUTRAM (LUTRAM)
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (out_buf_reset),
|
||||
|
@ -152,7 +156,8 @@ module VX_stream_xbar #(
|
|||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)),
|
||||
.LUTRAM (LUTRAM)
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -172,7 +177,7 @@ module VX_stream_xbar #(
|
|||
// compute inputs collision
|
||||
// we have a collision when there exists a valid transfer with multiple input candicates
|
||||
// we count the unique duplicates each cycle.
|
||||
|
||||
|
||||
reg [NUM_INPUTS-1:0] per_cycle_collision, per_cycle_collision_r;
|
||||
wire [`CLOG2(NUM_INPUTS+1)-1:0] collision_count;
|
||||
reg [PERF_CTR_BITS-1:0] collisions_r;
|
||||
|
@ -182,14 +187,14 @@ module VX_stream_xbar #(
|
|||
for (integer i = 0; i < NUM_INPUTS; ++i) begin
|
||||
for (integer j = 1; j < (NUM_INPUTS-i); ++j) begin
|
||||
per_cycle_collision[i] |= valid_in[i]
|
||||
&& valid_in[j+i]
|
||||
&& valid_in[j+i]
|
||||
&& (sel_in[i] == sel_in[j+i])
|
||||
&& (ready_in[i] | ready_in[j+i]);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
`BUFFER(per_cycle_collision_r, per_cycle_collision);
|
||||
|
||||
`BUFFER(per_cycle_collision_r, per_cycle_collision);
|
||||
`POP_COUNT(collision_count, per_cycle_collision_r);
|
||||
|
||||
always @(posedge clk) begin
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -17,10 +17,10 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
parameter `STRING INSTANCE_ID = "",
|
||||
|
||||
// Size of cache in bytes
|
||||
parameter SIZE = (1024*16*8),
|
||||
|
||||
parameter SIZE = (1024*16*8),
|
||||
|
||||
// Number of Word requests per cycle
|
||||
parameter NUM_REQS = 4,
|
||||
parameter NUM_REQS = 4,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 4,
|
||||
|
||||
|
@ -33,8 +33,11 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
parameter UUID_WIDTH = 0,
|
||||
|
||||
// Request tag size
|
||||
parameter TAG_WIDTH = 16
|
||||
) (
|
||||
parameter TAG_WIDTH = 16,
|
||||
|
||||
// Response buffer
|
||||
parameter OUT_BUF = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
@ -59,7 +62,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
localparam REQ_DATAW = 1 + BANK_ADDR_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH;
|
||||
localparam RSP_DATAW = WORD_WIDTH + TAG_WIDTH;
|
||||
|
||||
`STATIC_ASSERT(ADDR_WIDTH == (BANK_ADDR_WIDTH + `CLOG2(NUM_BANKS)), ("invalid parameter"))
|
||||
`STATIC_ASSERT(ADDR_WIDTH == (BANK_ADDR_WIDTH + `CLOG2(NUM_BANKS)), ("invalid parameter"))
|
||||
|
||||
// bank selection
|
||||
|
||||
|
@ -70,7 +73,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end else begin
|
||||
assign req_bank_idx = 0;
|
||||
end
|
||||
end
|
||||
|
||||
// bank addressing
|
||||
|
||||
|
@ -83,18 +86,18 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
// bank requests dispatch
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_req_valid;
|
||||
wire [NUM_BANKS-1:0] per_bank_req_rw;
|
||||
wire [NUM_BANKS-1:0] per_bank_req_rw;
|
||||
wire [NUM_BANKS-1:0][BANK_ADDR_WIDTH-1:0] per_bank_req_addr;
|
||||
wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_req_byteen;
|
||||
wire [NUM_BANKS-1:0][WORD_WIDTH-1:0] per_bank_req_data;
|
||||
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_req_tag;
|
||||
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_req_idx;
|
||||
wire [NUM_BANKS-1:0] per_bank_req_ready;
|
||||
|
||||
|
||||
wire [NUM_BANKS-1:0][REQ_DATAW-1:0] per_bank_req_data_all;
|
||||
|
||||
wire [NUM_REQS-1:0] req_valid_in;
|
||||
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in;
|
||||
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in;
|
||||
wire [NUM_REQS-1:0] req_ready_in;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
|
@ -104,13 +107,13 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign req_valid_in[i] = mem_bus_if[i].req_valid;
|
||||
assign req_data_in[i] = {
|
||||
mem_bus_if[i].req_data.rw,
|
||||
mem_bus_if[i].req_data.rw,
|
||||
req_bank_addr[i],
|
||||
mem_bus_if[i].req_data.byteen,
|
||||
mem_bus_if[i].req_data.data,
|
||||
mem_bus_if[i].req_data.tag};
|
||||
assign mem_bus_if[i].req_ready = req_ready_in[i];
|
||||
end
|
||||
end
|
||||
|
||||
VX_stream_xbar #(
|
||||
.NUM_INPUTS (NUM_REQS),
|
||||
|
@ -138,10 +141,10 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign {
|
||||
per_bank_req_rw[i],
|
||||
per_bank_req_rw[i],
|
||||
per_bank_req_addr[i],
|
||||
per_bank_req_byteen[i],
|
||||
per_bank_req_data[i],
|
||||
per_bank_req_byteen[i],
|
||||
per_bank_req_data[i],
|
||||
per_bank_req_tag[i]} = per_bank_req_data_all[i];
|
||||
end
|
||||
|
||||
|
@ -149,13 +152,13 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
|
||||
wire [NUM_BANKS-1:0] per_bank_rsp_valid;
|
||||
wire [NUM_BANKS-1:0][WORD_WIDTH-1:0] per_bank_rsp_data;
|
||||
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_rsp_idx;
|
||||
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_rsp_tag;
|
||||
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_rsp_idx;
|
||||
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_rsp_tag;
|
||||
wire [NUM_BANKS-1:0] per_bank_rsp_ready;
|
||||
|
||||
`RESET_RELAY (bank_reset, reset);
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
VX_sp_ram #(
|
||||
.DATAW (WORD_WIDTH),
|
||||
.SIZE (WORDS_PER_BANK),
|
||||
|
@ -165,7 +168,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
.read (1'b1),
|
||||
.write (per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i]),
|
||||
.wren (per_bank_req_byteen[i]),
|
||||
.addr (per_bank_req_addr[i]),
|
||||
.addr (per_bank_req_addr[i]),
|
||||
.wdata (per_bank_req_data[i]),
|
||||
.rdata (per_bank_rsp_data[i])
|
||||
);
|
||||
|
@ -193,7 +196,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
// bank responses gather
|
||||
|
||||
wire [NUM_BANKS-1:0][RSP_DATAW-1:0] per_bank_rsp_data_all;
|
||||
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign per_bank_rsp_data_all[i] = {per_bank_rsp_data[i], per_bank_rsp_tag[i]};
|
||||
end
|
||||
|
@ -206,7 +209,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
.NUM_INPUTS (NUM_BANKS),
|
||||
.NUM_OUTPUTS (NUM_REQS),
|
||||
.DATAW (RSP_DATAW),
|
||||
.OUT_BUF (2)
|
||||
.OUT_BUF (OUT_BUF)
|
||||
) rsp_xbar (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -302,38 +305,38 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
assign per_bank_rsp_uuid[i] = 0;
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
always @(posedge clk) begin
|
||||
if (mem_bus_if[i].req_valid && mem_bus_if[i].req_ready) begin
|
||||
if (mem_bus_if[i].req_data.rw) begin
|
||||
`TRACE(1, ("%d: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
|
||||
`TRACE(1, ("%d: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, req_uuid[i]));
|
||||
end else begin
|
||||
`TRACE(1, ("%d: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n",
|
||||
`TRACE(1, ("%d: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, req_uuid[i]));
|
||||
end
|
||||
end
|
||||
if (mem_bus_if[i].rsp_valid && mem_bus_if[i].rsp_ready) begin
|
||||
`TRACE(1, ("%d: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%0h (#%0d)\n",
|
||||
`TRACE(1, ("%d: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, i, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data[i], rsp_uuid[i]));
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
always @(posedge clk) begin
|
||||
if (per_bank_req_valid[i] && per_bank_req_ready[i]) begin
|
||||
if (per_bank_req_rw[i]) begin
|
||||
`TRACE(2, ("%d: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
|
||||
`TRACE(2, ("%d: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_byteen[i], per_bank_req_data[i], per_bank_req_uuid[i]));
|
||||
end else begin
|
||||
`TRACE(2, ("%d: %s-bank%0d rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
|
||||
`TRACE(2, ("%d: %s-bank%0d rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_uuid[i]));
|
||||
end
|
||||
end
|
||||
if (per_bank_rsp_valid[i] && per_bank_rsp_ready[i]) begin
|
||||
`TRACE(2, ("%d: %s-bank%0d rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n",
|
||||
`TRACE(2, ("%d: %s-bank%0d rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, i, per_bank_rsp_tag[i], per_bank_rsp_data[i], per_bank_rsp_uuid[i]));
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -17,20 +17,19 @@ module VX_mem_switch import VX_gpu_pkg::*; #(
|
|||
parameter NUM_REQS = 1,
|
||||
parameter DATA_SIZE = 1,
|
||||
parameter TAG_WIDTH = 1,
|
||||
parameter MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH,
|
||||
parameter ADDR_WIDTH = 1,
|
||||
parameter REQ_OUT_BUF = 0,
|
||||
parameter RSP_OUT_BUF = 0,
|
||||
parameter `STRING ARBITER = "R",
|
||||
parameter `STRING ARBITER = "R",
|
||||
parameter LOG_NUM_REQS = `CLOG2(NUM_REQS)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
||||
input wire [`UP(LOG_NUM_REQS)-1:0] bus_sel,
|
||||
VX_mem_bus_if.slave bus_in_if,
|
||||
VX_mem_bus_if.master bus_out_if [NUM_REQS]
|
||||
);
|
||||
localparam ADDR_WIDTH = (MEM_ADDR_WIDTH-`CLOG2(DATA_SIZE));
|
||||
);
|
||||
localparam DATA_WIDTH = (8 * DATA_SIZE);
|
||||
localparam REQ_DATAW = TAG_WIDTH + ADDR_WIDTH + `ADDR_TYPE_WIDTH + 1 + DATA_SIZE + DATA_WIDTH;
|
||||
localparam RSP_DATAW = TAG_WIDTH + DATA_WIDTH;
|
||||
|
@ -40,7 +39,7 @@ module VX_mem_switch import VX_gpu_pkg::*; #(
|
|||
wire [NUM_REQS-1:0] req_valid_out;
|
||||
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_out;
|
||||
wire [NUM_REQS-1:0] req_ready_out;
|
||||
|
||||
|
||||
VX_stream_switch #(
|
||||
.NUM_OUTPUTS (NUM_REQS),
|
||||
.DATAW (REQ_DATAW),
|
||||
|
@ -49,7 +48,7 @@ module VX_mem_switch import VX_gpu_pkg::*; #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.sel_in (bus_sel),
|
||||
.valid_in (bus_in_if.req_valid),
|
||||
.valid_in (bus_in_if.req_valid),
|
||||
.data_in (bus_in_if.req_data),
|
||||
.ready_in (bus_in_if.req_ready),
|
||||
.valid_out (req_valid_out),
|
||||
|
@ -68,7 +67,7 @@ module VX_mem_switch import VX_gpu_pkg::*; #(
|
|||
wire [NUM_REQS-1:0] rsp_valid_in;
|
||||
wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_data_in;
|
||||
wire [NUM_REQS-1:0] rsp_ready_in;
|
||||
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign rsp_valid_in[i] = bus_out_if[i].rsp_valid;
|
||||
assign rsp_data_in[i] = bus_out_if[i].rsp_data;
|
||||
|
@ -77,15 +76,15 @@ module VX_mem_switch import VX_gpu_pkg::*; #(
|
|||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (NUM_REQS),
|
||||
.DATAW (RSP_DATAW),
|
||||
.DATAW (RSP_DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.OUT_BUF (RSP_OUT_BUF)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (rsp_valid_in),
|
||||
.data_in (rsp_data_in),
|
||||
.ready_in (rsp_ready_in),
|
||||
.valid_in (rsp_valid_in),
|
||||
.data_in (rsp_data_in),
|
||||
.ready_in (rsp_ready_in),
|
||||
.valid_out (bus_in_if.rsp_valid),
|
||||
.data_out (bus_in_if.rsp_data),
|
||||
.ready_out (bus_in_if.rsp_ready),
|
||||
|
|
|
@ -12,7 +12,6 @@ SRC_DIR := $(VORTEX_HOME)/hw/syn/altera/opae
|
|||
RTL_DIR := $(VORTEX_HOME)/hw/rtl
|
||||
DPI_DIR := $(VORTEX_HOME)/hw/dpi
|
||||
AFU_DIR := $(RTL_DIR)/afu/opae
|
||||
THIRD_PARTY_DIR := $(VORTEX_HOME)/third_party
|
||||
SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts
|
||||
|
||||
IP_CACHE_DIR := $(ROOT_DIR)/hw/syn/altera/ip_cache/$(DEVICE_FAMILY)
|
||||
|
@ -76,19 +75,19 @@ endif
|
|||
|
||||
# Debugigng
|
||||
ifdef DEBUG
|
||||
ifeq ($(TARGET), fpga)
|
||||
CFLAGS += -DNDEBUG -DSCOPE $(DBG_SCOPE_FLAGS)
|
||||
SCOPE_JSON += $(BUILD_DIR)/scope.json
|
||||
ifneq ($(TARGET), fpga)
|
||||
CFLAGS += -DNDEBUG
|
||||
else
|
||||
CFLAGS += $(DBG_TRACE_FLAGS)
|
||||
endif
|
||||
else
|
||||
else
|
||||
CFLAGS += -DNDEBUG
|
||||
endif
|
||||
|
||||
# Enable scope analyzer
|
||||
ifdef SCOPE
|
||||
CFLAGS += -DSCOPE
|
||||
CFLAGS += -DSCOPE $(DBG_SCOPE_FLAGS)
|
||||
SCOPE_JSON += $(BUILD_DIR)/scope.json
|
||||
endif
|
||||
|
||||
# Enable perf counters
|
||||
|
@ -128,7 +127,7 @@ ifeq ($(TARGET), asesim)
|
|||
afu_sim_setup -s $(BUILD_DIR)/setup.cfg $(BUILD_DIR)/synth
|
||||
else
|
||||
afu_synth_setup -s $(BUILD_DIR)/setup.cfg $(BUILD_DIR)/synth
|
||||
endif
|
||||
endif
|
||||
|
||||
build: ip-gen setup $(SCOPE_JSON)
|
||||
ifeq ($(TARGET), asesim)
|
||||
|
@ -145,5 +144,5 @@ scope-json: $(BUILD_DIR)/scope.json
|
|||
$(BUILD_DIR)/scope.json: $(BUILD_DIR)/vortex.xml
|
||||
$(SCRIPT_DIR)/scope.py $(BUILD_DIR)/vortex.xml -o $(BUILD_DIR)/scope.json
|
||||
|
||||
clean:
|
||||
clean:
|
||||
rm -rf vortex_afu.h $(BUILD_DIR)
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue