Merge branch 'mranduril-vortex_vm_rebased' into vortex_vm

This commit is contained in:
Hanran Wu 2024-08-23 17:45:03 -04:00
commit 35c15f554d
186 changed files with 36003 additions and 4008 deletions

270
.github/workflows/ci.yml vendored Normal file
View file

@ -0,0 +1,270 @@
# Copyright © 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: CI
on: [push, pull_request]
jobs:
setup:
runs-on: ubuntu-20.04
steps:
- name: Checkout code
uses: actions/checkout@v2
with:
submodules: recursive
- name: Cache Toolchain Directory
id: cache-toolchain
uses: actions/cache@v2
with:
path: tools
key: ${{ runner.os }}-toolchain-v0.1
restore-keys: |
${{ runner.os }}-toolchain-
- name: Cache Third Party Directory
id: cache-thirdparty
uses: actions/cache@v2
with:
path: third_party
key: ${{ runner.os }}-thirdparty-v0.1
restore-keys: |
${{ runner.os }}-thirdparty-
- name: Install Dependencies
if: steps.cache-toolchain.outputs.cache-hit != 'true' || steps.cache-thirdparty.outputs.cache-hit != 'true'
run: |
sudo bash ./ci/system_updates.sh
- name: Setup Toolchain
if: steps.cache-toolchain.outputs.cache-hit != 'true'
run: |
TOOLDIR=$PWD/tools
mkdir -p build
cd build
../configure --tooldir=$TOOLDIR
ci/toolchain_install.sh --all
- name: Setup Third Party
if: steps.cache-thirdparty.outputs.cache-hit != 'true'
run: |
make -C third_party > /dev/null
# build:
# runs-on: ubuntu-20.04
# needs: setup
# strategy:
# matrix:
# xlen: [32, 64]
# steps:
# - name: Checkout code
# uses: actions/checkout@v2
# - name: Install Dependencies
# run: |
# sudo bash ./ci/system_updates.sh
# - name: Cache Toolchain Directory
# id: cache-toolchain
# uses: actions/cache@v2
# with:
# path: tools
# key: ${{ runner.os }}-toolchain-v0.1
# restore-keys: |
# ${{ runner.os }}-toolchain-
# - name: Cache Third Party Directory
# id: cache-thirdparty
# uses: actions/cache@v2
# with:
# path: third_party
# key: ${{ runner.os }}-thirdparty-v0.1
# restore-keys: |
# ${{ runner.os }}-thirdparty-
# - name: Run Build
# run: |
# TOOLDIR=$PWD/tools
# mkdir -p build${{ matrix.xlen }}
# cd build${{ matrix.xlen }}
# ../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }}
# source ci/toolchain_env.sh
# make software -s > /dev/null
# make tests -s > /dev/null
# - name: Upload Build Artifact
# uses: actions/upload-artifact@v2
# with:
# name: build-${{ matrix.xlen }}
# path: build${{ matrix.xlen }}
# tests:
# runs-on: ubuntu-20.04
# needs: build
# strategy:
# matrix:
# name: [regression, opencl, config1, config2, debug, stress]
# xlen: [32, 64]
# steps:
# - name: Checkout code
# uses: actions/checkout@v2
# - name: Install Dependencies
# run: |
# sudo bash ./ci/system_updates.sh
# - name: Cache Toolchain Directory
# id: cache-toolchain
# uses: actions/cache@v2
# with:
# path: tools
# key: ${{ runner.os }}-toolchain-v0.1
# restore-keys: |
# ${{ runner.os }}-toolchain-
# - name: Cache Third Party Directory
# id: cache-thirdparty
# uses: actions/cache@v2
# with:
# path: third_party
# key: ${{ runner.os }}-thirdparty-v0.1
# restore-keys: |
# ${{ runner.os }}-thirdparty-
# - name: Download Build Artifact
# uses: actions/download-artifact@v2
# with:
# name: build-${{ matrix.xlen }}
# path: build${{ matrix.xlen }}
# - name: Run tests
# run: |
# cd build${{ matrix.xlen }}
# source ci/toolchain_env.sh
# chmod -R +x . # Ensure all files have executable permissions
# if [ "${{ matrix.name }}" == "regression" ]; then
# ./ci/regression.sh --unittest
# ./ci/regression.sh --isa
# ./ci/regression.sh --kernel
# ./ci/regression.sh --synthesis
# ./ci/regression.sh --regression
# else
# ./ci/regression.sh --${{ matrix.name }}
# fi
build_vm:
runs-on: ubuntu-20.04
needs: setup
strategy:
matrix:
xlen: [32, 64]
steps:
- name: Checkout code
uses: actions/checkout@v2
- name: Install Dependencies
run: |
sudo bash ./ci/system_updates.sh
- name: Cache Toolchain Directory
id: cache-toolchain
uses: actions/cache@v2
with:
path: tools
key: ${{ runner.os }}-toolchain-v0.1
restore-keys: |
${{ runner.os }}-toolchain-
- name: Cache Third Party Directory
id: cache-thirdparty
uses: actions/cache@v2
with:
path: third_party
key: ${{ runner.os }}-thirdparty-v0.1
restore-keys: |
${{ runner.os }}-thirdparty-
- name: Run Build
run: |
TOOLDIR=$PWD/tools
mkdir -p build${{ matrix.xlen }}-vm
cd build${{ matrix.xlen }}-vm
../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }} --vm_enable=1
source ci/toolchain_env.sh
make software -s > /dev/null
make tests -s > /dev/null
- name: Upload Build Artifact
uses: actions/upload-artifact@v2
with:
name: build-${{ matrix.xlen }}-vm
path: build${{ matrix.xlen }}-vm
test_vm:
runs-on: ubuntu-20.04
needs: build_vm
strategy:
matrix:
xlen: [32, 64]
steps:
- name: Checkout code
uses: actions/checkout@v2
- name: Install Dependencies
run: |
sudo bash ./ci/system_updates.sh
- name: Cache Toolchain Directory
id: cache-toolchain
uses: actions/cache@v2
with:
path: tools
key: ${{ runner.os }}-toolchain-v0.1
restore-keys: |
${{ runner.os }}-toolchain-
- name: Cache Third Party Directory
id: cache-thirdparty
uses: actions/cache@v2
with:
path: third_party
key: ${{ runner.os }}-thirdparty-v0.1
restore-keys: |
${{ runner.os }}-thirdparty-
- name: Download Build Artifact
uses: actions/download-artifact@v2
with:
name: build-${{ matrix.xlen }}-vm
path: build${{ matrix.xlen }}-vm
- name: Run tests
run: |
cd build${{ matrix.xlen }}-vm
source ci/toolchain_env.sh
chmod -R +x . # Ensure all files have executable permissions
./ci/regression.sh --vm
complete:
runs-on: ubuntu-20.04
needs: test_vm
steps:
- name: Check Completion
run: echo "All matrix jobs passed"

1
.gitignore vendored
View file

@ -1,3 +1,4 @@
/build*
/.vscode
*.cache
*.code-workspace

3
.gitmodules vendored
View file

@ -6,5 +6,4 @@
url = https://github.com/ucb-bar/berkeley-softfloat-3.git
[submodule "third_party/ramulator"]
path = third_party/ramulator
url = https://github.com/CMU-SAFARI/ramulator.git
ignore = dirty
url = https://github.com/CMU-SAFARI/ramulator2.git

View file

@ -1,118 +0,0 @@
language: cpp
dist: focal
os: linux
compiler: gcc
addons:
apt:
packages:
- build-essential
- valgrind
- libstdc++6
- binutils
- python
- uuid-dev
env:
global:
- TOOLDIR=$HOME/tools
cache:
directories:
- $TOOLDIR
- $HOME/third_party
- $HOME/build32
- $HOME/build64
before_install:
- if [ ! -d "$TOOLDIR" ] || [ -z "$(ls -A $TOOLDIR)" ] || [ "$(cat "$TOOLDIR/version.txt")" != "v0.4" ]; then
rm -rf $TOOLDIR;
mkdir -p $TRAVIS_BUILD_DIR/build && cd $TRAVIS_BUILD_DIR/build;
../configure --tooldir=$TOOLDIR;
ci/toolchain_install.sh --all;
echo "v0.3" > "$TOOLDIR/version.txt";
else
echo "using existing tooldir build";
fi
- if [ ! -d "$HOME/third_party" ] || [ -z "$(ls -A $HOME/third_party)" ] || [ "$(cat "$HOME/third_party/version.txt")" != "v0.2" ]; then
cd $TRAVIS_BUILD_DIR;
make -C third_party > /dev/null;
echo "v0.2" > "third_party/version.txt";
cp -rf third_party $HOME;
else
echo "using existing third_party build";
cp -rf $HOME/third_party $TRAVIS_BUILD_DIR;
fi
install:
- if [ ! -d "$HOME/build$XLEN" ] || [ -z "$(ls -A $HOME/build$XLEN)" ] || [ "$(cat "$HOME/build$XLEN/version.txt")" != "$TRAVIS_COMMIT" ]; then
mkdir -p $TRAVIS_BUILD_DIR/build$XLEN && cd $TRAVIS_BUILD_DIR/build$XLEN;
../configure --tooldir=$TOOLDIR --xlen=$XLEN;
source ci/toolchain_env.sh;
make build -s > /dev/null;
echo "$TRAVIS_COMMIT" > version.txt;
cp -rf $TRAVIS_BUILD_DIR/build$XLEN $HOME;
else
echo "using existing build for commit $TRAVIS_COMMIT";
cp -rf $HOME/build$XLEN $TRAVIS_BUILD_DIR;
fi
before_script:
- cd $TRAVIS_BUILD_DIR/build$XLEN
- source ci/toolchain_env.sh
stages:
- test
jobs:
include:
- stage: test
name: regression32
env: XLEN=32
script:
- ./ci/travis_run.py ./ci/regression.sh --unittest
- ./ci/travis_run.py ./ci/regression.sh --isa
- ./ci/travis_run.py ./ci/regression.sh --kernel
- ./ci/travis_run.py ./ci/regression.sh --synthesis
- ./ci/travis_run.py ./ci/regression.sh --regression
- ./ci/travis_run.py ./ci/regression.sh --opencl
- stage: test
name: regression64
env: XLEN=64
script:
- ./ci/travis_run.py ./ci/regression.sh --isa
- ./ci/travis_run.py ./ci/regression.sh --kernel
- ./ci/travis_run.py ./ci/regression.sh --synthesis
- ./ci/travis_run.py ./ci/regression.sh --regression
- ./ci/travis_run.py ./ci/regression.sh --opencl
- stage: test
name: config
env: XLEN=32
script:
- ./ci/travis_run.py ./ci/regression.sh --cluster
- ./ci/travis_run.py ./ci/regression.sh --config
- stage: test
name: debug
env: XLEN=32
script:
- ./ci/travis_run.py ./ci/regression.sh --debug
- ./ci/travis_run.py ./ci/regression.sh --stress
- stage: test
name: virtual_memory
env: XLEN=32
env: VM_DISABLE=1
script:
- ./ci/travis_run.py ./ci/regression.sh --regression
- ./ci/travis_run.py ./ci/regression.sh --opencl
- stage: test
name: virtual_memory
env: XLEN=64
env: VM_DISABLE=1
script:
- ./ci/travis_run.py ./ci/regression.sh --regression
- ./ci/travis_run.py ./ci/regression.sh --opencl

View file

@ -1,5 +1,15 @@
include config.mk
.PHONY: build software tests
# vm: build third_party, hw, the simx simulator, kernel, the runtime's `vm`
# target and the tests, in that order (each step is a recursive sub-make).
# Declared phony because `vm` names a command rather than a file; without this,
# a file or directory called `vm` in the build directory would make the target
# appear up to date and skip the whole build.
.PHONY: vm
vm:
	$(MAKE) -C $(VORTEX_HOME)/third_party
	$(MAKE) -C hw
	$(MAKE) -C sim simx
	$(MAKE) -C kernel
	$(MAKE) -C runtime vm
	$(MAKE) -C tests
all:
$(MAKE) -C $(VORTEX_HOME)/third_party
$(MAKE) -C hw
@ -15,13 +25,24 @@ build:
$(MAKE) -C runtime
$(MAKE) -C tests
clean:
# software: build hw, kernel and the runtime stub (no simulators).
software:
	$(MAKE) -C hw
	$(MAKE) -C kernel
	$(MAKE) -C runtime/stub

# tests: build everything under tests/.
tests:
	$(MAKE) -C tests

# clean-build and clean are commands, not files; declare them phony so a stray
# file with either name cannot turn them into no-ops.
.PHONY: clean-build clean

# clean-build: run the `clean` target of every in-tree component, leaving
# third_party untouched.
clean-build:
	$(MAKE) -C hw clean
	$(MAKE) -C sim clean
	$(MAKE) -C kernel clean
	$(MAKE) -C runtime clean
	$(MAKE) -C tests clean

# clean: everything clean-build does, plus the third_party clean.
clean: clean-build
	$(MAKE) -C $(VORTEX_HOME)/third_party clean
# Install setup
KERNEL_INC_DST = $(PREFIX)/kernel/include
KERNEL_LIB_DST = $(PREFIX)/kernel/lib$(XLEN)

View file

@ -56,7 +56,7 @@ More detailed build instructions can be found [here](docs/install_vortex.md).
```
### Install Vortex codebase
```
git clone --depth=1 --recursive git@github.com:vortexgpgpu/vortex.git -b vortex_vm
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git -b vortex_vm
cd vortex
```
@ -68,18 +68,18 @@ More detailed build instructions can be found [here](docs/install_vortex.md).
mkdir out
export OUT_DIR=`pwd`/out
cd build
../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-6-14 --prefix=$OUT_DIR
# Run the following to disable virtual memory feature in compilation
../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-2024-08-09 --prefix=$OUT_DIR
# Run the following instead to enable virtual memory feature in compilation
../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-2024-08-09 --prefix=$OUT_DIR --vm_enable=1
### Install prebuilt toolchain
# We will use the precompiled tools in the volvo toolchain directory
### set environment variables
# should always run before using the toolchain!
source ./ci/toolchain_env.sh
### Building Vortex
make -s
### Quick demo running vecadd OpenCL kernel on 2 cores
$ ./ci/blackbox.sh --cores=2 --app=vecadd

View file

@ -25,37 +25,6 @@ XLEN=${XLEN:=@XLEN@}
echo "Vortex Regression Test: XLEN=$XLEN"
split_file() {
if [[ $# -ne 2 ]]; then
echo "Usage: $0 <filename> <start_with>"
return 1
fi
input_file="$1"
start_with="$2"
if [[ ! -r "$input_file" ]]; then
echo "Error: File '$input_file' is not readable or does not exist."
return 1
fi
count=0
output_file=""
while IFS= read -r line; do
if [[ $line == $start_with* ]]; then
count=$((count + 1))
output_file="$input_file.part$count"
> "$output_file" # ensure empty
fi
if [[ -n "$output_file" ]]; then
echo "$line" >> "$output_file"
fi
done < "$input_file"
if [[ $count -eq 0 ]]; then
echo "No lines starting with '$start_with' were found in '$input_file'."
fi
}
###############################################################################
unittest()
{
make -C tests/unittest run
@ -66,6 +35,9 @@ isa()
{
echo "begin isa tests..."
make -C sim/simx
make -C sim/rtlsim
make -C tests/riscv/isa run-simx
make -C tests/riscv/isa run-rtlsim
@ -96,8 +68,8 @@ isa()
make -C tests/riscv/isa run-rtlsim-64fx
fi
# restore default prebuilt configuration
make -C sim/rtlsim clean && make -C sim/rtlsim > /dev/null
# clean build
make -C sim/rtlsim clean
echo "isa tests done!"
}
@ -106,6 +78,9 @@ kernel()
{
echo "begin kernel tests..."
make -C sim/simx
make -C sim/rtlsim
make -C tests/kernel run-simx
make -C tests/kernel run-rtlsim
@ -116,6 +91,9 @@ regression()
{
echo "begin regression tests..."
make -C runtime/simx
make -C runtime/rtlsim
make -C tests/regression run-simx
make -C tests/regression run-rtlsim
@ -134,6 +112,9 @@ opencl()
{
echo "begin opencl tests..."
make -C runtime/simx
make -C runtime/rtlsim
make -C tests/opencl run-simx
make -C tests/opencl run-rtlsim
@ -143,24 +124,28 @@ opencl()
echo "opencl tests done!"
}
cluster()
{
echo "begin clustering tests..."
vm(){
echo "begin vm tests..."
# cores clustering
./ci/blackbox.sh --driver=rtlsim --cores=4 --clusters=1 --app=diverge --args="-n1"
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=diverge --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --clusters=1 --app=diverge --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --app=diverge --args="-n1"
make -C sim/simx
make -C runtime/simx
# L2/L3
./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=diverge --args="-n1"
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l3cache --app=diverge --args="-n1"
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --l2cache --app=diverge --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --clusters=4 --l2cache --l3cache --app=diverge --args="-n1"
make -C tests/kernel run-simx
# Regression tests
make -C tests/regression run-simx
echo "clustering tests done!"
# test global barrier
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tgbar" --cores=2
# test local barrier
./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar"
# OpenCL tests
make -C tests/opencl run-simx
./ci/blackbox.sh --driver=simx --app=lbm --warps=8
echo "vm tests done!"
}
test_csv_trace()
@ -170,29 +155,20 @@ test_csv_trace()
make -C sim/rtlsim clean && DEBUG=3 CONFIGS="-DGPR_RESET" make -C sim/rtlsim > /dev/null
make -C tests/riscv/isa run-simx-32im > run_simx.log
make -C tests/riscv/isa run-rtlsim-32im > run_rtlsim.log
split_file run_simx.log "Running "
split_file run_rtlsim.log "Running "
for file in ./run_simx.log.part*; do
if [[ -f "$file" ]]; then
file2="${file//simx/rtlsim}"
if [[ -f "$file2" ]]; then
./ci/trace_csv.py -tsimx $file -otrace_simx.csv
./ci/trace_csv.py -trtlsim $file2 -otrace_rtlsim.csv
diff trace_rtlsim.csv trace_simx.csv
else
echo "File $file2 not found."
fi
fi
done
# restore default prebuilt configuration
make -C sim/simx clean && make -C sim/simx > /dev/null
make -C sim/rtlsim clean && make -C sim/rtlsim > /dev/null
./ci/trace_csv.py -tsimx run_simx.log -otrace_simx.csv
./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv
diff trace_rtlsim.csv trace_simx.csv
# clean build
make -C sim/simx clean
make -C sim/rtlsim clean
}
debug()
{
echo "begin debugging tests..."
test_csv_trace
./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
./ci/blackbox.sh --driver=opae --cores=1 --scope --app=demo --args="-n1"
@ -200,21 +176,23 @@ debug()
echo "debugging tests done!"
}
config()
config1()
{
echo "begin configuration tests..."
echo "begin configuration-1 tests..."
# warp/threads configurations
./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=1 --threads=1 --app=diverge
./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=2 --threads=2 --app=diverge
./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=2 --threads=8 --app=diverge
./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=8 --threads=2 --app=diverge
./ci/blackbox.sh --driver=simx --cores=1 --warps=1 --threads=1 --app=diverge
./ci/blackbox.sh --driver=simx --cores=1 --warps=8 --threads=16 --app=diverge
# warp/threads
./ci/blackbox.sh --driver=rtlsim --warps=1 --threads=1 --app=diverge
./ci/blackbox.sh --driver=rtlsim --warps=2 --threads=2 --app=diverge
./ci/blackbox.sh --driver=rtlsim --warps=2 --threads=8 --app=diverge
./ci/blackbox.sh --driver=rtlsim --warps=8 --threads=2 --app=diverge
./ci/blackbox.sh --driver=simx --warps=1 --threads=1 --app=diverge
./ci/blackbox.sh --driver=simx --warps=8 --threads=16 --app=diverge
# disable DPI
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood
# cores clustering
./ci/blackbox.sh --driver=rtlsim --cores=4 --clusters=1 --app=diverge --args="-n1"
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=diverge --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --clusters=1 --app=diverge --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --app=diverge --args="-n1"
# issue width
CONFIGS="-DISSUE_WIDTH=2" ./ci/blackbox.sh --driver=rtlsim --app=diverge
@ -240,6 +218,31 @@ config()
CONFIGS="-DISSUE_WIDTH=2 -DNUM_LSU_BLOCK=1 -DNUM_LSU_LANES=2" ./ci/blackbox.sh --driver=simx --app=vecaddx
CONFIGS="-DISSUE_WIDTH=4 -DNUM_LSU_BLOCK=4 -DNUM_LSU_LANES=4" ./ci/blackbox.sh --driver=simx --app=vecaddx
# L2/L3
./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=diverge --args="-n1"
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l3cache --app=diverge --args="-n1"
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --l2cache --app=diverge --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --clusters=4 --l2cache --l3cache --app=diverge --args="-n1"
# multiple L1 caches per socket
CONFIGS="-DSOCKET_SIZE=4 -DNUM_DCACHES=2 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --cores=8 --warps=1 --threads=2
echo "configuration-1 tests done!"
}
config2()
{
echo "begin configuration-2 tests..."
# test opaesim
./ci/blackbox.sh --driver=opae --app=printf
./ci/blackbox.sh --driver=opae --app=diverge
# disable DPI
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood
# custom program startup address
make -C tests/regression/dogfood clean-kernel
if [ "$XLEN" == "64" ]; then
@ -249,55 +252,57 @@ config()
fi
./ci/blackbox.sh --driver=simx --app=dogfood
./ci/blackbox.sh --driver=rtlsim --app=dogfood
make -C tests/regression/dogfood clean-kernel
# disabling M & F extensions
make -C sim/rtlsim clean && CONFIGS="-DEXT_M_DISABLE -DEXT_F_DISABLE" make -C sim/rtlsim > /dev/null
make -C tests/riscv/isa run-rtlsim-32i
make -C sim/rtlsim clean && make -C sim/rtlsim > /dev/null
make -C sim/rtlsim clean
# disabling ZICOND extension
CONFIGS="-DEXT_ZICOND_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo
# disable local memory
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo --perf=1
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --cores=1 --app=demo --perf=1
# disable L1 cache
CONFIGS="-DL1_DISABLE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx
CONFIGS="-DL1_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx
CONFIGS="-DDCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx
CONFIGS="-DICACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx
# multiple L1 caches per socket
CONFIGS="-DSOCKET_SIZE=4 -DNUM_DCACHES=2 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --cores=8 --warps=1 --threads=2
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo --perf=1
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=demo --perf=1
# test AXI bus
AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --app=demo
# disable L1 cache
CONFIGS="-DL1_DISABLE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DL1_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DDCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DICACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
# reduce l1 line size
CONFIGS="-DL1_LINE_SIZE=4" ./ci/blackbox.sh --driver=rtlsim --app=io_addr
CONFIGS="-DL1_LINE_SIZE=4" ./ci/blackbox.sh --driver=simx --app=io_addr
CONFIGS="-DL1_LINE_SIZE=4 -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DL1_LINE_SIZE=4 -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx
CONFIGS="-DL1_LINE_SIZE=$XLEN/8" ./ci/blackbox.sh --driver=rtlsim --app=io_addr
CONFIGS="-DL1_LINE_SIZE=$XLEN/8" ./ci/blackbox.sh --driver=simx --app=io_addr
CONFIGS="-DL1_LINE_SIZE=$XLEN/8 -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DL1_LINE_SIZE=$XLEN/8 -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx
# test cache ways
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx
# test cache banking
CONFIGS="-DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
CONFIGS="-DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --cores=1 --app=sgemmx
CONFIGS="-DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
# test 128-bit MEM block
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --cores=1 --app=demo
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --app=demo
# test single-bank DRAM
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --cores=1 --app=demo
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --app=demo
# test 27-bit DRAM address
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --cores=1 --app=demo
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --app=demo
echo "configuration tests done!"
echo "configuration-2 tests done!"
}
stress()
@ -306,9 +311,7 @@ stress()
# test verilator reset values
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=opae --app=printf
./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n128" --l2cache
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --args="-n128" --l2cache
echo "stress tests done!"
}
@ -318,7 +321,7 @@ synthesis()
echo "begin synthesis tests..."
PREFIX=build_base make -C hw/syn/yosys clean
PREFIX=build_base CONFIGS="-DDPI_DISABLE -DEXT_F_DISABLE" make -C hw/syn/yosys elaborate
PREFIX=build_base CONFIGS="-DDPI_DISABLE -DEXT_F_DISABLE" make -C hw/syn/yosys synthesis
echo "synthesis tests done!"
}
@ -326,7 +329,7 @@ synthesis()
# Print the script banner and the list of accepted command-line flags.
# The flag list mirrors the options handled by the argument parser, including
# --vm (virtual-memory tests), which the parser accepts but the usage text
# previously did not mention.
show_usage()
{
    echo "Vortex Regression Test"
    echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--vm] [--config1] [--config2] [--debug] [--stress] [--synthesis] [--all] [--h|--help]"
}
start=$SECONDS
@ -336,6 +339,9 @@ clean=0
while [ "$1" != "" ]; do
case $1 in
--vm )
tests+=("vm")
;;
--clean )
clean=1
;;
@ -354,15 +360,15 @@ while [ "$1" != "" ]; do
--opencl )
tests+=("opencl")
;;
--cluster )
tests+=("cluster")
--config1 )
tests+=("config1")
;;
--config2 )
tests+=("config2")
;;
--debug )
tests+=("debug")
;;
--config )
tests+=("config")
;;
--stress )
tests+=("stress")
;;
@ -376,9 +382,9 @@ while [ "$1" != "" ]; do
tests+=("kernel")
tests+=("regression")
tests+=("opencl")
tests+=("cluster")
tests+=("config1")
tests+=("config2")
tests+=("debug")
tests+=("config")
tests+=("stress")
tests+=("synthesis")
;;

27
ci/system_updates.sh Executable file
View file

@ -0,0 +1,27 @@
#!/bin/sh
# Copyright 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Stop at the first failing command so a partial install is never mistaken
# for a complete one.
set -e

# Refresh the package index, register the ubuntu-toolchain-r/test PPA, then
# refresh again so packages from the newly added PPA become visible.
apt-get update -y
add-apt-repository -y ppa:ubuntu-toolchain-r/test
apt-get update

# Install the version-11 compilers and register each one as an alternative
# for the unversioned command (priority 100). g++ is handled before gcc.
apt-get install -y g++-11 gcc-11
for compiler in g++ gcc; do
    update-alternatives --install "/usr/bin/${compiler}" "${compiler}" "/usr/bin/${compiler}-11" 100
done

# Remaining build and test packages.
apt-get install -y \
    build-essential \
    valgrind \
    libstdc++6 \
    binutils \
    python \
    uuid-dev \
    ccache

View file

@ -16,8 +16,8 @@
TOOLDIR=${TOOLDIR:=@TOOLDIR@}
export VERILATOR_ROOT=$TOOLDIR/verilator
export PATH=$VERILATOR_ROOT/bin:$PATH
# export VERILATOR_ROOT=$TOOLDIR/verilator
# export PATH=$VERILATOR_ROOT/bin:$PATH
export SV2V_PATH=$TOOLDIR/sv2v
export PATH=$SV2V_PATH/bin:$PATH

View file

@ -26,7 +26,7 @@ def parse_args():
parser.add_argument('log', help='Input log file')
return parser.parse_args()
def parse_simx(log_filename):
def parse_simx(log_lines):
pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
instr_pattern = r"Instr (0x[0-9a-fA-F]+):"
opcode_pattern = r"Instr 0x[0-9a-fA-F]+: ([0-9a-zA-Z_\.]+)"
@ -37,32 +37,31 @@ def parse_simx(log_filename):
destination_pattern = r"Dest Reg: (.+)"
uuid_pattern = r"#(\d+)"
entries = []
with open(log_filename, 'r') as log_file:
instr_data = None
for lineno, line in enumerate(log_file, start=1):
try:
if line.startswith("DEBUG Fetch:"):
if instr_data:
entries.append(instr_data)
instr_data = {}
instr_data["lineno"] = lineno
instr_data["PC"] = re.search(pc_pattern, line).group(1)
instr_data["core_id"] = re.search(core_id_pattern, line).group(1)
instr_data["warp_id"] = re.search(warp_id_pattern, line).group(1)
instr_data["tmask"] = re.search(tmask_pattern, line).group(1)
instr_data["uuid"] = re.search(uuid_pattern, line).group(1)
elif line.startswith("DEBUG Instr"):
instr_data["instr"] = re.search(instr_pattern, line).group(1)
instr_data["opcode"] = re.search(opcode_pattern, line).group(1)
elif line.startswith("DEBUG Src"):
src_reg = re.search(operands_pattern, line).group(1)
instr_data["operands"] = (instr_data["operands"] + ', ' + src_reg) if 'operands' in instr_data else src_reg
elif line.startswith("DEBUG Dest"):
instr_data["destination"] = re.search(destination_pattern, line).group(1)
except Exception as e:
print("Error at line {}: {}".format(lineno, e))
if instr_data:
entries.append(instr_data)
instr_data = None
for lineno, line in enumerate(log_lines, start=1):
try:
if line.startswith("DEBUG Fetch:"):
if instr_data:
entries.append(instr_data)
instr_data = {}
instr_data["lineno"] = lineno
instr_data["PC"] = re.search(pc_pattern, line).group(1)
instr_data["core_id"] = re.search(core_id_pattern, line).group(1)
instr_data["warp_id"] = re.search(warp_id_pattern, line).group(1)
instr_data["tmask"] = re.search(tmask_pattern, line).group(1)
instr_data["uuid"] = re.search(uuid_pattern, line).group(1)
elif line.startswith("DEBUG Instr"):
instr_data["instr"] = re.search(instr_pattern, line).group(1)
instr_data["opcode"] = re.search(opcode_pattern, line).group(1)
elif line.startswith("DEBUG Src"):
src_reg = re.search(operands_pattern, line).group(1)
instr_data["operands"] = (instr_data["operands"] + ', ' + src_reg) if 'operands' in instr_data else src_reg
elif line.startswith("DEBUG Dest"):
instr_data["destination"] = re.search(destination_pattern, line).group(1)
except Exception as e:
print("Error at line {}: {}".format(lineno, e))
if instr_data:
entries.append(instr_data)
return entries
def reverse_binary(bin_str):
@ -95,8 +94,9 @@ def append_value(text, reg, value, tmask_arr, sep):
text += "}"
return text, sep
def parse_rtlsim(log_filename):
line_pattern = r"\d+: core(\d+)-(decode|issue|commit)"
def parse_rtlsim(log_lines):
config_pattern = r"CONFIGS: num_threads=(\d+), num_warps=(\d+), num_cores=(\d+), num_clusters=(\d+), socket_size=(\d+), local_mem_base=(\d+), num_barriers=(\d+)"
line_pattern = r"\d+: cluster(\d+)-socket(\d+)-core(\d+)-(decode|issue|commit)"
pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
instr_pattern = r"instr=(0x[0-9a-fA-F]+)"
ex_pattern = r"ex=([a-zA-Z]+)"
@ -116,124 +116,166 @@ def parse_rtlsim(log_filename):
eop_pattern = r"eop=(\d)"
uuid_pattern = r"#(\d+)"
entries = []
with open(log_filename, 'r') as log_file:
instr_data = {}
for lineno, line in enumerate(log_file, start=1):
try:
line_match = re.search(line_pattern, line)
if line_match:
PC = re.search(pc_pattern, line).group(1)
warp_id = re.search(warp_id_pattern, line).group(1)
tmask = re.search(tmask_pattern, line).group(1)
uuid = re.search(uuid_pattern, line).group(1)
core_id = line_match.group(1)
stage = line_match.group(2)
if stage == "decode":
trace = {}
trace["uuid"] = uuid
trace["PC"] = PC
trace["core_id"] = core_id
trace["warp_id"] = warp_id
trace["tmask"] = reverse_binary(tmask)
trace["instr"] = re.search(instr_pattern, line).group(1)
trace["opcode"] = re.search(op_pattern, line).group(1)
trace["opds"] = bin_to_array(re.search(opds_pattern, line).group(1))
trace["rd"] = re.search(rd_pattern, line).group(1)
trace["rs1"] = re.search(rs1_pattern, line).group(1)
trace["rs2"] = re.search(rs2_pattern, line).group(1)
trace["rs3"] = re.search(rs3_pattern, line).group(1)
instr_data = {}
num_threads = 0
num_warps = 0
num_cores = 0
num_clusters = 0
socket_size = 0
local_mem_base = 0
num_barriers = 0
num_sockets = 0
for lineno, line in enumerate(log_lines, start=1):
try:
config_match = re.search(config_pattern, line)
if config_match:
num_threads = int(config_match.group(1))
num_warps = int(config_match.group(2))
num_cores = int(config_match.group(3))
num_clusters = int(config_match.group(4))
socket_size = int(config_match.group(5))
local_mem_base = int(config_match.group(6))
num_barriers = int(config_match.group(7))
num_sockets = (num_cores + socket_size - 1) // socket_size
continue
line_match = re.search(line_pattern, line)
if line_match:
PC = re.search(pc_pattern, line).group(1)
warp_id = re.search(warp_id_pattern, line).group(1)
tmask = re.search(tmask_pattern, line).group(1)
uuid = re.search(uuid_pattern, line).group(1)
# The regex groups are strings; convert the hierarchy indices to integers so
# that the global core-id formula further down performs arithmetic instead of
# string repetition/concatenation (e.g. "0" * 1 + "0" would yield "00").
cluster_id = int(line_match.group(1))
socket_id = int(line_match.group(2))
core_id = int(line_match.group(3))
# The pipeline stage name ("decode", "issue" or "commit") stays a string.
stage = line_match.group(4)
if stage == "decode":
trace = {}
trace["uuid"] = uuid
trace["PC"] = PC
trace["core_id"] = ((((cluster_id * num_sockets) + socket_id) * socket_size) + core_id)
trace["warp_id"] = warp_id
trace["tmask"] = reverse_binary(tmask)
trace["instr"] = re.search(instr_pattern, line).group(1)
trace["opcode"] = re.search(op_pattern, line).group(1)
trace["opds"] = bin_to_array(re.search(opds_pattern, line).group(1))
trace["rd"] = re.search(rd_pattern, line).group(1)
trace["rs1"] = re.search(rs1_pattern, line).group(1)
trace["rs2"] = re.search(rs2_pattern, line).group(1)
trace["rs3"] = re.search(rs3_pattern, line).group(1)
instr_data[uuid] = trace
elif stage == "issue":
if uuid in instr_data:
trace = instr_data[uuid]
trace["lineno"] = lineno
opds = trace["opds"]
if opds[1]:
trace["rs1_data"] = re.search(rs1_data_pattern, line).group(1).split(', ')[::-1]
if opds[2]:
trace["rs2_data"] = re.search(rs2_data_pattern, line).group(1).split(', ')[::-1]
if opds[3]:
trace["rs3_data"] = re.search(rs3_data_pattern, line).group(1).split(', ')[::-1]
trace["issued"] = True
instr_data[uuid] = trace
elif stage == "issue":
if uuid in instr_data:
trace = instr_data[uuid]
trace["lineno"] = lineno
elif stage == "commit":
if uuid in instr_data:
trace = instr_data[uuid]
if "issued" in trace:
opds = trace["opds"]
if opds[1]:
trace["rs1_data"] = re.search(rs1_data_pattern, line).group(1).split(', ')[::-1]
if opds[2]:
trace["rs2_data"] = re.search(rs2_data_pattern, line).group(1).split(', ')[::-1]
if opds[3]:
trace["rs3_data"] = re.search(rs3_data_pattern, line).group(1).split(', ')[::-1]
trace["issued"] = True
dst_tmask_arr = bin_to_array(tmask)[::-1]
wb = re.search(wb_pattern, line).group(1) == "1"
if wb:
rd_data = re.search(rd_data_pattern, line).group(1).split(', ')[::-1]
if 'rd_data' in trace:
merged_rd_data = trace['rd_data']
for i in range(len(dst_tmask_arr)):
if dst_tmask_arr[i] == 1:
merged_rd_data[i] = rd_data[i]
trace['rd_data'] = merged_rd_data
else:
trace['rd_data'] = rd_data
instr_data[uuid] = trace
elif stage == "commit":
if uuid in instr_data:
trace = instr_data[uuid]
if "issued" in trace:
opds = trace["opds"]
dst_tmask_arr = bin_to_array(tmask)[::-1]
wb = re.search(wb_pattern, line).group(1) == "1"
eop = re.search(eop_pattern, line).group(1) == "1"
if eop:
tmask_arr = bin_to_array(trace["tmask"])
destination = ''
if wb:
rd_data = re.search(rd_data_pattern, line).group(1).split(', ')[::-1]
if 'rd_data' in trace:
merged_rd_data = trace['rd_data']
for i in range(len(dst_tmask_arr)):
if dst_tmask_arr[i] == 1:
merged_rd_data[i] = rd_data[i]
trace['rd_data'] = merged_rd_data
else:
trace['rd_data'] = rd_data
instr_data[uuid] = trace
eop = re.search(eop_pattern, line).group(1) == "1"
if eop:
tmask_arr = bin_to_array(trace["tmask"])
destination = ''
if wb:
destination, sep = append_value(destination, trace["rd"], trace['rd_data'], tmask_arr, False)
del trace['rd_data']
trace["destination"] = destination
operands = ''
sep = False
if opds[1]:
operands, sep = append_value(operands, trace["rs1"], trace["rs1_data"], tmask_arr, sep)
del trace["rs1_data"]
if opds[2]:
operands, sep = append_value(operands, trace["rs2"], trace["rs2_data"], tmask_arr, sep)
del trace["rs2_data"]
if opds[3]:
operands, sep = append_value(operands, trace["rs3"], trace["rs3_data"], tmask_arr, sep)
del trace["rs3_data"]
trace["operands"] = operands
del trace["opds"]
del trace["rd"]
del trace["rs1"]
del trace["rs2"]
del trace["rs3"]
del trace["issued"]
del instr_data[uuid]
entries.append(trace)
except Exception as e:
print("Error at line {}: {}".format(lineno, e))
destination, sep = append_value(destination, trace["rd"], trace['rd_data'], tmask_arr, False)
del trace['rd_data']
trace["destination"] = destination
operands = ''
sep = False
if opds[1]:
operands, sep = append_value(operands, trace["rs1"], trace["rs1_data"], tmask_arr, sep)
del trace["rs1_data"]
if opds[2]:
operands, sep = append_value(operands, trace["rs2"], trace["rs2_data"], tmask_arr, sep)
del trace["rs2_data"]
if opds[3]:
operands, sep = append_value(operands, trace["rs3"], trace["rs3_data"], tmask_arr, sep)
del trace["rs3_data"]
trace["operands"] = operands
del trace["opds"]
del trace["rd"]
del trace["rs1"]
del trace["rs2"]
del trace["rs3"]
del trace["issued"]
del instr_data[uuid]
entries.append(trace)
except Exception as e:
print("Error at line {}: {}".format(lineno, e))
return entries
def write_csv(log_filename, csv_filename, log_type):
entries = None
# parse log file
if log_type == "rtlsim":
entries = parse_rtlsim(log_filename)
elif log_type == "simx":
entries = parse_simx(log_filename)
else:
print('Error: invalid log type')
sys.exit()
# sort entries by uuid
entries.sort(key=lambda x: (int(x['uuid'])))
for entry in entries:
del entry['lineno']
# write to CSV
def write_csv(sublogs, csv_filename, log_type):
with open(csv_filename, 'w', newline='') as csv_file:
fieldnames = ["uuid", "PC", "opcode", "instr", "core_id", "warp_id", "tmask", "destination", "operands"]
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
writer.writeheader()
for entry in entries:
writer.writerow(entry)
for sublog in sublogs:
entries = None
# parse sublog
if log_type == "rtlsim":
entries = parse_rtlsim(sublog)
elif log_type == "simx":
entries = parse_simx(sublog)
else:
print('Error: invalid log type')
sys.exit()
# sort entries by uuid
entries.sort(key=lambda x: (int(x['uuid'])))
for entry in entries:
del entry['lineno']
for entry in entries:
writer.writerow(entry)
def split_log_file(log_filename):
    """Split a simulator log into per-run sublogs.

    A new sublog begins at every line that starts with "[VXDRV] START";
    that marker line is kept as the first line of its sublog. Lines that
    appear before the first marker are dropped when at least one marker
    exists.

    If the log contains no marker at all, the whole file is returned as a
    single sublog so that such logs are still parsed instead of silently
    producing an empty CSV. An empty file yields an empty list.

    Args:
        log_filename: path of the log file to read.

    Returns:
        A list of sublogs, each a list of lines (newlines preserved).
    """
    start_marker = "[VXDRV] START"
    sublogs = []
    unmarked_lines = []  # lines seen before any marker (fallback content)
    current_sublog = None

    # Stream the file line by line instead of loading it all with readlines().
    with open(log_filename, 'r') as log_file:
        for line in log_file:
            if line.startswith(start_marker):
                if current_sublog is None:
                    # First marker found: the preamble is not needed anymore.
                    unmarked_lines.clear()
                else:
                    sublogs.append(current_sublog)
                current_sublog = [line]
            elif current_sublog is not None:
                current_sublog.append(line)
            else:
                unmarked_lines.append(line)

    if current_sublog is not None:
        sublogs.append(current_sublog)
    elif unmarked_lines:
        # No marker anywhere: treat the entire log as one run.
        sublogs.append(unmarked_lines)
    return sublogs
def main():
args = parse_args()
write_csv(args.log, args.csv, args.type)
sublogs = split_log_file(args.log)
write_csv(sublogs, args.csv, args.type)
if __name__ == "__main__":
main()

View file

@ -32,4 +32,8 @@ RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf
RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX)
VORTEX_RT_PATH ?= $(VORTEX_HOME)/runtime
VORTEX_KN_PATH ?= $(VORTEX_HOME)/kernel
VORTEX_KN_PATH ?= $(VORTEX_HOME)/kernel
THIRD_PARTY_DIR ?= $(VORTEX_HOME)/third_party
VM_ENABLE ?= @VM_ENABLE@

15
configure vendored
View file

@ -63,7 +63,7 @@ copy_files() {
filename_no_ext="${filename%.in}"
dest_file="$dest_dir/$filename_no_ext"
mkdir -p "$dest_dir"
sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@PREFIX@|$PREFIX|g" "$file" > "$dest_file"
sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@PREFIX@|$PREFIX|g; s|@VM_ENABLE@|$VM_ENABLE|g" "$file" > "$dest_file"
# apply permissions to bash scripts
read -r firstline < "$dest_file"
if [[ "$firstline" =~ ^#!.*bash ]]; then
@ -111,9 +111,10 @@ copy_files() {
# default configuration parameters
default_xlen=32
default_tooldir=/opt
default_tooldir=$HOME/tools
default_osversion=$(detect_osversion)
default_prefix=$CURRENT_DIR
default_vm=0
# load default configuration parameters from existing config.mk
if [ -f "config.mk" ]; then
@ -126,6 +127,7 @@ if [ -f "config.mk" ]; then
TOOLDIR\ ?*) default_tooldir=${value//\?=/} ;;
OSVERSION\ ?*) default_osversion=${value//\?=/} ;;
PREFIX\ ?*) default_prefix=${value//\?=/} ;;
VM_ENABLE\ ?*) default_vm=${value//\?=/} ;;
esac
done < config.mk
fi
@ -135,14 +137,16 @@ XLEN=${XLEN:=$default_xlen}
TOOLDIR=${TOOLDIR:=$default_tooldir}
OSVERSION=${OSVERSION:=$default_osversion}
PREFIX=${PREFIX:=$default_prefix}
VM_ENABLE=${VM_ENABLE:=$default_vm}
# parse command line arguments
usage() {
echo "Usage: $0 [--xlen=<value>] [--tooldir=<path>] [--osversion=<version>]"
echo " --xlen=<value> Set the XLEN value (default: 32)"
echo " --tooldir=<path> Set the TOOLDIR path (default: /opt)"
echo " --osversion=<version> Set the OS Version (default: $(detect_os))"
echo " --tooldir=<path> Set the TOOLDIR path (default: $HOME/tools)"
echo " --osversion=<version> Set the OS Version (default: $(detect_osversion))"
echo " --prefix=<path> Set installation directory"
echo " --vm_enable=<value> Enable Virtual Memory support (default: 0)"
exit 1
}
while [[ "$#" -gt 0 ]]; do
@ -151,6 +155,7 @@ while [[ "$#" -gt 0 ]]; do
--tooldir=*) TOOLDIR="${1#*=}" ;;
--osversion=*) OSVERSION="${1#*=}" ;;
--prefix=*) PREFIX="${1#*=}" ;;
--vm_enable=*) VM_ENABLE="${1#*=}" ;;
-h|--help) usage ;;
*) echo "Unknown parameter passed: $1"; usage ;;
esac
@ -172,3 +177,5 @@ SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
THIRD_PARTY_DIR=$SCRIPT_DIR/third_party
copy_files "$SCRIPT_DIR" "$CURRENT_DIR"
echo "VM Enable: "$VM_ENABLE

79
docs/altera_fpga_guide.md Normal file
View file

@ -0,0 +1,79 @@
# FPGA Startup and Configuration Guide
OPAE Environment Setup
----------------------
$ source /opt/inteldevstack/init_env_user.sh
$ export OPAE_HOME=/opt/opae/1.1.2
$ export PATH=$OPAE_HOME/bin:$PATH
$ export C_INCLUDE_PATH=$OPAE_HOME/include:$C_INCLUDE_PATH
$ export LIBRARY_PATH=$OPAE_HOME/lib:$LIBRARY_PATH
$ export LD_LIBRARY_PATH=$OPAE_HOME/lib:$LD_LIBRARY_PATH
OPAE Build
------------------
The FPGA has the following configuration options:
- DEVICE_FAMILY=arria10 | stratix10
- NUM_CORES=#n
Command line:
$ cd hw/syn/altera/opae
$ PREFIX=test1 TARGET=fpga NUM_CORES=4 make
A new folder (ex: `test1_xxx_4c`) will be created and the build will start and take ~30-480 min to complete.
Setting TARGET=ase will build the project for simulation using Intel ASE.
OPAE Build Configuration
------------------------
The hardware configuration file `/hw/rtl/VX_config.vh` defines all the hardware parameters that can be modified when building the processor. For example, the following parameters can be configured:
- `NUM_WARPS`: Number of warps per core
- `NUM_THREADS`: Number of threads per warp
- `PERF_ENABLE`: enable the use of all profile counters
You configure the synthesis build from the command line:
$ CONFIGS="-DPERF_ENABLE -DNUM_THREADS=8" make
OPAE Build Progress
-------------------
You could check the last 10 lines in the build log for possible errors until build completion.
$ tail -n 10 <build_dir>/build.log
Check if the build is still running by looking for quartus_sh, quartus_syn, or quartus_fit programs.
$ ps -u <username>
If the build fails and you need to restart it, clean up the build folder using the following command:
$ make clean
The bitstream file `vortex_afu.gbs` should exist when the build is done:
$ ls -lsa <build_dir>/synth/vortex_afu.gbs
Signing the bitstream and Programming the FPGA
----------------------------------------------
$ cd <build_dir>
$ PACSign PR -t UPDATE -H openssl_manager -i vortex_afu.gbs -o vortex_afu_unsigned_ssl.gbs
$ fpgasupdate vortex_afu_unsigned_ssl.gbs
Sample FPGA Run Test
--------------------
Ensure you have the correct opae runtime for the FPGA target
$ make -C runtime/opae clean
$ TARGET=FPGA make -C runtime/opae
Run the following from your Vortex build directory
$ TARGET=fpga ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n128"

View file

@ -7,7 +7,8 @@
- [Cache Subsystem](cache_subsystem.md)
- [Software](software.md)
- [Simulation](simulation.md)
- [FPGA Setup Guide](fpga_setup.md)
- [Altera FPGA Setup Guide](altera_fpga_guide.md)
- [Xilinx FPGA Setup Guide](xilinx_fpga_guide.md)
- [Debugging](debugging.md)
- [Useful Links](references.md)
@ -27,6 +28,6 @@ Running Vortex simulators with different configurations:
$ ./ci/blackbox.sh --driver=opae --clusters=1 --cores=4 --warps=4 --threads=2 --app=demo
- Run dogfood driver test with simx driver and Vortex config of 4 cluster, 4 cores, 8 warps, 6 threads
- Run dogfood driver test with simx driver and Vortex config of 4 cluster, 4 cores, 8 warps, 6 threads
$ ./ci/blackbox.sh --driver=simx --clusters=4 --cores=4 --warps=8 --threads=6 --app=dogfood

36
docs/xilinx_fpga_guide.md Normal file
View file

@ -0,0 +1,36 @@
# FPGA Startup and Configuration Guide
XRT Environment Setup
----------------------
$ source /opt/xilinx/Vitis/2023.1/settings64.sh
$ source /opt/xilinx/xrt/setup.sh
Check Installed FPGA Platforms
------------------------------
$ platforminfo -l
Build FPGA image
----------------
$ cd hw/syn/xilinx/xrt
$ PREFIX=test1 PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 TARGET=hw NUM_CORES=4 make
This will run the synthesis under a new build directory: BUILD_DIR := "\<PREFIX>\_\<PLATFORM>\_\<TARGET>"
The generated bitstream will be located under <BUILD_DIR>/bin/vortex_afu.xclbin
Sample FPGA Run Test
--------------------
Ensure you have the correct xrt runtime for the FPGA target
$ make -C runtime/xrt clean
$ TARGET=hw make -C runtime/xrt
Run the following from your Vortex build directory
$ TARGET=hw FPGA_BIN_DIR=<BUILD_DIR>/bin ./ci/blackbox.sh --driver=xrt --app=sgemm --args="-n128"

View file

@ -9,13 +9,14 @@ all: config
config: VX_config.h VX_types.h
VX_config.h: $(RTL_DIR)/VX_config.vh
VX_config.h: $(RTL_DIR)/VX_config.vh
$(SCRIPT_DIR)/gen_config.py -i $(RTL_DIR)/VX_config.vh -o VX_config.h
VX_types.h: $(RTL_DIR)/VX_types.vh
VX_types.h: $(RTL_DIR)/VX_types.vh
$(SCRIPT_DIR)/gen_config.py -i $(RTL_DIR)/VX_types.vh -o VX_types.h
clean:
$(MAKE) -C unittest clean
rm -f VX_config.h VX_types.h
.PHONY: VX_config.h VX_types.h

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -14,8 +14,6 @@
`ifndef FLOAT_DPI_VH
`define FLOAT_DPI_VH
`include "VX_config.vh"
import "DPI-C" function void dpi_fadd(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fsub(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fmul(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);

View file

@ -14,8 +14,6 @@
`ifndef UTIL_DPI_VH
`define UTIL_DPI_VH
`include "VX_config.vh"
`ifdef XLEN_64
`define INT_TYPE longint
`else

View file

@ -14,7 +14,8 @@
`include "VX_define.vh"
module VX_cluster import VX_gpu_pkg::*; #(
parameter CLUSTER_ID = 0
parameter CLUSTER_ID = 0,
parameter `STRING INSTANCE_ID = ""
) (
`SCOPE_IO_DECL
@ -85,7 +86,7 @@ module VX_cluster import VX_gpu_pkg::*; #(
`RESET_RELAY (l2_reset, reset);
VX_cache_wrap #(
.INSTANCE_ID ("l2cache"),
.INSTANCE_ID ($sformatf("%s-l2cache", INSTANCE_ID)),
.CACHE_SIZE (`L2_CACHE_SIZE),
.LINE_SIZE (`L2_LINE_SIZE),
.NUM_BANKS (`L2_NUM_BANKS),
@ -98,6 +99,7 @@ module VX_cluster import VX_gpu_pkg::*; #(
.MREQ_SIZE (`L2_MREQ_SIZE),
.TAG_WIDTH (L2_TAG_WIDTH),
.WRITE_ENABLE (1),
.WRITEBACK (`L2_WRITEBACK),
.UUID_WIDTH (`UUID_WIDTH),
.CORE_OUT_BUF (2),
.MEM_OUT_BUF (2),
@ -122,17 +124,19 @@ module VX_cluster import VX_gpu_pkg::*; #(
wire [`NUM_SOCKETS-1:0] per_socket_busy;
VX_dcr_bus_if socket_dcr_bus_if();
`BUFFER_DCR_BUS_IF (socket_dcr_bus_if, socket_dcr_bus_tmp_if, (`NUM_SOCKETS > 1));
// Generate all sockets
for (genvar i = 0; i < `NUM_SOCKETS; ++i) begin
for (genvar socket_id = 0; socket_id < `NUM_SOCKETS; ++socket_id) begin : sockets
`RESET_RELAY (socket_reset, reset);
VX_socket #(
.SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + i)
.SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + socket_id),
.INSTANCE_ID ($sformatf("%s-socket%0d", INSTANCE_ID, socket_id))
) socket (
`SCOPE_IO_BIND (scope_socket+i)
`SCOPE_IO_BIND (scope_socket+socket_id)
.clk (clk),
.reset (socket_reset),
@ -143,13 +147,13 @@ module VX_cluster import VX_gpu_pkg::*; #(
.dcr_bus_if (socket_dcr_bus_if),
.mem_bus_if (per_socket_mem_bus_if[i]),
.mem_bus_if (per_socket_mem_bus_if[socket_id]),
`ifdef GBAR_ENABLE
.gbar_bus_if (per_socket_gbar_bus_if[i]),
.gbar_bus_if (per_socket_gbar_bus_if[socket_id]),
`endif
.busy (per_socket_busy[i])
.busy (per_socket_busy[socket_id])
);
end

View file

@ -33,10 +33,6 @@
`endif
///////////////////////////////////////////////////////////////////////////////
`ifndef VM_DISABLE
`define VM_ENABLE
`endif
`ifndef EXT_M_DISABLE
`define EXT_M_ENABLE
`endif
@ -114,7 +110,6 @@
`ifndef SOCKET_SIZE
`define SOCKET_SIZE `MIN(4, `NUM_CORES)
`endif
`define NUM_SOCKETS `UP(`NUM_CORES / `SOCKET_SIZE)
`ifdef L2_ENABLE
`define L2_ENABLED 1
@ -357,7 +352,7 @@
// Number of SFU units
`ifndef NUM_SFU_LANES
`define NUM_SFU_LANES `MIN(`NUM_THREADS, 4)
`define NUM_SFU_LANES `NUM_THREADS
`endif
`ifndef NUM_SFU_BLOCKS
`define NUM_SFU_BLOCKS 1
@ -481,22 +476,27 @@
`define LATENCY_FCVT 5
`endif
// FMA Bandwidth ratio
`ifndef FMA_PE_RATIO
`define FMA_PE_RATIO 1
`endif
// FDIV Bandwidth ratio
`ifndef FDIV_PE_RATIO
`define FDIV_PE_RATIO 8
`endif
// FSQRT Bandwidth ratio
`ifndef FSQRT_PE_RATIO
`define FSQRT_PE_RATIO 8
`endif
// FCVT Bandwidth ratio
`ifndef FCVT_PE_RATIO
`define FCVT_PE_RATIO 8
`endif
// FNCP Bandwidth ratio
`ifndef FNCP_PE_RATIO
`define FNCP_PE_RATIO 2
`endif
@ -603,7 +603,12 @@
`define DCACHE_NUM_WAYS 1
`endif
// SM Configurable Knobs //////////////////////////////////////////////////////
// Enable Cache Writeback
`ifndef DCACHE_WRITEBACK
`define DCACHE_WRITEBACK 0
`endif
// LMEM Configurable Knobs ////////////////////////////////////////////////////
`ifndef LMEM_DISABLE
`define LMEM_ENABLE
@ -662,6 +667,11 @@
`define L2_NUM_WAYS 2
`endif
// Enable Cache Writeback
`ifndef L2_WRITEBACK
`define L2_WRITEBACK 0
`endif
// L3cache Configurable Knobs /////////////////////////////////////////////////
// Cache Size
@ -703,6 +713,11 @@
`define L3_NUM_WAYS 4
`endif
// Enable Cache Writeback
`ifndef L3_WRITEBACK
`define L3_WRITEBACK 0
`endif
// ISA Extensions /////////////////////////////////////////////////////////////
`ifdef EXT_A_ENABLE

View file

@ -59,6 +59,8 @@
`define OFFSET_BITS 12
`define IMM_BITS `XLEN
`define NUM_SOCKETS `UP(`NUM_CORES / `SOCKET_SIZE)
///////////////////////////////////////////////////////////////////////////////
`define EX_ALU 0
@ -296,6 +298,7 @@
`ifdef ICACHE_ENABLE
`define L1_ENABLE
`endif
`ifdef DCACHE_ENABLE
`define L1_ENABLE
`endif
@ -322,7 +325,7 @@
.DATAW ($bits(dst)), \
.RESETW ($bits(dst)), \
.DEPTH (latency) \
) __``dst ( \
) __``dst``__ ( \
.clk (clk), \
.reset (reset), \
.enable (ena), \
@ -336,13 +339,18 @@
VX_popcount #( \
.N ($bits(in)), \
.MODEL (model) \
) __``out ( \
) __``out``__ ( \
.data_in (in), \
.data_out (out) \
)
`define POP_COUNT(out, in) `POP_COUNT_EX(out, in, 1)
`define ASSIGN_VX_IF(dst, src) \
assign dst.valid = src.valid; \
assign dst.data = src.data; \
assign src.ready = dst.ready
`define ASSIGN_VX_MEM_BUS_IF(dst, src) \
assign dst.req_valid = src.req_valid; \
assign dst.req_data = src.req_data; \
@ -377,42 +385,42 @@
assign dst.rsp_ready = src.rsp_ready
`define BUFFER_DCR_BUS_IF(dst, src, enable) \
logic [(1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH)-1:0] __``dst; \
if (enable) begin \
reg [(1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH)-1:0] __dst; \
always @(posedge clk) begin \
__``dst <= {src.write_valid, src.write_addr, src.write_data}; \
__dst <= {src.write_valid, src.write_addr, src.write_data}; \
end \
assign {dst.write_valid, dst.write_addr, dst.write_data} = __dst; \
end else begin \
assign __``dst = {src.write_valid, src.write_addr, src.write_data}; \
end \
VX_dcr_bus_if dst(); \
assign {dst.write_valid, dst.write_addr, dst.write_data} = __``dst
assign {dst.write_valid, dst.write_addr, dst.write_data} = {src.write_valid, src.write_addr, src.write_data}; \
end
`define PERF_COUNTER_ADD(dst, src, field, width, dst_count, src_count, reg_enable) \
for (genvar __d = 0; __d < dst_count; ++__d) begin \
localparam __count = ((src_count > dst_count) ? `CDIV(src_count, dst_count) : 1); \
wire [__count-1:0][width-1:0] __reduce_add_i_``src``field; \
wire [width-1:0] __reduce_add_o_``dst``field; \
for (genvar __i = 0; __i < __count; ++__i) begin \
assign __reduce_add_i_``src``field[__i] = ``src[__d * __count + __i].``field; \
`define PERF_COUNTER_ADD(dst, src, field, width, count, reg_enable) \
if (count > 1) begin \
wire [count-1:0][width-1:0] __reduce_add_i_field; \
wire [width-1:0] __reduce_add_o_field; \
for (genvar __i = 0; __i < count; ++__i) begin \
assign __reduce_add_i_field[__i] = src[__i].``field; \
end \
VX_reduce #(.DATAW_IN(width), .N(__count), .OP("+")) __reduce_add_``dst``field ( \
__reduce_add_i_``src``field, \
__reduce_add_o_``dst``field \
VX_reduce #(.DATAW_IN(width), .N(count), .OP("+")) __reduce_add_field ( \
__reduce_add_i_field, \
__reduce_add_o_field \
); \
if (reg_enable) begin \
reg [width-1:0] __reduce_add_r_``dst``field; \
reg [width-1:0] __reduce_add_r_field; \
always @(posedge clk) begin \
if (reset) begin \
__reduce_add_r_``dst``field <= '0; \
__reduce_add_r_field <= '0; \
end else begin \
__reduce_add_r_``dst``field <= __reduce_add_o_``dst``field; \
__reduce_add_r_field <= __reduce_add_o_field; \
end \
end \
assign ``dst[__d].``field = __reduce_add_r_``dst``field; \
assign dst.``field = __reduce_add_r_field; \
end else begin \
assign ``dst[__d].``field = __reduce_add_o_``dst``field; \
assign dst.``field = __reduce_add_o_field; \
end \
end else begin \
assign dst.``field = src[0].``field; \
end
`define ASSIGN_BLOCKED_WID(dst, src, block_idx, block_size) \
@ -426,20 +434,4 @@
assign dst = src; \
end
`define TO_DISPATCH_DATA(data, tid) { \
data.uuid, \
data.wis, \
data.tmask, \
data.PC, \
data.op_type, \
data.op_args, \
data.wb, \
data.rd, \
tid, \
data.rs1_data, \
data.rs2_data, \
data.rs3_data}
///////////////////////////////////////////////////////////////////////////////
`endif // VX_DEFINE_VH

View file

@ -60,6 +60,8 @@ package VX_gpu_pkg;
logic [7:0] mpm_class;
} base_dcrs_t;
//////////////////////////// Perf counter types ///////////////////////////
typedef struct packed {
logic [`PERF_CTR_BITS-1:0] reads;
logic [`PERF_CTR_BITS-1:0] writes;
@ -77,48 +79,63 @@ package VX_gpu_pkg;
logic [`PERF_CTR_BITS-1:0] latency;
} mem_perf_t;
typedef struct packed {
logic [`PERF_CTR_BITS-1:0] idles;
logic [`PERF_CTR_BITS-1:0] stalls;
} sched_perf_t;
typedef struct packed {
logic [`PERF_CTR_BITS-1:0] ibf_stalls;
logic [`PERF_CTR_BITS-1:0] scb_stalls;
logic [`PERF_CTR_BITS-1:0] opd_stalls;
logic [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] units_uses;
logic [`NUM_SFU_UNITS-1:0][`PERF_CTR_BITS-1:0] sfu_uses;
} issue_perf_t;
//////////////////////// instruction arguments ////////////////////////////
typedef struct packed {
logic use_PC;
logic use_imm;
logic is_w;
logic [`ALU_TYPE_BITS-1:0] xtype;
logic [`IMM_BITS-1:0] imm;
} alu_mod_t;
} alu_args_t;
typedef struct packed {
logic [($bits(alu_mod_t)-`INST_FRM_BITS-`INST_FMT_BITS)-1:0] __padding;
logic [($bits(alu_args_t)-`INST_FRM_BITS-`INST_FMT_BITS)-1:0] __padding;
logic [`INST_FRM_BITS-1:0] frm;
logic [`INST_FMT_BITS-1:0] fmt;
} fpu_mod_t;
} fpu_args_t;
typedef struct packed {
logic [($bits(alu_mod_t)-1-1-`OFFSET_BITS)-1:0] __padding;
logic [($bits(alu_args_t)-1-1-`OFFSET_BITS)-1:0] __padding;
logic is_store;
logic is_float;
logic [`OFFSET_BITS-1:0] offset;
} lsu_mod_t;
} lsu_args_t;
typedef struct packed {
logic [($bits(alu_mod_t)-1-`VX_CSR_ADDR_BITS-5)-1:0] __padding;
logic [($bits(alu_args_t)-1-`VX_CSR_ADDR_BITS-5)-1:0] __padding;
logic use_imm;
logic [`VX_CSR_ADDR_BITS-1:0] addr;
logic [4:0] imm;
} csr_mod_t;
} csr_args_t;
typedef struct packed {
logic [($bits(alu_mod_t)-1)-1:0] __padding;
logic [($bits(alu_args_t)-1)-1:0] __padding;
logic is_neg;
} wctl_mod_t;
} wctl_args_t;
typedef union packed {
alu_mod_t alu;
fpu_mod_t fpu;
lsu_mod_t lsu;
csr_mod_t csr;
wctl_mod_t wctl;
alu_args_t alu;
fpu_args_t fpu;
lsu_args_t lsu;
csr_args_t csr;
wctl_args_t wctl;
} op_args_t;
/* verilator lint_off UNUSED */
`IGNORE_UNUSED_BEGIN
///////////////////////// LSU memory Parameters ///////////////////////////
@ -129,6 +146,31 @@ package VX_gpu_pkg;
localparam LSU_TAG_WIDTH = (`UUID_WIDTH + LSU_TAG_ID_BITS);
localparam LSU_NUM_REQS = `NUM_LSU_BLOCKS * `NUM_LSU_LANES;
////////////////////////// Icache Parameters //////////////////////////////
// Word size in bytes
localparam ICACHE_WORD_SIZE = 4;
localparam ICACHE_ADDR_WIDTH = (`MEM_ADDR_WIDTH - `CLOG2(ICACHE_WORD_SIZE));
// Block size in bytes
localparam ICACHE_LINE_SIZE = `L1_LINE_SIZE;
// Core request tag Id bits
localparam ICACHE_TAG_ID_BITS = `NW_WIDTH;
// Core request tag bits
localparam ICACHE_TAG_WIDTH = (`UUID_WIDTH + ICACHE_TAG_ID_BITS);
// Memory request data bits
localparam ICACHE_MEM_DATA_WIDTH = (ICACHE_LINE_SIZE * 8);
// Memory request tag bits
`ifdef ICACHE_ENABLE
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_MEM_TAG_WIDTH(`ICACHE_MSHR_SIZE, 1, `NUM_ICACHES);
`else
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(1, ICACHE_LINE_SIZE, ICACHE_WORD_SIZE, ICACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_ICACHES);
`endif
////////////////////////// Dcache Parameters //////////////////////////////
// Word size in bytes
@ -154,36 +196,11 @@ package VX_gpu_pkg;
localparam DCACHE_MEM_DATA_WIDTH = (DCACHE_LINE_SIZE * 8);
// Memory request tag bits
`ifdef DCACHE_ENABLE
`ifdef DCACHE_ENABLE
localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_NC_MEM_TAG_WIDTH(`DCACHE_MSHR_SIZE, `DCACHE_NUM_BANKS, DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES);
`else
`else
localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES);
`endif
////////////////////////// Icache Parameters //////////////////////////////
// Word size in bytes
localparam ICACHE_WORD_SIZE = 4;
localparam ICACHE_ADDR_WIDTH = (`MEM_ADDR_WIDTH - `CLOG2(ICACHE_WORD_SIZE));
// Block size in bytes
localparam ICACHE_LINE_SIZE = `L1_LINE_SIZE;
// Core request tag Id bits
localparam ICACHE_TAG_ID_BITS = `NW_WIDTH;
// Core request tag bits
localparam ICACHE_TAG_WIDTH = (`UUID_WIDTH + ICACHE_TAG_ID_BITS);
// Memory request data bits
localparam ICACHE_MEM_DATA_WIDTH = (ICACHE_LINE_SIZE * 8);
// Memory request tag bits
`ifdef ICACHE_ENABLE
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_MEM_TAG_WIDTH(`ICACHE_MSHR_SIZE, 1, `NUM_ICACHES);
`else
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(1, ICACHE_LINE_SIZE, ICACHE_WORD_SIZE, ICACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_ICACHES);
`endif
`endif
/////////////////////////////// L1 Parameters /////////////////////////////
@ -208,11 +225,11 @@ package VX_gpu_pkg;
localparam L2_MEM_DATA_WIDTH = (`L2_LINE_SIZE * 8);
// Memory request tag bits
`ifdef L2_ENABLE
`ifdef L2_ENABLE
localparam L2_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L2_MSHR_SIZE, `L2_NUM_BANKS, L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH);
`else
`else
localparam L2_MEM_TAG_WIDTH = `CACHE_BYPASS_TAG_WIDTH(L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH);
`endif
`endif
/////////////////////////////// L3 Parameters /////////////////////////////
@ -229,23 +246,20 @@ package VX_gpu_pkg;
localparam L3_MEM_DATA_WIDTH = (`L3_LINE_SIZE * 8);
// Memory request tag bits
`ifdef L3_ENABLE
`ifdef L3_ENABLE
localparam L3_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L3_MSHR_SIZE, `L3_NUM_BANKS, L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH);
`else
`else
localparam L3_MEM_TAG_WIDTH = `CACHE_BYPASS_TAG_WIDTH(L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH);
`endif
/* verilator lint_on UNUSED */
`endif
/////////////////////////////// Issue parameters //////////////////////////
localparam ISSUE_ISW = `CLOG2(`ISSUE_WIDTH);
localparam ISSUE_ISW_W = `UP(ISSUE_ISW);
localparam ISSUE_RATIO = `NUM_WARPS / `ISSUE_WIDTH;
localparam ISSUE_WIS = `CLOG2(ISSUE_RATIO);
localparam PER_ISSUE_WARPS = `NUM_WARPS / `ISSUE_WIDTH;
localparam ISSUE_WIS = `CLOG2(PER_ISSUE_WARPS);
localparam ISSUE_WIS_W = `UP(ISSUE_WIS);
`IGNORE_UNUSED_BEGIN
function logic [`NW_WIDTH-1:0] wis_to_wid(
input logic [ISSUE_WIS_W-1:0] wis,
input logic [ISSUE_ISW_W-1:0] isw
@ -278,6 +292,20 @@ package VX_gpu_pkg;
wid_to_wis = 0;
end
endfunction
///////////////////////// Miscellaneous functions /////////////////////////
// Select the SFU sub-unit for an SFU opcode: the three CSR opcodes
// (CSRRW, CSRRS, CSRRC) map to `SFU_CSRS; every other opcode maps to
// `SFU_WCTL.
function logic [`SFU_WIDTH-1:0] op_to_sfu_type(
    input logic [`INST_OP_BITS-1:0] op_type
);
    if ((op_type == `INST_SFU_CSRRW)
     || (op_type == `INST_SFU_CSRRS)
     || (op_type == `INST_SFU_CSRRC)) begin
        op_to_sfu_type = `SFU_CSRS;
    end else begin
        op_to_sfu_type = `SFU_WCTL;
    end
endfunction
`IGNORE_UNUSED_END
endpackage

View file

@ -47,7 +47,7 @@
`define UNUSED_VAR(x)
`define UNUSED_PIN(x) . x ()
`define UNUSED_ARG(x) x
`define TRACE(level, args) $write args
`define TRACE(level, args) if (level <= `DEBUG_LEVEL) $write args
`else
`ifdef VERILATOR
`define TRACING_ON /* verilator tracing_on */
@ -112,8 +112,14 @@
`define UNUSED_ARG(x) /* verilator lint_off UNUSED */ \
x \
/* verilator lint_on UNUSED */
`define TRACE(level, args) dpi_trace(level, $sformatf args)
`endif
`ifdef SV_DPI
`define TRACE(level, args) dpi_trace(level, $sformatf args)
`else
`define TRACE(level, args) if (level <= `DEBUG_LEVEL) $write args
`endif
`endif
`ifdef SIMULATION

View file

@ -14,7 +14,8 @@
`include "VX_define.vh"
module VX_socket import VX_gpu_pkg::*; #(
parameter SOCKET_ID = 0
parameter SOCKET_ID = 0,
parameter `STRING INSTANCE_ID = ""
) (
`SCOPE_IO_DECL
@ -40,6 +41,11 @@ module VX_socket import VX_gpu_pkg::*; #(
output wire busy
);
`ifdef SCOPE
localparam scope_core = 0;
`SCOPE_IO_SWITCH (`SOCKET_SIZE);
`endif
`ifdef GBAR_ENABLE
VX_gbar_bus_if per_core_gbar_bus_if[`SOCKET_SIZE]();
@ -81,7 +87,7 @@ module VX_socket import VX_gpu_pkg::*; #(
`RESET_RELAY (icache_reset, reset);
VX_cache_cluster #(
.INSTANCE_ID ($sformatf("socket%0d-icache", SOCKET_ID)),
.INSTANCE_ID ($sformatf("%s-icache", INSTANCE_ID)),
.NUM_UNITS (`NUM_ICACHES),
.NUM_INPUTS (`SOCKET_SIZE),
.TAG_SEL_IDX (0),
@ -126,7 +132,7 @@ module VX_socket import VX_gpu_pkg::*; #(
`RESET_RELAY (dcache_reset, reset);
VX_cache_cluster #(
.INSTANCE_ID ($sformatf("socket%0d-dcache", SOCKET_ID)),
.INSTANCE_ID ($sformatf("%s-dcache", INSTANCE_ID)),
.NUM_UNITS (`NUM_DCACHES),
.NUM_INPUTS (`SOCKET_SIZE),
.TAG_SEL_IDX (0),
@ -143,8 +149,9 @@ module VX_socket import VX_gpu_pkg::*; #(
.TAG_WIDTH (DCACHE_TAG_WIDTH),
.UUID_WIDTH (`UUID_WIDTH),
.WRITE_ENABLE (1),
.WRITEBACK (`DCACHE_WRITEBACK),
.NC_ENABLE (1),
.CORE_OUT_BUF (`LMEM_ENABLED ? 2 : 1),
.CORE_OUT_BUF (2),
.MEM_OUT_BUF (2)
) dcache (
`ifdef PERF_ENABLE
@ -194,19 +201,19 @@ module VX_socket import VX_gpu_pkg::*; #(
wire [`SOCKET_SIZE-1:0] per_core_busy;
VX_dcr_bus_if core_dcr_bus_if();
`BUFFER_DCR_BUS_IF (core_dcr_bus_if, dcr_bus_if, (`SOCKET_SIZE > 1));
`SCOPE_IO_SWITCH (`SOCKET_SIZE)
// Generate all cores
for (genvar i = 0; i < `SOCKET_SIZE; ++i) begin
for (genvar core_id = 0; core_id < `SOCKET_SIZE; ++core_id) begin : cores
`RESET_RELAY (core_reset, reset);
VX_core #(
.CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + i)
.CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + core_id),
.INSTANCE_ID ($sformatf("%s-core%0d", INSTANCE_ID, core_id))
) core (
`SCOPE_IO_BIND (i)
`SCOPE_IO_BIND (scope_core + core_id)
.clk (clk),
.reset (core_reset),
@ -217,15 +224,15 @@ module VX_socket import VX_gpu_pkg::*; #(
.dcr_bus_if (core_dcr_bus_if),
.dcache_bus_if (per_core_dcache_bus_if[i * DCACHE_NUM_REQS +: DCACHE_NUM_REQS]),
.dcache_bus_if (per_core_dcache_bus_if[core_id * DCACHE_NUM_REQS +: DCACHE_NUM_REQS]),
.icache_bus_if (per_core_icache_bus_if[i]),
.icache_bus_if (per_core_icache_bus_if[core_id]),
`ifdef GBAR_ENABLE
.gbar_bus_if (per_core_gbar_bus_if[i]),
.gbar_bus_if (per_core_gbar_bus_if[core_id]),
`endif
.busy (per_core_busy[i])
.busy (per_core_busy[core_id])
);
end

View file

@ -85,30 +85,31 @@
`define VX_CSR_MPM_IBUF_ST_H 12'hB85
`define VX_CSR_MPM_SCRB_ST 12'hB06
`define VX_CSR_MPM_SCRB_ST_H 12'hB86
`define VX_CSR_MPM_SCRB_ALU 12'hB07
`define VX_CSR_MPM_SCRB_ALU_H 12'hB87
`define VX_CSR_MPM_SCRB_FPU 12'hB08
`define VX_CSR_MPM_SCRB_FPU_H 12'hB88
`define VX_CSR_MPM_SCRB_LSU 12'hB09
`define VX_CSR_MPM_SCRB_LSU_H 12'hB89
`define VX_CSR_MPM_SCRB_SFU 12'hB0A
`define VX_CSR_MPM_SCRB_SFU_H 12'hB8A
`define VX_CSR_MPM_OPDS_ST 12'hB07
`define VX_CSR_MPM_OPDS_ST_H 12'hB87
`define VX_CSR_MPM_SCRB_ALU 12'hB08
`define VX_CSR_MPM_SCRB_ALU_H 12'hB88
`define VX_CSR_MPM_SCRB_FPU 12'hB09
`define VX_CSR_MPM_SCRB_FPU_H 12'hB89
`define VX_CSR_MPM_SCRB_LSU 12'hB0A
`define VX_CSR_MPM_SCRB_LSU_H 12'hB8A
`define VX_CSR_MPM_SCRB_SFU 12'hB0B
`define VX_CSR_MPM_SCRB_SFU_H 12'hB8B
`define VX_CSR_MPM_SCRB_CSRS 12'hB0C
`define VX_CSR_MPM_SCRB_CSRS_H 12'hB8C
`define VX_CSR_MPM_SCRB_WCTL 12'hB0D
`define VX_CSR_MPM_SCRB_WCTL_H 12'hB8D
// PERF: memory
`define VX_CSR_MPM_IFETCHES 12'hB0B
`define VX_CSR_MPM_IFETCHES_H 12'hB8B
`define VX_CSR_MPM_LOADS 12'hB0C
`define VX_CSR_MPM_LOADS_H 12'hB8C
`define VX_CSR_MPM_STORES 12'hB0D
`define VX_CSR_MPM_STORES_H 12'hB8D
`define VX_CSR_MPM_IFETCH_LT 12'hB0E
`define VX_CSR_MPM_IFETCH_LT_H 12'hB8E
`define VX_CSR_MPM_LOAD_LT 12'hB0F
`define VX_CSR_MPM_LOAD_LT_H 12'hB8F
// SFU: scoreboard
`define VX_CSR_MPM_SCRB_WCTL 12'hB10
`define VX_CSR_MPM_SCRB_WCTL_H 12'hB90
`define VX_CSR_MPM_SCRB_CSRS 12'hB11
`define VX_CSR_MPM_SCRB_CSRS_H 12'hB91
`define VX_CSR_MPM_IFETCHES 12'hB0E
`define VX_CSR_MPM_IFETCHES_H 12'hB8E
`define VX_CSR_MPM_LOADS 12'hB0F
`define VX_CSR_MPM_LOADS_H 12'hB8F
`define VX_CSR_MPM_STORES 12'hB10
`define VX_CSR_MPM_STORES_H 12'hB90
`define VX_CSR_MPM_IFETCH_LT 12'hB11
`define VX_CSR_MPM_IFETCH_LT_H 12'hB91
`define VX_CSR_MPM_LOAD_LT 12'hB12
`define VX_CSR_MPM_LOAD_LT_H 12'hB92
// Machine Performance-monitoring memory counters (class 2) ///////////////////

View file

@ -44,6 +44,11 @@ module Vortex import VX_gpu_pkg::*; (
output wire busy
);
`ifdef SCOPE
localparam scope_cluster = 0;
`SCOPE_IO_SWITCH (`NUM_CLUSTERS);
`endif
`ifdef PERF_ENABLE
VX_mem_perf_if mem_perf_if();
assign mem_perf_if.icache = 'x;
@ -78,6 +83,7 @@ module Vortex import VX_gpu_pkg::*; (
.MREQ_SIZE (`L3_MREQ_SIZE),
.TAG_WIDTH (L2_MEM_TAG_WIDTH),
.WRITE_ENABLE (1),
.WRITEBACK (`L3_WRITEBACK),
.UUID_WIDTH (`UUID_WIDTH),
.CORE_OUT_BUF (2),
.MEM_OUT_BUF (2),
@ -121,19 +127,19 @@ module Vortex import VX_gpu_pkg::*; (
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
`SCOPE_IO_SWITCH (`NUM_CLUSTERS)
// Generate all clusters
for (genvar i = 0; i < `NUM_CLUSTERS; ++i) begin
for (genvar cluster_id = 0; cluster_id < `NUM_CLUSTERS; ++cluster_id) begin : clusters
`RESET_RELAY (cluster_reset, reset);
VX_dcr_bus_if cluster_dcr_bus_if();
`BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, (`NUM_CLUSTERS > 1));
VX_cluster #(
.CLUSTER_ID (i)
.CLUSTER_ID (cluster_id),
.INSTANCE_ID ($sformatf("cluster%0d", cluster_id))
) cluster (
`SCOPE_IO_BIND (i)
`SCOPE_IO_BIND (scope_cluster + cluster_id)
.clk (clk),
.reset (cluster_reset),
@ -144,9 +150,9 @@ module Vortex import VX_gpu_pkg::*; (
.dcr_bus_if (cluster_dcr_bus_if),
.mem_bus_if (per_cluster_mem_bus_if[i]),
.mem_bus_if (per_cluster_mem_bus_if[cluster_id]),
.busy (per_cluster_busy[i])
.busy (per_cluster_busy[cluster_id])
);
end

View file

@ -5,6 +5,7 @@
// To be done:
// Check how to run this with OPAE. Looks like setup issue
`ifndef NOPAE
`include "platform_if.vh"
@ -85,7 +86,7 @@ module ccip_std_afu #(
t_local_mem_data avs_writedata [NUM_LOCAL_MEM_BANKS];
t_local_mem_addr avs_address [NUM_LOCAL_MEM_BANKS];
logic avs_write [NUM_LOCAL_MEM_BANKS];
logic avs_read [NUM_LOCAL_MEM_BANKS];
logic avs_read [NUM_LOCAL_MEM_BANKS];
for (genvar b = 0; b < NUM_LOCAL_MEM_BANKS; b++) begin
assign local_mem[b].burstcount = avs_burstcount[b];
@ -94,7 +95,7 @@ module ccip_std_afu #(
assign local_mem[b].byteenable = avs_byteenable[b];
assign local_mem[b].write = avs_write[b];
assign local_mem[b].read = avs_read[b];
assign avs_waitrequest[b] = local_mem[b].waitrequest;
assign avs_readdata[b] = local_mem[b].readdata;
assign avs_readdatavalid[b] = local_mem[b].readdatavalid;
@ -107,7 +108,7 @@ module ccip_std_afu #(
.reset (reset_T1),
.cp2af_sRxPort (cp2af_sRx_T1),
.af2cp_sTxPort (af2cp_sTx_T0),
.af2cp_sTxPort (af2cp_sTx_T0),
.avs_writedata (avs_writedata),
.avs_readdata (avs_readdata),
@ -121,3 +122,5 @@ module ccip_std_afu #(
);
endmodule
`endif

View file

@ -587,7 +587,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.DATA_SIZE (LMEM_DATA_SIZE),
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
.TAG_WIDTH (AVS_REQ_TAGW),
.ARBITER ("P"),
.ARBITER ("P"), // prioritize VX requests
.REQ_OUT_BUF (0),
.RSP_OUT_BUF (0)
) mem_arb (
@ -692,9 +692,11 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.reset (reset),
.incr (cci_rd_req_fire),
.decr (cci_rdq_pop),
`UNUSED_PIN (empty),
`UNUSED_PIN (alm_empty),
.full (cci_pending_reads_full),
.size (cci_pending_reads),
`UNUSED_PIN (empty)
`UNUSED_PIN (alm_full),
.size (cci_pending_reads)
);
`UNUSED_VAR (cci_pending_reads)
@ -852,7 +854,9 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.incr (cci_mem_rd_rsp_fire),
.decr (cci_wr_rsp_fire),
.empty (cci_pending_writes_empty),
`UNUSED_PIN (alm_empty),
.full (cci_pending_writes_full),
`UNUSED_PIN (alm_full),
.size (cci_pending_writes)
);
@ -1010,7 +1014,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
// SCOPE //////////////////////////////////////////////////////////////////////
`ifdef DBG_SCOPE_AFU
`ifdef SCOPE
wire mem_req_fire = mem_bus_if[0].req_valid && mem_bus_if[0].req_ready;
wire mem_rsp_fire = mem_bus_if[0].rsp_valid && mem_bus_if[0].rsp_ready;
wire avs_write_fire = avs_write[0] && ~avs_waitrequest[0];
@ -1080,7 +1083,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.bus_in(scope_bus_in_w[0]),
.bus_out(scope_bus_out_w[0])
);
`endif
`else
`SCOPE_IO_UNUSED_W(0)
`endif

View file

@ -311,7 +311,6 @@ module VX_afu_wrap #(
// SCOPE //////////////////////////////////////////////////////////////////////
`ifdef DBG_SCOPE_AFU
`ifdef SCOPE
`define TRIGGERS { \
reset, \
ap_start, \
@ -330,35 +329,17 @@ module VX_afu_wrap #(
VX_scope_tap #(
.SCOPE_ID (0),
.TRIGGERW ($bits(`TRIGGERS)),
.PROBEW ($bits(`PROBES))
.PROBEW ($bits(`PROBES))
) scope_tap (
.clk(clk),
.reset(scope_reset_w[0]),
.start(1'b0),
.stop(1'b0),
.triggers(`TRIGGERS),
.probes(`PROBES),
.bus_in(scope_bus_in_w[0]),
.bus_out(scope_bus_out_w[0])
.clk (clk),
.reset (scope_reset_w[0]),
.start (1'b0),
.stop (1'b0),
.triggers (`TRIGGERS),
.probes (`PROBES),
.bus_in (scope_bus_in_w[0]),
.bus_out (scope_bus_out_w[0])
);
`endif
`ifdef CHIPSCOPE
ila_afu ila_afu_inst (
.clk (ap_clk),
.probe0 ({
ap_start,
ap_done,
ap_idle,
interrupt
}),
.probe1 ({
vx_pending_writes,
vx_busy_wait,
vx_busy,
vx_running
})
);
`endif
`else
`SCOPE_IO_UNUSED_W(0)
`endif

109
hw/rtl/cache/VX_bank_flush.sv vendored Normal file
View file

@ -0,0 +1,109 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_cache_define.vh"
// Per-bank flush sequencer.
//
// Operation, as implemented below:
//   * After reset the unit enters STATE_INIT and steps a counter through
//     every line index once, one per cycle, with flush_out_init asserted
//     (presumably used by the bank to initialize its tag store - confirm
//     against VX_cache_bank).
//   * In STATE_IDLE it waits for a flush request (flush_in_valid) and for
//     mshr_empty to be asserted.
//   * In STATE_FLUSH it asserts flush_out_valid and walks the counter over
//     all lines (and, when WRITEBACK is set, over all ways), advancing only
//     when flush_out_ready is high.
//   * When the last entry has been accepted, flush_in_ready is pulsed for one
//     cycle and the unit returns to STATE_IDLE.
//
// Ports:
//   flush_in_valid / flush_in_ready : flush request in / completion pulse out
//   flush_out_init                  : high while the post-reset walk is running
//   flush_out_valid / flush_out_ready : per-entry flush request handshake
//   flush_out_line                  : line index of the current entry
//   flush_out_way                   : one-hot way select (all ones when ways
//                                     are not iterated)
//   mshr_empty                      : gates the start of a flush
module VX_bank_flush #(
    // Size of cache in bytes
    parameter CACHE_SIZE = 1024,
    // Size of line inside a bank in bytes
    parameter LINE_SIZE  = 64,
    // Number of banks
    parameter NUM_BANKS  = 1,
    // Number of associative ways
    parameter NUM_WAYS   = 1,
    // Enable cache writeback
    parameter WRITEBACK  = 0
) (
    input wire clk,
    input wire reset,
    input wire flush_in_valid,
    output wire flush_in_ready,
    output wire flush_out_init,
    output wire flush_out_valid,
    output wire [`CS_LINE_SEL_BITS-1:0] flush_out_line,
    output wire [NUM_WAYS-1:0] flush_out_way,
    input wire flush_out_ready,
    input wire mshr_empty
);
    // Derived constants: declared as localparam so they are visibly not part
    // of the module's configurable interface.

    // The counter covers the line index and, only when writeback is enabled,
    // the way index in its upper bits.
    localparam CTR_WIDTH = `CS_LINE_SEL_BITS + (WRITEBACK ? `CS_WAY_SEL_BITS : 0);

    localparam STATE_IDLE  = 2'd0;
    localparam STATE_INIT  = 2'd1;
    localparam STATE_FLUSH = 2'd2;

    reg [CTR_WIDTH-1:0] counter_r;
    reg [1:0] state_r, state_n;
    reg flush_in_ready_r, flush_in_ready_n;

    // Next-state logic
    always @(*) begin
        state_n = state_r;
        flush_in_ready_n = 0;
        case (state_r)
        // STATE_IDLE (also catches the unused encoding 2'd3)
        default: begin
            // flush_in_ready_r is high during the first idle cycle after a
            // flush; that cycle acknowledges the previous request, so it must
            // not be mistaken for a new one (assumes the requester holds
            // flush_in_valid until it sees flush_in_ready).
            if (flush_in_valid && mshr_empty && !flush_in_ready_r) begin
                state_n = STATE_FLUSH;
            end
        end
        STATE_INIT: begin
            // The post-reset walk only iterates over line indices.
            if (counter_r == ((2 ** `CS_LINE_SEL_BITS)-1)) begin
                state_n = STATE_IDLE;
            end
        end
        STATE_FLUSH: begin
            // Finish only once the last entry has actually been accepted;
            // leaving while flush_out_ready is low would drop that entry.
            if ((counter_r == ((2 ** CTR_WIDTH)-1)) && flush_out_ready) begin
                state_n = STATE_IDLE;
                flush_in_ready_n = 1;
            end
        end
        endcase
    end

    // State, completion pulse and walk counter
    always @(posedge clk) begin
        if (reset) begin
            state_r <= STATE_INIT;
            counter_r <= '0;
            flush_in_ready_r <= '0;
        end else begin
            state_r <= state_n;
            flush_in_ready_r <= flush_in_ready_n;
            if (state_r != STATE_IDLE) begin
                // INIT advances every cycle; FLUSH advances on acceptance.
                if ((state_r == STATE_INIT) || flush_out_ready) begin
                    counter_r <= counter_r + CTR_WIDTH'(1);
                end
            end else begin
                // Rewind while idle so every flush starts from entry 0.
                counter_r <= '0;
            end
        end
    end

    assign flush_in_ready  = flush_in_ready_r;
    assign flush_out_init  = (state_r == STATE_INIT);
    assign flush_out_valid = (state_r == STATE_FLUSH);
    assign flush_out_line  = counter_r[`CS_LINE_SEL_BITS-1:0];

    if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin
        // Decode the upper counter bits into a one-hot way select.
        reg [NUM_WAYS-1:0] flush_out_way_r;
        always @(*) begin
            flush_out_way_r = '0;
            flush_out_way_r[counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]] = 1;
        end
        assign flush_out_way = flush_out_way_r;
    end else begin
        // Ways are not iterated: select all of them for every line.
        assign flush_out_way = {NUM_WAYS{1'b1}};
    end

endmodule

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -14,15 +14,15 @@
`include "VX_cache_define.vh"
module VX_cache import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "",
parameter `STRING INSTANCE_ID = "",
// Number of Word requests per cycle
parameter NUM_REQS = 4,
// Size of cache in bytes
parameter CACHE_SIZE = 4096,
parameter CACHE_SIZE = 4096,
// Size of line inside a bank in bytes
parameter LINE_SIZE = 64,
parameter LINE_SIZE = 64,
// Number of banks
parameter NUM_BANKS = 1,
// Number of associative ways
@ -33,7 +33,7 @@ module VX_cache import VX_gpu_pkg::*; #(
// Core Response Queue Size
parameter CRSQ_SIZE = 2,
// Miss Reserv Queue Knob
parameter MSHR_SIZE = 8,
parameter MSHR_SIZE = 8,
// Memory Response Queue Size
parameter MRSQ_SIZE = 0,
// Memory Request Queue Size
@ -42,6 +42,9 @@ module VX_cache import VX_gpu_pkg::*; #(
// Enable cache writeable
parameter WRITE_ENABLE = 1,
// Enable cache writeback
parameter WRITEBACK = 0,
// Request debug identifier
parameter UUID_WIDTH = 0,
@ -53,12 +56,12 @@ module VX_cache import VX_gpu_pkg::*; #(
// Memory request output register
parameter MEM_OUT_BUF = 0
) (
) (
// PERF
`ifdef PERF_ENABLE
output cache_perf_t cache_perf,
`endif
input wire clk,
input wire reset,
@ -67,6 +70,7 @@ module VX_cache import VX_gpu_pkg::*; #(
);
`STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter"))
`STATIC_ASSERT(WRITE_ENABLE || !WRITEBACK, ("invalid parameter"))
localparam REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS);
localparam WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS);
@ -78,36 +82,46 @@ module VX_cache import VX_gpu_pkg::*; #(
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
localparam LINE_ADDR_WIDTH = (`CS_WORD_ADDR_WIDTH - BANK_SEL_BITS - WORD_SEL_BITS);
localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH;
localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + 1;
localparam CORE_RSP_DATAW = WORD_WIDTH + TAG_WIDTH;
localparam CORE_REQ_BUF_ENABLE = (NUM_BANKS != 1) || (NUM_REQS != 1);
localparam MEM_REQ_BUF_ENABLE = (NUM_BANKS != 1);
localparam REQ_XBAR_BUF = (NUM_REQS > 4) ? 2 : 0;
`ifdef PERF_ENABLE
wire [NUM_BANKS-1:0] perf_read_miss_per_bank;
wire [NUM_BANKS-1:0] perf_write_miss_per_bank;
wire [NUM_BANKS-1:0] perf_mshr_stall_per_bank;
`endif
wire [NUM_REQS-1:0] core_req_valid;
wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr;
wire [NUM_REQS-1:0] core_req_rw;
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen;
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data;
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag;
wire [NUM_REQS-1:0] core_req_ready;
VX_mem_bus_if #(
.DATA_SIZE (WORD_SIZE),
.TAG_WIDTH (TAG_WIDTH)
) core_bus2_if[NUM_REQS]();
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_req_valid[i] = core_bus_if[i].req_valid;
assign core_req_rw[i] = core_bus_if[i].req_data.rw;
assign core_req_byteen[i] = core_bus_if[i].req_data.byteen;
assign core_req_addr[i] = core_bus_if[i].req_data.addr;
assign core_req_data[i] = core_bus_if[i].req_data.data;
assign core_req_tag[i] = core_bus_if[i].req_data.tag;
assign core_bus_if[i].req_ready = core_req_ready[i];
`UNUSED_VAR (core_bus_if[i].req_data.atype)
end
wire [NUM_BANKS-1:0] per_bank_flush_valid;
wire [NUM_BANKS-1:0] per_bank_flush_ready;
wire [NUM_BANKS-1:0] per_bank_core_req_fire;
// this reset relay is required to sync with bank initialization
`RESET_RELAY (flush_reset, reset);
VX_cache_flush #(
.NUM_REQS (NUM_REQS),
.NUM_BANKS (NUM_BANKS),
.BANK_SEL_LATENCY (`TO_OUT_BUF_REG(REQ_XBAR_BUF)) // bank xbar latency
) flush_unit (
.clk (clk),
.reset (flush_reset),
.core_bus_in_if (core_bus_if),
.core_bus_out_if (core_bus2_if),
.bank_req_fire (per_bank_core_req_fire),
.flush_valid (per_bank_flush_valid),
.flush_ready (per_bank_flush_ready)
);
///////////////////////////////////////////////////////////////////////////
@ -117,10 +131,10 @@ module VX_cache import VX_gpu_pkg::*; #(
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s;
wire [NUM_REQS-1:0] core_rsp_ready_s;
`RESET_RELAY (core_rsp_reset, reset);
for (genvar i = 0; i < NUM_REQS; ++i) begin
`RESET_RELAY (core_rsp_reset, reset);
VX_elastic_buffer #(
.DATAW (`CS_WORD_WIDTH + TAG_WIDTH),
.SIZE (CORE_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0),
@ -131,9 +145,9 @@ module VX_cache import VX_gpu_pkg::*; #(
.valid_in (core_rsp_valid_s[i]),
.ready_in (core_rsp_ready_s[i]),
.data_in ({core_rsp_data_s[i], core_rsp_tag_s[i]}),
.data_out ({core_bus_if[i].rsp_data.data, core_bus_if[i].rsp_data.tag}),
.valid_out (core_bus_if[i].rsp_valid),
.ready_out (core_bus_if[i].rsp_ready)
.data_out ({core_bus2_if[i].rsp_data.data, core_bus2_if[i].rsp_data.tag}),
.valid_out (core_bus2_if[i].rsp_valid),
.ready_out (core_bus2_if[i].rsp_ready)
);
end
@ -146,24 +160,29 @@ module VX_cache import VX_gpu_pkg::*; #(
wire [LINE_SIZE-1:0] mem_req_byteen_s;
wire [`CS_LINE_WIDTH-1:0] mem_req_data_s;
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_s;
wire mem_req_flush_s;
wire mem_req_ready_s;
wire mem_bus_if_flush;
`RESET_RELAY (mem_req_reset, reset);
VX_elastic_buffer #(
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH),
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1),
.SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
) mem_req_buf (
.clk (clk),
.reset (reset),
.valid_in (mem_req_valid_s),
.ready_in (mem_req_ready_s),
.data_in ({mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_data_s, mem_req_tag_s}),
.data_out ({mem_bus_if.req_data.rw, mem_bus_if.req_data.byteen, mem_bus_if.req_data.addr, mem_bus_if.req_data.data, mem_bus_if.req_data.tag}),
.valid_out (mem_bus_if.req_valid),
.reset (mem_req_reset),
.valid_in (mem_req_valid_s),
.ready_in (mem_req_ready_s),
.data_in ({mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_data_s, mem_req_tag_s, mem_req_flush_s}),
.data_out ({mem_bus_if.req_data.rw, mem_bus_if.req_data.byteen, mem_bus_if.req_data.addr, mem_bus_if.req_data.data, mem_bus_if.req_data.tag, mem_bus_if_flush}),
.valid_out (mem_bus_if.req_valid),
.ready_out (mem_bus_if.req_ready)
);
assign mem_bus_if.req_data.atype = '0;
assign mem_bus_if.req_data.atype = mem_bus_if_flush ? `ADDR_TYPE_WIDTH'(1 << `ADDR_TYPE_FLUSH) : '0;
///////////////////////////////////////////////////////////////////////////
@ -172,44 +191,26 @@ module VX_cache import VX_gpu_pkg::*; #(
wire [`CS_LINE_WIDTH-1:0] mem_rsp_data_s;
wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_s;
wire mem_rsp_ready_s;
`RESET_RELAY (mem_rsp_reset, reset);
VX_elastic_buffer #(
.DATAW (MEM_TAG_WIDTH + `CS_LINE_WIDTH),
.DATAW (MEM_TAG_WIDTH + `CS_LINE_WIDTH),
.SIZE (MRSQ_SIZE),
.OUT_REG (MRSQ_SIZE > 2)
) mem_rsp_queue (
.clk (clk),
.reset (reset),
.reset (mem_rsp_reset),
.valid_in (mem_bus_if.rsp_valid),
.ready_in (mem_bus_if.rsp_ready),
.data_in ({mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data}),
.data_out ({mem_rsp_tag_s, mem_rsp_data_s}),
.data_in ({mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data}),
.data_out ({mem_rsp_tag_s, mem_rsp_data_s}),
.valid_out (mem_rsp_valid_s),
.ready_out (mem_rsp_ready_s)
);
///////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
wire [`CS_LINE_SEL_BITS-1:0] init_line_sel;
wire init_enable;
// this reset relay is required to sync with bank initialization
`RESET_RELAY (init_reset, reset);
VX_cache_init #(
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_WAYS (NUM_WAYS)
) cache_init (
.clk (clk),
.reset (init_reset),
.addr_out (init_line_sel),
.valid_out (init_enable)
);
///////////////////////////////////////////////////////////////////////
wire [NUM_BANKS-1:0] per_bank_core_req_valid;
wire [NUM_BANKS-1:0][`CS_LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr;
wire [NUM_BANKS-1:0] per_bank_core_req_rw;
@ -218,25 +219,28 @@ module VX_cache import VX_gpu_pkg::*; #(
wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_req_data;
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_req_tag;
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_req_idx;
wire [NUM_BANKS-1:0] per_bank_core_req_flush;
wire [NUM_BANKS-1:0] per_bank_core_req_ready;
wire [NUM_BANKS-1:0] per_bank_core_rsp_valid;
wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_rsp_data;
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_rsp_tag;
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_rsp_idx;
wire [NUM_BANKS-1:0] per_bank_core_rsp_ready;
wire [NUM_BANKS-1:0] per_bank_mem_req_valid;
wire [NUM_BANKS-1:0] per_bank_mem_req_valid;
wire [NUM_BANKS-1:0][`CS_MEM_ADDR_WIDTH-1:0] per_bank_mem_req_addr;
wire [NUM_BANKS-1:0] per_bank_mem_req_rw;
wire [NUM_BANKS-1:0][WORD_SEL_WIDTH-1:0] per_bank_mem_req_wsel;
wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_mem_req_byteen;
wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_mem_req_data;
wire [NUM_BANKS-1:0][LINE_SIZE-1:0] per_bank_mem_req_byteen;
wire [NUM_BANKS-1:0][`CS_LINE_WIDTH-1:0] per_bank_mem_req_data;
wire [NUM_BANKS-1:0][MSHR_ADDR_WIDTH-1:0] per_bank_mem_req_id;
wire [NUM_BANKS-1:0] per_bank_mem_req_flush;
wire [NUM_BANKS-1:0] per_bank_mem_req_ready;
wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready;
assign per_bank_core_req_fire = per_bank_core_req_valid & per_bank_mem_req_ready;
if (NUM_BANKS == 1) begin
assign mem_rsp_ready_s = per_bank_mem_rsp_ready;
end else begin
@ -245,12 +249,33 @@ module VX_cache import VX_gpu_pkg::*; #(
// Bank requests dispatch
wire [NUM_REQS-1:0][CORE_REQ_DATAW-1:0] core_req_data_in;
wire [NUM_BANKS-1:0][CORE_REQ_DATAW-1:0] core_req_data_out;
wire [NUM_REQS-1:0] core_req_valid;
wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr;
wire [NUM_REQS-1:0] core_req_rw;
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen;
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data;
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag;
wire [NUM_REQS-1:0] core_req_flush;
wire [NUM_REQS-1:0] core_req_ready;
wire [NUM_REQS-1:0][LINE_ADDR_WIDTH-1:0] core_req_line_addr;
wire [NUM_REQS-1:0][BANK_SEL_WIDTH-1:0] core_req_bid;
wire [NUM_REQS-1:0][WORD_SEL_WIDTH-1:0] core_req_wsel;
wire [NUM_REQS-1:0][CORE_REQ_DATAW-1:0] core_req_data_in;
wire [NUM_BANKS-1:0][CORE_REQ_DATAW-1:0] core_req_data_out;
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_req_valid[i] = core_bus2_if[i].req_valid;
assign core_req_rw[i] = core_bus2_if[i].req_data.rw;
assign core_req_byteen[i] = core_bus2_if[i].req_data.byteen;
assign core_req_addr[i] = core_bus2_if[i].req_data.addr;
assign core_req_data[i] = core_bus2_if[i].req_data.data;
assign core_req_tag[i] = core_bus2_if[i].req_data.tag;
assign core_req_flush[i] = core_bus2_if[i].req_data.atype[`ADDR_TYPE_FLUSH];
assign core_bus2_if[i].req_ready = core_req_ready[i];
end
for (genvar i = 0; i < NUM_REQS; ++i) begin
if (WORDS_PER_LINE > 1) begin
assign core_req_wsel[i] = core_req_addr[i][0 +: WORD_SEL_BITS];
@ -273,9 +298,11 @@ module VX_cache import VX_gpu_pkg::*; #(
core_req_line_addr[i],
core_req_rw[i],
core_req_wsel[i],
core_req_byteen[i],
core_req_byteen[i],
core_req_data[i],
core_req_tag[i]};
core_req_tag[i],
core_req_flush[i]
};
end
`ifdef PERF_ENABLE
@ -284,12 +311,12 @@ module VX_cache import VX_gpu_pkg::*; #(
`RESET_RELAY (req_xbar_reset, reset);
VX_stream_xbar #(
VX_stream_xbar #(
.NUM_INPUTS (NUM_REQS),
.NUM_OUTPUTS (NUM_BANKS),
.DATAW (CORE_REQ_DATAW),
.PERF_CTR_BITS (`PERF_CTR_BITS),
.OUT_BUF ((NUM_REQS > 4) ? 2 : 0)
.OUT_BUF (REQ_XBAR_BUF)
) req_xbar (
.clk (clk),
.reset (req_xbar_reset),
@ -313,27 +340,29 @@ module VX_cache import VX_gpu_pkg::*; #(
per_bank_core_req_addr[i],
per_bank_core_req_rw[i],
per_bank_core_req_wsel[i],
per_bank_core_req_byteen[i],
per_bank_core_req_byteen[i],
per_bank_core_req_data[i],
per_bank_core_req_tag[i]} = core_req_data_out[i];
per_bank_core_req_tag[i],
per_bank_core_req_flush[i]
} = core_req_data_out[i];
end
// Banks access
for (genvar i = 0; i < NUM_BANKS; ++i) begin
for (genvar bank_id = 0; bank_id < NUM_BANKS; ++bank_id) begin : banks
wire [`CS_LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr;
wire curr_bank_mem_rsp_valid;
if (NUM_BANKS == 1) begin
assign curr_bank_mem_rsp_valid = mem_rsp_valid_s;
end else begin
assign curr_bank_mem_rsp_valid = mem_rsp_valid_s && (`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s) == i);
assign curr_bank_mem_rsp_valid = mem_rsp_valid_s && (`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s) == bank_id);
end
`RESET_RELAY (bank_reset, reset);
VX_cache_bank #(
.BANK_ID (i),
.INSTANCE_ID (INSTANCE_ID),
VX_cache_bank #(
.BANK_ID (bank_id),
.INSTANCE_ID ($sformatf("%s-bank%0d", INSTANCE_ID, bank_id)),
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
@ -344,65 +373,66 @@ module VX_cache import VX_gpu_pkg::*; #(
.MSHR_SIZE (MSHR_SIZE),
.MREQ_SIZE (MREQ_SIZE),
.WRITE_ENABLE (WRITE_ENABLE),
.WRITEBACK (WRITEBACK),
.UUID_WIDTH (UUID_WIDTH),
.TAG_WIDTH (TAG_WIDTH),
.CORE_OUT_BUF (CORE_REQ_BUF_ENABLE ? 0 : CORE_OUT_BUF),
.MEM_OUT_BUF (MEM_REQ_BUF_ENABLE ? 0 : MEM_OUT_BUF)
) bank (
) bank (
.clk (clk),
.reset (bank_reset),
`ifdef PERF_ENABLE
.perf_read_misses (perf_read_miss_per_bank[i]),
.perf_write_misses (perf_write_miss_per_bank[i]),
.perf_mshr_stalls (perf_mshr_stall_per_bank[i]),
.perf_read_misses (perf_read_miss_per_bank[bank_id]),
.perf_write_misses (perf_write_miss_per_bank[bank_id]),
.perf_mshr_stalls (perf_mshr_stall_per_bank[bank_id]),
`endif
// Core request
.core_req_valid (per_bank_core_req_valid[i]),
.core_req_addr (per_bank_core_req_addr[i]),
.core_req_rw (per_bank_core_req_rw[i]),
.core_req_wsel (per_bank_core_req_wsel[i]),
.core_req_byteen (per_bank_core_req_byteen[i]),
.core_req_data (per_bank_core_req_data[i]),
.core_req_tag (per_bank_core_req_tag[i]),
.core_req_idx (per_bank_core_req_idx[i]),
.core_req_ready (per_bank_core_req_ready[i]),
// Core response
.core_rsp_valid (per_bank_core_rsp_valid[i]),
.core_rsp_data (per_bank_core_rsp_data[i]),
.core_rsp_tag (per_bank_core_rsp_tag[i]),
.core_rsp_idx (per_bank_core_rsp_idx[i]),
.core_rsp_ready (per_bank_core_rsp_ready[i]),
// Core request
.core_req_valid (per_bank_core_req_valid[bank_id]),
.core_req_addr (per_bank_core_req_addr[bank_id]),
.core_req_rw (per_bank_core_req_rw[bank_id]),
.core_req_wsel (per_bank_core_req_wsel[bank_id]),
.core_req_byteen (per_bank_core_req_byteen[bank_id]),
.core_req_data (per_bank_core_req_data[bank_id]),
.core_req_tag (per_bank_core_req_tag[bank_id]),
.core_req_idx (per_bank_core_req_idx[bank_id]),
.core_req_flush (per_bank_core_req_flush[bank_id]),
.core_req_ready (per_bank_core_req_ready[bank_id]),
// Core response
.core_rsp_valid (per_bank_core_rsp_valid[bank_id]),
.core_rsp_data (per_bank_core_rsp_data[bank_id]),
.core_rsp_tag (per_bank_core_rsp_tag[bank_id]),
.core_rsp_idx (per_bank_core_rsp_idx[bank_id]),
.core_rsp_ready (per_bank_core_rsp_ready[bank_id]),
// Memory request
.mem_req_valid (per_bank_mem_req_valid[i]),
.mem_req_valid (per_bank_mem_req_valid[bank_id]),
.mem_req_addr (curr_bank_mem_req_addr),
.mem_req_rw (per_bank_mem_req_rw[i]),
.mem_req_wsel (per_bank_mem_req_wsel[i]),
.mem_req_byteen (per_bank_mem_req_byteen[i]),
.mem_req_data (per_bank_mem_req_data[i]),
.mem_req_id (per_bank_mem_req_id[i]),
.mem_req_ready (per_bank_mem_req_ready[i]),
.mem_req_rw (per_bank_mem_req_rw[bank_id]),
.mem_req_byteen (per_bank_mem_req_byteen[bank_id]),
.mem_req_data (per_bank_mem_req_data[bank_id]),
.mem_req_id (per_bank_mem_req_id[bank_id]),
.mem_req_flush (per_bank_mem_req_flush[bank_id]),
.mem_req_ready (per_bank_mem_req_ready[bank_id]),
// Memory response
.mem_rsp_valid (curr_bank_mem_rsp_valid),
.mem_rsp_data (mem_rsp_data_s),
.mem_rsp_id (`CS_MEM_TAG_TO_REQ_ID(mem_rsp_tag_s)),
.mem_rsp_ready (per_bank_mem_rsp_ready[i]),
.mem_rsp_ready (per_bank_mem_rsp_ready[bank_id]),
// initialization
.init_enable (init_enable),
.init_line_sel (init_line_sel)
.flush_valid (per_bank_flush_valid[bank_id]),
.flush_ready (per_bank_flush_ready[bank_id])
);
if (NUM_BANKS == 1) begin
assign per_bank_mem_req_addr[i] = curr_bank_mem_req_addr;
assign per_bank_mem_req_addr[bank_id] = curr_bank_mem_req_addr;
end else begin
assign per_bank_mem_req_addr[i] = `CS_LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, i);
assign per_bank_mem_req_addr[bank_id] = `CS_LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, bank_id);
end
end
end
// Bank responses gather
@ -442,37 +472,41 @@ module VX_cache import VX_gpu_pkg::*; #(
wire mem_req_valid_p;
wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_p;
wire mem_req_rw_p;
wire [WORD_SEL_WIDTH-1:0] mem_req_wsel_p;
wire [WORD_SIZE-1:0] mem_req_byteen_p;
wire [`CS_WORD_WIDTH-1:0] mem_req_data_p;
wire [LINE_SIZE-1:0] mem_req_byteen_p;
wire [`CS_LINE_WIDTH-1:0] mem_req_data_p;
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_p;
wire [MSHR_ADDR_WIDTH-1:0] mem_req_id_p;
wire mem_req_flush_p;
wire mem_req_ready_p;
// Memory request arbitration
wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + `CS_WORD_WIDTH)-1:0] data_in;
wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + 1)-1:0] data_in;
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign data_in[i] = {per_bank_mem_req_addr[i],
per_bank_mem_req_rw[i],
per_bank_mem_req_wsel[i],
per_bank_mem_req_byteen[i],
per_bank_mem_req_data[i],
per_bank_mem_req_id[i]};
assign data_in[i] = {
per_bank_mem_req_addr[i],
per_bank_mem_req_rw[i],
per_bank_mem_req_byteen[i],
per_bank_mem_req_data[i],
per_bank_mem_req_id[i],
per_bank_mem_req_flush[i]
};
end
`RESET_RELAY (mem_arb_reset, reset);
VX_stream_arb #(
.NUM_INPUTS (NUM_BANKS),
.DATAW (`CS_MEM_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + `CS_WORD_WIDTH + MSHR_ADDR_WIDTH),
.ARBITER ("R")
.DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + MSHR_ADDR_WIDTH + 1),
.ARBITER ("F")
) mem_req_arb (
.clk (clk),
.reset (reset),
.reset (mem_arb_reset),
.valid_in (per_bank_mem_req_valid),
.ready_in (per_bank_mem_req_ready),
.data_in (data_in),
.data_out ({mem_req_addr_p, mem_req_rw_p, mem_req_wsel_p, mem_req_byteen_p, mem_req_data_p, mem_req_id_p}),
.data_out ({mem_req_addr_p, mem_req_rw_p, mem_req_byteen_p, mem_req_data_p, mem_req_id_p, mem_req_flush_p}),
.valid_out (mem_req_valid_p),
.ready_out (mem_req_ready_p),
`UNUSED_PIN (sel_out)
@ -480,44 +514,28 @@ module VX_cache import VX_gpu_pkg::*; #(
if (NUM_BANKS > 1) begin
wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr_p);
assign mem_req_tag_p = MEM_TAG_WIDTH'({mem_req_bank_id, mem_req_id_p});
assign mem_req_tag_p = MEM_TAG_WIDTH'({mem_req_bank_id, mem_req_id_p});
end else begin
assign mem_req_tag_p = MEM_TAG_WIDTH'(mem_req_id_p);
end
end
// Memory request multi-port handling
assign mem_req_valid_s = mem_req_valid_p;
assign mem_req_addr_s = mem_req_addr_p;
assign mem_req_tag_s = mem_req_tag_p;
assign mem_req_flush_s = mem_req_flush_p;
assign mem_req_ready_p = mem_req_ready_s;
if (WRITE_ENABLE != 0) begin
if (`CS_WORDS_PER_LINE > 1) begin
reg [LINE_SIZE-1:0] mem_req_byteen_r;
reg [`CS_LINE_WIDTH-1:0] mem_req_data_r;
always @(*) begin
mem_req_byteen_r = '0;
mem_req_data_r = 'x;
mem_req_byteen_r[mem_req_wsel_p * WORD_SIZE +: WORD_SIZE] = mem_req_byteen_p;
mem_req_data_r[mem_req_wsel_p * `CS_WORD_WIDTH +: `CS_WORD_WIDTH] = mem_req_data_p;
end
assign mem_req_rw_s = mem_req_rw_p;
assign mem_req_byteen_s = mem_req_byteen_r;
assign mem_req_data_s = mem_req_data_r;
end else begin
`UNUSED_VAR (mem_req_wsel_p)
assign mem_req_rw_s = mem_req_rw_p;
assign mem_req_byteen_s = mem_req_byteen_p;
assign mem_req_data_s = mem_req_data_p;
end
assign mem_req_rw_s = mem_req_rw_p;
assign mem_req_byteen_s = mem_req_byteen_p;
assign mem_req_data_s = mem_req_data_p;
end else begin
`UNUSED_VAR (mem_req_byteen_p)
`UNUSED_VAR (mem_req_wsel_p)
`UNUSED_VAR (mem_req_data_p)
`UNUSED_VAR (mem_req_rw_p)
assign mem_req_rw_s = 0;
assign mem_req_byteen_s = {LINE_SIZE{1'b1}};
assign mem_req_data_s = '0;
@ -527,10 +545,10 @@ module VX_cache import VX_gpu_pkg::*; #(
// per cycle: core_reads, core_writes
wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle;
wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle;
wire [NUM_REQS-1:0] perf_core_reads_per_req;
wire [NUM_REQS-1:0] perf_core_writes_per_req;
// per cycle: read misses, write misses, msrq stalls, pipeline stalls
wire [`CLOG2(NUM_BANKS+1)-1:0] perf_read_miss_per_cycle;
wire [`CLOG2(NUM_BANKS+1)-1:0] perf_write_miss_per_cycle;
@ -539,16 +557,16 @@ module VX_cache import VX_gpu_pkg::*; #(
`BUFFER(perf_core_reads_per_req, core_req_valid & core_req_ready & ~core_req_rw);
`BUFFER(perf_core_writes_per_req, core_req_valid & core_req_ready & core_req_rw);
`POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_req);
`POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_req);
`POP_COUNT(perf_read_miss_per_cycle, perf_read_miss_per_bank);
`POP_COUNT(perf_write_miss_per_cycle, perf_write_miss_per_bank);
`POP_COUNT(perf_mshr_stall_per_cycle, perf_mshr_stall_per_bank);
wire [NUM_REQS-1:0] perf_crsp_stall_per_req;
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign perf_crsp_stall_per_req[i] = core_bus_if[i].rsp_valid && ~core_bus_if[i].rsp_ready;
assign perf_crsp_stall_per_req[i] = core_bus2_if[i].rsp_valid && ~core_bus2_if[i].rsp_ready;
end
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_req);
@ -561,7 +579,7 @@ module VX_cache import VX_gpu_pkg::*; #(
reg [`PERF_CTR_BITS-1:0] perf_write_misses;
reg [`PERF_CTR_BITS-1:0] perf_mshr_stalls;
reg [`PERF_CTR_BITS-1:0] perf_mem_stalls;
reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;
reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;
always @(posedge clk) begin
if (reset) begin

View file

@ -41,6 +41,9 @@ module VX_cache_bank #(
// Enable cache writeable
parameter WRITE_ENABLE = 1,
// Enable cache writeback
parameter WRITEBACK = 0,
// Request debug identifier
parameter UUID_WIDTH = 0,
@ -69,12 +72,13 @@ module VX_cache_bank #(
// Core Request
input wire core_req_valid,
input wire [`CS_LINE_ADDR_WIDTH-1:0] core_req_addr,
input wire core_req_rw,
input wire [WORD_SEL_WIDTH-1:0] core_req_wsel,
input wire [WORD_SIZE-1:0] core_req_byteen,
input wire [`CS_WORD_WIDTH-1:0] core_req_data,
input wire [TAG_WIDTH-1:0] core_req_tag,
input wire [REQ_SEL_WIDTH-1:0] core_req_idx,
input wire core_req_rw, // request type: 1 = write, 0 = read
input wire [WORD_SEL_WIDTH-1:0] core_req_wsel, // selects the word within the cache line; e.g. with 4-byte words and 64-byte lines there are 64/4 = 16 words, so log2(16) = 4 bits
input wire [WORD_SIZE-1:0] core_req_byteen,// byte-enable mask: which bytes of core_req_data to write
input wire [`CS_WORD_WIDTH-1:0] core_req_data, // data to be written
input wire [TAG_WIDTH-1:0] core_req_tag, // identifier of the request (request id)
input wire [REQ_SEL_WIDTH-1:0] core_req_idx, // index of the request in the core request array
input wire core_req_flush, // flush flag carried with the request (propagated to mem_req_flush through the memory request queue)
output wire core_req_ready,
// Core Response
@ -88,10 +92,10 @@ module VX_cache_bank #(
output wire mem_req_valid,
output wire [`CS_LINE_ADDR_WIDTH-1:0] mem_req_addr,
output wire mem_req_rw,
output wire [WORD_SEL_WIDTH-1:0] mem_req_wsel,
output wire [WORD_SIZE-1:0] mem_req_byteen,
output wire [`CS_WORD_WIDTH-1:0] mem_req_data,
output wire [MSHR_ADDR_WIDTH-1:0] mem_req_id,
output wire [LINE_SIZE-1:0] mem_req_byteen,
output wire [`CS_LINE_WIDTH-1:0] mem_req_data,
output wire [MSHR_ADDR_WIDTH-1:0] mem_req_id, // index of the head entry in the mshr
output wire mem_req_flush,
input wire mem_req_ready,
// Memory response
@ -100,9 +104,9 @@ module VX_cache_bank #(
input wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id,
output wire mem_rsp_ready,
// initialization
input wire init_enable,
input wire [`CS_LINE_SEL_BITS-1:0] init_line_sel
// flush
input wire flush_valid,
output wire flush_ready
);
localparam PIPELINE_STAGES = 2;
@ -128,23 +132,56 @@ module VX_cache_bank #(
wire [MSHR_ADDR_WIDTH-1:0] replay_id;
wire replay_ready;
wire is_init_st0;
wire is_flush_st0, is_flush_st1;
wire [NUM_WAYS-1:0] flush_way_st0;
wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1;
wire rw_st0, rw_st1;
wire [WORD_SEL_WIDTH-1:0] wsel_st0, wsel_st1;
wire [WORD_SIZE-1:0] byteen_st0, byteen_st1;
wire [REQ_SEL_WIDTH-1:0] req_idx_st0, req_idx_st1;
wire [TAG_WIDTH-1:0] tag_st0, tag_st1;
wire rw_sel, rw_st0, rw_st1;
wire [WORD_SEL_WIDTH-1:0] wsel_sel, wsel_st0, wsel_st1;
wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1;
wire [REQ_SEL_WIDTH-1:0] req_idx_sel, req_idx_st0, req_idx_st1;
wire [TAG_WIDTH-1:0] tag_sel, tag_st0, tag_st1;
wire [`CS_WORD_WIDTH-1:0] read_data_st1;
wire [`CS_LINE_WIDTH-1:0] data_sel, data_st0, data_st1;
wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0, mshr_id_st0, mshr_id_st1;
wire valid_sel, valid_st0, valid_st1;
wire is_init_st0;
wire is_creq_st0, is_creq_st1;
wire is_fill_st0, is_fill_st1;
wire is_replay_st0, is_replay_st1;
wire creq_flush_st0, creq_flush_st1;
wire [NUM_WAYS-1:0] way_sel_st0, way_sel_st1;
wire [NUM_WAYS-1:0] tag_matches_st0;
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0;
wire [MSHR_ADDR_WIDTH-1:0] mshr_prev_st0, mshr_prev_st1;
wire mshr_pending_st0, mshr_pending_st1;
wire mshr_empty;
wire line_flush_valid;
wire line_flush_init;
wire [`CS_LINE_SEL_BITS-1:0] line_flush_sel;
wire [NUM_WAYS-1:0] line_flush_way;
wire line_flush_ready;
// flush unit
VX_bank_flush #(
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_WAYS (NUM_WAYS),
.WRITEBACK (WRITEBACK)
) flush_unit (
.clk (clk),
.reset (reset),
.flush_in_valid (flush_valid),
.flush_in_ready (flush_ready),
.flush_out_init (line_flush_init),
.flush_out_valid (line_flush_valid),
.flush_out_line (line_flush_sel),
.flush_out_way (line_flush_way),
.flush_out_ready (line_flush_ready),
.mshr_empty (mshr_empty)
);
wire rdw_hazard_st0;
reg rdw_hazard_st1;
@ -154,76 +191,77 @@ module VX_cache_bank #(
// inputs arbitration:
// mshr replay has highest priority to maximize utilization since there is no miss.
// handle memory responses next to prevent deadlock with potential memory request from a miss.
wire replay_grant = ~init_enable;
// flush has precedence over core requests to ensure that the cache is in a consistent state.
wire replay_grant = ~line_flush_init;
wire replay_enable = replay_grant && replay_valid;
wire fill_grant = ~init_enable && ~replay_enable;
wire fill_grant = ~line_flush_init && ~replay_enable;
wire fill_enable = fill_grant && mem_rsp_valid;
wire creq_grant = ~init_enable && ~replay_enable && ~fill_enable;
wire flush_grant = ~line_flush_init && ~replay_enable && ~fill_enable;
wire flush_enable = flush_grant && line_flush_valid;
wire creq_grant = ~line_flush_init && ~replay_enable && ~fill_enable && ~flush_enable;
wire creq_enable = creq_grant && core_req_valid;
assign replay_ready = replay_grant
&& ~rdw_hazard_st0
&& ~pipe_stall;
&& ~rdw_hazard_st0
&& ~pipe_stall;
assign mem_rsp_ready = fill_grant
&& ~pipe_stall;
assign core_req_ready = creq_grant
&& ~mreq_queue_alm_full
&& ~mshr_alm_full
&& ~pipe_stall;
assign line_flush_ready = flush_grant
&& ~mreq_queue_alm_full
&& ~pipe_stall;
wire init_fire = init_enable;
assign core_req_ready = creq_grant
&& ~mreq_queue_alm_full
&& ~mshr_alm_full
&& ~pipe_stall;
wire init_fire = line_flush_init;
wire replay_fire = replay_valid && replay_ready;
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
wire flush_fire = line_flush_valid && line_flush_ready;
wire core_req_fire = core_req_valid && core_req_ready;
wire [TAG_WIDTH-1:0] mshr_creq_tag = replay_enable ? replay_tag : core_req_tag;
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || flush_fire || core_req_fire;
assign rw_sel = replay_valid ? replay_rw : core_req_rw;
assign byteen_sel = replay_valid ? replay_byteen : core_req_byteen;
assign wsel_sel = replay_valid ? replay_wsel : core_req_wsel;
assign req_idx_sel = replay_valid ? replay_idx : core_req_idx;
assign tag_sel = replay_valid ? replay_tag : core_req_tag;
assign addr_sel = (line_flush_init | line_flush_valid) ? `CS_LINE_ADDR_WIDTH'(line_flush_sel) :
(replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr));
if (WRITE_ENABLE) begin
assign data_sel[`CS_WORD_WIDTH-1:0] = replay_valid ? replay_data : (mem_rsp_valid ? mem_rsp_data[`CS_WORD_WIDTH-1:0] : core_req_data);
end else begin
assign data_sel[`CS_WORD_WIDTH-1:0] = mem_rsp_data[`CS_WORD_WIDTH-1:0];
`UNUSED_VAR (core_req_data)
`UNUSED_VAR (replay_data)
end
for (genvar i = `CS_WORD_WIDTH; i < `CS_LINE_WIDTH; ++i) begin
assign data_sel[i] = mem_rsp_data[i]; // only the memory response fills the upper words of data_sel
end
if (UUID_WIDTH != 0) begin
assign req_uuid_sel = mshr_creq_tag[TAG_WIDTH-1 -: UUID_WIDTH];
assign req_uuid_sel = tag_sel[TAG_WIDTH-1 -: UUID_WIDTH];
end else begin
assign req_uuid_sel = 0;
end
`UNUSED_VAR (mshr_creq_tag)
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || core_req_fire;
assign addr_sel = init_enable ? `CS_LINE_ADDR_WIDTH'(init_line_sel) :
(replay_valid ? replay_addr :
(mem_rsp_valid ? mem_rsp_addr : core_req_addr));
assign data_sel[`CS_WORD_WIDTH-1:0] = (mem_rsp_valid || !WRITE_ENABLE) ? mem_rsp_data[`CS_WORD_WIDTH-1:0] : (replay_valid ? replay_data : core_req_data);
for (genvar i = `CS_WORD_WIDTH; i < `CS_LINE_WIDTH; ++i) begin
assign data_sel[i] = mem_rsp_data[i];
end
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH),
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + NUM_WAYS + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH),
.RESETW (1)
) pipe_reg0 (
.clk (clk),
.reset (reset),
.enable (~pipe_stall),
.data_in ({
valid_sel,
init_enable,
replay_enable,
fill_enable,
creq_enable,
addr_sel,
data_sel,
replay_valid ? replay_rw : core_req_rw,
replay_valid ? replay_byteen : core_req_byteen,
replay_valid ? replay_wsel : core_req_wsel,
replay_valid ? replay_idx : core_req_idx,
replay_valid ? replay_tag : core_req_tag,
replay_id
}),
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_creq_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0})
.data_in ({valid_sel, line_flush_init, replay_enable, fill_enable, flush_enable, creq_enable, core_req_flush, line_flush_way, addr_sel, data_sel, rw_sel, byteen_sel, wsel_sel, req_idx_sel, tag_sel, replay_id}),
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, creq_flush_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0})
);
if (UUID_WIDTH != 0) begin
@ -232,20 +270,24 @@ module VX_cache_bank #(
assign req_uuid_st0 = 0;
end
wire do_creq_rd_st0 = valid_st0 && is_creq_st0 && ~rw_st0;
wire do_fill_st0 = valid_st0 && is_fill_st0;
wire do_init_st0 = valid_st0 && is_init_st0;
wire do_flush_st0 = valid_st0 && is_flush_st0;
wire do_creq_rd_st0 = valid_st0 && is_creq_st0 && ~rw_st0;
wire do_replay_rd_st0 = valid_st0 && is_replay_st0 && ~rw_st0;
wire do_fill_st0 = valid_st0 && is_fill_st0;
wire do_lookup_st0 = valid_st0 && ~(is_fill_st0 || is_init_st0);
wire do_cache_rd_st0 = do_creq_rd_st0 || do_replay_rd_st0;
wire [`CS_WORD_WIDTH-1:0] write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0];
wire [NUM_WAYS-1:0] tag_matches_st0, tag_matches_st1;
wire [NUM_WAYS-1:0] way_sel_st0, way_sel_st1;
wire [NUM_WAYS-1:0] repl_way_st0;
wire [`CS_TAG_SEL_BITS-1:0] repl_tag_st0;
`RESET_RELAY (tag_reset, reset);
VX_cache_tags #(
.INSTANCE_ID(INSTANCE_ID),
.INSTANCE_ID($sformatf("%s-tags", INSTANCE_ID)),
.BANK_ID (BANK_ID),
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),
@ -261,30 +303,37 @@ module VX_cache_bank #(
.stall (pipe_stall),
// read/Fill
// init/fill/lookup/flush
.init (do_init_st0 || do_flush_st0),
.fill (do_fill_st0),
.lookup (do_lookup_st0),
.line_addr (addr_st0),
.fill (do_fill_st0),
.init (do_init_st0),
.way_sel (way_sel_st0),
.tag_matches(tag_matches_st0)
.tag_matches(tag_matches_st0),
// replacement
.repl_way (repl_way_st0),
.repl_tag (repl_tag_st0)
);
assign mshr_id_st0 = is_creq_st0 ? mshr_alloc_id_st0 : replay_id_st0;
assign way_sel_st0 = is_fill_st0 ? repl_way_st0 : (is_flush_st0 ? flush_way_st0 : tag_matches_st0);
wire [`CS_LINE_ADDR_WIDTH-1:0] addr_r_st0 = (is_fill_st0 || is_flush_st0) ? {repl_tag_st0, addr_st0[`CS_LINE_SEL_BITS-1:0]} : addr_st0;
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + NUM_WAYS + 1),
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1),
.RESETW (1)
) pipe_reg1 (
.clk (clk),
.reset (reset),
.enable (~pipe_stall),
.data_in ({valid_st0, is_replay_st0, is_fill_st0, is_creq_st0, rw_st0, addr_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, tag_matches_st0, way_sel_st0, mshr_pending_st0}),
.data_out ({valid_st1, is_replay_st1, is_fill_st1, is_creq_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, tag_matches_st1, way_sel_st1, mshr_pending_st1})
.data_in ({valid_st0, is_flush_st0, is_replay_st0, is_fill_st0, is_creq_st0, creq_flush_st0, rw_st0, addr_r_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, way_sel_st0, mshr_pending_st0}),
.data_out ({valid_st1, is_flush_st1, is_replay_st1, is_fill_st1, is_creq_st1, creq_flush_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, way_sel_st1, mshr_pending_st1})
);
// we have a tag hit
wire is_hit_st1 = (| tag_matches_st1);
wire is_hit_st1 = (| way_sel_st1);
if (UUID_WIDTH != 0) begin
assign req_uuid_st1 = tag_st1[TAG_WIDTH-1 -: UUID_WIDTH];
@ -292,37 +341,62 @@ module VX_cache_bank #(
assign req_uuid_st1 = 0;
end
wire do_creq_rd_st1 = valid_st1 && is_creq_st1 && ~rw_st1;
wire do_creq_wr_st1 = valid_st1 && is_creq_st1 && rw_st1;
wire is_read_st1 = is_creq_st1 && ~rw_st1;
wire is_write_st1 = is_creq_st1 && rw_st1;
wire do_creq_rd_st1 = valid_st1 && is_read_st1;
wire do_creq_wr_st1 = valid_st1 && is_write_st1;
wire do_fill_st1 = valid_st1 && is_fill_st1;
wire do_replay_rd_st1 = valid_st1 && is_replay_st1 && ~rw_st1;
wire do_replay_wr_st1 = valid_st1 && is_replay_st1 && rw_st1;
wire do_cache_rd_st1 = do_read_hit_st1 || do_replay_rd_st1;
wire do_cache_wr_st1 = do_write_hit_st1 || do_replay_wr_st1;
wire do_read_hit_st1 = do_creq_rd_st1 && is_hit_st1;
wire do_read_miss_st1 = do_creq_rd_st1 && ~is_hit_st1;
wire do_write_hit_st1 = do_creq_wr_st1 && is_hit_st1;
wire do_write_miss_st1= do_creq_wr_st1 && ~is_hit_st1;
wire do_flush_st1 = valid_st1 && is_flush_st1;
`UNUSED_VAR (do_write_miss_st1)
// ensure mshr replay always get a hit
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("runtime error: invalid mshr replay"));
// detect BRAM's read-during-write hazard
assign rdw_hazard_st0 = do_fill_st0; // after a fill
always @(posedge clk) begin
rdw_hazard_st1 <= (do_creq_rd_st0 && do_write_hit_st1 && (addr_st0 == addr_st1))
&& ~rdw_hazard_st1; // after a write to same address
// stage-0 hazard: raised whenever a fill occupies stage 0
assign rdw_hazard_st0 = do_fill_st0; // stall cycle after a fill
// case 1: a stage-0 cache read (core request or replay) targets the same line address as a cache write in stage 1
wire rdw_case1 = do_cache_rd_st0 && do_cache_wr_st1 && (addr_st0 == addr_st1); // standard cache access
// case 2 (writeback caches only): a flush or fill is in stage 0 while a cache write is in stage 1
wire rdw_case2 = WRITEBACK && (do_flush_st0 || do_fill_st0) && do_cache_wr_st1; // a writeback can evict preceding write
always @(posedge clk) begin // after a write to same address
// the ~rdw_hazard_st1 term prevents the flag from being set on two consecutive cycles
rdw_hazard_st1 <= (rdw_case1 || rdw_case2)
&& ~rdw_hazard_st1; // invalidate if pipeline stalled to avoid repeats
end
wire [`CS_WORD_WIDTH-1:0] write_data_st1 = data_st1[`CS_WORD_WIDTH-1:0];
wire [`CS_LINE_WIDTH-1:0] write_data_st1 = {`CS_WORDS_PER_LINE{data_st1[`CS_WORD_WIDTH-1:0]}};
wire [`CS_LINE_WIDTH-1:0] fill_data_st1 = data_st1;
wire [LINE_SIZE-1:0] write_byteen_st1;
wire [`CS_LINE_WIDTH-1:0] dirty_data_st1;
wire [LINE_SIZE-1:0] dirty_byteen_st1;
wire dirty_valid_st1;
// Expand the word-wide byte enable (byteen_st1) into a line-wide byte-enable mask (write_byteen_st1).
if (`CS_WORDS_PER_LINE > 1) begin
reg [LINE_SIZE-1:0] write_byteen_r;
always @(*) begin
// default: no byte of the line is enabled
write_byteen_r = '0;
// drop byteen_st1 into the WORD_SIZE-bit slot selected by wsel_st1
write_byteen_r[wsel_st1 * WORD_SIZE +: WORD_SIZE] = byteen_st1;
end
assign write_byteen_st1 = write_byteen_r;
end else begin
// one word per line: the word byte enable is used directly as the line byte enable
assign write_byteen_st1 = byteen_st1;
end
`RESET_RELAY (data_reset, reset);
VX_cache_data #(
.INSTANCE_ID (INSTANCE_ID),
.INSTANCE_ID ($sformatf("%s-data", INSTANCE_ID)),
.BANK_ID (BANK_ID),
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),
@ -330,6 +404,7 @@ module VX_cache_bank #(
.NUM_WAYS (NUM_WAYS),
.WORD_SIZE (WORD_SIZE),
.WRITE_ENABLE (WRITE_ENABLE),
.WRITEBACK (WRITEBACK),
.UUID_WIDTH (UUID_WIDTH)
) cache_data (
.clk (clk),
@ -339,23 +414,38 @@ module VX_cache_bank #(
.stall (pipe_stall),
.read (do_read_hit_st1 || do_replay_rd_st1),
.fill (do_fill_st1),
.write (do_write_hit_st1 || do_replay_wr_st1),
.way_sel (way_sel_st1 | tag_matches_st1),
.read (do_cache_rd_st1),
.fill (do_fill_st1 && ~rdw_hazard_st1),
.flush (do_flush_st1),
.write (do_cache_wr_st1),
.way_sel (way_sel_st1),
.line_addr (addr_st1),
.wsel (wsel_st1),
.byteen (byteen_st1),
.fill_data (fill_data_st1),
.write_data (write_data_st1),
.read_data (read_data_st1)
.write_byteen(write_byteen_st1),
.read_data (read_data_st1),
.dirty_valid(dirty_valid_st1),
.dirty_data (dirty_data_st1),
.dirty_byteen(dirty_byteen_st1)
);
wire [MSHR_SIZE-1:0] mshr_matches_st0;
wire [MSHR_SIZE-1:0] mshr_lookup_pending_st0;
wire [MSHR_SIZE-1:0] mshr_lookup_rw_st0;
wire mshr_allocate_st0 = valid_st0 && is_creq_st0 && ~pipe_stall;
wire mshr_lookup_st0 = mshr_allocate_st0;
wire mshr_finalize_st1 = valid_st1 && is_creq_st1 && ~pipe_stall;
wire mshr_release_st1 = is_hit_st1 || (rw_st1 && ~mshr_pending_st1);
// release allocated mshr entry if we had a hit
wire mshr_release_st1;
if (WRITEBACK) begin
assign mshr_release_st1 = is_hit_st1;
end else begin
// we need to keep missed write requests in MSHR if there is already a pending entry to the same address
// this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content
// this can happen when writes are sent late, when the fill was already in flight.
assign mshr_release_st1 = is_hit_st1 || (rw_st1 && ~mshr_pending_st1);
end
VX_pending_size #(
.SIZE (MSHR_SIZE)
@ -364,15 +454,17 @@ module VX_cache_bank #(
.reset (reset),
.incr (core_req_fire),
.decr (replay_fire || (mshr_finalize_st1 && mshr_release_st1)),
.empty (mshr_empty),
`UNUSED_PIN (alm_empty),
.full (mshr_alm_full),
`UNUSED_PIN (size),
`UNUSED_PIN (empty)
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
);
`RESET_RELAY (mshr_reset, reset);
VX_cache_mshr #(
.INSTANCE_ID (INSTANCE_ID),
.INSTANCE_ID ($sformatf("%s-mshr", INSTANCE_ID)),
.BANK_ID (BANK_ID),
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
@ -412,7 +504,8 @@ module VX_cache_bank #(
// lookup
.lookup_valid (mshr_lookup_st0),
.lookup_addr (addr_st0),
.lookup_matches (mshr_matches_st0),
.lookup_pending (mshr_lookup_pending_st0),
.lookup_rw (mshr_lookup_rw_st0),
// finalize
.finalize_valid (mshr_finalize_st1),
@ -422,10 +515,12 @@ module VX_cache_bank #(
.finalize_prev (mshr_prev_st1)
);
// ignore allocated id from mshr matches
// check if there are pending requests to same line in the MSHR
wire [MSHR_SIZE-1:0] lookup_matches;
for (genvar i = 0; i < MSHR_SIZE; ++i) begin
assign lookup_matches[i] = (i != mshr_alloc_id_st0) && mshr_matches_st0[i];
assign lookup_matches[i] = mshr_lookup_pending_st0[i]
&& (i != mshr_alloc_id_st0) // exclude current mshr id
&& (WRITEBACK || ~mshr_lookup_rw_st0[i]); // exclude write requests if writethrough
end
assign mshr_pending_st0 = (| lookup_matches);
@ -436,7 +531,7 @@ module VX_cache_bank #(
wire [REQ_SEL_WIDTH-1:0] crsp_queue_idx;
wire [TAG_WIDTH-1:0] crsp_queue_tag;
assign crsp_queue_valid = do_read_hit_st1 || do_replay_rd_st1;
assign crsp_queue_valid = do_cache_rd_st1;
assign crsp_queue_idx = req_idx_st1;
assign crsp_queue_data = read_data_st1;
assign crsp_queue_tag = tag_st1;
@ -463,29 +558,40 @@ module VX_cache_bank #(
// schedule memory request
wire mreq_queue_push, mreq_queue_pop, mreq_queue_empty;
wire [`CS_WORD_WIDTH-1:0] mreq_queue_data;
wire [WORD_SIZE-1:0] mreq_queue_byteen;
wire [WORD_SEL_WIDTH-1:0] mreq_queue_wsel;
wire [`CS_LINE_WIDTH-1:0] mreq_queue_data;
wire [LINE_SIZE-1:0] mreq_queue_byteen;
wire [`CS_LINE_ADDR_WIDTH-1:0] mreq_queue_addr;
wire [MSHR_ADDR_WIDTH-1:0] mreq_queue_id;
wire mreq_queue_rw;
wire mreq_queue_flush;
assign mreq_queue_push = (do_read_miss_st1 && ~mshr_pending_st1)
|| do_creq_wr_st1;
// a fill or flush in stage 1 for which cache_data reports dirty_valid
wire is_evict_st1 = (is_fill_st1 || is_flush_st1) && dirty_valid_st1;
wire do_writeback_st1 = valid_st1 && is_evict_st1;
// do_writeback_st1 is only consumed in the WRITEBACK branch below (and in debug tracing)
`UNUSED_VAR (do_writeback_st1)
if (WRITEBACK) begin
// writeback mode: push a memory request when a core read or write misses and no
// other MSHR entry is pending for the same line, or when a writeback is required;
// nothing is pushed while rdw_hazard_st1 is set
assign mreq_queue_push = (((do_read_miss_st1 || do_write_miss_st1) && ~mshr_pending_st1)
|| do_writeback_st1)
&& ~rdw_hazard_st1;
end else begin
`UNUSED_VAR (dirty_valid_st1)
// write-through mode: push on a core read miss with no pending MSHR entry for the
// same line, or on every core write (hit or miss);
// nothing is pushed while rdw_hazard_st1 is set
assign mreq_queue_push = ((do_read_miss_st1 && ~mshr_pending_st1)
|| do_creq_wr_st1)
&& ~rdw_hazard_st1;
end
assign mreq_queue_pop = mem_req_valid && mem_req_ready;
assign mreq_queue_rw = WRITE_ENABLE && rw_st1;
assign mreq_queue_rw = WRITE_ENABLE && (WRITEBACK ? is_evict_st1 : rw_st1);
assign mreq_queue_addr = addr_st1;
assign mreq_queue_id = mshr_id_st1;
assign mreq_queue_wsel = wsel_st1;
assign mreq_queue_byteen = byteen_st1;
assign mreq_queue_data = write_data_st1;
assign mreq_queue_data = is_write_st1 ? write_data_st1 : dirty_data_st1;
assign mreq_queue_byteen = is_write_st1 ? write_byteen_st1 : dirty_byteen_st1;
assign mreq_queue_flush = creq_flush_st1;
`RESET_RELAY (mreq_queue_reset, reset);
VX_fifo_queue #(
.DATAW (1 + `CS_LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + `CS_WORD_WIDTH),
.DATAW (1 + `CS_LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + 1),
.DEPTH (MREQ_SIZE),
.ALM_FULL (MREQ_SIZE-PIPELINE_STAGES),
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
@ -494,8 +600,8 @@ module VX_cache_bank #(
.reset (mreq_queue_reset),
.push (mreq_queue_push),
.pop (mreq_queue_pop),
.data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_id, mreq_queue_byteen, mreq_queue_wsel, mreq_queue_data}),
.data_out ({mem_req_rw, mem_req_addr, mem_req_id, mem_req_byteen, mem_req_wsel, mem_req_data}),
.data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_id, mreq_queue_byteen, mreq_queue_data, mreq_queue_flush}),
.data_out ({mem_req_rw, mem_req_addr, mem_req_id, mem_req_byteen, mem_req_data, mem_req_flush}),
.empty (mreq_queue_empty),
.alm_full (mreq_queue_alm_full),
`UNUSED_PIN (full),
@ -515,35 +621,34 @@ module VX_cache_bank #(
`ifdef DBG_TRACE_CACHE
wire crsp_queue_fire = crsp_queue_valid && crsp_queue_ready;
wire pipeline_stall = (replay_valid || mem_rsp_valid || core_req_valid)
&& ~(replay_fire || mem_rsp_fire || core_req_fire);
// Debug-trace helper: at least one bank input (MSHR replay, memory fill response,
// core request or line flush) is requesting service this cycle, but none of them
// was accepted. A flush counts as progress only when it actually fires
// (flush_fire = line_flush_valid && line_flush_ready), so a flush that is blocked
// by a full memory request queue or a pipeline stall is reported like any other
// stalled input instead of silently masking the stall.
wire pipeline_stall = (replay_valid || mem_rsp_valid || core_req_valid || line_flush_valid)
&& ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire);
always @(posedge clk) begin
if (pipeline_stall) begin
`TRACE(3, ("%d: *** %s-bank%0d stall: crsq=%b, mreq=%b, mshr=%b\n", $time, INSTANCE_ID, BANK_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full));
end
if (init_enable) begin
`TRACE(2, ("%d: %s-bank%0d init: addr=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(init_line_sel, BANK_ID)));
`TRACE(3, ("%d: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw_st0=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard_st0));
end
if (mem_rsp_fire) begin
`TRACE(2, ("%d: %s-bank%0d fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data));
`TRACE(2, ("%d: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data));
end
if (replay_fire) begin
`TRACE(2, ("%d: %s-bank%0d mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel));
`TRACE(2, ("%d: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel));
end
if (core_req_fire) begin
if (core_req_rw)
`TRACE(2, ("%d: %s-bank%0d core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel));
`TRACE(2, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel));
else
`TRACE(2, ("%d: %s-bank%0d core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel));
`TRACE(2, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel));
end
if (crsp_queue_fire) begin
`TRACE(2, ("%d: %s-bank%0d core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1));
`TRACE(2, ("%d: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1));
end
if (mreq_queue_push) begin
if (do_creq_wr_st1)
`TRACE(2, ("%d: %s-bank%0d writethrough: addr=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1));
if (do_creq_wr_st1 && !WRITEBACK)
`TRACE(2, ("%d: %s writethrough: addr=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1));
else if (do_writeback_st1)
`TRACE(2, ("%d: %s writeback: addr=0x%0h, byteen=%b, data=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data));
else
`TRACE(2, ("%d: %s-bank%0d fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1));
`TRACE(2, ("%d: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1));
end
end
`endif

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -18,16 +18,16 @@ module VX_cache_bypass #(
parameter TAG_SEL_IDX = 0,
parameter PASSTHRU = 0,
parameter NC_ENABLE = 0,
parameter NC_ENABLE = 0,
parameter WORD_SIZE = 1,
parameter LINE_SIZE = 1,
parameter LINE_SIZE = 1,
parameter CORE_ADDR_WIDTH = 1,
parameter CORE_TAG_WIDTH = 1,
parameter MEM_ADDR_WIDTH = 1,
parameter MEM_ADDR_WIDTH = 1,
parameter MEM_TAG_IN_WIDTH = 1,
parameter MEM_TAG_OUT_WIDTH = 1,
@ -35,9 +35,9 @@ module VX_cache_bypass #(
parameter CORE_OUT_BUF = 0,
parameter MEM_OUT_BUF = 0,
parameter CORE_DATA_WIDTH = WORD_SIZE * 8
) (
) (
input wire clk,
input wire reset,
@ -71,40 +71,39 @@ module VX_cache_bypass #(
wire core_req_nc_valid;
wire [NUM_REQS-1:0] core_req_nc_valids;
wire [NUM_REQS-1:0] core_req_nc_idxs;
wire [NUM_REQS-1:0] core_req_nc_idxs;
wire [`UP(REQ_SEL_BITS)-1:0] core_req_nc_idx;
wire [NUM_REQS-1:0] core_req_nc_sel;
wire [NUM_REQS-1:0] core_req_nc_sel;
wire core_req_nc_ready;
for (genvar i = 0; i < NUM_REQS; ++i) begin
for (genvar i = 0; i < NUM_REQS; ++i) begin
if (PASSTHRU != 0) begin
assign core_req_nc_idxs[i] = 1'b1;
end else if (NC_ENABLE) begin
assign core_req_nc_idxs[i] = core_bus_in_if[i].req_data.atype[`ADDR_TYPE_IO];
end else begin
assign core_req_nc_idxs[i] = 1'b0;
end
end
assign core_req_nc_valids[i] = core_bus_in_if[i].req_valid && core_req_nc_idxs[i];
end
end
VX_generic_arbiter #(
.NUM_REQS (NUM_REQS),
.TYPE (PASSTHRU ? "R" : "P"),
.LOCK_ENABLE (1)
.TYPE (PASSTHRU ? "R" : "P")
) core_req_nc_arb (
.clk (clk),
.reset (reset),
.requests (core_req_nc_valids),
.reset (reset),
.requests (core_req_nc_valids),
.grant_index (core_req_nc_idx),
.grant_onehot (core_req_nc_sel),
.grant_valid (core_req_nc_valid),
.grant_unlock (core_req_nc_ready)
.grant_ready (core_req_nc_ready)
);
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_bus_out_if[i].req_valid = core_bus_in_if[i].req_valid && ~core_req_nc_idxs[i];
assign core_bus_out_if[i].req_data = core_bus_in_if[i].req_data;
assign core_bus_in_if[i].req_ready = core_req_nc_valids[i] ? (core_req_nc_ready && core_req_nc_sel[i])
assign core_bus_in_if[i].req_ready = core_req_nc_valids[i] ? (core_req_nc_ready && core_req_nc_sel[i])
: core_bus_out_if[i].req_ready;
end
@ -118,7 +117,7 @@ module VX_cache_bypass #(
wire [`CS_LINE_WIDTH-1:0] mem_req_out_data;
wire [MEM_TAG_OUT_WIDTH-1:0] mem_req_out_tag;
wire mem_req_out_ready;
wire core_req_nc_sel_rw;
wire [WORD_SIZE-1:0] core_req_nc_sel_byteen;
wire [CORE_ADDR_WIDTH-1:0] core_req_nc_sel_addr;
@ -129,22 +128,22 @@ module VX_cache_bypass #(
wire [NUM_REQS-1:0][MUX_DATAW-1:0] core_req_nc_mux_in;
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_req_nc_mux_in[i] = {
core_bus_in_if[i].req_data.rw,
core_bus_in_if[i].req_data.rw,
core_bus_in_if[i].req_data.byteen,
core_bus_in_if[i].req_data.addr,
core_bus_in_if[i].req_data.atype,
core_bus_in_if[i].req_data.data,
core_bus_in_if[i].req_data.tag
core_bus_in_if[i].req_data.tag
};
end
assign {
core_req_nc_sel_rw,
core_req_nc_sel_byteen,
core_req_nc_sel_addr,
core_req_nc_sel_atype,
core_req_nc_sel_data,
core_req_nc_sel_tag
core_req_nc_sel_tag
} = core_req_nc_mux_in[core_req_nc_idx];
assign core_req_nc_ready = ~mem_bus_in_if.req_valid && mem_req_out_ready;
@ -157,11 +156,11 @@ module VX_cache_bypass #(
wire [MEM_TAG_ID_BITS-1:0] mem_req_tag_id_bypass;
wire [CORE_TAG_ID_BITS-1:0] core_req_in_id = core_req_nc_sel_tag[CORE_TAG_ID_BITS-1:0];
if (WORDS_PER_LINE > 1) begin
reg [WORDS_PER_LINE-1:0][WORD_SIZE-1:0] mem_req_byteen_in_r;
reg [WORDS_PER_LINE-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_r;
wire [WSEL_BITS-1:0] req_wsel = core_req_nc_sel_addr[WSEL_BITS-1:0];
always @(*) begin
@ -176,7 +175,7 @@ module VX_cache_bypass #(
assign mem_req_out_data = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.data : mem_req_data_in_r;
if (NUM_REQS > 1) begin
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, req_wsel, core_req_in_id});
end else begin
end else begin
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({req_wsel, core_req_in_id});
end
end else begin
@ -189,7 +188,7 @@ module VX_cache_bypass #(
end
end
wire [MEM_TAG_BYPASS_BITS-1:0] mem_req_tag_bypass;
wire [MEM_TAG_BYPASS_BITS-1:0] mem_req_tag_bypass;
if (UUID_WIDTH != 0) begin
assign mem_req_tag_bypass = {core_req_nc_sel_tag[CORE_TAG_ID_BITS +: UUID_WIDTH], mem_req_tag_id_bypass};
@ -202,7 +201,7 @@ module VX_cache_bypass #(
`UNUSED_VAR (mem_bus_in_if.req_data.tag)
end else begin
if (NC_ENABLE) begin
VX_bits_insert #(
VX_bits_insert #(
.N (MEM_TAG_OUT_WIDTH-1),
.S (1),
.POS (TAG_SEL_IDX)
@ -213,8 +212,8 @@ module VX_cache_bypass #(
);
end else begin
assign mem_req_out_tag = mem_bus_in_if.req_data.tag;
end
end
end
end
assign mem_bus_in_if.req_ready = mem_req_out_ready;
@ -225,11 +224,11 @@ module VX_cache_bypass #(
) mem_req_buf (
.clk (clk),
.reset (reset),
.valid_in (mem_req_out_valid),
.ready_in (mem_req_out_ready),
.valid_in (mem_req_out_valid),
.ready_in (mem_req_out_ready),
.data_in ({mem_req_out_rw, mem_req_out_byteen, mem_req_out_addr, mem_req_out_atype, mem_req_out_data, mem_req_out_tag}),
.data_out ({mem_bus_out_if.req_data.rw, mem_bus_out_if.req_data.byteen, mem_bus_out_if.req_data.addr, mem_bus_out_if.req_data.atype, mem_bus_out_if.req_data.data, mem_bus_out_if.req_data.tag}),
.valid_out (mem_bus_out_if.req_valid),
.valid_out (mem_bus_out_if.req_valid),
.ready_out (mem_bus_out_if.req_ready)
);
@ -253,7 +252,7 @@ module VX_cache_bypass #(
wire [(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1:0] mem_rsp_tag_id_nc;
VX_bits_remove #(
VX_bits_remove #(
.N (MEM_TAG_OUT_WIDTH),
.S (NC_ENABLE),
.POS (TAG_SEL_IDX)
@ -265,10 +264,10 @@ module VX_cache_bypass #(
wire [`UP(REQ_SEL_BITS)-1:0] rsp_idx;
if (NUM_REQS > 1) begin
assign rsp_idx = mem_rsp_tag_id_nc[(CORE_TAG_ID_BITS + WSEL_BITS) +: REQ_SEL_BITS];
end else begin
end else begin
assign rsp_idx = 1'b0;
end
reg [NUM_REQS-1:0] rsp_nc_valid_r;
always @(*) begin
rsp_nc_valid_r = '0;
@ -277,13 +276,13 @@ module VX_cache_bypass #(
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || rsp_nc_valid_r[i];
assign core_bus_out_if[i].rsp_ready = core_rsp_in_ready[i];
assign core_bus_out_if[i].rsp_ready = core_rsp_in_ready[i];
end
if (WORDS_PER_LINE > 1) begin
wire [WSEL_BITS-1:0] rsp_wsel = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS +: WSEL_BITS];
wire [WSEL_BITS-1:0] rsp_wsel = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS +: WSEL_BITS];
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ?
assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ?
core_bus_out_if[i].rsp_data.data : mem_bus_out_if.rsp_data.data[rsp_wsel * CORE_DATA_WIDTH +: CORE_DATA_WIDTH];
end
end else begin
@ -306,7 +305,7 @@ module VX_cache_bypass #(
assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_valid ? core_bus_out_if[i].rsp_data.tag : mem_rsp_tag_in_nc2;
end else begin
assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_data.tag;
end
end
end
for (genvar i = 0; i < NUM_REQS; ++i) begin
@ -320,7 +319,7 @@ module VX_cache_bypass #(
.valid_in (core_rsp_in_valid[i]),
.ready_in (core_rsp_in_ready[i]),
.data_in ({core_rsp_in_data[i], core_rsp_in_tag[i]}),
.data_out ({core_bus_in_if[i].rsp_data.data, core_bus_in_if[i].rsp_data.tag}),
.data_out ({core_bus_in_if[i].rsp_data.data, core_bus_in_if[i].rsp_data.tag}),
.valid_out (core_bus_in_if[i].rsp_valid),
.ready_out (core_bus_in_if[i].rsp_ready)
);
@ -341,7 +340,7 @@ module VX_cache_bypass #(
assign mem_bus_in_if.rsp_data.data = mem_bus_out_if.rsp_data.data;
assign mem_bus_in_if.rsp_data.tag = mem_rsp_tag_id_nc;
end
wire [NUM_REQS-1:0] core_rsp_out_valid;
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_out_valid[i] = core_bus_out_if[i].rsp_valid;

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -24,20 +24,20 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
parameter NUM_REQS = 4,
// Size of cache in bytes
parameter CACHE_SIZE = 16384,
parameter CACHE_SIZE = 16384,
// Size of line inside a bank in bytes
parameter LINE_SIZE = 64,
parameter LINE_SIZE = 64,
// Number of banks
parameter NUM_BANKS = 1,
// Number of associative ways
parameter NUM_WAYS = 4,
// Size of a word in bytes
parameter WORD_SIZE = 4,
parameter WORD_SIZE = 4,
// Core Response Queue Size
parameter CRSQ_SIZE = 2,
// Miss Reserv Queue Knob
parameter MSHR_SIZE = 8,
parameter MSHR_SIZE = 8,
// Memory Response Queue Size
parameter MRSQ_SIZE = 0,
// Memory Request Queue Size
@ -46,6 +46,9 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
// Enable cache writeable
parameter WRITE_ENABLE = 1,
// Enable cache writeback
parameter WRITEBACK = 0,
// Request debug identifier
parameter UUID_WIDTH = 0,
@ -60,7 +63,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
// Memory request output buffer
parameter MEM_OUT_BUF = 0
) (
) (
input wire clk,
input wire reset,
@ -74,17 +77,16 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
);
localparam NUM_CACHES = `UP(NUM_UNITS);
localparam PASSTHRU = (NUM_UNITS == 0);
localparam ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES);
localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
localparam ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES);
localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS));
`STATIC_ASSERT(NUM_INPUTS >= NUM_CACHES, ("invalid parameter"))
`ifdef PERF_ENABLE
cache_perf_t perf_cache_tmp[1], perf_cache_unit[NUM_CACHES];
`PERF_CACHE_ADD (perf_cache_tmp, perf_cache_unit, 1, NUM_CACHES)
assign cache_perf = perf_cache_tmp[0];
cache_perf_t perf_cache_unit[NUM_CACHES];
`PERF_CACHE_ADD (cache_perf, perf_cache_unit, NUM_CACHES)
`endif
VX_mem_bus_if #(
@ -97,8 +99,6 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
.TAG_WIDTH (ARB_TAG_WIDTH)
) arb_core_bus_if[NUM_CACHES * NUM_REQS]();
`RESET_RELAY (arb_reset, reset);
for (genvar i = 0; i < NUM_REQS; ++i) begin
VX_mem_bus_if #(
.DATA_SIZE (WORD_SIZE),
@ -114,6 +114,8 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
`ASSIGN_VX_MEM_BUS_IF (core_bus_tmp_if[j], core_bus_if[j * NUM_REQS + i]);
end
`RESET_RELAY (arb_reset, reset);
VX_mem_arb #(
.NUM_INPUTS (NUM_INPUTS),
.NUM_OUTPUTS (NUM_CACHES),
@ -135,9 +137,9 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
end
end
`RESET_RELAY (cache_reset, reset);
for (genvar i = 0; i < NUM_CACHES; ++i) begin : caches
for (genvar i = 0; i < NUM_CACHES; ++i) begin
`RESET_RELAY (cache_reset, reset);
VX_cache_wrap #(
.INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, i)),
@ -152,6 +154,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
.MRSQ_SIZE (MRSQ_SIZE),
.MREQ_SIZE (MREQ_SIZE),
.WRITE_ENABLE (WRITE_ENABLE),
.WRITEBACK (WRITEBACK),
.UUID_WIDTH (UUID_WIDTH),
.TAG_WIDTH (ARB_TAG_WIDTH),
.TAG_SEL_IDX (TAG_SEL_IDX),

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -17,17 +17,19 @@ module VX_cache_data #(
parameter `STRING INSTANCE_ID= "",
parameter BANK_ID = 0,
// Size of cache in bytes
parameter CACHE_SIZE = 1024,
parameter CACHE_SIZE = 1024,
// Size of line inside a bank in bytes
parameter LINE_SIZE = 16,
parameter LINE_SIZE = 16,
// Number of banks
parameter NUM_BANKS = 1,
parameter NUM_BANKS = 1,
// Number of associative ways
parameter NUM_WAYS = 1,
// Size of a word in bytes
parameter WORD_SIZE = 1,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
// Enable cache writeback
parameter WRITEBACK = 0,
// Request debug identifier
parameter UUID_WIDTH = 0
) (
@ -41,59 +43,100 @@ module VX_cache_data #(
input wire stall,
input wire read,
input wire fill,
input wire fill,
input wire flush,
input wire write,
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
input wire [`UP(`CS_WORD_SEL_BITS)-1:0] wsel,
input wire [WORD_SIZE-1:0] byteen,
input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data,
input wire [`CS_WORD_WIDTH-1:0] write_data,
input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] write_data,
input wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen,
input wire [NUM_WAYS-1:0] way_sel,
output wire [`CS_WORD_WIDTH-1:0] read_data
output wire [`CS_WORD_WIDTH-1:0] read_data,
output wire dirty_valid,
output wire [`CS_LINE_WIDTH-1:0] dirty_data,
output wire [LINE_SIZE-1:0] dirty_byteen
);
`UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_PARAM (BANK_ID)
`UNUSED_PARAM (WORD_SIZE)
`UNUSED_VAR (reset)
`UNUSED_VAR (stall)
`UNUSED_VAR (line_addr)
`UNUSED_VAR (read)
`UNUSED_VAR (flush)
localparam BYTEENW = (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) ? (LINE_SIZE * NUM_WAYS) : 1;
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
wire [`LOG2UP(NUM_WAYS)-1:0] way_idx;
if (WRITEBACK) begin
reg [`CS_LINES_PER_BANK * NUM_WAYS-1:0][LINE_SIZE-1:0] dirty_bytes_r;
reg [`CS_LINES_PER_BANK * NUM_WAYS-1:0] dirty_blocks_r;
wire [`CLOG2(`CS_LINES_PER_BANK * NUM_WAYS)-1:0] way_addr;
if (NUM_WAYS > 1) begin
assign way_addr = {line_sel, way_idx};
end else begin
assign way_addr = line_sel;
end
always @(posedge clk) begin
if (fill) begin
dirty_bytes_r[way_addr] <= '0;
end else if (write) begin
dirty_bytes_r[way_addr] <= dirty_bytes_r[way_addr] | write_byteen;
end
end
always @(posedge clk) begin
if (reset) begin
for (integer i = 0; i < `CS_LINES_PER_BANK * NUM_WAYS; ++i) begin
dirty_blocks_r[i] <= 0;
end
end else begin
if (fill) begin
dirty_blocks_r[way_addr] <= 0;
end else if (write) begin
dirty_blocks_r[way_addr] <= 1;
end
end
end
assign dirty_byteen = dirty_bytes_r[way_addr];
assign dirty_valid = dirty_blocks_r[way_addr];
end else begin
assign dirty_byteen = '0;
assign dirty_valid = 0;
end
// order the data layout to perform ways multiplexing last.
// this allows converting way index to binary in parallel with BRAM read.
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] wdata;
wire [BYTEENW-1:0] wren;
if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin
reg [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] wdata_r;
reg [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wren_r;
always @(*) begin
wdata_r = {`CS_WORDS_PER_LINE{write_data}};
wren_r = '0;
wren_r[wsel] = byteen;
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
assign wdata[i] = (fill || !WRITE_ENABLE) ? {NUM_WAYS{fill_data[i]}} : {NUM_WAYS{write_data[i]}};
end
// order the data layout to perform ways multiplexing last
// this allows performing onehot encoding of the way index in parallel with BRAM read.
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w;
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
assign wdata[i] = fill ? {NUM_WAYS{fill_data[i]}} : {NUM_WAYS{wdata_r[i]}};
for (genvar j = 0; j < NUM_WAYS; ++j) begin
assign wren_w[i][j] = (fill ? {WORD_SIZE{1'b1}} : wren_r[i])
& {WORD_SIZE{((NUM_WAYS == 1) || way_sel[j])}};
assign wren_w[i][j] = ((fill || !WRITE_ENABLE) ? {WORD_SIZE{1'b1}} : write_byteen[i])
& {WORD_SIZE{(way_sel[j] || (NUM_WAYS == 1))}};
end
end
assign wren = wren_w;
end else begin
`UNUSED_VAR (write)
`UNUSED_VAR (byteen)
`UNUSED_VAR (write_byteen)
`UNUSED_VAR (write_data)
assign wdata = fill_data;
assign wren = fill;
end
wire [`LOG2UP(NUM_WAYS)-1:0] way_idx;
VX_onehot_encoder #(
.N (NUM_WAYS)
@ -105,8 +148,6 @@ module VX_cache_data #(
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] rdata;
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
VX_sp_ram #(
.DATAW (`CS_LINE_WIDTH * NUM_WAYS),
.SIZE (`CS_LINES_PER_BANK),
@ -119,34 +160,41 @@ module VX_cache_data #(
.wren (wren),
.addr (line_sel),
.wdata (wdata),
.rdata (rdata)
.rdata (rdata)
);
wire [NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata;
if (`CS_WORDS_PER_LINE > 1) begin
assign per_way_rdata = rdata[wsel];
end else begin
`UNUSED_VAR (wsel)
assign per_way_rdata = rdata;
end
end
assign read_data = per_way_rdata[way_idx];
`UNUSED_VAR (stall)
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] dirty_data_w;
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
for (genvar j = 0; j < NUM_WAYS; ++j) begin
assign dirty_data_w[j][i] = rdata[i][j];
end
end
assign dirty_data = dirty_data_w[way_idx];
`ifdef DBG_TRACE_CACHE
always @(posedge clk) begin
always @(posedge clk) begin
if (fill && ~stall) begin
`TRACE(3, ("%d: %s-bank%0d data-fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data));
`TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data));
end
if (flush && ~stall) begin
`TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b, byteen=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_valid, dirty_byteen));
end
if (read && ~stall) begin
`TRACE(3, ("%d: %s-bank%0d data-read: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, read_data, req_uuid));
end
`TRACE(3, ("%d: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid));
end
if (write && ~stall) begin
`TRACE(3, ("%d: %s-bank%0d data-write: addr=0x%0h, way=%b, blk_addr=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, byteen, write_data, req_uuid));
end
end
`TRACE(3, ("%d: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid));
end
end
`endif
endmodule

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -14,7 +14,7 @@
`ifndef VX_CACHE_DEFINE_VH
`define VX_CACHE_DEFINE_VH
`include "VX_define.vh"
`include "VX_define.vh"
`define CS_REQ_SEL_BITS `CLOG2(NUM_REQS)
@ -50,7 +50,7 @@
`define CS_TAG_SEL_ADDR_START (1+`CS_LINE_SEL_ADDR_END)
`define CS_TAG_SEL_ADDR_END (`CS_WORD_ADDR_WIDTH-1)
`define CS_LINE_TAG_ADDR(x) x[`CS_LINE_ADDR_WIDTH-1 : `CS_LINE_SEL_BITS]
`define CS_LINE_ADDR_TAG(x) x[`CS_LINE_ADDR_WIDTH-1 : `CS_LINE_SEL_BITS]
///////////////////////////////////////////////////////////////////////////////
@ -64,14 +64,14 @@
///////////////////////////////////////////////////////////////////////////////
`define PERF_CACHE_ADD(dst, src, dcount, scount) \
`PERF_COUNTER_ADD (dst, src, reads, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \
`PERF_COUNTER_ADD (dst, src, writes, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \
`PERF_COUNTER_ADD (dst, src, read_misses, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \
`PERF_COUNTER_ADD (dst, src, write_misses, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \
`PERF_COUNTER_ADD (dst, src, bank_stalls, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \
`PERF_COUNTER_ADD (dst, src, mshr_stalls, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \
`PERF_COUNTER_ADD (dst, src, mem_stalls, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \
`PERF_COUNTER_ADD (dst, src, crsp_stalls, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1))
`define PERF_CACHE_ADD(dst, src, count) \
`PERF_COUNTER_ADD (dst, src, reads, `PERF_CTR_BITS, count, (count > 1)) \
`PERF_COUNTER_ADD (dst, src, writes, `PERF_CTR_BITS, count, (count > 1)) \
`PERF_COUNTER_ADD (dst, src, read_misses, `PERF_CTR_BITS, count, (count > 1)) \
`PERF_COUNTER_ADD (dst, src, write_misses, `PERF_CTR_BITS, count, (count > 1)) \
`PERF_COUNTER_ADD (dst, src, bank_stalls, `PERF_CTR_BITS, count, (count > 1)) \
`PERF_COUNTER_ADD (dst, src, mshr_stalls, `PERF_CTR_BITS, count, (count > 1)) \
`PERF_COUNTER_ADD (dst, src, mem_stalls, `PERF_CTR_BITS, count, (count > 1)) \
`PERF_COUNTER_ADD (dst, src, crsp_stalls, `PERF_CTR_BITS, count, (count > 1))
`endif // VX_CACHE_DEFINE_VH

154
hw/rtl/cache/VX_cache_flush.sv vendored Normal file
View file

@ -0,0 +1,154 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_cache_define.vh"
// Cache flush sequencer.
//
// Sits between the core request ports and the cache. When any core input
// presents a valid request whose atype has the `ADDR_TYPE_FLUSH bit set,
// all core requests are held back, the in-flight requests are drained
// (only tracked when BANK_SEL_LATENCY != 0), flush_valid is raised to every
// bank, and once every bank has reported flush_ready the held flush
// requests are released downstream. Responses pass through unmodified.
module VX_cache_flush #(
    // Number of Word requests per cycle
    parameter NUM_REQS  = 4,
    // Number of banks
    parameter NUM_BANKS = 1,
    // Bank select latency
    parameter BANK_SEL_LATENCY = 1
) (
    input wire clk,
    input wire reset,

    // core-side request/response ports (upstream)
    VX_mem_bus_if.slave  core_bus_in_if [NUM_REQS],
    // cache-side request/response ports (downstream)
    VX_mem_bus_if.master core_bus_out_if [NUM_REQS],

    // per-bank pulse: a request has been consumed by the bank
    input wire [NUM_BANKS-1:0]  bank_req_fire,

    // per-bank flush handshake
    output wire [NUM_BANKS-1:0] flush_valid,
    input wire [NUM_BANKS-1:0]  flush_ready
);
    localparam STATE_IDLE  = 0; // no flush pending
    localparam STATE_WAIT  = 1; // draining in-flight requests
    localparam STATE_FLUSH = 2; // flush_valid asserted, collecting flush_ready
    localparam STATE_DONE  = 3; // releasing the held flush requests

    // track in-flight core requests
    wire no_inflight_reqs;

    if (BANK_SEL_LATENCY != 0) begin

        localparam NUM_REQS_W  = `CLOG2(NUM_REQS+1);
        localparam NUM_BANKS_W = `CLOG2(NUM_BANKS+1);

        // requests accepted downstream this cycle
        wire [NUM_REQS-1:0] core_bus_out_fire;
        for (genvar i = 0; i < NUM_REQS; ++i) begin
            assign core_bus_out_fire[i] = core_bus_out_if[i].req_valid && core_bus_out_if[i].req_ready;
        end

        wire [NUM_REQS_W-1:0]  core_bus_out_cnt;
        wire [NUM_BANKS_W-1:0] bank_req_cnt;

        `POP_COUNT(core_bus_out_cnt, core_bus_out_fire);
        `POP_COUNT(bank_req_cnt, bank_req_fire);
        `UNUSED_VAR (core_bus_out_cnt)

        // counts requests issued downstream minus requests consumed by banks;
        // the issue count is cast to the bank-count width before use
        VX_pending_size #(
            .SIZE  (BANK_SEL_LATENCY * NUM_BANKS),
            .INCRW (NUM_BANKS_W),
            .DECRW (NUM_BANKS_W)
        ) pending_size (
            .clk   (clk),
            .reset (reset),
            .incr  (NUM_BANKS_W'(core_bus_out_cnt)),
            .decr  (bank_req_cnt),
            .empty (no_inflight_reqs),
            `UNUSED_PIN (alm_empty),
            `UNUSED_PIN (full),
            `UNUSED_PIN (alm_full),
            `UNUSED_PIN (size)
        );

    end else begin
        // STATE_WAIT is never entered in this configuration (STATE_IDLE goes
        // straight to STATE_FLUSH), so this value is not consumed by the FSM.
        assign no_inflight_reqs = 0;
        `UNUSED_VAR (bank_req_fire)
    end

    reg [1:0] state, state_n;
    reg [NUM_BANKS-1:0] flush_done, flush_done_n;

    // inputs currently presenting a flush request
    wire [NUM_REQS-1:0] flush_req_mask;
    for (genvar i = 0; i < NUM_REQS; ++i) begin
        assign flush_req_mask[i] = core_bus_in_if[i].req_valid && core_bus_in_if[i].req_data.atype[`ADDR_TYPE_FLUSH];
    end
    wire flush_req_enable = (| flush_req_mask);

    // inputs whose held request may now proceed downstream
    reg [NUM_REQS-1:0] lock_released, lock_released_n;

    // request path: blocked while any flush request is pending,
    // except for ports that have been explicitly released
    for (genvar i = 0; i < NUM_REQS; ++i) begin
        wire input_enable = ~flush_req_enable || lock_released[i];
        assign core_bus_out_if[i].req_valid = core_bus_in_if[i].req_valid && input_enable;
        assign core_bus_out_if[i].req_data  = core_bus_in_if[i].req_data;
        assign core_bus_in_if[i].req_ready  = core_bus_out_if[i].req_ready && input_enable;
    end

    // response path: straight pass-through
    for (genvar i = 0; i < NUM_REQS; ++i) begin
        assign core_bus_in_if[i].rsp_valid  = core_bus_out_if[i].rsp_valid;
        assign core_bus_in_if[i].rsp_data   = core_bus_out_if[i].rsp_data;
        assign core_bus_out_if[i].rsp_ready = core_bus_in_if[i].rsp_ready;
    end

    wire [NUM_REQS-1:0] core_bus_out_ready;
    for (genvar i = 0; i < NUM_REQS; ++i) begin
        assign core_bus_out_ready[i] = core_bus_out_if[i].req_ready;
    end

    always @(*) begin
        state_n         = state;
        flush_done_n    = flush_done;
        lock_released_n = lock_released;
        case (state)
            STATE_IDLE: begin
                if (flush_req_enable) begin
                    // skip the drain step when in-flight requests are not tracked
                    state_n = (BANK_SEL_LATENCY != 0) ? STATE_WAIT : STATE_FLUSH;
                end
            end
            STATE_WAIT: begin
                if (no_inflight_reqs) begin
                    state_n = STATE_FLUSH;
                end
            end
            STATE_FLUSH: begin
                // accumulate per-bank completions; banks may respond on different cycles
                flush_done_n = flush_done | flush_ready;
                // proceed only once EVERY bank has responded. Comparing against
                // zero here would either exit before any bank responded or,
                // once a single bit is set, never exit at all.
                if (& flush_done_n) begin
                    state_n = STATE_DONE;
                    // clear the accumulator so the next flush starts from scratch
                    flush_done_n = '0;
                    // release only the ports that are presenting a flush request
                    lock_released_n = flush_req_mask;
                end
            end
            STATE_DONE: begin
                // drop the release bit of each port whose downstream side is ready
                lock_released_n = lock_released & ~core_bus_out_ready;
                if (lock_released_n == 0) begin
                    state_n = STATE_IDLE;
                end
            end
        endcase
    end

    always @(posedge clk) begin
        if (reset) begin
            state         <= STATE_IDLE;
            flush_done    <= '0;
            lock_released <= '0;
        end else begin
            state         <= state_n;
            flush_done    <= flush_done_n;
            lock_released <= lock_released_n;
        end
    end

    // NOTE(review): flush_valid stays asserted to all banks for the whole
    // STATE_FLUSH phase, including banks that already reported flush_ready;
    // confirm the bank-side logic tolerates this.
    assign flush_valid = {NUM_BANKS{state == STATE_FLUSH}};

endmodule

View file

@ -13,6 +13,7 @@
`include "VX_cache_define.vh"
// cache flush unit
module VX_cache_init #(
// Size of cache in bytes
parameter CACHE_SIZE = 1024,

View file

@ -104,7 +104,8 @@ module VX_cache_mshr #(
// lookup
input wire lookup_valid,
input wire [`CS_LINE_ADDR_WIDTH-1:0] lookup_addr,
output wire [MSHR_SIZE-1:0] lookup_matches,
output wire [MSHR_SIZE-1:0] lookup_pending,
output wire [MSHR_SIZE-1:0] lookup_rw,
// finalize
input wire finalize_valid,
@ -216,13 +217,13 @@ module VX_cache_mshr #(
next_table <= next_table_n;
end
`RUNTIME_ASSERT((~allocate_fire || ~valid_table[allocate_id_r]), ("%t: *** %s-bank%0d inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, BANK_ID,
`RUNTIME_ASSERT((~allocate_fire || ~valid_table[allocate_id_r]), ("%t: *** %s inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id_r, lkp_req_uuid))
`RUNTIME_ASSERT((~finalize_valid || valid_table[finalize_id]), ("%t: *** %s-bank%0d invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, BANK_ID,
`RUNTIME_ASSERT((~finalize_valid || valid_table[finalize_id]), ("%t: *** %s invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_table[finalize_id], BANK_ID), finalize_id, fin_req_uuid))
`RUNTIME_ASSERT((~fill_valid || valid_table[fill_id]), ("%t: *** %s-bank%0d invalid fill: addr=0x%0h, id=%0d", $time, INSTANCE_ID, BANK_ID,
`RUNTIME_ASSERT((~fill_valid || valid_table[fill_id]), ("%t: *** %s invalid fill: addr=0x%0h, id=%0d", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), fill_id))
VX_dp_ram #(
@ -251,7 +252,9 @@ module VX_cache_mshr #(
assign dequeue_rw = write_table[dequeue_id_r];
assign dequeue_id = dequeue_id_r;
assign lookup_matches = addr_matches & ~write_table;
// return pending entries for the given cache line
assign lookup_pending = addr_matches;
assign lookup_rw = write_table;
`UNUSED_VAR (lookup_valid)
@ -264,22 +267,22 @@ module VX_cache_mshr #(
show_table <= allocate_fire || lookup_valid || finalize_valid || fill_valid || dequeue_fire;
end
if (allocate_fire)
`TRACE(3, ("%d: %s-bank%0d mshr-allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID,
`TRACE(3, ("%d: %s allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_prev, allocate_id, lkp_req_uuid));
if (lookup_valid)
`TRACE(3, ("%d: %s-bank%0d mshr-lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID, BANK_ID,
`CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_matches, lkp_req_uuid));
`TRACE(3, ("%d: %s lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_pending, lkp_req_uuid));
if (finalize_valid)
`TRACE(3, ("%d: %s-bank%0d mshr-finalize release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID,
`TRACE(3, ("%d: %s finalize release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID,
finalize_release, finalize_pending, finalize_prev, finalize_id, fin_req_uuid));
if (fill_valid)
`TRACE(3, ("%d: %s-bank%0d mshr-fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID, BANK_ID,
`TRACE(3, ("%d: %s fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), `CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id));
if (dequeue_fire)
`TRACE(3, ("%d: %s-bank%0d mshr-dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID,
`TRACE(3, ("%d: %s dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_uuid));
if (show_table) begin
`TRACE(3, ("%d: %s-bank%0d mshr-table", $time, INSTANCE_ID, BANK_ID));
`TRACE(3, ("%d: %s table", $time, INSTANCE_ID));
for (integer i = 0; i < MSHR_SIZE; ++i) begin
if (valid_table[i]) begin
`TRACE(3, (" %0d=0x%0h", i, `CS_LINE_TO_FULL_ADDR(addr_table[i], BANK_ID)));

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -17,15 +17,15 @@ module VX_cache_tags #(
parameter `STRING INSTANCE_ID = "",
parameter BANK_ID = 0,
// Size of cache in bytes
parameter CACHE_SIZE = 1024,
parameter CACHE_SIZE = 1024,
// Size of line inside a bank in bytes
parameter LINE_SIZE = 16,
parameter LINE_SIZE = 16,
// Number of banks
parameter NUM_BANKS = 1,
parameter NUM_BANKS = 1,
// Number of associative ways
parameter NUM_WAYS = 1,
parameter NUM_WAYS = 1,
// Size of a word in bytes
parameter WORD_SIZE = 1,
parameter WORD_SIZE = 1,
// Request debug identifier
parameter UUID_WIDTH = 0
) (
@ -38,45 +38,63 @@ module VX_cache_tags #(
input wire stall,
// read/fill
// init/fill/lookup
input wire init,
input wire fill,
input wire lookup,
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
input wire fill,
input wire init,
output wire [NUM_WAYS-1:0] way_sel,
output wire [NUM_WAYS-1:0] tag_matches
output wire [NUM_WAYS-1:0] tag_matches,
// replacement
output wire [NUM_WAYS-1:0] repl_way,
output wire [`CS_TAG_SEL_BITS-1:0] repl_tag
);
`UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_PARAM (BANK_ID)
`UNUSED_VAR (reset)
`UNUSED_VAR (lookup)
// valid, tag
localparam TAG_WIDTH = 1 + `CS_TAG_SEL_BITS;
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_TAG_ADDR(line_addr);
wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_ADDR_TAG(line_addr);
wire [NUM_WAYS-1:0][`CS_TAG_SEL_BITS-1:0] read_tag;
wire [NUM_WAYS-1:0] read_valid;
if (NUM_WAYS > 1) begin
reg [NUM_WAYS-1:0] repl_way;
reg [NUM_WAYS-1:0] repl_way_r;
// cyclic assignment of replacement way
always @(posedge clk) begin
if (reset) begin
repl_way <= 1;
repl_way_r <= 1;
end else if (~stall) begin // hold the value on stalls prevent filling different slots twice
repl_way <= {repl_way[NUM_WAYS-2:0], repl_way[NUM_WAYS-1]};
repl_way_r <= {repl_way_r[NUM_WAYS-2:0], repl_way_r[NUM_WAYS-1]};
end
end
for (genvar i = 0; i < NUM_WAYS; ++i) begin
assign way_sel[i] = fill && repl_way[i];
end
assign repl_way = repl_way_r;
VX_onehot_mux #(
.DATAW (`CS_TAG_SEL_BITS),
.N (NUM_WAYS)
) repl_tag_sel (
.data_in (read_tag),
.sel_in (repl_way_r),
.data_out (repl_tag)
);
end else begin
`UNUSED_VAR (stall)
assign way_sel = fill;
assign repl_way = 1'b1;
assign repl_tag = read_tag;
end
for (genvar i = 0; i < NUM_WAYS; ++i) begin
wire [`CS_TAG_SEL_BITS-1:0] read_tag;
wire read_valid;
wire do_fill = fill && repl_way[i];
wire do_write = init || do_fill;
wire line_valid = ~init;
VX_sp_ram #(
.DATAW (TAG_WIDTH),
@ -85,32 +103,34 @@ module VX_cache_tags #(
) tag_store (
.clk (clk),
.read (1'b1),
.write (way_sel[i] || init),
`UNUSED_PIN (wren),
.write (do_write),
`UNUSED_PIN (wren),
.addr (line_sel),
.wdata ({~init, line_tag}),
.rdata ({read_valid, read_tag})
.wdata ({line_valid, line_tag}),
.rdata ({read_valid[i], read_tag[i]})
);
assign tag_matches[i] = read_valid && (line_tag == read_tag);
end
for (genvar i = 0; i < NUM_WAYS; ++i) begin
assign tag_matches[i] = read_valid[i] && (line_tag == read_tag[i]);
end
`ifdef DBG_TRACE_CACHE
always @(posedge clk) begin
if (fill && ~stall) begin
`TRACE(3, ("%d: %s-bank%0d tag-fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, line_tag));
`TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), repl_way, line_sel, line_tag));
end
if (init) begin
`TRACE(3, ("%d: %s-bank%0d tag-init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel));
`TRACE(3, ("%d: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel));
end
if (lookup && ~stall) begin
if (tag_matches != 0) begin
`TRACE(3, ("%d: %s-bank%0d tag-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, line_tag, req_uuid));
`TRACE(3, ("%d: %s hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid));
end else begin
`TRACE(3, ("%d: %s-bank%0d tag-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid));
`TRACE(3, ("%d: %s miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid));
end
end
end
end
end
`endif
endmodule

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -23,20 +23,20 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
// Size of cache in bytes
parameter CACHE_SIZE = 4096,
parameter CACHE_SIZE = 4096,
// Size of line inside a bank in bytes
parameter LINE_SIZE = 64,
parameter LINE_SIZE = 64,
// Number of banks
parameter NUM_BANKS = 1,
// Number of associative ways
parameter NUM_WAYS = 1,
// Size of a word in bytes
parameter WORD_SIZE = 4,
parameter WORD_SIZE = 4,
// Core Response Queue Size
parameter CRSQ_SIZE = 2,
// Miss Reserv Queue Knob
parameter MSHR_SIZE = 8,
parameter MSHR_SIZE = 8,
// Memory Response Queue Size
parameter MRSQ_SIZE = 0,
// Memory Request Queue Size
@ -45,6 +45,9 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
// Enable cache writeable
parameter WRITE_ENABLE = 1,
// Enable cache writeback
parameter WRITEBACK = 0,
// Request debug identifier
parameter UUID_WIDTH = 0,
@ -63,7 +66,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
// Memory request output buffer
parameter MEM_OUT_BUF = 0
) (
input wire clk,
input wire reset,
@ -80,7 +83,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE);
localparam CACHE_MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS;
localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS));
@ -98,7 +101,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
) mem_bus_cache_if();
if (NC_OR_BYPASS) begin
`RESET_RELAY (nc_bypass_reset, reset);
VX_cache_bypass #(
@ -108,13 +111,13 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
.PASSTHRU (PASSTHRU),
.NC_ENABLE (PASSTHRU ? 0 : NC_ENABLE),
.WORD_SIZE (WORD_SIZE),
.WORD_SIZE (WORD_SIZE),
.LINE_SIZE (LINE_SIZE),
.CORE_ADDR_WIDTH (`CS_WORD_ADDR_WIDTH),
.CORE_ADDR_WIDTH (`CS_WORD_ADDR_WIDTH),
.CORE_TAG_WIDTH (TAG_WIDTH),
.MEM_ADDR_WIDTH (`CS_MEM_ADDR_WIDTH),
.MEM_ADDR_WIDTH (`CS_MEM_ADDR_WIDTH),
.MEM_TAG_IN_WIDTH (CACHE_MEM_TAG_WIDTH),
.MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH),
@ -132,15 +135,15 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
.mem_bus_in_if (mem_bus_cache_if),
.mem_bus_out_if (mem_bus_if)
);
end else begin
for (genvar i = 0; i < NUM_REQS; ++i) begin
`ASSIGN_VX_MEM_BUS_IF (core_bus_cache_if[i], core_bus_if[i]);
`ASSIGN_VX_MEM_BUS_IF (core_bus_cache_if[i], core_bus_if[i]);
end
`ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_cache_if);
end
end
if (PASSTHRU != 0) begin
@ -152,7 +155,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
assign core_bus_cache_if[i].rsp_valid = 0;
assign core_bus_cache_if[i].rsp_data = '0;
`UNUSED_VAR (core_bus_cache_if[i].rsp_ready)
end
end
assign mem_bus_cache_if.req_valid = 0;
assign mem_bus_cache_if.req_data = '0;
@ -183,6 +186,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
.MRSQ_SIZE (MRSQ_SIZE),
.MREQ_SIZE (MREQ_SIZE),
.WRITE_ENABLE (WRITE_ENABLE),
.WRITEBACK (WRITEBACK),
.UUID_WIDTH (UUID_WIDTH),
.TAG_WIDTH (TAG_WIDTH),
.CORE_OUT_BUF (NC_OR_BYPASS ? 1 : CORE_OUT_BUF),
@ -195,8 +199,8 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
`endif
.core_bus_if (core_bus_cache_if),
.mem_bus_if (mem_bus_cache_if)
);
);
end
`ifdef DBG_TRACE_CACHE
@ -225,9 +229,9 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
end
if (core_rsp_fire) begin
`TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid));
end
end
end
end
end
wire [`UP(UUID_WIDTH)-1:0] mem_req_uuid;
wire [`UP(UUID_WIDTH)-1:0] mem_rsp_uuid;
@ -246,17 +250,17 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
always @(posedge clk) begin
if (mem_req_fire) begin
if (mem_bus_if.req_data.rw)
`TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
`TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid));
else
`TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
`TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid));
end
if (mem_rsp_fire) begin
`TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n",
`TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n",
$time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid));
end
end
end
`endif
endmodule

View file

@ -14,7 +14,7 @@
`include "VX_define.vh"
module VX_alu_int #(
parameter CORE_ID = 0,
parameter `STRING INSTANCE_ID = "",
parameter BLOCK_IDX = 0,
parameter NUM_LANES = 1
) (
@ -29,7 +29,7 @@ module VX_alu_int #(
VX_branch_ctl_if.master branch_ctl_if
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_SPARAM (INSTANCE_ID)
localparam LANE_BITS = `CLOG2(NUM_LANES);
localparam LANE_WIDTH = `UP(LANE_BITS);
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
@ -121,7 +121,7 @@ module VX_alu_int #(
case ({is_alu_w, op_class})
3'b000: alu_result[i] = add_result[i]; // ADD, LUI, AUIPC
3'b001: alu_result[i] = sub_slt_br_result; // SUB, SLTU, SLTI, BR*
3'b010: alu_result[i] = shr_zic_result[i]; // SRL, SRA, SRLI, SRAI, CZERO*
3'b010: alu_result[i] = shr_zic_result[i]; // SRL, SRA, SRLI, SRAI, CZERO*
3'b011: alu_result[i] = msc_result[i]; // AND, OR, XOR, SLL, SLLI
3'b100: alu_result[i] = add_result_w[i]; // ADDIW, ADDW
3'b101: alu_result[i] = sub_result_w[i]; // SUBW
@ -181,7 +181,7 @@ module VX_alu_int #(
.clk (clk),
.reset (reset),
.enable (1'b1),
.data_in ({br_enable, br_wid, br_taken, br_dest}),
.data_in ({br_enable, br_wid, br_taken, br_dest}),
.data_out ({branch_ctl_if.valid, branch_ctl_if.wid, branch_ctl_if.taken, branch_ctl_if.dest})
);
@ -193,9 +193,9 @@ module VX_alu_int #(
`ifdef DBG_TRACE_PIPELINE
always @(posedge clk) begin
if (branch_ctl_if.valid) begin
`TRACE(1, ("%d: core%0d-branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n",
$time, CORE_ID, branch_ctl_if.wid, {commit_if.data.PC, 1'b0}, branch_ctl_if.taken, {branch_ctl_if.dest, 1'b0}, commit_if.data.uuid));
if (br_enable) begin
`TRACE(1, ("%d: %s-branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n",
$time, INSTANCE_ID, br_wid, {commit_if.data.PC, 1'b0}, br_taken, {br_dest, 1'b0}, commit_if.data.uuid));
end
end
`endif

View file

@ -14,7 +14,7 @@
`include "VX_define.vh"
module VX_alu_muldiv #(
parameter CORE_ID = 0,
parameter `STRING INSTANCE_ID = "",
parameter NUM_LANES = 1
) (
input wire clk,
@ -26,7 +26,7 @@ module VX_alu_muldiv #(
// Outputs
VX_commit_if.master commit_if
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_SPARAM (INSTANCE_ID)
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
localparam PID_WIDTH = `UP(PID_BITS);
localparam TAG_WIDTH = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + PID_WIDTH + 1 + 1;
@ -69,7 +69,7 @@ module VX_alu_muldiv #(
wire mul_fire_in = mul_valid_in && mul_ready_in;
for (genvar i = 0; i < NUM_LANES; ++i) begin
wire [`XLEN-1:0] mul_resultl, mul_resulth;
reg [`XLEN-1:0] mul_resultl, mul_resulth;
wire [`XLEN-1:0] mul_in1 = is_alu_w ? (execute_if.data.rs1_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs1_data[i];
wire [`XLEN-1:0] mul_in2 = is_alu_w ? (execute_if.data.rs2_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs2_data[i];
always @(*) begin
@ -235,7 +235,7 @@ module VX_alu_muldiv #(
wire div_fire_in = div_valid_in && div_ready_in;
for (genvar i = 0; i < NUM_LANES; ++i) begin
wire [`XLEN-1:0] div_quotient, div_remainder;
reg [`XLEN-1:0] div_quotient, div_remainder;
always @(*) begin
dpi_idiv (div_fire_in, is_signed_op, div_in1[i], div_in2[i], div_quotient, div_remainder);
end

View file

@ -14,7 +14,7 @@
`include "VX_define.vh"
module VX_alu_unit #(
parameter CORE_ID = 0
parameter `STRING INSTANCE_ID = ""
) (
input wire clk,
input wire reset,
@ -27,7 +27,7 @@ module VX_alu_unit #(
VX_branch_ctl_if.master branch_ctl_if [`NUM_ALU_BLOCKS]
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_SPARAM (INSTANCE_ID)
localparam BLOCK_SIZE = `NUM_ALU_BLOCKS;
localparam NUM_LANES = `NUM_ALU_LANES;
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
@ -75,7 +75,7 @@ module VX_alu_unit #(
`RESET_RELAY (int_reset, block_reset);
VX_alu_int #(
.CORE_ID (CORE_ID),
.INSTANCE_ID ($sformatf("%s-int%0d", INSTANCE_ID, block_idx)),
.BLOCK_IDX (block_idx),
.NUM_LANES (NUM_LANES)
) alu_int (
@ -90,59 +90,61 @@ module VX_alu_unit #(
VX_execute_if #(
.NUM_LANES (NUM_LANES)
) mdv_execute_if();
) muldiv_execute_if();
VX_commit_if #(
.NUM_LANES (NUM_LANES)
) mdv_commit_if();
) muldiv_commit_if();
assign mdv_execute_if.valid = per_block_execute_if[block_idx].valid && is_muldiv_op;
assign mdv_execute_if.data = per_block_execute_if[block_idx].data;
assign muldiv_execute_if.valid = per_block_execute_if[block_idx].valid && is_muldiv_op;
assign muldiv_execute_if.data = per_block_execute_if[block_idx].data;
`RESET_RELAY (mdv_reset, block_reset);
`RESET_RELAY (muldiv_reset, block_reset);
VX_alu_muldiv #(
.CORE_ID (CORE_ID),
.INSTANCE_ID ($sformatf("%s-muldiv%0d", INSTANCE_ID, block_idx)),
.NUM_LANES (NUM_LANES)
) mdv_unit (
) muldiv_unit (
.clk (clk),
.reset (mdv_reset),
.execute_if (mdv_execute_if),
.commit_if (mdv_commit_if)
.reset (muldiv_reset),
.execute_if (muldiv_execute_if),
.commit_if (muldiv_commit_if)
);
`endif
assign per_block_execute_if[block_idx].ready =
`ifdef EXT_M_ENABLE
is_muldiv_op ? mdv_execute_if.ready :
is_muldiv_op ? muldiv_execute_if.ready :
`endif
int_execute_if.ready;
// send response
`RESET_RELAY (arb_reset, block_reset);
VX_stream_arb #(
.NUM_INPUTS (RSP_ARB_SIZE),
.DATAW (RSP_ARB_DATAW),
.OUT_BUF (PARTIAL_BW ? 1 : 3)
) rsp_arb (
.clk (clk),
.reset (block_reset),
.reset (arb_reset),
.valid_in ({
`ifdef EXT_M_ENABLE
mdv_commit_if.valid,
muldiv_commit_if.valid,
`endif
int_commit_if.valid
}),
.ready_in ({
`ifdef EXT_M_ENABLE
mdv_commit_if.ready,
muldiv_commit_if.ready,
`endif
int_commit_if.ready
}),
.data_in ({
`ifdef EXT_M_ENABLE
mdv_commit_if.data,
muldiv_commit_if.data,
`endif
int_commit_if.data
}),

View file

@ -13,8 +13,8 @@
`include "VX_define.vh"
module VX_commit import VX_gpu_pkg::*; #(
parameter CORE_ID = 0
module VX_commit import VX_gpu_pkg::*, VX_trace_pkg::*; #(
parameter `STRING INSTANCE_ID = ""
) (
input wire clk,
input wire reset,
@ -27,7 +27,7 @@ module VX_commit import VX_gpu_pkg::*; #(
VX_commit_csr_if.master commit_csr_if,
VX_commit_sched_if.master commit_sched_if
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_SPARAM (INSTANCE_ID)
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + 1 + `NR_BITS + `NUM_THREADS * `XLEN + 1 + 1 + 1;
localparam COMMIT_SIZEW = `CLOG2(`NUM_THREADS + 1);
localparam COMMIT_ALL_SIZEW = COMMIT_SIZEW + `ISSUE_WIDTH - 1;
@ -36,12 +36,10 @@ module VX_commit import VX_gpu_pkg::*; #(
VX_commit_if commit_arb_if[`ISSUE_WIDTH]();
wire [`ISSUE_WIDTH-1:0] commit_fire;
wire [`ISSUE_WIDTH-1:0][`NW_WIDTH-1:0] commit_wid;
wire [`ISSUE_WIDTH-1:0][`NUM_THREADS-1:0] commit_tmask;
wire [`ISSUE_WIDTH-1:0] commit_eop;
`RESET_RELAY (arb_reset, reset);
wire [`ISSUE_WIDTH-1:0] per_issue_commit_fire;
wire [`ISSUE_WIDTH-1:0][`NW_WIDTH-1:0] per_issue_commit_wid;
wire [`ISSUE_WIDTH-1:0][`NUM_THREADS-1:0] per_issue_commit_tmask;
wire [`ISSUE_WIDTH-1:0] per_issue_commit_eop;
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
@ -55,6 +53,8 @@ module VX_commit import VX_gpu_pkg::*; #(
assign commit_if[j * `ISSUE_WIDTH + i].ready = ready_in[j];
end
`RESET_RELAY (arb_reset, reset);
VX_stream_arb #(
.NUM_INPUTS (`NUM_EX_UNITS),
.DATAW (DATAW),
@ -72,10 +72,10 @@ module VX_commit import VX_gpu_pkg::*; #(
`UNUSED_PIN (sel_out)
);
assign commit_fire[i] = commit_arb_if[i].valid && commit_arb_if[i].ready;
assign commit_tmask[i]= {`NUM_THREADS{commit_fire[i]}} & commit_arb_if[i].data.tmask;
assign commit_wid[i] = commit_arb_if[i].data.wid;
assign commit_eop[i] = commit_arb_if[i].data.eop;
assign per_issue_commit_fire[i] = commit_arb_if[i].valid && commit_arb_if[i].ready;
assign per_issue_commit_tmask[i]= {`NUM_THREADS{per_issue_commit_fire[i]}} & commit_arb_if[i].data.tmask;
assign per_issue_commit_wid[i] = commit_arb_if[i].data.wid;
assign per_issue_commit_eop[i] = commit_arb_if[i].data.eop;
end
// CSRs update
@ -84,11 +84,11 @@ module VX_commit import VX_gpu_pkg::*; #(
wire [COMMIT_ALL_SIZEW-1:0] commit_size_all_r, commit_size_all_rr;
wire commit_fire_any, commit_fire_any_r, commit_fire_any_rr;
assign commit_fire_any = (| commit_fire);
assign commit_fire_any = (| per_issue_commit_fire);
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
wire [COMMIT_SIZEW-1:0] count;
`POP_COUNT(count, commit_tmask[i]);
`POP_COUNT(count, per_issue_commit_tmask[i]);
assign commit_size[i] = count;
end
@ -136,19 +136,28 @@ module VX_commit import VX_gpu_pkg::*; #(
end
assign commit_csr_if.instret = instret;
// Committed instructions
// Track committed instructions
wire [`ISSUE_WIDTH-1:0] committed = commit_fire & commit_eop;
// Per-warp commit bitmask (one bit per warp, `NUM_WARPS bits wide).
// Bit w is raised in a given cycle when at least one issue slot fires a commit
// whose eop flag is set and whose warp id equals w.
reg [`NUM_WARPS-1:0] committed_warps;
always @(*) begin
// default: no warp is reported as committed
committed_warps = 0;
// scan every issue slot; the loop is purely combinational
for (integer i = 0; i < `ISSUE_WIDTH; ++i) begin
// only count a slot whose commit fired this cycle AND carries the eop flag
if (per_issue_commit_fire[i] && per_issue_commit_eop[i]) begin
// index by the slot's warp id; several slots hitting the same warp
// simply set the same bit (no count is kept)
committed_warps[per_issue_commit_wid[i]] = 1;
end
end
end
VX_pipe_register #(
.DATAW (`ISSUE_WIDTH * (1 + `NW_WIDTH)),
.RESETW (`ISSUE_WIDTH)
.DATAW (`NUM_WARPS),
.RESETW (`NUM_WARPS)
) committed_pipe_reg (
.clk (clk),
.reset (reset),
.enable (1'b1),
.data_in ({committed, commit_wid}),
.data_out ({commit_sched_if.committed, commit_sched_if.committed_wid})
.data_in (committed_warps),
.data_out ({commit_sched_if.committed_warps})
);
// Writeback
@ -171,7 +180,7 @@ module VX_commit import VX_gpu_pkg::*; #(
for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin
always @(posedge clk) begin
if (commit_if[j * `ISSUE_WIDTH + i].valid && commit_if[j * `ISSUE_WIDTH + i].ready) begin
`TRACE(1, ("%d: core%0d-commit: wid=%0d, PC=0x%0h, ex=", $time, CORE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0}));
`TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0}));
trace_ex_type(1, j);
`TRACE(1, (", tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", commit_if[j * `ISSUE_WIDTH + i].data.tmask, commit_if[j * `ISSUE_WIDTH + i].data.wb, commit_if[j * `ISSUE_WIDTH + i].data.rd, commit_if[j * `ISSUE_WIDTH + i].data.sop, commit_if[j * `ISSUE_WIDTH + i].data.eop));
`TRACE_ARRAY1D(1, "0x%0h", commit_if[j * `ISSUE_WIDTH + i].data.data, `NUM_THREADS);

View file

@ -18,7 +18,8 @@
`endif
module VX_core import VX_gpu_pkg::*; #(
parameter CORE_ID = 0
parameter CORE_ID = 0,
parameter `STRING INSTANCE_ID = ""
) (
`SCOPE_IO_DECL
@ -94,13 +95,14 @@ module VX_core import VX_gpu_pkg::*; #(
`SCOPE_IO_SWITCH (3)
VX_schedule #(
.INSTANCE_ID ($sformatf("%s-schedule", INSTANCE_ID)),
.CORE_ID (CORE_ID)
) schedule (
.clk (clk),
.reset (schedule_reset),
`ifdef PERF_ENABLE
.perf_schedule_if (pipeline_perf_if.schedule),
.sched_perf (pipeline_perf_if.sched),
`endif
.base_dcrs (base_dcrs),
@ -121,7 +123,7 @@ module VX_core import VX_gpu_pkg::*; #(
);
VX_fetch #(
.CORE_ID (CORE_ID)
.INSTANCE_ID ($sformatf("%s-fetch", INSTANCE_ID))
) fetch (
`SCOPE_IO_BIND (0)
.clk (clk),
@ -132,7 +134,7 @@ module VX_core import VX_gpu_pkg::*; #(
);
VX_decode #(
.CORE_ID (CORE_ID)
.INSTANCE_ID ($sformatf("%s-decode", INSTANCE_ID))
) decode (
.clk (clk),
.reset (decode_reset),
@ -142,7 +144,7 @@ module VX_core import VX_gpu_pkg::*; #(
);
VX_issue #(
.CORE_ID (CORE_ID)
.INSTANCE_ID ($sformatf("%s-issue", INSTANCE_ID))
) issue (
`SCOPE_IO_BIND (1)
@ -150,7 +152,7 @@ module VX_core import VX_gpu_pkg::*; #(
.reset (issue_reset),
`ifdef PERF_ENABLE
.perf_issue_if (pipeline_perf_if.issue),
.issue_perf (pipeline_perf_if.issue),
`endif
.decode_if (decode_if),
@ -159,6 +161,7 @@ module VX_core import VX_gpu_pkg::*; #(
);
VX_execute #(
.INSTANCE_ID ($sformatf("%s-execute", INSTANCE_ID)),
.CORE_ID (CORE_ID)
) execute (
`SCOPE_IO_BIND (2)
@ -186,7 +189,7 @@ module VX_core import VX_gpu_pkg::*; #(
);
VX_commit #(
.CORE_ID (CORE_ID)
.INSTANCE_ID ($sformatf("%s-commit", INSTANCE_ID))
) commit (
.clk (clk),
.reset (commit_reset),
@ -210,7 +213,7 @@ module VX_core import VX_gpu_pkg::*; #(
`RESET_RELAY (lmem_unit_reset, reset);
VX_lmem_unit #(
.CORE_ID (CORE_ID)
.INSTANCE_ID (INSTANCE_ID)
) lmem_unit (
.clk (clk),
.reset (lmem_unit_reset),
@ -229,20 +232,20 @@ module VX_core import VX_gpu_pkg::*; #(
`endif
VX_lsu_mem_if #(
.NUM_LANES (DCACHE_CHANNELS),
.DATA_SIZE (DCACHE_WORD_SIZE),
.TAG_WIDTH (DCACHE_TAG_WIDTH)
) dcache_coalesced_if[`NUM_LSU_BLOCKS]();
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin
VX_lsu_mem_if #(
.NUM_LANES (DCACHE_CHANNELS),
.DATA_SIZE (DCACHE_WORD_SIZE),
.TAG_WIDTH (DCACHE_TAG_WIDTH)
) dcache_coalesced_if();
`RESET_RELAY (coalescer_reset, reset);
if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
`RESET_RELAY (mem_coalescer_reset, reset);
VX_mem_coalescer #(
.INSTANCE_ID ($sformatf("core%0d-coalescer", CORE_ID)),
.INSTANCE_ID ($sformatf("%s-coalescer%0d", INSTANCE_ID, i)),
.NUM_REQS (`NUM_LSU_LANES),
.DATA_IN_SIZE (LSU_WORD_SIZE),
.DATA_OUT_SIZE (DCACHE_WORD_SIZE),
@ -251,9 +254,9 @@ module VX_core import VX_gpu_pkg::*; #(
.TAG_WIDTH (LSU_TAG_WIDTH),
.UUID_WIDTH (`UUID_WIDTH),
.QUEUE_SIZE (`LSUQ_OUT_SIZE)
) coalescer (
) mem_coalescer (
.clk (clk),
.reset (coalescer_reset),
.reset (mem_coalescer_reset),
// Input request
.in_req_valid (lsu_dcache_if[i].req_valid),
@ -274,42 +277,37 @@ module VX_core import VX_gpu_pkg::*; #(
.in_rsp_ready (lsu_dcache_if[i].rsp_ready),
// Output request
.out_req_valid (dcache_coalesced_if[i].req_valid),
.out_req_mask (dcache_coalesced_if[i].req_data.mask),
.out_req_rw (dcache_coalesced_if[i].req_data.rw),
.out_req_byteen (dcache_coalesced_if[i].req_data.byteen),
.out_req_addr (dcache_coalesced_if[i].req_data.addr),
.out_req_atype (dcache_coalesced_if[i].req_data.atype),
.out_req_data (dcache_coalesced_if[i].req_data.data),
.out_req_tag (dcache_coalesced_if[i].req_data.tag),
.out_req_ready (dcache_coalesced_if[i].req_ready),
.out_req_valid (dcache_coalesced_if.req_valid),
.out_req_mask (dcache_coalesced_if.req_data.mask),
.out_req_rw (dcache_coalesced_if.req_data.rw),
.out_req_byteen (dcache_coalesced_if.req_data.byteen),
.out_req_addr (dcache_coalesced_if.req_data.addr),
.out_req_atype (dcache_coalesced_if.req_data.atype),
.out_req_data (dcache_coalesced_if.req_data.data),
.out_req_tag (dcache_coalesced_if.req_data.tag),
.out_req_ready (dcache_coalesced_if.req_ready),
// Output response
.out_rsp_valid (dcache_coalesced_if[i].rsp_valid),
.out_rsp_mask (dcache_coalesced_if[i].rsp_data.mask),
.out_rsp_data (dcache_coalesced_if[i].rsp_data.data),
.out_rsp_tag (dcache_coalesced_if[i].rsp_data.tag),
.out_rsp_ready (dcache_coalesced_if[i].rsp_ready)
.out_rsp_valid (dcache_coalesced_if.rsp_valid),
.out_rsp_mask (dcache_coalesced_if.rsp_data.mask),
.out_rsp_data (dcache_coalesced_if.rsp_data.data),
.out_rsp_tag (dcache_coalesced_if.rsp_data.tag),
.out_rsp_ready (dcache_coalesced_if.rsp_ready)
);
end else begin
`ASSIGN_VX_LSU_MEM_IF (dcache_coalesced_if, lsu_dcache_if[i]);
end
end else begin
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
`ASSIGN_VX_LSU_MEM_IF (dcache_coalesced_if[i], lsu_dcache_if[i]);
end
end
`RESET_RELAY (lsu_adapter_reset, reset);
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
VX_mem_bus_if #(
.DATA_SIZE (DCACHE_WORD_SIZE),
.TAG_WIDTH (DCACHE_TAG_WIDTH)
) dcache_bus_tmp_if[DCACHE_CHANNELS]();
`RESET_RELAY (lsu_adapter_reset, reset);
VX_lsu_adapter #(
.NUM_LANES (DCACHE_CHANNELS),
.DATA_SIZE (DCACHE_WORD_SIZE),
@ -320,15 +318,17 @@ module VX_core import VX_gpu_pkg::*; #(
) lsu_adapter (
.clk (clk),
.reset (lsu_adapter_reset),
.lsu_mem_if (dcache_coalesced_if[i]),
.lsu_mem_if (dcache_coalesced_if),
.mem_bus_if (dcache_bus_tmp_if)
);
for (genvar j = 0; j < DCACHE_CHANNELS; ++j) begin
`ASSIGN_VX_MEM_BUS_IF (dcache_bus_if[i * DCACHE_CHANNELS + j], dcache_bus_tmp_if[j]);
end
end
`ifdef PERF_ENABLE
wire [`CLOG2(LSU_NUM_REQS+1)-1:0] perf_dcache_rd_req_per_cycle;

View file

@ -144,6 +144,7 @@ module VX_core_top import VX_gpu_pkg::*; #(
`endif
VX_core #(
.INSTANCE_ID ($sformatf("core")),
.CORE_ID (CORE_ID)
) core (
`SCOPE_IO_BIND (0)

View file

@ -26,13 +26,13 @@
addr+12'h80 : dst = 32'(src[$bits(src)-1:32])
`endif
module VX_csr_data
import VX_gpu_pkg::*;
`ifdef EXT_F_ENABLE
import VX_fpu_pkg::*;
`endif
#(
parameter `STRING INSTANCE_ID = "",
parameter CORE_ID = 0
) (
input wire clk,
@ -147,7 +147,7 @@ import VX_fpu_pkg::*;
mscratch <= write_data;
end
default: begin
`ASSERT(0, ("%t: *** invalid CSR write address: %0h (#%0d)", $time, write_addr, write_uuid));
`ASSERT(0, ("%t: *** %s invalid CSR write address: %0h (#%0d)", $time, INSTANCE_ID, write_addr, write_uuid));
end
endcase
end
@ -212,21 +212,21 @@ import VX_fpu_pkg::*;
`VX_DCR_MPM_CLASS_CORE: begin
case (read_addr)
// PERF: pipeline
`CSR_READ_64(`VX_CSR_MPM_SCHED_ID, read_data_ro_r, pipeline_perf_if.sched_idles);
`CSR_READ_64(`VX_CSR_MPM_SCHED_ST, read_data_ro_r, pipeline_perf_if.sched_stalls);
`CSR_READ_64(`VX_CSR_MPM_IBUF_ST, read_data_ro_r, pipeline_perf_if.ibf_stalls);
`CSR_READ_64(`VX_CSR_MPM_SCRB_ST, read_data_ro_r, pipeline_perf_if.scb_stalls);
`CSR_READ_64(`VX_CSR_MPM_SCRB_ALU, read_data_ro_r, pipeline_perf_if.units_uses[`EX_ALU]);
`CSR_READ_64(`VX_CSR_MPM_SCHED_ID, read_data_ro_r, pipeline_perf_if.sched.idles);
`CSR_READ_64(`VX_CSR_MPM_SCHED_ST, read_data_ro_r, pipeline_perf_if.sched.stalls);
`CSR_READ_64(`VX_CSR_MPM_IBUF_ST, read_data_ro_r, pipeline_perf_if.issue.ibf_stalls);
`CSR_READ_64(`VX_CSR_MPM_SCRB_ST, read_data_ro_r, pipeline_perf_if.issue.scb_stalls);
`CSR_READ_64(`VX_CSR_MPM_OPDS_ST, read_data_ro_r, pipeline_perf_if.issue.opd_stalls);
`CSR_READ_64(`VX_CSR_MPM_SCRB_ALU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_ALU]);
`ifdef EXT_F_ENABLE
`CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_r, pipeline_perf_if.units_uses[`EX_FPU]);
`CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_FPU]);
`else
`VX_CSR_MPM_SCRB_FPU : read_data_ro_r = '0;
`VX_CSR_MPM_SCRB_FPU_H : read_data_ro_r = '0;
`CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_r, `PERF_CTR_BITS'(0));
`endif
`CSR_READ_64(`VX_CSR_MPM_SCRB_LSU, read_data_ro_r, pipeline_perf_if.units_uses[`EX_LSU]);
`CSR_READ_64(`VX_CSR_MPM_SCRB_SFU, read_data_ro_r, pipeline_perf_if.units_uses[`EX_SFU]);
`CSR_READ_64(`VX_CSR_MPM_SCRB_CSRS, read_data_ro_r, pipeline_perf_if.sfu_uses[`SFU_CSRS]);
`CSR_READ_64(`VX_CSR_MPM_SCRB_WCTL, read_data_ro_r, pipeline_perf_if.sfu_uses[`SFU_WCTL]);
`CSR_READ_64(`VX_CSR_MPM_SCRB_LSU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_LSU]);
`CSR_READ_64(`VX_CSR_MPM_SCRB_SFU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_SFU]);
`CSR_READ_64(`VX_CSR_MPM_SCRB_CSRS, read_data_ro_r, pipeline_perf_if.issue.sfu_uses[`SFU_CSRS]);
`CSR_READ_64(`VX_CSR_MPM_SCRB_WCTL, read_data_ro_r, pipeline_perf_if.issue.sfu_uses[`SFU_WCTL]);
// PERF: memory
`CSR_READ_64(`VX_CSR_MPM_IFETCHES, read_data_ro_r, pipeline_perf_if.ifetches);
`CSR_READ_64(`VX_CSR_MPM_LOADS, read_data_ro_r, pipeline_perf_if.loads);

View file

@ -14,6 +14,7 @@
`include "VX_define.vh"
module VX_csr_unit import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "",
parameter CORE_ID = 0,
parameter NUM_LANES = 1
) (
@ -36,7 +37,7 @@ module VX_csr_unit import VX_gpu_pkg::*; #(
VX_execute_if.slave execute_if,
VX_commit_if.master commit_if
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_SPARAM (INSTANCE_ID)
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
localparam PID_WIDTH = `UP(PID_BITS);
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
@ -72,7 +73,8 @@ module VX_csr_unit import VX_gpu_pkg::*; #(
wire csr_write_enable = (execute_if.data.op_type == `INST_SFU_CSRRW);
VX_csr_data #(
.CORE_ID (CORE_ID)
.INSTANCE_ID (INSTANCE_ID),
.CORE_ID (CORE_ID)
) csr_data (
.clk (clk),
.reset (reset),

View file

@ -12,9 +12,8 @@
// limitations under the License.
`include "VX_define.vh"
`include "VX_trace.vh"
module VX_dcr_data import VX_gpu_pkg::*; (
module VX_dcr_data import VX_gpu_pkg::*, VX_trace_pkg::*; (
input wire clk,
input wire reset,

View file

@ -12,7 +12,6 @@
// limitations under the License.
`include "VX_define.vh"
`include "VX_trace.vh"
`ifdef EXT_F_ENABLE
`define USED_IREG(x) \
@ -28,8 +27,8 @@
use_``x = 1
`endif
module VX_decode import VX_gpu_pkg::*; #(
parameter CORE_ID = 0
module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #(
parameter `STRING INSTANCE_ID = ""
) (
input wire clk,
input wire reset,
@ -44,7 +43,7 @@ module VX_decode import VX_gpu_pkg::*; #(
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + 1 + (`NR_BITS * 4);
`UNUSED_PARAM (CORE_ID)
`UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
@ -145,6 +144,12 @@ module VX_decode import VX_gpu_pkg::*; #(
end
`endif
`STATIC_ASSERT($bits(alu_args_t) == $bits(op_args_t), ("alu_args_t size mismatch: current=%0d, expected=%0d", $bits(alu_args_t), $bits(op_args_t)));
`STATIC_ASSERT($bits(fpu_args_t) == $bits(op_args_t), ("fpu_args_t size mismatch: current=%0d, expected=%0d", $bits(fpu_args_t), $bits(op_args_t)));
`STATIC_ASSERT($bits(lsu_args_t) == $bits(op_args_t), ("lsu_args_t size mismatch: current=%0d, expected=%0d", $bits(lsu_args_t), $bits(op_args_t)));
`STATIC_ASSERT($bits(csr_args_t) == $bits(op_args_t), ("csr_args_t size mismatch: current=%0d, expected=%0d", $bits(csr_args_t), $bits(op_args_t)));
`STATIC_ASSERT($bits(wctl_args_t) == $bits(op_args_t), ("wctl_args_t size mismatch: current=%0d, expected=%0d", $bits(wctl_args_t), $bits(op_args_t)));
always @(*) begin
ex_type = '0;
@ -552,7 +557,7 @@ module VX_decode import VX_gpu_pkg::*; #(
`ifdef DBG_TRACE_PIPELINE
always @(posedge clk) begin
if (decode_if.valid && decode_if.ready) begin
`TRACE(1, ("%d: core%0d-decode: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, CORE_ID, decode_if.data.wid, {decode_if.data.PC, 1'd0}, instr));
`TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, INSTANCE_ID, decode_if.data.wid, {decode_if.data.PC, 1'd0}, instr));
trace_ex_type(1, decode_if.data.ex_type);
`TRACE(1, (", op="));
trace_ex_op(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args);

View file

@ -12,10 +12,9 @@
// limitations under the License.
`include "VX_define.vh"
`include "VX_trace.vh"
module VX_dispatch import VX_gpu_pkg::*; #(
parameter CORE_ID = 0
parameter `STRING INSTANCE_ID = ""
) (
input wire clk,
input wire reset,
@ -24,12 +23,12 @@ module VX_dispatch import VX_gpu_pkg::*; #(
output wire [`PERF_CTR_BITS-1:0] perf_stalls [`NUM_EX_UNITS],
`endif
// inputs
VX_operands_if.slave operands_if [`ISSUE_WIDTH],
VX_operands_if.slave operands_if,
// outputs
VX_dispatch_if.master dispatch_if [`NUM_EX_UNITS * `ISSUE_WIDTH]
VX_dispatch_if.master dispatch_if [`NUM_EX_UNITS]
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_SPARAM (INSTANCE_ID)
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + `INST_OP_BITS + `INST_ARGS_BITS + 1 + `NR_BITS + (3 * `NUM_THREADS * `XLEN) + `NT_WIDTH;
@ -38,104 +37,71 @@ module VX_dispatch import VX_gpu_pkg::*; #(
assign tids[i] = `NT_WIDTH'(i);
end
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
wire [`NT_WIDTH-1:0] last_active_tid;
// Thread id produced by last_tid_select below; it is forwarded to every
// execution unit as part of the dispatch payload.
wire [`NT_WIDTH-1:0] last_active_tid;
// Select one entry of the tids table, using the operand thread mask as the
// per-entry valid flags.
// NOTE(review): with REVERSE=1 this presumably yields the highest-numbered
// active thread (hence "last") - confirm against VX_find_first.
VX_find_first #(
.N (`NUM_THREADS),
.DATAW (`NT_WIDTH),
.REVERSE (1)
) last_tid_select (
.valid_in (operands_if.data.tmask),
.data_in (tids),
.data_out (last_active_tid),
// the "any valid" indication is not needed here
`UNUSED_PIN (valid_out)
);
VX_find_first #(
.N (`NUM_THREADS),
.DATAW (`NT_WIDTH),
.REVERSE (1)
) last_tid_select (
.valid_in (operands_if[i].data.tmask),
.data_in (tids),
.data_out (last_active_tid),
`UNUSED_PIN (valid_out)
wire [`NUM_EX_UNITS-1:0] operands_reset;
assign operands_if.ready = operands_reset[operands_if.data.ex_type];
for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin
`RESET_RELAY (buffer_reset, reset);
// Output buffer for execution unit i: accepts the incoming operands only when
// their ex_type selects this unit, and drives dispatch_if[i].
VX_elastic_buffer #(
.DATAW (DATAW),
.SIZE (2),
.OUT_REG (2), // 2-cycle EB for area reduction
.LUTRAM (1)
) buffer (
.clk (clk),
.reset (buffer_reset),
// steer the request: valid is asserted for exactly the unit matching ex_type
.valid_in (operands_if.valid && (operands_if.data.ex_type == `EX_BITS'(i))),
// NOTE: despite its name, operands_reset[i] carries this buffer's ready signal;
// operands_if.ready is taken from the entry indexed by ex_type
.ready_in (operands_reset[i]),
// payload fields; their widths add up to DATAW
// NOTE(review): field order presumably mirrors the dispatch_if data layout - confirm
.data_in ({
operands_if.data.uuid,
operands_if.data.wis,
operands_if.data.tmask,
operands_if.data.PC,
operands_if.data.op_type,
operands_if.data.op_args,
operands_if.data.wb,
operands_if.data.rd,
last_active_tid,
operands_if.data.rs1_data,
operands_if.data.rs2_data,
operands_if.data.rs3_data
}),
.data_out (dispatch_if[i].data),
.valid_out (dispatch_if[i].valid),
.ready_out (dispatch_if[i].ready)
);
wire [`NUM_EX_UNITS-1:0] operands_reset;
`RESET_RELAY (buf_reset, reset);
for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin
VX_elastic_buffer #(
.DATAW (DATAW),
.SIZE (2),
.OUT_REG (2)
) buffer (
.clk (clk),
.reset (buf_reset),
.valid_in (operands_if[i].valid && (operands_if[i].data.ex_type == j)),
.ready_in (operands_reset[j]),
.data_in (`TO_DISPATCH_DATA(operands_if[i].data, last_active_tid)),
.data_out (dispatch_if[j * `ISSUE_WIDTH + i].data),
.valid_out (dispatch_if[j * `ISSUE_WIDTH + i].valid),
.ready_out (dispatch_if[j * `ISSUE_WIDTH + i].ready)
);
end
assign operands_if[i].ready = operands_reset[operands_if[i].data.ex_type];
end
`ifdef PERF_ENABLE
wire [`NUM_EX_UNITS-1:0] perf_unit_stalls_per_cycle, perf_unit_stalls_per_cycle_r;
reg [`ISSUE_WIDTH-1:0][`NUM_EX_UNITS-1:0] perf_issue_unit_stalls_per_cycle;
reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_stalls_r;
for (genvar i=0; i < `ISSUE_WIDTH; ++i) begin
always @(*) begin
perf_issue_unit_stalls_per_cycle[i] = '0;
if (operands_if[i].valid && ~operands_if[i].ready) begin
perf_issue_unit_stalls_per_cycle[i][operands_if[i].data.ex_type] = 1;
end
end
end
VX_reduce #(
.DATAW_IN (`NUM_EX_UNITS),
.N (`ISSUE_WIDTH),
.OP ("|")
) reduce (
.data_in (perf_issue_unit_stalls_per_cycle),
.data_out (perf_unit_stalls_per_cycle)
);
`BUFFER(perf_unit_stalls_per_cycle_r, perf_unit_stalls_per_cycle);
wire operands_if_stall = operands_if.valid && ~operands_if.ready;
for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin
always @(posedge clk) begin
if (reset) begin
perf_stalls_r[i] <= '0;
end else begin
perf_stalls_r[i] <= perf_stalls_r[i] + `PERF_CTR_BITS'(perf_unit_stalls_per_cycle_r[i]);
perf_stalls_r[i] <= perf_stalls_r[i] + `PERF_CTR_BITS'(operands_if_stall && operands_if.data.ex_type == `EX_BITS'(i));
end
end
end
for (genvar i=0; i < `NUM_EX_UNITS; ++i) begin
assign perf_stalls[i] = perf_stalls_r[i];
end
`endif
`ifdef DBG_TRACE_PIPELINE
for (genvar i=0; i < `ISSUE_WIDTH; ++i) begin
always @(posedge clk) begin
if (operands_if[i].valid && operands_if[i].ready) begin
`TRACE(1, ("%d: core%0d-issue: wid=%0d, PC=0x%0h, ex=", $time, CORE_ID, wis_to_wid(operands_if[i].data.wis, i), {operands_if[i].data.PC, 1'b0}));
trace_ex_type(1, operands_if[i].data.ex_type);
`TRACE(1, (", op="));
trace_ex_op(1, operands_if[i].data.ex_type, operands_if[i].data.op_type, operands_if[i].data.op_args);
`TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if[i].data.tmask, operands_if[i].data.wb, operands_if[i].data.rd));
`TRACE_ARRAY1D(1, "0x%0h", operands_if[i].data.rs1_data, `NUM_THREADS);
`TRACE(1, (", rs2_data="));
`TRACE_ARRAY1D(1, "0x%0h", operands_if[i].data.rs2_data, `NUM_THREADS);
`TRACE(1, (", rs3_data="));
`TRACE_ARRAY1D(1, "0x%0h", operands_if[i].data.rs3_data, `NUM_THREADS);
trace_op_args(1, operands_if[i].data.ex_type, operands_if[i].data.op_type, operands_if[i].data.op_args);
`TRACE(1, (" (#%0d)\n", operands_if[i].data.uuid));
end
end
end
`endif
endmodule

View file

@ -14,6 +14,7 @@
`include "VX_define.vh"
module VX_execute import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "",
parameter CORE_ID = 0
) (
`SCOPE_IO_DECL
@ -55,7 +56,7 @@ module VX_execute import VX_gpu_pkg::*; #(
`RESET_RELAY (sfu_reset, reset);
VX_alu_unit #(
.CORE_ID (CORE_ID)
.INSTANCE_ID ($sformatf("%s-alu", INSTANCE_ID))
) alu_unit (
.clk (clk),
.reset (alu_reset),
@ -67,7 +68,7 @@ module VX_execute import VX_gpu_pkg::*; #(
`SCOPE_IO_SWITCH (1)
VX_lsu_unit #(
.CORE_ID (CORE_ID)
.INSTANCE_ID ($sformatf("%s-lsu", INSTANCE_ID))
) lsu_unit (
`SCOPE_IO_BIND (0)
.clk (clk),
@ -81,7 +82,7 @@ module VX_execute import VX_gpu_pkg::*; #(
`RESET_RELAY (fpu_reset, reset);
VX_fpu_unit #(
.CORE_ID (CORE_ID)
.INSTANCE_ID ($sformatf("%s-fpu", INSTANCE_ID))
) fpu_unit (
.clk (clk),
.reset (fpu_reset),
@ -92,6 +93,7 @@ module VX_execute import VX_gpu_pkg::*; #(
`endif
VX_sfu_unit #(
.INSTANCE_ID ($sformatf("%s-sfu", INSTANCE_ID)),
.CORE_ID (CORE_ID)
) sfu_unit (
.clk (clk),

View file

@ -14,7 +14,7 @@
`include "VX_define.vh"
module VX_fetch import VX_gpu_pkg::*; #(
parameter CORE_ID = 0
parameter `STRING INSTANCE_ID = ""
) (
`SCOPE_IO_DECL
@ -30,7 +30,7 @@ module VX_fetch import VX_gpu_pkg::*; #(
// outputs
VX_fetch_if.master fetch_if
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_VAR (reset)
wire icache_req_valid;
@ -78,9 +78,11 @@ module VX_fetch import VX_gpu_pkg::*; #(
.reset (reset),
.incr (icache_req_fire && schedule_if.data.wid == i),
.decr (fetch_if.ibuf_pop[i]),
`UNUSED_PIN (empty),
`UNUSED_PIN (alm_empty),
.full (pending_ibuf_full[i]),
`UNUSED_PIN (size),
`UNUSED_PIN (empty)
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
);
end
wire ibuf_ready = ~pending_ibuf_full[schedule_if.data.wid];
@ -89,7 +91,7 @@ module VX_fetch import VX_gpu_pkg::*; #(
`endif
`RUNTIME_ASSERT((!schedule_if.valid || schedule_if.data.PC != 0),
("%t: *** invalid PC=0x%0h, wid=%0d, tmask=%b (#%0d)", $time, {schedule_if.data.PC, 1'b0}, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.uuid))
("%t: *** %s invalid PC=0x%0h, wid=%0d, tmask=%b (#%0d)", $time, INSTANCE_ID, {schedule_if.data.PC, 1'b0}, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.uuid))
// Icache Request
@ -129,45 +131,33 @@ module VX_fetch import VX_gpu_pkg::*; #(
assign icache_bus_if.rsp_ready = fetch_if.ready;
`ifdef DBG_SCOPE_FETCH
if (CORE_ID == 0) begin
`ifdef SCOPE
wire schedule_fire = schedule_if.valid && schedule_if.ready;
wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready;
VX_scope_tap #(
.SCOPE_ID (1),
.TRIGGERW (4),
.PROBEW (`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS +
ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH +
(ICACHE_WORD_SIZE*8) + ICACHE_TAG_WIDTH)
) scope_tap (
.clk(clk),
.reset(scope_reset),
.start(1'b0),
.stop(1'b0),
.triggers({
reset,
schedule_fire,
icache_req_fire,
icache_rsp_fire
}),
.probes({
schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC,
icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr,
icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag
}),
.bus_in(scope_bus_in),
.bus_out(scope_bus_out)
);
`endif
`ifdef CHIPSCOPE
ila_fetch ila_fetch_inst (
.clk (clk),
.probe0 ({reset, schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC, schedule_if.ready, schedule_if.valid}),
.probe1 ({icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr, icache_bus_if.req_ready, icache_bus_if.req_valid}),
.probe2 ({icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag, icache_bus_if.rsp_ready, icache_bus_if.rsp_valid})
);
`endif
end
wire schedule_fire = schedule_if.valid && schedule_if.ready;
wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready;
VX_scope_tap #(
.SCOPE_ID (1),
.TRIGGERW (4),
.PROBEW (`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS +
ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH +
(ICACHE_WORD_SIZE*8) + ICACHE_TAG_WIDTH)
) scope_tap (
.clk (clk),
.reset (scope_reset),
.start (1'b0),
.stop (1'b0),
.triggers ({
reset,
schedule_fire,
icache_req_fire,
icache_rsp_fire
}),
.probes ({
schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC,
icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr,
icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag
}),
.bus_in (scope_bus_in),
.bus_out (scope_bus_out)
);
`else
`SCOPE_IO_UNUSED()
`endif
@ -177,10 +167,10 @@ module VX_fetch import VX_gpu_pkg::*; #(
wire fetch_fire = fetch_if.valid && fetch_if.ready;
always @(posedge clk) begin
if (schedule_fire) begin
`TRACE(1, ("%d: I$%0d req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, CORE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid));
`TRACE(1, ("%d: %s req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, INSTANCE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid));
end
if (fetch_fire) begin
`TRACE(1, ("%d: I$%0d rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, CORE_ID, fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid));
`TRACE(1, ("%d: %s rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, INSTANCE_ID, fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid));
end
end
`endif

View file

@ -14,7 +14,7 @@
`include "VX_fpu_define.vh"
module VX_fpu_unit import VX_fpu_pkg::*; #(
parameter CORE_ID = 0
parameter `STRING INSTANCE_ID = ""
) (
input wire clk,
input wire reset,
@ -26,7 +26,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
VX_commit_if.master commit_if [`ISSUE_WIDTH],
VX_fpu_csr_if.master fpu_csr_if[`NUM_FPU_BLOCKS]
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_SPARAM (INSTANCE_ID)
localparam BLOCK_SIZE = `NUM_FPU_BLOCKS;
localparam NUM_LANES = `NUM_FPU_LANES;
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
@ -84,12 +84,14 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
wire execute_fire = per_block_execute_if[block_idx].valid && per_block_execute_if[block_idx].ready;
wire fpu_rsp_fire = fpu_rsp_valid && fpu_rsp_ready;
`RESET_RELAY (ibuf_reset, block_reset);
VX_index_buffer #(
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + PID_WIDTH + 1 + 1),
.SIZE (`FPUQ_SIZE)
) tag_store (
.clk (clk),
.reset (block_reset),
.reset (ibuf_reset),
.acquire_en (execute_fire),
.write_addr (fpu_req_tag),
.write_data ({per_block_execute_if[block_idx].data.uuid, per_block_execute_if[block_idx].data.wid, per_block_execute_if[block_idx].data.tmask, per_block_execute_if[block_idx].data.PC, per_block_execute_if[block_idx].data.rd, per_block_execute_if[block_idx].data.pid, per_block_execute_if[block_idx].data.sop, per_block_execute_if[block_idx].data.eop}),
@ -226,12 +228,14 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
// send response
`RESET_RELAY (rsp_reset, block_reset);
VX_elastic_buffer #(
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + (NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1),
.SIZE (0)
) rsp_buf (
.clk (clk),
.reset (block_reset),
.reset (rsp_reset),
.valid_in (fpu_rsp_valid),
.ready_in (fpu_rsp_ready),
.data_in ({fpu_rsp_uuid, fpu_rsp_wid, fpu_rsp_tmask, fpu_rsp_PC, fpu_rsp_rd, fpu_rsp_result, fpu_rsp_pid, fpu_rsp_sop, fpu_rsp_eop}),

View file

@ -14,33 +14,36 @@
`include "VX_define.vh"
module VX_ibuffer import VX_gpu_pkg::*; #(
parameter CORE_ID = 0
parameter `STRING INSTANCE_ID = ""
) (
input wire clk,
input wire reset,
`ifdef PERF_ENABLE
output wire [`PERF_CTR_BITS-1:0] perf_stalls,
`endif
// inputs
VX_decode_if.slave decode_if,
// outputs
VX_ibuffer_if.master ibuffer_if [`NUM_WARPS]
VX_ibuffer_if.master ibuffer_if [PER_ISSUE_WARPS]
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_SPARAM (INSTANCE_ID)
localparam DATAW = `UUID_WIDTH + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + (`NR_BITS * 4);
wire [`NUM_WARPS-1:0] ibuf_ready_in;
wire [PER_ISSUE_WARPS-1:0] ibuf_ready_in;
assign decode_if.ready = ibuf_ready_in[decode_if.data.wid];
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin
VX_elastic_buffer #(
.DATAW (DATAW),
.SIZE (`IBUF_SIZE),
.OUT_REG (2) // use a 2-cycle FIFO
.OUT_REG (2) // 2-cycle EB for area reduction
) instr_buf (
.clk (clk),
.reset (reset),
.valid_in (decode_if.valid && decode_if.data.wid == i),
.valid_in (decode_if.valid && decode_if.data.wid == ISSUE_WIS_W'(w)),
.data_in ({
decode_if.data.uuid,
decode_if.data.tmask,
@ -52,15 +55,32 @@ module VX_ibuffer import VX_gpu_pkg::*; #(
decode_if.data.rd,
decode_if.data.rs1,
decode_if.data.rs2,
decode_if.data.rs3}),
.ready_in (ibuf_ready_in[i]),
.valid_out(ibuffer_if[i].valid),
.data_out (ibuffer_if[i].data),
.ready_out(ibuffer_if[i].ready)
decode_if.data.rs3
}),
.ready_in (ibuf_ready_in[w]),
.valid_out(ibuffer_if[w].valid),
.data_out (ibuffer_if[w].data),
.ready_out(ibuffer_if[w].ready)
);
`ifndef L1_ENABLE
assign decode_if.ibuf_pop[i] = ibuffer_if[i].valid && ibuffer_if[i].ready;
assign decode_if.ibuf_pop[w] = ibuffer_if[w].valid && ibuffer_if[w].ready;
`endif
end
`ifdef PERF_ENABLE
reg [`PERF_CTR_BITS-1:0] perf_ibf_stalls;
wire decode_if_stall = decode_if.valid && ~decode_if.ready;
always @(posedge clk) begin
if (reset) begin
perf_ibf_stalls <= '0;
end else begin
perf_ibf_stalls <= perf_ibf_stalls + `PERF_CTR_BITS'(decode_if_stall);
end
end
assign perf_stalls = perf_ibf_stalls;
`endif
endmodule

View file

@ -12,10 +12,9 @@
// limitations under the License.
`include "VX_define.vh"
`include "VX_trace.vh"
module VX_issue #(
parameter CORE_ID = 0
module VX_issue import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = ""
) (
`SCOPE_IO_DECL
@ -23,137 +22,81 @@ module VX_issue #(
input wire reset,
`ifdef PERF_ENABLE
VX_pipeline_perf_if.issue perf_issue_if,
output issue_perf_t issue_perf,
`endif
VX_decode_if.slave decode_if,
VX_writeback_if.slave writeback_if [`ISSUE_WIDTH],
VX_dispatch_if.master dispatch_if [`NUM_EX_UNITS * `ISSUE_WIDTH]
);
VX_ibuffer_if ibuffer_if [`NUM_WARPS]();
VX_scoreboard_if scoreboard_if [`ISSUE_WIDTH]();
VX_operands_if operands_if [`ISSUE_WIDTH]();
`RESET_RELAY (ibuf_reset, reset);
`RESET_RELAY (scoreboard_reset, reset);
`RESET_RELAY (operands_reset, reset);
`RESET_RELAY (dispatch_reset, reset);
VX_ibuffer #(
.CORE_ID (CORE_ID)
) ibuffer (
.clk (clk),
.reset (ibuf_reset),
.decode_if (decode_if),
.ibuffer_if (ibuffer_if)
);
VX_scoreboard #(
.CORE_ID (CORE_ID)
) scoreboard (
.clk (clk),
.reset (scoreboard_reset),
`ifdef PERF_ENABLE
.perf_scb_stalls(perf_issue_if.scb_stalls),
.perf_units_uses(perf_issue_if.units_uses),
.perf_sfu_uses (perf_issue_if.sfu_uses),
`endif
.writeback_if (writeback_if),
.ibuffer_if (ibuffer_if),
.scoreboard_if (scoreboard_if)
);
VX_operands #(
.CORE_ID (CORE_ID)
) operands (
.clk (clk),
.reset (operands_reset),
.writeback_if (writeback_if),
.scoreboard_if (scoreboard_if),
.operands_if (operands_if)
);
VX_dispatch #(
.CORE_ID (CORE_ID)
) dispatch (
.clk (clk),
.reset (dispatch_reset),
`ifdef PERF_ENABLE
`UNUSED_PIN (perf_stalls),
`endif
.operands_if (operands_if),
.dispatch_if (dispatch_if)
);
`ifdef DBG_SCOPE_ISSUE
if (CORE_ID == 0) begin
`ifdef SCOPE
wire operands_if_fire = operands_if[0].valid && operands_if[0].ready;
wire operands_if_not_ready = ~operands_if[0].ready;
wire writeback_if_valid = writeback_if[0].valid;
VX_scope_tap #(
.SCOPE_ID (2),
.TRIGGERW (4),
.PROBEW (`UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS +
1 + `NR_BITS + (`NUM_THREADS * 3 * `XLEN) +
`UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1)
) scope_tap (
.clk(clk),
.reset(scope_reset),
.start(1'b0),
.stop(1'b0),
.triggers({
reset,
operands_if_fire,
operands_if_not_ready,
writeback_if_valid
}),
.probes({
operands_if[0].data.uuid,
operands_if[0].data.tmask,
operands_if[0].data.ex_type,
operands_if[0].data.op_type,
operands_if[0].data.wb,
operands_if[0].data.rd,
operands_if[0].data.rs1_data,
operands_if[0].data.rs2_data,
operands_if[0].data.rs3_data,
writeback_if[0].data.uuid,
writeback_if[0].data.tmask,
writeback_if[0].data.rd,
writeback_if[0].data.data,
writeback_if[0].data.eop
}),
.bus_in(scope_bus_in),
.bus_out(scope_bus_out)
);
`endif
`ifdef CHIPSCOPE
ila_issue ila_issue_inst (
.clk (clk),
.probe0 ({operands_if.uuid, ibuffer.rs3, ibuffer.rs2, ibuffer.rs1, operands_if.PC, operands_if.tmask, operands_if.wid, operands_if.ex_type, operands_if.op_type, operands_if.ready, operands_if.valid}),
.probe1 ({writeback_if.uuid, writeback_if.data[0], writeback_if.PC, writeback_if.tmask, writeback_if.wid, writeback_if.eop, writeback_if.valid})
);
`endif
end
`else
`SCOPE_IO_UNUSED()
`endif
`ifdef PERF_ENABLE
reg [`PERF_CTR_BITS-1:0] perf_ibf_stalls;
wire decode_stall = decode_if.valid && ~decode_if.ready;
always @(posedge clk) begin
if (reset) begin
perf_ibf_stalls <= '0;
end else begin
perf_ibf_stalls <= perf_ibf_stalls + `PERF_CTR_BITS'(decode_stall);
end
issue_perf_t per_issue_perf [`ISSUE_WIDTH];
`PERF_COUNTER_ADD (issue_perf, per_issue_perf, ibf_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
`PERF_COUNTER_ADD (issue_perf, per_issue_perf, scb_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
`PERF_COUNTER_ADD (issue_perf, per_issue_perf, opd_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin
`PERF_COUNTER_ADD (issue_perf, per_issue_perf, units_uses[i], `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
end
for (genvar i = 0; i < `NUM_SFU_UNITS; ++i) begin
`PERF_COUNTER_ADD (issue_perf, per_issue_perf, sfu_uses[i], `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
end
assign perf_issue_if.ibf_stalls = perf_ibf_stalls;
`endif
wire [ISSUE_ISW_W-1:0] decode_isw = wid_to_isw(decode_if.data.wid);
wire [ISSUE_WIS_W-1:0] decode_wis = wid_to_wis(decode_if.data.wid);
wire [`ISSUE_WIDTH-1:0] decode_ready_in;
assign decode_if.ready = decode_ready_in[decode_isw];
`SCOPE_IO_SWITCH (`ISSUE_WIDTH)
for (genvar issue_id = 0; issue_id < `ISSUE_WIDTH; ++issue_id) begin : issue_slices
VX_decode_if #(
.NUM_WARPS (PER_ISSUE_WARPS)
) per_issue_decode_if();
VX_dispatch_if per_issue_dispatch_if[`NUM_EX_UNITS]();
assign per_issue_decode_if.valid = decode_if.valid && (decode_isw == ISSUE_ISW_W'(issue_id));
assign per_issue_decode_if.data.uuid = decode_if.data.uuid;
assign per_issue_decode_if.data.wid = decode_wis;
assign per_issue_decode_if.data.tmask = decode_if.data.tmask;
assign per_issue_decode_if.data.PC = decode_if.data.PC;
assign per_issue_decode_if.data.ex_type = decode_if.data.ex_type;
assign per_issue_decode_if.data.op_type = decode_if.data.op_type;
assign per_issue_decode_if.data.op_args = decode_if.data.op_args;
assign per_issue_decode_if.data.wb = decode_if.data.wb;
assign per_issue_decode_if.data.rd = decode_if.data.rd;
assign per_issue_decode_if.data.rs1 = decode_if.data.rs1;
assign per_issue_decode_if.data.rs2 = decode_if.data.rs2;
assign per_issue_decode_if.data.rs3 = decode_if.data.rs3;
assign decode_ready_in[issue_id] = per_issue_decode_if.ready;
`ifndef L1_ENABLE
assign decode_if.ibuf_pop[issue_id * PER_ISSUE_WARPS +: PER_ISSUE_WARPS] = per_issue_decode_if.ibuf_pop;
`endif
`RESET_RELAY (slice_reset, reset);
VX_issue_slice #(
.INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, issue_id)),
.ISSUE_ID (issue_id)
) issue_slice (
`SCOPE_IO_BIND(issue_id)
.clk (clk),
.reset (slice_reset),
`ifdef PERF_ENABLE
.issue_perf (per_issue_perf[issue_id]),
`endif
.decode_if (per_issue_decode_if),
.writeback_if (writeback_if[issue_id]),
.dispatch_if (per_issue_dispatch_if)
);
// Assign transposed dispatch_if
for (genvar ex_id = 0; ex_id < `NUM_EX_UNITS; ++ex_id) begin
`ASSIGN_VX_IF(dispatch_if[ex_id * `ISSUE_WIDTH + issue_id], per_issue_dispatch_if[ex_id]);
end
end
endmodule

View file

@ -0,0 +1,159 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
// VX_issue_slice: one slice of the issue stage.
//
// The slice wires four sub-blocks into a chain between its ports:
//   decode_if -> VX_ibuffer -> VX_scoreboard -> VX_operands -> VX_dispatch -> dispatch_if
// writeback_if is connected to both the scoreboard and the operands block.
//
// Parameters:
//   INSTANCE_ID - name prefix. Each sub-block is instantiated with
//                 "<INSTANCE_ID>-<block>" and the pipeline trace prints it.
//   ISSUE_ID    - index of this slice. In this module it is only read by the
//                 DBG_TRACE_PIPELINE trace, where it is passed to wis_to_wid().
//
// Ports:
//   clk, reset   - clock and reset; reset is fanned out through RESET_RELAY.
//   issue_perf   - (PERF_ENABLE only) stall/usage counters filled in by the
//                  ibuffer, scoreboard and operands sub-blocks.
//   decode_if    - decoded instruction input (slave side).
//   writeback_if - writeback input (slave side).
//   dispatch_if  - one dispatch output per execution unit (master side).
module VX_issue_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
parameter `STRING INSTANCE_ID = "",
parameter ISSUE_ID = 0
) (
// Scope (debug tap) I/O; the macro is defined outside this file.
`SCOPE_IO_DECL
input wire clk,
input wire reset,
`ifdef PERF_ENABLE
output issue_perf_t issue_perf,
`endif
VX_decode_if.slave decode_if,
VX_writeback_if.slave writeback_if,
VX_dispatch_if.master dispatch_if [`NUM_EX_UNITS]
);
// ISSUE_ID is only referenced inside the DBG_TRACE_PIPELINE block at the end
// of the module, so it is flagged here for builds where tracing is disabled.
`UNUSED_PARAM (ISSUE_ID)
// Internal links between the sub-blocks: one ibuffer interface per warp
// handled by this slice, then single scoreboard and operands interfaces.
VX_ibuffer_if ibuffer_if [PER_ISSUE_WARPS]();
VX_scoreboard_if scoreboard_if();
VX_operands_if operands_if();
// One reset signal per sub-block, each produced from `reset` by RESET_RELAY
// (macro defined outside this file; presumably a relayed/registered copy of
// reset -- confirm against its definition).
`RESET_RELAY (ibuf_reset, reset);
`RESET_RELAY (scoreboard_reset, reset);
`RESET_RELAY (operands_reset, reset);
`RESET_RELAY (dispatch_reset, reset);
// Stage 1: instruction buffer, fed by decode_if, feeding ibuffer_if.
VX_ibuffer #(
.INSTANCE_ID ($sformatf("%s-ibuffer", INSTANCE_ID))
) ibuffer (
.clk (clk),
.reset (ibuf_reset),
`ifdef PERF_ENABLE
.perf_stalls (issue_perf.ibf_stalls),
`endif
.decode_if (decode_if),
.ibuffer_if (ibuffer_if)
);
// Stage 2: scoreboard, connected to ibuffer_if, writeback_if and scoreboard_if.
VX_scoreboard #(
.INSTANCE_ID ($sformatf("%s-scoreboard", INSTANCE_ID))
) scoreboard (
.clk (clk),
.reset (scoreboard_reset),
`ifdef PERF_ENABLE
.perf_stalls (issue_perf.scb_stalls),
.perf_units_uses(issue_perf.units_uses),
.perf_sfu_uses (issue_perf.sfu_uses),
`endif
.writeback_if (writeback_if),
.ibuffer_if (ibuffer_if),
.scoreboard_if (scoreboard_if)
);
// Stage 3: operands block, connected to scoreboard_if, writeback_if and operands_if.
VX_operands #(
.INSTANCE_ID ($sformatf("%s-operands", INSTANCE_ID))
) operands (
.clk (clk),
.reset (operands_reset),
`ifdef PERF_ENABLE
.perf_stalls (issue_perf.opd_stalls),
`endif
.writeback_if (writeback_if),
.scoreboard_if (scoreboard_if),
.operands_if (operands_if)
);
// Stage 4: dispatch, connecting operands_if to the per-execution-unit
// dispatch_if ports. Its perf_stalls pin is passed to UNUSED_PIN rather than
// to a field of issue_perf (presumably left unconnected -- confirm).
VX_dispatch #(
.INSTANCE_ID ($sformatf("%s-dispatch", INSTANCE_ID))
) dispatch (
.clk (clk),
.reset (dispatch_reset),
`ifdef PERF_ENABLE
`UNUSED_PIN (perf_stalls),
`endif
.operands_if (operands_if),
.dispatch_if (dispatch_if)
);
// Optional scope tap on the operands and writeback interfaces.
`ifdef DBG_SCOPE_ISSUE
// Trigger conditions (together with reset) for the scope tap.
wire operands_if_fire = operands_if.valid && operands_if.ready;
wire operands_if_not_ready = ~operands_if.ready;
wire writeback_if_valid = writeback_if.valid;
// TRIGGERW matches the four signals in .triggers. The PROBEW terms appear to
// mirror, in order, the fields concatenated in .probes; keep the two lists in
// sync when adding or removing a probe.
VX_scope_tap #(
.SCOPE_ID (2),
.TRIGGERW (4),
.PROBEW (`UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS +
1 + `NR_BITS + (`NUM_THREADS * 3 * `XLEN) +
`UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1)
) scope_tap (
.clk (clk),
.reset (scope_reset),
.start (1'b0),
.stop (1'b0),
.triggers ({
reset,
operands_if_fire,
operands_if_not_ready,
writeback_if_valid
}),
.probes ({
operands_if.data.uuid,
operands_if.data.tmask,
operands_if.data.ex_type,
operands_if.data.op_type,
operands_if.data.wb,
operands_if.data.rd,
operands_if.data.rs1_data,
operands_if.data.rs2_data,
operands_if.data.rs3_data,
writeback_if.data.uuid,
writeback_if.data.tmask,
writeback_if.data.rd,
writeback_if.data.data,
writeback_if.data.eop
}),
.bus_in (scope_bus_in),
.bus_out (scope_bus_out)
);
`else
// No scope tap in this build: hand the scope I/O to SCOPE_IO_UNUSED
// (macro defined outside this file).
`SCOPE_IO_UNUSED()
`endif
// Optional pipeline trace: on every cycle in which operands_if is both valid
// and ready, emit one trace line with the instruction's warp id, PC, unit,
// opcode, thread mask, destination, source operand data and uuid.
`ifdef DBG_TRACE_PIPELINE
always @(posedge clk) begin
if (operands_if.valid && operands_if.ready) begin
`TRACE(1, ("%d: %s wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), {operands_if.data.PC, 1'b0}));
trace_ex_type(1, operands_if.data.ex_type);
`TRACE(1, (", op="));
trace_ex_op(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
`TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if.data.tmask, operands_if.data.wb, operands_if.data.rd));
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs1_data, `NUM_THREADS);
`TRACE(1, (", rs2_data="));
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs2_data, `NUM_THREADS);
`TRACE(1, (", rs3_data="));
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs3_data, `NUM_THREADS);
trace_op_args(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
`TRACE(1, (" (#%0d)\n", operands_if.data.uuid));
end
end
`endif
endmodule

132
hw/rtl/core/VX_issue_top.sv Normal file
View file

@ -0,0 +1,132 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
// VX_issue_top: flat-port wrapper around VX_issue.
//
// VX_issue exposes SystemVerilog interfaces (VX_decode_if, VX_writeback_if,
// VX_dispatch_if). This wrapper re-exports every interface field as an
// individual port, builds the interface instances internally and wires them
// one-to-one, so the issue stage can be driven from plain signals.
//
// Parameters:
//   INSTANCE_ID - instance name forwarded unchanged to VX_issue.
//
// Ports:
//   decode_*    - flattened decode_if (valid/data in, ready out).
//   writeback_* - flattened writeback_if, one entry per issue slot (inputs only).
//   dispatch_*  - flattened dispatch_if, one entry per
//                 (execution unit, issue slot) pair (valid/data out, ready in).
module VX_issue_top import VX_gpu_pkg::*; #(
    parameter `STRING INSTANCE_ID = "issue"
) (
    // Clock and reset
    input wire                              clk,
    input wire                              reset,

    // Flattened decode_if
    input wire                              decode_valid,
    input wire [`UUID_WIDTH-1:0]            decode_uuid,
    input wire [`NW_WIDTH-1:0]              decode_wid,
    input wire [`NUM_THREADS-1:0]           decode_tmask,
    input wire [`PC_BITS-1:0]               decode_PC,
    input wire [`EX_BITS-1:0]               decode_ex_type,
    input wire [`INST_OP_BITS-1:0]          decode_op_type,
    input op_args_t                         decode_op_args,
    input wire                              decode_wb,
    input wire [`NR_BITS-1:0]               decode_rd,
    input wire [`NR_BITS-1:0]               decode_rs1,
    input wire [`NR_BITS-1:0]               decode_rs2,
    input wire [`NR_BITS-1:0]               decode_rs3,
    output wire                             decode_ready,

    // Flattened writeback_if[`ISSUE_WIDTH]
    input wire                              writeback_valid[`ISSUE_WIDTH],
    input wire [`UUID_WIDTH-1:0]            writeback_uuid[`ISSUE_WIDTH],
    input wire [ISSUE_WIS_W-1:0]            writeback_wis[`ISSUE_WIDTH],
    input wire [`NUM_THREADS-1:0]           writeback_tmask[`ISSUE_WIDTH],
    input wire [`PC_BITS-1:0]               writeback_PC[`ISSUE_WIDTH],
    input wire [`NR_BITS-1:0]               writeback_rd[`ISSUE_WIDTH],
    input wire [`NUM_THREADS-1:0][`XLEN-1:0] writeback_data[`ISSUE_WIDTH],
    input wire                              writeback_sop[`ISSUE_WIDTH],
    input wire                              writeback_eop[`ISSUE_WIDTH],

    // Flattened dispatch_if[`NUM_EX_UNITS * `ISSUE_WIDTH]
    output wire                             dispatch_valid[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire [`UUID_WIDTH-1:0]           dispatch_uuid[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire [ISSUE_WIS_W-1:0]           dispatch_wis[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire [`NUM_THREADS-1:0]          dispatch_tmask[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire [`PC_BITS-1:0]              dispatch_PC[`NUM_EX_UNITS * `ISSUE_WIDTH],
    // NOTE(review): this port is sized with `INST_ALU_BITS while decode_op_type
    // above uses `INST_OP_BITS; confirm both match dispatch_if.data.op_type.
    output wire [`INST_ALU_BITS-1:0]        dispatch_op_type[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output op_args_t                        dispatch_op_args[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire                             dispatch_wb[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire [`NR_BITS-1:0]              dispatch_rd[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire [`NT_WIDTH-1:0]             dispatch_tid[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs1_data[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs2_data[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs3_data[`NUM_EX_UNITS * `ISSUE_WIDTH],
    input wire                              dispatch_ready[`NUM_EX_UNITS * `ISSUE_WIDTH]
);
    // Interface instances handed to VX_issue.
    VX_decode_if    decode_if();
    VX_dispatch_if  dispatch_if[`NUM_EX_UNITS * `ISSUE_WIDTH]();
    VX_writeback_if writeback_if[`ISSUE_WIDTH]();

    // decode: flat ports -> interface (ready flows back out)
    assign decode_if.valid        = decode_valid;
    assign decode_if.data.uuid    = decode_uuid;
    assign decode_if.data.wid     = decode_wid;
    assign decode_if.data.tmask   = decode_tmask;
    assign decode_if.data.PC      = decode_PC;
    assign decode_if.data.ex_type = decode_ex_type;
    assign decode_if.data.op_type = decode_op_type;
    assign decode_if.data.op_args = decode_op_args;
    assign decode_if.data.wb      = decode_wb;
    assign decode_if.data.rd      = decode_rd;
    assign decode_if.data.rs1     = decode_rs1;
    assign decode_if.data.rs2     = decode_rs2;
    assign decode_if.data.rs3     = decode_rs3;
    assign decode_ready           = decode_if.ready;

    // writeback: flat ports -> interface, one per issue slot
    for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
        assign writeback_if[i].valid      = writeback_valid[i];
        assign writeback_if[i].data.uuid  = writeback_uuid[i];
        assign writeback_if[i].data.wis   = writeback_wis[i];
        assign writeback_if[i].data.tmask = writeback_tmask[i];
        assign writeback_if[i].data.PC    = writeback_PC[i];
        assign writeback_if[i].data.rd    = writeback_rd[i];
        assign writeback_if[i].data.data  = writeback_data[i];
        assign writeback_if[i].data.sop   = writeback_sop[i];
        assign writeback_if[i].data.eop   = writeback_eop[i];
    end

    // dispatch: interface -> flat ports (ready flows back in)
    for (genvar i = 0; i < `NUM_EX_UNITS * `ISSUE_WIDTH; ++i) begin
        assign dispatch_valid[i]    = dispatch_if[i].valid;
        assign dispatch_uuid[i]     = dispatch_if[i].data.uuid;
        assign dispatch_wis[i]      = dispatch_if[i].data.wis;
        assign dispatch_tmask[i]    = dispatch_if[i].data.tmask;
        assign dispatch_PC[i]       = dispatch_if[i].data.PC;
        assign dispatch_op_type[i]  = dispatch_if[i].data.op_type;
        assign dispatch_op_args[i]  = dispatch_if[i].data.op_args;
        assign dispatch_wb[i]       = dispatch_if[i].data.wb;
        assign dispatch_rd[i]       = dispatch_if[i].data.rd;
        assign dispatch_tid[i]      = dispatch_if[i].data.tid;
        assign dispatch_rs1_data[i] = dispatch_if[i].data.rs1_data;
        assign dispatch_rs2_data[i] = dispatch_if[i].data.rs2_data;
        assign dispatch_rs3_data[i] = dispatch_if[i].data.rs3_data;
        assign dispatch_if[i].ready = dispatch_ready[i];
    end

`ifdef PERF_ENABLE
    // Sink for the issue_perf output of VX_issue; it is not exported by this
    // wrapper. It is driven solely by that output port, so it must not carry
    // a declaration initializer: IEEE 1800 makes it an error for a variable
    // driven by a module output to also have an initializer.
    issue_perf_t issue_perf;
`endif

    VX_issue #(
        .INSTANCE_ID (INSTANCE_ID)
    ) issue (
        // NOTE(review): this wrapper declares no scope ports (no SCOPE_IO_DECL),
        // so this bind presumably only compiles with scope support disabled -- confirm.
        `SCOPE_IO_BIND (0)
        .clk            (clk),
        .reset          (reset),

    `ifdef PERF_ENABLE
        .issue_perf     (issue_perf),
    `endif

        .decode_if      (decode_if),
        .writeback_if   (writeback_if),
        .dispatch_if    (dispatch_if)
    );

endmodule

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -14,11 +14,11 @@
`include "VX_define.vh"
module VX_lmem_unit import VX_gpu_pkg::*; #(
parameter CORE_ID = 0
parameter `STRING INSTANCE_ID = ""
) (
input wire clk,
input wire reset,
`ifdef PERF_ENABLE
output cache_perf_t cache_perf,
`endif
@ -37,31 +37,31 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
.NUM_LANES (`NUM_LSU_LANES),
.DATA_SIZE (LSU_WORD_SIZE),
.TAG_WIDTH (LSU_TAG_WIDTH)
) lmem_lsu_if[`NUM_LSU_BLOCKS]();
`RESET_RELAY (req_reset, reset);
) lsu_switch_if[`NUM_LSU_BLOCKS]();
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
wire [`NUM_LSU_LANES-1:0] is_addr_local_mask;
for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin
assign is_addr_local_mask[j] = lsu_mem_in_if[i].req_data.atype[j][`ADDR_TYPE_LOCAL];
end
wire is_addr_global = | (lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask);
wire is_addr_local = | (lsu_mem_in_if[i].req_data.mask & is_addr_local_mask);
wire is_addr_local = | (lsu_mem_in_if[i].req_data.mask & is_addr_local_mask);
wire req_global_ready;
wire req_local_ready;
`RESET_RELAY (switch_reset, reset);
VX_elastic_buffer #(
.DATAW (REQ_DATAW),
.SIZE (2),
.OUT_REG (1)
) req_global_buf (
.clk (clk),
.reset (req_reset),
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_global),
.reset (switch_reset),
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_global),
.data_in ({
lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask,
lsu_mem_in_if[i].req_data.rw,
@ -81,7 +81,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
lsu_mem_out_if[i].req_data.atype,
lsu_mem_out_if[i].req_data.data,
lsu_mem_out_if[i].req_data.tag
}),
}),
.ready_out (lsu_mem_out_if[i].req_ready)
);
@ -91,8 +91,8 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
.OUT_REG (0)
) req_local_buf (
.clk (clk),
.reset (req_reset),
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_local),
.reset (switch_reset),
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_local),
.data_in ({
lsu_mem_in_if[i].req_data.mask & is_addr_local_mask,
lsu_mem_in_if[i].req_data.rw,
@ -103,73 +103,47 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
lsu_mem_in_if[i].req_data.tag
}),
.ready_in (req_local_ready),
.valid_out (lmem_lsu_if[i].req_valid),
.valid_out (lsu_switch_if[i].req_valid),
.data_out ({
lmem_lsu_if[i].req_data.mask,
lmem_lsu_if[i].req_data.rw,
lmem_lsu_if[i].req_data.byteen,
lmem_lsu_if[i].req_data.addr,
lmem_lsu_if[i].req_data.atype,
lmem_lsu_if[i].req_data.data,
lmem_lsu_if[i].req_data.tag
}),
.ready_out (lmem_lsu_if[i].req_ready)
lsu_switch_if[i].req_data.mask,
lsu_switch_if[i].req_data.rw,
lsu_switch_if[i].req_data.byteen,
lsu_switch_if[i].req_data.addr,
lsu_switch_if[i].req_data.atype,
lsu_switch_if[i].req_data.data,
lsu_switch_if[i].req_data.tag
}),
.ready_out (lsu_switch_if[i].req_ready)
);
assign lsu_mem_in_if[i].req_ready = (req_global_ready && is_addr_global)
assign lsu_mem_in_if[i].req_ready = (req_global_ready && is_addr_global)
|| (req_local_ready && is_addr_local);
end
`RESET_RELAY (rsp_reset, reset);
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
wire rsp_arb_valid;
wire rsp_arb_index;
wire rsp_arb_ready;
VX_generic_arbiter #(
.NUM_REQS (2),
.LOCK_ENABLE (1),
.TYPE ("R")
) arbiter (
.clk (clk),
.reset (rsp_reset),
.requests ({
lmem_lsu_if[i].rsp_valid,
VX_stream_arb #(
.NUM_INPUTS (2),
.DATAW (RSP_DATAW),
.ARBITER ("R"),
.OUT_BUF (1)
) rsp_arb (
.clk (clk),
.reset (switch_reset),
.valid_in ({
lsu_switch_if[i].rsp_valid,
lsu_mem_out_if[i].rsp_valid
}),
.grant_valid (rsp_arb_valid),
.grant_index (rsp_arb_index),
`UNUSED_PIN (grant_onehot),
.grant_unlock(rsp_arb_ready)
);
VX_elastic_buffer #(
.DATAW (RSP_DATAW),
.SIZE (2),
.OUT_REG (0)
) rsp_buf (
.clk (clk),
.reset (rsp_reset),
.valid_in (rsp_arb_valid),
.data_in ({
rsp_arb_index ? lmem_lsu_if[i].rsp_data.mask : lsu_mem_out_if[i].rsp_data.mask,
rsp_arb_index ? lmem_lsu_if[i].rsp_data.data : lsu_mem_out_if[i].rsp_data.data,
rsp_arb_index ? lmem_lsu_if[i].rsp_data.tag : lsu_mem_out_if[i].rsp_data.tag
.ready_in ({
lsu_switch_if[i].rsp_ready,
lsu_mem_out_if[i].rsp_ready
}),
.ready_in (rsp_arb_ready),
.data_in ({
lsu_switch_if[i].rsp_data,
lsu_mem_out_if[i].rsp_data
}),
.data_out (lsu_mem_in_if[i].rsp_data),
.valid_out (lsu_mem_in_if[i].rsp_valid),
.data_out ({
lsu_mem_in_if[i].rsp_data.mask,
lsu_mem_in_if[i].rsp_data.data,
lsu_mem_in_if[i].rsp_data.tag
}),
.ready_out (lsu_mem_in_if[i].rsp_ready)
.ready_out (lsu_mem_in_if[i].rsp_ready),
`UNUSED_PIN (sel_out)
);
assign lsu_mem_out_if[i].rsp_ready = rsp_arb_ready && ~rsp_arb_index;
assign lmem_lsu_if[i].rsp_ready = rsp_arb_ready && rsp_arb_index;
end
VX_mem_bus_if #(
@ -177,25 +151,25 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
.TAG_WIDTH (LSU_TAG_WIDTH)
) lmem_bus_if[LSU_NUM_REQS]();
`RESET_RELAY (adapter_reset, reset);
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
VX_mem_bus_if #(
.DATA_SIZE (LSU_WORD_SIZE),
.TAG_WIDTH (LSU_TAG_WIDTH)
) lmem_bus_tmp_if[`NUM_LSU_LANES]();
`RESET_RELAY (adapter_reset, reset);
VX_lsu_adapter #(
.NUM_LANES (`NUM_LSU_LANES),
.DATA_SIZE (LSU_WORD_SIZE),
.DATA_SIZE (LSU_WORD_SIZE),
.TAG_WIDTH (LSU_TAG_WIDTH),
.TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH),
.REQ_OUT_BUF (2),
.RSP_OUT_BUF (1)
.REQ_OUT_BUF (3),
.RSP_OUT_BUF (0)
) lsu_adapter (
.clk (clk),
.reset (adapter_reset),
.lsu_mem_if (lmem_lsu_if[i]),
.lsu_mem_if (lsu_switch_if[i]),
.mem_bus_if (lmem_bus_tmp_if)
);
@ -205,17 +179,18 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
end
`RESET_RELAY (lmem_reset, reset);
VX_local_mem #(
.INSTANCE_ID($sformatf("core%0d-lmem", CORE_ID)),
.INSTANCE_ID($sformatf("%s-lmem", INSTANCE_ID)),
.SIZE (1 << `LMEM_LOG_SIZE),
.NUM_REQS (LSU_NUM_REQS),
.NUM_BANKS (`LMEM_NUM_BANKS),
.WORD_SIZE (LSU_WORD_SIZE),
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
.UUID_WIDTH (`UUID_WIDTH),
.TAG_WIDTH (LSU_TAG_WIDTH)
) local_mem (
.UUID_WIDTH (`UUID_WIDTH),
.TAG_WIDTH (LSU_TAG_WIDTH),
.OUT_BUF (3)
) local_mem (
.clk (clk),
.reset (lmem_reset),
`ifdef PERF_ENABLE

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -14,10 +14,10 @@
`include "VX_define.vh"
module VX_lsu_adapter import VX_gpu_pkg::*; #(
parameter NUM_LANES = 1,
parameter DATA_SIZE = 1,
parameter TAG_WIDTH = 1,
parameter TAG_SEL_BITS = 0,
parameter NUM_LANES = 1,
parameter DATA_SIZE = 1,
parameter TAG_WIDTH = 1,
parameter TAG_SEL_BITS = 0,
parameter `STRING ARBITER = "P",
parameter REQ_OUT_BUF = 0,
parameter RSP_OUT_BUF = 0
@ -63,12 +63,12 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #(
assign mem_bus_if[i].req_data.tag = req_tag_out[i];
assign req_ready_out[i] = mem_bus_if[i].req_ready;
end
VX_stream_unpack #(
.NUM_REQS (NUM_LANES),
.DATA_WIDTH (REQ_DATA_WIDTH),
.TAG_WIDTH (TAG_WIDTH),
.OUT_BUF (REQ_OUT_BUF)
.NUM_REQS (NUM_LANES),
.DATA_WIDTH (REQ_DATA_WIDTH),
.TAG_WIDTH (TAG_WIDTH),
.OUT_BUF (REQ_OUT_BUF)
) stream_unpack (
.clk (clk),
.reset (reset),
@ -77,7 +77,7 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #(
.data_in (req_data_in),
.tag_in (lsu_mem_if.req_data.tag),
.ready_in (lsu_mem_if.req_ready),
.valid_out (req_valid_out),
.valid_out (req_valid_out),
.data_out (req_data_out),
.tag_out (req_tag_out),
.ready_out (req_ready_out)

View file

@ -13,9 +13,8 @@
`include "VX_define.vh"
module VX_lsu_slice import VX_gpu_pkg::*; #(
parameter CORE_ID = 0,
parameter BLOCK_ID = 0
module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
parameter `STRING INSTANCE_ID = ""
) (
`SCOPE_IO_DECL
@ -88,7 +87,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
wire [NUM_LANES-1:0] mem_req_mask;
wire mem_req_rw;
wire [NUM_LANES-1:0][LSU_ADDR_WIDTH-1:0] mem_req_addr;
reg [NUM_LANES-1:0][LSU_WORD_SIZE-1:0] mem_req_byteen;
wire [NUM_LANES-1:0][LSU_WORD_SIZE-1:0] mem_req_byteen;
reg [NUM_LANES-1:0][LSU_WORD_SIZE*8-1:0] mem_req_data;
wire [TAG_WIDTH-1:0] mem_req_tag;
wire mem_req_ready;
@ -159,27 +158,30 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
// byte enable formatting
// For every lane, derive the per-byte enable mask from the access-size code
// (INST_LSU_WSIZE of the op type) and the lane's byte offset req_align[i].
// NOTE(review): this listing is a diff rendering with the +/- markers stripped.
// Statements that drive mem_req_byteen[i] directly sit next to counterparts
// that drive the local reg mem_req_byteen_r; they appear to be the two sides
// of the change (direct write vs. local reg forwarded by the assign at the
// bottom of the loop) - confirm against the real file before editing.
for (genvar i = 0; i < NUM_LANES; ++i) begin
reg [LSU_WORD_SIZE-1:0] mem_req_byteen_r;
always @(*) begin
// start from an all-zero mask; the case below sets the enabled bytes
mem_req_byteen[i] = '0;
mem_req_byteen_r = '0;
case (`INST_LSU_WSIZE(execute_if.data.op_type))
// size code 0: enable only the byte addressed by the offset
0: begin // 8-bit
mem_req_byteen[i][req_align[i]] = 1'b1;
mem_req_byteen_r[req_align[i]] = 1'b1;
end
// size code 1: offset bit 0 is replaced by 0 and 1, enabling both bytes
// of the 2-byte group that contains the offset
1: begin // 16 bit
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:1], 1'b0}] = 1'b1;
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:1], 1'b1}] = 1'b1;
mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:1], 1'b0}] = 1'b1;
mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:1], 1'b1}] = 1'b1;
end
// size code 2 is only decoded when XLEN_64 is defined: offset bits [1:0]
// are replaced by 00..11, enabling the 4-byte group that contains the offset
`ifdef XLEN_64
2: begin // 32 bit
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b00}] = 1'b1;
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b01}] = 1'b1;
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b10}] = 1'b1;
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b11}] = 1'b1;
mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b00}] = 1'b1;
mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b01}] = 1'b1;
mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b10}] = 1'b1;
mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b11}] = 1'b1;
end
`endif
// every other size code: full-word access, all bytes enabled
default : mem_req_byteen[i] = {LSU_WORD_SIZE{1'b1}};
// 3: 64 bit
default : mem_req_byteen_r = {LSU_WORD_SIZE{1'b1}};
endcase
end
// forward the locally computed mask to this lane's slot of the packed wire
assign mem_req_byteen[i] = mem_req_byteen_r;
end
// memory misalignment not supported!
@ -312,7 +314,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
`RESET_RELAY (mem_scheduler_reset, reset);
VX_mem_scheduler #(
.INSTANCE_ID ($sformatf("core%0d-lsu-memsched%0d", CORE_ID, BLOCK_ID)),
.INSTANCE_ID ($sformatf("%s-scheduler", INSTANCE_ID)),
.CORE_REQS (NUM_LANES),
.MEM_CHANNELS(NUM_LANES),
.WORD_SIZE (LSU_WORD_SIZE),
@ -504,11 +506,11 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
`ifdef DBG_TRACE_MEM
always @(posedge clk) begin
if (execute_if.valid && fence_lock) begin
`TRACE(1, ("%d: *** D$%0d fence wait\n", $time, CORE_ID));
`TRACE(1, ("%d: *** %s fence wait\n", $time, INSTANCE_ID));
end
if (mem_req_fire) begin
if (mem_req_rw) begin
`TRACE(1, ("%d: D$%0d Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, CORE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask));
`TRACE(1, ("%d: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask));
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES);
`TRACE(1, (", atype="));
`TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES);
@ -516,7 +518,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
`TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES);
`TRACE(1, (", tag=0x%0h (#%0d)\n", mem_req_tag, execute_if.data.uuid));
end else begin
`TRACE(1, ("%d: D$%0d Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, CORE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask));
`TRACE(1, ("%d: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask));
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES);
`TRACE(1, (", atype="));
`TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES);
@ -524,8 +526,8 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
end
end
if (mem_rsp_fire) begin
`TRACE(1, ("%d: D$%0d Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=",
$time, CORE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop));
`TRACE(1, ("%d: %s Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=",
$time, INSTANCE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop));
`TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data, NUM_LANES);
`TRACE(1, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid));
end
@ -533,36 +535,20 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
`endif
`ifdef DBG_SCOPE_LSU
if (CORE_ID == 0 && BLOCK_ID == 0) begin
`ifdef SCOPE
VX_scope_tap #(
.SCOPE_ID (3),
.TRIGGERW (3),
.PROBEW (`UUID_WIDTH+NUM_LANES*(`XLEN+4+`XLEN)+1+`UUID_WIDTH+NUM_LANES*`XLEN)
) scope_tap (
.clk(clk),
.reset(scope_reset),
.start(1'b0),
.stop(1'b0),
.triggers({reset, mem_req_fire, mem_rsp_fire}),
.probes({execute_if.data.uuid, full_addr, mem_req_rw, mem_req_byteen, mem_req_data, rsp_uuid, rsp_data}),
.bus_in(scope_bus_in),
.bus_out(scope_bus_out)
);
`endif
`ifdef CHIPSCOPE
wire [31:0] full_addr_0 = full_addr[0];
wire [31:0] mem_req_data_0 = mem_req_data[0];
wire [31:0] rsp_data_0 = rsp_data[0];
ila_lsu ila_lsu_inst (
.clk (clk),
.probe0 ({mem_req_data_0, execute_if.data.uuid, execute_if.data.wid, execute_if.data.PC, mem_req_mask, full_addr_0, mem_req_byteen, mem_req_rw, mem_req_ready, mem_req_valid}),
.probe1 ({rsp_data_0, rsp_uuid, mem_rsp_eop, rsp_pc, rsp_rd, mem_rsp_mask, rsp_wid, mem_rsp_ready, mem_rsp_valid}),
.probe2 ({lsu_mem_if.req_data.data, lsu_mem_if.req_data.tag, lsu_mem_if.req_data.byteen, lsu_mem_if.req_data.addr, lsu_mem_if.req_data.rw, lsu_mem_if.req_ready, lsu_mem_if.req_valid}),
.probe3 ({lsu_mem_if.rsp_data.data, lsu_mem_if.rsp_data.tag, lsu_mem_if.rsp_ready, lsu_mem_if.rsp_valid})
);
`endif
end
// Debug scope tap for the LSU slice (scope id 3), instantiated under DBG_SCOPE_LSU.
// Three trigger bits: reset, request fire, response fire.
// The start/stop pins are tied low; what they control is defined inside
// VX_scope_tap, which is not visible here.
VX_scope_tap #(
.SCOPE_ID (3),
.TRIGGERW (3),
// PROBEW has one term per entry of the probes concatenation below, in the same order:
// mem_req_rw, full_addr, mem_req_byteen, mem_req_data, request uuid, rsp_data, rsp_uuid.
// NOTE(review): assumes full_addr is NUM_LANES x XLEN bits and rsp_data is
// NUM_LANES x LSU_WORD_SIZE*8 bits - their declarations are outside this hunk, confirm.
.PROBEW (1 + NUM_LANES*(`XLEN + LSU_WORD_SIZE + LSU_WORD_SIZE*8) + `UUID_WIDTH + NUM_LANES*LSU_WORD_SIZE*8 + `UUID_WIDTH)
) scope_tap (
.clk (clk),
.reset (scope_reset),
.start (1'b0),
.stop (1'b0),
.triggers({reset, mem_req_fire, mem_rsp_fire}),
.probes ({mem_req_rw, full_addr, mem_req_byteen, mem_req_data, execute_if.data.uuid, rsp_data, rsp_uuid}),
.bus_in (scope_bus_in),
.bus_out(scope_bus_out)
);
`else
`SCOPE_IO_UNUSED()
`endif

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -14,8 +14,8 @@
`include "VX_define.vh"
module VX_lsu_unit import VX_gpu_pkg::*; #(
parameter CORE_ID = 0
) (
parameter `STRING INSTANCE_ID = ""
) (
`SCOPE_IO_DECL
input wire clk,
@ -24,7 +24,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
// Inputs
VX_dispatch_if.slave dispatch_if [`ISSUE_WIDTH],
// Outputs
// Outputs
VX_commit_if.master commit_if [`ISSUE_WIDTH],
VX_lsu_mem_if.master lsu_mem_if [`NUM_LSU_BLOCKS]
);
@ -32,10 +32,9 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
localparam NUM_LANES = `NUM_LSU_LANES;
`ifdef SCOPE
localparam scope_lsu = 0;
`SCOPE_IO_SWITCH (BLOCK_SIZE);
`endif
VX_execute_if #(
.NUM_LANES (NUM_LANES)
) per_block_execute_if[BLOCK_SIZE]();
@ -55,17 +54,16 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
.NUM_LANES (NUM_LANES)
) per_block_commit_if[BLOCK_SIZE]();
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : lsu_slices
`RESET_RELAY (block_reset, reset);
`RESET_RELAY (slice_reset, reset);
VX_lsu_slice #(
.CORE_ID (CORE_ID),
.BLOCK_ID (block_idx)
.INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, block_idx))
) lsu_slice(
`SCOPE_IO_BIND (scope_lsu+block_idx)
`SCOPE_IO_BIND (block_idx)
.clk (clk),
.reset (block_reset),
.reset (slice_reset),
.execute_if (per_block_execute_if[block_idx]),
.commit_if (per_block_commit_if[block_idx]),
.lsu_mem_if (lsu_mem_if[block_idx])
@ -82,5 +80,5 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
.commit_in_if (per_block_commit_if),
.commit_out_if (commit_if)
);
endmodule

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -14,29 +14,288 @@
`include "VX_define.vh"
module VX_operands import VX_gpu_pkg::*; #(
parameter CORE_ID = 0
parameter `STRING INSTANCE_ID = "",
parameter NUM_BANKS = 4,
parameter OUT_BUF = 4 // using 2-cycle EB for area reduction
) (
input wire clk,
input wire reset,
VX_writeback_if.slave writeback_if [`ISSUE_WIDTH],
VX_scoreboard_if.slave scoreboard_if [`ISSUE_WIDTH],
VX_operands_if.master operands_if [`ISSUE_WIDTH]
`ifdef PERF_ENABLE
output wire [`PERF_CTR_BITS-1:0] perf_stalls,
`endif
VX_writeback_if.slave writeback_if,
VX_scoreboard_if.slave scoreboard_if,
VX_operands_if.master operands_if
);
`UNUSED_SPARAM (INSTANCE_ID)
localparam NUM_SRC_REGS = 3;
localparam REQ_SEL_BITS = `CLOG2(NUM_SRC_REGS);
localparam REQ_SEL_WIDTH = `UP(REQ_SEL_BITS);
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
localparam PER_BANK_REGS = `NUM_REGS / NUM_BANKS;
localparam METADATAW = ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + `NR_BITS;
localparam DATAW = `UUID_WIDTH + METADATAW + 3 * `NUM_THREADS * `XLEN;
localparam RAM_ADDRW = `LOG2UP(`NUM_REGS * PER_ISSUE_WARPS);
localparam PER_BANK_ADDRW = RAM_ADDRW - BANK_SEL_BITS;
localparam XLEN_SIZE = `XLEN / 8;
localparam BYTEENW = `NUM_THREADS * XLEN_SIZE;
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
`RESET_RELAY (slice_reset, reset);
`UNUSED_VAR (writeback_if.data.sop)
VX_gpr_slice #(
.CORE_ID (CORE_ID)
) gpr_slice (
.clk (clk),
.reset (slice_reset),
.writeback_if (writeback_if[i]),
.scoreboard_if(scoreboard_if[i]),
.operands_if (operands_if[i])
wire [NUM_SRC_REGS-1:0] src_valid;
wire [NUM_SRC_REGS-1:0] req_in_valid;
wire [NUM_SRC_REGS-1:0] req_in_ready;
wire [NUM_SRC_REGS-1:0][PER_BANK_ADDRW-1:0] req_in_data;
wire [NUM_SRC_REGS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx;
wire [NUM_BANKS-1:0] gpr_rd_valid_n, gpr_rd_ready;
reg [NUM_BANKS-1:0] gpr_rd_valid;
wire [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr_n;
reg [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr;
wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data;
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx_n;
reg [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx;
wire pipe_in_ready;
reg pipe_out_valid;
wire pipe_out_ready;
reg [`UUID_WIDTH-1:0] pipe_out_uuid;
reg [METADATAW-1:0] pipe_out_data;
reg [NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data, src_data_n;
reg [NUM_SRC_REGS-1:0] data_fetched;
reg has_collision, has_collision_n;
wire stg_in_valid, stg_in_ready;
wire [NUM_SRC_REGS-1:0][`NR_BITS-1:0] src_regs = {scoreboard_if.data.rs3,
scoreboard_if.data.rs2,
scoreboard_if.data.rs1};
// Split each source register number (src_regs[0..2] = rs1, rs2, rs3) into a
// bank selector and a per-bank read address.
for (genvar i = 0; i < NUM_SRC_REGS; ++i) begin
// per-bank address: the register-number bits above the bank-select field,
// with the warp-in-slice id (wis) appended as the low bits when ISSUE_WIS != 0
if (ISSUE_WIS != 0) begin
assign req_in_data[i] = {src_regs[i][`NR_BITS-1:BANK_SEL_BITS], scoreboard_if.data.wis};
end else begin
assign req_in_data[i] = src_regs[i][`NR_BITS-1:BANK_SEL_BITS];
end
// bank selector: the low BANK_SEL_BITS bits of the register number
// (constant zero when there is a single bank)
if (NUM_BANKS != 1) begin
assign req_bank_idx[i] = src_regs[i][BANK_SEL_BITS-1:0];
end else begin
assign req_bank_idx[i] = '0;
end
end
// An operand needs a read only if its register number is non-zero and it has
// not already been fetched for the instruction currently being presented.
// NOTE(review): register 0 is skipped, presumably because it is the hard-wired
// zero register - confirm.
for (genvar i = 0; i < NUM_SRC_REGS; ++i) begin
assign src_valid[i] = (src_regs[i] != 0) && ~data_fetched[i];
end
// read requests are raised only while the scoreboard presents a valid instruction
assign req_in_valid = {NUM_SRC_REGS{scoreboard_if.valid}} & src_valid;
// Crossbar from the NUM_SRC_REGS operand read requests to the NUM_BANKS
// register-file banks. Each input carries a per-bank address (req_in_data) and
// names its target bank through sel_in (req_bank_idx).
// NOTE(review): sel_out is stored as gpr_rd_req_idx and later used to index
// src_data_n, so it presumably reports which input each bank output came from -
// confirm against VX_stream_xbar.
VX_stream_xbar #(
.NUM_INPUTS (NUM_SRC_REGS),
.NUM_OUTPUTS (NUM_BANKS),
.DATAW (PER_BANK_ADDRW),
.ARBITER ("P"), // use priority arbiter
.PERF_CTR_BITS(`PERF_CTR_BITS),
.OUT_BUF (0) // no output buffering
) req_xbar (
.clk (clk),
.reset (reset),
`UNUSED_PIN(collisions),
.valid_in (req_in_valid),
.data_in (req_in_data),
.sel_in (req_bank_idx),
.ready_in (req_in_ready),
.valid_out (gpr_rd_valid_n),
.data_out (gpr_rd_addr_n),
.sel_out (gpr_rd_req_idx_n),
.ready_out (gpr_rd_ready)
);
// every bank output is back-pressured by the same signal: the output buffer's ready
assign gpr_rd_ready = {NUM_BANKS{stg_in_ready}};
// Bank-conflict detection: has_collision_n is raised when any two source
// operands that still need a register read (src_valid) select the same bank.
// Every unordered operand pair (lo, hi) with lo < hi is examined exactly once.
always @(*) begin
    has_collision_n = 0;
    for (integer lo = 0; lo < NUM_SRC_REGS; ++lo) begin
        for (integer hi = lo + 1; hi < NUM_SRC_REGS; ++hi) begin
            has_collision_n |= (req_bank_idx[lo] == req_bank_idx[hi])
                            && src_valid[lo]
                            && src_valid[hi];
        end
    end
end
// Combinational next value of the collected operand data: start from the
// registered src_data and, for every bank whose registered read-valid bit is
// set, overwrite the operand slot named by that bank's registered request
// index with the data read from that bank.
always @(*) begin
src_data_n = src_data;
for (integer b = 0; b < NUM_BANKS; ++b) begin
if (gpr_rd_valid[b]) begin
src_data_n[gpr_rd_req_idx[b]] = gpr_rd_data[b];
end
end
end
// Pipeline stage between the scoreboard interface and the output buffer.
// The stage stalls while it holds a valid entry that the output side is not accepting.
wire pipe_stall = pipe_out_valid && ~pipe_out_ready;
assign pipe_in_ready = ~pipe_stall;
// The scoreboard instruction is consumed only when the stage is free AND the
// reads it still needs have no bank conflict; otherwise it stays presented.
assign scoreboard_if.ready = pipe_in_ready && ~has_collision_n;
wire stg_in_fire = stg_in_valid && stg_in_ready;
always @(posedge clk) begin
// control state: cleared by reset, updated only when the stage is not stalled
if (reset) begin
pipe_out_valid <= 0;
gpr_rd_valid <= '0;
data_fetched <= '0;
src_data <= '0;
end else begin
if (~pipe_stall) begin
pipe_out_valid <= scoreboard_if.valid;
gpr_rd_valid <= gpr_rd_valid_n;
// data_fetched is cleared once the instruction is consumed; until then it
// accumulates the requests the crossbar reported ready, which removes
// them from src_valid on later cycles
if (scoreboard_if.ready) begin
data_fetched <= '0;
end else begin
data_fetched <= data_fetched | req_in_ready;
end
// operand data is cleared when the output buffer takes the entry,
// otherwise it keeps accumulating the bank read results
if (stg_in_fire) begin
src_data <= '0;
end else begin
src_data <= src_data_n;
end
end
end
// payload registers: no reset, captured whenever the stage is not stalled
if (~pipe_stall) begin
pipe_out_uuid <= scoreboard_if.data.uuid;
// field order here must match the data_out unpacking of the output buffer
pipe_out_data <= {
scoreboard_if.data.wis,
scoreboard_if.data.tmask,
scoreboard_if.data.PC,
scoreboard_if.data.wb,
scoreboard_if.data.ex_type,
scoreboard_if.data.op_type,
scoreboard_if.data.op_args,
scoreboard_if.data.rd
};
has_collision <= has_collision_n;
gpr_rd_addr <= gpr_rd_addr_n;
gpr_rd_req_idx <= gpr_rd_req_idx_n;
end
end
assign pipe_out_ready = stg_in_ready;
// the entry is offered to the output buffer only when the registered collision flag is low
assign stg_in_valid = pipe_out_valid && ~has_collision;
// Output buffer towards operands_if. Its size and output-register options are
// derived from the OUT_BUF module parameter through the TO_OUT_BUF_* macros.
VX_elastic_buffer #(
.DATAW (DATAW),
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)),
.LUTRAM (1)
) out_buffer (
.clk (clk),
.reset (reset),
.valid_in (stg_in_valid),
.ready_in (stg_in_ready),
// uuid + metadata + three operand vectors; the operand data is taken from the
// combinational src_data_n (which includes the bank data arriving this cycle),
// not from the src_data register
.data_in ({
pipe_out_uuid,
pipe_out_data,
src_data_n[0],
src_data_n[1],
src_data_n[2]
}),
// unpacked in the same order as packed above: slots 0/1/2 map to rs1/rs2/rs3 data
.data_out ({
operands_if.data.uuid,
operands_if.data.wis,
operands_if.data.tmask,
operands_if.data.PC,
operands_if.data.wb,
operands_if.data.ex_type,
operands_if.data.op_type,
operands_if.data.op_args,
operands_if.data.rd,
operands_if.data.rs1_data,
operands_if.data.rs2_data,
operands_if.data.rs3_data
}),
.valid_out (operands_if.valid),
.ready_out (operands_if.ready)
);
// Write-side address split: the same bit slicing as the read requests, applied
// to the writeback destination register.
// per-bank write address: rd bits above the bank-select field, with wis
// appended as the low bits when ISSUE_WIS != 0
wire [PER_BANK_ADDRW-1:0] gpr_wr_addr;
if (ISSUE_WIS != 0) begin
assign gpr_wr_addr = {writeback_if.data.rd[`NR_BITS-1:BANK_SEL_BITS], writeback_if.data.wis};
end else begin
assign gpr_wr_addr = writeback_if.data.rd[`NR_BITS-1:BANK_SEL_BITS];
end
// target bank: the low BANK_SEL_BITS bits of rd (constant zero with a single bank)
wire [BANK_SEL_WIDTH-1:0] gpr_wr_bank_idx;
if (NUM_BANKS != 1) begin
assign gpr_wr_bank_idx = writeback_if.data.rd[BANK_SEL_BITS-1:0];
end else begin
assign gpr_wr_bank_idx = '0;
end
// Global write gate for the register banks.
// With GPR_RESET defined, wr_enabled is initialised to 0 and set to 1 on the
// first clock edge that samples reset high; nothing in this block clears it again.
// NOTE(review): presumably this keeps the RAMs from being written before the
// first reset has been seen - confirm the intent.
// Without GPR_RESET, writes are always enabled.
`ifdef GPR_RESET
reg wr_enabled = 0;
always @(posedge clk) begin
if (reset) begin
wr_enabled <= 1;
end
end
`else
wire wr_enabled = 1;
`endif
// One dual-port RAM per register bank. Each RAM entry holds the register value
// of all threads (XLEN * NUM_THREADS bits).
for (genvar b = 0; b < NUM_BANKS; ++b) begin
// a bank is written when writes are enabled, the writeback is valid and
// (with more than one bank) the destination register selects this bank
wire gpr_wr_enabled;
if (BANK_SEL_BITS != 0) begin
assign gpr_wr_enabled = wr_enabled
&& writeback_if.valid
&& (gpr_wr_bank_idx == BANK_SEL_BITS'(b));
end else begin
assign gpr_wr_enabled = wr_enabled && writeback_if.valid;
end
// write-enable mask: each thread's tmask bit is replicated over the
// XLEN_SIZE enable bits that cover that thread's word
wire [BYTEENW-1:0] wren;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
assign wren[i*XLEN_SIZE+:XLEN_SIZE] = {XLEN_SIZE{writeback_if.data.tmask[i]}};
end
// GPR_RESET selects the RAM variant that has a reset port; the parameter
// list and the remaining ports are shared by both variants
`ifdef GPR_RESET
VX_dp_ram_rst #(
`else
VX_dp_ram #(
`endif
.DATAW (`XLEN * `NUM_THREADS),
.SIZE (PER_BANK_REGS * PER_ISSUE_WARPS),
.WRENW (BYTEENW),
.NO_RWCHECK (1)
) gpr_ram (
.clk (clk),
`ifdef GPR_RESET
.reset (reset),
`endif
// read enable is tied high; the read address is the registered per-bank
// address captured from the crossbar output
.read (1'b1),
.wren (wren),
.write (gpr_wr_enabled),
.waddr (gpr_wr_addr),
.wdata (writeback_if.data.data),
.raddr (gpr_rd_addr[b]),
.rdata (gpr_rd_data[b])
);
end
`ifdef PERF_ENABLE
// Performance counter: number of cycles in which a valid scoreboard
// instruction could have entered the stage (pipe_in_ready) but was held back
// by a register-bank conflict between its source operands.
wire collision_stall = scoreboard_if.valid && pipe_in_ready && has_collision_n;
reg [`PERF_CTR_BITS-1:0] collision_count;
always @(posedge clk) begin
    if (reset) begin
        collision_count <= '0;
    end else begin
        collision_count <= collision_count + `PERF_CTR_BITS'(collision_stall);
    end
end
assign perf_stalls = collision_count;
`endif
endmodule

View file

@ -14,13 +14,14 @@
`include "VX_define.vh"
module VX_schedule import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "",
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
`ifdef PERF_ENABLE
VX_pipeline_perf_if.schedule perf_schedule_if,
output sched_perf_t sched_perf,
`endif
// configuration
@ -42,6 +43,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
// status
output wire busy
);
`UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_PARAM (CORE_ID)
reg [`NUM_WARPS-1:0] active_warps, active_warps_n; // updated when a warp is activated or disabled
@ -290,7 +292,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
`RESET_RELAY (split_join_reset, reset);
VX_split_join #(
.CORE_ID (CORE_ID)
.INSTANCE_ID ($sformatf("%s-splitjoin", INSTANCE_ID))
) split_join (
.clk (clk),
.reset (split_join_reset),
@ -368,24 +370,42 @@ module VX_schedule import VX_gpu_pkg::*; #(
assign schedule_if.data.uuid = instr_uuid;
`RESET_RELAY (pending_instr_reset, reset);
// Track pending instructions per warp
wire no_pending_instr;
VX_pending_instr #(
.CTR_WIDTH (12),
.DECR_COUNT (`ISSUE_WIDTH),
.ALM_EMPTY (1)
) pending_instr(
.clk (clk),
.reset (pending_instr_reset),
.incr (schedule_if_fire),
.incr_wid (schedule_if.data.wid),
.decr (commit_sched_if.committed),
.decr_wid (commit_sched_if.committed_wid),
.alm_empty_wid (sched_csr_if.alm_empty_wid),
.alm_empty (sched_csr_if.alm_empty),
.empty (no_pending_instr)
);
// Per-warp pending-instruction tracking.
// per_warp_incr is a one-hot vector: when an instruction is scheduled
// (schedule_if_fire), only the bit of the scheduled warp is set.
reg [`NUM_WARPS-1:0] per_warp_incr;
always @(*) begin
per_warp_incr = 0;
if (schedule_if_fire) begin
per_warp_incr[schedule_if.data.wid] = 1;
end
end
wire [`NUM_WARPS-1:0] pending_warp_empty;
wire [`NUM_WARPS-1:0] pending_warp_alm_empty;
// One counter per warp, each behind its own reset relay: incremented when the
// warp is scheduled, decremented by that warp's bit of committed_warps.
// Only the empty / almost-empty flags are used; full, alm_full and size are unconnected.
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
`RESET_RELAY (pending_instr_reset, reset);
VX_pending_size #(
.SIZE (4096),
.ALM_EMPTY (1)
) counter (
.clk (clk),
.reset (pending_instr_reset),
.incr (per_warp_incr[i]),
.decr (commit_sched_if.committed_warps[i]),
.empty (pending_warp_empty[i]),
.alm_empty (pending_warp_alm_empty[i]),
`UNUSED_PIN (full),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
);
end
// the CSR interface reads the almost-empty flag of the warp it names in alm_empty_wid
assign sched_csr_if.alm_empty = pending_warp_alm_empty[sched_csr_if.alm_empty_wid];
// nothing is pending only when every warp's counter reports empty
wire no_pending_instr = (& pending_warp_empty);
`BUFFER_EX(busy, (active_warps != 0 || ~no_pending_instr), 1'b1, 1);
@ -412,7 +432,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
end
end
end
`RUNTIME_ASSERT(timeout_ctr < `STALL_TIMEOUT, ("%t: *** core%0d-scheduler-timeout: stalled_warps=%b", $time, CORE_ID, stalled_warps))
`RUNTIME_ASSERT(timeout_ctr < `STALL_TIMEOUT, ("%t: *** %s timeout: stalled_warps=%b", $time, INSTANCE_ID, stalled_warps))
`ifdef PERF_ENABLE
reg [`PERF_CTR_BITS-1:0] perf_sched_idles;
@ -431,8 +451,8 @@ module VX_schedule import VX_gpu_pkg::*; #(
end
end
assign perf_schedule_if.sched_idles = perf_sched_idles;
assign perf_schedule_if.sched_stalls = perf_sched_stalls;
assign sched_perf.idles = perf_sched_idles;
assign sched_perf.stalls = perf_sched_stalls;
`endif
endmodule

View file

@ -14,39 +14,37 @@
`include "VX_define.vh"
module VX_scoreboard import VX_gpu_pkg::*; #(
parameter CORE_ID = 0
parameter `STRING INSTANCE_ID = ""
) (
input wire clk,
input wire reset,
`ifdef PERF_ENABLE
output reg [`PERF_CTR_BITS-1:0] perf_scb_stalls,
output reg [`PERF_CTR_BITS-1:0] perf_units_uses [`NUM_EX_UNITS],
output reg [`PERF_CTR_BITS-1:0] perf_sfu_uses [`NUM_SFU_UNITS],
output reg [`PERF_CTR_BITS-1:0] perf_stalls,
output reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_units_uses,
output reg [`NUM_SFU_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_sfu_uses,
`endif
VX_writeback_if.slave writeback_if [`ISSUE_WIDTH],
VX_ibuffer_if.slave ibuffer_if [`NUM_WARPS],
VX_scoreboard_if.master scoreboard_if [`ISSUE_WIDTH]
VX_writeback_if.slave writeback_if,
VX_ibuffer_if.slave ibuffer_if [PER_ISSUE_WARPS],
VX_scoreboard_if.master scoreboard_if
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_SPARAM (INSTANCE_ID)
localparam DATAW = `UUID_WIDTH + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + (`NR_BITS * 4) + 1;
VX_ibuffer_if staging_if [PER_ISSUE_WARPS]();
reg [PER_ISSUE_WARPS-1:0] operands_ready;
`ifdef PERF_ENABLE
reg [`NUM_WARPS-1:0][`NUM_EX_UNITS-1:0] perf_inuse_units_per_cycle;
reg [PER_ISSUE_WARPS-1:0][`NUM_EX_UNITS-1:0] perf_inuse_units_per_cycle;
wire [`NUM_EX_UNITS-1:0] perf_units_per_cycle, perf_units_per_cycle_r;
reg [`NUM_WARPS-1:0][`NUM_SFU_UNITS-1:0] perf_inuse_sfu_per_cycle;
reg [PER_ISSUE_WARPS-1:0][`NUM_SFU_UNITS-1:0] perf_inuse_sfu_per_cycle;
wire [`NUM_SFU_UNITS-1:0] perf_sfu_per_cycle, perf_sfu_per_cycle_r;
wire [`NUM_WARPS-1:0] perf_issue_stalls_per_cycle;
wire [`CLOG2(`NUM_WARPS+1)-1:0] perf_stalls_per_cycle, perf_stalls_per_cycle_r;
`POP_COUNT(perf_stalls_per_cycle, perf_issue_stalls_per_cycle);
VX_reduce #(
.DATAW_IN (`NUM_EX_UNITS),
.N (`NUM_WARPS),
.N (PER_ISSUE_WARPS),
.OP ("|")
) perf_units_reduce (
.data_in (perf_inuse_units_per_cycle),
@ -55,22 +53,28 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
VX_reduce #(
.DATAW_IN (`NUM_SFU_UNITS),
.N (`NUM_WARPS),
.N (PER_ISSUE_WARPS),
.OP ("|")
) perf_sfu_reduce (
.data_in (perf_inuse_sfu_per_cycle),
.data_out (perf_sfu_per_cycle)
);
`BUFFER(perf_stalls_per_cycle_r, perf_stalls_per_cycle);
`BUFFER_EX(perf_units_per_cycle_r, perf_units_per_cycle, 1'b1, `CDIV(`NUM_WARPS, `MAX_FANOUT));
`BUFFER_EX(perf_sfu_per_cycle_r, perf_sfu_per_cycle, 1'b1, `CDIV(`NUM_WARPS, `MAX_FANOUT));
`BUFFER_EX(perf_units_per_cycle_r, perf_units_per_cycle, 1'b1, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT));
`BUFFER_EX(perf_sfu_per_cycle_r, perf_sfu_per_cycle, 1'b1, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT));
// Gather the valid bit of every per-warp staging entry into one vector.
wire [PER_ISSUE_WARPS-1:0] stg_valid_in;
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin
assign stg_valid_in[w] = staging_if[w].valid;
end
// A cycle counts as a stall when at least one staging entry is valid but none
// of the valid entries has its operands_ready bit set.
wire perf_stall_per_cycle = (|stg_valid_in) && ~(|(stg_valid_in & operands_ready));
always @(posedge clk) begin
if (reset) begin
perf_scb_stalls <= '0;
perf_stalls <= '0;
end else begin
perf_scb_stalls <= perf_scb_stalls + `PERF_CTR_BITS'(perf_stalls_per_cycle_r);
perf_stalls <= perf_stalls + `PERF_CTR_BITS'(perf_stall_per_cycle);
end
end
@ -95,138 +99,121 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
end
`endif
VX_ibuffer_if staging_if [`NUM_WARPS]();
wire [`NUM_WARPS-1:0][3:0] staging_opds_busy;
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin
VX_elastic_buffer #(
.DATAW (DATAW),
.SIZE (1)
) stanging_buf (
.clk (clk),
.reset (reset),
.valid_in (ibuffer_if[i].valid),
.data_in (ibuffer_if[i].data),
.ready_in (ibuffer_if[i].ready),
.valid_out(staging_if[i].valid),
.data_out (staging_if[i].data),
.ready_out(staging_if[i].ready)
.valid_in (ibuffer_if[w].valid),
.data_in (ibuffer_if[w].data),
.ready_in (ibuffer_if[w].ready),
.valid_out(staging_if[w].valid),
.data_out (staging_if[w].data),
.ready_out(staging_if[w].ready)
);
end
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin
reg [`NUM_REGS-1:0] inuse_regs;
reg [3:0] operands_busy_r, operands_busy_n;
reg [3:0] operands_busy, operands_busy_n;
localparam iw = i % `ISSUE_WIDTH;
localparam wis = i / `ISSUE_WIDTH;
wire ibuffer_fire = ibuffer_if[w].valid && ibuffer_if[w].ready;
wire ibuffer_fire = ibuffer_if[i].valid && ibuffer_if[i].ready;
wire staging_fire = staging_if[w].valid && staging_if[w].ready;
wire staging_fire = staging_if[i].valid && staging_if[i].ready;
wire writeback_fire = writeback_if[iw].valid
&& (writeback_if[iw].data.wis == ISSUE_WIS_W'(wis))
&& writeback_if[iw].data.eop;
wire writeback_fire = writeback_if.valid
&& (writeback_if.data.wis == ISSUE_WIS_W'(w))
&& writeback_if.data.eop;
`ifdef PERF_ENABLE
reg [`NUM_REGS-1:0][`EX_WIDTH-1:0] inuse_units;
reg [`NUM_REGS-1:0][`SFU_WIDTH-1:0] inuse_sfu;
reg [`SFU_WIDTH-1:0] sfu_type;
always @(*) begin
case (staging_if[i].data.op_type)
`INST_SFU_CSRRW,
`INST_SFU_CSRRS,
`INST_SFU_CSRRC: sfu_type = `SFU_CSRS;
default: sfu_type = `SFU_WCTL;
endcase
end
always @(*) begin
perf_inuse_units_per_cycle[i] = '0;
perf_inuse_sfu_per_cycle[i] = '0;
if (staging_if[i].valid) begin
if (operands_busy_r[0]) begin
perf_inuse_units_per_cycle[i][inuse_units[staging_if[i].data.rd]] = 1;
if (inuse_units[staging_if[i].data.rd] == `EX_SFU) begin
perf_inuse_sfu_per_cycle[i][inuse_sfu[staging_if[i].data.rd]] = 1;
perf_inuse_units_per_cycle[w] = '0;
perf_inuse_sfu_per_cycle[w] = '0;
if (staging_if[w].valid) begin
if (operands_busy[0]) begin
perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rd]] = 1;
if (inuse_units[staging_if[w].data.rd] == `EX_SFU) begin
perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rd]] = 1;
end
end
if (operands_busy_r[1]) begin
perf_inuse_units_per_cycle[i][inuse_units[staging_if[i].data.rs1]] = 1;
if (inuse_units[staging_if[i].data.rs1] == `EX_SFU) begin
perf_inuse_sfu_per_cycle[i][inuse_sfu[staging_if[i].data.rs1]] = 1;
if (operands_busy[1]) begin
perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rs1]] = 1;
if (inuse_units[staging_if[w].data.rs1] == `EX_SFU) begin
perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rs1]] = 1;
end
end
if (operands_busy_r[2]) begin
perf_inuse_units_per_cycle[i][inuse_units[staging_if[i].data.rs2]] = 1;
if (inuse_units[staging_if[i].data.rs2] == `EX_SFU) begin
perf_inuse_sfu_per_cycle[i][inuse_sfu[staging_if[i].data.rs2]] = 1;
if (operands_busy[2]) begin
perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rs2]] = 1;
if (inuse_units[staging_if[w].data.rs2] == `EX_SFU) begin
perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rs2]] = 1;
end
end
if (operands_busy_r[3]) begin
perf_inuse_units_per_cycle[i][inuse_units[staging_if[i].data.rs3]] = 1;
if (inuse_units[staging_if[i].data.rs3] == `EX_SFU) begin
perf_inuse_sfu_per_cycle[i][inuse_sfu[staging_if[i].data.rs3]] = 1;
if (operands_busy[3]) begin
perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rs3]] = 1;
if (inuse_units[staging_if[w].data.rs3] == `EX_SFU) begin
perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rs3]] = 1;
end
end
end
end
assign perf_issue_stalls_per_cycle[i] = staging_if[i].valid && ~staging_if[i].ready;
`endif
always @(*) begin
operands_busy_n = operands_busy_r;
operands_busy_n = operands_busy;
if (ibuffer_fire) begin
operands_busy_n = {
inuse_regs[ibuffer_if[i].data.rs3],
inuse_regs[ibuffer_if[i].data.rs2],
inuse_regs[ibuffer_if[i].data.rs1],
inuse_regs[ibuffer_if[i].data.rd]
inuse_regs[ibuffer_if[w].data.rs3],
inuse_regs[ibuffer_if[w].data.rs2],
inuse_regs[ibuffer_if[w].data.rs1],
inuse_regs[ibuffer_if[w].data.rd]
};
end
if (writeback_fire) begin
if (ibuffer_fire) begin
if (writeback_if[iw].data.rd == ibuffer_if[i].data.rd) begin
if (writeback_if.data.rd == ibuffer_if[w].data.rd) begin
operands_busy_n[0] = 0;
end
if (writeback_if[iw].data.rd == ibuffer_if[i].data.rs1) begin
if (writeback_if.data.rd == ibuffer_if[w].data.rs1) begin
operands_busy_n[1] = 0;
end
if (writeback_if[iw].data.rd == ibuffer_if[i].data.rs2) begin
if (writeback_if.data.rd == ibuffer_if[w].data.rs2) begin
operands_busy_n[2] = 0;
end
if (writeback_if[iw].data.rd == ibuffer_if[i].data.rs3) begin
if (writeback_if.data.rd == ibuffer_if[w].data.rs3) begin
operands_busy_n[3] = 0;
end
end else begin
if (writeback_if[iw].data.rd == staging_if[i].data.rd) begin
if (writeback_if.data.rd == staging_if[w].data.rd) begin
operands_busy_n[0] = 0;
end
if (writeback_if[iw].data.rd == staging_if[i].data.rs1) begin
if (writeback_if.data.rd == staging_if[w].data.rs1) begin
operands_busy_n[1] = 0;
end
if (writeback_if[iw].data.rd == staging_if[i].data.rs2) begin
if (writeback_if.data.rd == staging_if[w].data.rs2) begin
operands_busy_n[2] = 0;
end
if (writeback_if[iw].data.rd == staging_if[i].data.rs3) begin
if (writeback_if.data.rd == staging_if[w].data.rs3) begin
operands_busy_n[3] = 0;
end
end
end
if (staging_fire && staging_if[i].data.wb) begin
if (staging_if[i].data.rd == ibuffer_if[i].data.rd) begin
if (staging_fire && staging_if[w].data.wb) begin
if (staging_if[w].data.rd == ibuffer_if[w].data.rd) begin
operands_busy_n[0] = 1;
end
if (staging_if[i].data.rd == ibuffer_if[i].data.rs1) begin
if (staging_if[w].data.rd == ibuffer_if[w].data.rs1) begin
operands_busy_n[1] = 1;
end
if (staging_if[i].data.rd == ibuffer_if[i].data.rs2) begin
if (staging_if[w].data.rd == ibuffer_if[w].data.rs2) begin
operands_busy_n[2] = 1;
end
if (staging_if[i].data.rd == ibuffer_if[i].data.rs3) begin
if (staging_if[w].data.rd == ibuffer_if[w].data.rs3) begin
operands_busy_n[3] = 1;
end
end
@ -237,25 +224,24 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
inuse_regs <= '0;
end else begin
if (writeback_fire) begin
inuse_regs[writeback_if[iw].data.rd] <= 0;
inuse_regs[writeback_if.data.rd] <= 0;
end
if (staging_fire && staging_if[i].data.wb) begin
inuse_regs[staging_if[i].data.rd] <= 1;
if (staging_fire && staging_if[w].data.wb) begin
inuse_regs[staging_if[w].data.rd] <= 1;
end
end
operands_busy_r <= operands_busy_n;
operands_busy <= operands_busy_n;
operands_ready[w] <= ~(| operands_busy_n);
`ifdef PERF_ENABLE
if (staging_fire && staging_if[i].data.wb) begin
inuse_units[staging_if[i].data.rd] <= staging_if[i].data.ex_type;
if (staging_if[i].data.ex_type == `EX_SFU) begin
inuse_sfu[staging_if[i].data.rd] <= sfu_type;
if (staging_fire && staging_if[w].data.wb) begin
inuse_units[staging_if[w].data.rd] <= staging_if[w].data.ex_type;
if (staging_if[w].data.ex_type == `EX_SFU) begin
inuse_sfu[staging_if[w].data.rd] <= op_to_sfu_type(staging_if[w].data.op_type);
end
end
`endif
end
assign staging_opds_busy[i] = operands_busy_r;
`ifdef SIMULATION
reg [31:0] timeout_ctr;
@ -263,11 +249,11 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
if (reset) begin
timeout_ctr <= '0;
end else begin
if (staging_if[i].valid && ~staging_if[i].ready) begin
if (staging_if[w].valid && ~staging_if[w].ready) begin
`ifdef DBG_TRACE_PIPELINE
`TRACE(3, ("%d: *** core%0d-scoreboard-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n",
$time, CORE_ID, i, {staging_if[i].data.PC, 1'b0}, staging_if[i].data.tmask, timeout_ctr,
operands_busy_r, staging_if[i].data.uuid));
`TRACE(3, ("%d: *** %s-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n",
$time, INSTANCE_ID, w, {staging_if[w].data.PC, 1'b0}, staging_if[w].data.tmask, timeout_ctr,
operands_busy, staging_if[w].data.uuid));
`endif
timeout_ctr <= timeout_ctr + 1;
end else if (ibuffer_fire) begin
@ -277,59 +263,57 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
end
`RUNTIME_ASSERT((timeout_ctr < `STALL_TIMEOUT),
("%t: *** core%0d-scoreboard-timeout: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)",
$time, CORE_ID, i, {staging_if[i].data.PC, 1'b0}, staging_if[i].data.tmask, timeout_ctr,
operands_busy_r, staging_if[i].data.uuid));
("%t: *** %s timeout: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)",
$time, INSTANCE_ID, w, {staging_if[w].data.PC, 1'b0}, staging_if[w].data.tmask, timeout_ctr,
operands_busy, staging_if[w].data.uuid));
`RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if[iw].data.rd] != 0,
("%t: *** core%0d: invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)",
$time, CORE_ID, i, {writeback_if[iw].data.PC, 1'b0}, writeback_if[iw].data.tmask, writeback_if[iw].data.rd, writeback_if[iw].data.uuid));
`RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if.data.rd] != 0,
("%t: *** %s invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)",
$time, INSTANCE_ID, w, {writeback_if.data.PC, 1'b0}, writeback_if.data.tmask, writeback_if.data.rd, writeback_if.data.uuid));
`endif
end
`RESET_RELAY (arb_reset, reset);
wire [PER_ISSUE_WARPS-1:0] arb_valid_in;
wire [PER_ISSUE_WARPS-1:0][DATAW-1:0] arb_data_in;
wire [PER_ISSUE_WARPS-1:0] arb_ready_in;
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
wire [ISSUE_RATIO-1:0] valid_in;
wire [ISSUE_RATIO-1:0][DATAW-1:0] data_in;
wire [ISSUE_RATIO-1:0] ready_in;
for (genvar j = 0; j < ISSUE_RATIO; ++j) begin
wire operands_ready = ~(| staging_opds_busy[j * `ISSUE_WIDTH + i]);
assign valid_in[j] = staging_if[j * `ISSUE_WIDTH + i].valid && operands_ready;
assign data_in[j] = staging_if[j * `ISSUE_WIDTH + i].data;
assign staging_if[j * `ISSUE_WIDTH + i].ready = ready_in[j] && operands_ready;
end
VX_stream_arb #(
.NUM_INPUTS (ISSUE_RATIO),
.DATAW (DATAW),
.ARBITER ("R"),
.OUT_BUF (2)
) out_arb (
.clk (clk),
.reset (arb_reset),
.valid_in (valid_in),
.ready_in (ready_in),
.data_in (data_in),
.data_out ({
scoreboard_if[i].data.uuid,
scoreboard_if[i].data.tmask,
scoreboard_if[i].data.PC,
scoreboard_if[i].data.ex_type,
scoreboard_if[i].data.op_type,
scoreboard_if[i].data.op_args,
scoreboard_if[i].data.wb,
scoreboard_if[i].data.rd,
scoreboard_if[i].data.rs1,
scoreboard_if[i].data.rs2,
scoreboard_if[i].data.rs3
}),
.valid_out (scoreboard_if[i].valid),
.ready_out (scoreboard_if[i].ready),
.sel_out (scoreboard_if[i].data.wis)
);
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin
assign arb_valid_in[w] = staging_if[w].valid && operands_ready[w];
assign arb_data_in[w] = staging_if[w].data;
assign staging_if[w].ready = arb_ready_in[w] && operands_ready[w];
end
`RESET_RELAY (arb_reset, reset);
VX_stream_arb #(
.NUM_INPUTS (PER_ISSUE_WARPS),
.DATAW (DATAW),
.ARBITER ("F"),
.LUTRAM (1),
.OUT_BUF (4) // using 2-cycle EB for area reduction
) out_arb (
.clk (clk),
.reset (arb_reset),
.valid_in (arb_valid_in),
.ready_in (arb_ready_in),
.data_in (arb_data_in),
.data_out ({
scoreboard_if.data.uuid,
scoreboard_if.data.tmask,
scoreboard_if.data.PC,
scoreboard_if.data.ex_type,
scoreboard_if.data.op_type,
scoreboard_if.data.op_args,
scoreboard_if.data.wb,
scoreboard_if.data.rd,
scoreboard_if.data.rs1,
scoreboard_if.data.rs2,
scoreboard_if.data.rs3
}),
.valid_out (scoreboard_if.valid),
.ready_out (scoreboard_if.ready),
.sel_out (scoreboard_if.data.wis)
);
endmodule

View file

@ -14,6 +14,7 @@
`include "VX_define.vh"
module VX_sfu_unit import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "",
parameter CORE_ID = 0
) (
input wire clk,
@ -39,7 +40,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
VX_commit_if.master commit_if [`ISSUE_WIDTH],
VX_warp_ctl_if.master warp_ctl_if
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_SPARAM (INSTANCE_ID)
localparam BLOCK_SIZE = 1;
localparam NUM_LANES = `NUM_SFU_LANES;
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
@ -83,7 +84,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
`RESET_RELAY (wctl_reset, reset);
VX_wctl_unit #(
.CORE_ID (CORE_ID),
.INSTANCE_ID ($sformatf("%s-wctl", INSTANCE_ID)),
.NUM_LANES (NUM_LANES)
) wctl_unit (
.clk (clk),
@ -111,6 +112,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
`RESET_RELAY (csr_reset, reset);
VX_csr_unit #(
.INSTANCE_ID ($sformatf("%s-csr", INSTANCE_ID)),
.CORE_ID (CORE_ID),
.NUM_LANES (NUM_LANES)
) csr_unit (

View file

@ -14,7 +14,7 @@
`include "VX_define.vh"
module VX_split_join import VX_gpu_pkg::*; #(
parameter CORE_ID = 0
parameter `STRING INSTANCE_ID = ""
) (
input wire clk,
input wire reset,
@ -31,7 +31,7 @@ module VX_split_join import VX_gpu_pkg::*; #(
input wire [`NW_WIDTH-1:0] stack_wid,
output wire [`DV_STACK_SIZEW-1:0] stack_ptr
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_SPARAM (INSTANCE_ID)
wire [(`NUM_THREADS+`PC_BITS)-1:0] ipdom_data [`NUM_WARPS-1:0];
wire [`DV_STACK_SIZEW-1:0] ipdom_q_ptr [`NUM_WARPS-1:0];

399
hw/rtl/core/VX_trace_pkg.sv Normal file
View file

@ -0,0 +1,399 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`ifndef VX_TRACE_PKG_VH
`define VX_TRACE_PKG_VH
`include "VX_define.vh"
package VX_trace_pkg;
`ifdef SIMULATION
`ifdef SV_DPI
import "DPI-C" function void dpi_trace(input int level, input string format /*verilator sformat*/);
`endif
import VX_gpu_pkg::*;
// Writes the short name of the execution unit selected by ex_type
// ("ALU", "LSU", "FPU" or "SFU") through `TRACE at the given level.
// Any other value of ex_type is reported as "?".
//   level   - trace verbosity level forwarded to `TRACE
//   ex_type - execution unit selector, compared against the `EX_* codes
task trace_ex_type(input int level, input [`EX_BITS-1:0] ex_type);
    // Each branch is wrapped in begin/end so the `TRACE expansion can never
    // capture the following else.
    if (ex_type == (`EX_ALU)) begin
        `TRACE(level, ("ALU"));
    end else if (ex_type == (`EX_LSU)) begin
        `TRACE(level, ("LSU"));
    end else if (ex_type == (`EX_FPU)) begin
        `TRACE(level, ("FPU"));
    end else if (ex_type == (`EX_SFU)) begin
        `TRACE(level, ("SFU"));
    end else begin
        `TRACE(level, ("?"));
    end
endtask
// Writes the assembly mnemonic of an instruction through `TRACE.
//
//   level   - trace verbosity level forwarded to every `TRACE call
//   ex_type - execution unit selector (`EX_ALU, `EX_LSU, `EX_FPU, `EX_SFU)
//   op_type - operation code; cast to the unit-specific width before decoding
//   op_args - per-unit arguments; only the member that matches ex_type is
//             read (op_args.alu, .lsu, .fpu, .wctl or .csr)
//
// Any encoding that is not recognized is reported as "?".
//
// NOTE: `TRACE is a macro defined outside this file. Every if/else branch
// that invokes it is wrapped in begin/end: should the macro expand to a bare
// `if` statement, an unbraced "if (c) `TRACE(..); else `TRACE(..);" would
// attach the else to the macro's inner `if` and silently drop the else-branch
// mnemonic.
task trace_ex_op(input int level,
                 input [`EX_BITS-1:0] ex_type,
                 input [`INST_OP_BITS-1:0] op_type,
                 input VX_gpu_pkg::op_args_t op_args
);
    case (ex_type)
    `EX_ALU: begin
        case (op_args.alu.xtype)
        `ALU_TYPE_ARITH: begin
            if (op_args.alu.is_w) begin
                // word-sized (*W) variants
                if (op_args.alu.use_imm) begin
                    case (`INST_ALU_BITS'(op_type))
                    `INST_ALU_ADD: `TRACE(level, ("ADDIW"));
                    `INST_ALU_SLL: `TRACE(level, ("SLLIW"));
                    `INST_ALU_SRL: `TRACE(level, ("SRLIW"));
                    `INST_ALU_SRA: `TRACE(level, ("SRAIW"));
                    default:       `TRACE(level, ("?"));
                    endcase
                end else begin
                    case (`INST_ALU_BITS'(op_type))
                    `INST_ALU_ADD: `TRACE(level, ("ADDW"));
                    `INST_ALU_SUB: `TRACE(level, ("SUBW"));
                    `INST_ALU_SLL: `TRACE(level, ("SLLW"));
                    `INST_ALU_SRL: `TRACE(level, ("SRLW"));
                    `INST_ALU_SRA: `TRACE(level, ("SRAW"));
                    default:       `TRACE(level, ("?"));
                    endcase
                end
            end else begin
                if (op_args.alu.use_imm) begin
                    case (`INST_ALU_BITS'(op_type))
                    `INST_ALU_ADD:   `TRACE(level, ("ADDI"));
                    `INST_ALU_SLL:   `TRACE(level, ("SLLI"));
                    `INST_ALU_SRL:   `TRACE(level, ("SRLI"));
                    `INST_ALU_SRA:   `TRACE(level, ("SRAI"));
                    `INST_ALU_SLT:   `TRACE(level, ("SLTI"));
                    `INST_ALU_SLTU:  `TRACE(level, ("SLTIU"));
                    `INST_ALU_XOR:   `TRACE(level, ("XORI"));
                    `INST_ALU_OR:    `TRACE(level, ("ORI"));
                    `INST_ALU_AND:   `TRACE(level, ("ANDI"));
                    `INST_ALU_LUI:   `TRACE(level, ("LUI"));
                    `INST_ALU_AUIPC: `TRACE(level, ("AUIPC"));
                    default:         `TRACE(level, ("?"));
                    endcase
                end else begin
                    case (`INST_ALU_BITS'(op_type))
                    `INST_ALU_ADD:   `TRACE(level, ("ADD"));
                    `INST_ALU_SUB:   `TRACE(level, ("SUB"));
                    `INST_ALU_SLL:   `TRACE(level, ("SLL"));
                    `INST_ALU_SRL:   `TRACE(level, ("SRL"));
                    `INST_ALU_SRA:   `TRACE(level, ("SRA"));
                    `INST_ALU_SLT:   `TRACE(level, ("SLT"));
                    `INST_ALU_SLTU:  `TRACE(level, ("SLTU"));
                    `INST_ALU_XOR:   `TRACE(level, ("XOR"));
                    `INST_ALU_OR:    `TRACE(level, ("OR"));
                    `INST_ALU_AND:   `TRACE(level, ("AND"));
                    `INST_ALU_CZEQ:  `TRACE(level, ("CZERO.EQZ"));
                    `INST_ALU_CZNE:  `TRACE(level, ("CZERO.NEZ"));
                    default:         `TRACE(level, ("?"));
                    endcase
                end
            end
        end
        `ALU_TYPE_BRANCH: begin
            case (`INST_BR_BITS'(op_type))
            `INST_BR_EQ:     `TRACE(level, ("BEQ"));
            `INST_BR_NE:     `TRACE(level, ("BNE"));
            `INST_BR_LT:     `TRACE(level, ("BLT"));
            `INST_BR_GE:     `TRACE(level, ("BGE"));
            `INST_BR_LTU:    `TRACE(level, ("BLTU"));
            `INST_BR_GEU:    `TRACE(level, ("BGEU"));
            `INST_BR_JAL:    `TRACE(level, ("JAL"));
            `INST_BR_JALR:   `TRACE(level, ("JALR"));
            `INST_BR_ECALL:  `TRACE(level, ("ECALL"));
            `INST_BR_EBREAK: `TRACE(level, ("EBREAK"));
            `INST_BR_URET:   `TRACE(level, ("URET"));
            `INST_BR_SRET:   `TRACE(level, ("SRET"));
            `INST_BR_MRET:   `TRACE(level, ("MRET"));
            default:         `TRACE(level, ("?"));
            endcase
        end
        `ALU_TYPE_MULDIV: begin
            if (op_args.alu.is_w) begin
                case (`INST_M_BITS'(op_type))
                `INST_M_MUL:  `TRACE(level, ("MULW"));
                `INST_M_DIV:  `TRACE(level, ("DIVW"));
                `INST_M_DIVU: `TRACE(level, ("DIVUW"));
                `INST_M_REM:  `TRACE(level, ("REMW"));
                `INST_M_REMU: `TRACE(level, ("REMUW"));
                default:      `TRACE(level, ("?"));
                endcase
            end else begin
                case (`INST_M_BITS'(op_type))
                `INST_M_MUL:    `TRACE(level, ("MUL"));
                `INST_M_MULH:   `TRACE(level, ("MULH"));
                `INST_M_MULHSU: `TRACE(level, ("MULHSU"));
                `INST_M_MULHU:  `TRACE(level, ("MULHU"));
                `INST_M_DIV:    `TRACE(level, ("DIV"));
                `INST_M_DIVU:   `TRACE(level, ("DIVU"));
                `INST_M_REM:    `TRACE(level, ("REM"));
                `INST_M_REMU:   `TRACE(level, ("REMU"));
                default:        `TRACE(level, ("?"));
                endcase
            end
        end
        default: `TRACE(level, ("?"));
        endcase
    end
    `EX_LSU: begin
        if (op_args.lsu.is_float) begin
            case (`INST_LSU_BITS'(op_type))
            `INST_LSU_LW: `TRACE(level, ("FLW"));
            `INST_LSU_LD: `TRACE(level, ("FLD"));
            `INST_LSU_SW: `TRACE(level, ("FSW"));
            `INST_LSU_SD: `TRACE(level, ("FSD"));
            default:      `TRACE(level, ("?"));
            endcase
        end else begin
            case (`INST_LSU_BITS'(op_type))
            `INST_LSU_LB:    `TRACE(level, ("LB"));
            `INST_LSU_LH:    `TRACE(level, ("LH"));
            `INST_LSU_LW:    `TRACE(level, ("LW"));
            `INST_LSU_LD:    `TRACE(level, ("LD"));
            `INST_LSU_LBU:   `TRACE(level, ("LBU"));
            `INST_LSU_LHU:   `TRACE(level, ("LHU"));
            `INST_LSU_LWU:   `TRACE(level, ("LWU"));
            `INST_LSU_SB:    `TRACE(level, ("SB"));
            `INST_LSU_SH:    `TRACE(level, ("SH"));
            `INST_LSU_SW:    `TRACE(level, ("SW"));
            `INST_LSU_SD:    `TRACE(level, ("SD"));
            `INST_LSU_FENCE: `TRACE(level, ("FENCE"));
            default:         `TRACE(level, ("?"));
            endcase
        end
    end
    `EX_FPU: begin
        // fmt[0] selects the ".D" mnemonic over ".S"; for the conversions
        // fmt[1] selects the "L"/"LU" integer form over "W"/"WU".
        case (`INST_FPU_BITS'(op_type))
        `INST_FPU_ADD: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FADD.D"));
            end else begin
                `TRACE(level, ("FADD.S"));
            end
        end
        `INST_FPU_SUB: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FSUB.D"));
            end else begin
                `TRACE(level, ("FSUB.S"));
            end
        end
        `INST_FPU_MUL: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FMUL.D"));
            end else begin
                `TRACE(level, ("FMUL.S"));
            end
        end
        `INST_FPU_DIV: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FDIV.D"));
            end else begin
                `TRACE(level, ("FDIV.S"));
            end
        end
        `INST_FPU_SQRT: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FSQRT.D"));
            end else begin
                `TRACE(level, ("FSQRT.S"));
            end
        end
        `INST_FPU_MADD: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FMADD.D"));
            end else begin
                `TRACE(level, ("FMADD.S"));
            end
        end
        `INST_FPU_MSUB: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FMSUB.D"));
            end else begin
                `TRACE(level, ("FMSUB.S"));
            end
        end
        `INST_FPU_NMADD: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FNMADD.D"));
            end else begin
                `TRACE(level, ("FNMADD.S"));
            end
        end
        `INST_FPU_NMSUB: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FNMSUB.D"));
            end else begin
                `TRACE(level, ("FNMSUB.S"));
            end
        end
        `INST_FPU_CMP: begin
            // the low two bits of frm select the comparison
            if (op_args.fpu.fmt[0]) begin
                case (op_args.fpu.frm[1:0])
                0:       `TRACE(level, ("FLE.D"));
                1:       `TRACE(level, ("FLT.D"));
                2:       `TRACE(level, ("FEQ.D"));
                default: `TRACE(level, ("?"));
                endcase
            end else begin
                case (op_args.fpu.frm[1:0])
                0:       `TRACE(level, ("FLE.S"));
                1:       `TRACE(level, ("FLT.S"));
                2:       `TRACE(level, ("FEQ.S"));
                default: `TRACE(level, ("?"));
                endcase
            end
        end
        `INST_FPU_F2F: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FCVT.D.S"));
            end else begin
                `TRACE(level, ("FCVT.S.D"));
            end
        end
        `INST_FPU_F2I: begin
            if (op_args.fpu.fmt[0]) begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.L.D"));
                end else begin
                    `TRACE(level, ("FCVT.W.D"));
                end
            end else begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.L.S"));
                end else begin
                    `TRACE(level, ("FCVT.W.S"));
                end
            end
        end
        `INST_FPU_F2U: begin
            if (op_args.fpu.fmt[0]) begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.LU.D"));
                end else begin
                    `TRACE(level, ("FCVT.WU.D"));
                end
            end else begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.LU.S"));
                end else begin
                    `TRACE(level, ("FCVT.WU.S"));
                end
            end
        end
        `INST_FPU_I2F: begin
            if (op_args.fpu.fmt[0]) begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.D.L"));
                end else begin
                    `TRACE(level, ("FCVT.D.W"));
                end
            end else begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.S.L"));
                end else begin
                    `TRACE(level, ("FCVT.S.W"));
                end
            end
        end
        `INST_FPU_U2F: begin
            if (op_args.fpu.fmt[0]) begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.D.LU"));
                end else begin
                    `TRACE(level, ("FCVT.D.WU"));
                end
            end else begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.S.LU"));
                end else begin
                    `TRACE(level, ("FCVT.S.WU"));
                end
            end
        end
        `INST_FPU_MISC: begin
            // frm selects the miscellaneous operation; the default arm
            // reports unmatched (e.g. X-valued) selectors as "?" like every
            // other case in this task.
            if (op_args.fpu.fmt[0]) begin
                case (op_args.fpu.frm)
                0:       `TRACE(level, ("FSGNJ.D"));
                1:       `TRACE(level, ("FSGNJN.D"));
                2:       `TRACE(level, ("FSGNJX.D"));
                3:       `TRACE(level, ("FCLASS.D"));
                4:       `TRACE(level, ("FMV.X.D"));
                5:       `TRACE(level, ("FMV.D.X"));
                6:       `TRACE(level, ("FMIN.D"));
                7:       `TRACE(level, ("FMAX.D"));
                default: `TRACE(level, ("?"));
                endcase
            end else begin
                case (op_args.fpu.frm)
                0:       `TRACE(level, ("FSGNJ.S"));
                1:       `TRACE(level, ("FSGNJN.S"));
                2:       `TRACE(level, ("FSGNJX.S"));
                3:       `TRACE(level, ("FCLASS.S"));
                4:       `TRACE(level, ("FMV.X.S"));
                5:       `TRACE(level, ("FMV.S.X"));
                6:       `TRACE(level, ("FMIN.S"));
                7:       `TRACE(level, ("FMAX.S"));
                default: `TRACE(level, ("?"));
                endcase
            end
        end
        default: `TRACE(level, ("?"));
        endcase
    end
    `EX_SFU: begin
        case (`INST_SFU_BITS'(op_type))
        `INST_SFU_TMC:    `TRACE(level, ("TMC"));
        `INST_SFU_WSPAWN: `TRACE(level, ("WSPAWN"));
        `INST_SFU_SPLIT: begin
            if (op_args.wctl.is_neg) begin
                `TRACE(level, ("SPLIT.N"));
            end else begin
                `TRACE(level, ("SPLIT"));
            end
        end
        `INST_SFU_JOIN:   `TRACE(level, ("JOIN"));
        `INST_SFU_BAR:    `TRACE(level, ("BAR"));
        `INST_SFU_PRED: begin
            if (op_args.wctl.is_neg) begin
                `TRACE(level, ("PRED.N"));
            end else begin
                `TRACE(level, ("PRED"));
            end
        end
        `INST_SFU_CSRRW: begin
            if (op_args.csr.use_imm) begin
                `TRACE(level, ("CSRRWI"));
            end else begin
                `TRACE(level, ("CSRRW"));
            end
        end
        `INST_SFU_CSRRS: begin
            if (op_args.csr.use_imm) begin
                `TRACE(level, ("CSRRSI"));
            end else begin
                `TRACE(level, ("CSRRS"));
            end
        end
        `INST_SFU_CSRRC: begin
            if (op_args.csr.use_imm) begin
                `TRACE(level, ("CSRRCI"));
            end else begin
                `TRACE(level, ("CSRRC"));
            end
        end
        default: `TRACE(level, ("?"));
        endcase
    end
    default: `TRACE(level, ("?"));
    endcase
endtask
// Writes the unit-specific operand fields of an instruction through `TRACE,
// each formatted as a ", name=value" suffix.
//   level   - trace verbosity level forwarded to `TRACE
//   ex_type - execution unit selector; picks which op_args member is printed
//   op_type - operation code; only consulted for `EX_SFU, to detect CSR ops
//   op_args - per-unit arguments (op_args.alu, .lsu, .fpu or .csr)
// Nothing is written for non-CSR SFU operations or for an unknown ex_type.
task trace_op_args(input int level,
                   input [`EX_BITS-1:0] ex_type,
                   input [`INST_OP_BITS-1:0] op_type,
                   input VX_gpu_pkg::op_args_t op_args
);
    if (ex_type == (`EX_ALU)) begin
        `TRACE(level, (", use_PC=%b, use_imm=%b, imm=0x%0h", op_args.alu.use_PC, op_args.alu.use_imm, op_args.alu.imm));
    end else if (ex_type == (`EX_LSU)) begin
        `TRACE(level, (", offset=0x%0h", op_args.lsu.offset));
    end else if (ex_type == (`EX_FPU)) begin
        `TRACE(level, (", fmt=0x%0h, frm=0x%0h", op_args.fpu.fmt, op_args.fpu.frm));
    end else if ((ex_type == (`EX_SFU)) && (`INST_SFU_IS_CSR(op_type))) begin
        // only the CSR subset of SFU operations carries printable arguments
        `TRACE(level, (", addr=0x%0h, use_imm=%b, imm=0x%0h", op_args.csr.addr, op_args.csr.use_imm, op_args.csr.imm));
    end
endtask
// Writes the symbolic name of a base DCR address through `TRACE.
//   level - trace verbosity level forwarded to `TRACE
//   addr  - DCR address, compared against the `VX_DCR_BASE_* codes
// An address that matches none of the known registers is reported as "?".
task trace_base_dcr(input int level, input [`VX_DCR_ADDR_WIDTH-1:0] addr);
    // The comparisons run in the same order as the original case items, so
    // the first matching register name wins.
    if (addr == (`VX_DCR_BASE_STARTUP_ADDR0)) begin
        `TRACE(level, ("STARTUP_ADDR0"));
    end else if (addr == (`VX_DCR_BASE_STARTUP_ADDR1)) begin
        `TRACE(level, ("STARTUP_ADDR1"));
    end else if (addr == (`VX_DCR_BASE_STARTUP_ARG0)) begin
        `TRACE(level, ("STARTUP_ARG0"));
    end else if (addr == (`VX_DCR_BASE_STARTUP_ARG1)) begin
        `TRACE(level, ("STARTUP_ARG1"));
    end else if (addr == (`VX_DCR_BASE_MPM_CLASS)) begin
        `TRACE(level, ("MPM_CLASS"));
    end else begin
        `TRACE(level, ("?"));
    end
endtask
`endif
endpackage
`endif // VX_TRACE_PKG_VH

View file

@ -14,7 +14,7 @@
`include "VX_define.vh"
module VX_wctl_unit import VX_gpu_pkg::*; #(
parameter CORE_ID = 0,
parameter `STRING INSTANCE_ID = "",
parameter NUM_LANES = 1
) (
input wire clk,
@ -27,7 +27,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
VX_warp_ctl_if.master warp_ctl_if,
VX_commit_if.master commit_if
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_SPARAM (INSTANCE_ID)
localparam LANE_BITS = `CLOG2(NUM_LANES);
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
localparam PID_WIDTH = `UP(PID_BITS);

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -15,7 +15,7 @@
`ifdef FPU_DPI
module VX_fpu_dpi import VX_fpu_pkg::*; #(
module VX_fpu_dpi import VX_fpu_pkg::*; #(
parameter NUM_LANES = 1,
parameter TAG_WIDTH = 1,
parameter OUT_BUF = 0
@ -29,7 +29,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
input wire [NUM_LANES-1:0] mask_in,
input wire [TAG_WIDTH-1:0] tag_in,
input wire [`INST_FPU_BITS-1:0] op_type,
input wire [`INST_FMT_BITS-1:0] fmt,
input wire [`INST_FRM_BITS-1:0] frm,
@ -37,7 +37,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
input wire [NUM_LANES-1:0][`XLEN-1:0] dataa,
input wire [NUM_LANES-1:0][`XLEN-1:0] datab,
input wire [NUM_LANES-1:0][`XLEN-1:0] datac,
output wire [NUM_LANES-1:0][`XLEN-1:0] result,
output wire [NUM_LANES-1:0][`XLEN-1:0] result,
output wire has_fflags,
output wire [`FP_FLAGS_BITS-1:0] fflags,
@ -55,31 +55,31 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
localparam FPC_BITS = `LOG2UP(NUM_FPC);
localparam RSP_DATAW = (NUM_LANES * `XLEN) + 1 + $bits(fflags_t) + TAG_WIDTH;
wire [NUM_FPC-1:0] per_core_ready_in;
wire [NUM_FPC-1:0][NUM_LANES-1:0][`XLEN-1:0] per_core_result;
wire [NUM_FPC-1:0][TAG_WIDTH-1:0] per_core_tag_out;
reg [NUM_FPC-1:0] per_core_ready_out;
wire [NUM_FPC-1:0] per_core_valid_out;
wire [NUM_FPC-1:0] per_core_has_fflags;
fflags_t [NUM_FPC-1:0] per_core_fflags;
wire [NUM_FPC-1:0] per_core_valid_out;
wire [NUM_FPC-1:0] per_core_has_fflags;
fflags_t [NUM_FPC-1:0] per_core_fflags;
wire div_ready_in, sqrt_ready_in;
wire [NUM_LANES-1:0][`XLEN-1:0] div_result, sqrt_result;
wire [TAG_WIDTH-1:0] div_tag_out, sqrt_tag_out;
wire div_ready_out, sqrt_ready_out;
wire div_valid_out, sqrt_valid_out;
wire div_has_fflags, sqrt_has_fflags;
wire div_valid_out, sqrt_valid_out;
wire div_has_fflags, sqrt_has_fflags;
fflags_t div_fflags, sqrt_fflags;
reg [FPC_BITS-1:0] core_select;
reg is_fadd, is_fsub, is_fmul, is_fmadd, is_fmsub, is_fnmadd, is_fnmsub;
reg is_div, is_fcmp, is_itof, is_utof, is_ftoi, is_ftou, is_f2f;
reg is_div, is_fcmp, is_itof, is_utof, is_ftoi, is_ftou, is_f2f;
reg dst_fmt, int_fmt;
reg [NUM_LANES-1:0][63:0] operands [3];
always @(*) begin
for (integer i = 0; i < NUM_LANES; ++i) begin
operands[0][i] = 64'(dataa[i]);
@ -92,23 +92,23 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
always @(*) begin
is_fadd = 0;
is_fsub = 0;
is_fmul = 0;
is_fsub = 0;
is_fmul = 0;
is_fmadd = 0;
is_fmsub = 0;
is_fnmadd = 0;
is_fnmsub = 0;
is_div = 0;
is_fnmadd = 0;
is_fnmsub = 0;
is_div = 0;
is_fcmp = 0;
is_itof = 0;
is_utof = 0;
is_ftoi = 0;
is_ftou = 0;
is_f2f = 0;
dst_fmt = 0;
int_fmt = 0;
`ifdef FLEN_64
dst_fmt = fmt[0];
`endif
@ -132,23 +132,23 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
`INST_FPU_F2U: begin core_select = FPU_CVT; is_ftou = 1; end
`INST_FPU_I2F: begin core_select = FPU_CVT; is_itof = 1; end
`INST_FPU_U2F: begin core_select = FPU_CVT; is_utof = 1; end
`INST_FPU_F2F: begin core_select = FPU_CVT; is_f2f = 1; end
`INST_FPU_F2F: begin core_select = FPU_CVT; is_f2f = 1; end
default: begin core_select = FPU_NCP; end
endcase
end
generate
generate
begin : fma
reg [NUM_LANES-1:0][`XLEN-1:0] result_fma;
wire [NUM_LANES-1:0][63:0] result_fadd;
wire [NUM_LANES-1:0][63:0] result_fsub;
wire [NUM_LANES-1:0][63:0] result_fmul;
wire [NUM_LANES-1:0][63:0] result_fmadd;
wire [NUM_LANES-1:0][63:0] result_fmsub;
wire [NUM_LANES-1:0][63:0] result_fnmadd;
wire [NUM_LANES-1:0][63:0] result_fnmsub;
reg [NUM_LANES-1:0][63:0] result_fadd;
reg [NUM_LANES-1:0][63:0] result_fsub;
reg [NUM_LANES-1:0][63:0] result_fmul;
reg [NUM_LANES-1:0][63:0] result_fmadd;
reg [NUM_LANES-1:0][63:0] result_fmsub;
reg [NUM_LANES-1:0][63:0] result_fnmadd;
reg [NUM_LANES-1:0][63:0] result_fnmsub;
fflags_t [NUM_LANES-1:0] fflags_fma;
fflags_t [NUM_LANES-1:0] fflags_fadd;
fflags_t [NUM_LANES-1:0] fflags_fsub;
@ -162,7 +162,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
wire fma_ready = per_core_ready_out[FPU_FMA] || ~per_core_valid_out[FPU_FMA];
wire fma_fire = fma_valid && fma_ready;
always @(*) begin
always @(*) begin
for (integer i = 0; i < NUM_LANES; ++i) begin
dpi_fadd (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fadd[i], fflags_fadd[i]);
dpi_fsub (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fsub[i], fflags_fsub[i]);
@ -175,20 +175,20 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
result_fma[i] = is_fadd ? result_fadd[i][`XLEN-1:0] :
is_fsub ? result_fsub[i][`XLEN-1:0] :
is_fmul ? result_fmul[i][`XLEN-1:0] :
is_fmadd ? result_fmadd[i][`XLEN-1:0] :
is_fmadd ? result_fmadd[i][`XLEN-1:0] :
is_fmsub ? result_fmsub[i][`XLEN-1:0] :
is_fnmadd ? result_fnmadd[i][`XLEN-1:0] :
is_fnmadd ? result_fnmadd[i][`XLEN-1:0] :
is_fnmsub ? result_fnmsub[i][`XLEN-1:0] :
'0;
fflags_fma[i] = is_fadd ? fflags_fadd[i] :
is_fsub ? fflags_fsub[i] :
is_fmul ? fflags_fmul[i] :
is_fmadd ? fflags_fmadd[i] :
is_fmadd ? fflags_fmadd[i] :
is_fmsub ? fflags_fmsub[i] :
is_fnmadd ? fflags_fnmadd[i] :
is_fnmsub ? fflags_fnmsub[i] :
'0;
is_fnmadd ? fflags_fnmadd[i] :
is_fnmsub ? fflags_fnmsub[i] :
'0;
end
end
@ -213,19 +213,19 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
end
endgenerate
generate
generate
begin : fdiv
reg [NUM_LANES-1:0][`XLEN-1:0] result_fdiv_r;
wire [NUM_LANES-1:0][63:0] result_fdiv;
reg [NUM_LANES-1:0][63:0] result_fdiv;
fflags_t [NUM_LANES-1:0] fflags_fdiv;
wire fdiv_valid = (valid_in && core_select == FPU_DIVSQRT) && is_div;
wire fdiv_ready = div_ready_out || ~div_valid_out;
wire fdiv_fire = fdiv_valid && fdiv_ready;
always @(*) begin
for (integer i = 0; i < NUM_LANES; ++i) begin
always @(*) begin
for (integer i = 0; i < NUM_LANES; ++i) begin
dpi_fdiv (fdiv_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fdiv[i], fflags_fdiv[i]);
result_fdiv_r[i] = result_fdiv[i][`XLEN-1:0];
end
@ -252,18 +252,18 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
end
endgenerate
generate
generate
begin : fsqrt
reg [NUM_LANES-1:0][`XLEN-1:0] result_fsqrt_r;
wire [NUM_LANES-1:0][63:0] result_fsqrt;
reg [NUM_LANES-1:0][63:0] result_fsqrt;
fflags_t [NUM_LANES-1:0] fflags_fsqrt;
wire fsqrt_valid = (valid_in && core_select == FPU_DIVSQRT) && ~is_div;
wire fsqrt_ready = sqrt_ready_out || ~sqrt_valid_out;
wire fsqrt_ready = sqrt_ready_out || ~sqrt_valid_out;
wire fsqrt_fire = fsqrt_valid && fsqrt_ready;
always @(*) begin
always @(*) begin
for (integer i = 0; i < NUM_LANES; ++i) begin
dpi_fsqrt (fsqrt_fire, int'(dst_fmt), operands[0][i], frm, result_fsqrt[i], fflags_fsqrt[i]);
result_fsqrt_r[i] = result_fsqrt[i][`XLEN-1:0];
@ -295,12 +295,12 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
begin : fcvt
reg [NUM_LANES-1:0][`XLEN-1:0] result_fcvt;
wire [NUM_LANES-1:0][63:0] result_itof;
wire [NUM_LANES-1:0][63:0] result_utof;
wire [NUM_LANES-1:0][63:0] result_ftoi;
wire [NUM_LANES-1:0][63:0] result_ftou;
wire [NUM_LANES-1:0][63:0] result_f2f;
reg [NUM_LANES-1:0][63:0] result_itof;
reg [NUM_LANES-1:0][63:0] result_utof;
reg [NUM_LANES-1:0][63:0] result_ftoi;
reg [NUM_LANES-1:0][63:0] result_ftou;
reg [NUM_LANES-1:0][63:0] result_f2f;
fflags_t [NUM_LANES-1:0] fflags_fcvt;
fflags_t [NUM_LANES-1:0] fflags_itof;
fflags_t [NUM_LANES-1:0] fflags_utof;
@ -310,20 +310,20 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
wire fcvt_valid = (valid_in && core_select == FPU_CVT);
wire fcvt_ready = per_core_ready_out[FPU_CVT] || ~per_core_valid_out[FPU_CVT];
wire fcvt_fire = fcvt_valid && fcvt_ready;
always @(*) begin
always @(*) begin
for (integer i = 0; i < NUM_LANES; ++i) begin
dpi_itof (fcvt_fire, int'(dst_fmt), int'(int_fmt), operands[0][i], frm, result_itof[i], fflags_itof[i]);
dpi_utof (fcvt_fire, int'(dst_fmt), int'(int_fmt), operands[0][i], frm, result_utof[i], fflags_utof[i]);
dpi_ftoi (fcvt_fire, int'(int_fmt), int'(dst_fmt), operands[0][i], frm, result_ftoi[i], fflags_ftoi[i]);
dpi_ftou (fcvt_fire, int'(int_fmt), int'(dst_fmt), operands[0][i], frm, result_ftou[i], fflags_ftou[i]);
dpi_f2f (fcvt_fire, int'(dst_fmt), operands[0][i], result_f2f[i]);
dpi_f2f (fcvt_fire, int'(dst_fmt), operands[0][i], result_f2f[i]);
result_fcvt[i] = is_itof ? result_itof[i][`XLEN-1:0] :
is_utof ? result_utof[i][`XLEN-1:0] :
is_ftoi ? result_ftoi[i][`XLEN-1:0] :
is_ftou ? result_ftou[i][`XLEN-1:0] :
is_f2f ? result_f2f[i][`XLEN-1:0] :
is_ftou ? result_ftou[i][`XLEN-1:0] :
is_f2f ? result_f2f[i][`XLEN-1:0] :
'0;
fflags_fcvt[i] = is_itof ? fflags_itof[i] :
@ -355,19 +355,19 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
end
endgenerate
generate
generate
begin : fncp
reg [NUM_LANES-1:0][`XLEN-1:0] result_fncp;
wire [NUM_LANES-1:0][63:0] result_fclss;
wire [NUM_LANES-1:0][63:0] result_flt;
wire [NUM_LANES-1:0][63:0] result_fle;
wire [NUM_LANES-1:0][63:0] result_feq;
wire [NUM_LANES-1:0][63:0] result_fmin;
wire [NUM_LANES-1:0][63:0] result_fmax;
wire [NUM_LANES-1:0][63:0] result_fsgnj;
wire [NUM_LANES-1:0][63:0] result_fsgnjn;
wire [NUM_LANES-1:0][63:0] result_fsgnjx;
reg [NUM_LANES-1:0][63:0] result_fclss;
reg [NUM_LANES-1:0][63:0] result_flt;
reg [NUM_LANES-1:0][63:0] result_fle;
reg [NUM_LANES-1:0][63:0] result_feq;
reg [NUM_LANES-1:0][63:0] result_fmin;
reg [NUM_LANES-1:0][63:0] result_fmax;
reg [NUM_LANES-1:0][63:0] result_fsgnj;
reg [NUM_LANES-1:0][63:0] result_fsgnjn;
reg [NUM_LANES-1:0][63:0] result_fsgnjx;
reg [NUM_LANES-1:0][63:0] result_fmvx;
reg [NUM_LANES-1:0][63:0] result_fmvf;
@ -381,15 +381,15 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
wire fncp_valid = (valid_in && core_select == FPU_NCP);
wire fncp_ready = per_core_ready_out[FPU_NCP] || ~per_core_valid_out[FPU_NCP];
wire fncp_fire = fncp_valid && fncp_ready;
always @(*) begin
always @(*) begin
for (integer i = 0; i < NUM_LANES; ++i) begin
dpi_fclss (fncp_fire, int'(dst_fmt), operands[0][i], result_fclss[i]);
dpi_fle (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fle[i], fflags_fle[i]);
dpi_flt (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_flt[i], fflags_flt[i]);
dpi_flt (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_flt[i], fflags_flt[i]);
dpi_feq (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_feq[i], fflags_feq[i]);
dpi_fmin (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fmin[i], fflags_fmin[i]);
dpi_fmax (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fmax[i], fflags_fmax[i]);
dpi_fmax (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fmax[i], fflags_fmax[i]);
dpi_fsgnj (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnj[i]);
dpi_fsgnjn (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnjn[i]);
dpi_fsgnjx (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnjx[i]);
@ -431,7 +431,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
.data_in ({fncp_valid, tag_in, has_fflags_fncp, result_fncp, fflags_merged}),
.data_out ({per_core_valid_out[FPU_NCP], per_core_tag_out[FPU_NCP], per_core_has_fflags[FPU_NCP], per_core_result[FPU_NCP], per_core_fflags[FPU_NCP]})
);
assign per_core_ready_in[FPU_NCP] = fncp_ready;
end
@ -443,15 +443,15 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
VX_stream_arb #(
.NUM_INPUTS (2),
.DATAW (RSP_DATAW),
.DATAW (RSP_DATAW),
.ARBITER ("R"),
.OUT_BUF (0)
) div_sqrt_arb (
.clk (clk),
.reset (reset),
.valid_in ({sqrt_valid_out, div_valid_out}),
.valid_in ({sqrt_valid_out, div_valid_out}),
.ready_in ({sqrt_ready_out, div_ready_out}),
.data_in ({{sqrt_result, sqrt_has_fflags, sqrt_fflags, sqrt_tag_out},
.data_in ({{sqrt_result, sqrt_has_fflags, sqrt_fflags, sqrt_tag_out},
{div_result, div_has_fflags, div_fflags, div_tag_out}}),
.data_out ({per_core_result[FPU_DIVSQRT], per_core_has_fflags[FPU_DIVSQRT], per_core_fflags[FPU_DIVSQRT], per_core_tag_out[FPU_DIVSQRT]}),
.valid_out (per_core_valid_out[FPU_DIVSQRT]),
@ -469,13 +469,13 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
VX_stream_arb #(
.NUM_INPUTS (NUM_FPC),
.DATAW (RSP_DATAW),
.ARBITER ("R"),
.DATAW (RSP_DATAW),
.ARBITER ("F"),
.OUT_BUF (OUT_BUF)
) rsp_arb (
.clk (clk),
.reset (reset),
.valid_in (per_core_valid_out),
.valid_in (per_core_valid_out),
.ready_in (per_core_ready_out),
.data_in (per_core_data_out),
.data_out ({result, has_fflags, fflags, tag_out}),

View file

@ -289,14 +289,14 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
end
wire [NUM_LANES-1:0][31:0] result_s;
wire [1:0] op_ret_int_out;
`UNUSED_VAR (op_ret_int_out)
VX_stream_arb #(
.NUM_INPUTS (NUM_FPC),
.DATAW (RSP_DATAW + 2),
.ARBITER ("R"),
.ARBITER ("F"),
.OUT_BUF (OUT_BUF)
) rsp_arb (
.clk (clk),

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -21,7 +21,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
parameter TAG_WIDTH = 1
) (
input wire clk,
input wire reset,
input wire reset,
output wire ready_in,
input wire valid_in,
@ -29,7 +29,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
input wire [NUM_LANES-1:0] mask_in,
input wire [TAG_WIDTH-1:0] tag_in,
input wire [`INST_FRM_BITS-1:0] frm,
input wire is_madd,
@ -39,7 +39,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
input wire [NUM_LANES-1:0][31:0] dataa,
input wire [NUM_LANES-1:0][31:0] datab,
input wire [NUM_LANES-1:0][31:0] datac,
output wire [NUM_LANES-1:0][31:0] result,
output wire [NUM_LANES-1:0][31:0] result,
output wire has_fflags,
output wire [`FP_FLAGS_BITS-1:0] fflags,
@ -52,11 +52,11 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
`UNUSED_VAR (frm)
wire [NUM_LANES-1:0][3*32-1:0] data_in;
wire [NUM_LANES-1:0] mask_out;
wire [NUM_LANES-1:0] mask_out;
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out;
wire pe_enable;
wire pe_enable;
wire [NUM_PES-1:0][3*32-1:0] pe_data_in;
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
@ -66,7 +66,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
always @(*) begin
if (is_madd) begin
// MADD / MSUB / NMADD / NMSUB
a[i] = is_neg ? {~dataa[i][31], dataa[i][30:0]} : dataa[i];
a[i] = is_neg ? {~dataa[i][31], dataa[i][30:0]} : dataa[i];
b[i] = datab[i];
c[i] = (is_neg ^ is_sub) ? {~datac[i][31], datac[i][30:0]} : datac[i];
end else begin
@ -81,7 +81,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
b[i] = dataa[i];
c[i] = is_sub ? {~datab[i][31], datab[i][30:0]} : datab[i];
end
end
end
end
end
@ -90,15 +90,15 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
assign data_in[i][32 +: 32] = b[i];
assign data_in[i][64 +: 32] = c[i];
end
VX_pe_serializer #(
.NUM_LANES (NUM_LANES),
.NUM_PES (NUM_PES),
.NUM_LANES (NUM_LANES),
.NUM_PES (NUM_PES),
.LATENCY (`LATENCY_FMA),
.DATA_IN_WIDTH(3*32),
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
.PE_REG (1)
.PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0)
) pe_serializer (
.clk (clk),
.reset (reset),
@ -123,7 +123,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
fflags_t [NUM_LANES-1:0] per_lane_fflags;
`ifdef QUARTUS
for (genvar i = 0; i < NUM_PES; ++i) begin
acl_fmadd fmadd (
.clk (clk),
@ -136,7 +136,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
);
assign pe_data_out[i][32 +: `FP_FLAGS_BITS] = 'x;
end
assign has_fflags = 0;
assign per_lane_fflags = 'x;
@ -144,7 +144,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
for (genvar i = 0; i < NUM_PES; ++i) begin
wire [2:0] tuser;
xil_fma fma (
.aclk (clk),
.aclken (pe_enable),
@ -172,15 +172,15 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
`UNUSED_VAR (r)
fflags_t f;
always @(*) begin
always @(*) begin
dpi_fmadd (
pe_enable,
int'(0),
{32'hffffffff, pe_data_in[i][0 +: 32]},
{32'hffffffff, pe_data_in[i][32 +: 32]},
{32'hffffffff, pe_data_in[i][64 +: 32]},
frm,
r,
pe_enable,
int'(0),
{32'hffffffff, pe_data_in[i][0 +: 32]},
{32'hffffffff, pe_data_in[i][32 +: 32]},
{32'hffffffff, pe_data_in[i][64 +: 32]},
frm,
r,
f
);
end

View file

@ -105,7 +105,7 @@ module VX_fpu_fpnew
`UNUSED_VAR (fmt)
always @(*) begin
fpu_op = 'x;
fpu_op = fpnew_pkg::operation_e'('x);
fpu_rnd = frm;
fpu_op_mod = 0;
fpu_has_fflags = 1;

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -15,17 +15,14 @@
// VX_commit_sched_if: signal bundle with two modports. `master` lists the
// signals as outputs (the driving side) and `slave` lists the same signals
// as inputs (the receiving side).
//
// NOTE(review): this region is a rendered diff hunk (-15,17 +15,14); removed
// and added lines are shown together without +/- markers. The line counts are
// consistent with the removed version carrying `committed` + `committed_wid`
// and the added version carrying only `committed_warps` -- confirm against
// the commit before relying on this.
interface VX_commit_sched_if ();
// One bit per issue slot (`ISSUE_WIDTH bits wide).
wire [`ISSUE_WIDTH-1:0] committed;
// One `NW_WIDTH-bit field per issue slot; presumably the warp id that goes
// with the matching `committed` bit -- TODO confirm with the driver.
wire [`ISSUE_WIDTH-1:0][`NW_WIDTH-1:0] committed_wid;
// One bit per warp (`NUM_WARPS bits wide).
wire [`NUM_WARPS-1:0] committed_warps;
// Driving side: every signal is an output.
modport master (
output committed,
output committed_wid
output committed_warps
);
// Receiving side: every signal is an input.
modport slave (
input committed,
input committed_wid
input committed_warps
);
endinterface

View file

@ -13,11 +13,14 @@
`include "VX_define.vh"
interface VX_decode_if import VX_gpu_pkg::*; ();
interface VX_decode_if import VX_gpu_pkg::*; #(
parameter NUM_WARPS = `NUM_WARPS,
parameter NW_WIDTH = `LOG2UP(NUM_WARPS)
);
typedef struct packed {
logic [`UUID_WIDTH-1:0] uuid;
logic [`NW_WIDTH-1:0] wid;
logic [NW_WIDTH-1:0] wid;
logic [`NUM_THREADS-1:0] tmask;
logic [`PC_BITS-1:0] PC;
logic [`EX_BITS-1:0] ex_type;
@ -34,7 +37,7 @@ interface VX_decode_if import VX_gpu_pkg::*; ();
data_t data;
logic ready;
`ifndef L1_ENABLE
wire [`NUM_WARPS-1:0] ibuf_pop;
wire [NUM_WARPS-1:0] ibuf_pop;
`endif
modport master (

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -13,39 +13,29 @@
`include "VX_define.vh"
interface VX_pipeline_perf_if ();
wire [`PERF_CTR_BITS-1:0] sched_idles;
wire [`PERF_CTR_BITS-1:0] sched_stalls;
wire [`PERF_CTR_BITS-1:0] ibf_stalls;
wire [`PERF_CTR_BITS-1:0] scb_stalls;
wire [`PERF_CTR_BITS-1:0] units_uses [`NUM_EX_UNITS];
wire [`PERF_CTR_BITS-1:0] sfu_uses [`NUM_SFU_UNITS];
interface VX_pipeline_perf_if import VX_gpu_pkg::*; ();
sched_perf_t sched;
issue_perf_t issue;
wire [`PERF_CTR_BITS-1:0] ifetches;
wire [`PERF_CTR_BITS-1:0] loads;
wire [`PERF_CTR_BITS-1:0] stores;
wire [`PERF_CTR_BITS-1:0] stores;
wire [`PERF_CTR_BITS-1:0] ifetch_latency;
wire [`PERF_CTR_BITS-1:0] load_latency;
modport schedule (
output sched_idles,
output sched_stalls
);
modport issue (
output ibf_stalls,
output scb_stalls,
output units_uses,
output sfu_uses
modport master (
output sched,
output issue,
output ifetches,
output loads,
output stores,
output ifetch_latency,
output load_latency
);
modport slave (
input sched_idles,
input sched_stalls,
input ibf_stalls,
input scb_stalls,
input units_uses,
input sfu_uses,
input sched,
input issue,
input ifetches,
input loads,
input stores,

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -14,11 +14,11 @@
`include "VX_define.vh"
`TRACING_OFF
module VX_avs_adapter #(
parameter DATA_WIDTH = 1,
parameter ADDR_WIDTH = 1,
module VX_avs_adapter #(
parameter DATA_WIDTH = 1,
parameter ADDR_WIDTH = 1,
parameter BURST_WIDTH = 1,
parameter NUM_BANKS = 1,
parameter NUM_BANKS = 1,
parameter TAG_WIDTH = 1,
parameter RD_QUEUE_SIZE = 1,
parameter REQ_OUT_BUF = 0,
@ -29,15 +29,15 @@ module VX_avs_adapter #(
// Memory request
input wire mem_req_valid,
input wire mem_req_rw,
input wire [DATA_WIDTH/8-1:0] mem_req_byteen,
input wire mem_req_rw,
input wire [DATA_WIDTH/8-1:0] mem_req_byteen,
input wire [ADDR_WIDTH-1:0] mem_req_addr,
input wire [DATA_WIDTH-1:0] mem_req_data,
input wire [TAG_WIDTH-1:0] mem_req_tag,
output wire mem_req_ready,
// Memory response
output wire mem_rsp_valid,
// Memory response
output wire mem_rsp_valid,
output wire [DATA_WIDTH-1:0] mem_rsp_data,
output wire [TAG_WIDTH-1:0] mem_rsp_tag,
input wire mem_rsp_ready,
@ -60,7 +60,7 @@ module VX_avs_adapter #(
localparam BANK_OFFSETW = ADDR_WIDTH - LOG2_NUM_BANKS;
// Requests handling //////////////////////////////////////////////////////
wire [NUM_BANKS-1:0] req_queue_push, req_queue_pop;
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] req_queue_tag_out;
wire [NUM_BANKS-1:0] req_queue_going_full;
@ -70,38 +70,40 @@ module VX_avs_adapter #(
wire [NUM_BANKS-1:0] bank_req_ready;
if (NUM_BANKS > 1) begin
assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0];
assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0];
end else begin
assign req_bank_sel = '0;
end
assign req_bank_off = mem_req_addr[ADDR_WIDTH-1:LOG2_NUM_BANKS];
for (genvar i = 0; i < NUM_BANKS; ++i) begin
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign req_queue_push[i] = mem_req_valid && ~mem_req_rw && bank_req_ready[i] && (req_bank_sel == i);
end
for (genvar i = 0; i < NUM_BANKS; ++i) begin
VX_pending_size #(
VX_pending_size #(
.SIZE (RD_QUEUE_SIZE)
) pending_size (
.clk (clk),
.reset (reset),
.incr (req_queue_push[i]),
.decr (req_queue_pop[i]),
.decr (req_queue_pop[i]),
`UNUSED_PIN (empty),
`UNUSED_PIN (alm_empty),
.full (req_queue_going_full[i]),
.size (req_queue_size[i]),
`UNUSED_PIN (empty)
);
`UNUSED_PIN (alm_full),
.size (req_queue_size[i])
);
`UNUSED_VAR (req_queue_size)
VX_fifo_queue #(
.DATAW (TAG_WIDTH),
.DEPTH (RD_QUEUE_SIZE)
) rd_req_queue (
.clk (clk),
.reset (reset),
.push (req_queue_push[i]),
.push (req_queue_push[i]),
.pop (req_queue_pop[i]),
.data_in (mem_req_tag),
.data_out (req_queue_tag_out[i]),
@ -111,9 +113,9 @@ module VX_avs_adapter #(
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
);
end
end
for (genvar i = 0; i < NUM_BANKS; ++i) begin
for (genvar i = 0; i < NUM_BANKS; ++i) begin
wire valid_out;
wire rw_out;
wire [DATA_SIZE-1:0] byteen_out;
@ -174,7 +176,7 @@ module VX_avs_adapter #(
.reset (reset),
.push (avs_readdatavalid[i]),
.pop (req_queue_pop[i]),
.data_in (avs_readdata[i]),
.data_in (avs_readdata[i]),
.data_out (rsp_queue_data_out[i]),
.empty (rsp_queue_empty[i]),
`UNUSED_PIN (full),
@ -183,7 +185,7 @@ module VX_avs_adapter #(
`UNUSED_PIN (size)
);
end
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign rsp_arb_valid_in[i] = !rsp_queue_empty[i];
assign rsp_arb_data_in[i] = {rsp_queue_data_out[i], req_queue_tag_out[i]};

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -16,22 +16,21 @@
`TRACING_OFF
module VX_cyclic_arbiter #(
parameter NUM_REQS = 1,
parameter LOCK_ENABLE = 0,
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
) (
input wire clk,
input wire reset,
input wire [NUM_REQS-1:0] requests,
input wire [NUM_REQS-1:0] requests,
output wire [LOG_NUM_REQS-1:0] grant_index,
output wire [NUM_REQS-1:0] grant_onehot,
output wire [NUM_REQS-1:0] grant_onehot,
output wire grant_valid,
input wire grant_unlock
input wire grant_ready
);
if (NUM_REQS == 1) begin
if (NUM_REQS == 1) begin
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
`UNUSED_VAR (reset)
assign grant_index = '0;
assign grant_onehot = requests;
assign grant_valid = requests[0];
@ -45,10 +44,10 @@ module VX_cyclic_arbiter #(
always @(posedge clk) begin
if (reset) begin
grant_index_r <= '0;
end else begin
end else begin
if (!IS_POW2 && grant_index_r == LOG_NUM_REQS'(NUM_REQS-1)) begin
grant_index_r <= '0;
end else if (!LOCK_ENABLE || ~grant_valid || grant_unlock) begin
end else if (~grant_valid || grant_ready) begin
grant_index_r <= grant_index_r + LOG_NUM_REQS'(1);
end
end
@ -60,11 +59,11 @@ module VX_cyclic_arbiter #(
grant_onehot_r[grant_index_r] = 1'b1;
end
assign grant_index = grant_index_r;
assign grant_index = grant_index_r;
assign grant_onehot = grant_onehot_r;
assign grant_valid = requests[grant_index_r];
end
endmodule
`TRACING_ON

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -17,20 +17,21 @@
module VX_dp_ram #(
parameter DATAW = 1,
parameter SIZE = 1,
parameter ADDR_MIN = 0,
parameter WRENW = 1,
parameter OUT_REG = 0,
parameter NO_RWCHECK = 0,
parameter LUTRAM = 0,
parameter LUTRAM = 0,
parameter INIT_ENABLE = 0,
parameter INIT_FILE = "",
parameter [DATAW-1:0] INIT_VALUE = 0,
parameter ADDRW = `LOG2UP(SIZE)
) (
) (
input wire clk,
input wire read,
input wire write,
input wire [WRENW-1:0] wren,
input wire [ADDRW-1:0] waddr,
input wire [ADDRW-1:0] waddr,
input wire [DATAW-1:0] wdata,
input wire [ADDRW-1:0] raddr,
output wire [DATAW-1:0] rdata
@ -48,16 +49,16 @@ module VX_dp_ram #(
ram[i] = INIT_VALUE; \
end \
end
`UNUSED_VAR (read)
`ifdef SYNTHESIS
if (WRENW > 1) begin
`ifdef QUARTUS
if (LUTRAM != 0) begin
if (OUT_REG != 0) begin
if (OUT_REG != 0) begin
reg [DATAW-1:0] rdata_r;
`USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [SIZE-1:0];
`USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
@ -72,7 +73,7 @@ module VX_dp_ram #(
end
assign rdata = rdata_r;
end else begin
`USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [SIZE-1:0];
`USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
@ -87,7 +88,7 @@ module VX_dp_ram #(
end else begin
if (OUT_REG != 0) begin
reg [DATAW-1:0] rdata_r;
reg [WRENW-1:0][WSELW-1:0] ram [SIZE-1:0];
reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
@ -103,7 +104,7 @@ module VX_dp_ram #(
assign rdata = rdata_r;
end else begin
if (NO_RWCHECK != 0) begin
`NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [SIZE-1:0];
`NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
@ -115,7 +116,7 @@ module VX_dp_ram #(
end
assign rdata = ram[raddr];
end else begin
reg [WRENW-1:0][WSELW-1:0] ram [SIZE-1:0];
reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
@ -132,9 +133,9 @@ module VX_dp_ram #(
`else
// default synthesis
if (LUTRAM != 0) begin
`USE_FAST_BRAM reg [DATAW-1:0] ram [SIZE-1:0];
`USE_FAST_BRAM reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION
if (OUT_REG != 0) begin
if (OUT_REG != 0) begin
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (write) begin
@ -161,7 +162,7 @@ module VX_dp_ram #(
end
end else begin
if (OUT_REG != 0) begin
reg [DATAW-1:0] ram [SIZE-1:0];
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
reg [DATAW-1:0] rdata_r;
`RAM_INITIALIZATION
always @(posedge clk) begin
@ -178,7 +179,7 @@ module VX_dp_ram #(
assign rdata = rdata_r;
end else begin
if (NO_RWCHECK != 0) begin
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [SIZE-1:0];
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
@ -190,7 +191,7 @@ module VX_dp_ram #(
end
assign rdata = ram[raddr];
end else begin
reg [DATAW-1:0] ram [SIZE-1:0];
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
@ -208,9 +209,9 @@ module VX_dp_ram #(
end else begin
// (WRENW == 1)
if (LUTRAM != 0) begin
`USE_FAST_BRAM reg [DATAW-1:0] ram [SIZE-1:0];
`USE_FAST_BRAM reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION
if (OUT_REG != 0) begin
if (OUT_REG != 0) begin
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (write) begin
@ -231,7 +232,7 @@ module VX_dp_ram #(
end
end else begin
if (OUT_REG != 0) begin
reg [DATAW-1:0] ram [SIZE-1:0];
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
reg [DATAW-1:0] rdata_r;
`RAM_INITIALIZATION
always @(posedge clk) begin
@ -245,7 +246,7 @@ module VX_dp_ram #(
assign rdata = rdata_r;
end else begin
if (NO_RWCHECK != 0) begin
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [SIZE-1:0];
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
@ -254,7 +255,7 @@ module VX_dp_ram #(
end
assign rdata = ram[raddr];
end else begin
reg [DATAW-1:0] ram [SIZE-1:0];
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
@ -265,10 +266,10 @@ module VX_dp_ram #(
end
end
end
end
end
`else
// RAM emulation
reg [DATAW-1:0] ram [SIZE-1:0];
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION
wire [DATAW-1:0] ram_n;
@ -276,8 +277,8 @@ module VX_dp_ram #(
assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW];
end
if (OUT_REG != 0) begin
reg [DATAW-1:0] rdata_r;
if (OUT_REG != 0) begin
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (write) begin
ram[waddr] <= ram_n;
@ -287,7 +288,7 @@ module VX_dp_ram #(
end
end
assign rdata = rdata_r;
end else begin
end else begin
reg [DATAW-1:0] prev_data;
reg [ADDRW-1:0] prev_waddr;
reg prev_write;
@ -298,7 +299,7 @@ module VX_dp_ram #(
prev_write <= (| wren);
prev_data <= ram[waddr];
prev_waddr <= waddr;
end
end
if (LUTRAM || !NO_RWCHECK) begin
`UNUSED_VAR (prev_write)
`UNUSED_VAR (prev_data)

View file

@ -0,0 +1,115 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_platform.vh"
`TRACING_OFF
// VX_dp_ram_rst: behavioural model of a dual-port RAM (one write port, one
// read port) whose storage is refilled with INIT_VALUE by a synchronous reset.
//
// Parameters:
//   DATAW       - width of one stored word, in bits.
//   SIZE        - exclusive upper bound of the stored address range.
//   ADDR_MIN    - lowest stored address; storage covers ADDR_MIN..SIZE-1.
//   WRENW       - number of write-enable lanes; must divide DATAW evenly.
//   OUT_REG     - non-zero selects the registered read path.
//   NO_RWCHECK  - together with LUTRAM == 0 (and OUT_REG == 0), non-zero
//                 enables the previous-cycle bypass on the read path.
//   LUTRAM      - non-zero disables that bypass.
//   INIT_ENABLE - non-zero enables the `initial` content initialisation.
//   INIT_FILE   - when non-empty, hex image loaded with $readmemh.
//   INIT_VALUE  - fill value used at initialisation and on reset.
//   ADDRW       - address width; defaults to `LOG2UP(SIZE).
//
// Ports:
//   clk, reset  - clock and synchronous reset.
//   read        - read strobe; only used when OUT_REG != 0.
//   write       - write strobe.
//   wren        - per-lane write enables (ignored when WRENW == 1).
//   waddr/wdata - write address and data.
//   raddr/rdata - read address and data.
module VX_dp_ram_rst #(
    parameter DATAW       = 1,
    parameter SIZE        = 1,
    parameter ADDR_MIN    = 0,
    parameter WRENW       = 1,
    parameter OUT_REG     = 0,
    parameter NO_RWCHECK  = 0,
    parameter LUTRAM      = 0,
    parameter INIT_ENABLE = 0,
    parameter INIT_FILE   = "",
    parameter [DATAW-1:0] INIT_VALUE = 0,
    parameter ADDRW       = `LOG2UP(SIZE)
) (
    input wire               clk,
    input wire               reset,
    input wire               read,
    input wire               write,
    input wire [WRENW-1:0]   wren,
    input wire [ADDRW-1:0]   waddr,
    input wire [DATAW-1:0]   wdata,
    input wire [ADDRW-1:0]   raddr,
    output wire [DATAW-1:0]  rdata
);
    // Width of the slice controlled by one write-enable lane.
    localparam WSELW = DATAW / WRENW;
    `STATIC_ASSERT((WRENW * WSELW == DATAW), ("invalid parameter"))

    // `read` is only consumed by the registered-output path below.
    `UNUSED_VAR (read)

    // RAM emulation
    reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];

    // Optional power-up contents: either a hex image or a constant fill.
    // The fill starts at ADDR_MIN so that it only touches declared elements.
    if (INIT_ENABLE != 0) begin
        if (INIT_FILE != "") begin
            initial $readmemh(INIT_FILE, ram);
        end else begin
            initial
                for (integer i = ADDR_MIN; i < SIZE; ++i)
                    ram[i] = INIT_VALUE;
        end
    end

    // Next value of the addressed word: lanes whose enable is set (or every
    // lane when WRENW == 1) take wdata, the others keep the stored slice.
    wire [DATAW-1:0] ram_n;
    for (genvar i = 0; i < WRENW; ++i) begin
        assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW];
    end

    if (OUT_REG != 0) begin
        // Registered read: rdata_r is updated on the clock edge when `read`
        // is high and is cleared by reset.
        reg [DATAW-1:0] rdata_r;
        always @(posedge clk) begin
            if (reset) begin
                // Refill only the declared range ADDR_MIN..SIZE-1.
                for (integer i = ADDR_MIN; i < SIZE; ++i) begin
                    ram[i] <= DATAW'(INIT_VALUE);
                end
                rdata_r <= '0;
            end else begin
                if (write) begin
                    ram[waddr] <= ram_n;
                end
                if (read) begin
                    rdata_r <= ram[raddr];
                end
            end
        end
        assign rdata = rdata_r;
    end else begin
        // Combinational read. The prev_* registers remember the write
        // address, the word stored there and the OR of `wren` as seen at
        // the previous clock edge.
        reg [DATAW-1:0] prev_data;
        reg [ADDRW-1:0] prev_waddr;
        reg prev_write;
        always @(posedge clk) begin
            if (reset) begin
                // Refill only the declared range ADDR_MIN..SIZE-1.
                for (integer i = ADDR_MIN; i < SIZE; ++i) begin
                    ram[i] <= DATAW'(INIT_VALUE);
                end
                prev_write <= 0;
                prev_data  <= '0;
                prev_waddr <= '0;
            end else begin
                if (write) begin
                    ram[waddr] <= ram_n;
                end
                // NOTE(review): prev_write follows `wren` only, not `write`;
                // confirm that callers always drive `wren`.
                prev_write <= (| wren);
                prev_data  <= ram[waddr];
                prev_waddr <= waddr;
            end
        end
        if (LUTRAM || !NO_RWCHECK) begin
            `UNUSED_VAR (prev_write)
            `UNUSED_VAR (prev_data)
            `UNUSED_VAR (prev_waddr)
            assign rdata = ram[raddr];
        end else begin
            // When the read address matches the address captured on the
            // previous edge, return the word that was stored there at that
            // edge instead of the current array contents.
            assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
        end
    end

endmodule
`TRACING_ON

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -19,14 +19,14 @@ module VX_elastic_buffer #(
parameter SIZE = 1,
parameter OUT_REG = 0,
parameter LUTRAM = 0
) (
) (
input wire clk,
input wire reset,
input wire valid_in,
output wire ready_in,
output wire ready_in,
input wire [DATAW-1:0] data_in,
output wire [DATAW-1:0] data_out,
input wire ready_out,
output wire valid_out
@ -55,7 +55,7 @@ module VX_elastic_buffer #(
.ready_out (ready_out)
);
end else if (SIZE == 2) begin
end else if (SIZE == 2 && LUTRAM == 0) begin
VX_skid_buffer #(
.DATAW (DATAW),
@ -71,9 +71,9 @@ module VX_elastic_buffer #(
.data_out (data_out),
.ready_out (ready_out)
);
end else begin
wire empty, full;
wire [DATAW-1:0] data_out_t;
@ -93,7 +93,7 @@ module VX_elastic_buffer #(
.push (push),
.pop (pop),
.data_in(data_in),
.data_out(data_out_t),
.data_out(data_out_t),
.empty (empty),
.full (full),
`UNUSED_PIN (alm_empty),
@ -105,15 +105,15 @@ module VX_elastic_buffer #(
VX_elastic_buffer #(
.DATAW (DATAW),
.SIZE (OUT_REG == 2)
.SIZE ((OUT_REG == 2) ? 1 : 0)
) out_buf (
.clk (clk),
.reset (reset),
.valid_in (~empty),
.data_in (data_out_t),
.ready_in (ready_out_t),
.ready_in (ready_out_t),
.valid_out (valid_out),
.data_out (data_out),
.data_out (data_out),
.ready_out (ready_out)
);

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -16,53 +16,52 @@
`TRACING_OFF
// VX_fair_arbiter: selects one asserted bit of `requests` by feeding a
// filtered request vector (`requests_qual`) to VX_priority_arbiter.
//
// NOTE(review): this region is a rendered diff hunk (-16,53 +16,52); removed
// and added lines are shown together without +/- markers, so several
// declarations appear twice. The `buffer*` / `grant_unlock` / `LOCK_ENABLE`
// lines and the `grant_mask` / `requests_rem` / `grant_ready` lines look like
// the two alternative versions -- confirm against the commit.
module VX_fair_arbiter #(
parameter NUM_REQS = 1,
parameter LOCK_ENABLE = 0,
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
) (
input wire clk,
input wire reset,
input wire [NUM_REQS-1:0] requests,
input wire [NUM_REQS-1:0] requests,
output wire [LOG_NUM_REQS-1:0] grant_index,
output wire [NUM_REQS-1:0] grant_onehot,
output wire [NUM_REQS-1:0] grant_onehot,
output wire grant_valid,
input wire grant_unlock
input wire grant_ready
);
// Single requester: no state is needed, the request is passed through.
if (NUM_REQS == 1) begin
if (NUM_REQS == 1) begin
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
`UNUSED_VAR (grant_unlock)
`UNUSED_VAR (reset)
`UNUSED_VAR (grant_ready)
assign grant_index = '0;
assign grant_onehot = requests;
assign grant_valid = requests[0];
end else begin
end else begin
// `buffer` variant: while `buffer` has any bit set, only requests that are
// also set in `buffer` are eligible; `buffer_n` is the eligible set minus
// the requester granted this cycle.
reg [NUM_REQS-1:0] buffer;
// `grant_mask` variant: bits set in `grant_mask` are excluded from
// arbitration as long as some unmasked requester is pending.
reg [NUM_REQS-1:0] grant_mask;
wire [NUM_REQS-1:0] buffer_qual = buffer & requests;
wire [NUM_REQS-1:0] requests_qual = (| buffer) ? buffer_qual : requests;
wire [NUM_REQS-1:0] buffer_n = requests_qual & ~grant_onehot;
// Requests that are not masked out.
wire [NUM_REQS-1:0] requests_rem = requests & ~grant_mask;
// High when at least one unmasked request is pending.
wire rem_valid = (| requests_rem);
// Fall back to the full request vector once no unmasked request remains.
wire [NUM_REQS-1:0] requests_qual = rem_valid ? requests_rem : requests;
always @(posedge clk) begin
if (reset) begin
buffer <= '0;
end else if (!LOCK_ENABLE || grant_unlock) begin
buffer <= buffer_n;
grant_mask <= '0;
end else if (grant_ready) begin
// Add the granted bit to the mask while unmasked requests remain;
// otherwise restart the mask from the current grant alone.
grant_mask <= rem_valid ? (grant_mask | grant_onehot) : grant_onehot;
end
end
// Fixed-priority selection over the filtered request vector.
VX_priority_arbiter #(
.NUM_REQS (NUM_REQS)
) priority_arbiter (
.requests (requests_qual),
.requests (requests_qual),
.grant_index (grant_index),
.grant_onehot (grant_onehot),
.grant_valid (grant_valid)
);
end
endmodule
`TRACING_ON

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -22,28 +22,28 @@ module VX_fifo_queue #(
parameter OUT_REG = 0,
parameter LUTRAM = 1,
parameter SIZEW = `CLOG2(DEPTH+1)
) (
) (
input wire clk,
input wire reset,
input wire reset,
input wire push,
input wire pop,
input wire pop,
input wire [DATAW-1:0] data_in,
output wire [DATAW-1:0] data_out,
output wire empty,
output wire empty,
output wire alm_empty,
output wire full,
output wire full,
output wire alm_full,
output wire [SIZEW-1:0] size
);
localparam ADDRW = `CLOG2(DEPTH);
);
localparam ADDRW = `CLOG2(DEPTH);
`STATIC_ASSERT(ALM_FULL > 0, ("alm_full must be greater than 0!"))
`STATIC_ASSERT(ALM_FULL < DEPTH, ("alm_full must be smaller than size!"))
`STATIC_ASSERT(ALM_EMPTY > 0, ("alm_empty must be greater than 0!"))
`STATIC_ASSERT(ALM_EMPTY < DEPTH, ("alm_empty must be smaller than size!"))
`STATIC_ASSERT(`IS_POW2(DEPTH), ("size must be a power of 2!"))
if (DEPTH == 1) begin
reg [DATAW-1:0] head_r;
@ -52,7 +52,7 @@ module VX_fifo_queue #(
always @(posedge clk) begin
if (reset) begin
head_r <= '0;
size_r <= '0;
size_r <= '0;
end else begin
`ASSERT(~push || ~full, ("runtime error: writing to a full queue"));
`ASSERT(~pop || ~empty, ("runtime error: reading an empty queue"));
@ -63,11 +63,11 @@ module VX_fifo_queue #(
end else if (pop) begin
size_r <= '0;
end
if (push) begin
if (push) begin
head_r <= data_in;
end
end
end
end
assign data_out = head_r;
assign empty = (size_r == 0);
@ -77,7 +77,7 @@ module VX_fifo_queue #(
assign size = size_r;
end else begin
reg empty_r, alm_empty_r;
reg full_r, alm_full_r;
reg [ADDRW-1:0] used_r;
@ -86,8 +86,8 @@ module VX_fifo_queue #(
always @(posedge clk) begin
if (reset) begin
empty_r <= 1;
alm_empty_r <= 1;
full_r <= 0;
alm_empty_r <= 1;
full_r <= 0;
alm_full_r <= 0;
used_r <= '0;
end else begin
@ -106,21 +106,21 @@ module VX_fifo_queue #(
end else if (pop) begin
full_r <= 0;
if (used_r == ADDRW'(ALM_FULL))
alm_full_r <= 0;
alm_full_r <= 0;
if (used_r == ADDRW'(1))
empty_r <= 1;
if (used_r == ADDRW'(ALM_EMPTY+1))
alm_empty_r <= 1;
end
used_r <= used_n;
end
end
used_r <= used_n;
end
end
if (DEPTH == 2) begin
if (DEPTH == 2 && LUTRAM == 0) begin
assign used_n = used_r ^ (push ^ pop);
if (0 == OUT_REG) begin
if (0 == OUT_REG) begin
reg [1:0][DATAW-1:0] shift_reg;
@ -131,8 +131,8 @@ module VX_fifo_queue #(
end
end
assign data_out = shift_reg[!used_r[0]];
assign data_out = shift_reg[!used_r[0]];
end else begin
reg [DATAW-1:0] data_out_r;
@ -152,16 +152,16 @@ module VX_fifo_queue #(
assign data_out = data_out_r;
end
end else begin
assign used_n = $signed(used_r) + ADDRW'($signed(2'(push) - 2'(pop)));
if (0 == OUT_REG) begin
if (0 == OUT_REG) begin
reg [ADDRW-1:0] rd_ptr_r;
reg [ADDRW-1:0] wr_ptr_r;
always @(posedge clk) begin
if (reset) begin
rd_ptr_r <= '0;
@ -169,7 +169,7 @@ module VX_fifo_queue #(
end else begin
wr_ptr_r <= wr_ptr_r + ADDRW'(push);
rd_ptr_r <= rd_ptr_r + ADDRW'(pop);
end
end
end
VX_dp_ram #(
@ -179,8 +179,8 @@ module VX_fifo_queue #(
) dp_ram (
.clk(clk),
.read (1'b1),
.write (push),
`UNUSED_PIN (wren),
.write (push),
`UNUSED_PIN (wren),
.waddr (wr_ptr_r),
.wdata (data_in),
.raddr (rd_ptr_r),
@ -196,18 +196,18 @@ module VX_fifo_queue #(
reg [ADDRW-1:0] rd_ptr_n_r;
always @(posedge clk) begin
if (reset) begin
if (reset) begin
wr_ptr_r <= '0;
rd_ptr_r <= '0;
rd_ptr_n_r <= 1;
end else begin
wr_ptr_r <= wr_ptr_r + ADDRW'(push);
if (pop) begin
rd_ptr_r <= rd_ptr_n_r;
if (DEPTH > 2) begin
rd_ptr_r <= rd_ptr_n_r;
if (DEPTH > 2) begin
rd_ptr_n_r <= rd_ptr_r + ADDRW'(2);
end else begin // (DEPTH == 2);
rd_ptr_n_r <= ~rd_ptr_n_r;
rd_ptr_n_r <= ~rd_ptr_n_r;
end
end
end
@ -227,13 +227,13 @@ module VX_fifo_queue #(
) dp_ram (
.clk (clk),
.read (1'b1),
.write (push),
`UNUSED_PIN (wren),
.write (push),
`UNUSED_PIN (wren),
.waddr (wr_ptr_r),
.wdata (data_in),
.raddr (rd_ptr_n_r),
.rdata (dout)
);
);
always @(posedge clk) begin
if (push && (empty_r || (going_empty && pop))) begin
@ -246,12 +246,12 @@ module VX_fifo_queue #(
assign data_out = dout_r;
end
end
assign empty = empty_r;
assign empty = empty_r;
assign alm_empty = alm_empty_r;
assign full = full_r;
assign alm_full = alm_full_r;
assign size = {full_r, used_r};
assign size = {full_r, used_r};
end
endmodule

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -16,29 +16,27 @@
`TRACING_OFF
module VX_generic_arbiter #(
parameter NUM_REQS = 1,
parameter LOCK_ENABLE = 0,
parameter `STRING TYPE = "P",
parameter `STRING TYPE = "P",
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
) (
input wire clk,
input wire reset,
input wire reset,
input wire [NUM_REQS-1:0] requests,
output wire [LOG_NUM_REQS-1:0] grant_index,
output wire [NUM_REQS-1:0] grant_onehot,
output wire [NUM_REQS-1:0] grant_onehot,
output wire grant_valid,
input wire grant_unlock
input wire grant_ready
);
if (TYPE == "P") begin
`UNUSED_PARAM (LOCK_ENABLE)
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
`UNUSED_VAR (grant_unlock)
`UNUSED_VAR (grant_ready)
VX_priority_arbiter #(
.NUM_REQS (NUM_REQS)
) priority_arbiter (
.requests (requests),
.requests (requests),
.grant_valid (grant_valid),
.grant_index (grant_index),
.grant_onehot (grant_onehot)
@ -47,68 +45,64 @@ module VX_generic_arbiter #(
end else if (TYPE == "R") begin
VX_rr_arbiter #(
.NUM_REQS (NUM_REQS),
.LOCK_ENABLE (LOCK_ENABLE)
.NUM_REQS (NUM_REQS)
) rr_arbiter (
.clk (clk),
.reset (reset),
.requests (requests),
.reset (reset),
.requests (requests),
.grant_valid (grant_valid),
.grant_index (grant_index),
.grant_onehot (grant_onehot),
.grant_unlock (grant_unlock)
.grant_ready (grant_ready)
);
end else if (TYPE == "F") begin
VX_fair_arbiter #(
.NUM_REQS (NUM_REQS),
.LOCK_ENABLE (LOCK_ENABLE)
.NUM_REQS (NUM_REQS)
) fair_arbiter (
.clk (clk),
.reset (reset),
.requests (requests),
.requests (requests),
.grant_valid (grant_valid),
.grant_index (grant_index),
.grant_onehot (grant_onehot),
.grant_unlock (grant_unlock)
.grant_ready (grant_ready)
);
end else if (TYPE == "M") begin
VX_matrix_arbiter #(
.NUM_REQS (NUM_REQS),
.LOCK_ENABLE (LOCK_ENABLE)
.NUM_REQS (NUM_REQS)
) matrix_arbiter (
.clk (clk),
.reset (reset),
.requests (requests),
.requests (requests),
.grant_valid (grant_valid),
.grant_index (grant_index),
.grant_onehot (grant_onehot),
.grant_unlock (grant_unlock)
.grant_ready (grant_ready)
);
end else if (TYPE == "C") begin
VX_cyclic_arbiter #(
.NUM_REQS (NUM_REQS),
.LOCK_ENABLE (LOCK_ENABLE)
.NUM_REQS (NUM_REQS)
) cyclic_arbiter (
.clk (clk),
.reset (reset),
.requests (requests),
.requests (requests),
.grant_valid (grant_valid),
.grant_index (grant_index),
.grant_onehot (grant_onehot),
.grant_unlock (grant_unlock)
.grant_ready (grant_ready)
);
end else begin
`ERROR(("invalid parameter"));
end
endmodule
`TRACING_ON

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -16,52 +16,51 @@
`TRACING_OFF
module VX_matrix_arbiter #(
parameter NUM_REQS = 1,
parameter LOCK_ENABLE = 0,
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
) (
input wire clk,
input wire reset,
input wire reset,
input wire [NUM_REQS-1:0] requests,
output wire [LOG_NUM_REQS-1:0] grant_index,
output wire [NUM_REQS-1:0] grant_onehot,
output wire [NUM_REQS-1:0] grant_onehot,
output wire grant_valid,
input wire grant_unlock
input wire grant_ready
);
if (NUM_REQS == 1) begin
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
`UNUSED_VAR (grant_unlock)
`UNUSED_VAR (grant_ready)
assign grant_index = '0;
assign grant_onehot = requests;
assign grant_valid = requests[0];
end else begin
reg [NUM_REQS-1:1] state [NUM_REQS-1:0];
reg [NUM_REQS-1:1] state [NUM_REQS-1:0];
wire [NUM_REQS-1:0] pri [NUM_REQS-1:0];
wire [NUM_REQS-1:0] grant_unqual;
for (genvar i = 0; i < NUM_REQS; ++i) begin
for (genvar i = 0; i < NUM_REQS; ++i) begin
for (genvar j = 0; j < NUM_REQS; ++j) begin
if (j > i) begin
assign pri[j][i] = requests[i] && state[i][j];
end
end
else if (j < i) begin
assign pri[j][i] = requests[i] && !state[j][i];
end
end
else begin
assign pri[j][i] = 0;
assign pri[j][i] = 0;
end
end
assign grant_unqual[i] = requests[i] && !(| pri[i]);
end
for (genvar i = 0; i < NUM_REQS; ++i) begin
for (genvar i = 0; i < NUM_REQS; ++i) begin
for (genvar j = i + 1; j < NUM_REQS; ++j) begin
always @(posedge clk) begin
if (reset) begin
always @(posedge clk) begin
if (reset) begin
state[i][j] <= '0;
end else begin
state[i][j] <= (state[i][j] || grant_unqual[j]) && !grant_unqual[i];
@ -70,20 +69,15 @@ module VX_matrix_arbiter #(
end
end
if (LOCK_ENABLE == 0) begin
`UNUSED_VAR (grant_unlock)
assign grant_onehot = grant_unqual;
end else begin
reg [NUM_REQS-1:0] grant_unqual_prev;
always @(posedge clk) begin
if (reset) begin
grant_unqual_prev <= '0;
end else if (grant_unlock) begin
grant_unqual_prev <= grant_unqual;
end
reg [NUM_REQS-1:0] grant_unqual_prev;
always @(posedge clk) begin
if (reset) begin
grant_unqual_prev <= '0;
end else if (grant_ready) begin
grant_unqual_prev <= grant_unqual;
end
assign grant_onehot = grant_unlock ? grant_unqual : grant_unqual_prev;
end
assign grant_onehot = grant_ready ? grant_unqual : grant_unqual_prev;
VX_onehot_encoder #(
.N (NUM_REQS)
@ -96,6 +90,6 @@ module VX_matrix_arbiter #(
assign grant_valid = (| requests);
end
endmodule
`TRACING_ON

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -24,13 +24,13 @@ module VX_mem_coalescer #(
parameter TAG_WIDTH = 8,
parameter UUID_WIDTH = 0, // upper section of the request tag contains the UUID
parameter QUEUE_SIZE = 8,
parameter DATA_IN_WIDTH = DATA_IN_SIZE * 8,
parameter DATA_OUT_WIDTH= DATA_OUT_SIZE * 8,
parameter OUT_REQS = (NUM_REQS * DATA_IN_WIDTH) / DATA_OUT_WIDTH,
parameter BATCH_SIZE = DATA_OUT_SIZE / DATA_IN_SIZE,
parameter BATCH_SIZE_W = `LOG2UP(BATCH_SIZE),
parameter OUT_ADDR_WIDTH= ADDR_WIDTH - BATCH_SIZE_W,
parameter DATA_RATIO = DATA_OUT_SIZE / DATA_IN_SIZE,
parameter DATA_RATIO_W = `LOG2UP(DATA_RATIO),
parameter OUT_REQS = NUM_REQS / DATA_RATIO,
parameter OUT_ADDR_WIDTH= ADDR_WIDTH - DATA_RATIO_W,
parameter QUEUE_ADDRW = `CLOG2(QUEUE_SIZE),
parameter OUT_TAG_WIDTH = UUID_WIDTH + QUEUE_ADDRW
) (
@ -45,7 +45,7 @@ module VX_mem_coalescer #(
input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] in_req_addr,
input wire [NUM_REQS-1:0][ATYPE_WIDTH-1:0] in_req_atype,
input wire [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_req_data,
input wire [TAG_WIDTH-1:0] in_req_tag,
input wire [TAG_WIDTH-1:0] in_req_tag,
output wire in_req_ready,
// Input response
@ -58,7 +58,7 @@ module VX_mem_coalescer #(
// Output request
output wire out_req_valid,
output wire out_req_rw,
output wire [OUT_REQS-1:0] out_req_mask,
output wire [OUT_REQS-1:0] out_req_mask,
output wire [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] out_req_byteen,
output wire [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr,
output wire [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype,
@ -78,27 +78,27 @@ module VX_mem_coalescer #(
`STATIC_ASSERT ((NUM_REQS * DATA_IN_WIDTH >= DATA_OUT_WIDTH), ("invalid parameter"))
`RUNTIME_ASSERT ((~in_req_valid || in_req_mask != 0), ("invalid request mask"));
`RUNTIME_ASSERT ((~out_rsp_valid || out_rsp_mask != 0), ("invalid request mask"));
localparam TAG_ID_WIDTH = TAG_WIDTH - UUID_WIDTH;
localparam NUM_REQS_W = `LOG2UP(NUM_REQS);
localparam TAG_ID_WIDTH = TAG_WIDTH - UUID_WIDTH;
localparam NUM_REQS_W = `LOG2UP(NUM_REQS);
// tag + mask + offset
localparam IBUF_DATA_WIDTH = TAG_ID_WIDTH + NUM_REQS + (NUM_REQS * BATCH_SIZE_W);
localparam IBUF_DATA_WIDTH = TAG_ID_WIDTH + NUM_REQS + (NUM_REQS * DATA_RATIO_W);
localparam STATE_SETUP = 0;
localparam STATE_SEND = 1;
logic state_r, state_n;
logic out_req_valid_r, out_req_valid_n;
logic out_req_rw_r, out_req_rw_n;
logic [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n;
logic [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] out_req_byteen_r, out_req_byteen_n;
logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n;
logic [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype_r, out_req_atype_n;
logic [OUT_REQS-1:0][DATA_OUT_WIDTH-1:0] out_req_data_r, out_req_data_n;
logic [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n;
logic in_req_ready_n;
reg state_r, state_n;
reg out_req_valid_r, out_req_valid_n;
reg out_req_rw_r, out_req_rw_n;
reg [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n;
reg [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n;
reg [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype_r, out_req_atype_n;
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] out_req_byteen_r, out_req_byteen_n;
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] out_req_data_r, out_req_data_n;
reg [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n;
reg in_req_ready_n;
wire ibuf_push;
wire ibuf_pop;
@ -108,33 +108,45 @@ module VX_mem_coalescer #(
wire ibuf_empty;
wire [IBUF_DATA_WIDTH-1:0] ibuf_din;
wire [IBUF_DATA_WIDTH-1:0] ibuf_dout;
logic [OUT_REQS-1:0] batch_valid_r, batch_valid_n;
logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] seed_addr_r, seed_addr_n;
logic [OUT_REQS-1:0][ATYPE_WIDTH-1:0] seed_atype_r, seed_atype_n;
logic [NUM_REQS-1:0] addr_matches_r, addr_matches_n;
logic [NUM_REQS-1:0] processed_mask_r, processed_mask_n;
wire [OUT_REQS-1:0][NUM_REQS_W-1:0] seed_idx;
wire [NUM_REQS-1:0][OUT_ADDR_WIDTH-1:0] in_addr_base;
wire [NUM_REQS-1:0][BATCH_SIZE_W-1:0] in_addr_offset;
wire [NUM_REQS-1:0][DATA_RATIO_W-1:0] in_addr_offset;
for (genvar i = 0; i < NUM_REQS; i++) begin
assign in_addr_base[i] = in_req_addr[i][ADDR_WIDTH-1:BATCH_SIZE_W];
assign in_addr_offset[i] = in_req_addr[i][BATCH_SIZE_W-1:0];
assign in_addr_base[i] = in_req_addr[i][ADDR_WIDTH-1:DATA_RATIO_W];
assign in_addr_offset[i] = in_req_addr[i][DATA_RATIO_W-1:0];
end
for (genvar i = 0; i < OUT_REQS; ++i) begin
wire [BATCH_SIZE-1:0] batch_mask = in_req_mask[BATCH_SIZE * i +: BATCH_SIZE] & ~processed_mask_r[BATCH_SIZE * i +: BATCH_SIZE];
wire [BATCH_SIZE_W-1:0] batch_idx;
wire [DATA_RATIO-1:0] batch_mask = in_req_mask[i * DATA_RATIO +: DATA_RATIO] & ~processed_mask_r[i * DATA_RATIO +: DATA_RATIO];
wire [DATA_RATIO_W-1:0] batch_idx;
VX_priority_encoder #(
.N (BATCH_SIZE)
.N (DATA_RATIO)
) priority_encoder (
.data_in (batch_mask),
.index (batch_idx),
.index (batch_idx),
`UNUSED_PIN (onehot),
.valid_out (batch_valid_n[i])
);
assign seed_idx[i] = NUM_REQS_W'(BATCH_SIZE * i) + NUM_REQS_W'(batch_idx);
assign seed_idx[i] = NUM_REQS_W'(i * DATA_RATIO) + NUM_REQS_W'(batch_idx);
end
for (genvar i = 0; i < OUT_REQS; ++i) begin
assign seed_addr_n[i] = in_addr_base[seed_idx[i]];
assign seed_atype_n[i] = in_req_atype[seed_idx[i]];
end
for (genvar i = 0; i < OUT_REQS; ++i) begin
for (genvar j = 0; j < DATA_RATIO; ++j) begin
assign addr_matches_n[i * DATA_RATIO + j] = (in_addr_base[i * DATA_RATIO + j] == seed_addr_n[i]);
end
end
always @(posedge clk) begin
@ -144,12 +156,13 @@ module VX_mem_coalescer #(
out_req_valid_r <= 0;
end else begin
state_r <= state_n;
out_req_valid_r <= out_req_valid_n;
batch_valid_r <= batch_valid_n;
seed_addr_r <= seed_addr_n;
seed_atype_r <= seed_atype_n;
out_req_rw_r <= out_req_rw_n;
out_req_mask_r <= out_req_mask_n;
seed_atype_r <= seed_atype_n;
addr_matches_r <= addr_matches_n;
out_req_valid_r <= out_req_valid_n;
out_req_mask_r <= out_req_mask_n;
out_req_rw_r <= out_req_rw_n;
out_req_addr_r <= out_req_addr_n;
out_req_atype_r <= out_req_atype_n;
out_req_byteen_r <= out_req_byteen_n;
@ -159,84 +172,77 @@ module VX_mem_coalescer #(
end
end
logic [NUM_REQS-1:0] addr_matches;
wire [NUM_REQS-1:0] current_pmask = in_req_mask & addr_matches_r;
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] req_byteen_merged;
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] req_data_merged;
always @(*) begin
addr_matches = '0;
req_byteen_merged = '0;
req_data_merged = 'x;
for (integer i = 0; i < OUT_REQS; ++i) begin
for (integer j = 0; j < BATCH_SIZE; j++) begin
if (in_addr_base[BATCH_SIZE * i + j] == seed_addr_r[i]) begin
addr_matches[BATCH_SIZE * i + j] = 1;
for (integer j = 0; j < DATA_RATIO; ++j) begin
if (current_pmask[i * DATA_RATIO + j]) begin
for (integer k = 0; k < DATA_IN_SIZE; ++k) begin
if (in_req_byteen[DATA_RATIO * i + j][k]) begin
req_byteen_merged[i][in_addr_offset[DATA_RATIO * i + j]][k] = 1'b1;
req_data_merged[i][in_addr_offset[DATA_RATIO * i + j]][k * 8 +: 8] = in_req_data[DATA_RATIO * i + j][k * 8 +: 8];
end
end
end
end
end
end
wire [NUM_REQS-1:0] current_pmask = in_req_mask & addr_matches;
wire [OUT_REQS * DATA_RATIO - 1:0] pending_mask;
for (genvar i = 0; i < OUT_REQS * DATA_RATIO; ++i) begin
assign pending_mask[i] = in_req_mask[i] && ~addr_matches_r[i] && ~processed_mask_r[i];
end
wire batch_completed = ~(| pending_mask);
always @(*) begin
state_n = state_r;
out_req_valid_n = out_req_valid_r;
seed_addr_n = seed_addr_r;
seed_atype_n = seed_atype_r;
out_req_rw_n = out_req_rw_r;
out_req_mask_n = out_req_mask_r;
out_req_mask_n = out_req_mask_r;
out_req_rw_n = out_req_rw_r;
out_req_addr_n = out_req_addr_r;
out_req_atype_n = out_req_atype_r;
out_req_byteen_n = out_req_byteen_r;
out_req_data_n = out_req_data_r;
out_req_tag_n = out_req_tag_r;
processed_mask_n = processed_mask_r;
in_req_ready_n = 0;
case (state_r)
STATE_SETUP: begin
// find the next seed address
for (integer i = 0; i < OUT_REQS; ++i) begin
seed_addr_n[i] = in_addr_base[seed_idx[i]];
seed_atype_n[i] = in_req_atype[seed_idx[i]];
end
STATE_SETUP: begin
// wait for pending outgoing request to submit
if (out_req_valid && out_req_ready) begin
out_req_valid_n = 0;
end
if (in_req_valid && ~out_req_valid_n && ~ibuf_full) begin
if (in_req_valid && ~out_req_valid_n && ~ibuf_full) begin
state_n = STATE_SEND;
end
end
default/*STATE_SEND*/: begin
out_req_valid_n = 1;
out_req_rw_n = in_req_rw;
out_req_tag_n = {in_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr};
in_req_ready_n = 1;
out_req_byteen_n = '0;
out_req_data_n = 'x;
for (integer i = 0; i < OUT_REQS; ++i) begin
for (integer j = 0; j < BATCH_SIZE; j++) begin
if (in_req_mask[BATCH_SIZE * i + j]) begin
if (addr_matches[BATCH_SIZE * i + j]) begin
for (integer k = 0; k < DATA_IN_SIZE; ++k) begin
if (in_req_byteen[BATCH_SIZE * i + j][k]) begin
out_req_byteen_n[i][in_addr_offset[BATCH_SIZE * i + j] * DATA_IN_SIZE + k +: 1] = 1'b1;
out_req_data_n[i][in_addr_offset[BATCH_SIZE * i + j] * DATA_IN_WIDTH + k * 8 +: 8] = in_req_data[BATCH_SIZE * i + j][k * 8 +: 8];
end
end
end else begin
if (!processed_mask_r[BATCH_SIZE * i + j]) begin
in_req_ready_n = 0;
end
end
end
end
out_req_mask_n[i] = batch_valid_r[i];
out_req_addr_n[i] = seed_addr_r[i];
out_req_atype_n[i]= seed_atype_r[i];
end
if (in_req_ready_n) begin
out_req_mask_n = batch_valid_r;
out_req_rw_n = in_req_rw;
out_req_addr_n = seed_addr_r;
out_req_atype_n = seed_atype_r;
out_req_byteen_n= req_byteen_merged;
out_req_data_n = req_data_merged;
out_req_tag_n = {in_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr};
in_req_ready_n = batch_completed;
if (batch_completed) begin
processed_mask_n = '0;
end else begin
processed_mask_n = processed_mask_r | current_pmask;
end
state_n = STATE_SETUP;
end
endcase
@ -246,13 +252,15 @@ module VX_mem_coalescer #(
wire out_rsp_eop;
assign ibuf_push = (state_r == STATE_SEND) && ~in_req_rw;
wire req_sent = (state_r == STATE_SEND);
assign ibuf_push = req_sent && ~in_req_rw;
assign ibuf_pop = out_rsp_fire && out_rsp_eop;
assign ibuf_raddr = out_rsp_tag[QUEUE_ADDRW-1:0];
assign ibuf_raddr = out_rsp_tag[QUEUE_ADDRW-1:0];
wire [TAG_ID_WIDTH-1:0] ibuf_din_tag = in_req_tag[TAG_ID_WIDTH-1:0];
wire [NUM_REQS-1:0][BATCH_SIZE_W-1:0] ibuf_din_offset = in_addr_offset;
wire [NUM_REQS-1:0] ibuf_din_pmask = current_pmask;
wire [NUM_REQS-1:0][DATA_RATIO_W-1:0] ibuf_din_offset = in_addr_offset;
wire [NUM_REQS-1:0] ibuf_din_pmask = current_pmask;
assign ibuf_din = {ibuf_din_tag, ibuf_din_pmask, ibuf_din_offset};
@ -286,7 +294,7 @@ module VX_mem_coalescer #(
// unmerge responses
reg [QUEUE_SIZE-1:0][OUT_REQS-1:0] rsp_rem_mask;
reg [QUEUE_SIZE-1:0][OUT_REQS-1:0] rsp_rem_mask;
wire [OUT_REQS-1:0] rsp_rem_mask_n = rsp_rem_mask[ibuf_raddr] & ~out_rsp_mask;
assign out_rsp_eop = ~(| rsp_rem_mask_n);
@ -299,21 +307,19 @@ module VX_mem_coalescer #(
end
end
wire [NUM_REQS-1:0][BATCH_SIZE_W-1:0] ibuf_dout_offset;
reg [NUM_REQS-1:0] ibuf_dout_pmask;
wire [NUM_REQS-1:0][DATA_RATIO_W-1:0] ibuf_dout_offset;
wire [NUM_REQS-1:0] ibuf_dout_pmask;
wire [TAG_ID_WIDTH-1:0] ibuf_dout_tag;
assign {ibuf_dout_tag, ibuf_dout_pmask, ibuf_dout_offset} = ibuf_dout;
logic [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_rsp_data_n;
logic [NUM_REQS-1:0] in_rsp_mask_n;
always @(*) begin
for (integer i = 0; i < OUT_REQS; ++i) begin
for (integer j = 0; j < BATCH_SIZE; j++) begin
in_rsp_mask_n[BATCH_SIZE * i + j] = out_rsp_mask[i] && ibuf_dout_pmask[BATCH_SIZE * i + j];
in_rsp_data_n[BATCH_SIZE * i + j] = out_rsp_data[i][ibuf_dout_offset[BATCH_SIZE * i + j] * DATA_IN_WIDTH +: DATA_IN_WIDTH];
end
wire [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_rsp_data_n;
wire [NUM_REQS-1:0] in_rsp_mask_n;
for (genvar i = 0; i < OUT_REQS; ++i) begin
for (genvar j = 0; j < DATA_RATIO; ++j) begin
assign in_rsp_mask_n[i * DATA_RATIO + j] = out_rsp_mask[i] && ibuf_dout_pmask[i * DATA_RATIO + j];
assign in_rsp_data_n[i * DATA_RATIO + j] = out_rsp_data[i][ibuf_dout_offset[i * DATA_RATIO + j] * DATA_IN_WIDTH +: DATA_IN_WIDTH];
end
end
@ -335,11 +341,11 @@ module VX_mem_coalescer #(
assign out_rsp_uuid = '0;
end
reg [NUM_REQS-1:0][BATCH_SIZE_W-1:0] out_req_offset;
reg [NUM_REQS-1:0][DATA_RATIO_W-1:0] out_req_offset;
reg [NUM_REQS-1:0] out_req_pmask;
always @(posedge clk) begin
if (ibuf_push) begin
if (req_sent) begin
out_req_offset <= ibuf_din_offset;
out_req_pmask <= ibuf_din_pmask;
end
@ -351,30 +357,30 @@ module VX_mem_coalescer #(
if (out_req_fire) begin
if (out_req_rw) begin
`TRACE(1, ("%d: %s-out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask));
`TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS);
`TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS);
`TRACE(1, (", atype="));
`TRACE_ARRAY1D(1, "%b", out_req_atype, OUT_REQS);
`TRACE_ARRAY1D(1, "%b", out_req_atype, OUT_REQS);
`TRACE(1, (", byteen="));
`TRACE_ARRAY1D(1, "0x%h", out_req_byteen, OUT_REQS);
`TRACE(1, (", data="));
`TRACE_ARRAY1D(1, "0x%0h", out_req_data, OUT_REQS);
`TRACE_ARRAY1D(1, "0x%0h", out_req_data, OUT_REQS);
end else begin
`TRACE(1, ("%d: %s-out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask));
`TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS);
`TRACE(1, (", atype="));
`TRACE_ARRAY1D(1, "%b", out_req_atype, OUT_REQS);
end
`TRACE(1, (", offset="));
`TRACE(1, (", offset="));
`TRACE_ARRAY1D(1, "%0d", out_req_offset, NUM_REQS);
`TRACE(1, (", pmask=%b, tag=0x%0h (#%0d)\n", out_req_pmask, out_req_tag, out_req_uuid));
`TRACE(1, (", pmask=%b, tag=0x%0h (#%0d)\n", out_req_pmask, out_req_tag, out_req_uuid));
if ($countones(out_req_pmask) > 1) begin
`TRACE(1, ("%t: *** %s: coalescing=%b (#%0d)\n", $time, INSTANCE_ID, out_req_pmask, out_req_uuid));
end
`TRACE(1, ("%t: *** %s: coalesced=%d (#%0d)\n", $time, INSTANCE_ID, $countones(out_req_pmask), out_req_uuid));
end
end
if (out_rsp_fire) begin
`TRACE(1, ("%d: %s-out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask));
`TRACE_ARRAY1D(1, "0x%0h", out_rsp_data, OUT_REQS);
`TRACE(1, (", offset="));
`TRACE(1, (", offset="));
`TRACE_ARRAY1D(1, "%0d", ibuf_dout_offset, NUM_REQS);
`TRACE(1, (", eop=%b, pmask=%b, tag=0x%0h (#%0d)\n", out_rsp_eop, ibuf_dout_pmask, out_rsp_tag, out_rsp_uuid));
end

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -23,7 +23,7 @@ module VX_mem_scheduler #(
parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE),
parameter ATYPE_WIDTH = 1,
parameter TAG_WIDTH = 8,
parameter UUID_WIDTH = 0, // upper section of the request tag contains the UUID
parameter UUID_WIDTH = 0, // upper section of the request tag contains the UUID
parameter CORE_QUEUE_SIZE= 8,
parameter MEM_QUEUE_SIZE= CORE_QUEUE_SIZE,
parameter RSP_PARTIAL = 0,
@ -54,7 +54,7 @@ module VX_mem_scheduler #(
input wire [CORE_REQS-1:0][WORD_WIDTH-1:0] core_req_data,
input wire [TAG_WIDTH-1:0] core_req_tag,
output wire core_req_ready,
output wire core_req_empty,
output wire core_req_empty,
output wire core_req_sent,
// Core response
@ -81,7 +81,7 @@ module VX_mem_scheduler #(
input wire mem_rsp_valid,
input wire [MEM_CHANNELS-1:0] mem_rsp_mask,
input wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_rsp_data,
input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag,
input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag,
output wire mem_rsp_ready
);
localparam BATCH_SEL_WIDTH = `UP(MEM_BATCH_BITS);
@ -110,7 +110,7 @@ module VX_mem_scheduler #(
wire reqq_valid;
wire [CORE_REQS-1:0] reqq_mask;
wire reqq_rw;
wire reqq_rw;
wire [CORE_REQS-1:0][WORD_SIZE-1:0] reqq_byteen;
wire [CORE_REQS-1:0][ADDR_WIDTH-1:0] reqq_addr;
wire [CORE_REQS-1:0][ATYPE_WIDTH-1:0] reqq_atype;
@ -118,7 +118,7 @@ module VX_mem_scheduler #(
wire [REQQ_TAG_WIDTH-1:0] reqq_tag;
wire reqq_ready;
wire reqq_valid_s;
wire reqq_valid_s;
wire [MERGED_REQS-1:0] reqq_mask_s;
wire reqq_rw_s;
wire [MERGED_REQS-1:0][LINE_SIZE-1:0] reqq_byteen_s;
@ -139,9 +139,9 @@ module VX_mem_scheduler #(
wire mem_req_ready_s;
wire mem_rsp_valid_s;
wire [CORE_REQS-1:0] mem_rsp_mask_s;
wire [CORE_REQS-1:0][WORD_WIDTH-1:0] mem_rsp_data_s;
wire [REQQ_TAG_WIDTH-1:0] mem_rsp_tag_s;
wire [CORE_CHANNELS-1:0] mem_rsp_mask_s;
wire [CORE_CHANNELS-1:0][WORD_WIDTH-1:0] mem_rsp_data_s;
wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_s;
wire mem_rsp_ready_s;
wire crsp_valid;
@ -159,7 +159,7 @@ module VX_mem_scheduler #(
wire ibuf_ready = (core_req_rw || ~ibuf_full);
wire reqq_valid_in = core_req_valid && ibuf_ready;
wire reqq_ready_in;
wire [REQQ_TAG_WIDTH-1:0] reqq_tag_u;
if (UUID_WIDTH != 0) begin
assign reqq_tag_u = {core_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr};
@ -169,7 +169,7 @@ module VX_mem_scheduler #(
VX_elastic_buffer #(
.DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + ATYPE_WIDTH + WORD_WIDTH) + REQQ_TAG_WIDTH),
.SIZE (CORE_QUEUE_SIZE),
.SIZE (CORE_QUEUE_SIZE),
.OUT_REG (1)
) req_queue (
.clk (clk),
@ -188,7 +188,7 @@ module VX_mem_scheduler #(
// no pending requests
assign core_req_empty = !reqq_valid && ibuf_empty;
// notify request submission
// notify request submission
assign core_req_sent = reqq_valid && reqq_ready;
// Index buffer ///////////////////////////////////////////////////////////
@ -219,15 +219,15 @@ module VX_mem_scheduler #(
`UNUSED_VAR (ibuf_empty)
// Handle memory coalescing ///////////////////////////////////////////////
// Handle memory coalescing ///////////////////////////////////////////////
if (COALESCE_ENABLE) begin
`RESET_RELAY (coalescer_reset, reset);
VX_mem_coalescer #(
.INSTANCE_ID ($sformatf("%s-coalescer", INSTANCE_ID)),
.NUM_REQS (CORE_REQS),
.NUM_REQS (CORE_REQS),
.DATA_IN_SIZE (WORD_SIZE),
.DATA_OUT_SIZE (LINE_SIZE),
.ADDR_WIDTH (ADDR_WIDTH),
@ -238,7 +238,7 @@ module VX_mem_scheduler #(
) coalescer (
.clk (clk),
.reset (coalescer_reset),
// Input request
.in_req_valid (reqq_valid),
.in_req_mask (reqq_mask),
@ -280,7 +280,7 @@ module VX_mem_scheduler #(
assign reqq_valid_s = reqq_valid;
assign reqq_mask_s = reqq_mask;
assign reqq_rw_s = reqq_rw;
assign reqq_rw_s = reqq_rw;
assign reqq_byteen_s= reqq_byteen;
assign reqq_addr_s = reqq_addr;
assign reqq_atype_s = reqq_atype;
@ -292,18 +292,18 @@ module VX_mem_scheduler #(
assign mem_rsp_mask_s = mem_rsp_mask;
assign mem_rsp_data_s = mem_rsp_data;
assign mem_rsp_tag_s = mem_rsp_tag;
assign mem_rsp_ready = mem_rsp_ready_s;
assign mem_rsp_ready = mem_rsp_ready_s;
end
// Handle memory requests /////////////////////////////////////////////////
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0] mem_req_mask_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][ATYPE_WIDTH-1:0] mem_req_atype_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_b;
wire [BATCH_SEL_WIDTH-1:0] req_batch_idx;
for (genvar i = 0; i < MEM_BATCHES; ++i) begin
@ -331,14 +331,19 @@ module VX_mem_scheduler #(
assign mem_req_addr_s = mem_req_addr_b[req_batch_idx];
assign mem_req_atype_s = mem_req_atype_b[req_batch_idx];
assign mem_req_data_s = mem_req_data_b[req_batch_idx];
if (MEM_BATCHES != 1) begin
reg [MEM_BATCH_BITS-1:0] req_batch_idx_r;
wire is_degenerate_batch = ~(| mem_req_mask_s);
wire mem_req_valid_b = reqq_valid_s && ~is_degenerate_batch;
wire mem_req_ready_b = mem_req_ready_s || is_degenerate_batch;
always @(posedge clk) begin
if (reset) begin
req_batch_idx_r <= '0;
end else begin
if (reqq_valid_s && mem_req_ready_s) begin
if (reqq_valid_s && mem_req_ready_b) begin
if (req_sent_all) begin
req_batch_idx_r <= '0;
end else begin
@ -352,10 +357,10 @@ module VX_mem_scheduler #(
wire [MEM_BATCHES-1:0][MEM_BATCH_BITS-1:0] req_batch_idxs;
wire [MEM_BATCH_BITS-1:0] req_batch_idx_last;
for (genvar i = 0; i < MEM_BATCHES; ++i) begin
for (genvar i = 0; i < MEM_BATCHES; ++i) begin
assign req_batch_valids[i] = (| mem_req_mask_b[i]);
assign req_batch_idxs[i] = MEM_BATCH_BITS'(i);
end
end
VX_find_first #(
.N (MEM_BATCHES),
@ -368,21 +373,22 @@ module VX_mem_scheduler #(
`UNUSED_PIN (valid_out)
);
assign req_batch_idx = req_batch_idx_r;
assign req_sent_all = mem_req_ready_s && (req_batch_idx_r == req_batch_idx_last);
assign mem_req_valid_s = mem_req_valid_b;
assign req_batch_idx = req_batch_idx_r;
assign req_sent_all = mem_req_ready_b && (req_batch_idx_r == req_batch_idx_last);
assign mem_req_tag_s = {reqq_tag_s, req_batch_idx};
end else begin
assign mem_req_valid_s = reqq_valid_s;
assign req_batch_idx = '0;
assign req_sent_all = mem_req_ready_s;
assign mem_req_tag_s = reqq_tag_s;
end
assign mem_req_valid_s = reqq_valid_s;
assign reqq_ready_s = req_sent_all;
VX_elastic_buffer #(
.DATAW (MEM_CHANNELS + 1 + MEM_CHANNELS * (LINE_SIZE + MEM_ADDR_WIDTH + ATYPE_WIDTH + LINE_WIDTH) + MEM_TAG_WIDTH),
.SIZE (`TO_OUT_BUF_SIZE(MEM_OUT_BUF)),
@ -415,7 +421,7 @@ module VX_mem_scheduler #(
localparam j = r % CORE_CHANNELS;
assign curr_mask[r] = (BATCH_SEL_WIDTH'(i) == rsp_batch_idx) && mem_rsp_mask_s[j];
end
assign rsp_rem_mask_n = rsp_rem_mask[ibuf_raddr] & ~curr_mask;
wire rsp_complete = ~(| rsp_rem_mask_n);
@ -457,19 +463,19 @@ module VX_mem_scheduler #(
end else begin
reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0];
reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store_n;
reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0];
reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0];
reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store_n;
reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0];
always @(*) begin
rsp_store_n = rsp_store[ibuf_raddr];
rsp_store_n = rsp_store[ibuf_raddr];
for (integer i = 0; i < CORE_CHANNELS; ++i) begin
if ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]) begin
rsp_store_n[(rsp_batch_idx * CORE_CHANNELS + i) * WORD_WIDTH +: WORD_WIDTH] = mem_rsp_data_s[i];
end
end
end
end
always @(posedge clk) begin
if (ibuf_push) begin
rsp_orig_mask[ibuf_waddr] <= core_req_mask;
@ -490,10 +496,11 @@ module VX_mem_scheduler #(
end
assign mem_rsp_ready_s = crsp_ready || ~rsp_complete;
end
if (UUID_WIDTH != 0) begin
assign crsp_tag = {mem_rsp_tag_s[REQQ_TAG_WIDTH-1 -: UUID_WIDTH], ibuf_dout};
assign crsp_tag = {mem_rsp_tag_s[MEM_TAG_WIDTH-1 -: UUID_WIDTH], ibuf_dout};
end else begin
assign crsp_tag = ibuf_dout;
end
@ -509,11 +516,11 @@ module VX_mem_scheduler #(
) rsp_buf (
.clk (clk),
.reset (reset),
.valid_in (crsp_valid),
.valid_in (crsp_valid),
.ready_in (crsp_ready),
.data_in ({crsp_mask, crsp_sop, crsp_eop, crsp_data, crsp_tag}),
.data_out ({core_rsp_mask, core_rsp_sop, core_rsp_eop, core_rsp_data, core_rsp_tag}),
.valid_out (core_rsp_valid),
.valid_out (core_rsp_valid),
.ready_out (core_rsp_ready)
);
@ -541,14 +548,14 @@ module VX_mem_scheduler #(
end
end
if (ibuf_push) begin
if (ibuf_push) begin
pending_reqs_time[ibuf_waddr] <= {req_dbg_uuid, ibuf_din, $time};
end
for (integer i = 0; i < CORE_QUEUE_SIZE; ++i) begin
if (pending_reqs_valid[i]) begin
`ASSERT(($time - pending_reqs_time[i][63:0]) < STALL_TIMEOUT,
("%t: *** %s response timeout: tag=0x%0h (#%0d)",
("%t: *** %s response timeout: tag=0x%0h (#%0d)",
$time, INSTANCE_ID, pending_reqs_time[i][64 +: TAG_ID_WIDTH], pending_reqs_time[i][64+TAG_ID_WIDTH +: `UP(UUID_WIDTH)]));
end
end
@ -563,8 +570,8 @@ module VX_mem_scheduler #(
wire [`UP(UUID_WIDTH)-1:0] rsp_dbg_uuid;
if (UUID_WIDTH != 0) begin
assign mem_req_dbg_uuid = mem_req_tag_s[REQQ_TAG_WIDTH-1 -: UUID_WIDTH];
assign mem_rsp_dbg_uuid = mem_rsp_tag_s[REQQ_TAG_WIDTH-1 -: UUID_WIDTH];
assign mem_req_dbg_uuid = mem_req_tag_s[MEM_TAG_WIDTH-1 -: UUID_WIDTH];
assign mem_rsp_dbg_uuid = mem_rsp_tag_s[MEM_TAG_WIDTH-1 -: UUID_WIDTH];
assign rsp_dbg_uuid = core_rsp_tag[TAG_WIDTH-1 -: UUID_WIDTH];
end else begin
assign mem_req_dbg_uuid = '0;
@ -572,25 +579,27 @@ module VX_mem_scheduler #(
assign rsp_dbg_uuid = '0;
end
wire [CORE_QUEUE_ADDRW-1:0] ibuf_waddr_s = mem_req_tag_s[MEM_BATCH_BITS +: CORE_QUEUE_ADDRW];
wire mem_req_fire_s = mem_req_valid_s && mem_req_ready_s;
always @(posedge clk) begin
if (core_req_fire) begin
if (core_req_rw) begin
`TRACE(1, ("%d: %s-core-req-wr: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask));
`TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS);
`TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS);
`TRACE(1, (", byteen="));
`TRACE_ARRAY1D(1, "0x%h", core_req_byteen, CORE_REQS);
`TRACE(1, (", data="));
`TRACE_ARRAY1D(1, "0x%0h", core_req_data, CORE_REQS);
`TRACE_ARRAY1D(1, "0x%0h", core_req_data, CORE_REQS);
end else begin
`TRACE(1, ("%d: %s-core-req-rd: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask));
`TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS);
end
`TRACE(1, (", tag=0x%0h (#%0d)\n", core_req_tag, req_dbg_uuid));
`TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS);
end
`TRACE(1, (", tag=0x%0h (#%0d)\n", core_req_tag, req_dbg_uuid));
end
if (core_rsp_valid && core_rsp_ready) begin
`TRACE(1, ("%d: %s-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop));
`TRACE(1, ("%d: %s-core-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop));
`TRACE_ARRAY1D(1, "0x%0h", core_rsp_data, CORE_REQS);
`TRACE(1, (", tag=0x%0h (#%0d)\n", core_rsp_tag, rsp_dbg_uuid));
end
@ -601,20 +610,20 @@ module VX_mem_scheduler #(
`TRACE(1, (", byteen="));
`TRACE_ARRAY1D(1, "0x%h", mem_req_byteen_s, CORE_CHANNELS);
`TRACE(1, (", data="));
`TRACE_ARRAY1D(1, "0x%0h", mem_req_data_s, CORE_CHANNELS);
`TRACE_ARRAY1D(1, "0x%0h", mem_req_data_s, CORE_CHANNELS);
end else begin
`TRACE(1, ("%d: %s-mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s));
`TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS);
`TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS);
end
`TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr, req_batch_idx, mem_req_dbg_uuid));
end
`TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr_s, req_batch_idx, mem_req_dbg_uuid));
end
if (mem_rsp_fire_s) begin
`TRACE(1, ("%d: %s-mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s));
`TRACE(1, ("%d: %s-mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s));
`TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data_s, CORE_CHANNELS);
`TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_raddr, rsp_batch_idx, mem_rsp_dbg_uuid));
end
end
`endif
endmodule
`TRACING_ON

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -19,131 +19,36 @@ module VX_onehot_mux #(
parameter N = 1,
parameter MODEL = 1
) (
input wire [N-1:0][DATAW-1:0] data_in,
input wire [N-1:0] sel_in,
input wire [N-1:0][DATAW-1:0] data_in,
input wire [N-1:0] sel_in,
output wire [DATAW-1:0] data_out
);
);
if (N == 1) begin
`UNUSED_VAR (sel_in)
assign data_out = data_in;
end else if (N == 2) begin
`UNUSED_VAR (sel_in)
assign data_out = sel_in[0] ? data_in[0] : data_in[1];
end else if (N == 3) begin
end else if (MODEL == 1) begin
wire [N-1:0][DATAW-1:0] mask;
for (genvar i = 0; i < N; ++i) begin
assign mask[i] = {DATAW{sel_in[i]}} & data_in[i];
end
for (genvar i = 0; i < DATAW; ++i) begin
wire [N-1:0] gather;
for (genvar j = 0; j < N; ++j) begin
assign gather[j] = mask[j][i];
end
assign data_out[i] = (| gather);
end
end else if (MODEL == 2) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
case (sel_in)
3'b001: data_out_r = data_in[0];
3'b010: data_out_r = data_in[1];
3'b100: data_out_r = data_in[2];
default: data_out_r = 'x;
endcase
end
assign data_out = data_out_r;
end else if (N == 4) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
case (sel_in)
4'b0001: data_out_r = data_in[0];
4'b0010: data_out_r = data_in[1];
4'b0100: data_out_r = data_in[2];
4'b1000: data_out_r = data_in[3];
default: data_out_r = 'x;
endcase
end
assign data_out = data_out_r;
end else if (N == 5) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
case (sel_in)
5'b00001: data_out_r = data_in[0];
5'b00010: data_out_r = data_in[1];
5'b00100: data_out_r = data_in[2];
5'b01000: data_out_r = data_in[3];
5'b10000: data_out_r = data_in[4];
default: data_out_r = 'x;
endcase
end
assign data_out = data_out_r;
end else if (N == 6) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
case (sel_in)
6'b000001: data_out_r = data_in[0];
6'b000010: data_out_r = data_in[1];
6'b000100: data_out_r = data_in[2];
6'b001000: data_out_r = data_in[3];
6'b010000: data_out_r = data_in[4];
6'b100000: data_out_r = data_in[5];
default: data_out_r = 'x;
endcase
end
assign data_out = data_out_r;
end else if (N == 7) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
case (sel_in)
7'b0000001: data_out_r = data_in[0];
7'b0000010: data_out_r = data_in[1];
7'b0000100: data_out_r = data_in[2];
7'b0001000: data_out_r = data_in[3];
7'b0010000: data_out_r = data_in[4];
7'b0100000: data_out_r = data_in[5];
7'b1000000: data_out_r = data_in[6];
default: data_out_r = 'x;
endcase
end
assign data_out = data_out_r;
end else if (N == 8) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
case (sel_in)
8'b00000001: data_out_r = data_in[0];
8'b00000010: data_out_r = data_in[1];
8'b00000100: data_out_r = data_in[2];
8'b00001000: data_out_r = data_in[3];
8'b00010000: data_out_r = data_in[4];
8'b00100000: data_out_r = data_in[5];
8'b01000000: data_out_r = data_in[6];
8'b10000000: data_out_r = data_in[7];
default: data_out_r = 'x;
endcase
end
assign data_out = data_out_r;
end else begin
if (MODEL == 1) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
data_out_r = 'x;
for (integer i = 0; i < N; ++i) begin
if (sel_in[i]) begin
data_out_r = data_in[i];
end
data_out_r = 'x;
for (integer i = 0; i < N; ++i) begin
if (sel_in[i]) begin
data_out_r = data_in[i];
end
end
assign data_out = data_out_r;
end else if (MODEL == 2) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
data_out_r = '0;
for (integer i = 0; i < N; ++i) begin
data_out_r |= {DATAW{sel_in[i]}} & data_in[i];
end
end
assign data_out = data_out_r;
end else if (MODEL == 3) begin
wire [N-1:0][DATAW-1:0] mask;
for (genvar i = 0; i < N; ++i) begin
assign mask[i] = {DATAW{sel_in[i]}} & data_in[i];
end
for (genvar i = 0; i < DATAW; ++i) begin
wire [N-1:0] gather;
for (genvar j = 0; j < N; ++j) begin
assign gather[j] = mask[j][i];
end
assign data_out[i] = (| gather);
end
end
assign data_out = data_out_r;
end
endmodule

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -15,8 +15,8 @@
`TRACING_OFF
module VX_pe_serializer #(
parameter NUM_LANES = 1,
parameter NUM_PES = 1,
parameter NUM_LANES = 1,
parameter NUM_PES = 1,
parameter LATENCY = 1,
parameter DATA_IN_WIDTH = 1,
parameter DATA_OUT_WIDTH = 1,
@ -28,12 +28,12 @@ module VX_pe_serializer #(
// input
input wire valid_in,
input wire [NUM_LANES-1:0][DATA_IN_WIDTH-1:0] data_in,
input wire [NUM_LANES-1:0][DATA_IN_WIDTH-1:0] data_in,
input wire [TAG_WIDTH-1:0] tag_in,
output wire ready_in,
// PE
output wire pe_enable,
output wire pe_enable,
output wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_in,
input wire [NUM_PES-1:0][DATA_OUT_WIDTH-1:0] pe_data_out,
@ -43,6 +43,7 @@ module VX_pe_serializer #(
output wire [TAG_WIDTH-1:0] tag_out,
input wire ready_out
);
wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_in_s;
wire valid_out_s;
wire [TAG_WIDTH-1:0] tag_out_s;
wire enable;
@ -59,6 +60,17 @@ module VX_pe_serializer #(
.data_out ({valid_out_s, tag_out_s})
);
// Register stage between the selected per-PE input slice (pe_data_in_s) and
// the pe_data_in port of this module.
// DATAW spans all NUM_PES words of DATA_IN_WIDTH bits.
// DEPTH comes from PE_REG, which is declared outside this view; presumably
// the number of register stages - TODO confirm against VX_pipe_register.
// The stage is gated by the same `enable` signal that is exported as pe_enable.
VX_pipe_register #(
.DATAW (NUM_PES * DATA_IN_WIDTH),
.DEPTH (PE_REG)
) pe_reg (
.clk (clk),
.reset (reset),
.enable (enable),
.data_in (pe_data_in_s),
.data_out (pe_data_in)
);
if (NUM_LANES != NUM_PES) begin
localparam BATCH_SIZE = NUM_LANES / NUM_PES;
@ -67,6 +79,10 @@ module VX_pe_serializer #(
reg [BATCH_SIZEW-1:0] batch_in_idx;
reg [BATCH_SIZEW-1:0] batch_out_idx;
for (genvar i = 0; i < NUM_PES; ++i) begin
assign pe_data_in_s[i] = data_in[batch_in_idx * NUM_PES + i];
end
always @(posedge clk) begin
if (reset) begin
batch_in_idx <= '0;
@ -81,45 +97,29 @@ module VX_pe_serializer #(
end
end
wire batch_in_done = (batch_in_idx == BATCH_SIZEW'(BATCH_SIZE-1));
wire batch_in_done = (batch_in_idx == BATCH_SIZEW'(BATCH_SIZE-1));
wire batch_out_done = (batch_out_idx == BATCH_SIZEW'(BATCH_SIZE-1));
wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_in_s;
for (genvar i = 0; i < NUM_PES; ++i) begin
assign pe_data_in_s[i] = data_in[batch_in_idx * NUM_PES + i];
end
VX_pipe_register #(
.DATAW (NUM_PES * DATA_IN_WIDTH),
.DEPTH (PE_REG)
) pe_reg (
.clk (clk),
.reset (reset),
.enable (enable),
.data_in (pe_data_in_s),
.data_out (pe_data_in)
);
// Output staging for the batched path (NUM_LANES != NUM_PES).
// data_out_r holds one NUM_PES-wide result slice per batch slot.
// NOTE(review): several statements below appear twice, once written with
// enable_r and once with ready_out_b. Both wires have the same expression
// (ready_out || ~valid_out), so this looks like a rename shown side by side;
// confirm that only one copy of each statement is live.
reg valid_out_r;
reg [BATCH_SIZE-1:0][NUM_PES-1:0][DATA_OUT_WIDTH-1:0] data_out_r;
reg [TAG_WIDTH-1:0] tag_out_r;
// valid_out_b: the PE pipeline result is valid and the output batch index is on its last slot.
wire valid_out_b = valid_out_s && batch_out_done;
wire enable_r = ready_out || ~valid_out;
wire valid_out_b = valid_out_s && batch_out_done;
// ready_out_b: the staging registers may load, either because the consumer
// is ready or because no output is currently being presented.
wire ready_out_b = ready_out || ~valid_out;
always @(posedge clk) begin
// valid_out_r is the only staged register cleared by reset.
if (reset) begin
valid_out_r <= 1'b0;
end else if (enable_r) begin
end else if (ready_out_b) begin
valid_out_r <= valid_out_b;
end
// Data and tag are captured without a reset term: the current PE result is
// written into the slot selected by batch_out_idx.
if (enable_r) begin
if (ready_out_b) begin
data_out_r[batch_out_idx] <= pe_data_out;
tag_out_r <= tag_out_s;
end
end
// The pipeline advances when the staging registers may load, or when the
// current result is not the final slice of a batch (valid_out_b low).
assign enable = (enable_r || ~valid_out_b);
assign enable = ready_out_b || ~valid_out_b;
// A new input is accepted only while the last input batch slice is being issued.
assign ready_in = enable && batch_in_done;
assign pe_enable = enable;
@ -130,16 +130,17 @@ module VX_pe_serializer #(
end else begin
assign pe_data_in_s = data_in;
assign enable = ready_out || ~valid_out;
assign ready_in = enable;
assign pe_enable = enable;
assign pe_data_in= data_in;
assign valid_out = valid_out_s;
assign valid_out = valid_out_s;
assign data_out = pe_data_out;
assign tag_out = tag_out_s;
end
endmodule

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -13,44 +13,53 @@
`include "VX_platform.vh"
`TRACING_OFF
//`TRACING_OFF
module VX_pending_size #(
parameter SIZE = 1,
parameter INCRW = 1,
parameter DECRW = 1,
parameter SIZEW = `CLOG2(SIZE+1)
parameter SIZE = 1,
parameter INCRW = 1,
parameter DECRW = 1,
parameter ALM_FULL = (SIZE - 1),
parameter ALM_EMPTY = 1,
parameter SIZEW = `CLOG2(SIZE+1)
) (
input wire clk,
input wire reset,
input wire [INCRW-1:0] incr,
input wire [DECRW-1:0] decr,
output wire empty,
output wire alm_empty,
output wire full,
output wire alm_full,
output wire [SIZEW-1:0] size
);
`STATIC_ASSERT(INCRW <= SIZEW, ("invalid parameter"))
`STATIC_ASSERT(DECRW <= SIZEW, ("invalid parameter"))
`STATIC_ASSERT(INCRW <= SIZEW, ("invalid parameter: %d vs %d", INCRW, SIZEW))
`STATIC_ASSERT(DECRW <= SIZEW, ("invalid parameter: %d vs %d", DECRW, SIZEW))
localparam ADDRW = `LOG2UP(SIZE);
reg empty_r;
reg full_r;
reg empty_r, alm_empty_r;
reg full_r, alm_full_r;
if (INCRW != 1 || DECRW != 1) begin
reg [SIZEW-1:0] size_r;
wire [SIZEW-1:0] size_n;
assign size_n = size_r + SIZEW'(incr) - SIZEW'(decr);
wire [SIZEW-1:0] size_n = size_r + SIZEW'(incr) - SIZEW'(decr);
// Multi-step counter update (branch taken when INCRW != 1 or DECRW != 1).
// size_n is size_r + incr - decr, computed outside this block.
// NOTE(review): the reset branch and the update branch each appear twice in
// this span (a version without the alm_* flags and a version with them);
// confirm that only the version carrying alm_empty_r / alm_full_r is live.
always @(posedge clk) begin
if (reset) begin
size_r <= '0;
empty_r <= 1;
full_r <= 0;
if (reset) begin
// Reset state: counter at zero, reported as empty and almost-empty.
empty_r <= 1;
alm_empty_r <= 1;
alm_full_r <= 0;
full_r <= 0;
size_r <= '0;
end else begin
size_r <= size_n;
empty_r <= (size_n == SIZEW'(0));
full_r <= (size_n == SIZEW'(SIZE));
// Overflow check: if incr < decr is false, the new size must not be smaller than the old one.
`ASSERT((SIZEW'(incr) >= SIZEW'(decr)) || (size_n >= size_r), ("runtime error: counter overflow"));
// Underflow check: if incr > decr is false, the new size must not be larger than the old one.
`ASSERT((SIZEW'(incr) <= SIZEW'(decr)) || (size_n <= size_r), ("runtime error: counter underflow"));
size_r <= size_n;
empty_r <= (size_n == SIZEW'(0));
// NOTE(review): alm_empty_r is set only when size_n equals ALM_EMPTY exactly.
// Reset drives it to 1 at size 0, but with the default ALM_EMPTY = 1 a count
// that later returns to 0 yields alm_empty_r = 0. Confirm whether a
// "size_n <= ALM_EMPTY" threshold was intended.
alm_empty_r <= (size_n == SIZEW'(ALM_EMPTY));
full_r <= (size_n == SIZEW'(SIZE));
// NOTE(review): same exact-match compare for alm_full_r. Because incr can be
// larger than 1 in this branch, size_n can step over ALM_FULL without ever
// equalling it. Confirm whether "size_n >= ALM_FULL" was intended.
alm_full_r <= (size_n == SIZEW'(ALM_FULL));
end
end
@ -59,30 +68,47 @@ module VX_pending_size #(
end else begin
reg [ADDRW-1:0] used_r;
wire [ADDRW-1:0] used_n;
// Single-step counter update (branch taken when INCRW == 1 and DECRW == 1).
// The status flags are maintained incrementally from the current count used_r
// rather than recomputed from the next count.
// NOTE(review): the reset branch, the two assertions and the used_r update
// each appear twice in this span (an older and a newer wording); confirm that
// only the version carrying alm_empty_r / alm_full_r and used_n is live.
always @(posedge clk) begin
if (reset) begin
used_r <= '0;
empty_r <= 1;
full_r <= 0;
end else begin
`ASSERT(~(incr && ~decr) || ~full, ("runtime error: incrementing full counter"));
`ASSERT(~(decr && ~incr) || ~empty, ("runtime error: decrementing empty counter"));
if (reset) begin
// Reset state: count zero, empty and almost-empty asserted, full flags cleared.
empty_r <= 1;
alm_empty_r <= 1;
full_r <= 0;
alm_full_r <= 0;
used_r <= '0;
end else begin
// A pure increment must not happen while full; a pure decrement must not happen while empty.
`ASSERT(~(incr && ~decr) || ~full, ("runtime error: counter overflow"));
`ASSERT(~(decr && ~incr) || ~empty, ("runtime error: counter underflow"));
if (incr) begin
// incr together with decr leaves the count unchanged, so no flag is touched.
if (~decr) begin
// Count grows by one: it can no longer be empty.
empty_r <= 0;
// Leaving the ALM_EMPTY level upwards clears almost-empty.
if (used_r == ADDRW'(ALM_EMPTY))
alm_empty_r <= 0;
// Going from SIZE-1 to SIZE sets full.
if (used_r == ADDRW'(SIZE-1))
full_r <= 1;
// Going from ALM_FULL-1 to ALM_FULL sets almost-full.
if (used_r == ADDRW'(ALM_FULL-1))
alm_full_r <= 1;
end
end else if (decr) begin
// Count shrinks by one: it can no longer be full.
full_r <= 0;
// Going from 1 to 0 sets empty.
if (used_r == ADDRW'(1))
empty_r <= 1;
empty_r <= 1;
// Going from ALM_EMPTY+1 to ALM_EMPTY sets almost-empty.
if (used_r == ADDRW'(ALM_EMPTY+1))
alm_empty_r <= 1;
full_r <= 0;
// Going from ALM_FULL to ALM_FULL-1 clears almost-full.
if (used_r == ADDRW'(ALM_FULL))
alm_full_r <= 0;
end
used_r <= $signed(used_r) + ADDRW'($signed(2'(incr) - 2'(decr)));
used_r <= used_n;
end
end
// Next-count logic for used_r.
if (SIZE == 2) begin
// XOR with (incr ^ decr) flips used_r when exactly one of incr / decr is set.
// Presumably used_r is a single bit here (ADDRW = LOG2UP(2)) - TODO confirm macro.
assign used_n = used_r ^ (incr ^ decr);
end else begin
// General case: add the signed 2-bit difference incr - decr (+1, 0 or -1).
assign used_n = $signed(used_r) + ADDRW'($signed(2'(incr) - 2'(decr)));
end
if (SIZE > 1) begin
if (SIZEW > ADDRW) begin
assign size = {full_r, used_r};
@ -95,8 +121,10 @@ module VX_pending_size #(
end
assign empty = empty_r;
assign full = full_r;
assign empty = empty_r;
assign alm_empty = alm_empty_r;
assign alm_full = alm_full_r;
assign full = full_r;
endmodule
`TRACING_ON
//`TRACING_ON

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -23,8 +23,8 @@ module VX_priority_arbiter #(
output wire [NUM_REQS-1:0] grant_onehot,
output wire grant_valid
);
if (NUM_REQS == 1) begin
if (NUM_REQS == 1) begin
assign grant_index = '0;
assign grant_onehot = requests;
assign grant_valid = requests[0];
@ -41,6 +41,6 @@ module VX_priority_arbiter #(
);
end
endmodule
`TRACING_ON

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -16,24 +16,23 @@
`TRACING_OFF
module VX_rr_arbiter #(
parameter NUM_REQS = 1,
parameter LOCK_ENABLE = 0,
parameter MODEL = 1,
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
) (
input wire clk,
input wire reset,
input wire [NUM_REQS-1:0] requests,
input wire reset,
input wire [NUM_REQS-1:0] requests,
output wire [LOG_NUM_REQS-1:0] grant_index,
output wire [NUM_REQS-1:0] grant_onehot,
output wire [NUM_REQS-1:0] grant_onehot,
output wire grant_valid,
input wire grant_unlock
input wire grant_ready
);
if (NUM_REQS == 1) begin
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
`UNUSED_VAR (grant_unlock)
`UNUSED_VAR (grant_ready)
assign grant_index = '0;
assign grant_onehot = requests;
assign grant_valid = requests[0];
@ -41,7 +40,7 @@ module VX_rr_arbiter #(
end else if (NUM_REQS == 2) begin
reg [LOG_NUM_REQS-1:0] grant_index_r;
reg [NUM_REQS-1:0] grant_onehot_r;
reg [NUM_REQS-1:0] grant_onehot_r;
reg [LOG_NUM_REQS-1:0] state;
always @(*) begin
@ -52,279 +51,279 @@ module VX_rr_arbiter #(
endcase
end
// Round-robin state register: remembers the index that was granted last.
// NOTE(review): the block header and the update condition each appear twice
// in this span. One condition is (!LOCK_ENABLE || grant_unlock), the other is
// grant_ready alone. With the grant_ready form the state advances only on
// cycles where grant_ready is high, whereas the LOCK_ENABLE form with
// LOCK_ENABLE = 0 advanced every cycle - confirm callers drive grant_ready
// accordingly.
always @(posedge clk) begin
if (reset) begin
always @(posedge clk) begin
if (reset) begin
state <= '0;
end else if (!LOCK_ENABLE || grant_unlock) begin
end else if (grant_ready) begin
state <= grant_index_r;
end
end
assign grant_index = grant_index_r;
assign grant_onehot = grant_onehot_r;
assign grant_valid = (| requests);
assign grant_valid = (| requests);
end /*else if (NUM_REQS == 3) begin
reg [LOG_NUM_REQS-1:0] grant_index_r;
reg [NUM_REQS-1:0] grant_onehot_r;
reg [NUM_REQS-1:0] grant_onehot_r;
reg [LOG_NUM_REQS-1:0] state;
always @(*) begin
casez ({state, requests})
5'b00_001,
5'b01_0?1,
5'b01_0?1,
5'b10_??1: begin grant_onehot_r = 3'b001; grant_index_r = LOG_NUM_REQS'(0); end
5'b00_?1?,
5'b01_010,
5'b00_?1?,
5'b01_010,
5'b10_?10: begin grant_onehot_r = 3'b010; grant_index_r = LOG_NUM_REQS'(1); end
default: begin grant_onehot_r = 3'b100; grant_index_r = LOG_NUM_REQS'(2); end
endcase
end
always @(posedge clk) begin
if (reset) begin
always @(posedge clk) begin
if (reset) begin
state <= '0;
end else if (!LOCK_ENABLE || grant_unlock) begin
end else if (grant_ready) begin
state <= grant_index_r;
end
end
assign grant_index = grant_index_r;
assign grant_onehot = grant_onehot_r;
assign grant_valid = (| requests);
assign grant_valid = (| requests);
end */else if (NUM_REQS == 4) begin
reg [LOG_NUM_REQS-1:0] grant_index_r;
reg [NUM_REQS-1:0] grant_onehot_r;
reg [NUM_REQS-1:0] grant_onehot_r;
reg [LOG_NUM_REQS-1:0] state;
always @(*) begin
casez ({state, requests})
6'b00_0001,
6'b01_00?1,
6'b00_0001,
6'b01_00?1,
6'b10_0??1,
6'b11_???1: begin grant_onehot_r = 4'b0001; grant_index_r = LOG_NUM_REQS'(0); end
6'b00_??1?,
6'b01_0010,
6'b10_0?10,
6'b00_??1?,
6'b01_0010,
6'b10_0?10,
6'b11_??10: begin grant_onehot_r = 4'b0010; grant_index_r = LOG_NUM_REQS'(1); end
6'b00_?10?,
6'b01_?1??,
6'b10_0100,
6'b00_?10?,
6'b01_?1??,
6'b10_0100,
6'b11_?100: begin grant_onehot_r = 4'b0100; grant_index_r = LOG_NUM_REQS'(2); end
default: begin grant_onehot_r = 4'b1000; grant_index_r = LOG_NUM_REQS'(3); end
endcase
end
always @(posedge clk) begin
if (reset) begin
always @(posedge clk) begin
if (reset) begin
state <= '0;
end else if (!LOCK_ENABLE || grant_unlock) begin
end else if (grant_ready) begin
state <= grant_index_r;
end
end
assign grant_index = grant_index_r;
assign grant_onehot = grant_onehot_r;
assign grant_valid = (| requests);
assign grant_valid = (| requests);
end /*else if (NUM_REQS == 5) begin
reg [LOG_NUM_REQS-1:0] grant_index_r;
reg [NUM_REQS-1:0] grant_onehot_r;
reg [NUM_REQS-1:0] grant_onehot_r;
reg [LOG_NUM_REQS-1:0] state;
always @(*) begin
casez ({state, requests})
8'b000_00001,
8'b001_000?1,
8'b010_00??1,
8'b000_00001,
8'b001_000?1,
8'b010_00??1,
8'b011_0???1,
8'b100_????1: begin grant_onehot_r = 5'b00001; grant_index_r = LOG_NUM_REQS'(0); end
8'b000_???1?,
8'b001_00010,
8'b010_00?10,
8'b011_0??10,
8'b000_???1?,
8'b001_00010,
8'b010_00?10,
8'b011_0??10,
8'b100_???10: begin grant_onehot_r = 5'b00010; grant_index_r = LOG_NUM_REQS'(1); end
8'b000_??10?,
8'b001_??1??,
8'b010_00100,
8'b000_??10?,
8'b001_??1??,
8'b010_00100,
8'b011_0?100,
8'b100_??100: begin grant_onehot_r = 5'b00100; grant_index_r = LOG_NUM_REQS'(2); end
8'b000_?100?,
8'b001_?10??,
8'b000_?100?,
8'b001_?10??,
8'b010_?1???,
8'b011_01000,
8'b011_01000,
8'b100_?1000: begin grant_onehot_r = 5'b01000; grant_index_r = LOG_NUM_REQS'(3); end
default: begin grant_onehot_r = 5'b10000; grant_index_r = LOG_NUM_REQS'(4); end
endcase
end
always @(posedge clk) begin
if (reset) begin
always @(posedge clk) begin
if (reset) begin
state <= '0;
end else if (!LOCK_ENABLE || grant_unlock) begin
end else if (grant_ready) begin
state <= grant_index_r;
end
end
assign grant_index = grant_index_r;
assign grant_onehot = grant_onehot_r;
assign grant_valid = (| requests);
assign grant_valid = (| requests);
end else if (NUM_REQS == 6) begin
reg [LOG_NUM_REQS-1:0] grant_index_r;
reg [NUM_REQS-1:0] grant_onehot_r;
reg [NUM_REQS-1:0] grant_onehot_r;
reg [LOG_NUM_REQS-1:0] state;
always @(*) begin
casez ({state, requests})
9'b000_000001,
9'b001_0000?1,
9'b010_000??1,
9'b011_00???1,
9'b100_0????1,
9'b000_000001,
9'b001_0000?1,
9'b010_000??1,
9'b011_00???1,
9'b100_0????1,
9'b101_?????1: begin grant_onehot_r = 6'b000001; grant_index_r = LOG_NUM_REQS'(0); end
9'b000_????1?,
9'b001_000010,
9'b010_000?10,
9'b011_00??10,
9'b100_0???10,
9'b000_????1?,
9'b001_000010,
9'b010_000?10,
9'b011_00??10,
9'b100_0???10,
9'b101_????10: begin grant_onehot_r = 6'b000010; grant_index_r = LOG_NUM_REQS'(1); end
9'b000_???10?,
9'b001_???1??,
9'b010_000100,
9'b000_???10?,
9'b001_???1??,
9'b010_000100,
9'b011_00?100,
9'b100_0??100,
9'b100_0??100,
9'b101_???100: begin grant_onehot_r = 6'b000100; grant_index_r = LOG_NUM_REQS'(2); end
9'b000_??100?,
9'b001_??10??,
9'b000_??100?,
9'b001_??10??,
9'b010_??1???,
9'b011_001000,
9'b100_0?1000,
9'b011_001000,
9'b100_0?1000,
9'b101_??1000: begin grant_onehot_r = 6'b001000; grant_index_r = LOG_NUM_REQS'(3); end
9'b000_?1000?,
9'b001_?100??,
9'b000_?1000?,
9'b001_?100??,
9'b010_?10???,
9'b011_?1????,
9'b100_010000,
9'b011_?1????,
9'b100_010000,
9'b101_?10000: begin grant_onehot_r = 6'b010000; grant_index_r = LOG_NUM_REQS'(4); end
default: begin grant_onehot_r = 6'b100000; grant_index_r = LOG_NUM_REQS'(5); end
endcase
end
always @(posedge clk) begin
if (reset) begin
always @(posedge clk) begin
if (reset) begin
state <= '0;
end else if (!LOCK_ENABLE || grant_unlock) begin
end else if (grant_ready) begin
state <= grant_index_r;
end
end
assign grant_index = grant_index_r;
assign grant_onehot = grant_onehot_r;
assign grant_valid = (| requests);
assign grant_valid = (| requests);
end else if (NUM_REQS == 7) begin
reg [LOG_NUM_REQS-1:0] grant_index_r;
reg [NUM_REQS-1:0] grant_onehot_r;
reg [NUM_REQS-1:0] grant_onehot_r;
reg [LOG_NUM_REQS-1:0] state;
always @(*) begin
casez ({state, requests})
10'b000_000001,
10'b001_0000?1,
10'b010_000??1,
10'b011_00???1,
10'b100_00???1,
10'b101_0????1,
10'b000_000001,
10'b001_0000?1,
10'b010_000??1,
10'b011_00???1,
10'b100_00???1,
10'b101_0????1,
10'b110_?????1: begin grant_onehot_r = 7'b0000001; grant_index_r = LOG_NUM_REQS'(0); end
10'b000_?????1?,
10'b001_0000010,
10'b010_0000?10,
10'b011_000??10,
10'b100_00???10,
10'b101_0????10,
10'b000_?????1?,
10'b001_0000010,
10'b010_0000?10,
10'b011_000??10,
10'b100_00???10,
10'b101_0????10,
10'b110_?????10: begin grant_onehot_r = 7'b0000010; grant_index_r = LOG_NUM_REQS'(1); end
10'b000_????10?,
10'b001_????1??,
10'b010_0000100,
10'b000_????10?,
10'b001_????1??,
10'b010_0000100,
10'b011_000?100,
10'b100_00??100,
10'b101_0???100,
10'b100_00??100,
10'b101_0???100,
10'b110_????100: begin grant_onehot_r = 7'b0000100; grant_index_r = LOG_NUM_REQS'(2); end
10'b000_???100?,
10'b001_???10??,
10'b000_???100?,
10'b001_???10??,
10'b010_???1???,
10'b011_0001000,
10'b100_00?1000,
10'b101_0??1000,
10'b011_0001000,
10'b100_00?1000,
10'b101_0??1000,
10'b110_???1000: begin grant_onehot_r = 7'b0001000; grant_index_r = LOG_NUM_REQS'(3); end
10'b000_??1000?,
10'b001_??100??,
10'b000_??1000?,
10'b001_??100??,
10'b010_??10???,
10'b011_??1????,
10'b100_0010000,
10'b101_0?10000,
10'b011_??1????,
10'b100_0010000,
10'b101_0?10000,
10'b110_??10000: begin grant_onehot_r = 7'b0010000; grant_index_r = LOG_NUM_REQS'(4); end
10'b000_?10000?,
10'b001_?1000??,
10'b000_?10000?,
10'b001_?1000??,
10'b010_?100???,
10'b011_?10????,
10'b100_?1?????,
10'b101_0100000,
10'b011_?10????,
10'b100_?1?????,
10'b101_0100000,
10'b110_?100000: begin grant_onehot_r = 7'b0100000; grant_index_r = LOG_NUM_REQS'(5); end
default: begin grant_onehot_r = 7'b1000000; grant_index_r = LOG_NUM_REQS'(6); end
endcase
end
always @(posedge clk) begin
if (reset) begin
always @(posedge clk) begin
if (reset) begin
state <= '0;
end else if (!LOCK_ENABLE || grant_unlock) begin
end else if (grant_ready) begin
state <= grant_index_r;
end
end
assign grant_index = grant_index_r;
assign grant_onehot = grant_onehot_r;
assign grant_valid = (| requests);
assign grant_valid = (| requests);
end */else if (NUM_REQS == 8) begin
reg [LOG_NUM_REQS-1:0] grant_index_r;
reg [NUM_REQS-1:0] grant_onehot_r;
reg [NUM_REQS-1:0] grant_onehot_r;
reg [LOG_NUM_REQS-1:0] state;
always @(*) begin
casez ({state, requests})
11'b000_00000001,
11'b001_000000?1,
11'b010_00000??1,
11'b000_00000001,
11'b001_000000?1,
11'b010_00000??1,
11'b011_0000???1,
11'b100_000????1,
11'b101_00?????1,
11'b110_0??????1,
11'b100_000????1,
11'b101_00?????1,
11'b110_0??????1,
11'b111_???????1: begin grant_onehot_r = 8'b00000001; grant_index_r = LOG_NUM_REQS'(0); end
11'b000_??????1?,
11'b001_00000010,
11'b010_00000?10,
11'b000_??????1?,
11'b001_00000010,
11'b010_00000?10,
11'b011_0000??10,
11'b100_000???10,
11'b101_00????10,
11'b110_0?????10,
11'b100_000???10,
11'b101_00????10,
11'b110_0?????10,
11'b111_??????10: begin grant_onehot_r = 8'b00000010; grant_index_r = LOG_NUM_REQS'(1); end
11'b000_?????10?,
11'b001_?????1??,
11'b010_00000100,
11'b000_?????10?,
11'b001_?????1??,
11'b010_00000100,
11'b011_0000?100,
11'b100_000??100,
11'b101_00???100,
11'b110_0????100,
11'b100_000??100,
11'b101_00???100,
11'b110_0????100,
11'b111_?????100: begin grant_onehot_r = 8'b00000100; grant_index_r = LOG_NUM_REQS'(2); end
11'b000_????100?,
11'b001_????10??,
@ -362,20 +361,20 @@ module VX_rr_arbiter #(
endcase
end
always @(posedge clk) begin
if (reset) begin
always @(posedge clk) begin
if (reset) begin
state <= '0;
end else if (!LOCK_ENABLE || grant_unlock) begin
end else if (grant_ready) begin
state <= grant_index_r;
end
end
assign grant_index = grant_index_r;
assign grant_onehot = grant_onehot_r;
assign grant_valid = (| requests);
assign grant_valid = (| requests);
end else if (MODEL == 1) begin
`IGNORE_UNOPTFLAT_BEGIN
wire [NUM_REQS-1:0] mask_higher_pri_regs, unmask_higher_pri_regs;
`IGNORE_UNOPTFLAT_END
@ -385,12 +384,18 @@ module VX_rr_arbiter #(
wire [NUM_REQS-1:0] req_masked = requests & pointer_reg;
// Priority chains for the MODEL == 1 arbiter.
// Each chain is a running OR over lower-indexed requests: bit i is 1 when any
// request with index below i is set, and bit 0 is tied to 0. A request is
// granted when it is set and its chain bit is 0, i.e. it is the lowest-indexed
// request in its set.
// NOTE(review): each chain is written twice in this span, once as a single
// part-select assign and once as a per-bit genvar loop. Both describe the same
// recurrence; confirm that only one form is live, since together they would
// drive bits [NUM_REQS-1:1] twice.
// Chain over the masked requests (requests & pointer_reg).
assign mask_higher_pri_regs[NUM_REQS-1:1] = mask_higher_pri_regs[NUM_REQS-2:0] | req_masked[NUM_REQS-2:0];
assign mask_higher_pri_regs[0] = 1'b0;
for (genvar i = 1; i < NUM_REQS; ++i) begin
assign mask_higher_pri_regs[i] = mask_higher_pri_regs[i-1] | req_masked[i-1];
end
// Lowest-indexed request among the masked set.
assign grant_masked[NUM_REQS-1:0] = req_masked[NUM_REQS-1:0] & ~mask_higher_pri_regs[NUM_REQS-1:0];
// Chain over all requests, ignoring the pointer mask.
assign unmask_higher_pri_regs[NUM_REQS-1:1] = unmask_higher_pri_regs[NUM_REQS-2:0] | requests[NUM_REQS-2:0];
assign unmask_higher_pri_regs[0] = 1'b0;
for (genvar i = 1; i < NUM_REQS; ++i) begin
assign unmask_higher_pri_regs[i] = unmask_higher_pri_regs[i-1] | requests[i-1];
end
// Lowest-indexed request among all requests.
assign grant_unmasked[NUM_REQS-1:0] = requests[NUM_REQS-1:0] & ~unmask_higher_pri_regs[NUM_REQS-1:0];
wire no_req_masked = ~(|req_masked);
@ -399,7 +404,7 @@ module VX_rr_arbiter #(
always @(posedge clk) begin
if (reset) begin
pointer_reg <= {NUM_REQS{1'b1}};
end else if (!LOCK_ENABLE || grant_unlock) begin
end else if (grant_ready) begin
if (|req_masked) begin
pointer_reg <= mask_higher_pri_regs;
end else if (|requests) begin
@ -410,22 +415,22 @@ module VX_rr_arbiter #(
end
end
assign grant_valid = (| requests);
assign grant_valid = (| requests);
VX_onehot_encoder #(
.N (NUM_REQS)
) onehot_encoder (
.data_in (grant_onehot),
.data_out (grant_index),
.data_out (grant_index),
`UNUSED_PIN (valid_out)
);
end else begin
reg [LOG_NUM_REQS-1:0] grant_index_r;
reg [NUM_REQS-1:0] grant_onehot_r;
reg [NUM_REQS-1:0] state;
reg [NUM_REQS-1:0] grant_onehot_r;
reg [NUM_REQS-1:0] state;
always @(*) begin
grant_index_r = 'x;
grant_onehot_r = 'x;
@ -440,18 +445,18 @@ module VX_rr_arbiter #(
end
end
always @(posedge clk) begin
if (reset) begin
always @(posedge clk) begin
if (reset) begin
state <= '0;
end else if (!LOCK_ENABLE || grant_unlock) begin
end else if (grant_ready) begin
state <= grant_index_r;
end
end
assign grant_index = grant_index_r;
assign grant_onehot = grant_onehot_r;
assign grant_valid = (| requests);
assign grant_valid = (| requests);
end
endmodule
`TRACING_ON

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -17,17 +17,18 @@
module VX_sp_ram #(
parameter DATAW = 1,
parameter SIZE = 1,
parameter ADDR_MIN = 0,
parameter WRENW = 1,
parameter OUT_REG = 0,
parameter NO_RWCHECK = 0,
parameter LUTRAM = 0,
parameter LUTRAM = 0,
parameter INIT_ENABLE = 0,
parameter INIT_FILE = "",
parameter [DATAW-1:0] INIT_VALUE = 0,
parameter ADDRW = `LOG2UP(SIZE)
) (
input wire clk,
input wire read,
) (
input wire clk,
input wire read,
input wire write,
input wire [WRENW-1:0] wren,
input wire [ADDRW-1:0] addr,
@ -37,6 +38,7 @@ module VX_sp_ram #(
VX_dp_ram #(
.DATAW (DATAW),
.SIZE (SIZE),
.ADDR_MIN (ADDR_MIN),
.WRENW (WRENW),
.OUT_REG (OUT_REG),
.NO_RWCHECK (NO_RWCHECK),

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -20,7 +20,8 @@ module VX_stream_arb #(
parameter DATAW = 1,
parameter `STRING ARBITER = "P",
parameter MAX_FANOUT = `MAX_FANOUT,
parameter OUT_BUF = 0 ,
parameter OUT_BUF = 0,
parameter LUTRAM = 0,
parameter NUM_REQS = `CDIV(NUM_INPUTS, NUM_OUTPUTS),
parameter LOG_NUM_REQS = `CLOG2(NUM_REQS),
parameter NUM_REQS_W = `UP(LOG_NUM_REQS)
@ -42,7 +43,7 @@ module VX_stream_arb #(
if (NUM_OUTPUTS > 1) begin
// (#inputs > #outputs) and (#outputs > 1)
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
localparam BATCH_BEGIN = i * NUM_REQS;
@ -57,7 +58,8 @@ module VX_stream_arb #(
.DATAW (DATAW),
.ARBITER (ARBITER),
.MAX_FANOUT (MAX_FANOUT),
.OUT_BUF (OUT_BUF)
.OUT_BUF (OUT_BUF),
.LUTRAM (LUTRAM)
) arb_slice (
.clk (clk),
.reset (slice_reset),
@ -81,8 +83,8 @@ module VX_stream_arb #(
wire [NUM_BATCHES-1:0] valid_tmp;
wire [NUM_BATCHES-1:0][DATAW+LOG_NUM_REQS2-1:0] data_tmp;
wire [NUM_BATCHES-1:0] ready_tmp;
wire [NUM_BATCHES-1:0] ready_tmp;
for (genvar i = 0; i < NUM_BATCHES; ++i) begin
localparam BATCH_BEGIN = i * MAX_FANOUT;
@ -97,18 +99,19 @@ module VX_stream_arb #(
if (MAX_FANOUT != 1) begin
VX_stream_arb #(
.NUM_INPUTS (BATCH_SIZE),
.NUM_OUTPUTS (1),
.NUM_OUTPUTS (1),
.DATAW (DATAW),
.ARBITER (ARBITER),
.MAX_FANOUT (MAX_FANOUT),
.OUT_BUF (OUT_BUF)
.OUT_BUF (3), // registered output
.LUTRAM (LUTRAM)
) fanout_slice_arb (
.clk (clk),
.reset (slice_reset),
.valid_in (valid_in[BATCH_END-1: BATCH_BEGIN]),
.data_in (data_in[BATCH_END-1: BATCH_BEGIN]),
.ready_in (ready_in[BATCH_END-1: BATCH_BEGIN]),
.valid_out (valid_tmp[i]),
.ready_in (ready_in[BATCH_END-1: BATCH_BEGIN]),
.valid_out (valid_tmp[i]),
.data_out (data_tmp_u),
.sel_out (sel_tmp_u),
.ready_out (ready_tmp[i])
@ -123,11 +126,12 @@ module VX_stream_arb #(
VX_stream_arb #(
.NUM_INPUTS (NUM_BATCHES),
.NUM_OUTPUTS (1),
.NUM_OUTPUTS (1),
.DATAW (DATAW + LOG_NUM_REQS2),
.ARBITER (ARBITER),
.MAX_FANOUT (MAX_FANOUT),
.OUT_BUF (OUT_BUF)
.OUT_BUF (OUT_BUF),
.LUTRAM (LUTRAM)
) fanout_join_arb (
.clk (clk),
.reset (reset),
@ -150,16 +154,15 @@ module VX_stream_arb #(
wire valid_in_r;
wire [DATAW-1:0] data_in_r;
wire ready_in_r;
wire arb_valid;
wire [NUM_REQS_W-1:0] arb_index;
wire [NUM_REQS-1:0] arb_onehot;
wire arb_ready;
VX_generic_arbiter #(
.NUM_REQS (NUM_REQS),
.LOCK_ENABLE (1),
.TYPE (ARBITER)
.NUM_REQS (NUM_REQS),
.TYPE (ARBITER)
) arbiter (
.clk (clk),
.reset (reset),
@ -167,21 +170,30 @@ module VX_stream_arb #(
.grant_valid (arb_valid),
.grant_index (arb_index),
.grant_onehot (arb_onehot),
.grant_unlock (arb_ready)
.grant_ready (arb_ready)
);
// Grant-to-data path for the many-inputs / single-output arbiter.
// The arbiter's grant_valid becomes the valid of the selected request, and the
// arbiter is told to advance (grant_ready) when the downstream side accepts.
assign valid_in_r = arb_valid;
// NOTE(review): data_in_r is driven both by the indexed select on the next
// line and by the one-hot mux instance below; this looks like the two sides
// of a change shown together - confirm only one driver is live.
assign data_in_r = data_in[arb_index];
assign arb_ready = ready_in_r;
// Selects the granted input's data word using the one-hot grant vector.
VX_onehot_mux #(
.DATAW (DATAW),
.N (NUM_REQS)
) onehot_mux (
.data_in (data_in),
.sel_in (arb_onehot),
.data_out (data_in_r)
);
// Only the granted input sees ready, and only when downstream is ready.
// The two assigns below differ only in using bitwise & versus logical &&
// on single-bit operands.
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign ready_in[i] = ready_in_r & arb_onehot[i];
assign ready_in[i] = ready_in_r && arb_onehot[i];
end
VX_elastic_buffer #(
.DATAW (LOG_NUM_REQS + DATAW),
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)),
.LUTRAM (LUTRAM)
) out_buf (
.clk (clk),
.reset (reset),
@ -214,7 +226,8 @@ module VX_stream_arb #(
.DATAW (DATAW),
.ARBITER (ARBITER),
.MAX_FANOUT (MAX_FANOUT),
.OUT_BUF (OUT_BUF)
.OUT_BUF (OUT_BUF),
.LUTRAM (LUTRAM)
) arb_slice (
.clk (clk),
.reset (slice_reset),
@ -248,19 +261,20 @@ module VX_stream_arb #(
.DATAW (DATAW),
.ARBITER (ARBITER),
.MAX_FANOUT (MAX_FANOUT),
.OUT_BUF (OUT_BUF)
.OUT_BUF (3), // registered output
.LUTRAM (LUTRAM)
) fanout_fork_arb (
.clk (clk),
.reset (reset),
.valid_in (valid_in),
.ready_in (ready_in),
.data_in (data_in),
.data_in (data_in),
.data_out (data_tmp),
.valid_out (valid_tmp),
.ready_out (ready_tmp),
`UNUSED_PIN (sel_out)
);
for (genvar i = 0; i < NUM_BATCHES; ++i) begin
localparam BATCH_BEGIN = i * MAX_FANOUT;
@ -271,11 +285,12 @@ module VX_stream_arb #(
VX_stream_arb #(
.NUM_INPUTS (1),
.NUM_OUTPUTS (BATCH_SIZE),
.NUM_OUTPUTS (BATCH_SIZE),
.DATAW (DATAW),
.ARBITER (ARBITER),
.MAX_FANOUT (MAX_FANOUT),
.OUT_BUF (OUT_BUF)
.OUT_BUF (OUT_BUF),
.LUTRAM (LUTRAM)
) fanout_slice_arb (
.clk (clk),
.reset (slice_reset),
@ -293,25 +308,24 @@ module VX_stream_arb #(
// (#inputs == 1) and (#outputs <= max_fanout)
wire [NUM_OUTPUTS-1:0] ready_in_r;
wire [NUM_OUTPUTS-1:0] ready_in_r;
wire [NUM_OUTPUTS-1:0] arb_requests;
wire arb_valid;
wire [NUM_OUTPUTS-1:0] arb_onehot;
wire arb_ready;
VX_generic_arbiter #(
.NUM_REQS (NUM_OUTPUTS),
.LOCK_ENABLE (1),
.TYPE (ARBITER)
.NUM_REQS (NUM_OUTPUTS),
.TYPE (ARBITER)
) arbiter (
.clk (clk),
.reset (reset),
.requests (arb_requests),
.grant_valid (arb_valid),
`UNUSED_PIN (grant_index),
`UNUSED_PIN (grant_index),
.grant_onehot (arb_onehot),
.grant_unlock (arb_ready)
.grant_ready (arb_ready)
);
assign arb_requests = ready_in_r;
@ -320,9 +334,10 @@ module VX_stream_arb #(
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
VX_elastic_buffer #(
.DATAW (DATAW),
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
.DATAW (DATAW),
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)),
.LUTRAM (LUTRAM)
) out_buf (
.clk (clk),
.reset (reset),
@ -337,7 +352,7 @@ module VX_stream_arb #(
end
assign sel_out = 0;
end else begin
// #Inputs == #Outputs
@ -349,7 +364,8 @@ module VX_stream_arb #(
VX_elastic_buffer #(
.DATAW (DATAW),
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)),
.LUTRAM (LUTRAM)
) out_buf (
.clk (clk),
.reset (out_buf_reset),
@ -363,6 +379,6 @@ module VX_stream_arb #(
assign sel_out[i] = NUM_REQS_W'(i);
end
end
endmodule
`TRACING_ON

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -15,9 +15,9 @@
`TRACING_OFF
module VX_stream_pack #(
parameter NUM_REQS = 1,
parameter DATA_WIDTH = 1,
parameter TAG_WIDTH = 1,
parameter NUM_REQS = 1,
parameter DATA_WIDTH = 1,
parameter TAG_WIDTH = 1,
parameter TAG_SEL_BITS = 0,
parameter `STRING ARBITER = "P",
parameter OUT_BUF = 0
@ -38,47 +38,48 @@ module VX_stream_pack #(
output wire [TAG_WIDTH-1:0] tag_out,
input wire ready_out
);
localparam LOG_NUM_REQS = `CLOG2(NUM_REQS);
if (NUM_REQS > 1) begin
wire [LOG_NUM_REQS-1:0] grant_index;
wire [NUM_REQS-1:0] grant_onehot;
wire grant_valid;
wire grant_ready;
VX_generic_arbiter #(
.NUM_REQS (NUM_REQS),
.LOCK_ENABLE (1),
.TYPE (ARBITER)
.TYPE (ARBITER)
) arbiter (
.clk (clk),
.reset (reset),
.requests (valid_in),
.requests (valid_in),
.grant_valid (grant_valid),
.grant_index (grant_index),
`UNUSED_PIN (grant_onehot),
.grant_unlock(grant_ready)
`UNUSED_PIN (grant_index),
.grant_onehot(grant_onehot),
.grant_ready (grant_ready)
);
// Tag selection and matching: the granted request's tag (tag_sel) is compared
// against every input's tag on the low TAG_SEL_BITS bits. Inputs whose tag
// matches are packed together with the granted one.
// NOTE(review): this span holds two formulations side by side - a procedural
// one (valid_sel / ready_sel, tag_sel indexed by grant_index) and a structural
// one (onehot_mux, tag_matches, mask_sel). tag_sel is declared twice as a
// result; confirm that only one formulation is live.
// NOTE(review): TAG_SEL_BITS defaults to 0 in the parameter list, which makes
// the slice [TAG_SEL_BITS-1:0] evaluate to [-1:0]; confirm that every
// instantiation overrides TAG_SEL_BITS with a positive value.
reg [NUM_REQS-1:0] valid_sel;
reg [NUM_REQS-1:0] ready_sel;
wire ready_unqual;
wire [TAG_WIDTH-1:0] tag_sel;
wire [TAG_WIDTH-1:0] tag_sel = tag_in[grant_index];
// Procedural form: for each input with a matching tag, pass its valid bit
// through and hand it the downstream ready; all other bits stay 0.
always @(*) begin
valid_sel = '0;
ready_sel = '0;
for (integer i = 0; i < NUM_REQS; ++i) begin
if (tag_in[i][TAG_SEL_BITS-1:0] == tag_sel[TAG_SEL_BITS-1:0]) begin
valid_sel[i] = valid_in[i];
ready_sel[i] = ready_unqual;
end
end
end
// Structural form: pick the granted tag with the one-hot grant vector.
VX_onehot_mux #(
.DATAW (TAG_WIDTH),
.N (NUM_REQS)
) onehot_mux (
.data_in (tag_in),
.sel_in (grant_onehot),
.data_out (tag_sel)
);
// tag_matches[i] is 1 when input i carries the same low tag bits as the granted input.
wire [NUM_REQS-1:0] tag_matches;
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign tag_matches[i] = (tag_in[i][TAG_SEL_BITS-1:0] == tag_sel[TAG_SEL_BITS-1:0]);
end
// Every matching input is acknowledged when the output buffer accepts.
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign ready_in[i] = grant_ready & tag_matches[i];
end
// mask_sel marks which inputs are both valid and tag-matched.
wire [NUM_REQS-1:0] mask_sel = valid_in & tag_matches;
assign grant_ready = ready_unqual;
VX_elastic_buffer #(
.DATAW (NUM_REQS + TAG_WIDTH + (NUM_REQS * DATA_WIDTH)),
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
@ -86,16 +87,14 @@ module VX_stream_pack #(
) out_buf (
.clk (clk),
.reset (reset),
.valid_in (grant_valid),
.data_in ({valid_sel, tag_sel, data_in}),
.ready_in (ready_unqual),
.valid_in (grant_valid),
.data_in ({mask_sel, tag_sel, data_in}),
.ready_in (grant_ready),
.valid_out (valid_out),
.data_out ({mask_out, tag_out, data_out}),
.ready_out (ready_out)
);
);
assign ready_in = ready_sel;
end else begin
`UNUSED_VAR (clk)

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -22,6 +22,7 @@ module VX_stream_xbar #(
parameter OUT_WIDTH = `LOG2UP(NUM_OUTPUTS),
parameter ARBITER = "P",
parameter OUT_BUF = 0,
parameter LUTRAM = 0,
parameter MAX_FANOUT = `MAX_FANOUT,
parameter PERF_CTR_BITS = `CLOG2(NUM_INPUTS+1)
) (
@ -36,7 +37,7 @@ module VX_stream_xbar #(
output wire [NUM_INPUTS-1:0] ready_in,
output wire [NUM_OUTPUTS-1:0] valid_out,
output wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out,
output wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out,
output wire [NUM_OUTPUTS-1:0][IN_WIDTH-1:0] sel_out,
input wire [NUM_OUTPUTS-1:0] ready_out
);
@ -66,7 +67,8 @@ module VX_stream_xbar #(
.DATAW (DATAW),
.ARBITER (ARBITER),
.MAX_FANOUT (MAX_FANOUT),
.OUT_BUF (OUT_BUF)
.OUT_BUF (OUT_BUF),
.LUTRAM (LUTRAM)
) xbar_arb (
.clk (clk),
.reset (slice_reset),
@ -94,7 +96,8 @@ module VX_stream_xbar #(
.DATAW (DATAW),
.ARBITER (ARBITER),
.MAX_FANOUT (MAX_FANOUT),
.OUT_BUF (OUT_BUF)
.OUT_BUF (OUT_BUF),
.LUTRAM (LUTRAM)
) xbar_arb (
.clk (clk),
.reset (reset),
@ -124,13 +127,14 @@ module VX_stream_xbar #(
assign ready_in = ready_out_r[sel_in];
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
`RESET_RELAY (out_buf_reset, reset);
VX_elastic_buffer #(
.DATAW (DATAW),
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)),
.LUTRAM (LUTRAM)
) out_buf (
.clk (clk),
.reset (out_buf_reset),
@ -152,7 +156,8 @@ module VX_stream_xbar #(
VX_elastic_buffer #(
.DATAW (DATAW),
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)),
.LUTRAM (LUTRAM)
) out_buf (
.clk (clk),
.reset (reset),
@ -172,7 +177,7 @@ module VX_stream_xbar #(
// compute inputs collision
// we have a collision when there exists a valid transfer with multiple input candidates
// we count the unique duplicates each cycle.
reg [NUM_INPUTS-1:0] per_cycle_collision, per_cycle_collision_r;
wire [`CLOG2(NUM_INPUTS+1)-1:0] collision_count;
reg [PERF_CTR_BITS-1:0] collisions_r;
@ -182,14 +187,14 @@ module VX_stream_xbar #(
for (integer i = 0; i < NUM_INPUTS; ++i) begin
for (integer j = 1; j < (NUM_INPUTS-i); ++j) begin
per_cycle_collision[i] |= valid_in[i]
&& valid_in[j+i]
&& valid_in[j+i]
&& (sel_in[i] == sel_in[j+i])
&& (ready_in[i] | ready_in[j+i]);
end
end
end
`BUFFER(per_cycle_collision_r, per_cycle_collision);
`BUFFER(per_cycle_collision_r, per_cycle_collision);
`POP_COUNT(collision_count, per_cycle_collision_r);
always @(posedge clk) begin

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -17,10 +17,10 @@ module VX_local_mem import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "",
// Size of cache in bytes
parameter SIZE = (1024*16*8),
parameter SIZE = (1024*16*8),
// Number of Word requests per cycle
parameter NUM_REQS = 4,
parameter NUM_REQS = 4,
// Number of banks
parameter NUM_BANKS = 4,
@ -33,8 +33,11 @@ module VX_local_mem import VX_gpu_pkg::*; #(
parameter UUID_WIDTH = 0,
// Request tag size
parameter TAG_WIDTH = 16
) (
parameter TAG_WIDTH = 16,
// Response buffer
parameter OUT_BUF = 0
) (
input wire clk,
input wire reset,
@ -59,7 +62,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
localparam REQ_DATAW = 1 + BANK_ADDR_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH;
localparam RSP_DATAW = WORD_WIDTH + TAG_WIDTH;
`STATIC_ASSERT(ADDR_WIDTH == (BANK_ADDR_WIDTH + `CLOG2(NUM_BANKS)), ("invalid parameter"))
`STATIC_ASSERT(ADDR_WIDTH == (BANK_ADDR_WIDTH + `CLOG2(NUM_BANKS)), ("invalid parameter"))
// bank selection
@ -70,7 +73,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
end
end else begin
assign req_bank_idx = 0;
end
end
// bank addressing
@ -83,18 +86,18 @@ module VX_local_mem import VX_gpu_pkg::*; #(
// bank requests dispatch
wire [NUM_BANKS-1:0] per_bank_req_valid;
wire [NUM_BANKS-1:0] per_bank_req_rw;
wire [NUM_BANKS-1:0] per_bank_req_rw;
wire [NUM_BANKS-1:0][BANK_ADDR_WIDTH-1:0] per_bank_req_addr;
wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_req_byteen;
wire [NUM_BANKS-1:0][WORD_WIDTH-1:0] per_bank_req_data;
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_req_tag;
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_req_idx;
wire [NUM_BANKS-1:0] per_bank_req_ready;
wire [NUM_BANKS-1:0][REQ_DATAW-1:0] per_bank_req_data_all;
wire [NUM_REQS-1:0] req_valid_in;
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in;
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in;
wire [NUM_REQS-1:0] req_ready_in;
`ifdef PERF_ENABLE
@ -104,13 +107,13 @@ module VX_local_mem import VX_gpu_pkg::*; #(
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign req_valid_in[i] = mem_bus_if[i].req_valid;
assign req_data_in[i] = {
mem_bus_if[i].req_data.rw,
mem_bus_if[i].req_data.rw,
req_bank_addr[i],
mem_bus_if[i].req_data.byteen,
mem_bus_if[i].req_data.data,
mem_bus_if[i].req_data.tag};
assign mem_bus_if[i].req_ready = req_ready_in[i];
end
end
VX_stream_xbar #(
.NUM_INPUTS (NUM_REQS),
@ -138,10 +141,10 @@ module VX_local_mem import VX_gpu_pkg::*; #(
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign {
per_bank_req_rw[i],
per_bank_req_rw[i],
per_bank_req_addr[i],
per_bank_req_byteen[i],
per_bank_req_data[i],
per_bank_req_byteen[i],
per_bank_req_data[i],
per_bank_req_tag[i]} = per_bank_req_data_all[i];
end
@ -149,13 +152,13 @@ module VX_local_mem import VX_gpu_pkg::*; #(
wire [NUM_BANKS-1:0] per_bank_rsp_valid;
wire [NUM_BANKS-1:0][WORD_WIDTH-1:0] per_bank_rsp_data;
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_rsp_idx;
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_rsp_tag;
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_rsp_idx;
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_rsp_tag;
wire [NUM_BANKS-1:0] per_bank_rsp_ready;
`RESET_RELAY (bank_reset, reset);
for (genvar i = 0; i < NUM_BANKS; ++i) begin
for (genvar i = 0; i < NUM_BANKS; ++i) begin
VX_sp_ram #(
.DATAW (WORD_WIDTH),
.SIZE (WORDS_PER_BANK),
@ -165,7 +168,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
.read (1'b1),
.write (per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i]),
.wren (per_bank_req_byteen[i]),
.addr (per_bank_req_addr[i]),
.addr (per_bank_req_addr[i]),
.wdata (per_bank_req_data[i]),
.rdata (per_bank_rsp_data[i])
);
@ -193,7 +196,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
// bank responses gather
wire [NUM_BANKS-1:0][RSP_DATAW-1:0] per_bank_rsp_data_all;
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign per_bank_rsp_data_all[i] = {per_bank_rsp_data[i], per_bank_rsp_tag[i]};
end
@ -206,7 +209,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
.NUM_INPUTS (NUM_BANKS),
.NUM_OUTPUTS (NUM_REQS),
.DATAW (RSP_DATAW),
.OUT_BUF (2)
.OUT_BUF (OUT_BUF)
) rsp_xbar (
.clk (clk),
.reset (reset),
@ -302,38 +305,38 @@ module VX_local_mem import VX_gpu_pkg::*; #(
assign per_bank_rsp_uuid[i] = 0;
end
end
for (genvar i = 0; i < NUM_REQS; ++i) begin
always @(posedge clk) begin
if (mem_bus_if[i].req_valid && mem_bus_if[i].req_ready) begin
if (mem_bus_if[i].req_data.rw) begin
`TRACE(1, ("%d: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
`TRACE(1, ("%d: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, req_uuid[i]));
end else begin
`TRACE(1, ("%d: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n",
`TRACE(1, ("%d: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, req_uuid[i]));
end
end
if (mem_bus_if[i].rsp_valid && mem_bus_if[i].rsp_ready) begin
`TRACE(1, ("%d: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%0h (#%0d)\n",
`TRACE(1, ("%d: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data[i], rsp_uuid[i]));
end
end
end
for (genvar i = 0; i < NUM_BANKS; ++i) begin
always @(posedge clk) begin
if (per_bank_req_valid[i] && per_bank_req_ready[i]) begin
if (per_bank_req_rw[i]) begin
`TRACE(2, ("%d: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
`TRACE(2, ("%d: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_byteen[i], per_bank_req_data[i], per_bank_req_uuid[i]));
end else begin
`TRACE(2, ("%d: %s-bank%0d rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
`TRACE(2, ("%d: %s-bank%0d rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_uuid[i]));
end
end
if (per_bank_rsp_valid[i] && per_bank_rsp_ready[i]) begin
`TRACE(2, ("%d: %s-bank%0d rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n",
`TRACE(2, ("%d: %s-bank%0d rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, per_bank_rsp_tag[i], per_bank_rsp_data[i], per_bank_rsp_uuid[i]));
end
end

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -17,20 +17,19 @@ module VX_mem_switch import VX_gpu_pkg::*; #(
parameter NUM_REQS = 1,
parameter DATA_SIZE = 1,
parameter TAG_WIDTH = 1,
parameter MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH,
parameter ADDR_WIDTH = 1,
parameter REQ_OUT_BUF = 0,
parameter RSP_OUT_BUF = 0,
parameter `STRING ARBITER = "R",
parameter `STRING ARBITER = "R",
parameter LOG_NUM_REQS = `CLOG2(NUM_REQS)
) (
input wire clk,
input wire reset,
input wire [`UP(LOG_NUM_REQS)-1:0] bus_sel,
VX_mem_bus_if.slave bus_in_if,
VX_mem_bus_if.master bus_out_if [NUM_REQS]
);
localparam ADDR_WIDTH = (MEM_ADDR_WIDTH-`CLOG2(DATA_SIZE));
);
localparam DATA_WIDTH = (8 * DATA_SIZE);
localparam REQ_DATAW = TAG_WIDTH + ADDR_WIDTH + `ADDR_TYPE_WIDTH + 1 + DATA_SIZE + DATA_WIDTH;
localparam RSP_DATAW = TAG_WIDTH + DATA_WIDTH;
@ -40,7 +39,7 @@ module VX_mem_switch import VX_gpu_pkg::*; #(
wire [NUM_REQS-1:0] req_valid_out;
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_out;
wire [NUM_REQS-1:0] req_ready_out;
VX_stream_switch #(
.NUM_OUTPUTS (NUM_REQS),
.DATAW (REQ_DATAW),
@ -49,7 +48,7 @@ module VX_mem_switch import VX_gpu_pkg::*; #(
.clk (clk),
.reset (reset),
.sel_in (bus_sel),
.valid_in (bus_in_if.req_valid),
.valid_in (bus_in_if.req_valid),
.data_in (bus_in_if.req_data),
.ready_in (bus_in_if.req_ready),
.valid_out (req_valid_out),
@ -68,7 +67,7 @@ module VX_mem_switch import VX_gpu_pkg::*; #(
wire [NUM_REQS-1:0] rsp_valid_in;
wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_data_in;
wire [NUM_REQS-1:0] rsp_ready_in;
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign rsp_valid_in[i] = bus_out_if[i].rsp_valid;
assign rsp_data_in[i] = bus_out_if[i].rsp_data;
@ -77,15 +76,15 @@ module VX_mem_switch import VX_gpu_pkg::*; #(
VX_stream_arb #(
.NUM_INPUTS (NUM_REQS),
.DATAW (RSP_DATAW),
.DATAW (RSP_DATAW),
.ARBITER (ARBITER),
.OUT_BUF (RSP_OUT_BUF)
) rsp_arb (
.clk (clk),
.reset (reset),
.valid_in (rsp_valid_in),
.data_in (rsp_data_in),
.ready_in (rsp_ready_in),
.valid_in (rsp_valid_in),
.data_in (rsp_data_in),
.ready_in (rsp_ready_in),
.valid_out (bus_in_if.rsp_valid),
.data_out (bus_in_if.rsp_data),
.ready_out (bus_in_if.rsp_ready),

View file

@ -12,7 +12,6 @@ SRC_DIR := $(VORTEX_HOME)/hw/syn/altera/opae
RTL_DIR := $(VORTEX_HOME)/hw/rtl
DPI_DIR := $(VORTEX_HOME)/hw/dpi
AFU_DIR := $(RTL_DIR)/afu/opae
THIRD_PARTY_DIR := $(VORTEX_HOME)/third_party
SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts
IP_CACHE_DIR := $(ROOT_DIR)/hw/syn/altera/ip_cache/$(DEVICE_FAMILY)
@ -76,19 +75,19 @@ endif
# Debugging
ifdef DEBUG
ifeq ($(TARGET), fpga)
CFLAGS += -DNDEBUG -DSCOPE $(DBG_SCOPE_FLAGS)
SCOPE_JSON += $(BUILD_DIR)/scope.json
ifneq ($(TARGET), fpga)
CFLAGS += -DNDEBUG
else
CFLAGS += $(DBG_TRACE_FLAGS)
endif
else
else
CFLAGS += -DNDEBUG
endif
# Enable scope analyzer
ifdef SCOPE
CFLAGS += -DSCOPE
CFLAGS += -DSCOPE $(DBG_SCOPE_FLAGS)
SCOPE_JSON += $(BUILD_DIR)/scope.json
endif
# Enable perf counters
@ -128,7 +127,7 @@ ifeq ($(TARGET), asesim)
afu_sim_setup -s $(BUILD_DIR)/setup.cfg $(BUILD_DIR)/synth
else
afu_synth_setup -s $(BUILD_DIR)/setup.cfg $(BUILD_DIR)/synth
endif
endif
build: ip-gen setup $(SCOPE_JSON)
ifeq ($(TARGET), asesim)
@ -145,5 +144,5 @@ scope-json: $(BUILD_DIR)/scope.json
$(BUILD_DIR)/scope.json: $(BUILD_DIR)/vortex.xml
$(SCRIPT_DIR)/scope.py $(BUILD_DIR)/vortex.xml -o $(BUILD_DIR)/scope.json
clean:
clean:
rm -rf vortex_afu.h $(BUILD_DIR)

Some files were not shown because too many files have changed in this diff Show more