This commit is contained in:
Hanran Wu 2024-08-23 17:44:24 -04:00
parent e7660b6ffe
commit ea9560b33b
186 changed files with 36003 additions and 4008 deletions

270
.github/workflows/ci.yml vendored Normal file
View file

@ -0,0 +1,270 @@
# Copyright © 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: CI

# Run the pipeline for every push and for every pull request.
on: [push, pull_request]

# Pipeline shape: setup -> build_vm (xlen 32/64) -> test_vm (xlen 32/64) -> complete.
# The `tools` and `third_party` directories are shared between jobs through
# actions/cache; the configured build tree is shared through an artifact.
#
# Action versions are pinned to v4: the v2 artifact actions have been shut off
# by GitHub and the v2 cache/checkout actions are deprecated.
#
# NOTE(review): the ubuntu-20.04 hosted image is being retired by GitHub.
# It is kept here because ci/system_updates.sh installs packages by their
# 20.04 names; confirm the package list before moving to a newer image.
jobs:
  # Populates the toolchain and third-party caches used by the later jobs.
  setup:
    runs-on: ubuntu-20.04
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Cache Toolchain Directory
        id: cache-toolchain
        uses: actions/cache@v4
        with:
          path: tools
          key: ${{ runner.os }}-toolchain-v0.1
          restore-keys: |
            ${{ runner.os }}-toolchain-

      - name: Cache Third Party Directory
        id: cache-thirdparty
        uses: actions/cache@v4
        with:
          path: third_party
          key: ${{ runner.os }}-thirdparty-v0.1
          restore-keys: |
            ${{ runner.os }}-thirdparty-

      # System packages are only needed when something has to be rebuilt.
      - name: Install Dependencies
        if: steps.cache-toolchain.outputs.cache-hit != 'true' || steps.cache-thirdparty.outputs.cache-hit != 'true'
        run: |
          sudo bash ./ci/system_updates.sh

      # cache-hit is 'true' only on an exact key match, so a restore-keys
      # fallback still triggers a fresh install.
      - name: Setup Toolchain
        if: steps.cache-toolchain.outputs.cache-hit != 'true'
        run: |
          TOOLDIR=$PWD/tools
          mkdir -p build
          cd build
          ../configure --tooldir=$TOOLDIR
          ci/toolchain_install.sh --all

      - name: Setup Third Party
        if: steps.cache-thirdparty.outputs.cache-hit != 'true'
        run: |
          make -C third_party > /dev/null

  # The non-VM build/test jobs below are currently disabled.
  # NOTE: they still reference the v2 actions; bump them to the versions used
  # by the active jobs before re-enabling.
  #
  # build:
  #   runs-on: ubuntu-20.04
  #   needs: setup
  #   strategy:
  #     matrix:
  #       xlen: [32, 64]
  #   steps:
  #     - name: Checkout code
  #       uses: actions/checkout@v2
  #     - name: Install Dependencies
  #       run: |
  #         sudo bash ./ci/system_updates.sh
  #     - name: Cache Toolchain Directory
  #       id: cache-toolchain
  #       uses: actions/cache@v2
  #       with:
  #         path: tools
  #         key: ${{ runner.os }}-toolchain-v0.1
  #         restore-keys: |
  #           ${{ runner.os }}-toolchain-
  #     - name: Cache Third Party Directory
  #       id: cache-thirdparty
  #       uses: actions/cache@v2
  #       with:
  #         path: third_party
  #         key: ${{ runner.os }}-thirdparty-v0.1
  #         restore-keys: |
  #           ${{ runner.os }}-thirdparty-
  #     - name: Run Build
  #       run: |
  #         TOOLDIR=$PWD/tools
  #         mkdir -p build${{ matrix.xlen }}
  #         cd build${{ matrix.xlen }}
  #         ../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }}
  #         source ci/toolchain_env.sh
  #         make software -s > /dev/null
  #         make tests -s > /dev/null
  #     - name: Upload Build Artifact
  #       uses: actions/upload-artifact@v2
  #       with:
  #         name: build-${{ matrix.xlen }}
  #         path: build${{ matrix.xlen }}
  # tests:
  #   runs-on: ubuntu-20.04
  #   needs: build
  #   strategy:
  #     matrix:
  #       name: [regression, opencl, config1, config2, debug, stress]
  #       xlen: [32, 64]
  #   steps:
  #     - name: Checkout code
  #       uses: actions/checkout@v2
  #     - name: Install Dependencies
  #       run: |
  #         sudo bash ./ci/system_updates.sh
  #     - name: Cache Toolchain Directory
  #       id: cache-toolchain
  #       uses: actions/cache@v2
  #       with:
  #         path: tools
  #         key: ${{ runner.os }}-toolchain-v0.1
  #         restore-keys: |
  #           ${{ runner.os }}-toolchain-
  #     - name: Cache Third Party Directory
  #       id: cache-thirdparty
  #       uses: actions/cache@v2
  #       with:
  #         path: third_party
  #         key: ${{ runner.os }}-thirdparty-v0.1
  #         restore-keys: |
  #           ${{ runner.os }}-thirdparty-
  #     - name: Download Build Artifact
  #       uses: actions/download-artifact@v2
  #       with:
  #         name: build-${{ matrix.xlen }}
  #         path: build${{ matrix.xlen }}
  #     - name: Run tests
  #       run: |
  #         cd build${{ matrix.xlen }}
  #         source ci/toolchain_env.sh
  #         chmod -R +x . # Ensure all files have executable permissions
  #         if [ "${{ matrix.name }}" == "regression" ]; then
  #           ./ci/regression.sh --unittest
  #           ./ci/regression.sh --isa
  #           ./ci/regression.sh --kernel
  #           ./ci/regression.sh --synthesis
  #           ./ci/regression.sh --regression
  #         else
  #           ./ci/regression.sh --${{ matrix.name }}
  #         fi

  # Configures and builds the software and tests with virtual memory enabled,
  # once per XLEN, and publishes the build tree as an artifact.
  build_vm:
    runs-on: ubuntu-20.04
    needs: setup
    strategy:
      matrix:
        xlen: [32, 64]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install Dependencies
        run: |
          sudo bash ./ci/system_updates.sh

      - name: Cache Toolchain Directory
        id: cache-toolchain
        uses: actions/cache@v4
        with:
          path: tools
          key: ${{ runner.os }}-toolchain-v0.1
          restore-keys: |
            ${{ runner.os }}-toolchain-

      - name: Cache Third Party Directory
        id: cache-thirdparty
        uses: actions/cache@v4
        with:
          path: third_party
          key: ${{ runner.os }}-thirdparty-v0.1
          restore-keys: |
            ${{ runner.os }}-thirdparty-

      - name: Run Build
        run: |
          TOOLDIR=$PWD/tools
          mkdir -p build${{ matrix.xlen }}-vm
          cd build${{ matrix.xlen }}-vm
          ../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }} --vm_enable=1
          source ci/toolchain_env.sh
          make software -s > /dev/null
          make tests -s > /dev/null

      - name: Upload Build Artifact
        uses: actions/upload-artifact@v4
        with:
          name: build-${{ matrix.xlen }}-vm
          path: build${{ matrix.xlen }}-vm
          # v4 skips dot-files by default; v2 uploaded them, so keep that.
          include-hidden-files: true

  # Downloads the matching VM build tree and runs the VM regression suite.
  test_vm:
    runs-on: ubuntu-20.04
    needs: build_vm
    strategy:
      matrix:
        xlen: [32, 64]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install Dependencies
        run: |
          sudo bash ./ci/system_updates.sh

      - name: Cache Toolchain Directory
        id: cache-toolchain
        uses: actions/cache@v4
        with:
          path: tools
          key: ${{ runner.os }}-toolchain-v0.1
          restore-keys: |
            ${{ runner.os }}-toolchain-

      - name: Cache Third Party Directory
        id: cache-thirdparty
        uses: actions/cache@v4
        with:
          path: third_party
          key: ${{ runner.os }}-thirdparty-v0.1
          restore-keys: |
            ${{ runner.os }}-thirdparty-

      - name: Download Build Artifact
        uses: actions/download-artifact@v4
        with:
          name: build-${{ matrix.xlen }}-vm
          path: build${{ matrix.xlen }}-vm

      - name: Run tests
        run: |
          cd build${{ matrix.xlen }}-vm
          source ci/toolchain_env.sh
          chmod -R +x . # Ensure all files have executable permissions
          ./ci/regression.sh --vm

  # Single terminal job that only runs once every test_vm matrix entry passed.
  complete:
    runs-on: ubuntu-20.04
    needs: test_vm
    steps:
      - name: Check Completion
        run: echo "All matrix jobs passed"

1
.gitignore vendored
View file

@ -1,3 +1,4 @@
/build* /build*
/.vscode /.vscode
*.cache
*.code-workspace *.code-workspace

3
.gitmodules vendored
View file

@ -6,5 +6,4 @@
url = https://github.com/ucb-bar/berkeley-softfloat-3.git url = https://github.com/ucb-bar/berkeley-softfloat-3.git
[submodule "third_party/ramulator"] [submodule "third_party/ramulator"]
path = third_party/ramulator path = third_party/ramulator
url = https://github.com/CMU-SAFARI/ramulator.git url = https://github.com/CMU-SAFARI/ramulator2.git
ignore = dirty

View file

@ -1,118 +0,0 @@
# Travis CI configuration: builds the toolchain, third-party libraries and a
# per-XLEN build tree (each cached under $HOME), then runs the regression
# suites as separate jobs of a single "test" stage.
language: cpp
dist: focal
os: linux
compiler: gcc

addons:
  apt:
    packages:
      - build-essential
      - valgrind
      - libstdc++6
      - binutils
      - python
      - uuid-dev

env:
  global:
    - TOOLDIR=$HOME/tools

cache:
  directories:
    - $TOOLDIR
    - $HOME/third_party
    - $HOME/build32
    - $HOME/build64

before_install:
  # Rebuild the toolchain only when the cached copy is missing, empty, or
  # stamped with a different version. The stamp written here must equal the
  # one tested in the condition, otherwise the cache is never accepted.
  - if [ ! -d "$TOOLDIR" ] || [ -z "$(ls -A $TOOLDIR)" ] || [ "$(cat "$TOOLDIR/version.txt")" != "v0.4" ]; then
      rm -rf $TOOLDIR;
      mkdir -p $TRAVIS_BUILD_DIR/build && cd $TRAVIS_BUILD_DIR/build;
      ../configure --tooldir=$TOOLDIR;
      ci/toolchain_install.sh --all;
      echo "v0.4" > "$TOOLDIR/version.txt";
    else
      echo "using existing tooldir build";
    fi
  # Same scheme for third_party: build and copy it into the cache, or copy
  # the cached build back into the checkout.
  - if [ ! -d "$HOME/third_party" ] || [ -z "$(ls -A $HOME/third_party)" ] || [ "$(cat "$HOME/third_party/version.txt")" != "v0.2" ]; then
      cd $TRAVIS_BUILD_DIR;
      make -C third_party > /dev/null;
      echo "v0.2" > "third_party/version.txt";
      cp -rf third_party $HOME;
    else
      echo "using existing third_party build";
      cp -rf $HOME/third_party $TRAVIS_BUILD_DIR;
    fi

install:
  # The build tree is stamped with the commit hash, so it is reused only by
  # jobs of the same commit.
  - if [ ! -d "$HOME/build$XLEN" ] || [ -z "$(ls -A $HOME/build$XLEN)" ] || [ "$(cat "$HOME/build$XLEN/version.txt")" != "$TRAVIS_COMMIT" ]; then
      mkdir -p $TRAVIS_BUILD_DIR/build$XLEN && cd $TRAVIS_BUILD_DIR/build$XLEN;
      ../configure --tooldir=$TOOLDIR --xlen=$XLEN;
      source ci/toolchain_env.sh;
      make build -s > /dev/null;
      echo "$TRAVIS_COMMIT" > version.txt;
      cp -rf $TRAVIS_BUILD_DIR/build$XLEN $HOME;
    else
      echo "using existing build for commit $TRAVIS_COMMIT";
      cp -rf $HOME/build$XLEN $TRAVIS_BUILD_DIR;
    fi

before_script:
  - cd $TRAVIS_BUILD_DIR/build$XLEN
  - source ci/toolchain_env.sh

stages:
  - test

jobs:
  include:
    - stage: test
      name: regression32
      env: XLEN=32
      script:
        - ./ci/travis_run.py ./ci/regression.sh --unittest
        - ./ci/travis_run.py ./ci/regression.sh --isa
        - ./ci/travis_run.py ./ci/regression.sh --kernel
        - ./ci/travis_run.py ./ci/regression.sh --synthesis
        - ./ci/travis_run.py ./ci/regression.sh --regression
        - ./ci/travis_run.py ./ci/regression.sh --opencl
    - stage: test
      name: regression64
      env: XLEN=64
      script:
        - ./ci/travis_run.py ./ci/regression.sh --isa
        - ./ci/travis_run.py ./ci/regression.sh --kernel
        - ./ci/travis_run.py ./ci/regression.sh --synthesis
        - ./ci/travis_run.py ./ci/regression.sh --regression
        - ./ci/travis_run.py ./ci/regression.sh --opencl
    - stage: test
      name: config
      env: XLEN=32
      script:
        - ./ci/travis_run.py ./ci/regression.sh --cluster
        - ./ci/travis_run.py ./ci/regression.sh --config
    - stage: test
      name: debug
      env: XLEN=32
      script:
        - ./ci/travis_run.py ./ci/regression.sh --debug
        - ./ci/travis_run.py ./ci/regression.sh --stress
    # Both variables go in ONE env entry: a second `env:` key in the same
    # mapping is a duplicate key and would silently replace XLEN.
    - stage: test
      name: virtual_memory32
      env: XLEN=32 VM_DISABLE=1
      script:
        - ./ci/travis_run.py ./ci/regression.sh --regression
        - ./ci/travis_run.py ./ci/regression.sh --opencl
    - stage: test
      name: virtual_memory64
      env: XLEN=64 VM_DISABLE=1
      script:
        - ./ci/travis_run.py ./ci/regression.sh --regression
        - ./ci/travis_run.py ./ci/regression.sh --opencl

View file

@ -1,5 +1,15 @@
include config.mk include config.mk
.PHONY: build software tests
vm:
$(MAKE) -C $(VORTEX_HOME)/third_party
$(MAKE) -C hw
$(MAKE) -C sim simx
$(MAKE) -C kernel
$(MAKE) -C runtime vm
$(MAKE) -C tests
all: all:
$(MAKE) -C $(VORTEX_HOME)/third_party $(MAKE) -C $(VORTEX_HOME)/third_party
$(MAKE) -C hw $(MAKE) -C hw
@ -15,13 +25,24 @@ build:
$(MAKE) -C runtime $(MAKE) -C runtime
$(MAKE) -C tests $(MAKE) -C tests
clean: software:
$(MAKE) -C hw
$(MAKE) -C kernel
$(MAKE) -C runtime/stub
tests:
$(MAKE) -C tests
clean-build:
$(MAKE) -C hw clean $(MAKE) -C hw clean
$(MAKE) -C sim clean $(MAKE) -C sim clean
$(MAKE) -C kernel clean $(MAKE) -C kernel clean
$(MAKE) -C runtime clean $(MAKE) -C runtime clean
$(MAKE) -C tests clean $(MAKE) -C tests clean
clean: clean-build
$(MAKE) -C $(VORTEX_HOME)/third_party clean
# Install setup # Install setup
KERNEL_INC_DST = $(PREFIX)/kernel/include KERNEL_INC_DST = $(PREFIX)/kernel/include
KERNEL_LIB_DST = $(PREFIX)/kernel/lib$(XLEN) KERNEL_LIB_DST = $(PREFIX)/kernel/lib$(XLEN)

View file

@ -56,7 +56,7 @@ More detailed build instructions can be found [here](docs/install_vortex.md).
``` ```
### Install Vortex codebase ### Install Vortex codebase
``` ```
git clone --depth=1 --recursive git@github.com:vortexgpgpu/vortex.git -b vortex_vm git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git -b vortex_vm
cd vortex cd vortex
``` ```
@ -68,18 +68,18 @@ More detailed build instructions can be found [here](docs/install_vortex.md).
mkdir out mkdir out
export OUT_DIR=`pwd`/out export OUT_DIR=`pwd`/out
cd build cd build
../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-6-14 --prefix=$OUT_DIR # Run the following to disable virtual memory feature in compilation
../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-2024-08-09 --prefix=$OUT_DIR
# Run the following instead to enable virtual memory feature in compilation
../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-2024-08-09 --prefix=$OUT_DIR --vm_enable=1
### Install prebuilt toolchain ### Install prebuilt toolchain
# We will use the precompiled tools in volvo toolchain directory # We will use the precompiled tools in volvo toolchain directory
### set environment variables ### set environment variables
# should always run before using the toolchain! # should always run before using the toolchain!
source ./ci/toolchain_env.sh source ./ci/toolchain_env.sh
### Building Vortex ### Building Vortex
make -s make -s
### Quick demo running vecadd OpenCL kernel on 2 cores ### Quick demo running vecadd OpenCL kernel on 2 cores
$ ./ci/blackbox.sh --cores=2 --app=vecadd $ ./ci/blackbox.sh --cores=2 --app=vecadd

View file

@ -25,37 +25,6 @@ XLEN=${XLEN:=@XLEN@}
echo "Vortex Regression Test: XLEN=$XLEN" echo "Vortex Regression Test: XLEN=$XLEN"
split_file() {
if [[ $# -ne 2 ]]; then
echo "Usage: $0 <filename> <start_with>"
return 1
fi
input_file="$1"
start_with="$2"
if [[ ! -r "$input_file" ]]; then
echo "Error: File '$input_file' is not readable or does not exist."
return 1
fi
count=0
output_file=""
while IFS= read -r line; do
if [[ $line == $start_with* ]]; then
count=$((count + 1))
output_file="$input_file.part$count"
> "$output_file" # ensure empty
fi
if [[ -n "$output_file" ]]; then
echo "$line" >> "$output_file"
fi
done < "$input_file"
if [[ $count -eq 0 ]]; then
echo "No lines starting with '$start_with' were found in '$input_file'."
fi
}
###############################################################################
unittest() unittest()
{ {
make -C tests/unittest run make -C tests/unittest run
@ -66,6 +35,9 @@ isa()
{ {
echo "begin isa tests..." echo "begin isa tests..."
make -C sim/simx
make -C sim/rtlsim
make -C tests/riscv/isa run-simx make -C tests/riscv/isa run-simx
make -C tests/riscv/isa run-rtlsim make -C tests/riscv/isa run-rtlsim
@ -96,8 +68,8 @@ isa()
make -C tests/riscv/isa run-rtlsim-64fx make -C tests/riscv/isa run-rtlsim-64fx
fi fi
# restore default prebuilt configuration # clean build
make -C sim/rtlsim clean && make -C sim/rtlsim > /dev/null make -C sim/rtlsim clean
echo "isa tests done!" echo "isa tests done!"
} }
@ -106,6 +78,9 @@ kernel()
{ {
echo "begin kernel tests..." echo "begin kernel tests..."
make -C sim/simx
make -C sim/rtlsim
make -C tests/kernel run-simx make -C tests/kernel run-simx
make -C tests/kernel run-rtlsim make -C tests/kernel run-rtlsim
@ -116,6 +91,9 @@ regression()
{ {
echo "begin regression tests..." echo "begin regression tests..."
make -C runtime/simx
make -C runtime/rtlsim
make -C tests/regression run-simx make -C tests/regression run-simx
make -C tests/regression run-rtlsim make -C tests/regression run-rtlsim
@ -134,6 +112,9 @@ opencl()
{ {
echo "begin opencl tests..." echo "begin opencl tests..."
make -C runtime/simx
make -C runtime/rtlsim
make -C tests/opencl run-simx make -C tests/opencl run-simx
make -C tests/opencl run-rtlsim make -C tests/opencl run-rtlsim
@ -143,24 +124,28 @@ opencl()
echo "opencl tests done!" echo "opencl tests done!"
} }
cluster() vm(){
{ echo "begin vm tests..."
echo "begin clustering tests..."
# cores clustering make -C sim/simx
./ci/blackbox.sh --driver=rtlsim --cores=4 --clusters=1 --app=diverge --args="-n1" make -C runtime/simx
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=diverge --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --clusters=1 --app=diverge --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --app=diverge --args="-n1"
# L2/L3 make -C tests/kernel run-simx
./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=diverge --args="-n1"
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l3cache --app=diverge --args="-n1" # Regression tests
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr --args="-n1" make -C tests/regression run-simx
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --l2cache --app=diverge --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --clusters=4 --l2cache --l3cache --app=diverge --args="-n1"
echo "clustering tests done!" # test global barrier
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tgbar" --cores=2
# test local barrier
./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar"
# OpenCL tests
make -C tests/opencl run-simx
./ci/blackbox.sh --driver=simx --app=lbm --warps=8
echo "vm tests done!"
} }
test_csv_trace() test_csv_trace()
@ -170,29 +155,20 @@ test_csv_trace()
make -C sim/rtlsim clean && DEBUG=3 CONFIGS="-DGPR_RESET" make -C sim/rtlsim > /dev/null make -C sim/rtlsim clean && DEBUG=3 CONFIGS="-DGPR_RESET" make -C sim/rtlsim > /dev/null
make -C tests/riscv/isa run-simx-32im > run_simx.log make -C tests/riscv/isa run-simx-32im > run_simx.log
make -C tests/riscv/isa run-rtlsim-32im > run_rtlsim.log make -C tests/riscv/isa run-rtlsim-32im > run_rtlsim.log
split_file run_simx.log "Running " ./ci/trace_csv.py -tsimx run_simx.log -otrace_simx.csv
split_file run_rtlsim.log "Running " ./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv
for file in ./run_simx.log.part*; do diff trace_rtlsim.csv trace_simx.csv
if [[ -f "$file" ]]; then # clean build
file2="${file//simx/rtlsim}" make -C sim/simx clean
if [[ -f "$file2" ]]; then make -C sim/rtlsim clean
./ci/trace_csv.py -tsimx $file -otrace_simx.csv
./ci/trace_csv.py -trtlsim $file2 -otrace_rtlsim.csv
diff trace_rtlsim.csv trace_simx.csv
else
echo "File $file2 not found."
fi
fi
done
# restore default prebuilt configuration
make -C sim/simx clean && make -C sim/simx > /dev/null
make -C sim/rtlsim clean && make -C sim/rtlsim > /dev/null
} }
debug() debug()
{ {
echo "begin debugging tests..." echo "begin debugging tests..."
test_csv_trace test_csv_trace
./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
./ci/blackbox.sh --driver=opae --cores=1 --scope --app=demo --args="-n1" ./ci/blackbox.sh --driver=opae --cores=1 --scope --app=demo --args="-n1"
@ -200,21 +176,23 @@ debug()
echo "debugging tests done!" echo "debugging tests done!"
} }
config() config1()
{ {
echo "begin configuration tests..." echo "begin configuration-1 tests..."
# warp/threads configurations # warp/threads
./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=1 --threads=1 --app=diverge ./ci/blackbox.sh --driver=rtlsim --warps=1 --threads=1 --app=diverge
./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=2 --threads=2 --app=diverge ./ci/blackbox.sh --driver=rtlsim --warps=2 --threads=2 --app=diverge
./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=2 --threads=8 --app=diverge ./ci/blackbox.sh --driver=rtlsim --warps=2 --threads=8 --app=diverge
./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=8 --threads=2 --app=diverge ./ci/blackbox.sh --driver=rtlsim --warps=8 --threads=2 --app=diverge
./ci/blackbox.sh --driver=simx --cores=1 --warps=1 --threads=1 --app=diverge ./ci/blackbox.sh --driver=simx --warps=1 --threads=1 --app=diverge
./ci/blackbox.sh --driver=simx --cores=1 --warps=8 --threads=16 --app=diverge ./ci/blackbox.sh --driver=simx --warps=8 --threads=16 --app=diverge
# disable DPI # cores clustering
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood ./ci/blackbox.sh --driver=rtlsim --cores=4 --clusters=1 --app=diverge --args="-n1"
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=diverge --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --clusters=1 --app=diverge --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --app=diverge --args="-n1"
# issue width # issue width
CONFIGS="-DISSUE_WIDTH=2" ./ci/blackbox.sh --driver=rtlsim --app=diverge CONFIGS="-DISSUE_WIDTH=2" ./ci/blackbox.sh --driver=rtlsim --app=diverge
@ -240,6 +218,31 @@ config()
CONFIGS="-DISSUE_WIDTH=2 -DNUM_LSU_BLOCK=1 -DNUM_LSU_LANES=2" ./ci/blackbox.sh --driver=simx --app=vecaddx CONFIGS="-DISSUE_WIDTH=2 -DNUM_LSU_BLOCK=1 -DNUM_LSU_LANES=2" ./ci/blackbox.sh --driver=simx --app=vecaddx
CONFIGS="-DISSUE_WIDTH=4 -DNUM_LSU_BLOCK=4 -DNUM_LSU_LANES=4" ./ci/blackbox.sh --driver=simx --app=vecaddx CONFIGS="-DISSUE_WIDTH=4 -DNUM_LSU_BLOCK=4 -DNUM_LSU_LANES=4" ./ci/blackbox.sh --driver=simx --app=vecaddx
# L2/L3
./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=diverge --args="-n1"
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l3cache --app=diverge --args="-n1"
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --l2cache --app=diverge --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --clusters=4 --l2cache --l3cache --app=diverge --args="-n1"
# multiple L1 caches per socket
CONFIGS="-DSOCKET_SIZE=4 -DNUM_DCACHES=2 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --cores=8 --warps=1 --threads=2
echo "configuration-1 tests done!"
}
config2()
{
echo "begin configuration-2 tests..."
# test opaesim
./ci/blackbox.sh --driver=opae --app=printf
./ci/blackbox.sh --driver=opae --app=diverge
# disable DPI
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood
# custom program startup address # custom program startup address
make -C tests/regression/dogfood clean-kernel make -C tests/regression/dogfood clean-kernel
if [ "$XLEN" == "64" ]; then if [ "$XLEN" == "64" ]; then
@ -249,55 +252,57 @@ config()
fi fi
./ci/blackbox.sh --driver=simx --app=dogfood ./ci/blackbox.sh --driver=simx --app=dogfood
./ci/blackbox.sh --driver=rtlsim --app=dogfood ./ci/blackbox.sh --driver=rtlsim --app=dogfood
make -C tests/regression/dogfood clean-kernel
# disabling M & F extensions # disabling M & F extensions
make -C sim/rtlsim clean && CONFIGS="-DEXT_M_DISABLE -DEXT_F_DISABLE" make -C sim/rtlsim > /dev/null make -C sim/rtlsim clean && CONFIGS="-DEXT_M_DISABLE -DEXT_F_DISABLE" make -C sim/rtlsim > /dev/null
make -C tests/riscv/isa run-rtlsim-32i make -C tests/riscv/isa run-rtlsim-32i
make -C sim/rtlsim clean && make -C sim/rtlsim > /dev/null make -C sim/rtlsim clean
# disabling ZICOND extension # disabling ZICOND extension
CONFIGS="-DEXT_ZICOND_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo CONFIGS="-DEXT_ZICOND_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo
# disable local memory # disable local memory
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo --perf=1 CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo --perf=1
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --cores=1 --app=demo --perf=1 CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=demo --perf=1
# disable L1 cache
CONFIGS="-DL1_DISABLE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx
CONFIGS="-DL1_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx
CONFIGS="-DDCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx
CONFIGS="-DICACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx
# multiple L1 caches per socket
CONFIGS="-DSOCKET_SIZE=4 -DNUM_DCACHES=2 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --cores=8 --warps=1 --threads=2
# test AXI bus # test AXI bus
AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --app=demo
# disable L1 cache
CONFIGS="-DL1_DISABLE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DL1_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DDCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DICACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
# reduce l1 line size # reduce l1 line size
CONFIGS="-DL1_LINE_SIZE=4" ./ci/blackbox.sh --driver=rtlsim --app=io_addr CONFIGS="-DL1_LINE_SIZE=$XLEN/8" ./ci/blackbox.sh --driver=rtlsim --app=io_addr
CONFIGS="-DL1_LINE_SIZE=4" ./ci/blackbox.sh --driver=simx --app=io_addr CONFIGS="-DL1_LINE_SIZE=$XLEN/8" ./ci/blackbox.sh --driver=simx --app=io_addr
CONFIGS="-DL1_LINE_SIZE=4 -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx CONFIGS="-DL1_LINE_SIZE=$XLEN/8 -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DL1_LINE_SIZE=4 -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx CONFIGS="-DL1_LINE_SIZE=$XLEN/8 -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx
# test cache ways
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx
# test cache banking # test cache banking
CONFIGS="-DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx CONFIGS="-DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
CONFIGS="-DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx CONFIGS="-DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --cores=1 --app=sgemmx CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
# test 128-bit MEM block # test 128-bit MEM block
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --cores=1 --app=demo CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --app=demo
# test single-bank DRAM # test single-bank DRAM
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --cores=1 --app=demo CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --app=demo
# test 27-bit DRAM address # test 27-bit DRAM address
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --cores=1 --app=demo CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --app=demo
echo "configuration tests done!" echo "configuration-2 tests done!"
} }
stress() stress()
@ -306,9 +311,7 @@ stress()
# test verilator reset values # test verilator reset values
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --args="-n128" --l2cache
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=opae --app=printf
./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n128" --l2cache
echo "stress tests done!" echo "stress tests done!"
} }
@ -318,7 +321,7 @@ synthesis()
echo "begin synthesis tests..." echo "begin synthesis tests..."
PREFIX=build_base make -C hw/syn/yosys clean PREFIX=build_base make -C hw/syn/yosys clean
PREFIX=build_base CONFIGS="-DDPI_DISABLE -DEXT_F_DISABLE" make -C hw/syn/yosys elaborate PREFIX=build_base CONFIGS="-DDPI_DISABLE -DEXT_F_DISABLE" make -C hw/syn/yosys synthesis
echo "synthesis tests done!" echo "synthesis tests done!"
} }
@ -326,7 +329,7 @@ synthesis()
show_usage() show_usage()
{ {
echo "Vortex Regression Test" echo "Vortex Regression Test"
echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cluster] [--debug] [--config] [--stress] [--synthesis] [--all] [--h|--help]" echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--config1] [--config2] [--debug] [--stress] [--synthesis] [--all] [--h|--help]"
} }
start=$SECONDS start=$SECONDS
@ -336,6 +339,9 @@ clean=0
while [ "$1" != "" ]; do while [ "$1" != "" ]; do
case $1 in case $1 in
--vm )
tests+=("vm")
;;
--clean ) --clean )
clean=1 clean=1
;; ;;
@ -354,15 +360,15 @@ while [ "$1" != "" ]; do
--opencl ) --opencl )
tests+=("opencl") tests+=("opencl")
;; ;;
--cluster ) --config1 )
tests+=("cluster") tests+=("config1")
;;
--config2 )
tests+=("config2")
;; ;;
--debug ) --debug )
tests+=("debug") tests+=("debug")
;; ;;
--config )
tests+=("config")
;;
--stress ) --stress )
tests+=("stress") tests+=("stress")
;; ;;
@ -376,9 +382,9 @@ while [ "$1" != "" ]; do
tests+=("kernel") tests+=("kernel")
tests+=("regression") tests+=("regression")
tests+=("opencl") tests+=("opencl")
tests+=("cluster") tests+=("config1")
tests+=("config2")
tests+=("debug") tests+=("debug")
tests+=("config")
tests+=("stress") tests+=("stress")
tests+=("synthesis") tests+=("synthesis")
;; ;;

27
ci/system_updates.sh Executable file
View file

@ -0,0 +1,27 @@
#!/bin/sh
# Copyright 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Installs the system packages the CI jobs need: GCC/G++ 11 from the
# ubuntu-toolchain-r PPA (registered as the default gcc/g++), plus the
# general build dependencies. Must be run as root (the workflow uses sudo).

# Abort on the first failing command so a broken install fails the CI step.
set -e

# Never let a package's configuration prompt block an unattended CI run.
export DEBIAN_FRONTEND=noninteractive

apt-get update -y

# add-apt-repository is shipped by software-properties-common; install it
# explicitly so the script also works on images that do not preinstall it.
apt-get install -y software-properties-common
add-apt-repository -y ppa:ubuntu-toolchain-r/test
apt-get update -y

apt-get install -y g++-11 gcc-11

# Register version 11 as the default compiler pair with priority 100.
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 100
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100

apt-get install -y build-essential valgrind libstdc++6 binutils python uuid-dev ccache

View file

@ -16,8 +16,8 @@
TOOLDIR=${TOOLDIR:=@TOOLDIR@} TOOLDIR=${TOOLDIR:=@TOOLDIR@}
export VERILATOR_ROOT=$TOOLDIR/verilator # export VERILATOR_ROOT=$TOOLDIR/verilator
export PATH=$VERILATOR_ROOT/bin:$PATH # export PATH=$VERILATOR_ROOT/bin:$PATH
export SV2V_PATH=$TOOLDIR/sv2v export SV2V_PATH=$TOOLDIR/sv2v
export PATH=$SV2V_PATH/bin:$PATH export PATH=$SV2V_PATH/bin:$PATH

View file

@ -26,7 +26,7 @@ def parse_args():
parser.add_argument('log', help='Input log file') parser.add_argument('log', help='Input log file')
return parser.parse_args() return parser.parse_args()
def parse_simx(log_filename): def parse_simx(log_lines):
pc_pattern = r"PC=(0x[0-9a-fA-F]+)" pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
instr_pattern = r"Instr (0x[0-9a-fA-F]+):" instr_pattern = r"Instr (0x[0-9a-fA-F]+):"
opcode_pattern = r"Instr 0x[0-9a-fA-F]+: ([0-9a-zA-Z_\.]+)" opcode_pattern = r"Instr 0x[0-9a-fA-F]+: ([0-9a-zA-Z_\.]+)"
@ -37,32 +37,31 @@ def parse_simx(log_filename):
destination_pattern = r"Dest Reg: (.+)" destination_pattern = r"Dest Reg: (.+)"
uuid_pattern = r"#(\d+)" uuid_pattern = r"#(\d+)"
entries = [] entries = []
with open(log_filename, 'r') as log_file: instr_data = None
instr_data = None for lineno, line in enumerate(log_lines, start=1):
for lineno, line in enumerate(log_file, start=1): try:
try: if line.startswith("DEBUG Fetch:"):
if line.startswith("DEBUG Fetch:"): if instr_data:
if instr_data: entries.append(instr_data)
entries.append(instr_data) instr_data = {}
instr_data = {} instr_data["lineno"] = lineno
instr_data["lineno"] = lineno instr_data["PC"] = re.search(pc_pattern, line).group(1)
instr_data["PC"] = re.search(pc_pattern, line).group(1) instr_data["core_id"] = re.search(core_id_pattern, line).group(1)
instr_data["core_id"] = re.search(core_id_pattern, line).group(1) instr_data["warp_id"] = re.search(warp_id_pattern, line).group(1)
instr_data["warp_id"] = re.search(warp_id_pattern, line).group(1) instr_data["tmask"] = re.search(tmask_pattern, line).group(1)
instr_data["tmask"] = re.search(tmask_pattern, line).group(1) instr_data["uuid"] = re.search(uuid_pattern, line).group(1)
instr_data["uuid"] = re.search(uuid_pattern, line).group(1) elif line.startswith("DEBUG Instr"):
elif line.startswith("DEBUG Instr"): instr_data["instr"] = re.search(instr_pattern, line).group(1)
instr_data["instr"] = re.search(instr_pattern, line).group(1) instr_data["opcode"] = re.search(opcode_pattern, line).group(1)
instr_data["opcode"] = re.search(opcode_pattern, line).group(1) elif line.startswith("DEBUG Src"):
elif line.startswith("DEBUG Src"): src_reg = re.search(operands_pattern, line).group(1)
src_reg = re.search(operands_pattern, line).group(1) instr_data["operands"] = (instr_data["operands"] + ', ' + src_reg) if 'operands' in instr_data else src_reg
instr_data["operands"] = (instr_data["operands"] + ', ' + src_reg) if 'operands' in instr_data else src_reg elif line.startswith("DEBUG Dest"):
elif line.startswith("DEBUG Dest"): instr_data["destination"] = re.search(destination_pattern, line).group(1)
instr_data["destination"] = re.search(destination_pattern, line).group(1) except Exception as e:
except Exception as e: print("Error at line {}: {}".format(lineno, e))
print("Error at line {}: {}".format(lineno, e)) if instr_data:
if instr_data: entries.append(instr_data)
entries.append(instr_data)
return entries return entries
def reverse_binary(bin_str): def reverse_binary(bin_str):
@ -95,8 +94,9 @@ def append_value(text, reg, value, tmask_arr, sep):
text += "}" text += "}"
return text, sep return text, sep
def parse_rtlsim(log_filename): def parse_rtlsim(log_lines):
line_pattern = r"\d+: core(\d+)-(decode|issue|commit)" config_pattern = r"CONFIGS: num_threads=(\d+), num_warps=(\d+), num_cores=(\d+), num_clusters=(\d+), socket_size=(\d+), local_mem_base=(\d+), num_barriers=(\d+)"
line_pattern = r"\d+: cluster(\d+)-socket(\d+)-core(\d+)-(decode|issue|commit)"
pc_pattern = r"PC=(0x[0-9a-fA-F]+)" pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
instr_pattern = r"instr=(0x[0-9a-fA-F]+)" instr_pattern = r"instr=(0x[0-9a-fA-F]+)"
ex_pattern = r"ex=([a-zA-Z]+)" ex_pattern = r"ex=([a-zA-Z]+)"
@ -116,124 +116,166 @@ def parse_rtlsim(log_filename):
eop_pattern = r"eop=(\d)" eop_pattern = r"eop=(\d)"
uuid_pattern = r"#(\d+)" uuid_pattern = r"#(\d+)"
entries = [] entries = []
with open(log_filename, 'r') as log_file: instr_data = {}
instr_data = {} num_threads = 0
for lineno, line in enumerate(log_file, start=1): num_warps = 0
try: num_cores = 0
line_match = re.search(line_pattern, line) num_clusters = 0
if line_match: socket_size = 0
PC = re.search(pc_pattern, line).group(1) local_mem_base = 0
warp_id = re.search(warp_id_pattern, line).group(1) num_barriers = 0
tmask = re.search(tmask_pattern, line).group(1) num_sockets = 0
uuid = re.search(uuid_pattern, line).group(1) for lineno, line in enumerate(log_lines, start=1):
core_id = line_match.group(1) try:
stage = line_match.group(2) config_match = re.search(config_pattern, line)
if stage == "decode": if config_match:
trace = {} num_threads = int(config_match.group(1))
trace["uuid"] = uuid num_warps = int(config_match.group(2))
trace["PC"] = PC num_cores = int(config_match.group(3))
trace["core_id"] = core_id num_clusters = int(config_match.group(4))
trace["warp_id"] = warp_id socket_size = int(config_match.group(5))
trace["tmask"] = reverse_binary(tmask) local_mem_base = int(config_match.group(6))
trace["instr"] = re.search(instr_pattern, line).group(1) num_barriers = int(config_match.group(7))
trace["opcode"] = re.search(op_pattern, line).group(1) num_sockets = (num_cores + socket_size - 1) // socket_size
trace["opds"] = bin_to_array(re.search(opds_pattern, line).group(1)) continue
trace["rd"] = re.search(rd_pattern, line).group(1) line_match = re.search(line_pattern, line)
trace["rs1"] = re.search(rs1_pattern, line).group(1) if line_match:
trace["rs2"] = re.search(rs2_pattern, line).group(1) PC = re.search(pc_pattern, line).group(1)
trace["rs3"] = re.search(rs3_pattern, line).group(1) warp_id = re.search(warp_id_pattern, line).group(1)
tmask = re.search(tmask_pattern, line).group(1)
uuid = re.search(uuid_pattern, line).group(1)
cluster_id = line_match.group(1)
socket_id = line_match.group(2)
core_id = line_match.group(3)
stage = line_match.group(4)
if stage == "decode":
trace = {}
trace["uuid"] = uuid
trace["PC"] = PC
trace["core_id"] = ((((cluster_id * num_sockets) + socket_id) * socket_size) + core_id)
trace["warp_id"] = warp_id
trace["tmask"] = reverse_binary(tmask)
trace["instr"] = re.search(instr_pattern, line).group(1)
trace["opcode"] = re.search(op_pattern, line).group(1)
trace["opds"] = bin_to_array(re.search(opds_pattern, line).group(1))
trace["rd"] = re.search(rd_pattern, line).group(1)
trace["rs1"] = re.search(rs1_pattern, line).group(1)
trace["rs2"] = re.search(rs2_pattern, line).group(1)
trace["rs3"] = re.search(rs3_pattern, line).group(1)
instr_data[uuid] = trace
elif stage == "issue":
if uuid in instr_data:
trace = instr_data[uuid]
trace["lineno"] = lineno
opds = trace["opds"]
if opds[1]:
trace["rs1_data"] = re.search(rs1_data_pattern, line).group(1).split(', ')[::-1]
if opds[2]:
trace["rs2_data"] = re.search(rs2_data_pattern, line).group(1).split(', ')[::-1]
if opds[3]:
trace["rs3_data"] = re.search(rs3_data_pattern, line).group(1).split(', ')[::-1]
trace["issued"] = True
instr_data[uuid] = trace instr_data[uuid] = trace
elif stage == "issue": elif stage == "commit":
if uuid in instr_data: if uuid in instr_data:
trace = instr_data[uuid] trace = instr_data[uuid]
trace["lineno"] = lineno if "issued" in trace:
opds = trace["opds"] opds = trace["opds"]
if opds[1]: dst_tmask_arr = bin_to_array(tmask)[::-1]
trace["rs1_data"] = re.search(rs1_data_pattern, line).group(1).split(', ')[::-1] wb = re.search(wb_pattern, line).group(1) == "1"
if opds[2]: if wb:
trace["rs2_data"] = re.search(rs2_data_pattern, line).group(1).split(', ')[::-1] rd_data = re.search(rd_data_pattern, line).group(1).split(', ')[::-1]
if opds[3]: if 'rd_data' in trace:
trace["rs3_data"] = re.search(rs3_data_pattern, line).group(1).split(', ')[::-1] merged_rd_data = trace['rd_data']
trace["issued"] = True for i in range(len(dst_tmask_arr)):
if dst_tmask_arr[i] == 1:
merged_rd_data[i] = rd_data[i]
trace['rd_data'] = merged_rd_data
else:
trace['rd_data'] = rd_data
instr_data[uuid] = trace instr_data[uuid] = trace
elif stage == "commit": eop = re.search(eop_pattern, line).group(1) == "1"
if uuid in instr_data: if eop:
trace = instr_data[uuid] tmask_arr = bin_to_array(trace["tmask"])
if "issued" in trace: destination = ''
opds = trace["opds"]
dst_tmask_arr = bin_to_array(tmask)[::-1]
wb = re.search(wb_pattern, line).group(1) == "1"
if wb: if wb:
rd_data = re.search(rd_data_pattern, line).group(1).split(', ')[::-1] destination, sep = append_value(destination, trace["rd"], trace['rd_data'], tmask_arr, False)
if 'rd_data' in trace: del trace['rd_data']
merged_rd_data = trace['rd_data'] trace["destination"] = destination
for i in range(len(dst_tmask_arr)): operands = ''
if dst_tmask_arr[i] == 1: sep = False
merged_rd_data[i] = rd_data[i] if opds[1]:
trace['rd_data'] = merged_rd_data operands, sep = append_value(operands, trace["rs1"], trace["rs1_data"], tmask_arr, sep)
else: del trace["rs1_data"]
trace['rd_data'] = rd_data if opds[2]:
instr_data[uuid] = trace operands, sep = append_value(operands, trace["rs2"], trace["rs2_data"], tmask_arr, sep)
eop = re.search(eop_pattern, line).group(1) == "1" del trace["rs2_data"]
if eop: if opds[3]:
tmask_arr = bin_to_array(trace["tmask"]) operands, sep = append_value(operands, trace["rs3"], trace["rs3_data"], tmask_arr, sep)
destination = '' del trace["rs3_data"]
if wb: trace["operands"] = operands
destination, sep = append_value(destination, trace["rd"], trace['rd_data'], tmask_arr, False) del trace["opds"]
del trace['rd_data'] del trace["rd"]
trace["destination"] = destination del trace["rs1"]
operands = '' del trace["rs2"]
sep = False del trace["rs3"]
if opds[1]: del trace["issued"]
operands, sep = append_value(operands, trace["rs1"], trace["rs1_data"], tmask_arr, sep) del instr_data[uuid]
del trace["rs1_data"] entries.append(trace)
if opds[2]: except Exception as e:
operands, sep = append_value(operands, trace["rs2"], trace["rs2_data"], tmask_arr, sep) print("Error at line {}: {}".format(lineno, e))
del trace["rs2_data"]
if opds[3]:
operands, sep = append_value(operands, trace["rs3"], trace["rs3_data"], tmask_arr, sep)
del trace["rs3_data"]
trace["operands"] = operands
del trace["opds"]
del trace["rd"]
del trace["rs1"]
del trace["rs2"]
del trace["rs3"]
del trace["issued"]
del instr_data[uuid]
entries.append(trace)
except Exception as e:
print("Error at line {}: {}".format(lineno, e))
return entries return entries
def write_csv(sublogs, csv_filename, log_type):
    """Parse each sublog and write its trace entries to a single CSV file.

    Args:
        sublogs: iterable of sublogs, each a list of log lines.
        csv_filename: path of the CSV file to create (overwritten if present).
        log_type: "rtlsim" or "simx"; selects which parser is applied.

    The output starts with one header row. Entries are sorted by integer
    uuid within each sublog; sublogs are written in the order given.

    Exits the process with status 1 if log_type is not recognized. The check
    happens before the output file is opened, so an invalid type never
    truncates an existing CSV.
    """
    # Validate first: do not touch the output file for a request we cannot serve.
    if log_type not in ("rtlsim", "simx"):
        print('Error: invalid log type')
        sys.exit(1)

    fieldnames = ["uuid", "PC", "opcode", "instr", "core_id", "warp_id", "tmask", "destination", "operands"]

    with open(csv_filename, 'w', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()

        for sublog in sublogs:
            # parse sublog
            if log_type == "rtlsim":
                entries = parse_rtlsim(sublog)
            else:
                entries = parse_simx(sublog)

            # sort entries by uuid
            entries.sort(key=lambda x: int(x['uuid']))

            for entry in entries:
                # 'lineno' is not a CSV column; DictWriter rejects unknown keys.
                del entry['lineno']
                writer.writerow(entry)
def split_log_file(log_filename):
    """Split a log file into one list of lines per "[VXDRV] START" marker.

    Each returned sublog begins with a line that starts with "[VXDRV] START"
    and runs up to, but not including, the next such line. Lines that appear
    before the first marker are dropped.

    If the log contains no marker at all, the whole log is returned as a
    single sublog, so such logs are still parsed instead of silently
    producing an empty result.

    Args:
        log_filename: path of the log file to read.

    Returns:
        A list of sublogs, each a list of lines with line endings preserved.
        An empty file yields an empty list.
    """
    marker = "[VXDRV] START"
    sublogs = []
    preamble = []  # lines seen before the first marker

    # Stream the file instead of readlines() to avoid a second full copy.
    with open(log_filename, 'r') as log_file:
        for line in log_file:
            if line.startswith(marker):
                sublogs.append([line])
            elif sublogs:
                sublogs[-1].append(line)
            else:
                preamble.append(line)

    # No marker anywhere: treat the entire log as one run.
    if not sublogs and preamble:
        return [preamble]
    return sublogs
def main():
    """Entry point: parse the command line, split the log into sublogs, write the CSV."""
    options = parse_args()
    write_csv(split_log_file(options.log), options.csv, options.type)
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View file

@ -32,4 +32,8 @@ RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf
RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX) RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX)
VORTEX_RT_PATH ?= $(VORTEX_HOME)/runtime VORTEX_RT_PATH ?= $(VORTEX_HOME)/runtime
VORTEX_KN_PATH ?= $(VORTEX_HOME)/kernel VORTEX_KN_PATH ?= $(VORTEX_HOME)/kernel
THIRD_PARTY_DIR ?= $(VORTEX_HOME)/third_party
VM_ENABLE ?= @VM_ENABLE@

15
configure vendored
View file

@ -63,7 +63,7 @@ copy_files() {
filename_no_ext="${filename%.in}" filename_no_ext="${filename%.in}"
dest_file="$dest_dir/$filename_no_ext" dest_file="$dest_dir/$filename_no_ext"
mkdir -p "$dest_dir" mkdir -p "$dest_dir"
sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@PREFIX@|$PREFIX|g" "$file" > "$dest_file" sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@PREFIX@|$PREFIX|g; s|@VM_ENABLE@|$VM_ENABLE|g" "$file" > "$dest_file"
# apply permissions to bash scripts # apply permissions to bash scripts
read -r firstline < "$dest_file" read -r firstline < "$dest_file"
if [[ "$firstline" =~ ^#!.*bash ]]; then if [[ "$firstline" =~ ^#!.*bash ]]; then
@ -111,9 +111,10 @@ copy_files() {
# default configuration parameters # default configuration parameters
default_xlen=32 default_xlen=32
default_tooldir=/opt default_tooldir=$HOME/tools
default_osversion=$(detect_osversion) default_osversion=$(detect_osversion)
default_prefix=$CURRENT_DIR default_prefix=$CURRENT_DIR
default_vm=0
# load default configuration parameters from existing config.mk # load default configuration parameters from existing config.mk
if [ -f "config.mk" ]; then if [ -f "config.mk" ]; then
@ -126,6 +127,7 @@ if [ -f "config.mk" ]; then
TOOLDIR\ ?*) default_tooldir=${value//\?=/} ;; TOOLDIR\ ?*) default_tooldir=${value//\?=/} ;;
OSVERSION\ ?*) default_osversion=${value//\?=/} ;; OSVERSION\ ?*) default_osversion=${value//\?=/} ;;
PREFIX\ ?*) default_prefix=${value//\?=/} ;; PREFIX\ ?*) default_prefix=${value//\?=/} ;;
VM_ENABLE\ ?*) default_vm=${value//\?=/} ;;
esac esac
done < config.mk done < config.mk
fi fi
@ -135,14 +137,16 @@ XLEN=${XLEN:=$default_xlen}
TOOLDIR=${TOOLDIR:=$default_tooldir} TOOLDIR=${TOOLDIR:=$default_tooldir}
OSVERSION=${OSVERSION:=$default_osversion} OSVERSION=${OSVERSION:=$default_osversion}
PREFIX=${PREFIX:=$default_prefix} PREFIX=${PREFIX:=$default_prefix}
VM_ENABLE=${VM_ENABLE:=$default_vm}
# parse command line arguments # parse command line arguments
usage() { usage() {
echo "Usage: $0 [--xlen=<value>] [--tooldir=<path>] [--osversion=<version>]" echo "Usage: $0 [--xlen=<value>] [--tooldir=<path>] [--osversion=<version>]"
echo " --xlen=<value> Set the XLEN value (default: 32)" echo " --xlen=<value> Set the XLEN value (default: 32)"
echo " --tooldir=<path> Set the TOOLDIR path (default: /opt)" echo " --tooldir=<path> Set the TOOLDIR path (default: $HOME/tools)"
echo " --osversion=<version> Set the OS Version (default: $(detect_os))" echo " --osversion=<version> Set the OS Version (default: $(detect_osversion))"
echo " --prefix=<path> Set installation directory" echo " --prefix=<path> Set installation directory"
echo " --vm_enable=<value> Enable Virtual Memory support (default: 0)"
exit 1 exit 1
} }
while [[ "$#" -gt 0 ]]; do while [[ "$#" -gt 0 ]]; do
@ -151,6 +155,7 @@ while [[ "$#" -gt 0 ]]; do
--tooldir=*) TOOLDIR="${1#*=}" ;; --tooldir=*) TOOLDIR="${1#*=}" ;;
--osversion=*) OSVERSION="${1#*=}" ;; --osversion=*) OSVERSION="${1#*=}" ;;
--prefix=*) PREFIX="${1#*=}" ;; --prefix=*) PREFIX="${1#*=}" ;;
--vm_enable=*) VM_ENABLE="${1#*=}" ;;
-h|--help) usage ;; -h|--help) usage ;;
*) echo "Unknown parameter passed: $1"; usage ;; *) echo "Unknown parameter passed: $1"; usage ;;
esac esac
@ -172,3 +177,5 @@ SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
THIRD_PARTY_DIR=$SCRIPT_DIR/third_party THIRD_PARTY_DIR=$SCRIPT_DIR/third_party
copy_files "$SCRIPT_DIR" "$CURRENT_DIR" copy_files "$SCRIPT_DIR" "$CURRENT_DIR"
echo "VM Enable: "$VM_ENABLE

79
docs/altera_fpga_guide.md Normal file
View file

@ -0,0 +1,79 @@
# FPGA Startup and Configuration Guide
OPAE Environment Setup
----------------------
$ source /opt/inteldevstack/init_env_user.sh
$ export OPAE_HOME=/opt/opae/1.1.2
$ export PATH=$OPAE_HOME/bin:$PATH
$ export C_INCLUDE_PATH=$OPAE_HOME/include:$C_INCLUDE_PATH
$ export LIBRARY_PATH=$OPAE_HOME/lib:$LIBRARY_PATH
$ export LD_LIBRARY_PATH=$OPAE_HOME/lib:$LD_LIBRARY_PATH
OPAE Build
------------------
The FPGA has the following configuration options:
- DEVICE_FAMILY=arria10 | stratix10
- NUM_CORES=#n
Command line:
$ cd hw/syn/altera/opae
$ PREFIX=test1 TARGET=fpga NUM_CORES=4 make
A new folder (ex: `test1_xxx_4c`) will be created and the build will start and take ~30-480 min to complete.
Setting TARGET=ase will build the project for simulation using Intel ASE.
OPAE Build Configuration
------------------------
The hardware configuration file `/hw/rtl/VX_config.vh` defines all the hardware parameters that can be modified when building the processor. For example, the following parameters can be configured:
- `NUM_WARPS`: Number of warps per core
- `NUM_THREADS`: Number of threads per warp
- `PERF_ENABLE`: enable the use of all profile counters
You can configure the synthesis build from the command line:
$ CONFIGS="-DPERF_ENABLE -DNUM_THREADS=8" make
OPAE Build Progress
-------------------
You can check the last 10 lines of the build log for possible errors until the build completes.
$ tail -n 10 <build_dir>/build.log
Check if the build is still running by looking for quartus_sh, quartus_syn, or quartus_fit programs.
$ ps -u <username>
If the build fails and you need to restart it, clean up the build folder using the following command:
$ make clean
The bitstream file `vortex_afu.gbs` should exist when the build is done:
$ ls -lsa <build_dir>/synth/vortex_afu.gbs
Signing the bitstream and Programming the FPGA
----------------------------------------------
$ cd <build_dir>
$ PACSign PR -t UPDATE -H openssl_manager -i vortex_afu.gbs -o vortex_afu_unsigned_ssl.gbs
$ fpgasupdate vortex_afu_unsigned_ssl.gbs
Sample FPGA Run Test
--------------------
Ensure you have the correct opae runtime for the FPGA target
$ make -C runtime/opae clean
$ TARGET=FPGA make -C runtime/opae
Run the following from your Vortex build directory
$ TARGET=fpga ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n128"

View file

@ -7,7 +7,8 @@
- [Cache Subsystem](cache_subsystem.md) - [Cache Subsystem](cache_subsystem.md)
- [Software](software.md) - [Software](software.md)
- [Simulation](simulation.md) - [Simulation](simulation.md)
- [FPGA Setup Guide](fpga_setup.md) - [Altera FPGA Setup Guide](altera_fpga_guide.md)
- [Xilinx FPGA Setup Guide](xilinx_fpga_guide.md)
- [Debugging](debugging.md) - [Debugging](debugging.md)
- [Useful Links](references.md) - [Useful Links](references.md)
@ -27,6 +28,6 @@ Running Vortex simulators with different configurations:
$ ./ci/blackbox.sh --driver=opae --clusters=1 --cores=4 --warps=4 --threads=2 --app=demo $ ./ci/blackbox.sh --driver=opae --clusters=1 --cores=4 --warps=4 --threads=2 --app=demo
- Run dogfood driver test with simx driver and Vortex config of 4 cluster, 4 cores, 8 warps, 6 threads - Run dogfood driver test with simx driver and Vortex config of 4 cluster, 4 cores, 8 warps, 6 threads
$ ./ci/blackbox.sh --driver=simx --clusters=4 --cores=4 --warps=8 --threads=6 --app=dogfood $ ./ci/blackbox.sh --driver=simx --clusters=4 --cores=4 --warps=8 --threads=6 --app=dogfood

36
docs/xilinx_fpga_guide.md Normal file
View file

@ -0,0 +1,36 @@
# FPGA Startup and Configuration Guide
XRT Environment Setup
----------------------
$ source /opt/xilinx/Vitis/2023.1/settings64.sh
$ source /opt/xilinx/xrt/setup.sh
Check Installed FPGA Platforms
------------------------------
$ platforminfo -l
Build FPGA image
----------------
$ cd hw/syn/xilinx/xrt
$ PREFIX=test1 PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 TARGET=hw NUM_CORES=4 make
This runs the synthesis in a new build directory: BUILD_DIR := "\<PREFIX>\_\<PLATFORM>\_\<TARGET>"
The generated bitstream will be located under <BUILD_DIR>/bin/vortex_afu.xclbin
Sample FPGA Run Test
--------------------
Ensure you have the correct XRT runtime for the FPGA target
$ make -C runtime/xrt clean
$ TARGET=hw make -C runtime/xrt
Run the following from your Vortex build directory
$ TARGET=hw FPGA_BIN_DIR=<BUILD_DIR>/bin ./ci/blackbox.sh --driver=xrt --app=sgemm --args="-n128"

View file

@ -9,13 +9,14 @@ all: config
config: VX_config.h VX_types.h config: VX_config.h VX_types.h
VX_config.h: $(RTL_DIR)/VX_config.vh VX_config.h: $(RTL_DIR)/VX_config.vh
$(SCRIPT_DIR)/gen_config.py -i $(RTL_DIR)/VX_config.vh -o VX_config.h $(SCRIPT_DIR)/gen_config.py -i $(RTL_DIR)/VX_config.vh -o VX_config.h
VX_types.h: $(RTL_DIR)/VX_types.vh VX_types.h: $(RTL_DIR)/VX_types.vh
$(SCRIPT_DIR)/gen_config.py -i $(RTL_DIR)/VX_types.vh -o VX_types.h $(SCRIPT_DIR)/gen_config.py -i $(RTL_DIR)/VX_types.vh -o VX_types.h
clean: clean:
$(MAKE) -C unittest clean
rm -f VX_config.h VX_types.h rm -f VX_config.h VX_types.h
.PHONY: VX_config.h VX_types.h .PHONY: VX_config.h VX_types.h

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -14,8 +14,6 @@
`ifndef FLOAT_DPI_VH `ifndef FLOAT_DPI_VH
`define FLOAT_DPI_VH `define FLOAT_DPI_VH
`include "VX_config.vh"
import "DPI-C" function void dpi_fadd(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags); import "DPI-C" function void dpi_fadd(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fsub(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags); import "DPI-C" function void dpi_fsub(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fmul(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags); import "DPI-C" function void dpi_fmul(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);

View file

@ -14,8 +14,6 @@
`ifndef UTIL_DPI_VH `ifndef UTIL_DPI_VH
`define UTIL_DPI_VH `define UTIL_DPI_VH
`include "VX_config.vh"
`ifdef XLEN_64 `ifdef XLEN_64
`define INT_TYPE longint `define INT_TYPE longint
`else `else

View file

@ -14,7 +14,8 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_cluster import VX_gpu_pkg::*; #( module VX_cluster import VX_gpu_pkg::*; #(
parameter CLUSTER_ID = 0 parameter CLUSTER_ID = 0,
parameter `STRING INSTANCE_ID = ""
) ( ) (
`SCOPE_IO_DECL `SCOPE_IO_DECL
@ -85,7 +86,7 @@ module VX_cluster import VX_gpu_pkg::*; #(
`RESET_RELAY (l2_reset, reset); `RESET_RELAY (l2_reset, reset);
VX_cache_wrap #( VX_cache_wrap #(
.INSTANCE_ID ("l2cache"), .INSTANCE_ID ($sformatf("%s-l2cache", INSTANCE_ID)),
.CACHE_SIZE (`L2_CACHE_SIZE), .CACHE_SIZE (`L2_CACHE_SIZE),
.LINE_SIZE (`L2_LINE_SIZE), .LINE_SIZE (`L2_LINE_SIZE),
.NUM_BANKS (`L2_NUM_BANKS), .NUM_BANKS (`L2_NUM_BANKS),
@ -98,6 +99,7 @@ module VX_cluster import VX_gpu_pkg::*; #(
.MREQ_SIZE (`L2_MREQ_SIZE), .MREQ_SIZE (`L2_MREQ_SIZE),
.TAG_WIDTH (L2_TAG_WIDTH), .TAG_WIDTH (L2_TAG_WIDTH),
.WRITE_ENABLE (1), .WRITE_ENABLE (1),
.WRITEBACK (`L2_WRITEBACK),
.UUID_WIDTH (`UUID_WIDTH), .UUID_WIDTH (`UUID_WIDTH),
.CORE_OUT_BUF (2), .CORE_OUT_BUF (2),
.MEM_OUT_BUF (2), .MEM_OUT_BUF (2),
@ -122,17 +124,19 @@ module VX_cluster import VX_gpu_pkg::*; #(
wire [`NUM_SOCKETS-1:0] per_socket_busy; wire [`NUM_SOCKETS-1:0] per_socket_busy;
VX_dcr_bus_if socket_dcr_bus_if();
`BUFFER_DCR_BUS_IF (socket_dcr_bus_if, socket_dcr_bus_tmp_if, (`NUM_SOCKETS > 1)); `BUFFER_DCR_BUS_IF (socket_dcr_bus_if, socket_dcr_bus_tmp_if, (`NUM_SOCKETS > 1));
// Generate all sockets // Generate all sockets
for (genvar i = 0; i < `NUM_SOCKETS; ++i) begin for (genvar socket_id = 0; socket_id < `NUM_SOCKETS; ++socket_id) begin : sockets
`RESET_RELAY (socket_reset, reset); `RESET_RELAY (socket_reset, reset);
VX_socket #( VX_socket #(
.SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + i) .SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + socket_id),
.INSTANCE_ID ($sformatf("%s-socket%0d", INSTANCE_ID, socket_id))
) socket ( ) socket (
`SCOPE_IO_BIND (scope_socket+i) `SCOPE_IO_BIND (scope_socket+socket_id)
.clk (clk), .clk (clk),
.reset (socket_reset), .reset (socket_reset),
@ -143,13 +147,13 @@ module VX_cluster import VX_gpu_pkg::*; #(
.dcr_bus_if (socket_dcr_bus_if), .dcr_bus_if (socket_dcr_bus_if),
.mem_bus_if (per_socket_mem_bus_if[i]), .mem_bus_if (per_socket_mem_bus_if[socket_id]),
`ifdef GBAR_ENABLE `ifdef GBAR_ENABLE
.gbar_bus_if (per_socket_gbar_bus_if[i]), .gbar_bus_if (per_socket_gbar_bus_if[socket_id]),
`endif `endif
.busy (per_socket_busy[i]) .busy (per_socket_busy[socket_id])
); );
end end

View file

@ -33,10 +33,6 @@
`endif `endif
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
`ifndef VM_DISABLE
`define VM_ENABLE
`endif
`ifndef EXT_M_DISABLE `ifndef EXT_M_DISABLE
`define EXT_M_ENABLE `define EXT_M_ENABLE
`endif `endif
@ -114,7 +110,6 @@
`ifndef SOCKET_SIZE `ifndef SOCKET_SIZE
`define SOCKET_SIZE `MIN(4, `NUM_CORES) `define SOCKET_SIZE `MIN(4, `NUM_CORES)
`endif `endif
`define NUM_SOCKETS `UP(`NUM_CORES / `SOCKET_SIZE)
`ifdef L2_ENABLE `ifdef L2_ENABLE
`define L2_ENABLED 1 `define L2_ENABLED 1
@ -357,7 +352,7 @@
// Number of SFU units // Number of SFU units
`ifndef NUM_SFU_LANES `ifndef NUM_SFU_LANES
`define NUM_SFU_LANES `MIN(`NUM_THREADS, 4) `define NUM_SFU_LANES `NUM_THREADS
`endif `endif
`ifndef NUM_SFU_BLOCKS `ifndef NUM_SFU_BLOCKS
`define NUM_SFU_BLOCKS 1 `define NUM_SFU_BLOCKS 1
@ -481,22 +476,27 @@
`define LATENCY_FCVT 5 `define LATENCY_FCVT 5
`endif `endif
// FMA Bandwidth ratio
`ifndef FMA_PE_RATIO `ifndef FMA_PE_RATIO
`define FMA_PE_RATIO 1 `define FMA_PE_RATIO 1
`endif `endif
// FDIV Bandwidth ratio
`ifndef FDIV_PE_RATIO `ifndef FDIV_PE_RATIO
`define FDIV_PE_RATIO 8 `define FDIV_PE_RATIO 8
`endif `endif
// FSQRT Bandwidth ratio
`ifndef FSQRT_PE_RATIO `ifndef FSQRT_PE_RATIO
`define FSQRT_PE_RATIO 8 `define FSQRT_PE_RATIO 8
`endif `endif
// FCVT Bandwidth ratio
`ifndef FCVT_PE_RATIO `ifndef FCVT_PE_RATIO
`define FCVT_PE_RATIO 8 `define FCVT_PE_RATIO 8
`endif `endif
// FNCP Bandwidth ratio
`ifndef FNCP_PE_RATIO `ifndef FNCP_PE_RATIO
`define FNCP_PE_RATIO 2 `define FNCP_PE_RATIO 2
`endif `endif
@ -603,7 +603,12 @@
`define DCACHE_NUM_WAYS 1 `define DCACHE_NUM_WAYS 1
`endif `endif
// SM Configurable Knobs ////////////////////////////////////////////////////// // Enable Cache Writeback
`ifndef DCACHE_WRITEBACK
`define DCACHE_WRITEBACK 0
`endif
// LMEM Configurable Knobs ////////////////////////////////////////////////////
`ifndef LMEM_DISABLE `ifndef LMEM_DISABLE
`define LMEM_ENABLE `define LMEM_ENABLE
@ -662,6 +667,11 @@
`define L2_NUM_WAYS 2 `define L2_NUM_WAYS 2
`endif `endif
// Enable Cache Writeback
`ifndef L2_WRITEBACK
`define L2_WRITEBACK 0
`endif
// L3cache Configurable Knobs ///////////////////////////////////////////////// // L3cache Configurable Knobs /////////////////////////////////////////////////
// Cache Size // Cache Size
@ -703,6 +713,11 @@
`define L3_NUM_WAYS 4 `define L3_NUM_WAYS 4
`endif `endif
// Enable Cache Writeback
`ifndef L3_WRITEBACK
`define L3_WRITEBACK 0
`endif
// ISA Extensions ///////////////////////////////////////////////////////////// // ISA Extensions /////////////////////////////////////////////////////////////
`ifdef EXT_A_ENABLE `ifdef EXT_A_ENABLE

View file

@ -59,6 +59,8 @@
`define OFFSET_BITS 12 `define OFFSET_BITS 12
`define IMM_BITS `XLEN `define IMM_BITS `XLEN
`define NUM_SOCKETS `UP(`NUM_CORES / `SOCKET_SIZE)
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
`define EX_ALU 0 `define EX_ALU 0
@ -296,6 +298,7 @@
`ifdef ICACHE_ENABLE `ifdef ICACHE_ENABLE
`define L1_ENABLE `define L1_ENABLE
`endif `endif
`ifdef DCACHE_ENABLE `ifdef DCACHE_ENABLE
`define L1_ENABLE `define L1_ENABLE
`endif `endif
@ -322,7 +325,7 @@
.DATAW ($bits(dst)), \ .DATAW ($bits(dst)), \
.RESETW ($bits(dst)), \ .RESETW ($bits(dst)), \
.DEPTH (latency) \ .DEPTH (latency) \
) __``dst ( \ ) __``dst``__ ( \
.clk (clk), \ .clk (clk), \
.reset (reset), \ .reset (reset), \
.enable (ena), \ .enable (ena), \
@ -336,13 +339,18 @@
VX_popcount #( \ VX_popcount #( \
.N ($bits(in)), \ .N ($bits(in)), \
.MODEL (model) \ .MODEL (model) \
) __``out ( \ ) __``out``__ ( \
.data_in (in), \ .data_in (in), \
.data_out (out) \ .data_out (out) \
) )
`define POP_COUNT(out, in) `POP_COUNT_EX(out, in, 1) `define POP_COUNT(out, in) `POP_COUNT_EX(out, in, 1)
// Connects two interfaces that expose valid/data/ready members:
// drives dst.valid and dst.data from src, and drives src.ready from dst.ready.
`define ASSIGN_VX_IF(dst, src) \
assign dst.valid = src.valid; \
assign dst.data = src.data; \
assign src.ready = dst.ready
`define ASSIGN_VX_MEM_BUS_IF(dst, src) \ `define ASSIGN_VX_MEM_BUS_IF(dst, src) \
assign dst.req_valid = src.req_valid; \ assign dst.req_valid = src.req_valid; \
assign dst.req_data = src.req_data; \ assign dst.req_data = src.req_data; \
@ -377,42 +385,42 @@
assign dst.rsp_ready = src.rsp_ready assign dst.rsp_ready = src.rsp_ready
`define BUFFER_DCR_BUS_IF(dst, src, enable) \ `define BUFFER_DCR_BUS_IF(dst, src, enable) \
logic [(1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH)-1:0] __``dst; \
if (enable) begin \ if (enable) begin \
reg [(1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH)-1:0] __dst; \
always @(posedge clk) begin \ always @(posedge clk) begin \
__``dst <= {src.write_valid, src.write_addr, src.write_data}; \ __dst <= {src.write_valid, src.write_addr, src.write_data}; \
end \ end \
assign {dst.write_valid, dst.write_addr, dst.write_data} = __dst; \
end else begin \ end else begin \
assign __``dst = {src.write_valid, src.write_addr, src.write_data}; \ assign {dst.write_valid, dst.write_addr, dst.write_data} = {src.write_valid, src.write_addr, src.write_data}; \
end \ end
VX_dcr_bus_if dst(); \
assign {dst.write_valid, dst.write_addr, dst.write_data} = __``dst
`define PERF_COUNTER_ADD(dst, src, field, width, dst_count, src_count, reg_enable) \ `define PERF_COUNTER_ADD(dst, src, field, width, count, reg_enable) \
for (genvar __d = 0; __d < dst_count; ++__d) begin \ if (count > 1) begin \
localparam __count = ((src_count > dst_count) ? `CDIV(src_count, dst_count) : 1); \ wire [count-1:0][width-1:0] __reduce_add_i_field; \
wire [__count-1:0][width-1:0] __reduce_add_i_``src``field; \ wire [width-1:0] __reduce_add_o_field; \
wire [width-1:0] __reduce_add_o_``dst``field; \ for (genvar __i = 0; __i < count; ++__i) begin \
for (genvar __i = 0; __i < __count; ++__i) begin \ assign __reduce_add_i_field[__i] = src[__i].``field; \
assign __reduce_add_i_``src``field[__i] = ``src[__d * __count + __i].``field; \
end \ end \
VX_reduce #(.DATAW_IN(width), .N(__count), .OP("+")) __reduce_add_``dst``field ( \ VX_reduce #(.DATAW_IN(width), .N(count), .OP("+")) __reduce_add_field ( \
__reduce_add_i_``src``field, \ __reduce_add_i_field, \
__reduce_add_o_``dst``field \ __reduce_add_o_field \
); \ ); \
if (reg_enable) begin \ if (reg_enable) begin \
reg [width-1:0] __reduce_add_r_``dst``field; \ reg [width-1:0] __reduce_add_r_field; \
always @(posedge clk) begin \ always @(posedge clk) begin \
if (reset) begin \ if (reset) begin \
__reduce_add_r_``dst``field <= '0; \ __reduce_add_r_field <= '0; \
end else begin \ end else begin \
__reduce_add_r_``dst``field <= __reduce_add_o_``dst``field; \ __reduce_add_r_field <= __reduce_add_o_field; \
end \ end \
end \ end \
assign ``dst[__d].``field = __reduce_add_r_``dst``field; \ assign dst.``field = __reduce_add_r_field; \
end else begin \ end else begin \
assign ``dst[__d].``field = __reduce_add_o_``dst``field; \ assign dst.``field = __reduce_add_o_field; \
end \ end \
end else begin \
assign dst.``field = src[0].``field; \
end end
`define ASSIGN_BLOCKED_WID(dst, src, block_idx, block_size) \ `define ASSIGN_BLOCKED_WID(dst, src, block_idx, block_size) \
@ -426,20 +434,4 @@
assign dst = src; \ assign dst = src; \
end end
`define TO_DISPATCH_DATA(data, tid) { \
data.uuid, \
data.wis, \
data.tmask, \
data.PC, \
data.op_type, \
data.op_args, \
data.wb, \
data.rd, \
tid, \
data.rs1_data, \
data.rs2_data, \
data.rs3_data}
///////////////////////////////////////////////////////////////////////////////
`endif // VX_DEFINE_VH `endif // VX_DEFINE_VH

View file

@ -60,6 +60,8 @@ package VX_gpu_pkg;
logic [7:0] mpm_class; logic [7:0] mpm_class;
} base_dcrs_t; } base_dcrs_t;
//////////////////////////// Perf counter types ///////////////////////////
typedef struct packed { typedef struct packed {
logic [`PERF_CTR_BITS-1:0] reads; logic [`PERF_CTR_BITS-1:0] reads;
logic [`PERF_CTR_BITS-1:0] writes; logic [`PERF_CTR_BITS-1:0] writes;
@ -77,48 +79,63 @@ package VX_gpu_pkg;
logic [`PERF_CTR_BITS-1:0] latency; logic [`PERF_CTR_BITS-1:0] latency;
} mem_perf_t; } mem_perf_t;
// Packed pair of performance counters, each `PERF_CTR_BITS wide: idles and stalls.
// NOTE(review): presumably reported by the warp scheduler - confirm against users of sched_perf_t.
typedef struct packed {
logic [`PERF_CTR_BITS-1:0] idles;
logic [`PERF_CTR_BITS-1:0] stalls;
} sched_perf_t;
// Packed group of performance counters, each `PERF_CTR_BITS wide:
// three stall counters plus two counter arrays sized by `NUM_EX_UNITS and `NUM_SFU_UNITS.
// NOTE(review): ibf/scb/opd presumably stand for instruction buffer, scoreboard and operands - confirm.
typedef struct packed {
logic [`PERF_CTR_BITS-1:0] ibf_stalls;
logic [`PERF_CTR_BITS-1:0] scb_stalls;
logic [`PERF_CTR_BITS-1:0] opd_stalls;
logic [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] units_uses;
logic [`NUM_SFU_UNITS-1:0][`PERF_CTR_BITS-1:0] sfu_uses;
} issue_perf_t;
//////////////////////// instruction arguments ////////////////////////////
typedef struct packed { typedef struct packed {
logic use_PC; logic use_PC;
logic use_imm; logic use_imm;
logic is_w; logic is_w;
logic [`ALU_TYPE_BITS-1:0] xtype; logic [`ALU_TYPE_BITS-1:0] xtype;
logic [`IMM_BITS-1:0] imm; logic [`IMM_BITS-1:0] imm;
} alu_mod_t; } alu_args_t;
typedef struct packed { typedef struct packed {
logic [($bits(alu_mod_t)-`INST_FRM_BITS-`INST_FMT_BITS)-1:0] __padding; logic [($bits(alu_args_t)-`INST_FRM_BITS-`INST_FMT_BITS)-1:0] __padding;
logic [`INST_FRM_BITS-1:0] frm; logic [`INST_FRM_BITS-1:0] frm;
logic [`INST_FMT_BITS-1:0] fmt; logic [`INST_FMT_BITS-1:0] fmt;
} fpu_mod_t; } fpu_args_t;
typedef struct packed { typedef struct packed {
logic [($bits(alu_mod_t)-1-1-`OFFSET_BITS)-1:0] __padding; logic [($bits(alu_args_t)-1-1-`OFFSET_BITS)-1:0] __padding;
logic is_store; logic is_store;
logic is_float; logic is_float;
logic [`OFFSET_BITS-1:0] offset; logic [`OFFSET_BITS-1:0] offset;
} lsu_mod_t; } lsu_args_t;
typedef struct packed { typedef struct packed {
logic [($bits(alu_mod_t)-1-`VX_CSR_ADDR_BITS-5)-1:0] __padding; logic [($bits(alu_args_t)-1-`VX_CSR_ADDR_BITS-5)-1:0] __padding;
logic use_imm; logic use_imm;
logic [`VX_CSR_ADDR_BITS-1:0] addr; logic [`VX_CSR_ADDR_BITS-1:0] addr;
logic [4:0] imm; logic [4:0] imm;
} csr_mod_t; } csr_args_t;
typedef struct packed { typedef struct packed {
logic [($bits(alu_mod_t)-1)-1:0] __padding; logic [($bits(alu_args_t)-1)-1:0] __padding;
logic is_neg; logic is_neg;
} wctl_mod_t; } wctl_args_t;
typedef union packed { typedef union packed {
alu_mod_t alu; alu_args_t alu;
fpu_mod_t fpu; fpu_args_t fpu;
lsu_mod_t lsu; lsu_args_t lsu;
csr_mod_t csr; csr_args_t csr;
wctl_mod_t wctl; wctl_args_t wctl;
} op_args_t; } op_args_t;
/* verilator lint_off UNUSED */ `IGNORE_UNUSED_BEGIN
///////////////////////// LSU memory Parameters /////////////////////////// ///////////////////////// LSU memory Parameters ///////////////////////////
@ -129,6 +146,31 @@ package VX_gpu_pkg;
localparam LSU_TAG_WIDTH = (`UUID_WIDTH + LSU_TAG_ID_BITS); localparam LSU_TAG_WIDTH = (`UUID_WIDTH + LSU_TAG_ID_BITS);
localparam LSU_NUM_REQS = `NUM_LSU_BLOCKS * `NUM_LSU_LANES; localparam LSU_NUM_REQS = `NUM_LSU_BLOCKS * `NUM_LSU_LANES;
////////////////////////// Icache Parameters //////////////////////////////
// Derived instruction-cache constants. Later entries are computed from
// earlier ones, so the declaration order matters.
// Word size in bytes
localparam ICACHE_WORD_SIZE = 4;
// Word-granular address width: `MEM_ADDR_WIDTH minus the byte-offset bits
// within one word (assumes `CLOG2 is ceil(log2) - TODO confirm).
localparam ICACHE_ADDR_WIDTH = (`MEM_ADDR_WIDTH - `CLOG2(ICACHE_WORD_SIZE));
// Block size in bytes
localparam ICACHE_LINE_SIZE = `L1_LINE_SIZE;
// Core request tag Id bits
localparam ICACHE_TAG_ID_BITS = `NW_WIDTH;
// Core request tag bits
localparam ICACHE_TAG_WIDTH = (`UUID_WIDTH + ICACHE_TAG_ID_BITS);
// Memory request data bits
localparam ICACHE_MEM_DATA_WIDTH = (ICACHE_LINE_SIZE * 8);
// Memory request tag bits
// Two alternative formulas selected by ICACHE_ENABLE: the cache-cluster tag
// width macro when it is defined, the bypass tag width macro otherwise.
`ifdef ICACHE_ENABLE
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_MEM_TAG_WIDTH(`ICACHE_MSHR_SIZE, 1, `NUM_ICACHES);
`else
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(1, ICACHE_LINE_SIZE, ICACHE_WORD_SIZE, ICACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_ICACHES);
`endif
////////////////////////// Dcache Parameters ////////////////////////////// ////////////////////////// Dcache Parameters //////////////////////////////
// Word size in bytes // Word size in bytes
@ -154,36 +196,11 @@ package VX_gpu_pkg;
localparam DCACHE_MEM_DATA_WIDTH = (DCACHE_LINE_SIZE * 8); localparam DCACHE_MEM_DATA_WIDTH = (DCACHE_LINE_SIZE * 8);
// Memory request tag bits // Memory request tag bits
`ifdef DCACHE_ENABLE `ifdef DCACHE_ENABLE
localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_NC_MEM_TAG_WIDTH(`DCACHE_MSHR_SIZE, `DCACHE_NUM_BANKS, DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES); localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_NC_MEM_TAG_WIDTH(`DCACHE_MSHR_SIZE, `DCACHE_NUM_BANKS, DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES);
`else `else
localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES); localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES);
`endif `endif
////////////////////////// Icache Parameters //////////////////////////////
// Word size in bytes
localparam ICACHE_WORD_SIZE = 4;
localparam ICACHE_ADDR_WIDTH = (`MEM_ADDR_WIDTH - `CLOG2(ICACHE_WORD_SIZE));
// Block size in bytes
localparam ICACHE_LINE_SIZE = `L1_LINE_SIZE;
// Core request tag Id bits
localparam ICACHE_TAG_ID_BITS = `NW_WIDTH;
// Core request tag bits
localparam ICACHE_TAG_WIDTH = (`UUID_WIDTH + ICACHE_TAG_ID_BITS);
// Memory request data bits
localparam ICACHE_MEM_DATA_WIDTH = (ICACHE_LINE_SIZE * 8);
// Memory request tag bits
`ifdef ICACHE_ENABLE
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_MEM_TAG_WIDTH(`ICACHE_MSHR_SIZE, 1, `NUM_ICACHES);
`else
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(1, ICACHE_LINE_SIZE, ICACHE_WORD_SIZE, ICACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_ICACHES);
`endif
/////////////////////////////// L1 Parameters ///////////////////////////// /////////////////////////////// L1 Parameters /////////////////////////////
@ -208,11 +225,11 @@ package VX_gpu_pkg;
localparam L2_MEM_DATA_WIDTH = (`L2_LINE_SIZE * 8); localparam L2_MEM_DATA_WIDTH = (`L2_LINE_SIZE * 8);
// Memory request tag bits // Memory request tag bits
`ifdef L2_ENABLE `ifdef L2_ENABLE
localparam L2_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L2_MSHR_SIZE, `L2_NUM_BANKS, L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH); localparam L2_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L2_MSHR_SIZE, `L2_NUM_BANKS, L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH);
`else `else
localparam L2_MEM_TAG_WIDTH = `CACHE_BYPASS_TAG_WIDTH(L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH); localparam L2_MEM_TAG_WIDTH = `CACHE_BYPASS_TAG_WIDTH(L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH);
`endif `endif
/////////////////////////////// L3 Parameters ///////////////////////////// /////////////////////////////// L3 Parameters /////////////////////////////
@ -229,23 +246,20 @@ package VX_gpu_pkg;
localparam L3_MEM_DATA_WIDTH = (`L3_LINE_SIZE * 8); localparam L3_MEM_DATA_WIDTH = (`L3_LINE_SIZE * 8);
// Memory request tag bits // Memory request tag bits
`ifdef L3_ENABLE `ifdef L3_ENABLE
localparam L3_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L3_MSHR_SIZE, `L3_NUM_BANKS, L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH); localparam L3_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L3_MSHR_SIZE, `L3_NUM_BANKS, L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH);
`else `else
localparam L3_MEM_TAG_WIDTH = `CACHE_BYPASS_TAG_WIDTH(L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH); localparam L3_MEM_TAG_WIDTH = `CACHE_BYPASS_TAG_WIDTH(L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH);
`endif `endif
/* verilator lint_on UNUSED */
/////////////////////////////// Issue parameters ////////////////////////// /////////////////////////////// Issue parameters //////////////////////////
localparam ISSUE_ISW = `CLOG2(`ISSUE_WIDTH); localparam ISSUE_ISW = `CLOG2(`ISSUE_WIDTH);
localparam ISSUE_ISW_W = `UP(ISSUE_ISW); localparam ISSUE_ISW_W = `UP(ISSUE_ISW);
localparam ISSUE_RATIO = `NUM_WARPS / `ISSUE_WIDTH; localparam PER_ISSUE_WARPS = `NUM_WARPS / `ISSUE_WIDTH;
localparam ISSUE_WIS = `CLOG2(ISSUE_RATIO); localparam ISSUE_WIS = `CLOG2(PER_ISSUE_WARPS);
localparam ISSUE_WIS_W = `UP(ISSUE_WIS); localparam ISSUE_WIS_W = `UP(ISSUE_WIS);
`IGNORE_UNUSED_BEGIN
function logic [`NW_WIDTH-1:0] wis_to_wid( function logic [`NW_WIDTH-1:0] wis_to_wid(
input logic [ISSUE_WIS_W-1:0] wis, input logic [ISSUE_WIS_W-1:0] wis,
input logic [ISSUE_ISW_W-1:0] isw input logic [ISSUE_ISW_W-1:0] isw
@ -278,6 +292,20 @@ package VX_gpu_pkg;
wid_to_wis = 0; wid_to_wis = 0;
end end
endfunction endfunction
///////////////////////// Miscellaneous functions /////////////////////////
// Maps an SFU instruction opcode to the SFU sub-unit selector.
//   op_type : instruction opcode (`INST_OP_BITS wide)
//   returns : `SFU_CSRS for the three CSR opcodes (CSRRW/CSRRS/CSRRC),
//             `SFU_WCTL for every other opcode
function logic [`SFU_WIDTH-1:0] op_to_sfu_type(
    input logic [`INST_OP_BITS-1:0] op_type
);
    // Start from the fallback selector, then override it for CSR accesses.
    op_to_sfu_type = `SFU_WCTL;
    case (op_type)
        `INST_SFU_CSRRW, `INST_SFU_CSRRS, `INST_SFU_CSRRC: op_to_sfu_type = `SFU_CSRS;
        default: ; // keep the fallback selector
    endcase
endfunction
`IGNORE_UNUSED_END `IGNORE_UNUSED_END
endpackage endpackage

View file

@ -47,7 +47,7 @@
`define UNUSED_VAR(x) `define UNUSED_VAR(x)
`define UNUSED_PIN(x) . x () `define UNUSED_PIN(x) . x ()
`define UNUSED_ARG(x) x `define UNUSED_ARG(x) x
`define TRACE(level, args) $write args `define TRACE(level, args) if (level <= `DEBUG_LEVEL) $write args
`else `else
`ifdef VERILATOR `ifdef VERILATOR
`define TRACING_ON /* verilator tracing_on */ `define TRACING_ON /* verilator tracing_on */
@ -112,8 +112,14 @@
`define UNUSED_ARG(x) /* verilator lint_off UNUSED */ \ `define UNUSED_ARG(x) /* verilator lint_off UNUSED */ \
x \ x \
/* verilator lint_on UNUSED */ /* verilator lint_on UNUSED */
`define TRACE(level, args) dpi_trace(level, $sformatf args)
`endif `endif
`ifdef SV_DPI
`define TRACE(level, args) dpi_trace(level, $sformatf args)
`else
`define TRACE(level, args) if (level <= `DEBUG_LEVEL) $write args
`endif
`endif `endif
`ifdef SIMULATION `ifdef SIMULATION

View file

@ -14,7 +14,8 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_socket import VX_gpu_pkg::*; #( module VX_socket import VX_gpu_pkg::*; #(
parameter SOCKET_ID = 0 parameter SOCKET_ID = 0,
parameter `STRING INSTANCE_ID = ""
) ( ) (
`SCOPE_IO_DECL `SCOPE_IO_DECL
@ -40,6 +41,11 @@ module VX_socket import VX_gpu_pkg::*; #(
output wire busy output wire busy
); );
`ifdef SCOPE
localparam scope_core = 0;
`SCOPE_IO_SWITCH (`SOCKET_SIZE);
`endif
`ifdef GBAR_ENABLE `ifdef GBAR_ENABLE
VX_gbar_bus_if per_core_gbar_bus_if[`SOCKET_SIZE](); VX_gbar_bus_if per_core_gbar_bus_if[`SOCKET_SIZE]();
@ -81,7 +87,7 @@ module VX_socket import VX_gpu_pkg::*; #(
`RESET_RELAY (icache_reset, reset); `RESET_RELAY (icache_reset, reset);
VX_cache_cluster #( VX_cache_cluster #(
.INSTANCE_ID ($sformatf("socket%0d-icache", SOCKET_ID)), .INSTANCE_ID ($sformatf("%s-icache", INSTANCE_ID)),
.NUM_UNITS (`NUM_ICACHES), .NUM_UNITS (`NUM_ICACHES),
.NUM_INPUTS (`SOCKET_SIZE), .NUM_INPUTS (`SOCKET_SIZE),
.TAG_SEL_IDX (0), .TAG_SEL_IDX (0),
@ -126,7 +132,7 @@ module VX_socket import VX_gpu_pkg::*; #(
`RESET_RELAY (dcache_reset, reset); `RESET_RELAY (dcache_reset, reset);
VX_cache_cluster #( VX_cache_cluster #(
.INSTANCE_ID ($sformatf("socket%0d-dcache", SOCKET_ID)), .INSTANCE_ID ($sformatf("%s-dcache", INSTANCE_ID)),
.NUM_UNITS (`NUM_DCACHES), .NUM_UNITS (`NUM_DCACHES),
.NUM_INPUTS (`SOCKET_SIZE), .NUM_INPUTS (`SOCKET_SIZE),
.TAG_SEL_IDX (0), .TAG_SEL_IDX (0),
@ -143,8 +149,9 @@ module VX_socket import VX_gpu_pkg::*; #(
.TAG_WIDTH (DCACHE_TAG_WIDTH), .TAG_WIDTH (DCACHE_TAG_WIDTH),
.UUID_WIDTH (`UUID_WIDTH), .UUID_WIDTH (`UUID_WIDTH),
.WRITE_ENABLE (1), .WRITE_ENABLE (1),
.WRITEBACK (`DCACHE_WRITEBACK),
.NC_ENABLE (1), .NC_ENABLE (1),
.CORE_OUT_BUF (`LMEM_ENABLED ? 2 : 1), .CORE_OUT_BUF (2),
.MEM_OUT_BUF (2) .MEM_OUT_BUF (2)
) dcache ( ) dcache (
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
@ -194,19 +201,19 @@ module VX_socket import VX_gpu_pkg::*; #(
wire [`SOCKET_SIZE-1:0] per_core_busy; wire [`SOCKET_SIZE-1:0] per_core_busy;
VX_dcr_bus_if core_dcr_bus_if();
`BUFFER_DCR_BUS_IF (core_dcr_bus_if, dcr_bus_if, (`SOCKET_SIZE > 1)); `BUFFER_DCR_BUS_IF (core_dcr_bus_if, dcr_bus_if, (`SOCKET_SIZE > 1));
`SCOPE_IO_SWITCH (`SOCKET_SIZE)
// Generate all cores // Generate all cores
for (genvar i = 0; i < `SOCKET_SIZE; ++i) begin for (genvar core_id = 0; core_id < `SOCKET_SIZE; ++core_id) begin : cores
`RESET_RELAY (core_reset, reset); `RESET_RELAY (core_reset, reset);
VX_core #( VX_core #(
.CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + i) .CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + core_id),
.INSTANCE_ID ($sformatf("%s-core%0d", INSTANCE_ID, core_id))
) core ( ) core (
`SCOPE_IO_BIND (i) `SCOPE_IO_BIND (scope_core + core_id)
.clk (clk), .clk (clk),
.reset (core_reset), .reset (core_reset),
@ -217,15 +224,15 @@ module VX_socket import VX_gpu_pkg::*; #(
.dcr_bus_if (core_dcr_bus_if), .dcr_bus_if (core_dcr_bus_if),
.dcache_bus_if (per_core_dcache_bus_if[i * DCACHE_NUM_REQS +: DCACHE_NUM_REQS]), .dcache_bus_if (per_core_dcache_bus_if[core_id * DCACHE_NUM_REQS +: DCACHE_NUM_REQS]),
.icache_bus_if (per_core_icache_bus_if[i]), .icache_bus_if (per_core_icache_bus_if[core_id]),
`ifdef GBAR_ENABLE `ifdef GBAR_ENABLE
.gbar_bus_if (per_core_gbar_bus_if[i]), .gbar_bus_if (per_core_gbar_bus_if[core_id]),
`endif `endif
.busy (per_core_busy[i]) .busy (per_core_busy[core_id])
); );
end end

View file

@ -85,30 +85,31 @@
`define VX_CSR_MPM_IBUF_ST_H 12'hB85 `define VX_CSR_MPM_IBUF_ST_H 12'hB85
`define VX_CSR_MPM_SCRB_ST 12'hB06 `define VX_CSR_MPM_SCRB_ST 12'hB06
`define VX_CSR_MPM_SCRB_ST_H 12'hB86 `define VX_CSR_MPM_SCRB_ST_H 12'hB86
`define VX_CSR_MPM_SCRB_ALU 12'hB07 `define VX_CSR_MPM_OPDS_ST 12'hB07
`define VX_CSR_MPM_SCRB_ALU_H 12'hB87 `define VX_CSR_MPM_OPDS_ST_H 12'hB87
`define VX_CSR_MPM_SCRB_FPU 12'hB08 `define VX_CSR_MPM_SCRB_ALU 12'hB08
`define VX_CSR_MPM_SCRB_FPU_H 12'hB88 `define VX_CSR_MPM_SCRB_ALU_H 12'hB88
`define VX_CSR_MPM_SCRB_LSU 12'hB09 `define VX_CSR_MPM_SCRB_FPU 12'hB09
`define VX_CSR_MPM_SCRB_LSU_H 12'hB89 `define VX_CSR_MPM_SCRB_FPU_H 12'hB89
`define VX_CSR_MPM_SCRB_SFU 12'hB0A `define VX_CSR_MPM_SCRB_LSU 12'hB0A
`define VX_CSR_MPM_SCRB_SFU_H 12'hB8A `define VX_CSR_MPM_SCRB_LSU_H 12'hB8A
`define VX_CSR_MPM_SCRB_SFU 12'hB0B
`define VX_CSR_MPM_SCRB_SFU_H 12'hB8B
`define VX_CSR_MPM_SCRB_CSRS 12'hB0C
`define VX_CSR_MPM_SCRB_CSRS_H 12'hB8C
`define VX_CSR_MPM_SCRB_WCTL 12'hB0D
`define VX_CSR_MPM_SCRB_WCTL_H 12'hB8D
// PERF: memory // PERF: memory
`define VX_CSR_MPM_IFETCHES 12'hB0B `define VX_CSR_MPM_IFETCHES 12'hB0E
`define VX_CSR_MPM_IFETCHES_H 12'hB8B `define VX_CSR_MPM_IFETCHES_H 12'hB8E
`define VX_CSR_MPM_LOADS 12'hB0C `define VX_CSR_MPM_LOADS 12'hB0F
`define VX_CSR_MPM_LOADS_H 12'hB8C `define VX_CSR_MPM_LOADS_H 12'hB8F
`define VX_CSR_MPM_STORES 12'hB0D `define VX_CSR_MPM_STORES 12'hB10
`define VX_CSR_MPM_STORES_H 12'hB8D `define VX_CSR_MPM_STORES_H 12'hB90
`define VX_CSR_MPM_IFETCH_LT 12'hB0E `define VX_CSR_MPM_IFETCH_LT 12'hB11
`define VX_CSR_MPM_IFETCH_LT_H 12'hB8E `define VX_CSR_MPM_IFETCH_LT_H 12'hB91
`define VX_CSR_MPM_LOAD_LT 12'hB0F `define VX_CSR_MPM_LOAD_LT 12'hB12
`define VX_CSR_MPM_LOAD_LT_H 12'hB8F `define VX_CSR_MPM_LOAD_LT_H 12'hB92
// SFU: scoreboard
`define VX_CSR_MPM_SCRB_WCTL 12'hB10
`define VX_CSR_MPM_SCRB_WCTL_H 12'hB90
`define VX_CSR_MPM_SCRB_CSRS 12'hB11
`define VX_CSR_MPM_SCRB_CSRS_H 12'hB91
// Machine Performance-monitoring memory counters (class 2) /////////////////// // Machine Performance-monitoring memory counters (class 2) ///////////////////

View file

@ -44,6 +44,11 @@ module Vortex import VX_gpu_pkg::*; (
output wire busy output wire busy
); );
`ifdef SCOPE
localparam scope_cluster = 0;
`SCOPE_IO_SWITCH (`NUM_CLUSTERS);
`endif
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
VX_mem_perf_if mem_perf_if(); VX_mem_perf_if mem_perf_if();
assign mem_perf_if.icache = 'x; assign mem_perf_if.icache = 'x;
@ -78,6 +83,7 @@ module Vortex import VX_gpu_pkg::*; (
.MREQ_SIZE (`L3_MREQ_SIZE), .MREQ_SIZE (`L3_MREQ_SIZE),
.TAG_WIDTH (L2_MEM_TAG_WIDTH), .TAG_WIDTH (L2_MEM_TAG_WIDTH),
.WRITE_ENABLE (1), .WRITE_ENABLE (1),
.WRITEBACK (`L3_WRITEBACK),
.UUID_WIDTH (`UUID_WIDTH), .UUID_WIDTH (`UUID_WIDTH),
.CORE_OUT_BUF (2), .CORE_OUT_BUF (2),
.MEM_OUT_BUF (2), .MEM_OUT_BUF (2),
@ -121,19 +127,19 @@ module Vortex import VX_gpu_pkg::*; (
wire [`NUM_CLUSTERS-1:0] per_cluster_busy; wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
`SCOPE_IO_SWITCH (`NUM_CLUSTERS)
// Generate all clusters // Generate all clusters
for (genvar i = 0; i < `NUM_CLUSTERS; ++i) begin for (genvar cluster_id = 0; cluster_id < `NUM_CLUSTERS; ++cluster_id) begin : clusters
`RESET_RELAY (cluster_reset, reset); `RESET_RELAY (cluster_reset, reset);
VX_dcr_bus_if cluster_dcr_bus_if();
`BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, (`NUM_CLUSTERS > 1)); `BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, (`NUM_CLUSTERS > 1));
VX_cluster #( VX_cluster #(
.CLUSTER_ID (i) .CLUSTER_ID (cluster_id),
.INSTANCE_ID ($sformatf("cluster%0d", cluster_id))
) cluster ( ) cluster (
`SCOPE_IO_BIND (i) `SCOPE_IO_BIND (scope_cluster + cluster_id)
.clk (clk), .clk (clk),
.reset (cluster_reset), .reset (cluster_reset),
@ -144,9 +150,9 @@ module Vortex import VX_gpu_pkg::*; (
.dcr_bus_if (cluster_dcr_bus_if), .dcr_bus_if (cluster_dcr_bus_if),
.mem_bus_if (per_cluster_mem_bus_if[i]), .mem_bus_if (per_cluster_mem_bus_if[cluster_id]),
.busy (per_cluster_busy[i]) .busy (per_cluster_busy[cluster_id])
); );
end end

View file

@ -5,6 +5,7 @@
// To be done: // To be done:
// Check how to run this with OPAE. Looks like setup issue // Check how to run this with OPAE. Looks like setup issue
`ifndef NOPAE
`include "platform_if.vh" `include "platform_if.vh"
@ -85,7 +86,7 @@ module ccip_std_afu #(
t_local_mem_data avs_writedata [NUM_LOCAL_MEM_BANKS]; t_local_mem_data avs_writedata [NUM_LOCAL_MEM_BANKS];
t_local_mem_addr avs_address [NUM_LOCAL_MEM_BANKS]; t_local_mem_addr avs_address [NUM_LOCAL_MEM_BANKS];
logic avs_write [NUM_LOCAL_MEM_BANKS]; logic avs_write [NUM_LOCAL_MEM_BANKS];
logic avs_read [NUM_LOCAL_MEM_BANKS]; logic avs_read [NUM_LOCAL_MEM_BANKS];
for (genvar b = 0; b < NUM_LOCAL_MEM_BANKS; b++) begin for (genvar b = 0; b < NUM_LOCAL_MEM_BANKS; b++) begin
assign local_mem[b].burstcount = avs_burstcount[b]; assign local_mem[b].burstcount = avs_burstcount[b];
@ -94,7 +95,7 @@ module ccip_std_afu #(
assign local_mem[b].byteenable = avs_byteenable[b]; assign local_mem[b].byteenable = avs_byteenable[b];
assign local_mem[b].write = avs_write[b]; assign local_mem[b].write = avs_write[b];
assign local_mem[b].read = avs_read[b]; assign local_mem[b].read = avs_read[b];
assign avs_waitrequest[b] = local_mem[b].waitrequest; assign avs_waitrequest[b] = local_mem[b].waitrequest;
assign avs_readdata[b] = local_mem[b].readdata; assign avs_readdata[b] = local_mem[b].readdata;
assign avs_readdatavalid[b] = local_mem[b].readdatavalid; assign avs_readdatavalid[b] = local_mem[b].readdatavalid;
@ -107,7 +108,7 @@ module ccip_std_afu #(
.reset (reset_T1), .reset (reset_T1),
.cp2af_sRxPort (cp2af_sRx_T1), .cp2af_sRxPort (cp2af_sRx_T1),
.af2cp_sTxPort (af2cp_sTx_T0), .af2cp_sTxPort (af2cp_sTx_T0),
.avs_writedata (avs_writedata), .avs_writedata (avs_writedata),
.avs_readdata (avs_readdata), .avs_readdata (avs_readdata),
@ -121,3 +122,5 @@ module ccip_std_afu #(
); );
endmodule endmodule
`endif

View file

@ -587,7 +587,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.DATA_SIZE (LMEM_DATA_SIZE), .DATA_SIZE (LMEM_DATA_SIZE),
.ADDR_WIDTH (LMEM_ADDR_WIDTH), .ADDR_WIDTH (LMEM_ADDR_WIDTH),
.TAG_WIDTH (AVS_REQ_TAGW), .TAG_WIDTH (AVS_REQ_TAGW),
.ARBITER ("P"), .ARBITER ("P"), // prioritize VX requests
.REQ_OUT_BUF (0), .REQ_OUT_BUF (0),
.RSP_OUT_BUF (0) .RSP_OUT_BUF (0)
) mem_arb ( ) mem_arb (
@ -692,9 +692,11 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.reset (reset), .reset (reset),
.incr (cci_rd_req_fire), .incr (cci_rd_req_fire),
.decr (cci_rdq_pop), .decr (cci_rdq_pop),
`UNUSED_PIN (empty),
`UNUSED_PIN (alm_empty),
.full (cci_pending_reads_full), .full (cci_pending_reads_full),
.size (cci_pending_reads), `UNUSED_PIN (alm_full),
`UNUSED_PIN (empty) .size (cci_pending_reads)
); );
`UNUSED_VAR (cci_pending_reads) `UNUSED_VAR (cci_pending_reads)
@ -852,7 +854,9 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.incr (cci_mem_rd_rsp_fire), .incr (cci_mem_rd_rsp_fire),
.decr (cci_wr_rsp_fire), .decr (cci_wr_rsp_fire),
.empty (cci_pending_writes_empty), .empty (cci_pending_writes_empty),
`UNUSED_PIN (alm_empty),
.full (cci_pending_writes_full), .full (cci_pending_writes_full),
`UNUSED_PIN (alm_full),
.size (cci_pending_writes) .size (cci_pending_writes)
); );
@ -1010,7 +1014,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
// SCOPE ////////////////////////////////////////////////////////////////////// // SCOPE //////////////////////////////////////////////////////////////////////
`ifdef DBG_SCOPE_AFU `ifdef DBG_SCOPE_AFU
`ifdef SCOPE
wire mem_req_fire = mem_bus_if[0].req_valid && mem_bus_if[0].req_ready; wire mem_req_fire = mem_bus_if[0].req_valid && mem_bus_if[0].req_ready;
wire mem_rsp_fire = mem_bus_if[0].rsp_valid && mem_bus_if[0].rsp_ready; wire mem_rsp_fire = mem_bus_if[0].rsp_valid && mem_bus_if[0].rsp_ready;
wire avs_write_fire = avs_write[0] && ~avs_waitrequest[0]; wire avs_write_fire = avs_write[0] && ~avs_waitrequest[0];
@ -1080,7 +1083,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.bus_in(scope_bus_in_w[0]), .bus_in(scope_bus_in_w[0]),
.bus_out(scope_bus_out_w[0]) .bus_out(scope_bus_out_w[0])
); );
`endif
`else `else
`SCOPE_IO_UNUSED_W(0) `SCOPE_IO_UNUSED_W(0)
`endif `endif

View file

@ -311,7 +311,6 @@ module VX_afu_wrap #(
// SCOPE ////////////////////////////////////////////////////////////////////// // SCOPE //////////////////////////////////////////////////////////////////////
`ifdef DBG_SCOPE_AFU `ifdef DBG_SCOPE_AFU
`ifdef SCOPE
`define TRIGGERS { \ `define TRIGGERS { \
reset, \ reset, \
ap_start, \ ap_start, \
@ -330,35 +329,17 @@ module VX_afu_wrap #(
VX_scope_tap #( VX_scope_tap #(
.SCOPE_ID (0), .SCOPE_ID (0),
.TRIGGERW ($bits(`TRIGGERS)), .TRIGGERW ($bits(`TRIGGERS)),
.PROBEW ($bits(`PROBES)) .PROBEW ($bits(`PROBES))
) scope_tap ( ) scope_tap (
.clk(clk), .clk (clk),
.reset(scope_reset_w[0]), .reset (scope_reset_w[0]),
.start(1'b0), .start (1'b0),
.stop(1'b0), .stop (1'b0),
.triggers(`TRIGGERS), .triggers (`TRIGGERS),
.probes(`PROBES), .probes (`PROBES),
.bus_in(scope_bus_in_w[0]), .bus_in (scope_bus_in_w[0]),
.bus_out(scope_bus_out_w[0]) .bus_out (scope_bus_out_w[0])
); );
`endif
`ifdef CHIPSCOPE
ila_afu ila_afu_inst (
.clk (ap_clk),
.probe0 ({
ap_start,
ap_done,
ap_idle,
interrupt
}),
.probe1 ({
vx_pending_writes,
vx_busy_wait,
vx_busy,
vx_running
})
);
`endif
`else `else
`SCOPE_IO_UNUSED_W(0) `SCOPE_IO_UNUSED_W(0)
`endif `endif

109
hw/rtl/cache/VX_bank_flush.sv vendored Normal file
View file

@ -0,0 +1,109 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_cache_define.vh"
// Cache bank flush sequencer.
//
// Drives a line (and, for write-back caches, way) counter through the bank:
//   * STATE_INIT  - entered on reset; sweeps all 2**`CS_LINE_SEL_BITS lines
//                   with `flush_out_init` asserted, one line per cycle.
//   * STATE_IDLE  - waits for `flush_in_valid` while `mshr_empty` is high.
//   * STATE_FLUSH - asserts `flush_out_valid` and advances the counter each
//                   time `flush_out_ready` accepts the current request; once
//                   the last request is accepted, returns to idle and pulses
//                   `flush_in_ready` for one cycle (registered).
//
// CACHE_SIZE / LINE_SIZE / NUM_BANKS are not referenced directly here; they
// are presumably consumed by the `CS_* macros from VX_cache_define.vh.
module VX_bank_flush #(
    // Size of cache in bytes
    parameter CACHE_SIZE = 1024,
    // Size of line inside a bank in bytes
    parameter LINE_SIZE  = 64,
    // Number of banks
    parameter NUM_BANKS  = 1,
    // Number of associative ways
    parameter NUM_WAYS   = 1,
    // Enable cache writeback
    parameter WRITEBACK  = 0
) (
    input wire  clk,
    input wire  reset,

    // Flush request from the upstream flush controller
    input wire  flush_in_valid,
    output wire flush_in_ready,

    // Flush/init requests toward the bank pipeline
    output wire flush_out_init,
    output wire flush_out_valid,
    output wire [`CS_LINE_SEL_BITS-1:0] flush_out_line,
    output wire [NUM_WAYS-1:0] flush_out_way,
    input wire  flush_out_ready,

    // High when the bank has no outstanding misses
    input wire  mshr_empty
);
    // Derived constants. Declared as localparam: they must not be overridden
    // by an instantiating module.
    // Write-back flushes visit every way of every line, so the counter gets
    // extra way-select bits on top of the line-select bits.
    localparam CTR_WIDTH   = `CS_LINE_SEL_BITS + (WRITEBACK ? `CS_WAY_SEL_BITS : 0);
    localparam STATE_IDLE  = 2'd0;
    localparam STATE_INIT  = 2'd1;
    localparam STATE_FLUSH = 2'd2;

    reg [CTR_WIDTH-1:0] counter_r;
    reg [1:0] state_r, state_n;
    reg flush_in_ready_r, flush_in_ready_n;

    // Next-state logic
    always @(*) begin
        state_n = state_r;
        flush_in_ready_n = 0;
        case (state_r)
            // STATE_IDLE (also catches the unused encoding 2'd3)
            default: begin
                // only start flushing once all pending misses have drained
                if (flush_in_valid && mshr_empty) begin
                    state_n = STATE_FLUSH;
                end
            end
            STATE_INIT: begin
                // initialization walks the lines only (no per-way pass)
                if (counter_r == ((2 ** `CS_LINE_SEL_BITS)-1)) begin
                    state_n = STATE_IDLE;
                end
            end
            STATE_FLUSH: begin
                // Leave only once the LAST request has actually been accepted.
                // Without the flush_out_ready term, back-pressure on the final
                // request would drop it.
                if ((counter_r == ((2 ** CTR_WIDTH)-1)) && flush_out_ready) begin
                    state_n = STATE_IDLE;
                    flush_in_ready_n = 1;
                end
            end
        endcase
    end

    // State and counter registers
    always @(posedge clk) begin
        if (reset) begin
            state_r          <= STATE_INIT;
            counter_r        <= '0;
            flush_in_ready_r <= '0;
        end else begin
            state_r          <= state_n;
            flush_in_ready_r <= flush_in_ready_n;
            if (state_r != STATE_IDLE) begin
                // INIT advances every cycle; FLUSH advances on acceptance only
                if ((state_r == STATE_INIT) || flush_out_ready) begin
                    counter_r <= counter_r + CTR_WIDTH'(1);
                end
            end else begin
                counter_r <= '0;
            end
        end
    end

    assign flush_in_ready  = flush_in_ready_r;

    assign flush_out_init  = (state_r == STATE_INIT);
    assign flush_out_valid = (state_r == STATE_FLUSH);
    assign flush_out_line  = counter_r[`CS_LINE_SEL_BITS-1:0];

    if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin
        // one-hot decode of the way index held in the upper counter bits
        reg [NUM_WAYS-1:0] flush_out_way_r;
        always @(*) begin
            flush_out_way_r = '0;
            flush_out_way_r[counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]] = 1;
        end
        assign flush_out_way = flush_out_way_r;
    end else begin
        // no per-way iteration: address all ways at once
        assign flush_out_way = {NUM_WAYS{1'b1}};
    end

endmodule

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -14,15 +14,15 @@
`include "VX_cache_define.vh" `include "VX_cache_define.vh"
module VX_cache import VX_gpu_pkg::*; #( module VX_cache import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "", parameter `STRING INSTANCE_ID = "",
// Number of Word requests per cycle // Number of Word requests per cycle
parameter NUM_REQS = 4, parameter NUM_REQS = 4,
// Size of cache in bytes // Size of cache in bytes
parameter CACHE_SIZE = 4096, parameter CACHE_SIZE = 4096,
// Size of line inside a bank in bytes // Size of line inside a bank in bytes
parameter LINE_SIZE = 64, parameter LINE_SIZE = 64,
// Number of banks // Number of banks
parameter NUM_BANKS = 1, parameter NUM_BANKS = 1,
// Number of associative ways // Number of associative ways
@ -33,7 +33,7 @@ module VX_cache import VX_gpu_pkg::*; #(
// Core Response Queue Size // Core Response Queue Size
parameter CRSQ_SIZE = 2, parameter CRSQ_SIZE = 2,
// Miss Reserv Queue Knob // Miss Reserv Queue Knob
parameter MSHR_SIZE = 8, parameter MSHR_SIZE = 8,
// Memory Response Queue Size // Memory Response Queue Size
parameter MRSQ_SIZE = 0, parameter MRSQ_SIZE = 0,
// Memory Request Queue Size // Memory Request Queue Size
@ -42,6 +42,9 @@ module VX_cache import VX_gpu_pkg::*; #(
// Enable cache writeable // Enable cache writeable
parameter WRITE_ENABLE = 1, parameter WRITE_ENABLE = 1,
// Enable cache writeback
parameter WRITEBACK = 0,
// Request debug identifier // Request debug identifier
parameter UUID_WIDTH = 0, parameter UUID_WIDTH = 0,
@ -53,12 +56,12 @@ module VX_cache import VX_gpu_pkg::*; #(
// Memory request output register // Memory request output register
parameter MEM_OUT_BUF = 0 parameter MEM_OUT_BUF = 0
) ( ) (
// PERF // PERF
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
output cache_perf_t cache_perf, output cache_perf_t cache_perf,
`endif `endif
input wire clk, input wire clk,
input wire reset, input wire reset,
@ -67,6 +70,7 @@ module VX_cache import VX_gpu_pkg::*; #(
); );
`STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter")) `STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter"))
`STATIC_ASSERT(WRITE_ENABLE || !WRITEBACK, ("invalid parameter"))
localparam REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS); localparam REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS);
localparam WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS); localparam WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS);
@ -78,36 +82,46 @@ module VX_cache import VX_gpu_pkg::*; #(
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS); localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS); localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
localparam LINE_ADDR_WIDTH = (`CS_WORD_ADDR_WIDTH - BANK_SEL_BITS - WORD_SEL_BITS); localparam LINE_ADDR_WIDTH = (`CS_WORD_ADDR_WIDTH - BANK_SEL_BITS - WORD_SEL_BITS);
localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH; localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + 1;
localparam CORE_RSP_DATAW = WORD_WIDTH + TAG_WIDTH; localparam CORE_RSP_DATAW = WORD_WIDTH + TAG_WIDTH;
localparam CORE_REQ_BUF_ENABLE = (NUM_BANKS != 1) || (NUM_REQS != 1); localparam CORE_REQ_BUF_ENABLE = (NUM_BANKS != 1) || (NUM_REQS != 1);
localparam MEM_REQ_BUF_ENABLE = (NUM_BANKS != 1); localparam MEM_REQ_BUF_ENABLE = (NUM_BANKS != 1);
localparam REQ_XBAR_BUF = (NUM_REQS > 4) ? 2 : 0;
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
wire [NUM_BANKS-1:0] perf_read_miss_per_bank; wire [NUM_BANKS-1:0] perf_read_miss_per_bank;
wire [NUM_BANKS-1:0] perf_write_miss_per_bank; wire [NUM_BANKS-1:0] perf_write_miss_per_bank;
wire [NUM_BANKS-1:0] perf_mshr_stall_per_bank; wire [NUM_BANKS-1:0] perf_mshr_stall_per_bank;
`endif `endif
wire [NUM_REQS-1:0] core_req_valid; VX_mem_bus_if #(
wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr; .DATA_SIZE (WORD_SIZE),
wire [NUM_REQS-1:0] core_req_rw; .TAG_WIDTH (TAG_WIDTH)
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen; ) core_bus2_if[NUM_REQS]();
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data;
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag;
wire [NUM_REQS-1:0] core_req_ready;
for (genvar i = 0; i < NUM_REQS; ++i) begin wire [NUM_BANKS-1:0] per_bank_flush_valid;
assign core_req_valid[i] = core_bus_if[i].req_valid; wire [NUM_BANKS-1:0] per_bank_flush_ready;
assign core_req_rw[i] = core_bus_if[i].req_data.rw;
assign core_req_byteen[i] = core_bus_if[i].req_data.byteen; wire [NUM_BANKS-1:0] per_bank_core_req_fire;
assign core_req_addr[i] = core_bus_if[i].req_data.addr;
assign core_req_data[i] = core_bus_if[i].req_data.data; // this reset relay is required to sync with bank initialization
assign core_req_tag[i] = core_bus_if[i].req_data.tag; `RESET_RELAY (flush_reset, reset);
assign core_bus_if[i].req_ready = core_req_ready[i];
`UNUSED_VAR (core_bus_if[i].req_data.atype) VX_cache_flush #(
end .NUM_REQS (NUM_REQS),
.NUM_BANKS (NUM_BANKS),
.BANK_SEL_LATENCY (`TO_OUT_BUF_REG(REQ_XBAR_BUF)) // bank xbar latency
) flush_unit (
.clk (clk),
.reset (flush_reset),
.core_bus_in_if (core_bus_if),
.core_bus_out_if (core_bus2_if),
.bank_req_fire (per_bank_core_req_fire),
.flush_valid (per_bank_flush_valid),
.flush_ready (per_bank_flush_ready)
);
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
@ -117,10 +131,10 @@ module VX_cache import VX_gpu_pkg::*; #(
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s; wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s;
wire [NUM_REQS-1:0] core_rsp_ready_s; wire [NUM_REQS-1:0] core_rsp_ready_s;
`RESET_RELAY (core_rsp_reset, reset);
for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar i = 0; i < NUM_REQS; ++i) begin
`RESET_RELAY (core_rsp_reset, reset);
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (`CS_WORD_WIDTH + TAG_WIDTH), .DATAW (`CS_WORD_WIDTH + TAG_WIDTH),
.SIZE (CORE_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0), .SIZE (CORE_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0),
@ -131,9 +145,9 @@ module VX_cache import VX_gpu_pkg::*; #(
.valid_in (core_rsp_valid_s[i]), .valid_in (core_rsp_valid_s[i]),
.ready_in (core_rsp_ready_s[i]), .ready_in (core_rsp_ready_s[i]),
.data_in ({core_rsp_data_s[i], core_rsp_tag_s[i]}), .data_in ({core_rsp_data_s[i], core_rsp_tag_s[i]}),
.data_out ({core_bus_if[i].rsp_data.data, core_bus_if[i].rsp_data.tag}), .data_out ({core_bus2_if[i].rsp_data.data, core_bus2_if[i].rsp_data.tag}),
.valid_out (core_bus_if[i].rsp_valid), .valid_out (core_bus2_if[i].rsp_valid),
.ready_out (core_bus_if[i].rsp_ready) .ready_out (core_bus2_if[i].rsp_ready)
); );
end end
@ -146,24 +160,29 @@ module VX_cache import VX_gpu_pkg::*; #(
wire [LINE_SIZE-1:0] mem_req_byteen_s; wire [LINE_SIZE-1:0] mem_req_byteen_s;
wire [`CS_LINE_WIDTH-1:0] mem_req_data_s; wire [`CS_LINE_WIDTH-1:0] mem_req_data_s;
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_s; wire [MEM_TAG_WIDTH-1:0] mem_req_tag_s;
wire mem_req_flush_s;
wire mem_req_ready_s; wire mem_req_ready_s;
wire mem_bus_if_flush;
`RESET_RELAY (mem_req_reset, reset);
// Elastic buffer on the outgoing memory-request path (flattened side-by-side
// diff: removed text on the left, added text on the right of each line).
// The added version widens DATAW by one bit so that mem_req_flush_s travels
// through the buffer and comes out as mem_bus_if_flush, and it resets the
// buffer through mem_req_reset instead of the raw reset.
// The field order of data_in and data_out must stay identical.
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH), .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1),
.SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), .SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
) mem_req_buf ( ) mem_req_buf (
.clk (clk), .clk (clk),
.reset (reset), .reset (mem_req_reset),
.valid_in (mem_req_valid_s), .valid_in (mem_req_valid_s),
.ready_in (mem_req_ready_s), .ready_in (mem_req_ready_s),
.data_in ({mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_data_s, mem_req_tag_s}), .data_in ({mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_data_s, mem_req_tag_s, mem_req_flush_s}),
.data_out ({mem_bus_if.req_data.rw, mem_bus_if.req_data.byteen, mem_bus_if.req_data.addr, mem_bus_if.req_data.data, mem_bus_if.req_data.tag}), .data_out ({mem_bus_if.req_data.rw, mem_bus_if.req_data.byteen, mem_bus_if.req_data.addr, mem_bus_if.req_data.data, mem_bus_if.req_data.tag, mem_bus_if_flush}),
.valid_out (mem_bus_if.req_valid), .valid_out (mem_bus_if.req_valid),
.ready_out (mem_bus_if.req_ready) .ready_out (mem_bus_if.req_ready)
); );
// Removed version: atype is always zero. Added version: the buffered flush
// bit sets the ADDR_TYPE_FLUSH position of atype, otherwise atype is zero.
assign mem_bus_if.req_data.atype = '0; assign mem_bus_if.req_data.atype = mem_bus_if_flush ? `ADDR_TYPE_WIDTH'(1 << `ADDR_TYPE_FLUSH) : '0;
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
@ -172,44 +191,26 @@ module VX_cache import VX_gpu_pkg::*; #(
wire [`CS_LINE_WIDTH-1:0] mem_rsp_data_s; wire [`CS_LINE_WIDTH-1:0] mem_rsp_data_s;
wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_s; wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_s;
wire mem_rsp_ready_s; wire mem_rsp_ready_s;
`RESET_RELAY (mem_rsp_reset, reset);
// Queue for incoming memory responses (flattened side-by-side diff: removed
// text on the left, added text on the right of each line).
// It buffers the {tag, data} pair arriving on mem_bus_if and presents it as
// mem_rsp_tag_s / mem_rsp_data_s with the mem_rsp_valid_s / mem_rsp_ready_s
// handshake. Depth is MRSQ_SIZE; the output register is enabled only when
// MRSQ_SIZE > 2. The added version resets through mem_rsp_reset.
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (MEM_TAG_WIDTH + `CS_LINE_WIDTH), .DATAW (MEM_TAG_WIDTH + `CS_LINE_WIDTH),
.SIZE (MRSQ_SIZE), .SIZE (MRSQ_SIZE),
.OUT_REG (MRSQ_SIZE > 2) .OUT_REG (MRSQ_SIZE > 2)
) mem_rsp_queue ( ) mem_rsp_queue (
.clk (clk), .clk (clk),
.reset (reset), .reset (mem_rsp_reset),
.valid_in (mem_bus_if.rsp_valid), .valid_in (mem_bus_if.rsp_valid),
.ready_in (mem_bus_if.rsp_ready), .ready_in (mem_bus_if.rsp_ready),
.data_in ({mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data}), .data_in ({mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data}),
.data_out ({mem_rsp_tag_s, mem_rsp_data_s}), .data_out ({mem_rsp_tag_s, mem_rsp_data_s}),
.valid_out (mem_rsp_valid_s), .valid_out (mem_rsp_valid_s),
.ready_out (mem_rsp_ready_s) .ready_out (mem_rsp_ready_s)
); );
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
wire [`CS_LINE_SEL_BITS-1:0] init_line_sel;
wire init_enable;
// this reset relay is required to sync with bank initialization
`RESET_RELAY (init_reset, reset);
VX_cache_init #(
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_WAYS (NUM_WAYS)
) cache_init (
.clk (clk),
.reset (init_reset),
.addr_out (init_line_sel),
.valid_out (init_enable)
);
///////////////////////////////////////////////////////////////////////
wire [NUM_BANKS-1:0] per_bank_core_req_valid; wire [NUM_BANKS-1:0] per_bank_core_req_valid;
wire [NUM_BANKS-1:0][`CS_LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr; wire [NUM_BANKS-1:0][`CS_LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr;
wire [NUM_BANKS-1:0] per_bank_core_req_rw; wire [NUM_BANKS-1:0] per_bank_core_req_rw;
@ -218,25 +219,28 @@ module VX_cache import VX_gpu_pkg::*; #(
wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_req_data; wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_req_data;
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_req_tag; wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_req_tag;
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_req_idx; wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_req_idx;
wire [NUM_BANKS-1:0] per_bank_core_req_flush;
wire [NUM_BANKS-1:0] per_bank_core_req_ready; wire [NUM_BANKS-1:0] per_bank_core_req_ready;
wire [NUM_BANKS-1:0] per_bank_core_rsp_valid; wire [NUM_BANKS-1:0] per_bank_core_rsp_valid;
wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_rsp_data; wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_rsp_data;
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_rsp_tag; wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_rsp_tag;
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_rsp_idx; wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_rsp_idx;
wire [NUM_BANKS-1:0] per_bank_core_rsp_ready; wire [NUM_BANKS-1:0] per_bank_core_rsp_ready;
wire [NUM_BANKS-1:0] per_bank_mem_req_valid; wire [NUM_BANKS-1:0] per_bank_mem_req_valid;
wire [NUM_BANKS-1:0][`CS_MEM_ADDR_WIDTH-1:0] per_bank_mem_req_addr; wire [NUM_BANKS-1:0][`CS_MEM_ADDR_WIDTH-1:0] per_bank_mem_req_addr;
wire [NUM_BANKS-1:0] per_bank_mem_req_rw; wire [NUM_BANKS-1:0] per_bank_mem_req_rw;
wire [NUM_BANKS-1:0][WORD_SEL_WIDTH-1:0] per_bank_mem_req_wsel; wire [NUM_BANKS-1:0][LINE_SIZE-1:0] per_bank_mem_req_byteen;
wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_mem_req_byteen; wire [NUM_BANKS-1:0][`CS_LINE_WIDTH-1:0] per_bank_mem_req_data;
wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_mem_req_data;
wire [NUM_BANKS-1:0][MSHR_ADDR_WIDTH-1:0] per_bank_mem_req_id; wire [NUM_BANKS-1:0][MSHR_ADDR_WIDTH-1:0] per_bank_mem_req_id;
wire [NUM_BANKS-1:0] per_bank_mem_req_flush;
wire [NUM_BANKS-1:0] per_bank_mem_req_ready; wire [NUM_BANKS-1:0] per_bank_mem_req_ready;
wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready; wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready;
// A bank-side core request fires when the request is valid AND the bank
// accepts it, so the valid must be paired with the ready of the same
// (core request) channel. The memory-request ready belongs to the separate
// bank-to-memory arbiter handshake and says nothing about whether the core
// request was consumed.
assign per_bank_core_req_fire = per_bank_core_req_valid & per_bank_core_req_ready;
if (NUM_BANKS == 1) begin if (NUM_BANKS == 1) begin
assign mem_rsp_ready_s = per_bank_mem_rsp_ready; assign mem_rsp_ready_s = per_bank_mem_rsp_ready;
end else begin end else begin
@ -245,12 +249,33 @@ module VX_cache import VX_gpu_pkg::*; #(
// Bank requests dispatch // Bank requests dispatch
wire [NUM_REQS-1:0][CORE_REQ_DATAW-1:0] core_req_data_in; wire [NUM_REQS-1:0] core_req_valid;
wire [NUM_BANKS-1:0][CORE_REQ_DATAW-1:0] core_req_data_out; wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr;
wire [NUM_REQS-1:0] core_req_rw;
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen;
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data;
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag;
wire [NUM_REQS-1:0] core_req_flush;
wire [NUM_REQS-1:0] core_req_ready;
wire [NUM_REQS-1:0][LINE_ADDR_WIDTH-1:0] core_req_line_addr; wire [NUM_REQS-1:0][LINE_ADDR_WIDTH-1:0] core_req_line_addr;
wire [NUM_REQS-1:0][BANK_SEL_WIDTH-1:0] core_req_bid; wire [NUM_REQS-1:0][BANK_SEL_WIDTH-1:0] core_req_bid;
wire [NUM_REQS-1:0][WORD_SEL_WIDTH-1:0] core_req_wsel; wire [NUM_REQS-1:0][WORD_SEL_WIDTH-1:0] core_req_wsel;
wire [NUM_REQS-1:0][CORE_REQ_DATAW-1:0] core_req_data_in;
wire [NUM_BANKS-1:0][CORE_REQ_DATAW-1:0] core_req_data_out;
// Unpack every core_bus2_if request port into the flat per-request vectors
// (core_req_valid, core_req_rw, ...) used by the bank dispatch logic below.
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_req_valid[i] = core_bus2_if[i].req_valid;
assign core_req_rw[i] = core_bus2_if[i].req_data.rw;
assign core_req_byteen[i] = core_bus2_if[i].req_data.byteen;
assign core_req_addr[i] = core_bus2_if[i].req_data.addr;
assign core_req_data[i] = core_bus2_if[i].req_data.data;
assign core_req_tag[i] = core_bus2_if[i].req_data.tag;
// the flush flag is the ADDR_TYPE_FLUSH bit of the request's atype field
assign core_req_flush[i] = core_bus2_if[i].req_data.atype[`ADDR_TYPE_FLUSH];
// ready flows in the opposite direction: back onto the interface
assign core_bus2_if[i].req_ready = core_req_ready[i];
end
for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar i = 0; i < NUM_REQS; ++i) begin
if (WORDS_PER_LINE > 1) begin if (WORDS_PER_LINE > 1) begin
assign core_req_wsel[i] = core_req_addr[i][0 +: WORD_SEL_BITS]; assign core_req_wsel[i] = core_req_addr[i][0 +: WORD_SEL_BITS];
@ -273,9 +298,11 @@ module VX_cache import VX_gpu_pkg::*; #(
core_req_line_addr[i], core_req_line_addr[i],
core_req_rw[i], core_req_rw[i],
core_req_wsel[i], core_req_wsel[i],
core_req_byteen[i], core_req_byteen[i],
core_req_data[i], core_req_data[i],
core_req_tag[i]}; core_req_tag[i],
core_req_flush[i]
};
end end
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
@ -284,12 +311,12 @@ module VX_cache import VX_gpu_pkg::*; #(
`RESET_RELAY (req_xbar_reset, reset); `RESET_RELAY (req_xbar_reset, reset);
VX_stream_xbar #( VX_stream_xbar #(
.NUM_INPUTS (NUM_REQS), .NUM_INPUTS (NUM_REQS),
.NUM_OUTPUTS (NUM_BANKS), .NUM_OUTPUTS (NUM_BANKS),
.DATAW (CORE_REQ_DATAW), .DATAW (CORE_REQ_DATAW),
.PERF_CTR_BITS (`PERF_CTR_BITS), .PERF_CTR_BITS (`PERF_CTR_BITS),
.OUT_BUF ((NUM_REQS > 4) ? 2 : 0) .OUT_BUF (REQ_XBAR_BUF)
) req_xbar ( ) req_xbar (
.clk (clk), .clk (clk),
.reset (req_xbar_reset), .reset (req_xbar_reset),
@ -313,27 +340,29 @@ module VX_cache import VX_gpu_pkg::*; #(
per_bank_core_req_addr[i], per_bank_core_req_addr[i],
per_bank_core_req_rw[i], per_bank_core_req_rw[i],
per_bank_core_req_wsel[i], per_bank_core_req_wsel[i],
per_bank_core_req_byteen[i], per_bank_core_req_byteen[i],
per_bank_core_req_data[i], per_bank_core_req_data[i],
per_bank_core_req_tag[i]} = core_req_data_out[i]; per_bank_core_req_tag[i],
per_bank_core_req_flush[i]
} = core_req_data_out[i];
end end
// Banks access // Banks access
for (genvar i = 0; i < NUM_BANKS; ++i) begin for (genvar bank_id = 0; bank_id < NUM_BANKS; ++bank_id) begin : banks
wire [`CS_LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr; wire [`CS_LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr;
wire curr_bank_mem_rsp_valid; wire curr_bank_mem_rsp_valid;
if (NUM_BANKS == 1) begin if (NUM_BANKS == 1) begin
assign curr_bank_mem_rsp_valid = mem_rsp_valid_s; assign curr_bank_mem_rsp_valid = mem_rsp_valid_s;
end else begin end else begin
assign curr_bank_mem_rsp_valid = mem_rsp_valid_s && (`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s) == i); assign curr_bank_mem_rsp_valid = mem_rsp_valid_s && (`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s) == bank_id);
end end
`RESET_RELAY (bank_reset, reset); `RESET_RELAY (bank_reset, reset);
VX_cache_bank #( VX_cache_bank #(
.BANK_ID (i), .BANK_ID (bank_id),
.INSTANCE_ID (INSTANCE_ID), .INSTANCE_ID ($sformatf("%s-bank%0d", INSTANCE_ID, bank_id)),
.CACHE_SIZE (CACHE_SIZE), .CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE), .LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS), .NUM_BANKS (NUM_BANKS),
@ -344,65 +373,66 @@ module VX_cache import VX_gpu_pkg::*; #(
.MSHR_SIZE (MSHR_SIZE), .MSHR_SIZE (MSHR_SIZE),
.MREQ_SIZE (MREQ_SIZE), .MREQ_SIZE (MREQ_SIZE),
.WRITE_ENABLE (WRITE_ENABLE), .WRITE_ENABLE (WRITE_ENABLE),
.WRITEBACK (WRITEBACK),
.UUID_WIDTH (UUID_WIDTH), .UUID_WIDTH (UUID_WIDTH),
.TAG_WIDTH (TAG_WIDTH), .TAG_WIDTH (TAG_WIDTH),
.CORE_OUT_BUF (CORE_REQ_BUF_ENABLE ? 0 : CORE_OUT_BUF), .CORE_OUT_BUF (CORE_REQ_BUF_ENABLE ? 0 : CORE_OUT_BUF),
.MEM_OUT_BUF (MEM_REQ_BUF_ENABLE ? 0 : MEM_OUT_BUF) .MEM_OUT_BUF (MEM_REQ_BUF_ENABLE ? 0 : MEM_OUT_BUF)
) bank ( ) bank (
.clk (clk), .clk (clk),
.reset (bank_reset), .reset (bank_reset),
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
.perf_read_misses (perf_read_miss_per_bank[i]), .perf_read_misses (perf_read_miss_per_bank[bank_id]),
.perf_write_misses (perf_write_miss_per_bank[i]), .perf_write_misses (perf_write_miss_per_bank[bank_id]),
.perf_mshr_stalls (perf_mshr_stall_per_bank[i]), .perf_mshr_stalls (perf_mshr_stall_per_bank[bank_id]),
`endif `endif
// Core request
.core_req_valid (per_bank_core_req_valid[i]),
.core_req_addr (per_bank_core_req_addr[i]),
.core_req_rw (per_bank_core_req_rw[i]),
.core_req_wsel (per_bank_core_req_wsel[i]),
.core_req_byteen (per_bank_core_req_byteen[i]),
.core_req_data (per_bank_core_req_data[i]),
.core_req_tag (per_bank_core_req_tag[i]),
.core_req_idx (per_bank_core_req_idx[i]),
.core_req_ready (per_bank_core_req_ready[i]),
// Core response // Core request
.core_rsp_valid (per_bank_core_rsp_valid[i]), .core_req_valid (per_bank_core_req_valid[bank_id]),
.core_rsp_data (per_bank_core_rsp_data[i]), .core_req_addr (per_bank_core_req_addr[bank_id]),
.core_rsp_tag (per_bank_core_rsp_tag[i]), .core_req_rw (per_bank_core_req_rw[bank_id]),
.core_rsp_idx (per_bank_core_rsp_idx[i]), .core_req_wsel (per_bank_core_req_wsel[bank_id]),
.core_rsp_ready (per_bank_core_rsp_ready[i]), .core_req_byteen (per_bank_core_req_byteen[bank_id]),
.core_req_data (per_bank_core_req_data[bank_id]),
.core_req_tag (per_bank_core_req_tag[bank_id]),
.core_req_idx (per_bank_core_req_idx[bank_id]),
.core_req_flush (per_bank_core_req_flush[bank_id]),
.core_req_ready (per_bank_core_req_ready[bank_id]),
// Core response
.core_rsp_valid (per_bank_core_rsp_valid[bank_id]),
.core_rsp_data (per_bank_core_rsp_data[bank_id]),
.core_rsp_tag (per_bank_core_rsp_tag[bank_id]),
.core_rsp_idx (per_bank_core_rsp_idx[bank_id]),
.core_rsp_ready (per_bank_core_rsp_ready[bank_id]),
// Memory request // Memory request
.mem_req_valid (per_bank_mem_req_valid[i]), .mem_req_valid (per_bank_mem_req_valid[bank_id]),
.mem_req_addr (curr_bank_mem_req_addr), .mem_req_addr (curr_bank_mem_req_addr),
.mem_req_rw (per_bank_mem_req_rw[i]), .mem_req_rw (per_bank_mem_req_rw[bank_id]),
.mem_req_wsel (per_bank_mem_req_wsel[i]), .mem_req_byteen (per_bank_mem_req_byteen[bank_id]),
.mem_req_byteen (per_bank_mem_req_byteen[i]), .mem_req_data (per_bank_mem_req_data[bank_id]),
.mem_req_data (per_bank_mem_req_data[i]), .mem_req_id (per_bank_mem_req_id[bank_id]),
.mem_req_id (per_bank_mem_req_id[i]), .mem_req_flush (per_bank_mem_req_flush[bank_id]),
.mem_req_ready (per_bank_mem_req_ready[i]), .mem_req_ready (per_bank_mem_req_ready[bank_id]),
// Memory response // Memory response
.mem_rsp_valid (curr_bank_mem_rsp_valid), .mem_rsp_valid (curr_bank_mem_rsp_valid),
.mem_rsp_data (mem_rsp_data_s), .mem_rsp_data (mem_rsp_data_s),
.mem_rsp_id (`CS_MEM_TAG_TO_REQ_ID(mem_rsp_tag_s)), .mem_rsp_id (`CS_MEM_TAG_TO_REQ_ID(mem_rsp_tag_s)),
.mem_rsp_ready (per_bank_mem_rsp_ready[i]), .mem_rsp_ready (per_bank_mem_rsp_ready[bank_id]),
// initialization .flush_valid (per_bank_flush_valid[bank_id]),
.init_enable (init_enable), .flush_ready (per_bank_flush_ready[bank_id])
.init_line_sel (init_line_sel)
); );
if (NUM_BANKS == 1) begin if (NUM_BANKS == 1) begin
assign per_bank_mem_req_addr[i] = curr_bank_mem_req_addr; assign per_bank_mem_req_addr[bank_id] = curr_bank_mem_req_addr;
end else begin end else begin
assign per_bank_mem_req_addr[i] = `CS_LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, i); assign per_bank_mem_req_addr[bank_id] = `CS_LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, bank_id);
end end
end end
// Bank responses gather // Bank responses gather
@ -442,37 +472,41 @@ module VX_cache import VX_gpu_pkg::*; #(
wire mem_req_valid_p; wire mem_req_valid_p;
wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_p; wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_p;
wire mem_req_rw_p; wire mem_req_rw_p;
wire [WORD_SEL_WIDTH-1:0] mem_req_wsel_p; wire [LINE_SIZE-1:0] mem_req_byteen_p;
wire [WORD_SIZE-1:0] mem_req_byteen_p; wire [`CS_LINE_WIDTH-1:0] mem_req_data_p;
wire [`CS_WORD_WIDTH-1:0] mem_req_data_p;
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_p; wire [MEM_TAG_WIDTH-1:0] mem_req_tag_p;
wire [MSHR_ADDR_WIDTH-1:0] mem_req_id_p; wire [MSHR_ADDR_WIDTH-1:0] mem_req_id_p;
wire mem_req_flush_p;
wire mem_req_ready_p; wire mem_req_ready_p;
// Memory request arbitration // Memory request arbitration
wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + `CS_WORD_WIDTH)-1:0] data_in; wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + 1)-1:0] data_in;
// Pack each bank's memory-request fields into one data_in[i] vector for the
// memory request arbiter (flattened side-by-side diff: removed text on the
// left, added text on the right of each line).
// The added version drops the word-select field and appends the per-bank
// flush bit as the last (least significant) field. The field order here has
// to match the data_out concatenation on the mem_req_arb instance.
for (genvar i = 0; i < NUM_BANKS; ++i) begin for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign data_in[i] = {per_bank_mem_req_addr[i], assign data_in[i] = {
per_bank_mem_req_rw[i], per_bank_mem_req_addr[i],
per_bank_mem_req_wsel[i], per_bank_mem_req_rw[i],
per_bank_mem_req_byteen[i], per_bank_mem_req_byteen[i],
per_bank_mem_req_data[i], per_bank_mem_req_data[i],
per_bank_mem_req_id[i]}; per_bank_mem_req_id[i],
per_bank_mem_req_flush[i]
};
end end
`RESET_RELAY (mem_arb_reset, reset);
VX_stream_arb #( VX_stream_arb #(
.NUM_INPUTS (NUM_BANKS), .NUM_INPUTS (NUM_BANKS),
.DATAW (`CS_MEM_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + `CS_WORD_WIDTH + MSHR_ADDR_WIDTH), .DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + MSHR_ADDR_WIDTH + 1),
.ARBITER ("R") .ARBITER ("F")
) mem_req_arb ( ) mem_req_arb (
.clk (clk), .clk (clk),
.reset (reset), .reset (mem_arb_reset),
.valid_in (per_bank_mem_req_valid), .valid_in (per_bank_mem_req_valid),
.ready_in (per_bank_mem_req_ready), .ready_in (per_bank_mem_req_ready),
.data_in (data_in), .data_in (data_in),
.data_out ({mem_req_addr_p, mem_req_rw_p, mem_req_wsel_p, mem_req_byteen_p, mem_req_data_p, mem_req_id_p}), .data_out ({mem_req_addr_p, mem_req_rw_p, mem_req_byteen_p, mem_req_data_p, mem_req_id_p, mem_req_flush_p}),
.valid_out (mem_req_valid_p), .valid_out (mem_req_valid_p),
.ready_out (mem_req_ready_p), .ready_out (mem_req_ready_p),
`UNUSED_PIN (sel_out) `UNUSED_PIN (sel_out)
@ -480,44 +514,28 @@ module VX_cache import VX_gpu_pkg::*; #(
// Build the outgoing memory tag (identical in the removed and added columns).
// With more than one bank, the bank id derived from the arbitrated address is
// prepended to the request id; the memory response path decodes that bank id
// from the returned tag to select the destination bank. With a single bank
// the request id alone is cast to the tag width.
if (NUM_BANKS > 1) begin if (NUM_BANKS > 1) begin
wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr_p); wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr_p);
assign mem_req_tag_p = MEM_TAG_WIDTH'({mem_req_bank_id, mem_req_id_p}); assign mem_req_tag_p = MEM_TAG_WIDTH'({mem_req_bank_id, mem_req_id_p});
end else begin end else begin
assign mem_req_tag_p = MEM_TAG_WIDTH'(mem_req_id_p); assign mem_req_tag_p = MEM_TAG_WIDTH'(mem_req_id_p);
end end
// Memory request multi-port handling // Memory request multi-port handling
assign mem_req_valid_s = mem_req_valid_p; assign mem_req_valid_s = mem_req_valid_p;
assign mem_req_addr_s = mem_req_addr_p; assign mem_req_addr_s = mem_req_addr_p;
assign mem_req_tag_s = mem_req_tag_p; assign mem_req_tag_s = mem_req_tag_p;
assign mem_req_flush_s = mem_req_flush_p;
assign mem_req_ready_p = mem_req_ready_s; assign mem_req_ready_p = mem_req_ready_s;
if (WRITE_ENABLE != 0) begin if (WRITE_ENABLE != 0) begin
if (`CS_WORDS_PER_LINE > 1) begin assign mem_req_rw_s = mem_req_rw_p;
reg [LINE_SIZE-1:0] mem_req_byteen_r; assign mem_req_byteen_s = mem_req_byteen_p;
reg [`CS_LINE_WIDTH-1:0] mem_req_data_r; assign mem_req_data_s = mem_req_data_p;
always @(*) begin
mem_req_byteen_r = '0;
mem_req_data_r = 'x;
mem_req_byteen_r[mem_req_wsel_p * WORD_SIZE +: WORD_SIZE] = mem_req_byteen_p;
mem_req_data_r[mem_req_wsel_p * `CS_WORD_WIDTH +: `CS_WORD_WIDTH] = mem_req_data_p;
end
assign mem_req_rw_s = mem_req_rw_p;
assign mem_req_byteen_s = mem_req_byteen_r;
assign mem_req_data_s = mem_req_data_r;
end else begin
`UNUSED_VAR (mem_req_wsel_p)
assign mem_req_rw_s = mem_req_rw_p;
assign mem_req_byteen_s = mem_req_byteen_p;
assign mem_req_data_s = mem_req_data_p;
end
end else begin end else begin
`UNUSED_VAR (mem_req_byteen_p) `UNUSED_VAR (mem_req_byteen_p)
`UNUSED_VAR (mem_req_wsel_p)
`UNUSED_VAR (mem_req_data_p) `UNUSED_VAR (mem_req_data_p)
`UNUSED_VAR (mem_req_rw_p) `UNUSED_VAR (mem_req_rw_p)
assign mem_req_rw_s = 0; assign mem_req_rw_s = 0;
assign mem_req_byteen_s = {LINE_SIZE{1'b1}}; assign mem_req_byteen_s = {LINE_SIZE{1'b1}};
assign mem_req_data_s = '0; assign mem_req_data_s = '0;
@ -527,10 +545,10 @@ module VX_cache import VX_gpu_pkg::*; #(
// per cycle: core_reads, core_writes // per cycle: core_reads, core_writes
wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle; wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle;
wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle; wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle;
wire [NUM_REQS-1:0] perf_core_reads_per_req; wire [NUM_REQS-1:0] perf_core_reads_per_req;
wire [NUM_REQS-1:0] perf_core_writes_per_req; wire [NUM_REQS-1:0] perf_core_writes_per_req;
// per cycle: read misses, write misses, msrq stalls, pipeline stalls // per cycle: read misses, write misses, msrq stalls, pipeline stalls
wire [`CLOG2(NUM_BANKS+1)-1:0] perf_read_miss_per_cycle; wire [`CLOG2(NUM_BANKS+1)-1:0] perf_read_miss_per_cycle;
wire [`CLOG2(NUM_BANKS+1)-1:0] perf_write_miss_per_cycle; wire [`CLOG2(NUM_BANKS+1)-1:0] perf_write_miss_per_cycle;
@ -539,16 +557,16 @@ module VX_cache import VX_gpu_pkg::*; #(
`BUFFER(perf_core_reads_per_req, core_req_valid & core_req_ready & ~core_req_rw); `BUFFER(perf_core_reads_per_req, core_req_valid & core_req_ready & ~core_req_rw);
`BUFFER(perf_core_writes_per_req, core_req_valid & core_req_ready & core_req_rw); `BUFFER(perf_core_writes_per_req, core_req_valid & core_req_ready & core_req_rw);
`POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_req); `POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_req);
`POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_req); `POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_req);
`POP_COUNT(perf_read_miss_per_cycle, perf_read_miss_per_bank); `POP_COUNT(perf_read_miss_per_cycle, perf_read_miss_per_bank);
`POP_COUNT(perf_write_miss_per_cycle, perf_write_miss_per_bank); `POP_COUNT(perf_write_miss_per_cycle, perf_write_miss_per_bank);
`POP_COUNT(perf_mshr_stall_per_cycle, perf_mshr_stall_per_bank); `POP_COUNT(perf_mshr_stall_per_cycle, perf_mshr_stall_per_bank);
wire [NUM_REQS-1:0] perf_crsp_stall_per_req; wire [NUM_REQS-1:0] perf_crsp_stall_per_req;
for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar i = 0; i < NUM_REQS; ++i) begin
assign perf_crsp_stall_per_req[i] = core_bus_if[i].rsp_valid && ~core_bus_if[i].rsp_ready; assign perf_crsp_stall_per_req[i] = core_bus2_if[i].rsp_valid && ~core_bus2_if[i].rsp_ready;
end end
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_req); `POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_req);
@ -561,7 +579,7 @@ module VX_cache import VX_gpu_pkg::*; #(
reg [`PERF_CTR_BITS-1:0] perf_write_misses; reg [`PERF_CTR_BITS-1:0] perf_write_misses;
reg [`PERF_CTR_BITS-1:0] perf_mshr_stalls; reg [`PERF_CTR_BITS-1:0] perf_mshr_stalls;
reg [`PERF_CTR_BITS-1:0] perf_mem_stalls; reg [`PERF_CTR_BITS-1:0] perf_mem_stalls;
reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls; reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin

View file

@ -41,6 +41,9 @@ module VX_cache_bank #(
// Enable cache writeable // Enable cache writeable
parameter WRITE_ENABLE = 1, parameter WRITE_ENABLE = 1,
// Enable cache writeback
parameter WRITEBACK = 0,
// Request debug identifier // Request debug identifier
parameter UUID_WIDTH = 0, parameter UUID_WIDTH = 0,
@ -69,12 +72,13 @@ module VX_cache_bank #(
// Core Request // Core Request
input wire core_req_valid, input wire core_req_valid,
input wire [`CS_LINE_ADDR_WIDTH-1:0] core_req_addr, input wire [`CS_LINE_ADDR_WIDTH-1:0] core_req_addr,
input wire core_req_rw, input wire core_req_rw, // write enable
input wire [WORD_SEL_WIDTH-1:0] core_req_wsel, input wire [WORD_SEL_WIDTH-1:0] core_req_wsel, // select the word in a cacheline, e.g. word size = 4 bytes, cacheline size = 64 bytes, it should have log(64/4)= 4 bits
input wire [WORD_SIZE-1:0] core_req_byteen, input wire [WORD_SIZE-1:0] core_req_byteen,// which bytes in data to write
input wire [`CS_WORD_WIDTH-1:0] core_req_data, input wire [`CS_WORD_WIDTH-1:0] core_req_data, // data to be written
input wire [TAG_WIDTH-1:0] core_req_tag, input wire [TAG_WIDTH-1:0] core_req_tag, // identifier of the request (request id)
input wire [REQ_SEL_WIDTH-1:0] core_req_idx, input wire [REQ_SEL_WIDTH-1:0] core_req_idx, // index of the request in the core request array
input wire core_req_flush, // flush enable
output wire core_req_ready, output wire core_req_ready,
// Core Response // Core Response
@ -88,10 +92,10 @@ module VX_cache_bank #(
output wire mem_req_valid, output wire mem_req_valid,
output wire [`CS_LINE_ADDR_WIDTH-1:0] mem_req_addr, output wire [`CS_LINE_ADDR_WIDTH-1:0] mem_req_addr,
output wire mem_req_rw, output wire mem_req_rw,
output wire [WORD_SEL_WIDTH-1:0] mem_req_wsel, output wire [LINE_SIZE-1:0] mem_req_byteen,
output wire [WORD_SIZE-1:0] mem_req_byteen, output wire [`CS_LINE_WIDTH-1:0] mem_req_data,
output wire [`CS_WORD_WIDTH-1:0] mem_req_data, output wire [MSHR_ADDR_WIDTH-1:0] mem_req_id, // index of the head entry in the mshr
output wire [MSHR_ADDR_WIDTH-1:0] mem_req_id, output wire mem_req_flush,
input wire mem_req_ready, input wire mem_req_ready,
// Memory response // Memory response
@ -100,9 +104,9 @@ module VX_cache_bank #(
input wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id, input wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id,
output wire mem_rsp_ready, output wire mem_rsp_ready,
// initialization // flush
input wire init_enable, input wire flush_valid,
input wire [`CS_LINE_SEL_BITS-1:0] init_line_sel output wire flush_ready
); );
localparam PIPELINE_STAGES = 2; localparam PIPELINE_STAGES = 2;
@ -128,23 +132,56 @@ module VX_cache_bank #(
wire [MSHR_ADDR_WIDTH-1:0] replay_id; wire [MSHR_ADDR_WIDTH-1:0] replay_id;
wire replay_ready; wire replay_ready;
wire is_init_st0;
wire is_flush_st0, is_flush_st1;
wire [NUM_WAYS-1:0] flush_way_st0;
wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1; wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1;
wire rw_st0, rw_st1; wire rw_sel, rw_st0, rw_st1;
wire [WORD_SEL_WIDTH-1:0] wsel_st0, wsel_st1; wire [WORD_SEL_WIDTH-1:0] wsel_sel, wsel_st0, wsel_st1;
wire [WORD_SIZE-1:0] byteen_st0, byteen_st1; wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1;
wire [REQ_SEL_WIDTH-1:0] req_idx_st0, req_idx_st1; wire [REQ_SEL_WIDTH-1:0] req_idx_sel, req_idx_st0, req_idx_st1;
wire [TAG_WIDTH-1:0] tag_st0, tag_st1; wire [TAG_WIDTH-1:0] tag_sel, tag_st0, tag_st1;
wire [`CS_WORD_WIDTH-1:0] read_data_st1; wire [`CS_WORD_WIDTH-1:0] read_data_st1;
wire [`CS_LINE_WIDTH-1:0] data_sel, data_st0, data_st1; wire [`CS_LINE_WIDTH-1:0] data_sel, data_st0, data_st1;
wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0, mshr_id_st0, mshr_id_st1; wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0, mshr_id_st0, mshr_id_st1;
wire valid_sel, valid_st0, valid_st1; wire valid_sel, valid_st0, valid_st1;
wire is_init_st0;
wire is_creq_st0, is_creq_st1; wire is_creq_st0, is_creq_st1;
wire is_fill_st0, is_fill_st1; wire is_fill_st0, is_fill_st1;
wire is_replay_st0, is_replay_st1; wire is_replay_st0, is_replay_st1;
wire creq_flush_st0, creq_flush_st1;
wire [NUM_WAYS-1:0] way_sel_st0, way_sel_st1;
wire [NUM_WAYS-1:0] tag_matches_st0;
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0; wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0;
wire [MSHR_ADDR_WIDTH-1:0] mshr_prev_st0, mshr_prev_st1; wire [MSHR_ADDR_WIDTH-1:0] mshr_prev_st0, mshr_prev_st1;
wire mshr_pending_st0, mshr_pending_st1; wire mshr_pending_st0, mshr_pending_st1;
wire mshr_empty;
wire line_flush_valid;
wire line_flush_init;
wire [`CS_LINE_SEL_BITS-1:0] line_flush_sel;
wire [NUM_WAYS-1:0] line_flush_way;
wire line_flush_ready;
// flush unit
// Per-bank flush sequencer instance. On one side it connects to the bank's
// flush_valid / flush_ready port pair; on the other it connects to the
// line_flush_* signals (init flag, valid, line select, way mask, ready) that
// the bank's input arbitration and first pipeline register consume.
// NOTE(review): mshr_empty is presumably used to hold the flush until
// outstanding misses have drained - confirm in VX_bank_flush.
VX_bank_flush #(
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_WAYS (NUM_WAYS),
.WRITEBACK (WRITEBACK)
) flush_unit (
.clk (clk),
.reset (reset),
.flush_in_valid (flush_valid),
.flush_in_ready (flush_ready),
.flush_out_init (line_flush_init),
.flush_out_valid (line_flush_valid),
.flush_out_line (line_flush_sel),
.flush_out_way (line_flush_way),
.flush_out_ready (line_flush_ready),
.mshr_empty (mshr_empty)
);
wire rdw_hazard_st0; wire rdw_hazard_st0;
reg rdw_hazard_st1; reg rdw_hazard_st1;
@ -154,76 +191,77 @@ module VX_cache_bank #(
// inputs arbitration: // inputs arbitration:
// mshr replay has highest priority to maximize utilization since there is no miss. // mshr replay has highest priority to maximize utilization since there is no miss.
// handle memory responses next to prevent deadlock with potential memory request from a miss. // handle memory responses next to prevent deadlock with potential memory request from a miss.
wire replay_grant = ~init_enable; // flush has precedence over core requests to ensure that the cache is in a consistent state.
// Fixed-priority selection of the bank pipeline input (flattened side-by-side
// diff: removed text on the left, added text on the right of each line).
// Added version priority, highest first:
//   init (line_flush_init) > MSHR replay > memory fill > line flush > core request.
// Each *_grant says that no higher-priority source is active; each *_enable
// additionally requires that source's own valid.
wire replay_grant = ~line_flush_init;
wire replay_enable = replay_grant && replay_valid; wire replay_enable = replay_grant && replay_valid;
wire fill_grant = ~init_enable && ~replay_enable; wire fill_grant = ~line_flush_init && ~replay_enable;
wire fill_enable = fill_grant && mem_rsp_valid; wire fill_enable = fill_grant && mem_rsp_valid;
wire creq_grant = ~init_enable && ~replay_enable && ~fill_enable; wire flush_grant = ~line_flush_init && ~replay_enable && ~fill_enable;
wire flush_enable = flush_grant && line_flush_valid;
wire creq_grant = ~line_flush_init && ~replay_enable && ~fill_enable && ~flush_enable;
wire creq_enable = creq_grant && core_req_valid; wire creq_enable = creq_grant && core_req_valid;
// Ready signals: a source is accepted only when it holds the grant and the
// pipeline is not stalled. Replay is also blocked by the stage-0
// read-during-write hazard; line flush is also blocked while the memory
// request queue is almost full; core requests are blocked by either the
// memory request queue or the MSHR being almost full.
assign replay_ready = replay_grant assign replay_ready = replay_grant
&& ~rdw_hazard_st0 && ~rdw_hazard_st0
&& ~pipe_stall; && ~pipe_stall;
assign mem_rsp_ready = fill_grant assign mem_rsp_ready = fill_grant
&& ~pipe_stall; && ~pipe_stall;
assign core_req_ready = creq_grant assign line_flush_ready = flush_grant
&& ~mreq_queue_alm_full && ~mreq_queue_alm_full
&& ~mshr_alm_full && ~pipe_stall;
&& ~pipe_stall;
wire init_fire = init_enable; assign core_req_ready = creq_grant
&& ~mreq_queue_alm_full
&& ~mshr_alm_full
&& ~pipe_stall;
// Fire signals: valid && ready per source. init has no handshake here and
// fires whenever line_flush_init is asserted.
wire init_fire = line_flush_init;
wire replay_fire = replay_valid && replay_ready; wire replay_fire = replay_valid && replay_ready;
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready; wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
wire flush_fire = line_flush_valid && line_flush_ready;
wire core_req_fire = core_req_valid && core_req_ready; wire core_req_fire = core_req_valid && core_req_ready;
wire [TAG_WIDTH-1:0] mshr_creq_tag = replay_enable ? replay_tag : core_req_tag; assign valid_sel = init_fire || replay_fire || mem_rsp_fire || flush_fire || core_req_fire;
assign rw_sel = replay_valid ? replay_rw : core_req_rw;
assign byteen_sel = replay_valid ? replay_byteen : core_req_byteen;
assign wsel_sel = replay_valid ? replay_wsel : core_req_wsel;
assign req_idx_sel = replay_valid ? replay_idx : core_req_idx;
assign tag_sel = replay_valid ? replay_tag : core_req_tag;
assign addr_sel = (line_flush_init | line_flush_valid) ? `CS_LINE_ADDR_WIDTH'(line_flush_sel) :
(replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr));
if (WRITE_ENABLE) begin
assign data_sel[`CS_WORD_WIDTH-1:0] = replay_valid ? replay_data : (mem_rsp_valid ? mem_rsp_data[`CS_WORD_WIDTH-1:0] : core_req_data);
end else begin
assign data_sel[`CS_WORD_WIDTH-1:0] = mem_rsp_data[`CS_WORD_WIDTH-1:0];
`UNUSED_VAR (core_req_data)
`UNUSED_VAR (replay_data)
end
for (genvar i = `CS_WORD_WIDTH; i < `CS_LINE_WIDTH; ++i) begin
assign data_sel[i] = mem_rsp_data[i]; // only the memory response fills the upper words of data_sel
end
if (UUID_WIDTH != 0) begin if (UUID_WIDTH != 0) begin
assign req_uuid_sel = mshr_creq_tag[TAG_WIDTH-1 -: UUID_WIDTH]; assign req_uuid_sel = tag_sel[TAG_WIDTH-1 -: UUID_WIDTH];
end else begin end else begin
assign req_uuid_sel = 0; assign req_uuid_sel = 0;
end end
`UNUSED_VAR (mshr_creq_tag)
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || core_req_fire;
assign addr_sel = init_enable ? `CS_LINE_ADDR_WIDTH'(init_line_sel) :
(replay_valid ? replay_addr :
(mem_rsp_valid ? mem_rsp_addr : core_req_addr));
assign data_sel[`CS_WORD_WIDTH-1:0] = (mem_rsp_valid || !WRITE_ENABLE) ? mem_rsp_data[`CS_WORD_WIDTH-1:0] : (replay_valid ? replay_data : core_req_data);
for (genvar i = `CS_WORD_WIDTH; i < `CS_LINE_WIDTH; ++i) begin
assign data_sel[i] = mem_rsp_data[i];
end
VX_pipe_register #( VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH), .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + NUM_WAYS + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH),
.RESETW (1) .RESETW (1)
) pipe_reg0 ( ) pipe_reg0 (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.enable (~pipe_stall), .enable (~pipe_stall),
.data_in ({ .data_in ({valid_sel, line_flush_init, replay_enable, fill_enable, flush_enable, creq_enable, core_req_flush, line_flush_way, addr_sel, data_sel, rw_sel, byteen_sel, wsel_sel, req_idx_sel, tag_sel, replay_id}),
valid_sel, .data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, creq_flush_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0})
init_enable,
replay_enable,
fill_enable,
creq_enable,
addr_sel,
data_sel,
replay_valid ? replay_rw : core_req_rw,
replay_valid ? replay_byteen : core_req_byteen,
replay_valid ? replay_wsel : core_req_wsel,
replay_valid ? replay_idx : core_req_idx,
replay_valid ? replay_tag : core_req_tag,
replay_id
}),
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_creq_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0})
); );
if (UUID_WIDTH != 0) begin if (UUID_WIDTH != 0) begin
@ -232,20 +270,24 @@ module VX_cache_bank #(
assign req_uuid_st0 = 0; assign req_uuid_st0 = 0;
end end
wire do_creq_rd_st0 = valid_st0 && is_creq_st0 && ~rw_st0;
wire do_fill_st0 = valid_st0 && is_fill_st0;
wire do_init_st0 = valid_st0 && is_init_st0; wire do_init_st0 = valid_st0 && is_init_st0;
wire do_flush_st0 = valid_st0 && is_flush_st0;
wire do_creq_rd_st0 = valid_st0 && is_creq_st0 && ~rw_st0;
wire do_replay_rd_st0 = valid_st0 && is_replay_st0 && ~rw_st0;
wire do_fill_st0 = valid_st0 && is_fill_st0;
wire do_lookup_st0 = valid_st0 && ~(is_fill_st0 || is_init_st0); wire do_lookup_st0 = valid_st0 && ~(is_fill_st0 || is_init_st0);
wire do_cache_rd_st0 = do_creq_rd_st0 || do_replay_rd_st0;
wire [`CS_WORD_WIDTH-1:0] write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0]; wire [`CS_WORD_WIDTH-1:0] write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0];
wire [NUM_WAYS-1:0] tag_matches_st0, tag_matches_st1; wire [NUM_WAYS-1:0] repl_way_st0;
wire [NUM_WAYS-1:0] way_sel_st0, way_sel_st1; wire [`CS_TAG_SEL_BITS-1:0] repl_tag_st0;
`RESET_RELAY (tag_reset, reset); `RESET_RELAY (tag_reset, reset);
VX_cache_tags #( VX_cache_tags #(
.INSTANCE_ID(INSTANCE_ID), .INSTANCE_ID($sformatf("%s-tags", INSTANCE_ID)),
.BANK_ID (BANK_ID), .BANK_ID (BANK_ID),
.CACHE_SIZE (CACHE_SIZE), .CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE), .LINE_SIZE (LINE_SIZE),
@ -261,30 +303,37 @@ module VX_cache_bank #(
.stall (pipe_stall), .stall (pipe_stall),
// read/Fill // init/fill/lookup/flush
.init (do_init_st0 || do_flush_st0),
.fill (do_fill_st0),
.lookup (do_lookup_st0), .lookup (do_lookup_st0),
.line_addr (addr_st0), .line_addr (addr_st0),
.fill (do_fill_st0), .tag_matches(tag_matches_st0),
.init (do_init_st0),
.way_sel (way_sel_st0), // replacement
.tag_matches(tag_matches_st0) .repl_way (repl_way_st0),
.repl_tag (repl_tag_st0)
); );
assign mshr_id_st0 = is_creq_st0 ? mshr_alloc_id_st0 : replay_id_st0; assign mshr_id_st0 = is_creq_st0 ? mshr_alloc_id_st0 : replay_id_st0;
assign way_sel_st0 = is_fill_st0 ? repl_way_st0 : (is_flush_st0 ? flush_way_st0 : tag_matches_st0);
wire [`CS_LINE_ADDR_WIDTH-1:0] addr_r_st0 = (is_fill_st0 || is_flush_st0) ? {repl_tag_st0, addr_st0[`CS_LINE_SEL_BITS-1:0]} : addr_st0;
VX_pipe_register #( VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + NUM_WAYS + 1), .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1),
.RESETW (1) .RESETW (1)
) pipe_reg1 ( ) pipe_reg1 (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.enable (~pipe_stall), .enable (~pipe_stall),
.data_in ({valid_st0, is_replay_st0, is_fill_st0, is_creq_st0, rw_st0, addr_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, tag_matches_st0, way_sel_st0, mshr_pending_st0}), .data_in ({valid_st0, is_flush_st0, is_replay_st0, is_fill_st0, is_creq_st0, creq_flush_st0, rw_st0, addr_r_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, way_sel_st0, mshr_pending_st0}),
.data_out ({valid_st1, is_replay_st1, is_fill_st1, is_creq_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, tag_matches_st1, way_sel_st1, mshr_pending_st1}) .data_out ({valid_st1, is_flush_st1, is_replay_st1, is_fill_st1, is_creq_st1, creq_flush_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, way_sel_st1, mshr_pending_st1})
); );
// we have a tag hit // we have a tag hit
wire is_hit_st1 = (| tag_matches_st1); wire is_hit_st1 = (| way_sel_st1);
if (UUID_WIDTH != 0) begin if (UUID_WIDTH != 0) begin
assign req_uuid_st1 = tag_st1[TAG_WIDTH-1 -: UUID_WIDTH]; assign req_uuid_st1 = tag_st1[TAG_WIDTH-1 -: UUID_WIDTH];
@ -292,37 +341,62 @@ module VX_cache_bank #(
assign req_uuid_st1 = 0; assign req_uuid_st1 = 0;
end end
wire do_creq_rd_st1 = valid_st1 && is_creq_st1 && ~rw_st1; wire is_read_st1 = is_creq_st1 && ~rw_st1;
wire do_creq_wr_st1 = valid_st1 && is_creq_st1 && rw_st1; wire is_write_st1 = is_creq_st1 && rw_st1;
wire do_creq_rd_st1 = valid_st1 && is_read_st1;
wire do_creq_wr_st1 = valid_st1 && is_write_st1;
wire do_fill_st1 = valid_st1 && is_fill_st1; wire do_fill_st1 = valid_st1 && is_fill_st1;
wire do_replay_rd_st1 = valid_st1 && is_replay_st1 && ~rw_st1; wire do_replay_rd_st1 = valid_st1 && is_replay_st1 && ~rw_st1;
wire do_replay_wr_st1 = valid_st1 && is_replay_st1 && rw_st1; wire do_replay_wr_st1 = valid_st1 && is_replay_st1 && rw_st1;
wire do_cache_rd_st1 = do_read_hit_st1 || do_replay_rd_st1;
wire do_cache_wr_st1 = do_write_hit_st1 || do_replay_wr_st1;
wire do_read_hit_st1 = do_creq_rd_st1 && is_hit_st1; wire do_read_hit_st1 = do_creq_rd_st1 && is_hit_st1;
wire do_read_miss_st1 = do_creq_rd_st1 && ~is_hit_st1; wire do_read_miss_st1 = do_creq_rd_st1 && ~is_hit_st1;
wire do_write_hit_st1 = do_creq_wr_st1 && is_hit_st1; wire do_write_hit_st1 = do_creq_wr_st1 && is_hit_st1;
wire do_write_miss_st1= do_creq_wr_st1 && ~is_hit_st1; wire do_write_miss_st1= do_creq_wr_st1 && ~is_hit_st1;
wire do_flush_st1 = valid_st1 && is_flush_st1;
`UNUSED_VAR (do_write_miss_st1) `UNUSED_VAR (do_write_miss_st1)
// ensure mshr replay always gets a hit // ensure mshr replay always gets a hit
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("runtime error: invalid mshr replay")); `RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("runtime error: invalid mshr replay"));
// detect BRAM's read-during-write hazard // detect BRAM's read-during-write hazard
assign rdw_hazard_st0 = do_fill_st0; // after a fill assign rdw_hazard_st0 = do_fill_st0; // stall cycle after a fill
always @(posedge clk) begin wire rdw_case1 = do_cache_rd_st0 && do_cache_wr_st1 && (addr_st0 == addr_st1); // standard cache access
rdw_hazard_st1 <= (do_creq_rd_st0 && do_write_hit_st1 && (addr_st0 == addr_st1)) wire rdw_case2 = WRITEBACK && (do_flush_st0 || do_fill_st0) && do_cache_wr_st1; // a writeback can evict preceding write
&& ~rdw_hazard_st1; // after a write to same address always @(posedge clk) begin // after a write to same address
rdw_hazard_st1 <= (rdw_case1 || rdw_case2)
&& ~rdw_hazard_st1; // invalidate if pipeline stalled to avoid repeats
end end
wire [`CS_WORD_WIDTH-1:0] write_data_st1 = data_st1[`CS_WORD_WIDTH-1:0]; wire [`CS_LINE_WIDTH-1:0] write_data_st1 = {`CS_WORDS_PER_LINE{data_st1[`CS_WORD_WIDTH-1:0]}};
wire [`CS_LINE_WIDTH-1:0] fill_data_st1 = data_st1; wire [`CS_LINE_WIDTH-1:0] fill_data_st1 = data_st1;
wire [LINE_SIZE-1:0] write_byteen_st1;
wire [`CS_LINE_WIDTH-1:0] dirty_data_st1;
wire [LINE_SIZE-1:0] dirty_byteen_st1;
wire dirty_valid_st1;
if (`CS_WORDS_PER_LINE > 1) begin
reg [LINE_SIZE-1:0] write_byteen_r;
always @(*) begin
write_byteen_r = '0;
write_byteen_r[wsel_st1 * WORD_SIZE +: WORD_SIZE] = byteen_st1;
end
assign write_byteen_st1 = write_byteen_r;
end else begin
assign write_byteen_st1 = byteen_st1;
end
`RESET_RELAY (data_reset, reset); `RESET_RELAY (data_reset, reset);
VX_cache_data #( VX_cache_data #(
.INSTANCE_ID (INSTANCE_ID), .INSTANCE_ID ($sformatf("%s-data", INSTANCE_ID)),
.BANK_ID (BANK_ID), .BANK_ID (BANK_ID),
.CACHE_SIZE (CACHE_SIZE), .CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE), .LINE_SIZE (LINE_SIZE),
@ -330,6 +404,7 @@ module VX_cache_bank #(
.NUM_WAYS (NUM_WAYS), .NUM_WAYS (NUM_WAYS),
.WORD_SIZE (WORD_SIZE), .WORD_SIZE (WORD_SIZE),
.WRITE_ENABLE (WRITE_ENABLE), .WRITE_ENABLE (WRITE_ENABLE),
.WRITEBACK (WRITEBACK),
.UUID_WIDTH (UUID_WIDTH) .UUID_WIDTH (UUID_WIDTH)
) cache_data ( ) cache_data (
.clk (clk), .clk (clk),
@ -339,23 +414,38 @@ module VX_cache_bank #(
.stall (pipe_stall), .stall (pipe_stall),
.read (do_read_hit_st1 || do_replay_rd_st1), .read (do_cache_rd_st1),
.fill (do_fill_st1), .fill (do_fill_st1 && ~rdw_hazard_st1),
.write (do_write_hit_st1 || do_replay_wr_st1), .flush (do_flush_st1),
.way_sel (way_sel_st1 | tag_matches_st1), .write (do_cache_wr_st1),
.way_sel (way_sel_st1),
.line_addr (addr_st1), .line_addr (addr_st1),
.wsel (wsel_st1), .wsel (wsel_st1),
.byteen (byteen_st1),
.fill_data (fill_data_st1), .fill_data (fill_data_st1),
.write_data (write_data_st1), .write_data (write_data_st1),
.read_data (read_data_st1) .write_byteen(write_byteen_st1),
.read_data (read_data_st1),
.dirty_valid(dirty_valid_st1),
.dirty_data (dirty_data_st1),
.dirty_byteen(dirty_byteen_st1)
); );
wire [MSHR_SIZE-1:0] mshr_matches_st0; wire [MSHR_SIZE-1:0] mshr_lookup_pending_st0;
wire [MSHR_SIZE-1:0] mshr_lookup_rw_st0;
wire mshr_allocate_st0 = valid_st0 && is_creq_st0 && ~pipe_stall; wire mshr_allocate_st0 = valid_st0 && is_creq_st0 && ~pipe_stall;
wire mshr_lookup_st0 = mshr_allocate_st0; wire mshr_lookup_st0 = mshr_allocate_st0;
wire mshr_finalize_st1 = valid_st1 && is_creq_st1 && ~pipe_stall; wire mshr_finalize_st1 = valid_st1 && is_creq_st1 && ~pipe_stall;
wire mshr_release_st1 = is_hit_st1 || (rw_st1 && ~mshr_pending_st1);
// release allocated mshr entry if we had a hit
wire mshr_release_st1;
if (WRITEBACK) begin
assign mshr_release_st1 = is_hit_st1;
end else begin
// we need to keep missed write requests in MSHR if there is already a pending entry to the same address
// this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content
// this can happen when writes are sent late, when the fill was already in flight.
assign mshr_release_st1 = is_hit_st1 || (rw_st1 && ~mshr_pending_st1);
end
VX_pending_size #( VX_pending_size #(
.SIZE (MSHR_SIZE) .SIZE (MSHR_SIZE)
@ -364,15 +454,17 @@ module VX_cache_bank #(
.reset (reset), .reset (reset),
.incr (core_req_fire), .incr (core_req_fire),
.decr (replay_fire || (mshr_finalize_st1 && mshr_release_st1)), .decr (replay_fire || (mshr_finalize_st1 && mshr_release_st1)),
.empty (mshr_empty),
`UNUSED_PIN (alm_empty),
.full (mshr_alm_full), .full (mshr_alm_full),
`UNUSED_PIN (size), `UNUSED_PIN (alm_full),
`UNUSED_PIN (empty) `UNUSED_PIN (size)
); );
`RESET_RELAY (mshr_reset, reset); `RESET_RELAY (mshr_reset, reset);
VX_cache_mshr #( VX_cache_mshr #(
.INSTANCE_ID (INSTANCE_ID), .INSTANCE_ID ($sformatf("%s-mshr", INSTANCE_ID)),
.BANK_ID (BANK_ID), .BANK_ID (BANK_ID),
.LINE_SIZE (LINE_SIZE), .LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS), .NUM_BANKS (NUM_BANKS),
@ -412,7 +504,8 @@ module VX_cache_bank #(
// lookup // lookup
.lookup_valid (mshr_lookup_st0), .lookup_valid (mshr_lookup_st0),
.lookup_addr (addr_st0), .lookup_addr (addr_st0),
.lookup_matches (mshr_matches_st0), .lookup_pending (mshr_lookup_pending_st0),
.lookup_rw (mshr_lookup_rw_st0),
// finalize // finalize
.finalize_valid (mshr_finalize_st1), .finalize_valid (mshr_finalize_st1),
@ -422,10 +515,12 @@ module VX_cache_bank #(
.finalize_prev (mshr_prev_st1) .finalize_prev (mshr_prev_st1)
); );
// ignore allocated id from mshr matches // check if there are pending requests to same line in the MSHR
wire [MSHR_SIZE-1:0] lookup_matches; wire [MSHR_SIZE-1:0] lookup_matches;
for (genvar i = 0; i < MSHR_SIZE; ++i) begin for (genvar i = 0; i < MSHR_SIZE; ++i) begin
assign lookup_matches[i] = (i != mshr_alloc_id_st0) && mshr_matches_st0[i]; assign lookup_matches[i] = mshr_lookup_pending_st0[i]
&& (i != mshr_alloc_id_st0) // exclude current mshr id
&& (WRITEBACK || ~mshr_lookup_rw_st0[i]); // exclude write requests if writethrough
end end
assign mshr_pending_st0 = (| lookup_matches); assign mshr_pending_st0 = (| lookup_matches);
@ -436,7 +531,7 @@ module VX_cache_bank #(
wire [REQ_SEL_WIDTH-1:0] crsp_queue_idx; wire [REQ_SEL_WIDTH-1:0] crsp_queue_idx;
wire [TAG_WIDTH-1:0] crsp_queue_tag; wire [TAG_WIDTH-1:0] crsp_queue_tag;
assign crsp_queue_valid = do_read_hit_st1 || do_replay_rd_st1; assign crsp_queue_valid = do_cache_rd_st1;
assign crsp_queue_idx = req_idx_st1; assign crsp_queue_idx = req_idx_st1;
assign crsp_queue_data = read_data_st1; assign crsp_queue_data = read_data_st1;
assign crsp_queue_tag = tag_st1; assign crsp_queue_tag = tag_st1;
@ -463,29 +558,40 @@ module VX_cache_bank #(
// schedule memory request // schedule memory request
wire mreq_queue_push, mreq_queue_pop, mreq_queue_empty; wire mreq_queue_push, mreq_queue_pop, mreq_queue_empty;
wire [`CS_WORD_WIDTH-1:0] mreq_queue_data; wire [`CS_LINE_WIDTH-1:0] mreq_queue_data;
wire [WORD_SIZE-1:0] mreq_queue_byteen; wire [LINE_SIZE-1:0] mreq_queue_byteen;
wire [WORD_SEL_WIDTH-1:0] mreq_queue_wsel;
wire [`CS_LINE_ADDR_WIDTH-1:0] mreq_queue_addr; wire [`CS_LINE_ADDR_WIDTH-1:0] mreq_queue_addr;
wire [MSHR_ADDR_WIDTH-1:0] mreq_queue_id; wire [MSHR_ADDR_WIDTH-1:0] mreq_queue_id;
wire mreq_queue_rw; wire mreq_queue_rw;
wire mreq_queue_flush;
assign mreq_queue_push = (do_read_miss_st1 && ~mshr_pending_st1) wire is_evict_st1 = (is_fill_st1 || is_flush_st1) && dirty_valid_st1;
|| do_creq_wr_st1; wire do_writeback_st1 = valid_st1 && is_evict_st1;
`UNUSED_VAR (do_writeback_st1)
if (WRITEBACK) begin
assign mreq_queue_push = (((do_read_miss_st1 || do_write_miss_st1) && ~mshr_pending_st1)
|| do_writeback_st1)
&& ~rdw_hazard_st1;
end else begin
`UNUSED_VAR (dirty_valid_st1)
assign mreq_queue_push = ((do_read_miss_st1 && ~mshr_pending_st1)
|| do_creq_wr_st1)
&& ~rdw_hazard_st1;
end
assign mreq_queue_pop = mem_req_valid && mem_req_ready; assign mreq_queue_pop = mem_req_valid && mem_req_ready;
assign mreq_queue_rw = WRITE_ENABLE && (WRITEBACK ? is_evict_st1 : rw_st1);
assign mreq_queue_rw = WRITE_ENABLE && rw_st1;
assign mreq_queue_addr = addr_st1; assign mreq_queue_addr = addr_st1;
assign mreq_queue_id = mshr_id_st1; assign mreq_queue_id = mshr_id_st1;
assign mreq_queue_wsel = wsel_st1; assign mreq_queue_data = is_write_st1 ? write_data_st1 : dirty_data_st1;
assign mreq_queue_byteen = byteen_st1; assign mreq_queue_byteen = is_write_st1 ? write_byteen_st1 : dirty_byteen_st1;
assign mreq_queue_data = write_data_st1; assign mreq_queue_flush = creq_flush_st1;
`RESET_RELAY (mreq_queue_reset, reset); `RESET_RELAY (mreq_queue_reset, reset);
VX_fifo_queue #( VX_fifo_queue #(
.DATAW (1 + `CS_LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + `CS_WORD_WIDTH), .DATAW (1 + `CS_LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + 1),
.DEPTH (MREQ_SIZE), .DEPTH (MREQ_SIZE),
.ALM_FULL (MREQ_SIZE-PIPELINE_STAGES), .ALM_FULL (MREQ_SIZE-PIPELINE_STAGES),
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
@ -494,8 +600,8 @@ module VX_cache_bank #(
.reset (mreq_queue_reset), .reset (mreq_queue_reset),
.push (mreq_queue_push), .push (mreq_queue_push),
.pop (mreq_queue_pop), .pop (mreq_queue_pop),
.data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_id, mreq_queue_byteen, mreq_queue_wsel, mreq_queue_data}), .data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_id, mreq_queue_byteen, mreq_queue_data, mreq_queue_flush}),
.data_out ({mem_req_rw, mem_req_addr, mem_req_id, mem_req_byteen, mem_req_wsel, mem_req_data}), .data_out ({mem_req_rw, mem_req_addr, mem_req_id, mem_req_byteen, mem_req_data, mem_req_flush}),
.empty (mreq_queue_empty), .empty (mreq_queue_empty),
.alm_full (mreq_queue_alm_full), .alm_full (mreq_queue_alm_full),
`UNUSED_PIN (full), `UNUSED_PIN (full),
@ -515,35 +621,34 @@ module VX_cache_bank #(
`ifdef DBG_TRACE_CACHE `ifdef DBG_TRACE_CACHE
wire crsp_queue_fire = crsp_queue_valid && crsp_queue_ready; wire crsp_queue_fire = crsp_queue_valid && crsp_queue_ready;
wire pipeline_stall = (replay_valid || mem_rsp_valid || core_req_valid) wire pipeline_stall = (replay_valid || mem_rsp_valid || core_req_valid || line_flush_valid)
&& ~(replay_fire || mem_rsp_fire || core_req_fire); && ~(replay_fire || mem_rsp_fire || core_req_fire || line_flush_valid);
always @(posedge clk) begin always @(posedge clk) begin
if (pipeline_stall) begin if (pipeline_stall) begin
`TRACE(3, ("%d: *** %s-bank%0d stall: crsq=%b, mreq=%b, mshr=%b\n", $time, INSTANCE_ID, BANK_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full)); `TRACE(3, ("%d: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw_st0=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard_st0));
end
if (init_enable) begin
`TRACE(2, ("%d: %s-bank%0d init: addr=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(init_line_sel, BANK_ID)));
end end
if (mem_rsp_fire) begin if (mem_rsp_fire) begin
`TRACE(2, ("%d: %s-bank%0d fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data)); `TRACE(2, ("%d: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data));
end end
if (replay_fire) begin if (replay_fire) begin
`TRACE(2, ("%d: %s-bank%0d mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel)); `TRACE(2, ("%d: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel));
end end
if (core_req_fire) begin if (core_req_fire) begin
if (core_req_rw) if (core_req_rw)
`TRACE(2, ("%d: %s-bank%0d core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel)); `TRACE(2, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel));
else else
`TRACE(2, ("%d: %s-bank%0d core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel)); `TRACE(2, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel));
end end
if (crsp_queue_fire) begin if (crsp_queue_fire) begin
`TRACE(2, ("%d: %s-bank%0d core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1)); `TRACE(2, ("%d: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1));
end end
if (mreq_queue_push) begin if (mreq_queue_push) begin
if (do_creq_wr_st1) if (do_creq_wr_st1 && !WRITEBACK)
`TRACE(2, ("%d: %s-bank%0d writethrough: addr=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)); `TRACE(2, ("%d: %s writethrough: addr=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1));
else if (do_writeback_st1)
`TRACE(2, ("%d: %s writeback: addr=0x%0h, byteen=%b, data=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data));
else else
`TRACE(2, ("%d: %s-bank%0d fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1)); `TRACE(2, ("%d: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1));
end end
end end
`endif `endif

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -18,16 +18,16 @@ module VX_cache_bypass #(
parameter TAG_SEL_IDX = 0, parameter TAG_SEL_IDX = 0,
parameter PASSTHRU = 0, parameter PASSTHRU = 0,
parameter NC_ENABLE = 0, parameter NC_ENABLE = 0,
parameter WORD_SIZE = 1, parameter WORD_SIZE = 1,
parameter LINE_SIZE = 1, parameter LINE_SIZE = 1,
parameter CORE_ADDR_WIDTH = 1, parameter CORE_ADDR_WIDTH = 1,
parameter CORE_TAG_WIDTH = 1, parameter CORE_TAG_WIDTH = 1,
parameter MEM_ADDR_WIDTH = 1, parameter MEM_ADDR_WIDTH = 1,
parameter MEM_TAG_IN_WIDTH = 1, parameter MEM_TAG_IN_WIDTH = 1,
parameter MEM_TAG_OUT_WIDTH = 1, parameter MEM_TAG_OUT_WIDTH = 1,
@ -35,9 +35,9 @@ module VX_cache_bypass #(
parameter CORE_OUT_BUF = 0, parameter CORE_OUT_BUF = 0,
parameter MEM_OUT_BUF = 0, parameter MEM_OUT_BUF = 0,
parameter CORE_DATA_WIDTH = WORD_SIZE * 8 parameter CORE_DATA_WIDTH = WORD_SIZE * 8
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
@ -71,40 +71,39 @@ module VX_cache_bypass #(
wire core_req_nc_valid; wire core_req_nc_valid;
wire [NUM_REQS-1:0] core_req_nc_valids; wire [NUM_REQS-1:0] core_req_nc_valids;
wire [NUM_REQS-1:0] core_req_nc_idxs; wire [NUM_REQS-1:0] core_req_nc_idxs;
wire [`UP(REQ_SEL_BITS)-1:0] core_req_nc_idx; wire [`UP(REQ_SEL_BITS)-1:0] core_req_nc_idx;
wire [NUM_REQS-1:0] core_req_nc_sel; wire [NUM_REQS-1:0] core_req_nc_sel;
wire core_req_nc_ready; wire core_req_nc_ready;
for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar i = 0; i < NUM_REQS; ++i) begin
if (PASSTHRU != 0) begin if (PASSTHRU != 0) begin
assign core_req_nc_idxs[i] = 1'b1; assign core_req_nc_idxs[i] = 1'b1;
end else if (NC_ENABLE) begin end else if (NC_ENABLE) begin
assign core_req_nc_idxs[i] = core_bus_in_if[i].req_data.atype[`ADDR_TYPE_IO]; assign core_req_nc_idxs[i] = core_bus_in_if[i].req_data.atype[`ADDR_TYPE_IO];
end else begin end else begin
assign core_req_nc_idxs[i] = 1'b0; assign core_req_nc_idxs[i] = 1'b0;
end end
assign core_req_nc_valids[i] = core_bus_in_if[i].req_valid && core_req_nc_idxs[i]; assign core_req_nc_valids[i] = core_bus_in_if[i].req_valid && core_req_nc_idxs[i];
end end
VX_generic_arbiter #( VX_generic_arbiter #(
.NUM_REQS (NUM_REQS), .NUM_REQS (NUM_REQS),
.TYPE (PASSTHRU ? "R" : "P"), .TYPE (PASSTHRU ? "R" : "P")
.LOCK_ENABLE (1)
) core_req_nc_arb ( ) core_req_nc_arb (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.requests (core_req_nc_valids), .requests (core_req_nc_valids),
.grant_index (core_req_nc_idx), .grant_index (core_req_nc_idx),
.grant_onehot (core_req_nc_sel), .grant_onehot (core_req_nc_sel),
.grant_valid (core_req_nc_valid), .grant_valid (core_req_nc_valid),
.grant_unlock (core_req_nc_ready) .grant_ready (core_req_nc_ready)
); );
for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_bus_out_if[i].req_valid = core_bus_in_if[i].req_valid && ~core_req_nc_idxs[i]; assign core_bus_out_if[i].req_valid = core_bus_in_if[i].req_valid && ~core_req_nc_idxs[i];
assign core_bus_out_if[i].req_data = core_bus_in_if[i].req_data; assign core_bus_out_if[i].req_data = core_bus_in_if[i].req_data;
assign core_bus_in_if[i].req_ready = core_req_nc_valids[i] ? (core_req_nc_ready && core_req_nc_sel[i]) assign core_bus_in_if[i].req_ready = core_req_nc_valids[i] ? (core_req_nc_ready && core_req_nc_sel[i])
: core_bus_out_if[i].req_ready; : core_bus_out_if[i].req_ready;
end end
@ -118,7 +117,7 @@ module VX_cache_bypass #(
wire [`CS_LINE_WIDTH-1:0] mem_req_out_data; wire [`CS_LINE_WIDTH-1:0] mem_req_out_data;
wire [MEM_TAG_OUT_WIDTH-1:0] mem_req_out_tag; wire [MEM_TAG_OUT_WIDTH-1:0] mem_req_out_tag;
wire mem_req_out_ready; wire mem_req_out_ready;
wire core_req_nc_sel_rw; wire core_req_nc_sel_rw;
wire [WORD_SIZE-1:0] core_req_nc_sel_byteen; wire [WORD_SIZE-1:0] core_req_nc_sel_byteen;
wire [CORE_ADDR_WIDTH-1:0] core_req_nc_sel_addr; wire [CORE_ADDR_WIDTH-1:0] core_req_nc_sel_addr;
@ -129,22 +128,22 @@ module VX_cache_bypass #(
wire [NUM_REQS-1:0][MUX_DATAW-1:0] core_req_nc_mux_in; wire [NUM_REQS-1:0][MUX_DATAW-1:0] core_req_nc_mux_in;
for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_req_nc_mux_in[i] = { assign core_req_nc_mux_in[i] = {
core_bus_in_if[i].req_data.rw, core_bus_in_if[i].req_data.rw,
core_bus_in_if[i].req_data.byteen, core_bus_in_if[i].req_data.byteen,
core_bus_in_if[i].req_data.addr, core_bus_in_if[i].req_data.addr,
core_bus_in_if[i].req_data.atype, core_bus_in_if[i].req_data.atype,
core_bus_in_if[i].req_data.data, core_bus_in_if[i].req_data.data,
core_bus_in_if[i].req_data.tag core_bus_in_if[i].req_data.tag
}; };
end end
assign { assign {
core_req_nc_sel_rw, core_req_nc_sel_rw,
core_req_nc_sel_byteen, core_req_nc_sel_byteen,
core_req_nc_sel_addr, core_req_nc_sel_addr,
core_req_nc_sel_atype, core_req_nc_sel_atype,
core_req_nc_sel_data, core_req_nc_sel_data,
core_req_nc_sel_tag core_req_nc_sel_tag
} = core_req_nc_mux_in[core_req_nc_idx]; } = core_req_nc_mux_in[core_req_nc_idx];
assign core_req_nc_ready = ~mem_bus_in_if.req_valid && mem_req_out_ready; assign core_req_nc_ready = ~mem_bus_in_if.req_valid && mem_req_out_ready;
@ -157,11 +156,11 @@ module VX_cache_bypass #(
wire [MEM_TAG_ID_BITS-1:0] mem_req_tag_id_bypass; wire [MEM_TAG_ID_BITS-1:0] mem_req_tag_id_bypass;
wire [CORE_TAG_ID_BITS-1:0] core_req_in_id = core_req_nc_sel_tag[CORE_TAG_ID_BITS-1:0]; wire [CORE_TAG_ID_BITS-1:0] core_req_in_id = core_req_nc_sel_tag[CORE_TAG_ID_BITS-1:0];
if (WORDS_PER_LINE > 1) begin if (WORDS_PER_LINE > 1) begin
reg [WORDS_PER_LINE-1:0][WORD_SIZE-1:0] mem_req_byteen_in_r; reg [WORDS_PER_LINE-1:0][WORD_SIZE-1:0] mem_req_byteen_in_r;
reg [WORDS_PER_LINE-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_r; reg [WORDS_PER_LINE-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_r;
wire [WSEL_BITS-1:0] req_wsel = core_req_nc_sel_addr[WSEL_BITS-1:0]; wire [WSEL_BITS-1:0] req_wsel = core_req_nc_sel_addr[WSEL_BITS-1:0];
always @(*) begin always @(*) begin
@ -176,7 +175,7 @@ module VX_cache_bypass #(
assign mem_req_out_data = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.data : mem_req_data_in_r; assign mem_req_out_data = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.data : mem_req_data_in_r;
if (NUM_REQS > 1) begin if (NUM_REQS > 1) begin
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, req_wsel, core_req_in_id}); assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, req_wsel, core_req_in_id});
end else begin end else begin
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({req_wsel, core_req_in_id}); assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({req_wsel, core_req_in_id});
end end
end else begin end else begin
@ -189,7 +188,7 @@ module VX_cache_bypass #(
end end
end end
wire [MEM_TAG_BYPASS_BITS-1:0] mem_req_tag_bypass; wire [MEM_TAG_BYPASS_BITS-1:0] mem_req_tag_bypass;
if (UUID_WIDTH != 0) begin if (UUID_WIDTH != 0) begin
assign mem_req_tag_bypass = {core_req_nc_sel_tag[CORE_TAG_ID_BITS +: UUID_WIDTH], mem_req_tag_id_bypass}; assign mem_req_tag_bypass = {core_req_nc_sel_tag[CORE_TAG_ID_BITS +: UUID_WIDTH], mem_req_tag_id_bypass};
@ -202,7 +201,7 @@ module VX_cache_bypass #(
`UNUSED_VAR (mem_bus_in_if.req_data.tag) `UNUSED_VAR (mem_bus_in_if.req_data.tag)
end else begin end else begin
if (NC_ENABLE) begin if (NC_ENABLE) begin
VX_bits_insert #( VX_bits_insert #(
.N (MEM_TAG_OUT_WIDTH-1), .N (MEM_TAG_OUT_WIDTH-1),
.S (1), .S (1),
.POS (TAG_SEL_IDX) .POS (TAG_SEL_IDX)
@ -213,8 +212,8 @@ module VX_cache_bypass #(
); );
end else begin end else begin
assign mem_req_out_tag = mem_bus_in_if.req_data.tag; assign mem_req_out_tag = mem_bus_in_if.req_data.tag;
end end
end end
assign mem_bus_in_if.req_ready = mem_req_out_ready; assign mem_bus_in_if.req_ready = mem_req_out_ready;
@ -225,11 +224,11 @@ module VX_cache_bypass #(
) mem_req_buf ( ) mem_req_buf (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.valid_in (mem_req_out_valid), .valid_in (mem_req_out_valid),
.ready_in (mem_req_out_ready), .ready_in (mem_req_out_ready),
.data_in ({mem_req_out_rw, mem_req_out_byteen, mem_req_out_addr, mem_req_out_atype, mem_req_out_data, mem_req_out_tag}), .data_in ({mem_req_out_rw, mem_req_out_byteen, mem_req_out_addr, mem_req_out_atype, mem_req_out_data, mem_req_out_tag}),
.data_out ({mem_bus_out_if.req_data.rw, mem_bus_out_if.req_data.byteen, mem_bus_out_if.req_data.addr, mem_bus_out_if.req_data.atype, mem_bus_out_if.req_data.data, mem_bus_out_if.req_data.tag}), .data_out ({mem_bus_out_if.req_data.rw, mem_bus_out_if.req_data.byteen, mem_bus_out_if.req_data.addr, mem_bus_out_if.req_data.atype, mem_bus_out_if.req_data.data, mem_bus_out_if.req_data.tag}),
.valid_out (mem_bus_out_if.req_valid), .valid_out (mem_bus_out_if.req_valid),
.ready_out (mem_bus_out_if.req_ready) .ready_out (mem_bus_out_if.req_ready)
); );
@ -253,7 +252,7 @@ module VX_cache_bypass #(
wire [(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1:0] mem_rsp_tag_id_nc; wire [(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1:0] mem_rsp_tag_id_nc;
VX_bits_remove #( VX_bits_remove #(
.N (MEM_TAG_OUT_WIDTH), .N (MEM_TAG_OUT_WIDTH),
.S (NC_ENABLE), .S (NC_ENABLE),
.POS (TAG_SEL_IDX) .POS (TAG_SEL_IDX)
@ -265,10 +264,10 @@ module VX_cache_bypass #(
wire [`UP(REQ_SEL_BITS)-1:0] rsp_idx; wire [`UP(REQ_SEL_BITS)-1:0] rsp_idx;
if (NUM_REQS > 1) begin if (NUM_REQS > 1) begin
assign rsp_idx = mem_rsp_tag_id_nc[(CORE_TAG_ID_BITS + WSEL_BITS) +: REQ_SEL_BITS]; assign rsp_idx = mem_rsp_tag_id_nc[(CORE_TAG_ID_BITS + WSEL_BITS) +: REQ_SEL_BITS];
end else begin end else begin
assign rsp_idx = 1'b0; assign rsp_idx = 1'b0;
end end
reg [NUM_REQS-1:0] rsp_nc_valid_r; reg [NUM_REQS-1:0] rsp_nc_valid_r;
always @(*) begin always @(*) begin
rsp_nc_valid_r = '0; rsp_nc_valid_r = '0;
@ -277,13 +276,13 @@ module VX_cache_bypass #(
for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || rsp_nc_valid_r[i]; assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || rsp_nc_valid_r[i];
assign core_bus_out_if[i].rsp_ready = core_rsp_in_ready[i]; assign core_bus_out_if[i].rsp_ready = core_rsp_in_ready[i];
end end
if (WORDS_PER_LINE > 1) begin if (WORDS_PER_LINE > 1) begin
wire [WSEL_BITS-1:0] rsp_wsel = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS +: WSEL_BITS]; wire [WSEL_BITS-1:0] rsp_wsel = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS +: WSEL_BITS];
for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ? assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ?
core_bus_out_if[i].rsp_data.data : mem_bus_out_if.rsp_data.data[rsp_wsel * CORE_DATA_WIDTH +: CORE_DATA_WIDTH]; core_bus_out_if[i].rsp_data.data : mem_bus_out_if.rsp_data.data[rsp_wsel * CORE_DATA_WIDTH +: CORE_DATA_WIDTH];
end end
end else begin end else begin
@ -306,7 +305,7 @@ module VX_cache_bypass #(
assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_valid ? core_bus_out_if[i].rsp_data.tag : mem_rsp_tag_in_nc2; assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_valid ? core_bus_out_if[i].rsp_data.tag : mem_rsp_tag_in_nc2;
end else begin end else begin
assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_data.tag; assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_data.tag;
end end
end end
for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar i = 0; i < NUM_REQS; ++i) begin
@ -320,7 +319,7 @@ module VX_cache_bypass #(
.valid_in (core_rsp_in_valid[i]), .valid_in (core_rsp_in_valid[i]),
.ready_in (core_rsp_in_ready[i]), .ready_in (core_rsp_in_ready[i]),
.data_in ({core_rsp_in_data[i], core_rsp_in_tag[i]}), .data_in ({core_rsp_in_data[i], core_rsp_in_tag[i]}),
.data_out ({core_bus_in_if[i].rsp_data.data, core_bus_in_if[i].rsp_data.tag}), .data_out ({core_bus_in_if[i].rsp_data.data, core_bus_in_if[i].rsp_data.tag}),
.valid_out (core_bus_in_if[i].rsp_valid), .valid_out (core_bus_in_if[i].rsp_valid),
.ready_out (core_bus_in_if[i].rsp_ready) .ready_out (core_bus_in_if[i].rsp_ready)
); );
@ -341,7 +340,7 @@ module VX_cache_bypass #(
assign mem_bus_in_if.rsp_data.data = mem_bus_out_if.rsp_data.data; assign mem_bus_in_if.rsp_data.data = mem_bus_out_if.rsp_data.data;
assign mem_bus_in_if.rsp_data.tag = mem_rsp_tag_id_nc; assign mem_bus_in_if.rsp_data.tag = mem_rsp_tag_id_nc;
end end
wire [NUM_REQS-1:0] core_rsp_out_valid; wire [NUM_REQS-1:0] core_rsp_out_valid;
for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_out_valid[i] = core_bus_out_if[i].rsp_valid; assign core_rsp_out_valid[i] = core_bus_out_if[i].rsp_valid;

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -24,20 +24,20 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
parameter NUM_REQS = 4, parameter NUM_REQS = 4,
// Size of cache in bytes // Size of cache in bytes
parameter CACHE_SIZE = 16384, parameter CACHE_SIZE = 16384,
// Size of line inside a bank in bytes // Size of line inside a bank in bytes
parameter LINE_SIZE = 64, parameter LINE_SIZE = 64,
// Number of banks // Number of banks
parameter NUM_BANKS = 1, parameter NUM_BANKS = 1,
// Number of associative ways // Number of associative ways
parameter NUM_WAYS = 4, parameter NUM_WAYS = 4,
// Size of a word in bytes // Size of a word in bytes
parameter WORD_SIZE = 4, parameter WORD_SIZE = 4,
// Core Response Queue Size // Core Response Queue Size
parameter CRSQ_SIZE = 2, parameter CRSQ_SIZE = 2,
// Miss Reserv Queue Knob // Miss Reserv Queue Knob
parameter MSHR_SIZE = 8, parameter MSHR_SIZE = 8,
// Memory Response Queue Size // Memory Response Queue Size
parameter MRSQ_SIZE = 0, parameter MRSQ_SIZE = 0,
// Memory Request Queue Size // Memory Request Queue Size
@ -46,6 +46,9 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
// Enable cache writeable // Enable cache writeable
parameter WRITE_ENABLE = 1, parameter WRITE_ENABLE = 1,
// Enable cache writeback
parameter WRITEBACK = 0,
// Request debug identifier // Request debug identifier
parameter UUID_WIDTH = 0, parameter UUID_WIDTH = 0,
@ -60,7 +63,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
// Memory request output buffer // Memory request output buffer
parameter MEM_OUT_BUF = 0 parameter MEM_OUT_BUF = 0
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
@ -74,17 +77,16 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
); );
localparam NUM_CACHES = `UP(NUM_UNITS); localparam NUM_CACHES = `UP(NUM_UNITS);
localparam PASSTHRU = (NUM_UNITS == 0); localparam PASSTHRU = (NUM_UNITS == 0);
localparam ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES); localparam ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES);
localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) : localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) : (NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS)); `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS));
`STATIC_ASSERT(NUM_INPUTS >= NUM_CACHES, ("invalid parameter")) `STATIC_ASSERT(NUM_INPUTS >= NUM_CACHES, ("invalid parameter"))
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
cache_perf_t perf_cache_tmp[1], perf_cache_unit[NUM_CACHES]; cache_perf_t perf_cache_unit[NUM_CACHES];
`PERF_CACHE_ADD (perf_cache_tmp, perf_cache_unit, 1, NUM_CACHES) `PERF_CACHE_ADD (cache_perf, perf_cache_unit, NUM_CACHES)
assign cache_perf = perf_cache_tmp[0];
`endif `endif
VX_mem_bus_if #( VX_mem_bus_if #(
@ -97,8 +99,6 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
.TAG_WIDTH (ARB_TAG_WIDTH) .TAG_WIDTH (ARB_TAG_WIDTH)
) arb_core_bus_if[NUM_CACHES * NUM_REQS](); ) arb_core_bus_if[NUM_CACHES * NUM_REQS]();
`RESET_RELAY (arb_reset, reset);
for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar i = 0; i < NUM_REQS; ++i) begin
VX_mem_bus_if #( VX_mem_bus_if #(
.DATA_SIZE (WORD_SIZE), .DATA_SIZE (WORD_SIZE),
@ -114,6 +114,8 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
`ASSIGN_VX_MEM_BUS_IF (core_bus_tmp_if[j], core_bus_if[j * NUM_REQS + i]); `ASSIGN_VX_MEM_BUS_IF (core_bus_tmp_if[j], core_bus_if[j * NUM_REQS + i]);
end end
`RESET_RELAY (arb_reset, reset);
VX_mem_arb #( VX_mem_arb #(
.NUM_INPUTS (NUM_INPUTS), .NUM_INPUTS (NUM_INPUTS),
.NUM_OUTPUTS (NUM_CACHES), .NUM_OUTPUTS (NUM_CACHES),
@ -135,9 +137,9 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
end end
end end
`RESET_RELAY (cache_reset, reset); for (genvar i = 0; i < NUM_CACHES; ++i) begin : caches
for (genvar i = 0; i < NUM_CACHES; ++i) begin `RESET_RELAY (cache_reset, reset);
VX_cache_wrap #( VX_cache_wrap #(
.INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, i)), .INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, i)),
@ -152,6 +154,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
.MRSQ_SIZE (MRSQ_SIZE), .MRSQ_SIZE (MRSQ_SIZE),
.MREQ_SIZE (MREQ_SIZE), .MREQ_SIZE (MREQ_SIZE),
.WRITE_ENABLE (WRITE_ENABLE), .WRITE_ENABLE (WRITE_ENABLE),
.WRITEBACK (WRITEBACK),
.UUID_WIDTH (UUID_WIDTH), .UUID_WIDTH (UUID_WIDTH),
.TAG_WIDTH (ARB_TAG_WIDTH), .TAG_WIDTH (ARB_TAG_WIDTH),
.TAG_SEL_IDX (TAG_SEL_IDX), .TAG_SEL_IDX (TAG_SEL_IDX),

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -17,17 +17,19 @@ module VX_cache_data #(
parameter `STRING INSTANCE_ID= "", parameter `STRING INSTANCE_ID= "",
parameter BANK_ID = 0, parameter BANK_ID = 0,
// Size of cache in bytes // Size of cache in bytes
parameter CACHE_SIZE = 1024, parameter CACHE_SIZE = 1024,
// Size of line inside a bank in bytes // Size of line inside a bank in bytes
parameter LINE_SIZE = 16, parameter LINE_SIZE = 16,
// Number of banks // Number of banks
parameter NUM_BANKS = 1, parameter NUM_BANKS = 1,
// Number of associative ways // Number of associative ways
parameter NUM_WAYS = 1, parameter NUM_WAYS = 1,
// Size of a word in bytes // Size of a word in bytes
parameter WORD_SIZE = 1, parameter WORD_SIZE = 1,
// Enable cache writeable // Enable cache writeable
parameter WRITE_ENABLE = 1, parameter WRITE_ENABLE = 1,
// Enable cache writeback
parameter WRITEBACK = 0,
// Request debug identifier // Request debug identifier
parameter UUID_WIDTH = 0 parameter UUID_WIDTH = 0
) ( ) (
@ -41,59 +43,100 @@ module VX_cache_data #(
input wire stall, input wire stall,
input wire read, input wire read,
input wire fill, input wire fill,
input wire flush,
input wire write, input wire write,
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr, input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
input wire [`UP(`CS_WORD_SEL_BITS)-1:0] wsel, input wire [`UP(`CS_WORD_SEL_BITS)-1:0] wsel,
input wire [WORD_SIZE-1:0] byteen,
input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data, input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data,
input wire [`CS_WORD_WIDTH-1:0] write_data, input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] write_data,
input wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen,
input wire [NUM_WAYS-1:0] way_sel, input wire [NUM_WAYS-1:0] way_sel,
output wire [`CS_WORD_WIDTH-1:0] read_data,
output wire [`CS_WORD_WIDTH-1:0] read_data output wire dirty_valid,
output wire [`CS_LINE_WIDTH-1:0] dirty_data,
output wire [LINE_SIZE-1:0] dirty_byteen
); );
`UNUSED_SPARAM (INSTANCE_ID) `UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_PARAM (BANK_ID) `UNUSED_PARAM (BANK_ID)
`UNUSED_PARAM (WORD_SIZE) `UNUSED_PARAM (WORD_SIZE)
`UNUSED_VAR (reset) `UNUSED_VAR (reset)
`UNUSED_VAR (stall)
`UNUSED_VAR (line_addr) `UNUSED_VAR (line_addr)
`UNUSED_VAR (read) `UNUSED_VAR (read)
`UNUSED_VAR (flush)
localparam BYTEENW = (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) ? (LINE_SIZE * NUM_WAYS) : 1; localparam BYTEENW = (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) ? (LINE_SIZE * NUM_WAYS) : 1;
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
wire [`LOG2UP(NUM_WAYS)-1:0] way_idx;
if (WRITEBACK) begin
reg [`CS_LINES_PER_BANK * NUM_WAYS-1:0][LINE_SIZE-1:0] dirty_bytes_r;
reg [`CS_LINES_PER_BANK * NUM_WAYS-1:0] dirty_blocks_r;
wire [`CLOG2(`CS_LINES_PER_BANK * NUM_WAYS)-1:0] way_addr;
if (NUM_WAYS > 1) begin
assign way_addr = {line_sel, way_idx};
end else begin
assign way_addr = line_sel;
end
always @(posedge clk) begin
if (fill) begin
dirty_bytes_r[way_addr] <= '0;
end else if (write) begin
dirty_bytes_r[way_addr] <= dirty_bytes_r[way_addr] | write_byteen;
end
end
always @(posedge clk) begin
if (reset) begin
for (integer i = 0; i < `CS_LINES_PER_BANK * NUM_WAYS; ++i) begin
dirty_blocks_r[i] <= 0;
end
end else begin
if (fill) begin
dirty_blocks_r[way_addr] <= 0;
end else if (write) begin
dirty_blocks_r[way_addr] <= 1;
end
end
end
assign dirty_byteen = dirty_bytes_r[way_addr];
assign dirty_valid = dirty_blocks_r[way_addr];
end else begin
assign dirty_byteen = '0;
assign dirty_valid = 0;
end
// order the data layout to perform ways multiplexing last.
// this allows converting way index to binary in parallel with BRAM read.
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] wdata; wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] wdata;
wire [BYTEENW-1:0] wren; wire [BYTEENW-1:0] wren;
if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin
reg [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] wdata_r; for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
reg [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wren_r; assign wdata[i] = (fill || !WRITE_ENABLE) ? {NUM_WAYS{fill_data[i]}} : {NUM_WAYS{write_data[i]}};
always @(*) begin
wdata_r = {`CS_WORDS_PER_LINE{write_data}};
wren_r = '0;
wren_r[wsel] = byteen;
end end
// order the data layout to perform ways multiplexing last
// this allows performing onehot encoding of the way index in parallel with BRAM read.
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w; wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w;
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
assign wdata[i] = fill ? {NUM_WAYS{fill_data[i]}} : {NUM_WAYS{wdata_r[i]}};
for (genvar j = 0; j < NUM_WAYS; ++j) begin for (genvar j = 0; j < NUM_WAYS; ++j) begin
assign wren_w[i][j] = (fill ? {WORD_SIZE{1'b1}} : wren_r[i]) assign wren_w[i][j] = ((fill || !WRITE_ENABLE) ? {WORD_SIZE{1'b1}} : write_byteen[i])
& {WORD_SIZE{((NUM_WAYS == 1) || way_sel[j])}}; & {WORD_SIZE{(way_sel[j] || (NUM_WAYS == 1))}};
end end
end end
assign wren = wren_w; assign wren = wren_w;
end else begin end else begin
`UNUSED_VAR (write) `UNUSED_VAR (write)
`UNUSED_VAR (byteen) `UNUSED_VAR (write_byteen)
`UNUSED_VAR (write_data) `UNUSED_VAR (write_data)
assign wdata = fill_data; assign wdata = fill_data;
assign wren = fill; assign wren = fill;
end end
wire [`LOG2UP(NUM_WAYS)-1:0] way_idx;
VX_onehot_encoder #( VX_onehot_encoder #(
.N (NUM_WAYS) .N (NUM_WAYS)
@ -105,8 +148,6 @@ module VX_cache_data #(
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] rdata; wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] rdata;
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
VX_sp_ram #( VX_sp_ram #(
.DATAW (`CS_LINE_WIDTH * NUM_WAYS), .DATAW (`CS_LINE_WIDTH * NUM_WAYS),
.SIZE (`CS_LINES_PER_BANK), .SIZE (`CS_LINES_PER_BANK),
@ -119,34 +160,41 @@ module VX_cache_data #(
.wren (wren), .wren (wren),
.addr (line_sel), .addr (line_sel),
.wdata (wdata), .wdata (wdata),
.rdata (rdata) .rdata (rdata)
); );
wire [NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata; wire [NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata;
if (`CS_WORDS_PER_LINE > 1) begin if (`CS_WORDS_PER_LINE > 1) begin
assign per_way_rdata = rdata[wsel]; assign per_way_rdata = rdata[wsel];
end else begin end else begin
`UNUSED_VAR (wsel) `UNUSED_VAR (wsel)
assign per_way_rdata = rdata; assign per_way_rdata = rdata;
end end
assign read_data = per_way_rdata[way_idx]; assign read_data = per_way_rdata[way_idx];
`UNUSED_VAR (stall) wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] dirty_data_w;
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
for (genvar j = 0; j < NUM_WAYS; ++j) begin
assign dirty_data_w[j][i] = rdata[i][j];
end
end
assign dirty_data = dirty_data_w[way_idx];
`ifdef DBG_TRACE_CACHE `ifdef DBG_TRACE_CACHE
always @(posedge clk) begin always @(posedge clk) begin
if (fill && ~stall) begin if (fill && ~stall) begin
`TRACE(3, ("%d: %s-bank%0d data-fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data)); `TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data));
end
if (flush && ~stall) begin
`TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b, byteen=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_valid, dirty_byteen));
end end
if (read && ~stall) begin if (read && ~stall) begin
`TRACE(3, ("%d: %s-bank%0d data-read: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, read_data, req_uuid)); `TRACE(3, ("%d: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid));
end end
if (write && ~stall) begin if (write && ~stall) begin
`TRACE(3, ("%d: %s-bank%0d data-write: addr=0x%0h, way=%b, blk_addr=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, byteen, write_data, req_uuid)); `TRACE(3, ("%d: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid));
end end
end end
`endif `endif
endmodule endmodule

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -14,7 +14,7 @@
`ifndef VX_CACHE_DEFINE_VH `ifndef VX_CACHE_DEFINE_VH
`define VX_CACHE_DEFINE_VH `define VX_CACHE_DEFINE_VH
`include "VX_define.vh" `include "VX_define.vh"
`define CS_REQ_SEL_BITS `CLOG2(NUM_REQS) `define CS_REQ_SEL_BITS `CLOG2(NUM_REQS)
@ -50,7 +50,7 @@
`define CS_TAG_SEL_ADDR_START (1+`CS_LINE_SEL_ADDR_END) `define CS_TAG_SEL_ADDR_START (1+`CS_LINE_SEL_ADDR_END)
`define CS_TAG_SEL_ADDR_END (`CS_WORD_ADDR_WIDTH-1) `define CS_TAG_SEL_ADDR_END (`CS_WORD_ADDR_WIDTH-1)
`define CS_LINE_TAG_ADDR(x) x[`CS_LINE_ADDR_WIDTH-1 : `CS_LINE_SEL_BITS] `define CS_LINE_ADDR_TAG(x) x[`CS_LINE_ADDR_WIDTH-1 : `CS_LINE_SEL_BITS]
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
@ -64,14 +64,14 @@
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
`define PERF_CACHE_ADD(dst, src, dcount, scount) \ `define PERF_CACHE_ADD(dst, src, count) \
`PERF_COUNTER_ADD (dst, src, reads, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \ `PERF_COUNTER_ADD (dst, src, reads, `PERF_CTR_BITS, count, (count > 1)) \
`PERF_COUNTER_ADD (dst, src, writes, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \ `PERF_COUNTER_ADD (dst, src, writes, `PERF_CTR_BITS, count, (count > 1)) \
`PERF_COUNTER_ADD (dst, src, read_misses, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \ `PERF_COUNTER_ADD (dst, src, read_misses, `PERF_CTR_BITS, count, (count > 1)) \
`PERF_COUNTER_ADD (dst, src, write_misses, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \ `PERF_COUNTER_ADD (dst, src, write_misses, `PERF_CTR_BITS, count, (count > 1)) \
`PERF_COUNTER_ADD (dst, src, bank_stalls, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \ `PERF_COUNTER_ADD (dst, src, bank_stalls, `PERF_CTR_BITS, count, (count > 1)) \
`PERF_COUNTER_ADD (dst, src, mshr_stalls, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \ `PERF_COUNTER_ADD (dst, src, mshr_stalls, `PERF_CTR_BITS, count, (count > 1)) \
`PERF_COUNTER_ADD (dst, src, mem_stalls, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \ `PERF_COUNTER_ADD (dst, src, mem_stalls, `PERF_CTR_BITS, count, (count > 1)) \
`PERF_COUNTER_ADD (dst, src, crsp_stalls, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) `PERF_COUNTER_ADD (dst, src, crsp_stalls, `PERF_CTR_BITS, count, (count > 1))
`endif // VX_CACHE_DEFINE_VH `endif // VX_CACHE_DEFINE_VH

154
hw/rtl/cache/VX_cache_flush.sv vendored Normal file
View file

@ -0,0 +1,154 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_cache_define.vh"
// Cache flush sequencer.
//
// Sits between the core request ports and the cache. While any input lane
// presents a valid request tagged with `ADDR_TYPE_FLUSH, all lanes are held
// back; the unit then (optionally) waits for previously issued requests to
// reach the banks, asserts flush_valid to every bank until each bank has
// reported flush_ready, and finally lets the pending flush requests through.
// The response path is passed through untouched.
module VX_cache_flush #(
    // Number of Word requests per cycle
    parameter NUM_REQS  = 4,
    // Number of banks
    parameter NUM_BANKS = 1,
    // Bank select latency
    parameter BANK_SEL_LATENCY = 1
) (
    input wire clk,
    input wire reset,
    // core-side request/response ports
    VX_mem_bus_if.slave  core_bus_in_if [NUM_REQS],
    // cache-side request/response ports
    VX_mem_bus_if.master core_bus_out_if [NUM_REQS],
    // per-bank pulse: a request was accepted by the bank this cycle
    input wire [NUM_BANKS-1:0]  bank_req_fire,
    // per-bank flush request (asserted while in STATE_FLUSH)
    output wire [NUM_BANKS-1:0] flush_valid,
    // per-bank flush acknowledge
    input wire [NUM_BANKS-1:0]  flush_ready
);
    localparam STATE_IDLE  = 0; // no flush in progress
    localparam STATE_WAIT  = 1; // draining requests between this unit and the banks
    localparam STATE_FLUSH = 2; // flush_valid asserted, collecting flush_ready
    localparam STATE_DONE  = 3; // releasing the held flush requests

    // track in-flight core requests
    wire no_inflight_reqs;

    if (BANK_SEL_LATENCY != 0) begin

        localparam NUM_REQS_W  = `CLOG2(NUM_REQS+1);
        localparam NUM_BANKS_W = `CLOG2(NUM_BANKS+1);

        wire [NUM_REQS-1:0] core_bus_out_fire;
        for (genvar i = 0; i < NUM_REQS; ++i) begin
            assign core_bus_out_fire[i] = core_bus_out_if[i].req_valid && core_bus_out_if[i].req_ready;
        end

        wire [NUM_REQS_W-1:0] core_bus_out_cnt;
        wire [NUM_BANKS_W-1:0] bank_req_cnt;

        `POP_COUNT(core_bus_out_cnt, core_bus_out_fire);
        `POP_COUNT(bank_req_cnt, bank_req_fire);
        `UNUSED_VAR (core_bus_out_cnt)

        // counts requests issued downstream minus requests accepted by banks
        VX_pending_size #(
            .SIZE  (BANK_SEL_LATENCY * NUM_BANKS),
            .INCRW (NUM_BANKS_W),
            .DECRW (NUM_BANKS_W)
        ) pending_size (
            .clk   (clk),
            .reset (reset),
            .incr  (NUM_BANKS_W'(core_bus_out_cnt)),
            .decr  (bank_req_cnt),
            .empty (no_inflight_reqs),
            `UNUSED_PIN (alm_empty),
            `UNUSED_PIN (full),
            `UNUSED_PIN (alm_full),
            `UNUSED_PIN (size)
        );

    end else begin
        // STATE_WAIT is never entered when BANK_SEL_LATENCY == 0,
        // so this value is not consumed by the state machine.
        assign no_inflight_reqs = 0;
        `UNUSED_VAR (bank_req_fire)
    end

    reg [1:0] state, state_n;

    // per-bank record of which banks have already acknowledged the flush
    reg [NUM_BANKS-1:0] flush_done, flush_done_n;

    // lanes currently presenting a flush request
    wire [NUM_REQS-1:0] flush_req_mask;
    for (genvar i = 0; i < NUM_REQS; ++i) begin
        assign flush_req_mask[i] = core_bus_in_if[i].req_valid && core_bus_in_if[i].req_data.atype[`ADDR_TYPE_FLUSH];
    end
    wire flush_req_enable = (| flush_req_mask);

    // lanes allowed to issue their held request once the flush completed
    reg [NUM_REQS-1:0] lock_released, lock_released_n;

    // request path: blocked on every lane while a flush request is pending,
    // except for lanes explicitly released in STATE_DONE
    for (genvar i = 0; i < NUM_REQS; ++i) begin
        wire input_enable = ~flush_req_enable || lock_released[i];
        assign core_bus_out_if[i].req_valid = core_bus_in_if[i].req_valid && input_enable;
        assign core_bus_out_if[i].req_data  = core_bus_in_if[i].req_data;
        assign core_bus_in_if[i].req_ready  = core_bus_out_if[i].req_ready && input_enable;
    end

    // response path: straight pass-through
    for (genvar i = 0; i < NUM_REQS; ++i) begin
        assign core_bus_in_if[i].rsp_valid  = core_bus_out_if[i].rsp_valid;
        assign core_bus_in_if[i].rsp_data   = core_bus_out_if[i].rsp_data;
        assign core_bus_out_if[i].rsp_ready = core_bus_in_if[i].rsp_ready;
    end

    wire [NUM_REQS-1:0] core_bus_out_ready;
    for (genvar i = 0; i < NUM_REQS; ++i) begin
        assign core_bus_out_ready[i] = core_bus_out_if[i].req_ready;
    end

    always @(*) begin
        state_n         = state;
        flush_done_n    = flush_done;
        lock_released_n = lock_released;
        case (state)
            STATE_IDLE: begin
                if (flush_req_enable) begin
                    // skip the drain phase when there is no bank-select latency
                    state_n = (BANK_SEL_LATENCY != 0) ? STATE_WAIT : STATE_FLUSH;
                end
            end
            STATE_WAIT: begin
                // wait until every issued request has reached its bank
                if (no_inflight_reqs) begin
                    state_n = STATE_FLUSH;
                end
            end
            STATE_FLUSH: begin
                // accumulate the banks that have acknowledged so far
                flush_done_n = flush_done | flush_ready;
                // leave only once ALL banks have acknowledged; comparing
                // against zero would exit before any bank responded and
                // hang forever as soon as a single bank did respond.
                if (flush_done_n == {NUM_BANKS{1'b1}}) begin
                    state_n = STATE_DONE;
                    // reset the accumulator so the next flush starts clean
                    flush_done_n = '0;
                    // only release the lanes carrying flush requests;
                    // regular requests stay locked until back in IDLE
                    lock_released_n = flush_req_mask;
                end
            end
            STATE_DONE: begin
                // drop the release bit of each lane whose downstream port is ready
                lock_released_n = lock_released & ~core_bus_out_ready;
                if (lock_released_n == 0) begin
                    state_n = STATE_IDLE;
                end
            end
        endcase
    end

    always @(posedge clk) begin
        if (reset) begin
            state         <= STATE_IDLE;
            flush_done    <= '0;
            lock_released <= '0;
        end else begin
            state         <= state_n;
            flush_done    <= flush_done_n;
            lock_released <= lock_released_n;
        end
    end

    // NOTE(review): flush_valid stays asserted to banks that have already
    // acknowledged until the slowest bank responds - confirm the banks
    // tolerate this, otherwise mask with ~flush_done.
    assign flush_valid = {NUM_BANKS{state == STATE_FLUSH}};

endmodule

View file

@ -13,6 +13,7 @@
`include "VX_cache_define.vh" `include "VX_cache_define.vh"
// cache flush unit
module VX_cache_init #( module VX_cache_init #(
// Size of cache in bytes // Size of cache in bytes
parameter CACHE_SIZE = 1024, parameter CACHE_SIZE = 1024,

View file

@ -104,7 +104,8 @@ module VX_cache_mshr #(
// lookup // lookup
input wire lookup_valid, input wire lookup_valid,
input wire [`CS_LINE_ADDR_WIDTH-1:0] lookup_addr, input wire [`CS_LINE_ADDR_WIDTH-1:0] lookup_addr,
output wire [MSHR_SIZE-1:0] lookup_matches, output wire [MSHR_SIZE-1:0] lookup_pending,
output wire [MSHR_SIZE-1:0] lookup_rw,
// finalize // finalize
input wire finalize_valid, input wire finalize_valid,
@ -216,13 +217,13 @@ module VX_cache_mshr #(
next_table <= next_table_n; next_table <= next_table_n;
end end
`RUNTIME_ASSERT((~allocate_fire || ~valid_table[allocate_id_r]), ("%t: *** %s-bank%0d inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, BANK_ID, `RUNTIME_ASSERT((~allocate_fire || ~valid_table[allocate_id_r]), ("%t: *** %s inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id_r, lkp_req_uuid)) `CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id_r, lkp_req_uuid))
`RUNTIME_ASSERT((~finalize_valid || valid_table[finalize_id]), ("%t: *** %s-bank%0d invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, BANK_ID, `RUNTIME_ASSERT((~finalize_valid || valid_table[finalize_id]), ("%t: *** %s invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_table[finalize_id], BANK_ID), finalize_id, fin_req_uuid)) `CS_LINE_TO_FULL_ADDR(addr_table[finalize_id], BANK_ID), finalize_id, fin_req_uuid))
`RUNTIME_ASSERT((~fill_valid || valid_table[fill_id]), ("%t: *** %s-bank%0d invalid fill: addr=0x%0h, id=%0d", $time, INSTANCE_ID, BANK_ID, `RUNTIME_ASSERT((~fill_valid || valid_table[fill_id]), ("%t: *** %s invalid fill: addr=0x%0h, id=%0d", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), fill_id)) `CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), fill_id))
VX_dp_ram #( VX_dp_ram #(
@ -251,7 +252,9 @@ module VX_cache_mshr #(
assign dequeue_rw = write_table[dequeue_id_r]; assign dequeue_rw = write_table[dequeue_id_r];
assign dequeue_id = dequeue_id_r; assign dequeue_id = dequeue_id_r;
assign lookup_matches = addr_matches & ~write_table; // return pending entries for the given cache line
assign lookup_pending = addr_matches;
assign lookup_rw = write_table;
`UNUSED_VAR (lookup_valid) `UNUSED_VAR (lookup_valid)
@ -264,22 +267,22 @@ module VX_cache_mshr #(
show_table <= allocate_fire || lookup_valid || finalize_valid || fill_valid || dequeue_fire; show_table <= allocate_fire || lookup_valid || finalize_valid || fill_valid || dequeue_fire;
end end
if (allocate_fire) if (allocate_fire)
`TRACE(3, ("%d: %s-bank%0d mshr-allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `TRACE(3, ("%d: %s allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_prev, allocate_id, lkp_req_uuid)); `CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_prev, allocate_id, lkp_req_uuid));
if (lookup_valid) if (lookup_valid)
`TRACE(3, ("%d: %s-bank%0d mshr-lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `TRACE(3, ("%d: %s lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_matches, lkp_req_uuid)); `CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_pending, lkp_req_uuid));
if (finalize_valid) if (finalize_valid)
`TRACE(3, ("%d: %s-bank%0d mshr-finalize release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `TRACE(3, ("%d: %s finalize release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID,
finalize_release, finalize_pending, finalize_prev, finalize_id, fin_req_uuid)); finalize_release, finalize_pending, finalize_prev, finalize_id, fin_req_uuid));
if (fill_valid) if (fill_valid)
`TRACE(3, ("%d: %s-bank%0d mshr-fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID, BANK_ID, `TRACE(3, ("%d: %s fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), `CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id)); `CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), `CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id));
if (dequeue_fire) if (dequeue_fire)
`TRACE(3, ("%d: %s-bank%0d mshr-dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `TRACE(3, ("%d: %s dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_uuid)); `CS_LINE_TO_FULL_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_uuid));
if (show_table) begin if (show_table) begin
`TRACE(3, ("%d: %s-bank%0d mshr-table", $time, INSTANCE_ID, BANK_ID)); `TRACE(3, ("%d: %s table", $time, INSTANCE_ID));
for (integer i = 0; i < MSHR_SIZE; ++i) begin for (integer i = 0; i < MSHR_SIZE; ++i) begin
if (valid_table[i]) begin if (valid_table[i]) begin
`TRACE(3, (" %0d=0x%0h", i, `CS_LINE_TO_FULL_ADDR(addr_table[i], BANK_ID))); `TRACE(3, (" %0d=0x%0h", i, `CS_LINE_TO_FULL_ADDR(addr_table[i], BANK_ID)));

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -17,15 +17,15 @@ module VX_cache_tags #(
parameter `STRING INSTANCE_ID = "", parameter `STRING INSTANCE_ID = "",
parameter BANK_ID = 0, parameter BANK_ID = 0,
// Size of cache in bytes // Size of cache in bytes
parameter CACHE_SIZE = 1024, parameter CACHE_SIZE = 1024,
// Size of line inside a bank in bytes // Size of line inside a bank in bytes
parameter LINE_SIZE = 16, parameter LINE_SIZE = 16,
// Number of banks // Number of banks
parameter NUM_BANKS = 1, parameter NUM_BANKS = 1,
// Number of associative ways // Number of associative ways
parameter NUM_WAYS = 1, parameter NUM_WAYS = 1,
// Size of a word in bytes // Size of a word in bytes
parameter WORD_SIZE = 1, parameter WORD_SIZE = 1,
// Request debug identifier // Request debug identifier
parameter UUID_WIDTH = 0 parameter UUID_WIDTH = 0
) ( ) (
@ -38,45 +38,63 @@ module VX_cache_tags #(
input wire stall, input wire stall,
// read/fill // init/fill/lookup
input wire init,
input wire fill,
input wire lookup, input wire lookup,
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr, input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
input wire fill, output wire [NUM_WAYS-1:0] tag_matches,
input wire init,
output wire [NUM_WAYS-1:0] way_sel, // replacement
output wire [NUM_WAYS-1:0] tag_matches output wire [NUM_WAYS-1:0] repl_way,
output wire [`CS_TAG_SEL_BITS-1:0] repl_tag
); );
`UNUSED_SPARAM (INSTANCE_ID) `UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_PARAM (BANK_ID) `UNUSED_PARAM (BANK_ID)
`UNUSED_VAR (reset) `UNUSED_VAR (reset)
`UNUSED_VAR (lookup) `UNUSED_VAR (lookup)
// valid, tag
localparam TAG_WIDTH = 1 + `CS_TAG_SEL_BITS; localparam TAG_WIDTH = 1 + `CS_TAG_SEL_BITS;
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0]; wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_TAG_ADDR(line_addr); wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_ADDR_TAG(line_addr);
wire [NUM_WAYS-1:0][`CS_TAG_SEL_BITS-1:0] read_tag;
wire [NUM_WAYS-1:0] read_valid;
if (NUM_WAYS > 1) begin if (NUM_WAYS > 1) begin
reg [NUM_WAYS-1:0] repl_way; reg [NUM_WAYS-1:0] repl_way_r;
// cyclic assignment of replacement way // cyclic assignment of replacement way
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
repl_way <= 1; repl_way_r <= 1;
end else if (~stall) begin // hold the value on stalls prevent filling different slots twice end else if (~stall) begin // hold the value on stalls prevent filling different slots twice
repl_way <= {repl_way[NUM_WAYS-2:0], repl_way[NUM_WAYS-1]}; repl_way_r <= {repl_way_r[NUM_WAYS-2:0], repl_way_r[NUM_WAYS-1]};
end end
end
for (genvar i = 0; i < NUM_WAYS; ++i) begin
assign way_sel[i] = fill && repl_way[i];
end end
assign repl_way = repl_way_r;
VX_onehot_mux #(
.DATAW (`CS_TAG_SEL_BITS),
.N (NUM_WAYS)
) repl_tag_sel (
.data_in (read_tag),
.sel_in (repl_way_r),
.data_out (repl_tag)
);
end else begin end else begin
`UNUSED_VAR (stall) `UNUSED_VAR (stall)
assign way_sel = fill; assign repl_way = 1'b1;
assign repl_tag = read_tag;
end end
for (genvar i = 0; i < NUM_WAYS; ++i) begin for (genvar i = 0; i < NUM_WAYS; ++i) begin
wire [`CS_TAG_SEL_BITS-1:0] read_tag;
wire read_valid; wire do_fill = fill && repl_way[i];
wire do_write = init || do_fill;
wire line_valid = ~init;
VX_sp_ram #( VX_sp_ram #(
.DATAW (TAG_WIDTH), .DATAW (TAG_WIDTH),
@ -85,32 +103,34 @@ module VX_cache_tags #(
) tag_store ( ) tag_store (
.clk (clk), .clk (clk),
.read (1'b1), .read (1'b1),
.write (way_sel[i] || init), .write (do_write),
`UNUSED_PIN (wren), `UNUSED_PIN (wren),
.addr (line_sel), .addr (line_sel),
.wdata ({~init, line_tag}), .wdata ({line_valid, line_tag}),
.rdata ({read_valid, read_tag}) .rdata ({read_valid[i], read_tag[i]})
); );
assign tag_matches[i] = read_valid && (line_tag == read_tag);
end end
for (genvar i = 0; i < NUM_WAYS; ++i) begin
assign tag_matches[i] = read_valid[i] && (line_tag == read_tag[i]);
end
`ifdef DBG_TRACE_CACHE `ifdef DBG_TRACE_CACHE
always @(posedge clk) begin always @(posedge clk) begin
if (fill && ~stall) begin if (fill && ~stall) begin
`TRACE(3, ("%d: %s-bank%0d tag-fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, line_tag)); `TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), repl_way, line_sel, line_tag));
end end
if (init) begin if (init) begin
`TRACE(3, ("%d: %s-bank%0d tag-init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel)); `TRACE(3, ("%d: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel));
end end
if (lookup && ~stall) begin if (lookup && ~stall) begin
if (tag_matches != 0) begin if (tag_matches != 0) begin
`TRACE(3, ("%d: %s-bank%0d tag-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, line_tag, req_uuid)); `TRACE(3, ("%d: %s hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid));
end else begin end else begin
`TRACE(3, ("%d: %s-bank%0d tag-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)); `TRACE(3, ("%d: %s miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid));
end end
end end
end end
`endif `endif
endmodule endmodule

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -23,20 +23,20 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
// Size of cache in bytes // Size of cache in bytes
parameter CACHE_SIZE = 4096, parameter CACHE_SIZE = 4096,
// Size of line inside a bank in bytes // Size of line inside a bank in bytes
parameter LINE_SIZE = 64, parameter LINE_SIZE = 64,
// Number of banks // Number of banks
parameter NUM_BANKS = 1, parameter NUM_BANKS = 1,
// Number of associative ways // Number of associative ways
parameter NUM_WAYS = 1, parameter NUM_WAYS = 1,
// Size of a word in bytes // Size of a word in bytes
parameter WORD_SIZE = 4, parameter WORD_SIZE = 4,
// Core Response Queue Size // Core Response Queue Size
parameter CRSQ_SIZE = 2, parameter CRSQ_SIZE = 2,
// Miss Reserv Queue Knob // Miss Reserv Queue Knob
parameter MSHR_SIZE = 8, parameter MSHR_SIZE = 8,
// Memory Response Queue Size // Memory Response Queue Size
parameter MRSQ_SIZE = 0, parameter MRSQ_SIZE = 0,
// Memory Request Queue Size // Memory Request Queue Size
@ -45,6 +45,9 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
// Enable cache writeable // Enable cache writeable
parameter WRITE_ENABLE = 1, parameter WRITE_ENABLE = 1,
// Enable cache writeback
parameter WRITEBACK = 0,
// Request debug identifier // Request debug identifier
parameter UUID_WIDTH = 0, parameter UUID_WIDTH = 0,
@ -63,7 +66,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
// Memory request output buffer // Memory request output buffer
parameter MEM_OUT_BUF = 0 parameter MEM_OUT_BUF = 0
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
@ -80,7 +83,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE); localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE);
localparam CACHE_MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS; localparam CACHE_MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS;
localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) : localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) : (NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS)); `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS));
@ -98,7 +101,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
) mem_bus_cache_if(); ) mem_bus_cache_if();
if (NC_OR_BYPASS) begin if (NC_OR_BYPASS) begin
`RESET_RELAY (nc_bypass_reset, reset); `RESET_RELAY (nc_bypass_reset, reset);
VX_cache_bypass #( VX_cache_bypass #(
@ -108,13 +111,13 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
.PASSTHRU (PASSTHRU), .PASSTHRU (PASSTHRU),
.NC_ENABLE (PASSTHRU ? 0 : NC_ENABLE), .NC_ENABLE (PASSTHRU ? 0 : NC_ENABLE),
.WORD_SIZE (WORD_SIZE), .WORD_SIZE (WORD_SIZE),
.LINE_SIZE (LINE_SIZE), .LINE_SIZE (LINE_SIZE),
.CORE_ADDR_WIDTH (`CS_WORD_ADDR_WIDTH), .CORE_ADDR_WIDTH (`CS_WORD_ADDR_WIDTH),
.CORE_TAG_WIDTH (TAG_WIDTH), .CORE_TAG_WIDTH (TAG_WIDTH),
.MEM_ADDR_WIDTH (`CS_MEM_ADDR_WIDTH), .MEM_ADDR_WIDTH (`CS_MEM_ADDR_WIDTH),
.MEM_TAG_IN_WIDTH (CACHE_MEM_TAG_WIDTH), .MEM_TAG_IN_WIDTH (CACHE_MEM_TAG_WIDTH),
.MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH), .MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH),
@ -132,15 +135,15 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
.mem_bus_in_if (mem_bus_cache_if), .mem_bus_in_if (mem_bus_cache_if),
.mem_bus_out_if (mem_bus_if) .mem_bus_out_if (mem_bus_if)
); );
end else begin end else begin
for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar i = 0; i < NUM_REQS; ++i) begin
`ASSIGN_VX_MEM_BUS_IF (core_bus_cache_if[i], core_bus_if[i]); `ASSIGN_VX_MEM_BUS_IF (core_bus_cache_if[i], core_bus_if[i]);
end end
`ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_cache_if); `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_cache_if);
end end
if (PASSTHRU != 0) begin if (PASSTHRU != 0) begin
@ -152,7 +155,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
assign core_bus_cache_if[i].rsp_valid = 0; assign core_bus_cache_if[i].rsp_valid = 0;
assign core_bus_cache_if[i].rsp_data = '0; assign core_bus_cache_if[i].rsp_data = '0;
`UNUSED_VAR (core_bus_cache_if[i].rsp_ready) `UNUSED_VAR (core_bus_cache_if[i].rsp_ready)
end end
assign mem_bus_cache_if.req_valid = 0; assign mem_bus_cache_if.req_valid = 0;
assign mem_bus_cache_if.req_data = '0; assign mem_bus_cache_if.req_data = '0;
@ -183,6 +186,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
.MRSQ_SIZE (MRSQ_SIZE), .MRSQ_SIZE (MRSQ_SIZE),
.MREQ_SIZE (MREQ_SIZE), .MREQ_SIZE (MREQ_SIZE),
.WRITE_ENABLE (WRITE_ENABLE), .WRITE_ENABLE (WRITE_ENABLE),
.WRITEBACK (WRITEBACK),
.UUID_WIDTH (UUID_WIDTH), .UUID_WIDTH (UUID_WIDTH),
.TAG_WIDTH (TAG_WIDTH), .TAG_WIDTH (TAG_WIDTH),
.CORE_OUT_BUF (NC_OR_BYPASS ? 1 : CORE_OUT_BUF), .CORE_OUT_BUF (NC_OR_BYPASS ? 1 : CORE_OUT_BUF),
@ -195,8 +199,8 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
`endif `endif
.core_bus_if (core_bus_cache_if), .core_bus_if (core_bus_cache_if),
.mem_bus_if (mem_bus_cache_if) .mem_bus_if (mem_bus_cache_if)
); );
end end
`ifdef DBG_TRACE_CACHE `ifdef DBG_TRACE_CACHE
@ -225,9 +229,9 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
end end
if (core_rsp_fire) begin if (core_rsp_fire) begin
`TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid)); `TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid));
end end
end end
end end
wire [`UP(UUID_WIDTH)-1:0] mem_req_uuid; wire [`UP(UUID_WIDTH)-1:0] mem_req_uuid;
wire [`UP(UUID_WIDTH)-1:0] mem_rsp_uuid; wire [`UP(UUID_WIDTH)-1:0] mem_rsp_uuid;
@ -246,17 +250,17 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
always @(posedge clk) begin always @(posedge clk) begin
if (mem_req_fire) begin if (mem_req_fire) begin
if (mem_bus_if.req_data.rw) if (mem_bus_if.req_data.rw)
`TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", `TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid)); $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid));
else else
`TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", `TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid)); $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid));
end end
if (mem_rsp_fire) begin if (mem_rsp_fire) begin
`TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n", `TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n",
$time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid)); $time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid));
end end
end end
`endif `endif
endmodule endmodule

View file

@ -14,7 +14,7 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_alu_int #( module VX_alu_int #(
parameter CORE_ID = 0, parameter `STRING INSTANCE_ID = "",
parameter BLOCK_IDX = 0, parameter BLOCK_IDX = 0,
parameter NUM_LANES = 1 parameter NUM_LANES = 1
) ( ) (
@ -29,7 +29,7 @@ module VX_alu_int #(
VX_branch_ctl_if.master branch_ctl_if VX_branch_ctl_if.master branch_ctl_if
); );
`UNUSED_PARAM (CORE_ID) `UNUSED_SPARAM (INSTANCE_ID)
localparam LANE_BITS = `CLOG2(NUM_LANES); localparam LANE_BITS = `CLOG2(NUM_LANES);
localparam LANE_WIDTH = `UP(LANE_BITS); localparam LANE_WIDTH = `UP(LANE_BITS);
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
@ -121,7 +121,7 @@ module VX_alu_int #(
case ({is_alu_w, op_class}) case ({is_alu_w, op_class})
3'b000: alu_result[i] = add_result[i]; // ADD, LUI, AUIPC 3'b000: alu_result[i] = add_result[i]; // ADD, LUI, AUIPC
3'b001: alu_result[i] = sub_slt_br_result; // SUB, SLTU, SLTI, BR* 3'b001: alu_result[i] = sub_slt_br_result; // SUB, SLTU, SLTI, BR*
3'b010: alu_result[i] = shr_zic_result[i]; // SRL, SRA, SRLI, SRAI, CZERO* 3'b010: alu_result[i] = shr_zic_result[i]; // SRL, SRA, SRLI, SRAI, CZERO*
3'b011: alu_result[i] = msc_result[i]; // AND, OR, XOR, SLL, SLLI 3'b011: alu_result[i] = msc_result[i]; // AND, OR, XOR, SLL, SLLI
3'b100: alu_result[i] = add_result_w[i]; // ADDIW, ADDW 3'b100: alu_result[i] = add_result_w[i]; // ADDIW, ADDW
3'b101: alu_result[i] = sub_result_w[i]; // SUBW 3'b101: alu_result[i] = sub_result_w[i]; // SUBW
@ -181,7 +181,7 @@ module VX_alu_int #(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.enable (1'b1), .enable (1'b1),
.data_in ({br_enable, br_wid, br_taken, br_dest}), .data_in ({br_enable, br_wid, br_taken, br_dest}),
.data_out ({branch_ctl_if.valid, branch_ctl_if.wid, branch_ctl_if.taken, branch_ctl_if.dest}) .data_out ({branch_ctl_if.valid, branch_ctl_if.wid, branch_ctl_if.taken, branch_ctl_if.dest})
); );
@ -193,9 +193,9 @@ module VX_alu_int #(
`ifdef DBG_TRACE_PIPELINE `ifdef DBG_TRACE_PIPELINE
always @(posedge clk) begin always @(posedge clk) begin
if (branch_ctl_if.valid) begin if (br_enable) begin
`TRACE(1, ("%d: core%0d-branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n", `TRACE(1, ("%d: %s-branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n",
$time, CORE_ID, branch_ctl_if.wid, {commit_if.data.PC, 1'b0}, branch_ctl_if.taken, {branch_ctl_if.dest, 1'b0}, commit_if.data.uuid)); $time, INSTANCE_ID, br_wid, {commit_if.data.PC, 1'b0}, br_taken, {br_dest, 1'b0}, commit_if.data.uuid));
end end
end end
`endif `endif

View file

@ -14,7 +14,7 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_alu_muldiv #( module VX_alu_muldiv #(
parameter CORE_ID = 0, parameter `STRING INSTANCE_ID = "",
parameter NUM_LANES = 1 parameter NUM_LANES = 1
) ( ) (
input wire clk, input wire clk,
@ -26,7 +26,7 @@ module VX_alu_muldiv #(
// Outputs // Outputs
VX_commit_if.master commit_if VX_commit_if.master commit_if
); );
`UNUSED_PARAM (CORE_ID) `UNUSED_SPARAM (INSTANCE_ID)
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
localparam PID_WIDTH = `UP(PID_BITS); localparam PID_WIDTH = `UP(PID_BITS);
localparam TAG_WIDTH = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + PID_WIDTH + 1 + 1; localparam TAG_WIDTH = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + PID_WIDTH + 1 + 1;
@ -69,7 +69,7 @@ module VX_alu_muldiv #(
wire mul_fire_in = mul_valid_in && mul_ready_in; wire mul_fire_in = mul_valid_in && mul_ready_in;
for (genvar i = 0; i < NUM_LANES; ++i) begin for (genvar i = 0; i < NUM_LANES; ++i) begin
wire [`XLEN-1:0] mul_resultl, mul_resulth; reg [`XLEN-1:0] mul_resultl, mul_resulth;
wire [`XLEN-1:0] mul_in1 = is_alu_w ? (execute_if.data.rs1_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs1_data[i]; wire [`XLEN-1:0] mul_in1 = is_alu_w ? (execute_if.data.rs1_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs1_data[i];
wire [`XLEN-1:0] mul_in2 = is_alu_w ? (execute_if.data.rs2_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs2_data[i]; wire [`XLEN-1:0] mul_in2 = is_alu_w ? (execute_if.data.rs2_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs2_data[i];
always @(*) begin always @(*) begin
@ -235,7 +235,7 @@ module VX_alu_muldiv #(
wire div_fire_in = div_valid_in && div_ready_in; wire div_fire_in = div_valid_in && div_ready_in;
for (genvar i = 0; i < NUM_LANES; ++i) begin for (genvar i = 0; i < NUM_LANES; ++i) begin
wire [`XLEN-1:0] div_quotient, div_remainder; reg [`XLEN-1:0] div_quotient, div_remainder;
always @(*) begin always @(*) begin
dpi_idiv (div_fire_in, is_signed_op, div_in1[i], div_in2[i], div_quotient, div_remainder); dpi_idiv (div_fire_in, is_signed_op, div_in1[i], div_in2[i], div_quotient, div_remainder);
end end

View file

@ -14,7 +14,7 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_alu_unit #( module VX_alu_unit #(
parameter CORE_ID = 0 parameter `STRING INSTANCE_ID = ""
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
@ -27,7 +27,7 @@ module VX_alu_unit #(
VX_branch_ctl_if.master branch_ctl_if [`NUM_ALU_BLOCKS] VX_branch_ctl_if.master branch_ctl_if [`NUM_ALU_BLOCKS]
); );
`UNUSED_PARAM (CORE_ID) `UNUSED_SPARAM (INSTANCE_ID)
localparam BLOCK_SIZE = `NUM_ALU_BLOCKS; localparam BLOCK_SIZE = `NUM_ALU_BLOCKS;
localparam NUM_LANES = `NUM_ALU_LANES; localparam NUM_LANES = `NUM_ALU_LANES;
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
@ -75,7 +75,7 @@ module VX_alu_unit #(
`RESET_RELAY (int_reset, block_reset); `RESET_RELAY (int_reset, block_reset);
VX_alu_int #( VX_alu_int #(
.CORE_ID (CORE_ID), .INSTANCE_ID ($sformatf("%s-int%0d", INSTANCE_ID, block_idx)),
.BLOCK_IDX (block_idx), .BLOCK_IDX (block_idx),
.NUM_LANES (NUM_LANES) .NUM_LANES (NUM_LANES)
) alu_int ( ) alu_int (
@ -90,59 +90,61 @@ module VX_alu_unit #(
VX_execute_if #( VX_execute_if #(
.NUM_LANES (NUM_LANES) .NUM_LANES (NUM_LANES)
) mdv_execute_if(); ) muldiv_execute_if();
VX_commit_if #( VX_commit_if #(
.NUM_LANES (NUM_LANES) .NUM_LANES (NUM_LANES)
) mdv_commit_if(); ) muldiv_commit_if();
assign mdv_execute_if.valid = per_block_execute_if[block_idx].valid && is_muldiv_op; assign muldiv_execute_if.valid = per_block_execute_if[block_idx].valid && is_muldiv_op;
assign mdv_execute_if.data = per_block_execute_if[block_idx].data; assign muldiv_execute_if.data = per_block_execute_if[block_idx].data;
`RESET_RELAY (mdv_reset, block_reset); `RESET_RELAY (muldiv_reset, block_reset);
VX_alu_muldiv #( VX_alu_muldiv #(
.CORE_ID (CORE_ID), .INSTANCE_ID ($sformatf("%s-muldiv%0d", INSTANCE_ID, block_idx)),
.NUM_LANES (NUM_LANES) .NUM_LANES (NUM_LANES)
) mdv_unit ( ) muldiv_unit (
.clk (clk), .clk (clk),
.reset (mdv_reset), .reset (muldiv_reset),
.execute_if (mdv_execute_if), .execute_if (muldiv_execute_if),
.commit_if (mdv_commit_if) .commit_if (muldiv_commit_if)
); );
`endif `endif
assign per_block_execute_if[block_idx].ready = assign per_block_execute_if[block_idx].ready =
`ifdef EXT_M_ENABLE `ifdef EXT_M_ENABLE
is_muldiv_op ? mdv_execute_if.ready : is_muldiv_op ? muldiv_execute_if.ready :
`endif `endif
int_execute_if.ready; int_execute_if.ready;
// send response // send response
`RESET_RELAY (arb_reset, block_reset);
VX_stream_arb #( VX_stream_arb #(
.NUM_INPUTS (RSP_ARB_SIZE), .NUM_INPUTS (RSP_ARB_SIZE),
.DATAW (RSP_ARB_DATAW), .DATAW (RSP_ARB_DATAW),
.OUT_BUF (PARTIAL_BW ? 1 : 3) .OUT_BUF (PARTIAL_BW ? 1 : 3)
) rsp_arb ( ) rsp_arb (
.clk (clk), .clk (clk),
.reset (block_reset), .reset (arb_reset),
.valid_in ({ .valid_in ({
`ifdef EXT_M_ENABLE `ifdef EXT_M_ENABLE
mdv_commit_if.valid, muldiv_commit_if.valid,
`endif `endif
int_commit_if.valid int_commit_if.valid
}), }),
.ready_in ({ .ready_in ({
`ifdef EXT_M_ENABLE `ifdef EXT_M_ENABLE
mdv_commit_if.ready, muldiv_commit_if.ready,
`endif `endif
int_commit_if.ready int_commit_if.ready
}), }),
.data_in ({ .data_in ({
`ifdef EXT_M_ENABLE `ifdef EXT_M_ENABLE
mdv_commit_if.data, muldiv_commit_if.data,
`endif `endif
int_commit_if.data int_commit_if.data
}), }),

View file

@ -13,8 +13,8 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_commit import VX_gpu_pkg::*; #( module VX_commit import VX_gpu_pkg::*, VX_trace_pkg::*; #(
parameter CORE_ID = 0 parameter `STRING INSTANCE_ID = ""
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
@ -27,7 +27,7 @@ module VX_commit import VX_gpu_pkg::*; #(
VX_commit_csr_if.master commit_csr_if, VX_commit_csr_if.master commit_csr_if,
VX_commit_sched_if.master commit_sched_if VX_commit_sched_if.master commit_sched_if
); );
`UNUSED_PARAM (CORE_ID) `UNUSED_SPARAM (INSTANCE_ID)
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + 1 + `NR_BITS + `NUM_THREADS * `XLEN + 1 + 1 + 1; localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + 1 + `NR_BITS + `NUM_THREADS * `XLEN + 1 + 1 + 1;
localparam COMMIT_SIZEW = `CLOG2(`NUM_THREADS + 1); localparam COMMIT_SIZEW = `CLOG2(`NUM_THREADS + 1);
localparam COMMIT_ALL_SIZEW = COMMIT_SIZEW + `ISSUE_WIDTH - 1; localparam COMMIT_ALL_SIZEW = COMMIT_SIZEW + `ISSUE_WIDTH - 1;
@ -36,12 +36,10 @@ module VX_commit import VX_gpu_pkg::*; #(
VX_commit_if commit_arb_if[`ISSUE_WIDTH](); VX_commit_if commit_arb_if[`ISSUE_WIDTH]();
wire [`ISSUE_WIDTH-1:0] commit_fire; wire [`ISSUE_WIDTH-1:0] per_issue_commit_fire;
wire [`ISSUE_WIDTH-1:0][`NW_WIDTH-1:0] commit_wid; wire [`ISSUE_WIDTH-1:0][`NW_WIDTH-1:0] per_issue_commit_wid;
wire [`ISSUE_WIDTH-1:0][`NUM_THREADS-1:0] commit_tmask; wire [`ISSUE_WIDTH-1:0][`NUM_THREADS-1:0] per_issue_commit_tmask;
wire [`ISSUE_WIDTH-1:0] commit_eop; wire [`ISSUE_WIDTH-1:0] per_issue_commit_eop;
`RESET_RELAY (arb_reset, reset);
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
@ -55,6 +53,8 @@ module VX_commit import VX_gpu_pkg::*; #(
assign commit_if[j * `ISSUE_WIDTH + i].ready = ready_in[j]; assign commit_if[j * `ISSUE_WIDTH + i].ready = ready_in[j];
end end
`RESET_RELAY (arb_reset, reset);
VX_stream_arb #( VX_stream_arb #(
.NUM_INPUTS (`NUM_EX_UNITS), .NUM_INPUTS (`NUM_EX_UNITS),
.DATAW (DATAW), .DATAW (DATAW),
@ -72,10 +72,10 @@ module VX_commit import VX_gpu_pkg::*; #(
`UNUSED_PIN (sel_out) `UNUSED_PIN (sel_out)
); );
assign commit_fire[i] = commit_arb_if[i].valid && commit_arb_if[i].ready; assign per_issue_commit_fire[i] = commit_arb_if[i].valid && commit_arb_if[i].ready;
assign commit_tmask[i]= {`NUM_THREADS{commit_fire[i]}} & commit_arb_if[i].data.tmask; assign per_issue_commit_tmask[i]= {`NUM_THREADS{per_issue_commit_fire[i]}} & commit_arb_if[i].data.tmask;
assign commit_wid[i] = commit_arb_if[i].data.wid; assign per_issue_commit_wid[i] = commit_arb_if[i].data.wid;
assign commit_eop[i] = commit_arb_if[i].data.eop; assign per_issue_commit_eop[i] = commit_arb_if[i].data.eop;
end end
// CSRs update // CSRs update
@ -84,11 +84,11 @@ module VX_commit import VX_gpu_pkg::*; #(
wire [COMMIT_ALL_SIZEW-1:0] commit_size_all_r, commit_size_all_rr; wire [COMMIT_ALL_SIZEW-1:0] commit_size_all_r, commit_size_all_rr;
wire commit_fire_any, commit_fire_any_r, commit_fire_any_rr; wire commit_fire_any, commit_fire_any_r, commit_fire_any_rr;
assign commit_fire_any = (| commit_fire); assign commit_fire_any = (| per_issue_commit_fire);
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
wire [COMMIT_SIZEW-1:0] count; wire [COMMIT_SIZEW-1:0] count;
`POP_COUNT(count, commit_tmask[i]); `POP_COUNT(count, per_issue_commit_tmask[i]);
assign commit_size[i] = count; assign commit_size[i] = count;
end end
@ -136,19 +136,28 @@ module VX_commit import VX_gpu_pkg::*; #(
end end
assign commit_csr_if.instret = instret; assign commit_csr_if.instret = instret;
// Committed instructions // Track committed instructions
wire [`ISSUE_WIDTH-1:0] committed = commit_fire & commit_eop; reg [`NUM_WARPS-1:0] committed_warps;
always @(*) begin
committed_warps = 0;
for (integer i = 0; i < `ISSUE_WIDTH; ++i) begin
if (per_issue_commit_fire[i] && per_issue_commit_eop[i]) begin
committed_warps[per_issue_commit_wid[i]] = 1;
end
end
end
VX_pipe_register #( VX_pipe_register #(
.DATAW (`ISSUE_WIDTH * (1 + `NW_WIDTH)), .DATAW (`NUM_WARPS),
.RESETW (`ISSUE_WIDTH) .RESETW (`NUM_WARPS)
) committed_pipe_reg ( ) committed_pipe_reg (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.enable (1'b1), .enable (1'b1),
.data_in ({committed, commit_wid}), .data_in (committed_warps),
.data_out ({commit_sched_if.committed, commit_sched_if.committed_wid}) .data_out ({commit_sched_if.committed_warps})
); );
// Writeback // Writeback
@ -171,7 +180,7 @@ module VX_commit import VX_gpu_pkg::*; #(
for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin
always @(posedge clk) begin always @(posedge clk) begin
if (commit_if[j * `ISSUE_WIDTH + i].valid && commit_if[j * `ISSUE_WIDTH + i].ready) begin if (commit_if[j * `ISSUE_WIDTH + i].valid && commit_if[j * `ISSUE_WIDTH + i].ready) begin
`TRACE(1, ("%d: core%0d-commit: wid=%0d, PC=0x%0h, ex=", $time, CORE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0})); `TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0}));
trace_ex_type(1, j); trace_ex_type(1, j);
`TRACE(1, (", tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", commit_if[j * `ISSUE_WIDTH + i].data.tmask, commit_if[j * `ISSUE_WIDTH + i].data.wb, commit_if[j * `ISSUE_WIDTH + i].data.rd, commit_if[j * `ISSUE_WIDTH + i].data.sop, commit_if[j * `ISSUE_WIDTH + i].data.eop)); `TRACE(1, (", tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", commit_if[j * `ISSUE_WIDTH + i].data.tmask, commit_if[j * `ISSUE_WIDTH + i].data.wb, commit_if[j * `ISSUE_WIDTH + i].data.rd, commit_if[j * `ISSUE_WIDTH + i].data.sop, commit_if[j * `ISSUE_WIDTH + i].data.eop));
`TRACE_ARRAY1D(1, "0x%0h", commit_if[j * `ISSUE_WIDTH + i].data.data, `NUM_THREADS); `TRACE_ARRAY1D(1, "0x%0h", commit_if[j * `ISSUE_WIDTH + i].data.data, `NUM_THREADS);

View file

@ -18,7 +18,8 @@
`endif `endif
module VX_core import VX_gpu_pkg::*; #( module VX_core import VX_gpu_pkg::*; #(
parameter CORE_ID = 0 parameter CORE_ID = 0,
parameter `STRING INSTANCE_ID = ""
) ( ) (
`SCOPE_IO_DECL `SCOPE_IO_DECL
@ -94,13 +95,14 @@ module VX_core import VX_gpu_pkg::*; #(
`SCOPE_IO_SWITCH (3) `SCOPE_IO_SWITCH (3)
VX_schedule #( VX_schedule #(
.INSTANCE_ID ($sformatf("%s-schedule", INSTANCE_ID)),
.CORE_ID (CORE_ID) .CORE_ID (CORE_ID)
) schedule ( ) schedule (
.clk (clk), .clk (clk),
.reset (schedule_reset), .reset (schedule_reset),
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
.perf_schedule_if (pipeline_perf_if.schedule), .sched_perf (pipeline_perf_if.sched),
`endif `endif
.base_dcrs (base_dcrs), .base_dcrs (base_dcrs),
@ -121,7 +123,7 @@ module VX_core import VX_gpu_pkg::*; #(
); );
VX_fetch #( VX_fetch #(
.CORE_ID (CORE_ID) .INSTANCE_ID ($sformatf("%s-fetch", INSTANCE_ID))
) fetch ( ) fetch (
`SCOPE_IO_BIND (0) `SCOPE_IO_BIND (0)
.clk (clk), .clk (clk),
@ -132,7 +134,7 @@ module VX_core import VX_gpu_pkg::*; #(
); );
VX_decode #( VX_decode #(
.CORE_ID (CORE_ID) .INSTANCE_ID ($sformatf("%s-decode", INSTANCE_ID))
) decode ( ) decode (
.clk (clk), .clk (clk),
.reset (decode_reset), .reset (decode_reset),
@ -142,7 +144,7 @@ module VX_core import VX_gpu_pkg::*; #(
); );
VX_issue #( VX_issue #(
.CORE_ID (CORE_ID) .INSTANCE_ID ($sformatf("%s-issue", INSTANCE_ID))
) issue ( ) issue (
`SCOPE_IO_BIND (1) `SCOPE_IO_BIND (1)
@ -150,7 +152,7 @@ module VX_core import VX_gpu_pkg::*; #(
.reset (issue_reset), .reset (issue_reset),
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
.perf_issue_if (pipeline_perf_if.issue), .issue_perf (pipeline_perf_if.issue),
`endif `endif
.decode_if (decode_if), .decode_if (decode_if),
@ -159,6 +161,7 @@ module VX_core import VX_gpu_pkg::*; #(
); );
VX_execute #( VX_execute #(
.INSTANCE_ID ($sformatf("%s-execute", INSTANCE_ID)),
.CORE_ID (CORE_ID) .CORE_ID (CORE_ID)
) execute ( ) execute (
`SCOPE_IO_BIND (2) `SCOPE_IO_BIND (2)
@ -186,7 +189,7 @@ module VX_core import VX_gpu_pkg::*; #(
); );
VX_commit #( VX_commit #(
.CORE_ID (CORE_ID) .INSTANCE_ID ($sformatf("%s-commit", INSTANCE_ID))
) commit ( ) commit (
.clk (clk), .clk (clk),
.reset (commit_reset), .reset (commit_reset),
@ -210,7 +213,7 @@ module VX_core import VX_gpu_pkg::*; #(
`RESET_RELAY (lmem_unit_reset, reset); `RESET_RELAY (lmem_unit_reset, reset);
VX_lmem_unit #( VX_lmem_unit #(
.CORE_ID (CORE_ID) .INSTANCE_ID (INSTANCE_ID)
) lmem_unit ( ) lmem_unit (
.clk (clk), .clk (clk),
.reset (lmem_unit_reset), .reset (lmem_unit_reset),
@ -229,20 +232,20 @@ module VX_core import VX_gpu_pkg::*; #(
`endif `endif
VX_lsu_mem_if #( for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
.NUM_LANES (DCACHE_CHANNELS),
.DATA_SIZE (DCACHE_WORD_SIZE),
.TAG_WIDTH (DCACHE_TAG_WIDTH)
) dcache_coalesced_if[`NUM_LSU_BLOCKS]();
if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin VX_lsu_mem_if #(
.NUM_LANES (DCACHE_CHANNELS),
.DATA_SIZE (DCACHE_WORD_SIZE),
.TAG_WIDTH (DCACHE_TAG_WIDTH)
) dcache_coalesced_if();
`RESET_RELAY (coalescer_reset, reset); if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin `RESET_RELAY (mem_coalescer_reset, reset);
VX_mem_coalescer #( VX_mem_coalescer #(
.INSTANCE_ID ($sformatf("core%0d-coalescer", CORE_ID)), .INSTANCE_ID ($sformatf("%s-coalescer%0d", INSTANCE_ID, i)),
.NUM_REQS (`NUM_LSU_LANES), .NUM_REQS (`NUM_LSU_LANES),
.DATA_IN_SIZE (LSU_WORD_SIZE), .DATA_IN_SIZE (LSU_WORD_SIZE),
.DATA_OUT_SIZE (DCACHE_WORD_SIZE), .DATA_OUT_SIZE (DCACHE_WORD_SIZE),
@ -251,9 +254,9 @@ module VX_core import VX_gpu_pkg::*; #(
.TAG_WIDTH (LSU_TAG_WIDTH), .TAG_WIDTH (LSU_TAG_WIDTH),
.UUID_WIDTH (`UUID_WIDTH), .UUID_WIDTH (`UUID_WIDTH),
.QUEUE_SIZE (`LSUQ_OUT_SIZE) .QUEUE_SIZE (`LSUQ_OUT_SIZE)
) coalescer ( ) mem_coalescer (
.clk (clk), .clk (clk),
.reset (coalescer_reset), .reset (mem_coalescer_reset),
// Input request // Input request
.in_req_valid (lsu_dcache_if[i].req_valid), .in_req_valid (lsu_dcache_if[i].req_valid),
@ -274,42 +277,37 @@ module VX_core import VX_gpu_pkg::*; #(
.in_rsp_ready (lsu_dcache_if[i].rsp_ready), .in_rsp_ready (lsu_dcache_if[i].rsp_ready),
// Output request // Output request
.out_req_valid (dcache_coalesced_if[i].req_valid), .out_req_valid (dcache_coalesced_if.req_valid),
.out_req_mask (dcache_coalesced_if[i].req_data.mask), .out_req_mask (dcache_coalesced_if.req_data.mask),
.out_req_rw (dcache_coalesced_if[i].req_data.rw), .out_req_rw (dcache_coalesced_if.req_data.rw),
.out_req_byteen (dcache_coalesced_if[i].req_data.byteen), .out_req_byteen (dcache_coalesced_if.req_data.byteen),
.out_req_addr (dcache_coalesced_if[i].req_data.addr), .out_req_addr (dcache_coalesced_if.req_data.addr),
.out_req_atype (dcache_coalesced_if[i].req_data.atype), .out_req_atype (dcache_coalesced_if.req_data.atype),
.out_req_data (dcache_coalesced_if[i].req_data.data), .out_req_data (dcache_coalesced_if.req_data.data),
.out_req_tag (dcache_coalesced_if[i].req_data.tag), .out_req_tag (dcache_coalesced_if.req_data.tag),
.out_req_ready (dcache_coalesced_if[i].req_ready), .out_req_ready (dcache_coalesced_if.req_ready),
// Output response // Output response
.out_rsp_valid (dcache_coalesced_if[i].rsp_valid), .out_rsp_valid (dcache_coalesced_if.rsp_valid),
.out_rsp_mask (dcache_coalesced_if[i].rsp_data.mask), .out_rsp_mask (dcache_coalesced_if.rsp_data.mask),
.out_rsp_data (dcache_coalesced_if[i].rsp_data.data), .out_rsp_data (dcache_coalesced_if.rsp_data.data),
.out_rsp_tag (dcache_coalesced_if[i].rsp_data.tag), .out_rsp_tag (dcache_coalesced_if.rsp_data.tag),
.out_rsp_ready (dcache_coalesced_if[i].rsp_ready) .out_rsp_ready (dcache_coalesced_if.rsp_ready)
); );
end else begin
`ASSIGN_VX_LSU_MEM_IF (dcache_coalesced_if, lsu_dcache_if[i]);
end end
end else begin
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
`ASSIGN_VX_LSU_MEM_IF (dcache_coalesced_if[i], lsu_dcache_if[i]);
end
end
`RESET_RELAY (lsu_adapter_reset, reset);
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
VX_mem_bus_if #( VX_mem_bus_if #(
.DATA_SIZE (DCACHE_WORD_SIZE), .DATA_SIZE (DCACHE_WORD_SIZE),
.TAG_WIDTH (DCACHE_TAG_WIDTH) .TAG_WIDTH (DCACHE_TAG_WIDTH)
) dcache_bus_tmp_if[DCACHE_CHANNELS](); ) dcache_bus_tmp_if[DCACHE_CHANNELS]();
`RESET_RELAY (lsu_adapter_reset, reset);
VX_lsu_adapter #( VX_lsu_adapter #(
.NUM_LANES (DCACHE_CHANNELS), .NUM_LANES (DCACHE_CHANNELS),
.DATA_SIZE (DCACHE_WORD_SIZE), .DATA_SIZE (DCACHE_WORD_SIZE),
@ -320,15 +318,17 @@ module VX_core import VX_gpu_pkg::*; #(
) lsu_adapter ( ) lsu_adapter (
.clk (clk), .clk (clk),
.reset (lsu_adapter_reset), .reset (lsu_adapter_reset),
.lsu_mem_if (dcache_coalesced_if[i]), .lsu_mem_if (dcache_coalesced_if),
.mem_bus_if (dcache_bus_tmp_if) .mem_bus_if (dcache_bus_tmp_if)
); );
for (genvar j = 0; j < DCACHE_CHANNELS; ++j) begin for (genvar j = 0; j < DCACHE_CHANNELS; ++j) begin
`ASSIGN_VX_MEM_BUS_IF (dcache_bus_if[i * DCACHE_CHANNELS + j], dcache_bus_tmp_if[j]); `ASSIGN_VX_MEM_BUS_IF (dcache_bus_if[i * DCACHE_CHANNELS + j], dcache_bus_tmp_if[j]);
end end
end end
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
wire [`CLOG2(LSU_NUM_REQS+1)-1:0] perf_dcache_rd_req_per_cycle; wire [`CLOG2(LSU_NUM_REQS+1)-1:0] perf_dcache_rd_req_per_cycle;

View file

@ -144,6 +144,7 @@ module VX_core_top import VX_gpu_pkg::*; #(
`endif `endif
VX_core #( VX_core #(
.INSTANCE_ID ($sformatf("core")),
.CORE_ID (CORE_ID) .CORE_ID (CORE_ID)
) core ( ) core (
`SCOPE_IO_BIND (0) `SCOPE_IO_BIND (0)

View file

@ -26,13 +26,13 @@
addr+12'h80 : dst = 32'(src[$bits(src)-1:32]) addr+12'h80 : dst = 32'(src[$bits(src)-1:32])
`endif `endif
module VX_csr_data module VX_csr_data
import VX_gpu_pkg::*; import VX_gpu_pkg::*;
`ifdef EXT_F_ENABLE `ifdef EXT_F_ENABLE
import VX_fpu_pkg::*; import VX_fpu_pkg::*;
`endif `endif
#( #(
parameter `STRING INSTANCE_ID = "",
parameter CORE_ID = 0 parameter CORE_ID = 0
) ( ) (
input wire clk, input wire clk,
@ -147,7 +147,7 @@ import VX_fpu_pkg::*;
mscratch <= write_data; mscratch <= write_data;
end end
default: begin default: begin
`ASSERT(0, ("%t: *** invalid CSR write address: %0h (#%0d)", $time, write_addr, write_uuid)); `ASSERT(0, ("%t: *** %s invalid CSR write address: %0h (#%0d)", $time, INSTANCE_ID, write_addr, write_uuid));
end end
endcase endcase
end end
@ -212,21 +212,21 @@ import VX_fpu_pkg::*;
`VX_DCR_MPM_CLASS_CORE: begin `VX_DCR_MPM_CLASS_CORE: begin
case (read_addr) case (read_addr)
// PERF: pipeline // PERF: pipeline
`CSR_READ_64(`VX_CSR_MPM_SCHED_ID, read_data_ro_r, pipeline_perf_if.sched_idles); `CSR_READ_64(`VX_CSR_MPM_SCHED_ID, read_data_ro_r, pipeline_perf_if.sched.idles);
`CSR_READ_64(`VX_CSR_MPM_SCHED_ST, read_data_ro_r, pipeline_perf_if.sched_stalls); `CSR_READ_64(`VX_CSR_MPM_SCHED_ST, read_data_ro_r, pipeline_perf_if.sched.stalls);
`CSR_READ_64(`VX_CSR_MPM_IBUF_ST, read_data_ro_r, pipeline_perf_if.ibf_stalls); `CSR_READ_64(`VX_CSR_MPM_IBUF_ST, read_data_ro_r, pipeline_perf_if.issue.ibf_stalls);
`CSR_READ_64(`VX_CSR_MPM_SCRB_ST, read_data_ro_r, pipeline_perf_if.scb_stalls); `CSR_READ_64(`VX_CSR_MPM_SCRB_ST, read_data_ro_r, pipeline_perf_if.issue.scb_stalls);
`CSR_READ_64(`VX_CSR_MPM_SCRB_ALU, read_data_ro_r, pipeline_perf_if.units_uses[`EX_ALU]); `CSR_READ_64(`VX_CSR_MPM_OPDS_ST, read_data_ro_r, pipeline_perf_if.issue.opd_stalls);
`CSR_READ_64(`VX_CSR_MPM_SCRB_ALU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_ALU]);
`ifdef EXT_F_ENABLE `ifdef EXT_F_ENABLE
`CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_r, pipeline_perf_if.units_uses[`EX_FPU]); `CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_FPU]);
`else `else
`VX_CSR_MPM_SCRB_FPU : read_data_ro_r = '0; `CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_r, `PERF_CTR_BITS'(0));
`VX_CSR_MPM_SCRB_FPU_H : read_data_ro_r = '0;
`endif `endif
`CSR_READ_64(`VX_CSR_MPM_SCRB_LSU, read_data_ro_r, pipeline_perf_if.units_uses[`EX_LSU]); `CSR_READ_64(`VX_CSR_MPM_SCRB_LSU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_LSU]);
`CSR_READ_64(`VX_CSR_MPM_SCRB_SFU, read_data_ro_r, pipeline_perf_if.units_uses[`EX_SFU]); `CSR_READ_64(`VX_CSR_MPM_SCRB_SFU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_SFU]);
`CSR_READ_64(`VX_CSR_MPM_SCRB_CSRS, read_data_ro_r, pipeline_perf_if.sfu_uses[`SFU_CSRS]); `CSR_READ_64(`VX_CSR_MPM_SCRB_CSRS, read_data_ro_r, pipeline_perf_if.issue.sfu_uses[`SFU_CSRS]);
`CSR_READ_64(`VX_CSR_MPM_SCRB_WCTL, read_data_ro_r, pipeline_perf_if.sfu_uses[`SFU_WCTL]); `CSR_READ_64(`VX_CSR_MPM_SCRB_WCTL, read_data_ro_r, pipeline_perf_if.issue.sfu_uses[`SFU_WCTL]);
// PERF: memory // PERF: memory
`CSR_READ_64(`VX_CSR_MPM_IFETCHES, read_data_ro_r, pipeline_perf_if.ifetches); `CSR_READ_64(`VX_CSR_MPM_IFETCHES, read_data_ro_r, pipeline_perf_if.ifetches);
`CSR_READ_64(`VX_CSR_MPM_LOADS, read_data_ro_r, pipeline_perf_if.loads); `CSR_READ_64(`VX_CSR_MPM_LOADS, read_data_ro_r, pipeline_perf_if.loads);

View file

@ -14,6 +14,7 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_csr_unit import VX_gpu_pkg::*; #( module VX_csr_unit import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "",
parameter CORE_ID = 0, parameter CORE_ID = 0,
parameter NUM_LANES = 1 parameter NUM_LANES = 1
) ( ) (
@ -36,7 +37,7 @@ module VX_csr_unit import VX_gpu_pkg::*; #(
VX_execute_if.slave execute_if, VX_execute_if.slave execute_if,
VX_commit_if.master commit_if VX_commit_if.master commit_if
); );
`UNUSED_PARAM (CORE_ID) `UNUSED_SPARAM (INSTANCE_ID)
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
localparam PID_WIDTH = `UP(PID_BITS); localparam PID_WIDTH = `UP(PID_BITS);
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1; localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
@ -72,7 +73,8 @@ module VX_csr_unit import VX_gpu_pkg::*; #(
wire csr_write_enable = (execute_if.data.op_type == `INST_SFU_CSRRW); wire csr_write_enable = (execute_if.data.op_type == `INST_SFU_CSRRW);
VX_csr_data #( VX_csr_data #(
.CORE_ID (CORE_ID) .INSTANCE_ID (INSTANCE_ID),
.CORE_ID (CORE_ID)
) csr_data ( ) csr_data (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),

View file

@ -12,9 +12,8 @@
// limitations under the License. // limitations under the License.
`include "VX_define.vh" `include "VX_define.vh"
`include "VX_trace.vh"
module VX_dcr_data import VX_gpu_pkg::*; ( module VX_dcr_data import VX_gpu_pkg::*, VX_trace_pkg::*; (
input wire clk, input wire clk,
input wire reset, input wire reset,

View file

@ -12,7 +12,6 @@
// limitations under the License. // limitations under the License.
`include "VX_define.vh" `include "VX_define.vh"
`include "VX_trace.vh"
`ifdef EXT_F_ENABLE `ifdef EXT_F_ENABLE
`define USED_IREG(x) \ `define USED_IREG(x) \
@ -28,8 +27,8 @@
use_``x = 1 use_``x = 1
`endif `endif
module VX_decode import VX_gpu_pkg::*; #( module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #(
parameter CORE_ID = 0 parameter `STRING INSTANCE_ID = ""
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
@ -44,7 +43,7 @@ module VX_decode import VX_gpu_pkg::*; #(
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + 1 + (`NR_BITS * 4); localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + 1 + (`NR_BITS * 4);
`UNUSED_PARAM (CORE_ID) `UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_VAR (clk) `UNUSED_VAR (clk)
`UNUSED_VAR (reset) `UNUSED_VAR (reset)
@ -145,6 +144,12 @@ module VX_decode import VX_gpu_pkg::*; #(
end end
`endif `endif
`STATIC_ASSERT($bits(alu_args_t) == $bits(op_args_t), ("alu_args_t size mismatch: current=%0d, expected=%0d", $bits(alu_args_t), $bits(op_args_t)));
`STATIC_ASSERT($bits(fpu_args_t) == $bits(op_args_t), ("fpu_args_t size mismatch: current=%0d, expected=%0d", $bits(fpu_args_t), $bits(op_args_t)));
`STATIC_ASSERT($bits(lsu_args_t) == $bits(op_args_t), ("lsu_args_t size mismatch: current=%0d, expected=%0d", $bits(lsu_args_t), $bits(op_args_t)));
`STATIC_ASSERT($bits(csr_args_t) == $bits(op_args_t), ("csr_args_t size mismatch: current=%0d, expected=%0d", $bits(csr_args_t), $bits(op_args_t)));
`STATIC_ASSERT($bits(wctl_args_t) == $bits(op_args_t), ("wctl_args_t size mismatch: current=%0d, expected=%0d", $bits(wctl_args_t), $bits(op_args_t)));
always @(*) begin always @(*) begin
ex_type = '0; ex_type = '0;
@ -552,7 +557,7 @@ module VX_decode import VX_gpu_pkg::*; #(
`ifdef DBG_TRACE_PIPELINE `ifdef DBG_TRACE_PIPELINE
always @(posedge clk) begin always @(posedge clk) begin
if (decode_if.valid && decode_if.ready) begin if (decode_if.valid && decode_if.ready) begin
`TRACE(1, ("%d: core%0d-decode: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, CORE_ID, decode_if.data.wid, {decode_if.data.PC, 1'd0}, instr)); `TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, INSTANCE_ID, decode_if.data.wid, {decode_if.data.PC, 1'd0}, instr));
trace_ex_type(1, decode_if.data.ex_type); trace_ex_type(1, decode_if.data.ex_type);
`TRACE(1, (", op=")); `TRACE(1, (", op="));
trace_ex_op(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args); trace_ex_op(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args);

View file

@ -12,10 +12,9 @@
// limitations under the License. // limitations under the License.
`include "VX_define.vh" `include "VX_define.vh"
`include "VX_trace.vh"
module VX_dispatch import VX_gpu_pkg::*; #( module VX_dispatch import VX_gpu_pkg::*; #(
parameter CORE_ID = 0 parameter `STRING INSTANCE_ID = ""
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
@ -24,12 +23,12 @@ module VX_dispatch import VX_gpu_pkg::*; #(
output wire [`PERF_CTR_BITS-1:0] perf_stalls [`NUM_EX_UNITS], output wire [`PERF_CTR_BITS-1:0] perf_stalls [`NUM_EX_UNITS],
`endif `endif
// inputs // inputs
VX_operands_if.slave operands_if [`ISSUE_WIDTH], VX_operands_if.slave operands_if,
// outputs // outputs
VX_dispatch_if.master dispatch_if [`NUM_EX_UNITS * `ISSUE_WIDTH] VX_dispatch_if.master dispatch_if [`NUM_EX_UNITS]
); );
`UNUSED_PARAM (CORE_ID) `UNUSED_SPARAM (INSTANCE_ID)
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + `INST_OP_BITS + `INST_ARGS_BITS + 1 + `NR_BITS + (3 * `NUM_THREADS * `XLEN) + `NT_WIDTH; localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + `INST_OP_BITS + `INST_ARGS_BITS + 1 + `NR_BITS + (3 * `NUM_THREADS * `XLEN) + `NT_WIDTH;
@ -38,104 +37,71 @@ module VX_dispatch import VX_gpu_pkg::*; #(
assign tids[i] = `NT_WIDTH'(i); assign tids[i] = `NT_WIDTH'(i);
end end
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin wire [`NT_WIDTH-1:0] last_active_tid;
wire [`NT_WIDTH-1:0] last_active_tid; VX_find_first #(
.N (`NUM_THREADS),
.DATAW (`NT_WIDTH),
.REVERSE (1)
) last_tid_select (
.valid_in (operands_if.data.tmask),
.data_in (tids),
.data_out (last_active_tid),
`UNUSED_PIN (valid_out)
);
VX_find_first #( wire [`NUM_EX_UNITS-1:0] operands_reset;
.N (`NUM_THREADS), assign operands_if.ready = operands_reset[operands_if.data.ex_type];
.DATAW (`NT_WIDTH),
.REVERSE (1) for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin
) last_tid_select (
.valid_in (operands_if[i].data.tmask), `RESET_RELAY (buffer_reset, reset);
.data_in (tids),
.data_out (last_active_tid), VX_elastic_buffer #(
`UNUSED_PIN (valid_out) .DATAW (DATAW),
.SIZE (2),
.OUT_REG (2), // 2-cycle EB for area reduction
.LUTRAM (1)
) buffer (
.clk (clk),
.reset (buffer_reset),
.valid_in (operands_if.valid && (operands_if.data.ex_type == `EX_BITS'(i))),
.ready_in (operands_reset[i]),
.data_in ({
operands_if.data.uuid,
operands_if.data.wis,
operands_if.data.tmask,
operands_if.data.PC,
operands_if.data.op_type,
operands_if.data.op_args,
operands_if.data.wb,
operands_if.data.rd,
last_active_tid,
operands_if.data.rs1_data,
operands_if.data.rs2_data,
operands_if.data.rs3_data
}),
.data_out (dispatch_if[i].data),
.valid_out (dispatch_if[i].valid),
.ready_out (dispatch_if[i].ready)
); );
wire [`NUM_EX_UNITS-1:0] operands_reset;
`RESET_RELAY (buf_reset, reset);
for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin
VX_elastic_buffer #(
.DATAW (DATAW),
.SIZE (2),
.OUT_REG (2)
) buffer (
.clk (clk),
.reset (buf_reset),
.valid_in (operands_if[i].valid && (operands_if[i].data.ex_type == j)),
.ready_in (operands_reset[j]),
.data_in (`TO_DISPATCH_DATA(operands_if[i].data, last_active_tid)),
.data_out (dispatch_if[j * `ISSUE_WIDTH + i].data),
.valid_out (dispatch_if[j * `ISSUE_WIDTH + i].valid),
.ready_out (dispatch_if[j * `ISSUE_WIDTH + i].ready)
);
end
assign operands_if[i].ready = operands_reset[operands_if[i].data.ex_type];
end end
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
wire [`NUM_EX_UNITS-1:0] perf_unit_stalls_per_cycle, perf_unit_stalls_per_cycle_r;
reg [`ISSUE_WIDTH-1:0][`NUM_EX_UNITS-1:0] perf_issue_unit_stalls_per_cycle;
reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_stalls_r; reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_stalls_r;
for (genvar i=0; i < `ISSUE_WIDTH; ++i) begin wire operands_if_stall = operands_if.valid && ~operands_if.ready;
always @(*) begin
perf_issue_unit_stalls_per_cycle[i] = '0;
if (operands_if[i].valid && ~operands_if[i].ready) begin
perf_issue_unit_stalls_per_cycle[i][operands_if[i].data.ex_type] = 1;
end
end
end
VX_reduce #(
.DATAW_IN (`NUM_EX_UNITS),
.N (`ISSUE_WIDTH),
.OP ("|")
) reduce (
.data_in (perf_issue_unit_stalls_per_cycle),
.data_out (perf_unit_stalls_per_cycle)
);
`BUFFER(perf_unit_stalls_per_cycle_r, perf_unit_stalls_per_cycle);
for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
perf_stalls_r[i] <= '0; perf_stalls_r[i] <= '0;
end else begin end else begin
perf_stalls_r[i] <= perf_stalls_r[i] + `PERF_CTR_BITS'(perf_unit_stalls_per_cycle_r[i]); perf_stalls_r[i] <= perf_stalls_r[i] + `PERF_CTR_BITS'(operands_if_stall && operands_if.data.ex_type == `EX_BITS'(i));
end end
end end
end
for (genvar i=0; i < `NUM_EX_UNITS; ++i) begin
assign perf_stalls[i] = perf_stalls_r[i]; assign perf_stalls[i] = perf_stalls_r[i];
end end
`endif `endif
`ifdef DBG_TRACE_PIPELINE
for (genvar i=0; i < `ISSUE_WIDTH; ++i) begin
always @(posedge clk) begin
if (operands_if[i].valid && operands_if[i].ready) begin
`TRACE(1, ("%d: core%0d-issue: wid=%0d, PC=0x%0h, ex=", $time, CORE_ID, wis_to_wid(operands_if[i].data.wis, i), {operands_if[i].data.PC, 1'b0}));
trace_ex_type(1, operands_if[i].data.ex_type);
`TRACE(1, (", op="));
trace_ex_op(1, operands_if[i].data.ex_type, operands_if[i].data.op_type, operands_if[i].data.op_args);
`TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if[i].data.tmask, operands_if[i].data.wb, operands_if[i].data.rd));
`TRACE_ARRAY1D(1, "0x%0h", operands_if[i].data.rs1_data, `NUM_THREADS);
`TRACE(1, (", rs2_data="));
`TRACE_ARRAY1D(1, "0x%0h", operands_if[i].data.rs2_data, `NUM_THREADS);
`TRACE(1, (", rs3_data="));
`TRACE_ARRAY1D(1, "0x%0h", operands_if[i].data.rs3_data, `NUM_THREADS);
trace_op_args(1, operands_if[i].data.ex_type, operands_if[i].data.op_type, operands_if[i].data.op_args);
`TRACE(1, (" (#%0d)\n", operands_if[i].data.uuid));
end
end
end
`endif
endmodule endmodule

View file

@ -14,6 +14,7 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_execute import VX_gpu_pkg::*; #( module VX_execute import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "",
parameter CORE_ID = 0 parameter CORE_ID = 0
) ( ) (
`SCOPE_IO_DECL `SCOPE_IO_DECL
@ -55,7 +56,7 @@ module VX_execute import VX_gpu_pkg::*; #(
`RESET_RELAY (sfu_reset, reset); `RESET_RELAY (sfu_reset, reset);
VX_alu_unit #( VX_alu_unit #(
.CORE_ID (CORE_ID) .INSTANCE_ID ($sformatf("%s-alu", INSTANCE_ID))
) alu_unit ( ) alu_unit (
.clk (clk), .clk (clk),
.reset (alu_reset), .reset (alu_reset),
@ -67,7 +68,7 @@ module VX_execute import VX_gpu_pkg::*; #(
`SCOPE_IO_SWITCH (1) `SCOPE_IO_SWITCH (1)
VX_lsu_unit #( VX_lsu_unit #(
.CORE_ID (CORE_ID) .INSTANCE_ID ($sformatf("%s-lsu", INSTANCE_ID))
) lsu_unit ( ) lsu_unit (
`SCOPE_IO_BIND (0) `SCOPE_IO_BIND (0)
.clk (clk), .clk (clk),
@ -81,7 +82,7 @@ module VX_execute import VX_gpu_pkg::*; #(
`RESET_RELAY (fpu_reset, reset); `RESET_RELAY (fpu_reset, reset);
VX_fpu_unit #( VX_fpu_unit #(
.CORE_ID (CORE_ID) .INSTANCE_ID ($sformatf("%s-fpu", INSTANCE_ID))
) fpu_unit ( ) fpu_unit (
.clk (clk), .clk (clk),
.reset (fpu_reset), .reset (fpu_reset),
@ -92,6 +93,7 @@ module VX_execute import VX_gpu_pkg::*; #(
`endif `endif
VX_sfu_unit #( VX_sfu_unit #(
.INSTANCE_ID ($sformatf("%s-sfu", INSTANCE_ID)),
.CORE_ID (CORE_ID) .CORE_ID (CORE_ID)
) sfu_unit ( ) sfu_unit (
.clk (clk), .clk (clk),

View file

@ -14,7 +14,7 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_fetch import VX_gpu_pkg::*; #( module VX_fetch import VX_gpu_pkg::*; #(
parameter CORE_ID = 0 parameter `STRING INSTANCE_ID = ""
) ( ) (
`SCOPE_IO_DECL `SCOPE_IO_DECL
@ -30,7 +30,7 @@ module VX_fetch import VX_gpu_pkg::*; #(
// outputs // outputs
VX_fetch_if.master fetch_if VX_fetch_if.master fetch_if
); );
`UNUSED_PARAM (CORE_ID) `UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_VAR (reset) `UNUSED_VAR (reset)
wire icache_req_valid; wire icache_req_valid;
@ -78,9 +78,11 @@ module VX_fetch import VX_gpu_pkg::*; #(
.reset (reset), .reset (reset),
.incr (icache_req_fire && schedule_if.data.wid == i), .incr (icache_req_fire && schedule_if.data.wid == i),
.decr (fetch_if.ibuf_pop[i]), .decr (fetch_if.ibuf_pop[i]),
`UNUSED_PIN (empty),
`UNUSED_PIN (alm_empty),
.full (pending_ibuf_full[i]), .full (pending_ibuf_full[i]),
`UNUSED_PIN (size), `UNUSED_PIN (alm_full),
`UNUSED_PIN (empty) `UNUSED_PIN (size)
); );
end end
wire ibuf_ready = ~pending_ibuf_full[schedule_if.data.wid]; wire ibuf_ready = ~pending_ibuf_full[schedule_if.data.wid];
@ -89,7 +91,7 @@ module VX_fetch import VX_gpu_pkg::*; #(
`endif `endif
`RUNTIME_ASSERT((!schedule_if.valid || schedule_if.data.PC != 0), `RUNTIME_ASSERT((!schedule_if.valid || schedule_if.data.PC != 0),
("%t: *** invalid PC=0x%0h, wid=%0d, tmask=%b (#%0d)", $time, {schedule_if.data.PC, 1'b0}, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.uuid)) ("%t: *** %s invalid PC=0x%0h, wid=%0d, tmask=%b (#%0d)", $time, INSTANCE_ID, {schedule_if.data.PC, 1'b0}, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.uuid))
// Icache Request // Icache Request
@ -129,45 +131,33 @@ module VX_fetch import VX_gpu_pkg::*; #(
assign icache_bus_if.rsp_ready = fetch_if.ready; assign icache_bus_if.rsp_ready = fetch_if.ready;
`ifdef DBG_SCOPE_FETCH `ifdef DBG_SCOPE_FETCH
if (CORE_ID == 0) begin wire schedule_fire = schedule_if.valid && schedule_if.ready;
`ifdef SCOPE wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready;
wire schedule_fire = schedule_if.valid && schedule_if.ready; VX_scope_tap #(
wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready; .SCOPE_ID (1),
VX_scope_tap #( .TRIGGERW (4),
.SCOPE_ID (1), .PROBEW (`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS +
.TRIGGERW (4), ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH +
.PROBEW (`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + (ICACHE_WORD_SIZE*8) + ICACHE_TAG_WIDTH)
ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH + ) scope_tap (
(ICACHE_WORD_SIZE*8) + ICACHE_TAG_WIDTH) .clk (clk),
) scope_tap ( .reset (scope_reset),
.clk(clk), .start (1'b0),
.reset(scope_reset), .stop (1'b0),
.start(1'b0), .triggers ({
.stop(1'b0), reset,
.triggers({ schedule_fire,
reset, icache_req_fire,
schedule_fire, icache_rsp_fire
icache_req_fire, }),
icache_rsp_fire .probes ({
}), schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC,
.probes({ icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr,
schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC, icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag
icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr, }),
icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag .bus_in (scope_bus_in),
}), .bus_out (scope_bus_out)
.bus_in(scope_bus_in), );
.bus_out(scope_bus_out)
);
`endif
`ifdef CHIPSCOPE
ila_fetch ila_fetch_inst (
.clk (clk),
.probe0 ({reset, schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC, schedule_if.ready, schedule_if.valid}),
.probe1 ({icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr, icache_bus_if.req_ready, icache_bus_if.req_valid}),
.probe2 ({icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag, icache_bus_if.rsp_ready, icache_bus_if.rsp_valid})
);
`endif
end
`else `else
`SCOPE_IO_UNUSED() `SCOPE_IO_UNUSED()
`endif `endif
@ -177,10 +167,10 @@ module VX_fetch import VX_gpu_pkg::*; #(
wire fetch_fire = fetch_if.valid && fetch_if.ready; wire fetch_fire = fetch_if.valid && fetch_if.ready;
always @(posedge clk) begin always @(posedge clk) begin
if (schedule_fire) begin if (schedule_fire) begin
`TRACE(1, ("%d: I$%0d req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, CORE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid)); `TRACE(1, ("%d: %s req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, INSTANCE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid));
end end
if (fetch_fire) begin if (fetch_fire) begin
`TRACE(1, ("%d: I$%0d rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, CORE_ID, fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid)); `TRACE(1, ("%d: %s rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, INSTANCE_ID, fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid));
end end
end end
`endif `endif

View file

@ -14,7 +14,7 @@
`include "VX_fpu_define.vh" `include "VX_fpu_define.vh"
module VX_fpu_unit import VX_fpu_pkg::*; #( module VX_fpu_unit import VX_fpu_pkg::*; #(
parameter CORE_ID = 0 parameter `STRING INSTANCE_ID = ""
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
@ -26,7 +26,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
VX_commit_if.master commit_if [`ISSUE_WIDTH], VX_commit_if.master commit_if [`ISSUE_WIDTH],
VX_fpu_csr_if.master fpu_csr_if[`NUM_FPU_BLOCKS] VX_fpu_csr_if.master fpu_csr_if[`NUM_FPU_BLOCKS]
); );
`UNUSED_PARAM (CORE_ID) `UNUSED_SPARAM (INSTANCE_ID)
localparam BLOCK_SIZE = `NUM_FPU_BLOCKS; localparam BLOCK_SIZE = `NUM_FPU_BLOCKS;
localparam NUM_LANES = `NUM_FPU_LANES; localparam NUM_LANES = `NUM_FPU_LANES;
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
@ -84,12 +84,14 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
wire execute_fire = per_block_execute_if[block_idx].valid && per_block_execute_if[block_idx].ready; wire execute_fire = per_block_execute_if[block_idx].valid && per_block_execute_if[block_idx].ready;
wire fpu_rsp_fire = fpu_rsp_valid && fpu_rsp_ready; wire fpu_rsp_fire = fpu_rsp_valid && fpu_rsp_ready;
`RESET_RELAY (ibuf_reset, block_reset);
VX_index_buffer #( VX_index_buffer #(
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + PID_WIDTH + 1 + 1), .DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + PID_WIDTH + 1 + 1),
.SIZE (`FPUQ_SIZE) .SIZE (`FPUQ_SIZE)
) tag_store ( ) tag_store (
.clk (clk), .clk (clk),
.reset (block_reset), .reset (ibuf_reset),
.acquire_en (execute_fire), .acquire_en (execute_fire),
.write_addr (fpu_req_tag), .write_addr (fpu_req_tag),
.write_data ({per_block_execute_if[block_idx].data.uuid, per_block_execute_if[block_idx].data.wid, per_block_execute_if[block_idx].data.tmask, per_block_execute_if[block_idx].data.PC, per_block_execute_if[block_idx].data.rd, per_block_execute_if[block_idx].data.pid, per_block_execute_if[block_idx].data.sop, per_block_execute_if[block_idx].data.eop}), .write_data ({per_block_execute_if[block_idx].data.uuid, per_block_execute_if[block_idx].data.wid, per_block_execute_if[block_idx].data.tmask, per_block_execute_if[block_idx].data.PC, per_block_execute_if[block_idx].data.rd, per_block_execute_if[block_idx].data.pid, per_block_execute_if[block_idx].data.sop, per_block_execute_if[block_idx].data.eop}),
@ -226,12 +228,14 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
// send response // send response
`RESET_RELAY (rsp_reset, block_reset);
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + (NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1), .DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + (NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1),
.SIZE (0) .SIZE (0)
) rsp_buf ( ) rsp_buf (
.clk (clk), .clk (clk),
.reset (block_reset), .reset (rsp_reset),
.valid_in (fpu_rsp_valid), .valid_in (fpu_rsp_valid),
.ready_in (fpu_rsp_ready), .ready_in (fpu_rsp_ready),
.data_in ({fpu_rsp_uuid, fpu_rsp_wid, fpu_rsp_tmask, fpu_rsp_PC, fpu_rsp_rd, fpu_rsp_result, fpu_rsp_pid, fpu_rsp_sop, fpu_rsp_eop}), .data_in ({fpu_rsp_uuid, fpu_rsp_wid, fpu_rsp_tmask, fpu_rsp_PC, fpu_rsp_rd, fpu_rsp_result, fpu_rsp_pid, fpu_rsp_sop, fpu_rsp_eop}),

View file

@ -14,33 +14,36 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_ibuffer import VX_gpu_pkg::*; #( module VX_ibuffer import VX_gpu_pkg::*; #(
parameter CORE_ID = 0 parameter `STRING INSTANCE_ID = ""
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
`ifdef PERF_ENABLE
output wire [`PERF_CTR_BITS-1:0] perf_stalls,
`endif
// inputs // inputs
VX_decode_if.slave decode_if, VX_decode_if.slave decode_if,
// outputs // outputs
VX_ibuffer_if.master ibuffer_if [`NUM_WARPS] VX_ibuffer_if.master ibuffer_if [PER_ISSUE_WARPS]
); );
`UNUSED_PARAM (CORE_ID) `UNUSED_SPARAM (INSTANCE_ID)
localparam DATAW = `UUID_WIDTH + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + (`NR_BITS * 4); localparam DATAW = `UUID_WIDTH + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + (`NR_BITS * 4);
wire [`NUM_WARPS-1:0] ibuf_ready_in; wire [PER_ISSUE_WARPS-1:0] ibuf_ready_in;
assign decode_if.ready = ibuf_ready_in[decode_if.data.wid]; assign decode_if.ready = ibuf_ready_in[decode_if.data.wid];
for (genvar i = 0; i < `NUM_WARPS; ++i) begin for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (DATAW), .DATAW (DATAW),
.SIZE (`IBUF_SIZE), .SIZE (`IBUF_SIZE),
.OUT_REG (2) // use a 2-cycle FIFO .OUT_REG (2) // 2-cycle EB for area reduction
) instr_buf ( ) instr_buf (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.valid_in (decode_if.valid && decode_if.data.wid == i), .valid_in (decode_if.valid && decode_if.data.wid == ISSUE_WIS_W'(w)),
.data_in ({ .data_in ({
decode_if.data.uuid, decode_if.data.uuid,
decode_if.data.tmask, decode_if.data.tmask,
@ -52,15 +55,32 @@ module VX_ibuffer import VX_gpu_pkg::*; #(
decode_if.data.rd, decode_if.data.rd,
decode_if.data.rs1, decode_if.data.rs1,
decode_if.data.rs2, decode_if.data.rs2,
decode_if.data.rs3}), decode_if.data.rs3
.ready_in (ibuf_ready_in[i]), }),
.valid_out(ibuffer_if[i].valid), .ready_in (ibuf_ready_in[w]),
.data_out (ibuffer_if[i].data), .valid_out(ibuffer_if[w].valid),
.ready_out(ibuffer_if[i].ready) .data_out (ibuffer_if[w].data),
.ready_out(ibuffer_if[w].ready)
); );
`ifndef L1_ENABLE `ifndef L1_ENABLE
assign decode_if.ibuf_pop[i] = ibuffer_if[i].valid && ibuffer_if[i].ready; assign decode_if.ibuf_pop[w] = ibuffer_if[w].valid && ibuffer_if[w].ready;
`endif `endif
end end
`ifdef PERF_ENABLE
reg [`PERF_CTR_BITS-1:0] perf_ibf_stalls;
wire decode_if_stall = decode_if.valid && ~decode_if.ready;
always @(posedge clk) begin
if (reset) begin
perf_ibf_stalls <= '0;
end else begin
perf_ibf_stalls <= perf_ibf_stalls + `PERF_CTR_BITS'(decode_if_stall);
end
end
assign perf_stalls = perf_ibf_stalls;
`endif
endmodule endmodule

View file

@ -12,10 +12,9 @@
// limitations under the License. // limitations under the License.
`include "VX_define.vh" `include "VX_define.vh"
`include "VX_trace.vh"
module VX_issue #( module VX_issue import VX_gpu_pkg::*; #(
parameter CORE_ID = 0 parameter `STRING INSTANCE_ID = ""
) ( ) (
`SCOPE_IO_DECL `SCOPE_IO_DECL
@ -23,137 +22,81 @@ module VX_issue #(
input wire reset, input wire reset,
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
VX_pipeline_perf_if.issue perf_issue_if, output issue_perf_t issue_perf,
`endif `endif
VX_decode_if.slave decode_if, VX_decode_if.slave decode_if,
VX_writeback_if.slave writeback_if [`ISSUE_WIDTH], VX_writeback_if.slave writeback_if [`ISSUE_WIDTH],
VX_dispatch_if.master dispatch_if [`NUM_EX_UNITS * `ISSUE_WIDTH] VX_dispatch_if.master dispatch_if [`NUM_EX_UNITS * `ISSUE_WIDTH]
); );
VX_ibuffer_if ibuffer_if [`NUM_WARPS]();
VX_scoreboard_if scoreboard_if [`ISSUE_WIDTH]();
VX_operands_if operands_if [`ISSUE_WIDTH]();
`RESET_RELAY (ibuf_reset, reset);
`RESET_RELAY (scoreboard_reset, reset);
`RESET_RELAY (operands_reset, reset);
`RESET_RELAY (dispatch_reset, reset);
VX_ibuffer #(
.CORE_ID (CORE_ID)
) ibuffer (
.clk (clk),
.reset (ibuf_reset),
.decode_if (decode_if),
.ibuffer_if (ibuffer_if)
);
VX_scoreboard #(
.CORE_ID (CORE_ID)
) scoreboard (
.clk (clk),
.reset (scoreboard_reset),
`ifdef PERF_ENABLE
.perf_scb_stalls(perf_issue_if.scb_stalls),
.perf_units_uses(perf_issue_if.units_uses),
.perf_sfu_uses (perf_issue_if.sfu_uses),
`endif
.writeback_if (writeback_if),
.ibuffer_if (ibuffer_if),
.scoreboard_if (scoreboard_if)
);
VX_operands #(
.CORE_ID (CORE_ID)
) operands (
.clk (clk),
.reset (operands_reset),
.writeback_if (writeback_if),
.scoreboard_if (scoreboard_if),
.operands_if (operands_if)
);
VX_dispatch #(
.CORE_ID (CORE_ID)
) dispatch (
.clk (clk),
.reset (dispatch_reset),
`ifdef PERF_ENABLE
`UNUSED_PIN (perf_stalls),
`endif
.operands_if (operands_if),
.dispatch_if (dispatch_if)
);
`ifdef DBG_SCOPE_ISSUE
if (CORE_ID == 0) begin
`ifdef SCOPE
wire operands_if_fire = operands_if[0].valid && operands_if[0].ready;
wire operands_if_not_ready = ~operands_if[0].ready;
wire writeback_if_valid = writeback_if[0].valid;
VX_scope_tap #(
.SCOPE_ID (2),
.TRIGGERW (4),
.PROBEW (`UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS +
1 + `NR_BITS + (`NUM_THREADS * 3 * `XLEN) +
`UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1)
) scope_tap (
.clk(clk),
.reset(scope_reset),
.start(1'b0),
.stop(1'b0),
.triggers({
reset,
operands_if_fire,
operands_if_not_ready,
writeback_if_valid
}),
.probes({
operands_if[0].data.uuid,
operands_if[0].data.tmask,
operands_if[0].data.ex_type,
operands_if[0].data.op_type,
operands_if[0].data.wb,
operands_if[0].data.rd,
operands_if[0].data.rs1_data,
operands_if[0].data.rs2_data,
operands_if[0].data.rs3_data,
writeback_if[0].data.uuid,
writeback_if[0].data.tmask,
writeback_if[0].data.rd,
writeback_if[0].data.data,
writeback_if[0].data.eop
}),
.bus_in(scope_bus_in),
.bus_out(scope_bus_out)
);
`endif
`ifdef CHIPSCOPE
ila_issue ila_issue_inst (
.clk (clk),
.probe0 ({operands_if.uuid, ibuffer.rs3, ibuffer.rs2, ibuffer.rs1, operands_if.PC, operands_if.tmask, operands_if.wid, operands_if.ex_type, operands_if.op_type, operands_if.ready, operands_if.valid}),
.probe1 ({writeback_if.uuid, writeback_if.data[0], writeback_if.PC, writeback_if.tmask, writeback_if.wid, writeback_if.eop, writeback_if.valid})
);
`endif
end
`else
`SCOPE_IO_UNUSED()
`endif
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
reg [`PERF_CTR_BITS-1:0] perf_ibf_stalls; issue_perf_t per_issue_perf [`ISSUE_WIDTH];
`PERF_COUNTER_ADD (issue_perf, per_issue_perf, ibf_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
wire decode_stall = decode_if.valid && ~decode_if.ready; `PERF_COUNTER_ADD (issue_perf, per_issue_perf, scb_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
`PERF_COUNTER_ADD (issue_perf, per_issue_perf, opd_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
always @(posedge clk) begin for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin
if (reset) begin `PERF_COUNTER_ADD (issue_perf, per_issue_perf, units_uses[i], `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
perf_ibf_stalls <= '0; end
end else begin for (genvar i = 0; i < `NUM_SFU_UNITS; ++i) begin
perf_ibf_stalls <= perf_ibf_stalls + `PERF_CTR_BITS'(decode_stall); `PERF_COUNTER_ADD (issue_perf, per_issue_perf, sfu_uses[i], `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2))
end
end end
assign perf_issue_if.ibf_stalls = perf_ibf_stalls;
`endif `endif
wire [ISSUE_ISW_W-1:0] decode_isw = wid_to_isw(decode_if.data.wid);
wire [ISSUE_WIS_W-1:0] decode_wis = wid_to_wis(decode_if.data.wid);
wire [`ISSUE_WIDTH-1:0] decode_ready_in;
assign decode_if.ready = decode_ready_in[decode_isw];
`SCOPE_IO_SWITCH (`ISSUE_WIDTH)
for (genvar issue_id = 0; issue_id < `ISSUE_WIDTH; ++issue_id) begin : issue_slices
VX_decode_if #(
.NUM_WARPS (PER_ISSUE_WARPS)
) per_issue_decode_if();
VX_dispatch_if per_issue_dispatch_if[`NUM_EX_UNITS]();
assign per_issue_decode_if.valid = decode_if.valid && (decode_isw == ISSUE_ISW_W'(issue_id));
assign per_issue_decode_if.data.uuid = decode_if.data.uuid;
assign per_issue_decode_if.data.wid = decode_wis;
assign per_issue_decode_if.data.tmask = decode_if.data.tmask;
assign per_issue_decode_if.data.PC = decode_if.data.PC;
assign per_issue_decode_if.data.ex_type = decode_if.data.ex_type;
assign per_issue_decode_if.data.op_type = decode_if.data.op_type;
assign per_issue_decode_if.data.op_args = decode_if.data.op_args;
assign per_issue_decode_if.data.wb = decode_if.data.wb;
assign per_issue_decode_if.data.rd = decode_if.data.rd;
assign per_issue_decode_if.data.rs1 = decode_if.data.rs1;
assign per_issue_decode_if.data.rs2 = decode_if.data.rs2;
assign per_issue_decode_if.data.rs3 = decode_if.data.rs3;
assign decode_ready_in[issue_id] = per_issue_decode_if.ready;
`ifndef L1_ENABLE
assign decode_if.ibuf_pop[issue_id * PER_ISSUE_WARPS +: PER_ISSUE_WARPS] = per_issue_decode_if.ibuf_pop;
`endif
`RESET_RELAY (slice_reset, reset);
VX_issue_slice #(
.INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, issue_id)),
.ISSUE_ID (issue_id)
) issue_slice (
`SCOPE_IO_BIND(issue_id)
.clk (clk),
.reset (slice_reset),
`ifdef PERF_ENABLE
.issue_perf (per_issue_perf[issue_id]),
`endif
.decode_if (per_issue_decode_if),
.writeback_if (writeback_if[issue_id]),
.dispatch_if (per_issue_dispatch_if)
);
// Assign transposed dispatch_if
for (genvar ex_id = 0; ex_id < `NUM_EX_UNITS; ++ex_id) begin
`ASSIGN_VX_IF(dispatch_if[ex_id * `ISSUE_WIDTH + issue_id], per_issue_dispatch_if[ex_id]);
end
end
endmodule endmodule

View file

@ -0,0 +1,159 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
// One issue slice: instantiates VX_ibuffer, VX_scoreboard, VX_operands and
// VX_dispatch and links them through ibuffer_if / scoreboard_if / operands_if.
// Every stage is reset through its own relayed copy of 'reset'.
module VX_issue_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
    parameter `STRING INSTANCE_ID = "",
    parameter ISSUE_ID = 0
) (
    `SCOPE_IO_DECL

    input wire              clk,
    input wire              reset,

`ifdef PERF_ENABLE
    // Per-slice counters; each field is driven by the stage that owns it.
    output issue_perf_t     issue_perf,
`endif

    VX_decode_if.slave      decode_if,
    VX_writeback_if.slave   writeback_if,
    VX_dispatch_if.master   dispatch_if [`NUM_EX_UNITS]
);
    // ISSUE_ID is only read by the DBG_TRACE_PIPELINE block at the bottom.
    `UNUSED_PARAM (ISSUE_ID)

    // Stage-to-stage links
    VX_ibuffer_if    ibuffer_if [PER_ISSUE_WARPS]();
    VX_scoreboard_if scoreboard_if();
    VX_operands_if   operands_if();

    // ---- instruction buffer: decode_if -> ibuffer_if ----------------------

    `RESET_RELAY (ibuf_reset, reset);

    VX_ibuffer #(
        .INSTANCE_ID ($sformatf("%s-ibuffer", INSTANCE_ID))
    ) ibuffer (
        .clk         (clk),
        .reset       (ibuf_reset),
    `ifdef PERF_ENABLE
        .perf_stalls (issue_perf.ibf_stalls),
    `endif
        .decode_if   (decode_if),
        .ibuffer_if  (ibuffer_if)
    );

    // ---- scoreboard: ibuffer_if + writeback_if -> scoreboard_if -----------

    `RESET_RELAY (scoreboard_reset, reset);

    VX_scoreboard #(
        .INSTANCE_ID ($sformatf("%s-scoreboard", INSTANCE_ID))
    ) scoreboard (
        .clk            (clk),
        .reset          (scoreboard_reset),
    `ifdef PERF_ENABLE
        .perf_stalls    (issue_perf.scb_stalls),
        .perf_units_uses(issue_perf.units_uses),
        .perf_sfu_uses  (issue_perf.sfu_uses),
    `endif
        .writeback_if   (writeback_if),
        .ibuffer_if     (ibuffer_if),
        .scoreboard_if  (scoreboard_if)
    );

    // ---- operands: scoreboard_if + writeback_if -> operands_if ------------

    `RESET_RELAY (operands_reset, reset);

    VX_operands #(
        .INSTANCE_ID ($sformatf("%s-operands", INSTANCE_ID))
    ) operands (
        .clk           (clk),
        .reset         (operands_reset),
    `ifdef PERF_ENABLE
        .perf_stalls   (issue_perf.opd_stalls),
    `endif
        .writeback_if  (writeback_if),
        .scoreboard_if (scoreboard_if),
        .operands_if   (operands_if)
    );

    // ---- dispatch: operands_if -> dispatch_if[] ---------------------------

    `RESET_RELAY (dispatch_reset, reset);

    VX_dispatch #(
        .INSTANCE_ID ($sformatf("%s-dispatch", INSTANCE_ID))
    ) dispatch (
        .clk         (clk),
        .reset       (dispatch_reset),
    `ifdef PERF_ENABLE
        // the dispatch stall counter is not collected by this slice
        `UNUSED_PIN  (perf_stalls),
    `endif
        .operands_if (operands_if),
        .dispatch_if (dispatch_if)
    );

`ifdef DBG_SCOPE_ISSUE
    // Scope trigger conditions (TRIGGERW = 4, 'reset' being the fourth).
    wire trig_operands_fire      = operands_if.valid && operands_if.ready;
    wire trig_operands_not_ready = ~operands_if.ready;
    wire trig_writeback_valid    = writeback_if.valid;

    // PROBEW is the summed width of the fields listed in .probes, in order:
    // first the operands_if fields, then the writeback_if fields.
    VX_scope_tap #(
        .SCOPE_ID (2),
        .TRIGGERW (4),
        .PROBEW   (`UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS +
                   1 + `NR_BITS + (`NUM_THREADS * 3 * `XLEN) +
                   `UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1)
    ) scope_tap (
        .clk      (clk),
        .reset    (scope_reset),
        .start    (1'b0),
        .stop     (1'b0),
        .triggers ({
            reset,
            trig_operands_fire,
            trig_operands_not_ready,
            trig_writeback_valid
        }),
        .probes   ({
            operands_if.data.uuid,
            operands_if.data.tmask,
            operands_if.data.ex_type,
            operands_if.data.op_type,
            operands_if.data.wb,
            operands_if.data.rd,
            operands_if.data.rs1_data,
            operands_if.data.rs2_data,
            operands_if.data.rs3_data,
            writeback_if.data.uuid,
            writeback_if.data.tmask,
            writeback_if.data.rd,
            writeback_if.data.data,
            writeback_if.data.eop
        }),
        .bus_in   (scope_bus_in),
        .bus_out  (scope_bus_out)
    );
`else
    `SCOPE_IO_UNUSED()
`endif

`ifdef DBG_TRACE_PIPELINE
    // Emit one trace record for every operands_if handshake.
    always @(posedge clk) begin
        if (operands_if.valid && operands_if.ready) begin
            `TRACE(1, ("%d: %s wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), {operands_if.data.PC, 1'b0}));
            trace_ex_type(1, operands_if.data.ex_type);
            `TRACE(1, (", op="));
            trace_ex_op(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
            `TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if.data.tmask, operands_if.data.wb, operands_if.data.rd));
            `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs1_data, `NUM_THREADS);
            `TRACE(1, (", rs2_data="));
            `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs2_data, `NUM_THREADS);
            `TRACE(1, (", rs3_data="));
            `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs3_data, `NUM_THREADS);
            trace_op_args(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
            `TRACE(1, (" (#%0d)\n", operands_if.data.uuid));
        end
    end
`endif

endmodule

132
hw/rtl/core/VX_issue_top.sv Normal file
View file

@ -0,0 +1,132 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
// Flat-port wrapper around VX_issue: every field of the decode, writeback and
// dispatch interfaces is exposed as a plain wire (or unpacked array of wires)
// and copied to/from the interface instances that VX_issue connects to.
module VX_issue_top import VX_gpu_pkg::*; #(
    parameter `STRING INSTANCE_ID = "issue"
) (
    // Clock
    input wire                              clk,
    input wire                              reset,

    // Decode request (single channel); decode_ready is the back-pressure
    input wire                              decode_valid,
    input wire [`UUID_WIDTH-1:0]            decode_uuid,
    input wire [`NW_WIDTH-1:0]              decode_wid,
    input wire [`NUM_THREADS-1:0]           decode_tmask,
    input wire [`PC_BITS-1:0]               decode_PC,
    input wire [`EX_BITS-1:0]               decode_ex_type,
    input wire [`INST_OP_BITS-1:0]          decode_op_type,
    input op_args_t                         decode_op_args,
    input wire                              decode_wb,
    input wire [`NR_BITS-1:0]               decode_rd,
    input wire [`NR_BITS-1:0]               decode_rs1,
    input wire [`NR_BITS-1:0]               decode_rs2,
    input wire [`NR_BITS-1:0]               decode_rs3,
    output wire                             decode_ready,

    // Writeback, one entry per issue slot (no ready signal is exposed)
    input wire                              writeback_valid[`ISSUE_WIDTH],
    input wire [`UUID_WIDTH-1:0]            writeback_uuid[`ISSUE_WIDTH],
    input wire [ISSUE_WIS_W-1:0]            writeback_wis[`ISSUE_WIDTH],
    input wire [`NUM_THREADS-1:0]           writeback_tmask[`ISSUE_WIDTH],
    input wire [`PC_BITS-1:0]               writeback_PC[`ISSUE_WIDTH],
    input wire [`NR_BITS-1:0]               writeback_rd[`ISSUE_WIDTH],
    input wire [`NUM_THREADS-1:0][`XLEN-1:0] writeback_data[`ISSUE_WIDTH],
    input wire                              writeback_sop[`ISSUE_WIDTH],
    input wire                              writeback_eop[`ISSUE_WIDTH],

    // Dispatch, NUM_EX_UNITS * ISSUE_WIDTH entries
    output wire                             dispatch_valid[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire [`UUID_WIDTH-1:0]           dispatch_uuid[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire [ISSUE_WIS_W-1:0]           dispatch_wis[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire [`NUM_THREADS-1:0]          dispatch_tmask[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire [`PC_BITS-1:0]              dispatch_PC[`NUM_EX_UNITS * `ISSUE_WIDTH],
    // NOTE(review): sized with INST_ALU_BITS while decode_op_type above uses
    // INST_OP_BITS; confirm both equal the width of dispatch_if.data.op_type.
    output wire [`INST_ALU_BITS-1:0]        dispatch_op_type[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output op_args_t                        dispatch_op_args[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire                             dispatch_wb[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire [`NR_BITS-1:0]              dispatch_rd[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire [`NT_WIDTH-1:0]             dispatch_tid[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs1_data[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs2_data[`NUM_EX_UNITS * `ISSUE_WIDTH],
    output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs3_data[`NUM_EX_UNITS * `ISSUE_WIDTH],
    input wire                              dispatch_ready[`NUM_EX_UNITS * `ISSUE_WIDTH]
);

    // Interface instances handed to VX_issue
    VX_decode_if    decode_if();
    VX_dispatch_if  dispatch_if[`NUM_EX_UNITS * `ISSUE_WIDTH]();
    VX_writeback_if writeback_if[`ISSUE_WIDTH]();

    // Flat decode ports -> decode_if
    assign decode_if.valid        = decode_valid;
    assign decode_if.data.uuid    = decode_uuid;
    assign decode_if.data.wid     = decode_wid;
    assign decode_if.data.tmask   = decode_tmask;
    assign decode_if.data.PC      = decode_PC;
    assign decode_if.data.ex_type = decode_ex_type;
    assign decode_if.data.op_type = decode_op_type;
    assign decode_if.data.op_args = decode_op_args;
    assign decode_if.data.wb      = decode_wb;
    assign decode_if.data.rd      = decode_rd;
    assign decode_if.data.rs1     = decode_rs1;
    assign decode_if.data.rs2     = decode_rs2;
    assign decode_if.data.rs3     = decode_rs3;
    assign decode_ready           = decode_if.ready;

    // Flat writeback ports -> writeback_if[i]
    for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
        assign writeback_if[i].valid      = writeback_valid[i];
        assign writeback_if[i].data.uuid  = writeback_uuid[i];
        assign writeback_if[i].data.wis   = writeback_wis[i];
        assign writeback_if[i].data.tmask = writeback_tmask[i];
        assign writeback_if[i].data.PC    = writeback_PC[i];
        assign writeback_if[i].data.rd    = writeback_rd[i];
        assign writeback_if[i].data.data  = writeback_data[i];
        assign writeback_if[i].data.sop   = writeback_sop[i];
        assign writeback_if[i].data.eop   = writeback_eop[i];
    end

    // dispatch_if[i] -> flat dispatch ports; only 'ready' flows inward
    for (genvar i = 0; i < `NUM_EX_UNITS * `ISSUE_WIDTH; ++i) begin
        assign dispatch_valid[i]    = dispatch_if[i].valid;
        assign dispatch_uuid[i]     = dispatch_if[i].data.uuid;
        assign dispatch_wis[i]      = dispatch_if[i].data.wis;
        assign dispatch_tmask[i]    = dispatch_if[i].data.tmask;
        assign dispatch_PC[i]       = dispatch_if[i].data.PC;
        assign dispatch_op_type[i]  = dispatch_if[i].data.op_type;
        assign dispatch_op_args[i]  = dispatch_if[i].data.op_args;
        assign dispatch_wb[i]       = dispatch_if[i].data.wb;
        assign dispatch_rd[i]       = dispatch_if[i].data.rd;
        assign dispatch_tid[i]      = dispatch_if[i].data.tid;
        assign dispatch_rs1_data[i] = dispatch_if[i].data.rs1_data;
        assign dispatch_rs2_data[i] = dispatch_if[i].data.rs2_data;
        assign dispatch_rs3_data[i] = dispatch_if[i].data.rs3_data;
        assign dispatch_if[i].ready = dispatch_ready[i];
    end

`ifdef PERF_ENABLE
    // Sink for the perf counters: connected to the issue_perf port of the
    // VX_issue instance below and never read in this wrapper. Declared without
    // an initializer on purpose, because a variable must not combine a
    // declaration initializer with a port / continuous driver.
    issue_perf_t issue_perf;
`endif

    VX_issue #(
        .INSTANCE_ID (INSTANCE_ID)
    ) issue (
        // NOTE(review): this wrapper declares no scope ports or scope switch;
        // confirm SCOPE_IO_BIND (0) still elaborates when SCOPE is defined.
        `SCOPE_IO_BIND  (0)

        .clk            (clk),
        .reset          (reset),

    `ifdef PERF_ENABLE
        .issue_perf     (issue_perf),
    `endif

        .decode_if      (decode_if),
        .writeback_if   (writeback_if),
        .dispatch_if    (dispatch_if)
    );

endmodule

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -14,11 +14,11 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_lmem_unit import VX_gpu_pkg::*; #( module VX_lmem_unit import VX_gpu_pkg::*; #(
parameter CORE_ID = 0 parameter `STRING INSTANCE_ID = ""
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
output cache_perf_t cache_perf, output cache_perf_t cache_perf,
`endif `endif
@ -37,31 +37,31 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
.NUM_LANES (`NUM_LSU_LANES), .NUM_LANES (`NUM_LSU_LANES),
.DATA_SIZE (LSU_WORD_SIZE), .DATA_SIZE (LSU_WORD_SIZE),
.TAG_WIDTH (LSU_TAG_WIDTH) .TAG_WIDTH (LSU_TAG_WIDTH)
) lmem_lsu_if[`NUM_LSU_BLOCKS](); ) lsu_switch_if[`NUM_LSU_BLOCKS]();
`RESET_RELAY (req_reset, reset);
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
wire [`NUM_LSU_LANES-1:0] is_addr_local_mask; wire [`NUM_LSU_LANES-1:0] is_addr_local_mask;
for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin
assign is_addr_local_mask[j] = lsu_mem_in_if[i].req_data.atype[j][`ADDR_TYPE_LOCAL]; assign is_addr_local_mask[j] = lsu_mem_in_if[i].req_data.atype[j][`ADDR_TYPE_LOCAL];
end end
wire is_addr_global = | (lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask); wire is_addr_global = | (lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask);
wire is_addr_local = | (lsu_mem_in_if[i].req_data.mask & is_addr_local_mask); wire is_addr_local = | (lsu_mem_in_if[i].req_data.mask & is_addr_local_mask);
wire req_global_ready; wire req_global_ready;
wire req_local_ready; wire req_local_ready;
`RESET_RELAY (switch_reset, reset);
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (REQ_DATAW), .DATAW (REQ_DATAW),
.SIZE (2), .SIZE (2),
.OUT_REG (1) .OUT_REG (1)
) req_global_buf ( ) req_global_buf (
.clk (clk), .clk (clk),
.reset (req_reset), .reset (switch_reset),
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_global), .valid_in (lsu_mem_in_if[i].req_valid && is_addr_global),
.data_in ({ .data_in ({
lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask, lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask,
lsu_mem_in_if[i].req_data.rw, lsu_mem_in_if[i].req_data.rw,
@ -81,7 +81,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
lsu_mem_out_if[i].req_data.atype, lsu_mem_out_if[i].req_data.atype,
lsu_mem_out_if[i].req_data.data, lsu_mem_out_if[i].req_data.data,
lsu_mem_out_if[i].req_data.tag lsu_mem_out_if[i].req_data.tag
}), }),
.ready_out (lsu_mem_out_if[i].req_ready) .ready_out (lsu_mem_out_if[i].req_ready)
); );
@ -91,8 +91,8 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
.OUT_REG (0) .OUT_REG (0)
) req_local_buf ( ) req_local_buf (
.clk (clk), .clk (clk),
.reset (req_reset), .reset (switch_reset),
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_local), .valid_in (lsu_mem_in_if[i].req_valid && is_addr_local),
.data_in ({ .data_in ({
lsu_mem_in_if[i].req_data.mask & is_addr_local_mask, lsu_mem_in_if[i].req_data.mask & is_addr_local_mask,
lsu_mem_in_if[i].req_data.rw, lsu_mem_in_if[i].req_data.rw,
@ -103,73 +103,47 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
lsu_mem_in_if[i].req_data.tag lsu_mem_in_if[i].req_data.tag
}), }),
.ready_in (req_local_ready), .ready_in (req_local_ready),
.valid_out (lmem_lsu_if[i].req_valid), .valid_out (lsu_switch_if[i].req_valid),
.data_out ({ .data_out ({
lmem_lsu_if[i].req_data.mask, lsu_switch_if[i].req_data.mask,
lmem_lsu_if[i].req_data.rw, lsu_switch_if[i].req_data.rw,
lmem_lsu_if[i].req_data.byteen, lsu_switch_if[i].req_data.byteen,
lmem_lsu_if[i].req_data.addr, lsu_switch_if[i].req_data.addr,
lmem_lsu_if[i].req_data.atype, lsu_switch_if[i].req_data.atype,
lmem_lsu_if[i].req_data.data, lsu_switch_if[i].req_data.data,
lmem_lsu_if[i].req_data.tag lsu_switch_if[i].req_data.tag
}), }),
.ready_out (lmem_lsu_if[i].req_ready) .ready_out (lsu_switch_if[i].req_ready)
); );
assign lsu_mem_in_if[i].req_ready = (req_global_ready && is_addr_global) assign lsu_mem_in_if[i].req_ready = (req_global_ready && is_addr_global)
|| (req_local_ready && is_addr_local); || (req_local_ready && is_addr_local);
end
`RESET_RELAY (rsp_reset, reset); VX_stream_arb #(
.NUM_INPUTS (2),
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin .DATAW (RSP_DATAW),
.ARBITER ("R"),
wire rsp_arb_valid; .OUT_BUF (1)
wire rsp_arb_index; ) rsp_arb (
wire rsp_arb_ready; .clk (clk),
.reset (switch_reset),
VX_generic_arbiter #( .valid_in ({
.NUM_REQS (2), lsu_switch_if[i].rsp_valid,
.LOCK_ENABLE (1),
.TYPE ("R")
) arbiter (
.clk (clk),
.reset (rsp_reset),
.requests ({
lmem_lsu_if[i].rsp_valid,
lsu_mem_out_if[i].rsp_valid lsu_mem_out_if[i].rsp_valid
}), }),
.grant_valid (rsp_arb_valid), .ready_in ({
.grant_index (rsp_arb_index), lsu_switch_if[i].rsp_ready,
`UNUSED_PIN (grant_onehot), lsu_mem_out_if[i].rsp_ready
.grant_unlock(rsp_arb_ready)
);
VX_elastic_buffer #(
.DATAW (RSP_DATAW),
.SIZE (2),
.OUT_REG (0)
) rsp_buf (
.clk (clk),
.reset (rsp_reset),
.valid_in (rsp_arb_valid),
.data_in ({
rsp_arb_index ? lmem_lsu_if[i].rsp_data.mask : lsu_mem_out_if[i].rsp_data.mask,
rsp_arb_index ? lmem_lsu_if[i].rsp_data.data : lsu_mem_out_if[i].rsp_data.data,
rsp_arb_index ? lmem_lsu_if[i].rsp_data.tag : lsu_mem_out_if[i].rsp_data.tag
}), }),
.ready_in (rsp_arb_ready), .data_in ({
lsu_switch_if[i].rsp_data,
lsu_mem_out_if[i].rsp_data
}),
.data_out (lsu_mem_in_if[i].rsp_data),
.valid_out (lsu_mem_in_if[i].rsp_valid), .valid_out (lsu_mem_in_if[i].rsp_valid),
.data_out ({ .ready_out (lsu_mem_in_if[i].rsp_ready),
lsu_mem_in_if[i].rsp_data.mask, `UNUSED_PIN (sel_out)
lsu_mem_in_if[i].rsp_data.data,
lsu_mem_in_if[i].rsp_data.tag
}),
.ready_out (lsu_mem_in_if[i].rsp_ready)
); );
assign lsu_mem_out_if[i].rsp_ready = rsp_arb_ready && ~rsp_arb_index;
assign lmem_lsu_if[i].rsp_ready = rsp_arb_ready && rsp_arb_index;
end end
VX_mem_bus_if #( VX_mem_bus_if #(
@ -177,25 +151,25 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
.TAG_WIDTH (LSU_TAG_WIDTH) .TAG_WIDTH (LSU_TAG_WIDTH)
) lmem_bus_if[LSU_NUM_REQS](); ) lmem_bus_if[LSU_NUM_REQS]();
`RESET_RELAY (adapter_reset, reset); for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
VX_mem_bus_if #( VX_mem_bus_if #(
.DATA_SIZE (LSU_WORD_SIZE), .DATA_SIZE (LSU_WORD_SIZE),
.TAG_WIDTH (LSU_TAG_WIDTH) .TAG_WIDTH (LSU_TAG_WIDTH)
) lmem_bus_tmp_if[`NUM_LSU_LANES](); ) lmem_bus_tmp_if[`NUM_LSU_LANES]();
`RESET_RELAY (adapter_reset, reset);
VX_lsu_adapter #( VX_lsu_adapter #(
.NUM_LANES (`NUM_LSU_LANES), .NUM_LANES (`NUM_LSU_LANES),
.DATA_SIZE (LSU_WORD_SIZE), .DATA_SIZE (LSU_WORD_SIZE),
.TAG_WIDTH (LSU_TAG_WIDTH), .TAG_WIDTH (LSU_TAG_WIDTH),
.TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH), .TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH),
.REQ_OUT_BUF (2), .REQ_OUT_BUF (3),
.RSP_OUT_BUF (1) .RSP_OUT_BUF (0)
) lsu_adapter ( ) lsu_adapter (
.clk (clk), .clk (clk),
.reset (adapter_reset), .reset (adapter_reset),
.lsu_mem_if (lmem_lsu_if[i]), .lsu_mem_if (lsu_switch_if[i]),
.mem_bus_if (lmem_bus_tmp_if) .mem_bus_if (lmem_bus_tmp_if)
); );
@ -205,17 +179,18 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
end end
`RESET_RELAY (lmem_reset, reset); `RESET_RELAY (lmem_reset, reset);
VX_local_mem #( VX_local_mem #(
.INSTANCE_ID($sformatf("core%0d-lmem", CORE_ID)), .INSTANCE_ID($sformatf("%s-lmem", INSTANCE_ID)),
.SIZE (1 << `LMEM_LOG_SIZE), .SIZE (1 << `LMEM_LOG_SIZE),
.NUM_REQS (LSU_NUM_REQS), .NUM_REQS (LSU_NUM_REQS),
.NUM_BANKS (`LMEM_NUM_BANKS), .NUM_BANKS (`LMEM_NUM_BANKS),
.WORD_SIZE (LSU_WORD_SIZE), .WORD_SIZE (LSU_WORD_SIZE),
.ADDR_WIDTH (LMEM_ADDR_WIDTH), .ADDR_WIDTH (LMEM_ADDR_WIDTH),
.UUID_WIDTH (`UUID_WIDTH), .UUID_WIDTH (`UUID_WIDTH),
.TAG_WIDTH (LSU_TAG_WIDTH) .TAG_WIDTH (LSU_TAG_WIDTH),
) local_mem ( .OUT_BUF (3)
) local_mem (
.clk (clk), .clk (clk),
.reset (lmem_reset), .reset (lmem_reset),
`ifdef PERF_ENABLE `ifdef PERF_ENABLE

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -14,10 +14,10 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_lsu_adapter import VX_gpu_pkg::*; #( module VX_lsu_adapter import VX_gpu_pkg::*; #(
parameter NUM_LANES = 1, parameter NUM_LANES = 1,
parameter DATA_SIZE = 1, parameter DATA_SIZE = 1,
parameter TAG_WIDTH = 1, parameter TAG_WIDTH = 1,
parameter TAG_SEL_BITS = 0, parameter TAG_SEL_BITS = 0,
parameter `STRING ARBITER = "P", parameter `STRING ARBITER = "P",
parameter REQ_OUT_BUF = 0, parameter REQ_OUT_BUF = 0,
parameter RSP_OUT_BUF = 0 parameter RSP_OUT_BUF = 0
@ -63,12 +63,12 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #(
assign mem_bus_if[i].req_data.tag = req_tag_out[i]; assign mem_bus_if[i].req_data.tag = req_tag_out[i];
assign req_ready_out[i] = mem_bus_if[i].req_ready; assign req_ready_out[i] = mem_bus_if[i].req_ready;
end end
VX_stream_unpack #( VX_stream_unpack #(
.NUM_REQS (NUM_LANES), .NUM_REQS (NUM_LANES),
.DATA_WIDTH (REQ_DATA_WIDTH), .DATA_WIDTH (REQ_DATA_WIDTH),
.TAG_WIDTH (TAG_WIDTH), .TAG_WIDTH (TAG_WIDTH),
.OUT_BUF (REQ_OUT_BUF) .OUT_BUF (REQ_OUT_BUF)
) stream_unpack ( ) stream_unpack (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@ -77,7 +77,7 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #(
.data_in (req_data_in), .data_in (req_data_in),
.tag_in (lsu_mem_if.req_data.tag), .tag_in (lsu_mem_if.req_data.tag),
.ready_in (lsu_mem_if.req_ready), .ready_in (lsu_mem_if.req_ready),
.valid_out (req_valid_out), .valid_out (req_valid_out),
.data_out (req_data_out), .data_out (req_data_out),
.tag_out (req_tag_out), .tag_out (req_tag_out),
.ready_out (req_ready_out) .ready_out (req_ready_out)

View file

@ -13,9 +13,8 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_lsu_slice import VX_gpu_pkg::*; #( module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
parameter CORE_ID = 0, parameter `STRING INSTANCE_ID = ""
parameter BLOCK_ID = 0
) ( ) (
`SCOPE_IO_DECL `SCOPE_IO_DECL
@ -88,7 +87,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
wire [NUM_LANES-1:0] mem_req_mask; wire [NUM_LANES-1:0] mem_req_mask;
wire mem_req_rw; wire mem_req_rw;
wire [NUM_LANES-1:0][LSU_ADDR_WIDTH-1:0] mem_req_addr; wire [NUM_LANES-1:0][LSU_ADDR_WIDTH-1:0] mem_req_addr;
reg [NUM_LANES-1:0][LSU_WORD_SIZE-1:0] mem_req_byteen; wire [NUM_LANES-1:0][LSU_WORD_SIZE-1:0] mem_req_byteen;
reg [NUM_LANES-1:0][LSU_WORD_SIZE*8-1:0] mem_req_data; reg [NUM_LANES-1:0][LSU_WORD_SIZE*8-1:0] mem_req_data;
wire [TAG_WIDTH-1:0] mem_req_tag; wire [TAG_WIDTH-1:0] mem_req_tag;
wire mem_req_ready; wire mem_req_ready;
@ -159,27 +158,30 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
// byte enable formatting // byte enable formatting
for (genvar i = 0; i < NUM_LANES; ++i) begin for (genvar i = 0; i < NUM_LANES; ++i) begin
reg [LSU_WORD_SIZE-1:0] mem_req_byteen_r;
always @(*) begin always @(*) begin
mem_req_byteen[i] = '0; mem_req_byteen_r = '0;
case (`INST_LSU_WSIZE(execute_if.data.op_type)) case (`INST_LSU_WSIZE(execute_if.data.op_type))
0: begin // 8-bit 0: begin // 8-bit
mem_req_byteen[i][req_align[i]] = 1'b1; mem_req_byteen_r[req_align[i]] = 1'b1;
end end
1: begin // 16 bit 1: begin // 16 bit
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:1], 1'b0}] = 1'b1; mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:1], 1'b0}] = 1'b1;
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:1], 1'b1}] = 1'b1; mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:1], 1'b1}] = 1'b1;
end end
`ifdef XLEN_64 `ifdef XLEN_64
2: begin // 32 bit 2: begin // 32 bit
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b00}] = 1'b1; mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b00}] = 1'b1;
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b01}] = 1'b1; mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b01}] = 1'b1;
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b10}] = 1'b1; mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b10}] = 1'b1;
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b11}] = 1'b1; mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b11}] = 1'b1;
end end
`endif `endif
default : mem_req_byteen[i] = {LSU_WORD_SIZE{1'b1}}; // 3: 64 bit
default : mem_req_byteen_r = {LSU_WORD_SIZE{1'b1}};
endcase endcase
end end
assign mem_req_byteen[i] = mem_req_byteen_r;
end end
// memory misalignment not supported! // memory misalignment not supported!
@ -312,7 +314,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
`RESET_RELAY (mem_scheduler_reset, reset); `RESET_RELAY (mem_scheduler_reset, reset);
VX_mem_scheduler #( VX_mem_scheduler #(
.INSTANCE_ID ($sformatf("core%0d-lsu-memsched%0d", CORE_ID, BLOCK_ID)), .INSTANCE_ID ($sformatf("%s-scheduler", INSTANCE_ID)),
.CORE_REQS (NUM_LANES), .CORE_REQS (NUM_LANES),
.MEM_CHANNELS(NUM_LANES), .MEM_CHANNELS(NUM_LANES),
.WORD_SIZE (LSU_WORD_SIZE), .WORD_SIZE (LSU_WORD_SIZE),
@ -504,11 +506,11 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
`ifdef DBG_TRACE_MEM `ifdef DBG_TRACE_MEM
always @(posedge clk) begin always @(posedge clk) begin
if (execute_if.valid && fence_lock) begin if (execute_if.valid && fence_lock) begin
`TRACE(1, ("%d: *** D$%0d fence wait\n", $time, CORE_ID)); `TRACE(1, ("%d: *** %s fence wait\n", $time, INSTANCE_ID));
end end
if (mem_req_fire) begin if (mem_req_fire) begin
if (mem_req_rw) begin if (mem_req_rw) begin
`TRACE(1, ("%d: D$%0d Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, CORE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)); `TRACE(1, ("%d: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask));
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES); `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES);
`TRACE(1, (", atype=")); `TRACE(1, (", atype="));
`TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES); `TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES);
@ -516,7 +518,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
`TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES); `TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES);
`TRACE(1, (", tag=0x%0h (#%0d)\n", mem_req_tag, execute_if.data.uuid)); `TRACE(1, (", tag=0x%0h (#%0d)\n", mem_req_tag, execute_if.data.uuid));
end else begin end else begin
`TRACE(1, ("%d: D$%0d Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, CORE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)); `TRACE(1, ("%d: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask));
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES); `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES);
`TRACE(1, (", atype=")); `TRACE(1, (", atype="));
`TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES); `TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES);
@ -524,8 +526,8 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
end end
end end
if (mem_rsp_fire) begin if (mem_rsp_fire) begin
`TRACE(1, ("%d: D$%0d Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=", `TRACE(1, ("%d: %s Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=",
$time, CORE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop)); $time, INSTANCE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop));
`TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data, NUM_LANES); `TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data, NUM_LANES);
`TRACE(1, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid)); `TRACE(1, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid));
end end
@ -533,36 +535,20 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
`endif `endif
`ifdef DBG_SCOPE_LSU `ifdef DBG_SCOPE_LSU
if (CORE_ID == 0 && BLOCK_ID == 0) begin VX_scope_tap #(
`ifdef SCOPE .SCOPE_ID (3),
VX_scope_tap #( .TRIGGERW (3),
.SCOPE_ID (3), .PROBEW (1 + NUM_LANES*(`XLEN + LSU_WORD_SIZE + LSU_WORD_SIZE*8) + `UUID_WIDTH + NUM_LANES*LSU_WORD_SIZE*8 + `UUID_WIDTH)
.TRIGGERW (3), ) scope_tap (
.PROBEW (`UUID_WIDTH+NUM_LANES*(`XLEN+4+`XLEN)+1+`UUID_WIDTH+NUM_LANES*`XLEN) .clk (clk),
) scope_tap ( .reset (scope_reset),
.clk(clk), .start (1'b0),
.reset(scope_reset), .stop (1'b0),
.start(1'b0), .triggers({reset, mem_req_fire, mem_rsp_fire}),
.stop(1'b0), .probes ({mem_req_rw, full_addr, mem_req_byteen, mem_req_data, execute_if.data.uuid, rsp_data, rsp_uuid}),
.triggers({reset, mem_req_fire, mem_rsp_fire}), .bus_in (scope_bus_in),
.probes({execute_if.data.uuid, full_addr, mem_req_rw, mem_req_byteen, mem_req_data, rsp_uuid, rsp_data}), .bus_out(scope_bus_out)
.bus_in(scope_bus_in), );
.bus_out(scope_bus_out)
);
`endif
`ifdef CHIPSCOPE
wire [31:0] full_addr_0 = full_addr[0];
wire [31:0] mem_req_data_0 = mem_req_data[0];
wire [31:0] rsp_data_0 = rsp_data[0];
ila_lsu ila_lsu_inst (
.clk (clk),
.probe0 ({mem_req_data_0, execute_if.data.uuid, execute_if.data.wid, execute_if.data.PC, mem_req_mask, full_addr_0, mem_req_byteen, mem_req_rw, mem_req_ready, mem_req_valid}),
.probe1 ({rsp_data_0, rsp_uuid, mem_rsp_eop, rsp_pc, rsp_rd, mem_rsp_mask, rsp_wid, mem_rsp_ready, mem_rsp_valid}),
.probe2 ({lsu_mem_if.req_data.data, lsu_mem_if.req_data.tag, lsu_mem_if.req_data.byteen, lsu_mem_if.req_data.addr, lsu_mem_if.req_data.rw, lsu_mem_if.req_ready, lsu_mem_if.req_valid}),
.probe3 ({lsu_mem_if.rsp_data.data, lsu_mem_if.rsp_data.tag, lsu_mem_if.rsp_ready, lsu_mem_if.rsp_valid})
);
`endif
end
`else `else
`SCOPE_IO_UNUSED() `SCOPE_IO_UNUSED()
`endif `endif

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -14,8 +14,8 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_lsu_unit import VX_gpu_pkg::*; #( module VX_lsu_unit import VX_gpu_pkg::*; #(
parameter CORE_ID = 0 parameter `STRING INSTANCE_ID = ""
) ( ) (
`SCOPE_IO_DECL `SCOPE_IO_DECL
input wire clk, input wire clk,
@ -24,7 +24,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
// Inputs // Inputs
VX_dispatch_if.slave dispatch_if [`ISSUE_WIDTH], VX_dispatch_if.slave dispatch_if [`ISSUE_WIDTH],
// Outputs // Outputs
VX_commit_if.master commit_if [`ISSUE_WIDTH], VX_commit_if.master commit_if [`ISSUE_WIDTH],
VX_lsu_mem_if.master lsu_mem_if [`NUM_LSU_BLOCKS] VX_lsu_mem_if.master lsu_mem_if [`NUM_LSU_BLOCKS]
); );
@ -32,10 +32,9 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
localparam NUM_LANES = `NUM_LSU_LANES; localparam NUM_LANES = `NUM_LSU_LANES;
`ifdef SCOPE `ifdef SCOPE
localparam scope_lsu = 0;
`SCOPE_IO_SWITCH (BLOCK_SIZE); `SCOPE_IO_SWITCH (BLOCK_SIZE);
`endif `endif
VX_execute_if #( VX_execute_if #(
.NUM_LANES (NUM_LANES) .NUM_LANES (NUM_LANES)
) per_block_execute_if[BLOCK_SIZE](); ) per_block_execute_if[BLOCK_SIZE]();
@ -55,17 +54,16 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
.NUM_LANES (NUM_LANES) .NUM_LANES (NUM_LANES)
) per_block_commit_if[BLOCK_SIZE](); ) per_block_commit_if[BLOCK_SIZE]();
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : lsu_slices
`RESET_RELAY (block_reset, reset); `RESET_RELAY (slice_reset, reset);
VX_lsu_slice #( VX_lsu_slice #(
.CORE_ID (CORE_ID), .INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, block_idx))
.BLOCK_ID (block_idx)
) lsu_slice( ) lsu_slice(
`SCOPE_IO_BIND (scope_lsu+block_idx) `SCOPE_IO_BIND (block_idx)
.clk (clk), .clk (clk),
.reset (block_reset), .reset (slice_reset),
.execute_if (per_block_execute_if[block_idx]), .execute_if (per_block_execute_if[block_idx]),
.commit_if (per_block_commit_if[block_idx]), .commit_if (per_block_commit_if[block_idx]),
.lsu_mem_if (lsu_mem_if[block_idx]) .lsu_mem_if (lsu_mem_if[block_idx])
@ -82,5 +80,5 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
.commit_in_if (per_block_commit_if), .commit_in_if (per_block_commit_if),
.commit_out_if (commit_if) .commit_out_if (commit_if)
); );
endmodule endmodule

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -14,29 +14,288 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_operands import VX_gpu_pkg::*; #( module VX_operands import VX_gpu_pkg::*; #(
parameter CORE_ID = 0 parameter `STRING INSTANCE_ID = "",
parameter NUM_BANKS = 4,
parameter OUT_BUF = 4 // using 2-cycle EB for area reduction
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
VX_writeback_if.slave writeback_if [`ISSUE_WIDTH], `ifdef PERF_ENABLE
VX_scoreboard_if.slave scoreboard_if [`ISSUE_WIDTH], output wire [`PERF_CTR_BITS-1:0] perf_stalls,
VX_operands_if.master operands_if [`ISSUE_WIDTH] `endif
VX_writeback_if.slave writeback_if,
VX_scoreboard_if.slave scoreboard_if,
VX_operands_if.master operands_if
); );
`UNUSED_SPARAM (INSTANCE_ID)
localparam NUM_SRC_REGS = 3;
localparam REQ_SEL_BITS = `CLOG2(NUM_SRC_REGS);
localparam REQ_SEL_WIDTH = `UP(REQ_SEL_BITS);
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
localparam PER_BANK_REGS = `NUM_REGS / NUM_BANKS;
localparam METADATAW = ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + `NR_BITS;
localparam DATAW = `UUID_WIDTH + METADATAW + 3 * `NUM_THREADS * `XLEN;
localparam RAM_ADDRW = `LOG2UP(`NUM_REGS * PER_ISSUE_WARPS);
localparam PER_BANK_ADDRW = RAM_ADDRW - BANK_SEL_BITS;
localparam XLEN_SIZE = `XLEN / 8;
localparam BYTEENW = `NUM_THREADS * XLEN_SIZE;
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin `UNUSED_VAR (writeback_if.data.sop)
`RESET_RELAY (slice_reset, reset);
VX_gpr_slice #( wire [NUM_SRC_REGS-1:0] src_valid;
.CORE_ID (CORE_ID) wire [NUM_SRC_REGS-1:0] req_in_valid;
) gpr_slice ( wire [NUM_SRC_REGS-1:0] req_in_ready;
.clk (clk), wire [NUM_SRC_REGS-1:0][PER_BANK_ADDRW-1:0] req_in_data;
.reset (slice_reset), wire [NUM_SRC_REGS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx;
.writeback_if (writeback_if[i]),
.scoreboard_if(scoreboard_if[i]), wire [NUM_BANKS-1:0] gpr_rd_valid_n, gpr_rd_ready;
.operands_if (operands_if[i]) reg [NUM_BANKS-1:0] gpr_rd_valid;
wire [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr_n;
reg [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr;
wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data;
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx_n;
reg [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx;
wire pipe_in_ready;
reg pipe_out_valid;
wire pipe_out_ready;
reg [`UUID_WIDTH-1:0] pipe_out_uuid;
reg [METADATAW-1:0] pipe_out_data;
reg [NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data, src_data_n;
reg [NUM_SRC_REGS-1:0] data_fetched;
reg has_collision, has_collision_n;
wire stg_in_valid, stg_in_ready;
wire [NUM_SRC_REGS-1:0][`NR_BITS-1:0] src_regs = {scoreboard_if.data.rs3,
scoreboard_if.data.rs2,
scoreboard_if.data.rs1};
// Per-source read request formation.
// For each of the NUM_SRC_REGS source operands this derives:
//   - req_in_data[i]  : the address presented to the selected bank, built from
//                       the upper register-number bits [`NR_BITS-1:BANK_SEL_BITS]
//                       and, when ISSUE_WIS != 0, the warp-in-slice index (wis)
//                       appended as the low bits.
//   - req_bank_idx[i] : the target bank, taken from the low BANK_SEL_BITS bits
//                       of the register number (forced to 0 for a single bank).
for (genvar i = 0; i < NUM_SRC_REGS; ++i) begin
if (ISSUE_WIS != 0) begin
assign req_in_data[i] = {src_regs[i][`NR_BITS-1:BANK_SEL_BITS], scoreboard_if.data.wis};
end else begin
// no wis field to append: the address is just the upper register bits
assign req_in_data[i] = src_regs[i][`NR_BITS-1:BANK_SEL_BITS];
end
if (NUM_BANKS != 1) begin
assign req_bank_idx[i] = src_regs[i][BANK_SEL_BITS-1:0];
end else begin
// single bank: there are no bank-select bits to slice
assign req_bank_idx[i] = '0;
end
end
// A source operand needs a read only if its register number is non-zero and
// its data has not already been captured (data_fetched[i] clear).
for (genvar i = 0; i < NUM_SRC_REGS; ++i) begin
assign src_valid[i] = (src_regs[i] != 0) && ~data_fetched[i];
end
// Requests are presented to the crossbar only while the scoreboard output is valid.
assign req_in_valid = {NUM_SRC_REGS{scoreboard_if.valid}} & src_valid;
VX_stream_xbar #(
.NUM_INPUTS (NUM_SRC_REGS),
.NUM_OUTPUTS (NUM_BANKS),
.DATAW (PER_BANK_ADDRW),
.ARBITER ("P"), // use priority arbiter
.PERF_CTR_BITS(`PERF_CTR_BITS),
.OUT_BUF (0) // no output buffering
) req_xbar (
.clk (clk),
.reset (reset),
`UNUSED_PIN(collisions),
.valid_in (req_in_valid),
.data_in (req_in_data),
.sel_in (req_bank_idx),
.ready_in (req_in_ready),
.valid_out (gpr_rd_valid_n),
.data_out (gpr_rd_addr_n),
.sel_out (gpr_rd_req_idx_n),
.ready_out (gpr_rd_ready)
);
assign gpr_rd_ready = {NUM_BANKS{stg_in_ready}};
// Bank-collision detection (combinational).
// has_collision_n is set when any two different source operands that still
// need a read (src_valid) select the same bank index. The nested loops visit
// every unordered pair (i, i+j) exactly once: j starts at 1 so an operand is
// never compared with itself, and the bound (NUM_SRC_REGS-i) keeps i+j in range.
always @(*) begin
has_collision_n = 0;
for (integer i = 0; i < NUM_SRC_REGS; ++i) begin
for (integer j = 1; j < (NUM_SRC_REGS-i); ++j) begin
has_collision_n |= src_valid[i]
&& src_valid[j+i]
&& (req_bank_idx[i] == req_bank_idx[j+i]);
end
end
end
// Operand data merge (combinational).
// Starts from the currently held src_data and, for every bank whose registered
// read-valid bit is set, overwrites the operand slot selected by that bank's
// registered requester index with the bank's read data. Banks are scanned in
// ascending order, so if two banks name the same slot the higher bank wins.
always @(*) begin
src_data_n = src_data;
for (integer b = 0; b < NUM_BANKS; ++b) begin
if (gpr_rd_valid[b]) begin
src_data_n[gpr_rd_req_idx[b]] = gpr_rd_data[b];
end
end
end
wire pipe_stall = pipe_out_valid && ~pipe_out_ready;
assign pipe_in_ready = ~pipe_stall;
assign scoreboard_if.ready = pipe_in_ready && ~has_collision_n;
wire stg_in_fire = stg_in_valid && stg_in_ready;
// Pipeline registers between the request crossbar and the output buffer.
// The block has two groups of registers:
//   1. control/state registers that are cleared by reset
//      (pipe_out_valid, gpr_rd_valid, data_fetched, src_data);
//   2. payload registers that sit outside the reset if/else and are therefore
//      never reset (pipe_out_uuid, pipe_out_data, has_collision, gpr_rd_addr,
//      gpr_rd_req_idx).
// Both groups only advance when the stage is not stalled (~pipe_stall).
always @(posedge clk) begin
if (reset) begin
pipe_out_valid <= 0;
gpr_rd_valid <= '0;
data_fetched <= '0;
src_data <= '0;
end else begin
if (~pipe_stall) begin
pipe_out_valid <= scoreboard_if.valid;
gpr_rd_valid <= gpr_rd_valid_n;
// data_fetched tracks which sources have had their read request accepted
// (req_in_ready). It is cleared once the scoreboard entry is consumed
// (scoreboard_if.ready), otherwise it accumulates across cycles.
if (scoreboard_if.ready) begin
data_fetched <= '0;
end else begin
data_fetched <= data_fetched | req_in_ready;
end
// src_data is cleared when the staged entry is handed to the output buffer,
// otherwise it retains the merged bank read data.
if (stg_in_fire) begin
src_data <= '0;
end else begin
src_data <= src_data_n;
end
end
end
// Non-reset payload registers.
if (~pipe_stall) begin
pipe_out_uuid <= scoreboard_if.data.uuid;
// Field order here must match the order in which out_buffer's data_out
// is unpacked into operands_if.data (wis, tmask, PC, wb, ex_type, op_type,
// op_args, rd).
pipe_out_data <= {
scoreboard_if.data.wis,
scoreboard_if.data.tmask,
scoreboard_if.data.PC,
scoreboard_if.data.wb,
scoreboard_if.data.ex_type,
scoreboard_if.data.op_type,
scoreboard_if.data.op_args,
scoreboard_if.data.rd
};
has_collision <= has_collision_n;
gpr_rd_addr <= gpr_rd_addr_n;
gpr_rd_req_idx <= gpr_rd_req_idx_n;
end
end
assign pipe_out_ready = stg_in_ready;
assign stg_in_valid = pipe_out_valid && ~has_collision;
VX_elastic_buffer #(
.DATAW (DATAW),
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)),
.LUTRAM (1)
) out_buffer (
.clk (clk),
.reset (reset),
.valid_in (stg_in_valid),
.ready_in (stg_in_ready),
.data_in ({
pipe_out_uuid,
pipe_out_data,
src_data_n[0],
src_data_n[1],
src_data_n[2]
}),
.data_out ({
operands_if.data.uuid,
operands_if.data.wis,
operands_if.data.tmask,
operands_if.data.PC,
operands_if.data.wb,
operands_if.data.ex_type,
operands_if.data.op_type,
operands_if.data.op_args,
operands_if.data.rd,
operands_if.data.rs1_data,
operands_if.data.rs2_data,
operands_if.data.rs3_data
}),
.valid_out (operands_if.valid),
.ready_out (operands_if.ready)
);
wire [PER_BANK_ADDRW-1:0] gpr_wr_addr;
if (ISSUE_WIS != 0) begin
assign gpr_wr_addr = {writeback_if.data.rd[`NR_BITS-1:BANK_SEL_BITS], writeback_if.data.wis};
end else begin
assign gpr_wr_addr = writeback_if.data.rd[`NR_BITS-1:BANK_SEL_BITS];
end
wire [BANK_SEL_WIDTH-1:0] gpr_wr_bank_idx;
if (NUM_BANKS != 1) begin
assign gpr_wr_bank_idx = writeback_if.data.rd[BANK_SEL_BITS-1:0];
end else begin
assign gpr_wr_bank_idx = '0;
end
// Global write-enable qualifier for the register-file banks.
// With GPR_RESET defined, wr_enabled is a register that is initialized to 0 and
// set to 1 on a clock edge where reset is asserted; nothing in this block
// clears it again. Without GPR_RESET it is tied to constant 1.
`ifdef GPR_RESET
reg wr_enabled = 0;
always @(posedge clk) begin
if (reset) begin
wr_enabled <= 1;
end
end
`else
wire wr_enabled = 1;
`endif
for (genvar b = 0; b < NUM_BANKS; ++b) begin
wire gpr_wr_enabled;
if (BANK_SEL_BITS != 0) begin
assign gpr_wr_enabled = wr_enabled
&& writeback_if.valid
&& (gpr_wr_bank_idx == BANK_SEL_BITS'(b));
end else begin
assign gpr_wr_enabled = wr_enabled && writeback_if.valid;
end
wire [BYTEENW-1:0] wren;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
assign wren[i*XLEN_SIZE+:XLEN_SIZE] = {XLEN_SIZE{writeback_if.data.tmask[i]}};
end
`ifdef GPR_RESET
VX_dp_ram_rst #(
`else
VX_dp_ram #(
`endif
.DATAW (`XLEN * `NUM_THREADS),
.SIZE (PER_BANK_REGS * PER_ISSUE_WARPS),
.WRENW (BYTEENW),
.NO_RWCHECK (1)
) gpr_ram (
.clk (clk),
`ifdef GPR_RESET
.reset (reset),
`endif
.read (1'b1),
.wren (wren),
.write (gpr_wr_enabled),
.waddr (gpr_wr_addr),
.wdata (writeback_if.data.data),
.raddr (gpr_rd_addr[b]),
.rdata (gpr_rd_data[b])
); );
end end
// Performance counter (only built when PERF_ENABLE is defined).
// collisions_r increments by one on every cycle in which a valid scoreboard
// entry is presented, the pipeline input is ready, and a bank collision is
// detected (has_collision_n). The running total is exported as perf_stalls.
`ifdef PERF_ENABLE
reg [`PERF_CTR_BITS-1:0] collisions_r;
always @(posedge clk) begin
if (reset) begin
collisions_r <= '0;
end else begin
// the 1-bit condition is cast to counter width before the add
collisions_r <= collisions_r + `PERF_CTR_BITS'(scoreboard_if.valid && pipe_in_ready && has_collision_n);
end
end
assign perf_stalls = collisions_r;
`endif
endmodule endmodule

View file

@ -14,13 +14,14 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_schedule import VX_gpu_pkg::*; #( module VX_schedule import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "",
parameter CORE_ID = 0 parameter CORE_ID = 0
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
VX_pipeline_perf_if.schedule perf_schedule_if, output sched_perf_t sched_perf,
`endif `endif
// configuration // configuration
@ -42,6 +43,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
// status // status
output wire busy output wire busy
); );
`UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_PARAM (CORE_ID) `UNUSED_PARAM (CORE_ID)
reg [`NUM_WARPS-1:0] active_warps, active_warps_n; // updated when a warp is activated or disabled reg [`NUM_WARPS-1:0] active_warps, active_warps_n; // updated when a warp is activated or disabled
@ -290,7 +292,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
`RESET_RELAY (split_join_reset, reset); `RESET_RELAY (split_join_reset, reset);
VX_split_join #( VX_split_join #(
.CORE_ID (CORE_ID) .INSTANCE_ID ($sformatf("%s-splitjoin", INSTANCE_ID))
) split_join ( ) split_join (
.clk (clk), .clk (clk),
.reset (split_join_reset), .reset (split_join_reset),
@ -368,24 +370,42 @@ module VX_schedule import VX_gpu_pkg::*; #(
assign schedule_if.data.uuid = instr_uuid; assign schedule_if.data.uuid = instr_uuid;
`RESET_RELAY (pending_instr_reset, reset); // Track pending instructions per warp
wire no_pending_instr; reg [`NUM_WARPS-1:0] per_warp_incr;
VX_pending_instr #( always @(*) begin
.CTR_WIDTH (12), per_warp_incr = 0;
.DECR_COUNT (`ISSUE_WIDTH), if (schedule_if_fire) begin
.ALM_EMPTY (1) per_warp_incr[schedule_if.data.wid] = 1;
) pending_instr( end
.clk (clk), end
.reset (pending_instr_reset),
.incr (schedule_if_fire), wire [`NUM_WARPS-1:0] pending_warp_empty;
.incr_wid (schedule_if.data.wid), wire [`NUM_WARPS-1:0] pending_warp_alm_empty;
.decr (commit_sched_if.committed),
.decr_wid (commit_sched_if.committed_wid), for (genvar i = 0; i < `NUM_WARPS; ++i) begin
.alm_empty_wid (sched_csr_if.alm_empty_wid),
.alm_empty (sched_csr_if.alm_empty), `RESET_RELAY (pending_instr_reset, reset);
.empty (no_pending_instr)
); VX_pending_size #(
.SIZE (4096),
.ALM_EMPTY (1)
) counter (
.clk (clk),
.reset (pending_instr_reset),
.incr (per_warp_incr[i]),
.decr (commit_sched_if.committed_warps[i]),
.empty (pending_warp_empty[i]),
.alm_empty (pending_warp_alm_empty[i]),
`UNUSED_PIN (full),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
);
end
assign sched_csr_if.alm_empty = pending_warp_alm_empty[sched_csr_if.alm_empty_wid];
wire no_pending_instr = (& pending_warp_empty);
`BUFFER_EX(busy, (active_warps != 0 || ~no_pending_instr), 1'b1, 1); `BUFFER_EX(busy, (active_warps != 0 || ~no_pending_instr), 1'b1, 1);
@ -412,7 +432,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
end end
end end
end end
`RUNTIME_ASSERT(timeout_ctr < `STALL_TIMEOUT, ("%t: *** core%0d-scheduler-timeout: stalled_warps=%b", $time, CORE_ID, stalled_warps)) `RUNTIME_ASSERT(timeout_ctr < `STALL_TIMEOUT, ("%t: *** %s timeout: stalled_warps=%b", $time, INSTANCE_ID, stalled_warps))
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
reg [`PERF_CTR_BITS-1:0] perf_sched_idles; reg [`PERF_CTR_BITS-1:0] perf_sched_idles;
@ -431,8 +451,8 @@ module VX_schedule import VX_gpu_pkg::*; #(
end end
end end
assign perf_schedule_if.sched_idles = perf_sched_idles; assign sched_perf.idles = perf_sched_idles;
assign perf_schedule_if.sched_stalls = perf_sched_stalls; assign sched_perf.stalls = perf_sched_stalls;
`endif `endif
endmodule endmodule

View file

@ -14,39 +14,37 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_scoreboard import VX_gpu_pkg::*; #( module VX_scoreboard import VX_gpu_pkg::*; #(
parameter CORE_ID = 0 parameter `STRING INSTANCE_ID = ""
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
output reg [`PERF_CTR_BITS-1:0] perf_scb_stalls, output reg [`PERF_CTR_BITS-1:0] perf_stalls,
output reg [`PERF_CTR_BITS-1:0] perf_units_uses [`NUM_EX_UNITS], output reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_units_uses,
output reg [`PERF_CTR_BITS-1:0] perf_sfu_uses [`NUM_SFU_UNITS], output reg [`NUM_SFU_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_sfu_uses,
`endif `endif
VX_writeback_if.slave writeback_if [`ISSUE_WIDTH], VX_writeback_if.slave writeback_if,
VX_ibuffer_if.slave ibuffer_if [`NUM_WARPS], VX_ibuffer_if.slave ibuffer_if [PER_ISSUE_WARPS],
VX_scoreboard_if.master scoreboard_if [`ISSUE_WIDTH] VX_scoreboard_if.master scoreboard_if
); );
`UNUSED_PARAM (CORE_ID) `UNUSED_SPARAM (INSTANCE_ID)
localparam DATAW = `UUID_WIDTH + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + (`NR_BITS * 4) + 1; localparam DATAW = `UUID_WIDTH + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + (`NR_BITS * 4) + 1;
VX_ibuffer_if staging_if [PER_ISSUE_WARPS]();
reg [PER_ISSUE_WARPS-1:0] operands_ready;
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
reg [`NUM_WARPS-1:0][`NUM_EX_UNITS-1:0] perf_inuse_units_per_cycle; reg [PER_ISSUE_WARPS-1:0][`NUM_EX_UNITS-1:0] perf_inuse_units_per_cycle;
wire [`NUM_EX_UNITS-1:0] perf_units_per_cycle, perf_units_per_cycle_r; wire [`NUM_EX_UNITS-1:0] perf_units_per_cycle, perf_units_per_cycle_r;
reg [`NUM_WARPS-1:0][`NUM_SFU_UNITS-1:0] perf_inuse_sfu_per_cycle; reg [PER_ISSUE_WARPS-1:0][`NUM_SFU_UNITS-1:0] perf_inuse_sfu_per_cycle;
wire [`NUM_SFU_UNITS-1:0] perf_sfu_per_cycle, perf_sfu_per_cycle_r; wire [`NUM_SFU_UNITS-1:0] perf_sfu_per_cycle, perf_sfu_per_cycle_r;
wire [`NUM_WARPS-1:0] perf_issue_stalls_per_cycle;
wire [`CLOG2(`NUM_WARPS+1)-1:0] perf_stalls_per_cycle, perf_stalls_per_cycle_r;
`POP_COUNT(perf_stalls_per_cycle, perf_issue_stalls_per_cycle);
VX_reduce #( VX_reduce #(
.DATAW_IN (`NUM_EX_UNITS), .DATAW_IN (`NUM_EX_UNITS),
.N (`NUM_WARPS), .N (PER_ISSUE_WARPS),
.OP ("|") .OP ("|")
) perf_units_reduce ( ) perf_units_reduce (
.data_in (perf_inuse_units_per_cycle), .data_in (perf_inuse_units_per_cycle),
@ -55,22 +53,28 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
VX_reduce #( VX_reduce #(
.DATAW_IN (`NUM_SFU_UNITS), .DATAW_IN (`NUM_SFU_UNITS),
.N (`NUM_WARPS), .N (PER_ISSUE_WARPS),
.OP ("|") .OP ("|")
) perf_sfu_reduce ( ) perf_sfu_reduce (
.data_in (perf_inuse_sfu_per_cycle), .data_in (perf_inuse_sfu_per_cycle),
.data_out (perf_sfu_per_cycle) .data_out (perf_sfu_per_cycle)
); );
`BUFFER(perf_stalls_per_cycle_r, perf_stalls_per_cycle); `BUFFER_EX(perf_units_per_cycle_r, perf_units_per_cycle, 1'b1, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT));
`BUFFER_EX(perf_units_per_cycle_r, perf_units_per_cycle, 1'b1, `CDIV(`NUM_WARPS, `MAX_FANOUT)); `BUFFER_EX(perf_sfu_per_cycle_r, perf_sfu_per_cycle, 1'b1, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT));
`BUFFER_EX(perf_sfu_per_cycle_r, perf_sfu_per_cycle, 1'b1, `CDIV(`NUM_WARPS, `MAX_FANOUT));
wire [PER_ISSUE_WARPS-1:0] stg_valid_in;
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin
assign stg_valid_in[w] = staging_if[w].valid;
end
wire perf_stall_per_cycle = (|stg_valid_in) && ~(|(stg_valid_in & operands_ready));
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
perf_scb_stalls <= '0; perf_stalls <= '0;
end else begin end else begin
perf_scb_stalls <= perf_scb_stalls + `PERF_CTR_BITS'(perf_stalls_per_cycle_r); perf_stalls <= perf_stalls + `PERF_CTR_BITS'(perf_stall_per_cycle);
end end
end end
@ -95,138 +99,121 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
end end
`endif `endif
VX_ibuffer_if staging_if [`NUM_WARPS](); for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin
wire [`NUM_WARPS-1:0][3:0] staging_opds_busy;
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (DATAW), .DATAW (DATAW),
.SIZE (1) .SIZE (1)
) stanging_buf ( ) stanging_buf (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.valid_in (ibuffer_if[i].valid), .valid_in (ibuffer_if[w].valid),
.data_in (ibuffer_if[i].data), .data_in (ibuffer_if[w].data),
.ready_in (ibuffer_if[i].ready), .ready_in (ibuffer_if[w].ready),
.valid_out(staging_if[i].valid), .valid_out(staging_if[w].valid),
.data_out (staging_if[i].data), .data_out (staging_if[w].data),
.ready_out(staging_if[i].ready) .ready_out(staging_if[w].ready)
); );
end end
for (genvar i = 0; i < `NUM_WARPS; ++i) begin for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin
reg [`NUM_REGS-1:0] inuse_regs; reg [`NUM_REGS-1:0] inuse_regs;
reg [3:0] operands_busy_r, operands_busy_n; reg [3:0] operands_busy, operands_busy_n;
localparam iw = i % `ISSUE_WIDTH; wire ibuffer_fire = ibuffer_if[w].valid && ibuffer_if[w].ready;
localparam wis = i / `ISSUE_WIDTH;
wire ibuffer_fire = ibuffer_if[i].valid && ibuffer_if[i].ready; wire staging_fire = staging_if[w].valid && staging_if[w].ready;
wire staging_fire = staging_if[i].valid && staging_if[i].ready; wire writeback_fire = writeback_if.valid
&& (writeback_if.data.wis == ISSUE_WIS_W'(w))
wire writeback_fire = writeback_if[iw].valid && writeback_if.data.eop;
&& (writeback_if[iw].data.wis == ISSUE_WIS_W'(wis))
&& writeback_if[iw].data.eop;
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
reg [`NUM_REGS-1:0][`EX_WIDTH-1:0] inuse_units; reg [`NUM_REGS-1:0][`EX_WIDTH-1:0] inuse_units;
reg [`NUM_REGS-1:0][`SFU_WIDTH-1:0] inuse_sfu; reg [`NUM_REGS-1:0][`SFU_WIDTH-1:0] inuse_sfu;
reg [`SFU_WIDTH-1:0] sfu_type;
always @(*) begin always @(*) begin
case (staging_if[i].data.op_type) perf_inuse_units_per_cycle[w] = '0;
`INST_SFU_CSRRW, perf_inuse_sfu_per_cycle[w] = '0;
`INST_SFU_CSRRS, if (staging_if[w].valid) begin
`INST_SFU_CSRRC: sfu_type = `SFU_CSRS; if (operands_busy[0]) begin
default: sfu_type = `SFU_WCTL; perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rd]] = 1;
endcase if (inuse_units[staging_if[w].data.rd] == `EX_SFU) begin
end perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rd]] = 1;
always @(*) begin
perf_inuse_units_per_cycle[i] = '0;
perf_inuse_sfu_per_cycle[i] = '0;
if (staging_if[i].valid) begin
if (operands_busy_r[0]) begin
perf_inuse_units_per_cycle[i][inuse_units[staging_if[i].data.rd]] = 1;
if (inuse_units[staging_if[i].data.rd] == `EX_SFU) begin
perf_inuse_sfu_per_cycle[i][inuse_sfu[staging_if[i].data.rd]] = 1;
end end
end end
if (operands_busy_r[1]) begin if (operands_busy[1]) begin
perf_inuse_units_per_cycle[i][inuse_units[staging_if[i].data.rs1]] = 1; perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rs1]] = 1;
if (inuse_units[staging_if[i].data.rs1] == `EX_SFU) begin if (inuse_units[staging_if[w].data.rs1] == `EX_SFU) begin
perf_inuse_sfu_per_cycle[i][inuse_sfu[staging_if[i].data.rs1]] = 1; perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rs1]] = 1;
end end
end end
if (operands_busy_r[2]) begin if (operands_busy[2]) begin
perf_inuse_units_per_cycle[i][inuse_units[staging_if[i].data.rs2]] = 1; perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rs2]] = 1;
if (inuse_units[staging_if[i].data.rs2] == `EX_SFU) begin if (inuse_units[staging_if[w].data.rs2] == `EX_SFU) begin
perf_inuse_sfu_per_cycle[i][inuse_sfu[staging_if[i].data.rs2]] = 1; perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rs2]] = 1;
end end
end end
if (operands_busy_r[3]) begin if (operands_busy[3]) begin
perf_inuse_units_per_cycle[i][inuse_units[staging_if[i].data.rs3]] = 1; perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rs3]] = 1;
if (inuse_units[staging_if[i].data.rs3] == `EX_SFU) begin if (inuse_units[staging_if[w].data.rs3] == `EX_SFU) begin
perf_inuse_sfu_per_cycle[i][inuse_sfu[staging_if[i].data.rs3]] = 1; perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rs3]] = 1;
end end
end end
end end
end end
assign perf_issue_stalls_per_cycle[i] = staging_if[i].valid && ~staging_if[i].ready;
`endif `endif
always @(*) begin always @(*) begin
operands_busy_n = operands_busy_r; operands_busy_n = operands_busy;
if (ibuffer_fire) begin if (ibuffer_fire) begin
operands_busy_n = { operands_busy_n = {
inuse_regs[ibuffer_if[i].data.rs3], inuse_regs[ibuffer_if[w].data.rs3],
inuse_regs[ibuffer_if[i].data.rs2], inuse_regs[ibuffer_if[w].data.rs2],
inuse_regs[ibuffer_if[i].data.rs1], inuse_regs[ibuffer_if[w].data.rs1],
inuse_regs[ibuffer_if[i].data.rd] inuse_regs[ibuffer_if[w].data.rd]
}; };
end end
if (writeback_fire) begin if (writeback_fire) begin
if (ibuffer_fire) begin if (ibuffer_fire) begin
if (writeback_if[iw].data.rd == ibuffer_if[i].data.rd) begin if (writeback_if.data.rd == ibuffer_if[w].data.rd) begin
operands_busy_n[0] = 0; operands_busy_n[0] = 0;
end end
if (writeback_if[iw].data.rd == ibuffer_if[i].data.rs1) begin if (writeback_if.data.rd == ibuffer_if[w].data.rs1) begin
operands_busy_n[1] = 0; operands_busy_n[1] = 0;
end end
if (writeback_if[iw].data.rd == ibuffer_if[i].data.rs2) begin if (writeback_if.data.rd == ibuffer_if[w].data.rs2) begin
operands_busy_n[2] = 0; operands_busy_n[2] = 0;
end end
if (writeback_if[iw].data.rd == ibuffer_if[i].data.rs3) begin if (writeback_if.data.rd == ibuffer_if[w].data.rs3) begin
operands_busy_n[3] = 0; operands_busy_n[3] = 0;
end end
end else begin end else begin
if (writeback_if[iw].data.rd == staging_if[i].data.rd) begin if (writeback_if.data.rd == staging_if[w].data.rd) begin
operands_busy_n[0] = 0; operands_busy_n[0] = 0;
end end
if (writeback_if[iw].data.rd == staging_if[i].data.rs1) begin if (writeback_if.data.rd == staging_if[w].data.rs1) begin
operands_busy_n[1] = 0; operands_busy_n[1] = 0;
end end
if (writeback_if[iw].data.rd == staging_if[i].data.rs2) begin if (writeback_if.data.rd == staging_if[w].data.rs2) begin
operands_busy_n[2] = 0; operands_busy_n[2] = 0;
end end
if (writeback_if[iw].data.rd == staging_if[i].data.rs3) begin if (writeback_if.data.rd == staging_if[w].data.rs3) begin
operands_busy_n[3] = 0; operands_busy_n[3] = 0;
end end
end end
end end
if (staging_fire && staging_if[i].data.wb) begin if (staging_fire && staging_if[w].data.wb) begin
if (staging_if[i].data.rd == ibuffer_if[i].data.rd) begin if (staging_if[w].data.rd == ibuffer_if[w].data.rd) begin
operands_busy_n[0] = 1; operands_busy_n[0] = 1;
end end
if (staging_if[i].data.rd == ibuffer_if[i].data.rs1) begin if (staging_if[w].data.rd == ibuffer_if[w].data.rs1) begin
operands_busy_n[1] = 1; operands_busy_n[1] = 1;
end end
if (staging_if[i].data.rd == ibuffer_if[i].data.rs2) begin if (staging_if[w].data.rd == ibuffer_if[w].data.rs2) begin
operands_busy_n[2] = 1; operands_busy_n[2] = 1;
end end
if (staging_if[i].data.rd == ibuffer_if[i].data.rs3) begin if (staging_if[w].data.rd == ibuffer_if[w].data.rs3) begin
operands_busy_n[3] = 1; operands_busy_n[3] = 1;
end end
end end
@ -237,25 +224,24 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
inuse_regs <= '0; inuse_regs <= '0;
end else begin end else begin
if (writeback_fire) begin if (writeback_fire) begin
inuse_regs[writeback_if[iw].data.rd] <= 0; inuse_regs[writeback_if.data.rd] <= 0;
end end
if (staging_fire && staging_if[i].data.wb) begin if (staging_fire && staging_if[w].data.wb) begin
inuse_regs[staging_if[i].data.rd] <= 1; inuse_regs[staging_if[w].data.rd] <= 1;
end end
end end
operands_busy_r <= operands_busy_n; operands_busy <= operands_busy_n;
operands_ready[w] <= ~(| operands_busy_n);
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
if (staging_fire && staging_if[i].data.wb) begin if (staging_fire && staging_if[w].data.wb) begin
inuse_units[staging_if[i].data.rd] <= staging_if[i].data.ex_type; inuse_units[staging_if[w].data.rd] <= staging_if[w].data.ex_type;
if (staging_if[i].data.ex_type == `EX_SFU) begin if (staging_if[w].data.ex_type == `EX_SFU) begin
inuse_sfu[staging_if[i].data.rd] <= sfu_type; inuse_sfu[staging_if[w].data.rd] <= op_to_sfu_type(staging_if[w].data.op_type);
end end
end end
`endif `endif
end end
assign staging_opds_busy[i] = operands_busy_r;
`ifdef SIMULATION `ifdef SIMULATION
reg [31:0] timeout_ctr; reg [31:0] timeout_ctr;
@ -263,11 +249,11 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
if (reset) begin if (reset) begin
timeout_ctr <= '0; timeout_ctr <= '0;
end else begin end else begin
if (staging_if[i].valid && ~staging_if[i].ready) begin if (staging_if[w].valid && ~staging_if[w].ready) begin
`ifdef DBG_TRACE_PIPELINE `ifdef DBG_TRACE_PIPELINE
`TRACE(3, ("%d: *** core%0d-scoreboard-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n", `TRACE(3, ("%d: *** %s-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n",
$time, CORE_ID, i, {staging_if[i].data.PC, 1'b0}, staging_if[i].data.tmask, timeout_ctr, $time, INSTANCE_ID, w, {staging_if[w].data.PC, 1'b0}, staging_if[w].data.tmask, timeout_ctr,
operands_busy_r, staging_if[i].data.uuid)); operands_busy, staging_if[w].data.uuid));
`endif `endif
timeout_ctr <= timeout_ctr + 1; timeout_ctr <= timeout_ctr + 1;
end else if (ibuffer_fire) begin end else if (ibuffer_fire) begin
@ -277,59 +263,57 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
end end
`RUNTIME_ASSERT((timeout_ctr < `STALL_TIMEOUT), `RUNTIME_ASSERT((timeout_ctr < `STALL_TIMEOUT),
("%t: *** core%0d-scoreboard-timeout: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)", ("%t: *** %s timeout: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)",
$time, CORE_ID, i, {staging_if[i].data.PC, 1'b0}, staging_if[i].data.tmask, timeout_ctr, $time, INSTANCE_ID, w, {staging_if[w].data.PC, 1'b0}, staging_if[w].data.tmask, timeout_ctr,
operands_busy_r, staging_if[i].data.uuid)); operands_busy, staging_if[w].data.uuid));
`RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if[iw].data.rd] != 0, `RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if.data.rd] != 0,
("%t: *** core%0d: invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)", ("%t: *** %s invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)",
$time, CORE_ID, i, {writeback_if[iw].data.PC, 1'b0}, writeback_if[iw].data.tmask, writeback_if[iw].data.rd, writeback_if[iw].data.uuid)); $time, INSTANCE_ID, w, {writeback_if.data.PC, 1'b0}, writeback_if.data.tmask, writeback_if.data.rd, writeback_if.data.uuid));
`endif `endif
end end
`RESET_RELAY (arb_reset, reset); wire [PER_ISSUE_WARPS-1:0] arb_valid_in;
wire [PER_ISSUE_WARPS-1:0][DATAW-1:0] arb_data_in;
wire [PER_ISSUE_WARPS-1:0] arb_ready_in;
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin
wire [ISSUE_RATIO-1:0] valid_in; assign arb_valid_in[w] = staging_if[w].valid && operands_ready[w];
wire [ISSUE_RATIO-1:0][DATAW-1:0] data_in; assign arb_data_in[w] = staging_if[w].data;
wire [ISSUE_RATIO-1:0] ready_in; assign staging_if[w].ready = arb_ready_in[w] && operands_ready[w];
for (genvar j = 0; j < ISSUE_RATIO; ++j) begin
wire operands_ready = ~(| staging_opds_busy[j * `ISSUE_WIDTH + i]);
assign valid_in[j] = staging_if[j * `ISSUE_WIDTH + i].valid && operands_ready;
assign data_in[j] = staging_if[j * `ISSUE_WIDTH + i].data;
assign staging_if[j * `ISSUE_WIDTH + i].ready = ready_in[j] && operands_ready;
end
VX_stream_arb #(
.NUM_INPUTS (ISSUE_RATIO),
.DATAW (DATAW),
.ARBITER ("R"),
.OUT_BUF (2)
) out_arb (
.clk (clk),
.reset (arb_reset),
.valid_in (valid_in),
.ready_in (ready_in),
.data_in (data_in),
.data_out ({
scoreboard_if[i].data.uuid,
scoreboard_if[i].data.tmask,
scoreboard_if[i].data.PC,
scoreboard_if[i].data.ex_type,
scoreboard_if[i].data.op_type,
scoreboard_if[i].data.op_args,
scoreboard_if[i].data.wb,
scoreboard_if[i].data.rd,
scoreboard_if[i].data.rs1,
scoreboard_if[i].data.rs2,
scoreboard_if[i].data.rs3
}),
.valid_out (scoreboard_if[i].valid),
.ready_out (scoreboard_if[i].ready),
.sel_out (scoreboard_if[i].data.wis)
);
end end
`RESET_RELAY (arb_reset, reset);
VX_stream_arb #(
.NUM_INPUTS (PER_ISSUE_WARPS),
.DATAW (DATAW),
.ARBITER ("F"),
.LUTRAM (1),
.OUT_BUF (4) // using 2-cycle EB for area reduction
) out_arb (
.clk (clk),
.reset (arb_reset),
.valid_in (arb_valid_in),
.ready_in (arb_ready_in),
.data_in (arb_data_in),
.data_out ({
scoreboard_if.data.uuid,
scoreboard_if.data.tmask,
scoreboard_if.data.PC,
scoreboard_if.data.ex_type,
scoreboard_if.data.op_type,
scoreboard_if.data.op_args,
scoreboard_if.data.wb,
scoreboard_if.data.rd,
scoreboard_if.data.rs1,
scoreboard_if.data.rs2,
scoreboard_if.data.rs3
}),
.valid_out (scoreboard_if.valid),
.ready_out (scoreboard_if.ready),
.sel_out (scoreboard_if.data.wis)
);
endmodule endmodule

View file

@ -14,6 +14,7 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_sfu_unit import VX_gpu_pkg::*; #( module VX_sfu_unit import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "",
parameter CORE_ID = 0 parameter CORE_ID = 0
) ( ) (
input wire clk, input wire clk,
@ -39,7 +40,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
VX_commit_if.master commit_if [`ISSUE_WIDTH], VX_commit_if.master commit_if [`ISSUE_WIDTH],
VX_warp_ctl_if.master warp_ctl_if VX_warp_ctl_if.master warp_ctl_if
); );
`UNUSED_PARAM (CORE_ID) `UNUSED_SPARAM (INSTANCE_ID)
localparam BLOCK_SIZE = 1; localparam BLOCK_SIZE = 1;
localparam NUM_LANES = `NUM_SFU_LANES; localparam NUM_LANES = `NUM_SFU_LANES;
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
@ -83,7 +84,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
`RESET_RELAY (wctl_reset, reset); `RESET_RELAY (wctl_reset, reset);
VX_wctl_unit #( VX_wctl_unit #(
.CORE_ID (CORE_ID), .INSTANCE_ID ($sformatf("%s-wctl", INSTANCE_ID)),
.NUM_LANES (NUM_LANES) .NUM_LANES (NUM_LANES)
) wctl_unit ( ) wctl_unit (
.clk (clk), .clk (clk),
@ -111,6 +112,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
`RESET_RELAY (csr_reset, reset); `RESET_RELAY (csr_reset, reset);
VX_csr_unit #( VX_csr_unit #(
.INSTANCE_ID ($sformatf("%s-csr", INSTANCE_ID)),
.CORE_ID (CORE_ID), .CORE_ID (CORE_ID),
.NUM_LANES (NUM_LANES) .NUM_LANES (NUM_LANES)
) csr_unit ( ) csr_unit (

View file

@ -14,7 +14,7 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_split_join import VX_gpu_pkg::*; #( module VX_split_join import VX_gpu_pkg::*; #(
parameter CORE_ID = 0 parameter `STRING INSTANCE_ID = ""
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
@ -31,7 +31,7 @@ module VX_split_join import VX_gpu_pkg::*; #(
input wire [`NW_WIDTH-1:0] stack_wid, input wire [`NW_WIDTH-1:0] stack_wid,
output wire [`DV_STACK_SIZEW-1:0] stack_ptr output wire [`DV_STACK_SIZEW-1:0] stack_ptr
); );
`UNUSED_PARAM (CORE_ID) `UNUSED_SPARAM (INSTANCE_ID)
wire [(`NUM_THREADS+`PC_BITS)-1:0] ipdom_data [`NUM_WARPS-1:0]; wire [(`NUM_THREADS+`PC_BITS)-1:0] ipdom_data [`NUM_WARPS-1:0];
wire [`DV_STACK_SIZEW-1:0] ipdom_q_ptr [`NUM_WARPS-1:0]; wire [`DV_STACK_SIZEW-1:0] ipdom_q_ptr [`NUM_WARPS-1:0];

399
hw/rtl/core/VX_trace_pkg.sv Normal file
View file

@ -0,0 +1,399 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`ifndef VX_TRACE_PKG_VH
`define VX_TRACE_PKG_VH
`include "VX_define.vh"
package VX_trace_pkg;
`ifdef SIMULATION
`ifdef SV_DPI
import "DPI-C" function void dpi_trace(input int level, input string format /*verilator sformat*/);
`endif
import VX_gpu_pkg::*;
// Traces the short name of the execution unit selected by ex_type.
//   level   - trace verbosity level, passed straight through to `TRACE
//   ex_type - execution unit selector, compared against the `EX_* codes
// A selector that matches none of the known codes is traced as "?".
task trace_ex_type(input int level, input [`EX_BITS-1:0] ex_type);
    if (ex_type == `EX_ALU) begin
        `TRACE(level, ("ALU"));
    end else if (ex_type == `EX_LSU) begin
        `TRACE(level, ("LSU"));
    end else if (ex_type == `EX_FPU) begin
        `TRACE(level, ("FPU"));
    end else if (ex_type == `EX_SFU) begin
        `TRACE(level, ("SFU"));
    end else begin
        // unknown (or X/Z) selector
        `TRACE(level, ("?"));
    end
endtask
// Traces the assembly mnemonic of a decoded instruction.
//   level   - trace verbosity level, passed straight through to `TRACE
//   ex_type - execution unit selector (`EX_ALU / `EX_LSU / `EX_FPU / `EX_SFU)
//   op_type - unit-specific opcode; it is size-cast to the selected unit's
//             `INST_*_BITS width before being matched
//   op_args - decoded operand arguments; the per-unit flags read here
//             (is_w, use_imm, is_float, fmt, frm, is_neg, ...) refine the
//             mnemonic that is printed
// Every combination that has no matching entry is traced as "?".
//
// Every if/else branch that invokes `TRACE is wrapped in begin/end so that
// the if/else pairing never depends on what the macro expands to (a macro
// expanding to a bare `if` statement would otherwise capture the `else`).
task trace_ex_op(input int level,
                 input [`EX_BITS-1:0] ex_type,
                 input [`INST_OP_BITS-1:0] op_type,
                 input VX_gpu_pkg::op_args_t op_args
);
    case (ex_type)
    `EX_ALU: begin
        case (op_args.alu.xtype)
        `ALU_TYPE_ARITH: begin
            if (op_args.alu.is_w) begin
                // is_w set: mnemonics carry the W suffix
                if (op_args.alu.use_imm) begin
                    case (`INST_ALU_BITS'(op_type))
                    `INST_ALU_ADD: `TRACE(level, ("ADDIW"));
                    `INST_ALU_SLL: `TRACE(level, ("SLLIW"));
                    `INST_ALU_SRL: `TRACE(level, ("SRLIW"));
                    `INST_ALU_SRA: `TRACE(level, ("SRAIW"));
                    default:       `TRACE(level, ("?"));
                    endcase
                end else begin
                    case (`INST_ALU_BITS'(op_type))
                    `INST_ALU_ADD: `TRACE(level, ("ADDW"));
                    `INST_ALU_SUB: `TRACE(level, ("SUBW"));
                    `INST_ALU_SLL: `TRACE(level, ("SLLW"));
                    `INST_ALU_SRL: `TRACE(level, ("SRLW"));
                    `INST_ALU_SRA: `TRACE(level, ("SRAW"));
                    default:       `TRACE(level, ("?"));
                    endcase
                end
            end else begin
                if (op_args.alu.use_imm) begin
                    // immediate forms
                    case (`INST_ALU_BITS'(op_type))
                    `INST_ALU_ADD:   `TRACE(level, ("ADDI"));
                    `INST_ALU_SLL:   `TRACE(level, ("SLLI"));
                    `INST_ALU_SRL:   `TRACE(level, ("SRLI"));
                    `INST_ALU_SRA:   `TRACE(level, ("SRAI"));
                    `INST_ALU_SLT:   `TRACE(level, ("SLTI"));
                    `INST_ALU_SLTU:  `TRACE(level, ("SLTIU"));
                    `INST_ALU_XOR:   `TRACE(level, ("XORI"));
                    `INST_ALU_OR:    `TRACE(level, ("ORI"));
                    `INST_ALU_AND:   `TRACE(level, ("ANDI"));
                    `INST_ALU_LUI:   `TRACE(level, ("LUI"));
                    `INST_ALU_AUIPC: `TRACE(level, ("AUIPC"));
                    default:         `TRACE(level, ("?"));
                    endcase
                end else begin
                    // register-register forms
                    case (`INST_ALU_BITS'(op_type))
                    `INST_ALU_ADD:  `TRACE(level, ("ADD"));
                    `INST_ALU_SUB:  `TRACE(level, ("SUB"));
                    `INST_ALU_SLL:  `TRACE(level, ("SLL"));
                    `INST_ALU_SRL:  `TRACE(level, ("SRL"));
                    `INST_ALU_SRA:  `TRACE(level, ("SRA"));
                    `INST_ALU_SLT:  `TRACE(level, ("SLT"));
                    `INST_ALU_SLTU: `TRACE(level, ("SLTU"));
                    `INST_ALU_XOR:  `TRACE(level, ("XOR"));
                    `INST_ALU_OR:   `TRACE(level, ("OR"));
                    `INST_ALU_AND:  `TRACE(level, ("AND"));
                    `INST_ALU_CZEQ: `TRACE(level, ("CZERO.EQZ"));
                    `INST_ALU_CZNE: `TRACE(level, ("CZERO.NEZ"));
                    default:        `TRACE(level, ("?"));
                    endcase
                end
            end
        end
        `ALU_TYPE_BRANCH: begin
            case (`INST_BR_BITS'(op_type))
            `INST_BR_EQ:     `TRACE(level, ("BEQ"));
            `INST_BR_NE:     `TRACE(level, ("BNE"));
            `INST_BR_LT:     `TRACE(level, ("BLT"));
            `INST_BR_GE:     `TRACE(level, ("BGE"));
            `INST_BR_LTU:    `TRACE(level, ("BLTU"));
            `INST_BR_GEU:    `TRACE(level, ("BGEU"));
            `INST_BR_JAL:    `TRACE(level, ("JAL"));
            `INST_BR_JALR:   `TRACE(level, ("JALR"));
            `INST_BR_ECALL:  `TRACE(level, ("ECALL"));
            `INST_BR_EBREAK: `TRACE(level, ("EBREAK"));
            `INST_BR_URET:   `TRACE(level, ("URET"));
            `INST_BR_SRET:   `TRACE(level, ("SRET"));
            `INST_BR_MRET:   `TRACE(level, ("MRET"));
            default:         `TRACE(level, ("?"));
            endcase
        end
        `ALU_TYPE_MULDIV: begin
            if (op_args.alu.is_w) begin
                case (`INST_M_BITS'(op_type))
                `INST_M_MUL:  `TRACE(level, ("MULW"));
                `INST_M_DIV:  `TRACE(level, ("DIVW"));
                `INST_M_DIVU: `TRACE(level, ("DIVUW"));
                `INST_M_REM:  `TRACE(level, ("REMW"));
                `INST_M_REMU: `TRACE(level, ("REMUW"));
                default:      `TRACE(level, ("?"));
                endcase
            end else begin
                case (`INST_M_BITS'(op_type))
                `INST_M_MUL:    `TRACE(level, ("MUL"));
                `INST_M_MULH:   `TRACE(level, ("MULH"));
                `INST_M_MULHSU: `TRACE(level, ("MULHSU"));
                `INST_M_MULHU:  `TRACE(level, ("MULHU"));
                `INST_M_DIV:    `TRACE(level, ("DIV"));
                `INST_M_DIVU:   `TRACE(level, ("DIVU"));
                `INST_M_REM:    `TRACE(level, ("REM"));
                `INST_M_REMU:   `TRACE(level, ("REMU"));
                default:        `TRACE(level, ("?"));
                endcase
            end
        end
        default: `TRACE(level, ("?"));
        endcase
    end
    `EX_LSU: begin
        if (op_args.lsu.is_float) begin
            // is_float set: F-prefixed load/store mnemonics
            case (`INST_LSU_BITS'(op_type))
            `INST_LSU_LW: `TRACE(level, ("FLW"));
            `INST_LSU_LD: `TRACE(level, ("FLD"));
            `INST_LSU_SW: `TRACE(level, ("FSW"));
            `INST_LSU_SD: `TRACE(level, ("FSD"));
            default:      `TRACE(level, ("?"));
            endcase
        end else begin
            case (`INST_LSU_BITS'(op_type))
            `INST_LSU_LB:    `TRACE(level, ("LB"));
            `INST_LSU_LH:    `TRACE(level, ("LH"));
            `INST_LSU_LW:    `TRACE(level, ("LW"));
            `INST_LSU_LD:    `TRACE(level, ("LD"));
            `INST_LSU_LBU:   `TRACE(level, ("LBU"));
            `INST_LSU_LHU:   `TRACE(level, ("LHU"));
            `INST_LSU_LWU:   `TRACE(level, ("LWU"));
            `INST_LSU_SB:    `TRACE(level, ("SB"));
            `INST_LSU_SH:    `TRACE(level, ("SH"));
            `INST_LSU_SW:    `TRACE(level, ("SW"));
            `INST_LSU_SD:    `TRACE(level, ("SD"));
            `INST_LSU_FENCE: `TRACE(level, ("FENCE"));
            default:         `TRACE(level, ("?"));
            endcase
        end
    end
    `EX_FPU: begin
        // fmt[0] picks the .D (set) or .S (clear) mnemonic suffix;
        // for conversions fmt[1] picks the L (set) or W (clear) integer suffix.
        case (`INST_FPU_BITS'(op_type))
        `INST_FPU_ADD: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FADD.D"));
            end else begin
                `TRACE(level, ("FADD.S"));
            end
        end
        `INST_FPU_SUB: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FSUB.D"));
            end else begin
                `TRACE(level, ("FSUB.S"));
            end
        end
        `INST_FPU_MUL: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FMUL.D"));
            end else begin
                `TRACE(level, ("FMUL.S"));
            end
        end
        `INST_FPU_DIV: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FDIV.D"));
            end else begin
                `TRACE(level, ("FDIV.S"));
            end
        end
        `INST_FPU_SQRT: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FSQRT.D"));
            end else begin
                `TRACE(level, ("FSQRT.S"));
            end
        end
        `INST_FPU_MADD: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FMADD.D"));
            end else begin
                `TRACE(level, ("FMADD.S"));
            end
        end
        `INST_FPU_MSUB: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FMSUB.D"));
            end else begin
                `TRACE(level, ("FMSUB.S"));
            end
        end
        `INST_FPU_NMADD: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FNMADD.D"));
            end else begin
                `TRACE(level, ("FNMADD.S"));
            end
        end
        `INST_FPU_NMSUB: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FNMSUB.D"));
            end else begin
                `TRACE(level, ("FNMSUB.S"));
            end
        end
        `INST_FPU_CMP: begin
            // the low two bits of frm select the comparison
            if (op_args.fpu.fmt[0]) begin
                case (op_args.fpu.frm[1:0])
                0:       `TRACE(level, ("FLE.D"));
                1:       `TRACE(level, ("FLT.D"));
                2:       `TRACE(level, ("FEQ.D"));
                default: `TRACE(level, ("?"));
                endcase
            end else begin
                case (op_args.fpu.frm[1:0])
                0:       `TRACE(level, ("FLE.S"));
                1:       `TRACE(level, ("FLT.S"));
                2:       `TRACE(level, ("FEQ.S"));
                default: `TRACE(level, ("?"));
                endcase
            end
        end
        `INST_FPU_F2F: begin
            if (op_args.fpu.fmt[0]) begin
                `TRACE(level, ("FCVT.D.S"));
            end else begin
                `TRACE(level, ("FCVT.S.D"));
            end
        end
        `INST_FPU_F2I: begin
            if (op_args.fpu.fmt[0]) begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.L.D"));
                end else begin
                    `TRACE(level, ("FCVT.W.D"));
                end
            end else begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.L.S"));
                end else begin
                    `TRACE(level, ("FCVT.W.S"));
                end
            end
        end
        `INST_FPU_F2U: begin
            if (op_args.fpu.fmt[0]) begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.LU.D"));
                end else begin
                    `TRACE(level, ("FCVT.WU.D"));
                end
            end else begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.LU.S"));
                end else begin
                    `TRACE(level, ("FCVT.WU.S"));
                end
            end
        end
        `INST_FPU_I2F: begin
            if (op_args.fpu.fmt[0]) begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.D.L"));
                end else begin
                    `TRACE(level, ("FCVT.D.W"));
                end
            end else begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.S.L"));
                end else begin
                    `TRACE(level, ("FCVT.S.W"));
                end
            end
        end
        `INST_FPU_U2F: begin
            if (op_args.fpu.fmt[0]) begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.D.LU"));
                end else begin
                    `TRACE(level, ("FCVT.D.WU"));
                end
            end else begin
                if (op_args.fpu.fmt[1]) begin
                    `TRACE(level, ("FCVT.S.LU"));
                end else begin
                    `TRACE(level, ("FCVT.S.WU"));
                end
            end
        end
        `INST_FPU_MISC: begin
            // frm is reused as a sub-opcode for the miscellaneous group
            if (op_args.fpu.fmt[0]) begin
                case (op_args.fpu.frm)
                0:       `TRACE(level, ("FSGNJ.D"));
                1:       `TRACE(level, ("FSGNJN.D"));
                2:       `TRACE(level, ("FSGNJX.D"));
                3:       `TRACE(level, ("FCLASS.D"));
                4:       `TRACE(level, ("FMV.X.D"));
                5:       `TRACE(level, ("FMV.D.X"));
                6:       `TRACE(level, ("FMIN.D"));
                7:       `TRACE(level, ("FMAX.D"));
                // unmatched (e.g. X/Z) selector: trace "?" like every other case here
                default: `TRACE(level, ("?"));
                endcase
            end else begin
                case (op_args.fpu.frm)
                0:       `TRACE(level, ("FSGNJ.S"));
                1:       `TRACE(level, ("FSGNJN.S"));
                2:       `TRACE(level, ("FSGNJX.S"));
                3:       `TRACE(level, ("FCLASS.S"));
                4:       `TRACE(level, ("FMV.X.S"));
                5:       `TRACE(level, ("FMV.S.X"));
                6:       `TRACE(level, ("FMIN.S"));
                7:       `TRACE(level, ("FMAX.S"));
                // unmatched (e.g. X/Z) selector: trace "?" like every other case here
                default: `TRACE(level, ("?"));
                endcase
            end
        end
        default: `TRACE(level, ("?"));
        endcase
    end
    `EX_SFU: begin
        case (`INST_SFU_BITS'(op_type))
        `INST_SFU_TMC:    `TRACE(level, ("TMC"));
        `INST_SFU_WSPAWN: `TRACE(level, ("WSPAWN"));
        `INST_SFU_SPLIT: begin
            if (op_args.wctl.is_neg) begin
                `TRACE(level, ("SPLIT.N"));
            end else begin
                `TRACE(level, ("SPLIT"));
            end
        end
        `INST_SFU_JOIN:   `TRACE(level, ("JOIN"));
        `INST_SFU_BAR:    `TRACE(level, ("BAR"));
        `INST_SFU_PRED: begin
            if (op_args.wctl.is_neg) begin
                `TRACE(level, ("PRED.N"));
            end else begin
                `TRACE(level, ("PRED"));
            end
        end
        `INST_SFU_CSRRW: begin
            if (op_args.csr.use_imm) begin
                `TRACE(level, ("CSRRWI"));
            end else begin
                `TRACE(level, ("CSRRW"));
            end
        end
        `INST_SFU_CSRRS: begin
            if (op_args.csr.use_imm) begin
                `TRACE(level, ("CSRRSI"));
            end else begin
                `TRACE(level, ("CSRRS"));
            end
        end
        `INST_SFU_CSRRC: begin
            if (op_args.csr.use_imm) begin
                `TRACE(level, ("CSRRCI"));
            end else begin
                `TRACE(level, ("CSRRC"));
            end
        end
        default: `TRACE(level, ("?"));
        endcase
    end
    default: `TRACE(level, ("?"));
    endcase
endtask
// Traces the operand-argument fields that are relevant to the selected
// execution unit, as a ", name=value" list.
//   level   - trace verbosity level, passed straight through to `TRACE
//   ex_type - execution unit selector, compared against the `EX_* codes
//   op_type - unit-specific opcode; only consulted for `EX_SFU, to test
//             `INST_SFU_IS_CSR
//   op_args - decoded operand arguments whose fields are printed
// Nothing is traced for an unknown unit or for a non-CSR SFU operation.
task trace_op_args(input int level,
                   input [`EX_BITS-1:0] ex_type,
                   input [`INST_OP_BITS-1:0] op_type,
                   input VX_gpu_pkg::op_args_t op_args
);
    if (ex_type == `EX_ALU) begin
        `TRACE(level, (", use_PC=%b, use_imm=%b, imm=0x%0h", op_args.alu.use_PC, op_args.alu.use_imm, op_args.alu.imm));
    end else if (ex_type == `EX_LSU) begin
        `TRACE(level, (", offset=0x%0h", op_args.lsu.offset));
    end else if (ex_type == `EX_FPU) begin
        `TRACE(level, (", fmt=0x%0h, frm=0x%0h", op_args.fpu.fmt, op_args.fpu.frm));
    end else if (ex_type == `EX_SFU) begin
        // only CSR operations have arguments that are printed here
        if (`INST_SFU_IS_CSR(op_type)) begin
            `TRACE(level, (", addr=0x%0h, use_imm=%b, imm=0x%0h", op_args.csr.addr, op_args.csr.use_imm, op_args.csr.imm));
        end
    end
endtask
// Traces the symbolic name of a base DCR address.
//   level - trace verbosity level, passed straight through to `TRACE
//   addr  - DCR address, compared against the `VX_DCR_BASE_* codes
// An address that matches none of the known codes is traced as "?".
task trace_base_dcr(input int level, input [`VX_DCR_ADDR_WIDTH-1:0] addr);
    if (addr == `VX_DCR_BASE_STARTUP_ADDR0) begin
        `TRACE(level, ("STARTUP_ADDR0"));
    end else if (addr == `VX_DCR_BASE_STARTUP_ADDR1) begin
        `TRACE(level, ("STARTUP_ADDR1"));
    end else if (addr == `VX_DCR_BASE_STARTUP_ARG0) begin
        `TRACE(level, ("STARTUP_ARG0"));
    end else if (addr == `VX_DCR_BASE_STARTUP_ARG1) begin
        `TRACE(level, ("STARTUP_ARG1"));
    end else if (addr == `VX_DCR_BASE_MPM_CLASS) begin
        `TRACE(level, ("MPM_CLASS"));
    end else begin
        // unknown (or X/Z) address
        `TRACE(level, ("?"));
    end
endtask
`endif
endpackage
`endif // VX_TRACE_PKG_VH

View file

@ -14,7 +14,7 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_wctl_unit import VX_gpu_pkg::*; #( module VX_wctl_unit import VX_gpu_pkg::*; #(
parameter CORE_ID = 0, parameter `STRING INSTANCE_ID = "",
parameter NUM_LANES = 1 parameter NUM_LANES = 1
) ( ) (
input wire clk, input wire clk,
@ -27,7 +27,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
VX_warp_ctl_if.master warp_ctl_if, VX_warp_ctl_if.master warp_ctl_if,
VX_commit_if.master commit_if VX_commit_if.master commit_if
); );
`UNUSED_PARAM (CORE_ID) `UNUSED_SPARAM (INSTANCE_ID)
localparam LANE_BITS = `CLOG2(NUM_LANES); localparam LANE_BITS = `CLOG2(NUM_LANES);
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
localparam PID_WIDTH = `UP(PID_BITS); localparam PID_WIDTH = `UP(PID_BITS);

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -15,7 +15,7 @@
`ifdef FPU_DPI `ifdef FPU_DPI
module VX_fpu_dpi import VX_fpu_pkg::*; #( module VX_fpu_dpi import VX_fpu_pkg::*; #(
parameter NUM_LANES = 1, parameter NUM_LANES = 1,
parameter TAG_WIDTH = 1, parameter TAG_WIDTH = 1,
parameter OUT_BUF = 0 parameter OUT_BUF = 0
@ -29,7 +29,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
input wire [NUM_LANES-1:0] mask_in, input wire [NUM_LANES-1:0] mask_in,
input wire [TAG_WIDTH-1:0] tag_in, input wire [TAG_WIDTH-1:0] tag_in,
input wire [`INST_FPU_BITS-1:0] op_type, input wire [`INST_FPU_BITS-1:0] op_type,
input wire [`INST_FMT_BITS-1:0] fmt, input wire [`INST_FMT_BITS-1:0] fmt,
input wire [`INST_FRM_BITS-1:0] frm, input wire [`INST_FRM_BITS-1:0] frm,
@ -37,7 +37,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
input wire [NUM_LANES-1:0][`XLEN-1:0] dataa, input wire [NUM_LANES-1:0][`XLEN-1:0] dataa,
input wire [NUM_LANES-1:0][`XLEN-1:0] datab, input wire [NUM_LANES-1:0][`XLEN-1:0] datab,
input wire [NUM_LANES-1:0][`XLEN-1:0] datac, input wire [NUM_LANES-1:0][`XLEN-1:0] datac,
output wire [NUM_LANES-1:0][`XLEN-1:0] result, output wire [NUM_LANES-1:0][`XLEN-1:0] result,
output wire has_fflags, output wire has_fflags,
output wire [`FP_FLAGS_BITS-1:0] fflags, output wire [`FP_FLAGS_BITS-1:0] fflags,
@ -55,31 +55,31 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
localparam FPC_BITS = `LOG2UP(NUM_FPC); localparam FPC_BITS = `LOG2UP(NUM_FPC);
localparam RSP_DATAW = (NUM_LANES * `XLEN) + 1 + $bits(fflags_t) + TAG_WIDTH; localparam RSP_DATAW = (NUM_LANES * `XLEN) + 1 + $bits(fflags_t) + TAG_WIDTH;
wire [NUM_FPC-1:0] per_core_ready_in; wire [NUM_FPC-1:0] per_core_ready_in;
wire [NUM_FPC-1:0][NUM_LANES-1:0][`XLEN-1:0] per_core_result; wire [NUM_FPC-1:0][NUM_LANES-1:0][`XLEN-1:0] per_core_result;
wire [NUM_FPC-1:0][TAG_WIDTH-1:0] per_core_tag_out; wire [NUM_FPC-1:0][TAG_WIDTH-1:0] per_core_tag_out;
reg [NUM_FPC-1:0] per_core_ready_out; reg [NUM_FPC-1:0] per_core_ready_out;
wire [NUM_FPC-1:0] per_core_valid_out; wire [NUM_FPC-1:0] per_core_valid_out;
wire [NUM_FPC-1:0] per_core_has_fflags; wire [NUM_FPC-1:0] per_core_has_fflags;
fflags_t [NUM_FPC-1:0] per_core_fflags; fflags_t [NUM_FPC-1:0] per_core_fflags;
wire div_ready_in, sqrt_ready_in; wire div_ready_in, sqrt_ready_in;
wire [NUM_LANES-1:0][`XLEN-1:0] div_result, sqrt_result; wire [NUM_LANES-1:0][`XLEN-1:0] div_result, sqrt_result;
wire [TAG_WIDTH-1:0] div_tag_out, sqrt_tag_out; wire [TAG_WIDTH-1:0] div_tag_out, sqrt_tag_out;
wire div_ready_out, sqrt_ready_out; wire div_ready_out, sqrt_ready_out;
wire div_valid_out, sqrt_valid_out; wire div_valid_out, sqrt_valid_out;
wire div_has_fflags, sqrt_has_fflags; wire div_has_fflags, sqrt_has_fflags;
fflags_t div_fflags, sqrt_fflags; fflags_t div_fflags, sqrt_fflags;
reg [FPC_BITS-1:0] core_select; reg [FPC_BITS-1:0] core_select;
reg is_fadd, is_fsub, is_fmul, is_fmadd, is_fmsub, is_fnmadd, is_fnmsub; reg is_fadd, is_fsub, is_fmul, is_fmadd, is_fmsub, is_fnmadd, is_fnmsub;
reg is_div, is_fcmp, is_itof, is_utof, is_ftoi, is_ftou, is_f2f; reg is_div, is_fcmp, is_itof, is_utof, is_ftoi, is_ftou, is_f2f;
reg dst_fmt, int_fmt; reg dst_fmt, int_fmt;
reg [NUM_LANES-1:0][63:0] operands [3]; reg [NUM_LANES-1:0][63:0] operands [3];
always @(*) begin always @(*) begin
for (integer i = 0; i < NUM_LANES; ++i) begin for (integer i = 0; i < NUM_LANES; ++i) begin
operands[0][i] = 64'(dataa[i]); operands[0][i] = 64'(dataa[i]);
@ -92,23 +92,23 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
always @(*) begin always @(*) begin
is_fadd = 0; is_fadd = 0;
is_fsub = 0; is_fsub = 0;
is_fmul = 0; is_fmul = 0;
is_fmadd = 0; is_fmadd = 0;
is_fmsub = 0; is_fmsub = 0;
is_fnmadd = 0; is_fnmadd = 0;
is_fnmsub = 0; is_fnmsub = 0;
is_div = 0; is_div = 0;
is_fcmp = 0; is_fcmp = 0;
is_itof = 0; is_itof = 0;
is_utof = 0; is_utof = 0;
is_ftoi = 0; is_ftoi = 0;
is_ftou = 0; is_ftou = 0;
is_f2f = 0; is_f2f = 0;
dst_fmt = 0; dst_fmt = 0;
int_fmt = 0; int_fmt = 0;
`ifdef FLEN_64 `ifdef FLEN_64
dst_fmt = fmt[0]; dst_fmt = fmt[0];
`endif `endif
@ -132,23 +132,23 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
`INST_FPU_F2U: begin core_select = FPU_CVT; is_ftou = 1; end `INST_FPU_F2U: begin core_select = FPU_CVT; is_ftou = 1; end
`INST_FPU_I2F: begin core_select = FPU_CVT; is_itof = 1; end `INST_FPU_I2F: begin core_select = FPU_CVT; is_itof = 1; end
`INST_FPU_U2F: begin core_select = FPU_CVT; is_utof = 1; end `INST_FPU_U2F: begin core_select = FPU_CVT; is_utof = 1; end
`INST_FPU_F2F: begin core_select = FPU_CVT; is_f2f = 1; end `INST_FPU_F2F: begin core_select = FPU_CVT; is_f2f = 1; end
default: begin core_select = FPU_NCP; end default: begin core_select = FPU_NCP; end
endcase endcase
end end
generate generate
begin : fma begin : fma
reg [NUM_LANES-1:0][`XLEN-1:0] result_fma; reg [NUM_LANES-1:0][`XLEN-1:0] result_fma;
wire [NUM_LANES-1:0][63:0] result_fadd; reg [NUM_LANES-1:0][63:0] result_fadd;
wire [NUM_LANES-1:0][63:0] result_fsub; reg [NUM_LANES-1:0][63:0] result_fsub;
wire [NUM_LANES-1:0][63:0] result_fmul; reg [NUM_LANES-1:0][63:0] result_fmul;
wire [NUM_LANES-1:0][63:0] result_fmadd; reg [NUM_LANES-1:0][63:0] result_fmadd;
wire [NUM_LANES-1:0][63:0] result_fmsub; reg [NUM_LANES-1:0][63:0] result_fmsub;
wire [NUM_LANES-1:0][63:0] result_fnmadd; reg [NUM_LANES-1:0][63:0] result_fnmadd;
wire [NUM_LANES-1:0][63:0] result_fnmsub; reg [NUM_LANES-1:0][63:0] result_fnmsub;
fflags_t [NUM_LANES-1:0] fflags_fma; fflags_t [NUM_LANES-1:0] fflags_fma;
fflags_t [NUM_LANES-1:0] fflags_fadd; fflags_t [NUM_LANES-1:0] fflags_fadd;
fflags_t [NUM_LANES-1:0] fflags_fsub; fflags_t [NUM_LANES-1:0] fflags_fsub;
@ -162,7 +162,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
wire fma_ready = per_core_ready_out[FPU_FMA] || ~per_core_valid_out[FPU_FMA]; wire fma_ready = per_core_ready_out[FPU_FMA] || ~per_core_valid_out[FPU_FMA];
wire fma_fire = fma_valid && fma_ready; wire fma_fire = fma_valid && fma_ready;
always @(*) begin always @(*) begin
for (integer i = 0; i < NUM_LANES; ++i) begin for (integer i = 0; i < NUM_LANES; ++i) begin
dpi_fadd (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fadd[i], fflags_fadd[i]); dpi_fadd (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fadd[i], fflags_fadd[i]);
dpi_fsub (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fsub[i], fflags_fsub[i]); dpi_fsub (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fsub[i], fflags_fsub[i]);
@ -175,20 +175,20 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
result_fma[i] = is_fadd ? result_fadd[i][`XLEN-1:0] : result_fma[i] = is_fadd ? result_fadd[i][`XLEN-1:0] :
is_fsub ? result_fsub[i][`XLEN-1:0] : is_fsub ? result_fsub[i][`XLEN-1:0] :
is_fmul ? result_fmul[i][`XLEN-1:0] : is_fmul ? result_fmul[i][`XLEN-1:0] :
is_fmadd ? result_fmadd[i][`XLEN-1:0] : is_fmadd ? result_fmadd[i][`XLEN-1:0] :
is_fmsub ? result_fmsub[i][`XLEN-1:0] : is_fmsub ? result_fmsub[i][`XLEN-1:0] :
is_fnmadd ? result_fnmadd[i][`XLEN-1:0] : is_fnmadd ? result_fnmadd[i][`XLEN-1:0] :
is_fnmsub ? result_fnmsub[i][`XLEN-1:0] : is_fnmsub ? result_fnmsub[i][`XLEN-1:0] :
'0; '0;
fflags_fma[i] = is_fadd ? fflags_fadd[i] : fflags_fma[i] = is_fadd ? fflags_fadd[i] :
is_fsub ? fflags_fsub[i] : is_fsub ? fflags_fsub[i] :
is_fmul ? fflags_fmul[i] : is_fmul ? fflags_fmul[i] :
is_fmadd ? fflags_fmadd[i] : is_fmadd ? fflags_fmadd[i] :
is_fmsub ? fflags_fmsub[i] : is_fmsub ? fflags_fmsub[i] :
is_fnmadd ? fflags_fnmadd[i] : is_fnmadd ? fflags_fnmadd[i] :
is_fnmsub ? fflags_fnmsub[i] : is_fnmsub ? fflags_fnmsub[i] :
'0; '0;
end end
end end
@ -213,19 +213,19 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
end end
endgenerate endgenerate
generate generate
begin : fdiv begin : fdiv
reg [NUM_LANES-1:0][`XLEN-1:0] result_fdiv_r; reg [NUM_LANES-1:0][`XLEN-1:0] result_fdiv_r;
wire [NUM_LANES-1:0][63:0] result_fdiv; reg [NUM_LANES-1:0][63:0] result_fdiv;
fflags_t [NUM_LANES-1:0] fflags_fdiv; fflags_t [NUM_LANES-1:0] fflags_fdiv;
wire fdiv_valid = (valid_in && core_select == FPU_DIVSQRT) && is_div; wire fdiv_valid = (valid_in && core_select == FPU_DIVSQRT) && is_div;
wire fdiv_ready = div_ready_out || ~div_valid_out; wire fdiv_ready = div_ready_out || ~div_valid_out;
wire fdiv_fire = fdiv_valid && fdiv_ready; wire fdiv_fire = fdiv_valid && fdiv_ready;
always @(*) begin always @(*) begin
for (integer i = 0; i < NUM_LANES; ++i) begin for (integer i = 0; i < NUM_LANES; ++i) begin
dpi_fdiv (fdiv_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fdiv[i], fflags_fdiv[i]); dpi_fdiv (fdiv_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fdiv[i], fflags_fdiv[i]);
result_fdiv_r[i] = result_fdiv[i][`XLEN-1:0]; result_fdiv_r[i] = result_fdiv[i][`XLEN-1:0];
end end
@ -252,18 +252,18 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
end end
endgenerate endgenerate
generate generate
begin : fsqrt begin : fsqrt
reg [NUM_LANES-1:0][`XLEN-1:0] result_fsqrt_r; reg [NUM_LANES-1:0][`XLEN-1:0] result_fsqrt_r;
wire [NUM_LANES-1:0][63:0] result_fsqrt; reg [NUM_LANES-1:0][63:0] result_fsqrt;
fflags_t [NUM_LANES-1:0] fflags_fsqrt; fflags_t [NUM_LANES-1:0] fflags_fsqrt;
wire fsqrt_valid = (valid_in && core_select == FPU_DIVSQRT) && ~is_div; wire fsqrt_valid = (valid_in && core_select == FPU_DIVSQRT) && ~is_div;
wire fsqrt_ready = sqrt_ready_out || ~sqrt_valid_out; wire fsqrt_ready = sqrt_ready_out || ~sqrt_valid_out;
wire fsqrt_fire = fsqrt_valid && fsqrt_ready; wire fsqrt_fire = fsqrt_valid && fsqrt_ready;
always @(*) begin always @(*) begin
for (integer i = 0; i < NUM_LANES; ++i) begin for (integer i = 0; i < NUM_LANES; ++i) begin
dpi_fsqrt (fsqrt_fire, int'(dst_fmt), operands[0][i], frm, result_fsqrt[i], fflags_fsqrt[i]); dpi_fsqrt (fsqrt_fire, int'(dst_fmt), operands[0][i], frm, result_fsqrt[i], fflags_fsqrt[i]);
result_fsqrt_r[i] = result_fsqrt[i][`XLEN-1:0]; result_fsqrt_r[i] = result_fsqrt[i][`XLEN-1:0];
@ -295,12 +295,12 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
begin : fcvt begin : fcvt
reg [NUM_LANES-1:0][`XLEN-1:0] result_fcvt; reg [NUM_LANES-1:0][`XLEN-1:0] result_fcvt;
wire [NUM_LANES-1:0][63:0] result_itof; reg [NUM_LANES-1:0][63:0] result_itof;
wire [NUM_LANES-1:0][63:0] result_utof; reg [NUM_LANES-1:0][63:0] result_utof;
wire [NUM_LANES-1:0][63:0] result_ftoi; reg [NUM_LANES-1:0][63:0] result_ftoi;
wire [NUM_LANES-1:0][63:0] result_ftou; reg [NUM_LANES-1:0][63:0] result_ftou;
wire [NUM_LANES-1:0][63:0] result_f2f; reg [NUM_LANES-1:0][63:0] result_f2f;
fflags_t [NUM_LANES-1:0] fflags_fcvt; fflags_t [NUM_LANES-1:0] fflags_fcvt;
fflags_t [NUM_LANES-1:0] fflags_itof; fflags_t [NUM_LANES-1:0] fflags_itof;
fflags_t [NUM_LANES-1:0] fflags_utof; fflags_t [NUM_LANES-1:0] fflags_utof;
@ -310,20 +310,20 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
wire fcvt_valid = (valid_in && core_select == FPU_CVT); wire fcvt_valid = (valid_in && core_select == FPU_CVT);
wire fcvt_ready = per_core_ready_out[FPU_CVT] || ~per_core_valid_out[FPU_CVT]; wire fcvt_ready = per_core_ready_out[FPU_CVT] || ~per_core_valid_out[FPU_CVT];
wire fcvt_fire = fcvt_valid && fcvt_ready; wire fcvt_fire = fcvt_valid && fcvt_ready;
always @(*) begin always @(*) begin
for (integer i = 0; i < NUM_LANES; ++i) begin for (integer i = 0; i < NUM_LANES; ++i) begin
dpi_itof (fcvt_fire, int'(dst_fmt), int'(int_fmt), operands[0][i], frm, result_itof[i], fflags_itof[i]); dpi_itof (fcvt_fire, int'(dst_fmt), int'(int_fmt), operands[0][i], frm, result_itof[i], fflags_itof[i]);
dpi_utof (fcvt_fire, int'(dst_fmt), int'(int_fmt), operands[0][i], frm, result_utof[i], fflags_utof[i]); dpi_utof (fcvt_fire, int'(dst_fmt), int'(int_fmt), operands[0][i], frm, result_utof[i], fflags_utof[i]);
dpi_ftoi (fcvt_fire, int'(int_fmt), int'(dst_fmt), operands[0][i], frm, result_ftoi[i], fflags_ftoi[i]); dpi_ftoi (fcvt_fire, int'(int_fmt), int'(dst_fmt), operands[0][i], frm, result_ftoi[i], fflags_ftoi[i]);
dpi_ftou (fcvt_fire, int'(int_fmt), int'(dst_fmt), operands[0][i], frm, result_ftou[i], fflags_ftou[i]); dpi_ftou (fcvt_fire, int'(int_fmt), int'(dst_fmt), operands[0][i], frm, result_ftou[i], fflags_ftou[i]);
dpi_f2f (fcvt_fire, int'(dst_fmt), operands[0][i], result_f2f[i]); dpi_f2f (fcvt_fire, int'(dst_fmt), operands[0][i], result_f2f[i]);
result_fcvt[i] = is_itof ? result_itof[i][`XLEN-1:0] : result_fcvt[i] = is_itof ? result_itof[i][`XLEN-1:0] :
is_utof ? result_utof[i][`XLEN-1:0] : is_utof ? result_utof[i][`XLEN-1:0] :
is_ftoi ? result_ftoi[i][`XLEN-1:0] : is_ftoi ? result_ftoi[i][`XLEN-1:0] :
is_ftou ? result_ftou[i][`XLEN-1:0] : is_ftou ? result_ftou[i][`XLEN-1:0] :
is_f2f ? result_f2f[i][`XLEN-1:0] : is_f2f ? result_f2f[i][`XLEN-1:0] :
'0; '0;
fflags_fcvt[i] = is_itof ? fflags_itof[i] : fflags_fcvt[i] = is_itof ? fflags_itof[i] :
@ -355,19 +355,19 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
end end
endgenerate endgenerate
generate generate
begin : fncp begin : fncp
reg [NUM_LANES-1:0][`XLEN-1:0] result_fncp; reg [NUM_LANES-1:0][`XLEN-1:0] result_fncp;
wire [NUM_LANES-1:0][63:0] result_fclss; reg [NUM_LANES-1:0][63:0] result_fclss;
wire [NUM_LANES-1:0][63:0] result_flt; reg [NUM_LANES-1:0][63:0] result_flt;
wire [NUM_LANES-1:0][63:0] result_fle; reg [NUM_LANES-1:0][63:0] result_fle;
wire [NUM_LANES-1:0][63:0] result_feq; reg [NUM_LANES-1:0][63:0] result_feq;
wire [NUM_LANES-1:0][63:0] result_fmin; reg [NUM_LANES-1:0][63:0] result_fmin;
wire [NUM_LANES-1:0][63:0] result_fmax; reg [NUM_LANES-1:0][63:0] result_fmax;
wire [NUM_LANES-1:0][63:0] result_fsgnj; reg [NUM_LANES-1:0][63:0] result_fsgnj;
wire [NUM_LANES-1:0][63:0] result_fsgnjn; reg [NUM_LANES-1:0][63:0] result_fsgnjn;
wire [NUM_LANES-1:0][63:0] result_fsgnjx; reg [NUM_LANES-1:0][63:0] result_fsgnjx;
reg [NUM_LANES-1:0][63:0] result_fmvx; reg [NUM_LANES-1:0][63:0] result_fmvx;
reg [NUM_LANES-1:0][63:0] result_fmvf; reg [NUM_LANES-1:0][63:0] result_fmvf;
@ -381,15 +381,15 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
wire fncp_valid = (valid_in && core_select == FPU_NCP); wire fncp_valid = (valid_in && core_select == FPU_NCP);
wire fncp_ready = per_core_ready_out[FPU_NCP] || ~per_core_valid_out[FPU_NCP]; wire fncp_ready = per_core_ready_out[FPU_NCP] || ~per_core_valid_out[FPU_NCP];
wire fncp_fire = fncp_valid && fncp_ready; wire fncp_fire = fncp_valid && fncp_ready;
always @(*) begin always @(*) begin
for (integer i = 0; i < NUM_LANES; ++i) begin for (integer i = 0; i < NUM_LANES; ++i) begin
dpi_fclss (fncp_fire, int'(dst_fmt), operands[0][i], result_fclss[i]); dpi_fclss (fncp_fire, int'(dst_fmt), operands[0][i], result_fclss[i]);
dpi_fle (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fle[i], fflags_fle[i]); dpi_fle (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fle[i], fflags_fle[i]);
dpi_flt (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_flt[i], fflags_flt[i]); dpi_flt (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_flt[i], fflags_flt[i]);
dpi_feq (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_feq[i], fflags_feq[i]); dpi_feq (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_feq[i], fflags_feq[i]);
dpi_fmin (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fmin[i], fflags_fmin[i]); dpi_fmin (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fmin[i], fflags_fmin[i]);
dpi_fmax (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fmax[i], fflags_fmax[i]); dpi_fmax (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fmax[i], fflags_fmax[i]);
dpi_fsgnj (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnj[i]); dpi_fsgnj (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnj[i]);
dpi_fsgnjn (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnjn[i]); dpi_fsgnjn (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnjn[i]);
dpi_fsgnjx (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnjx[i]); dpi_fsgnjx (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnjx[i]);
@ -431,7 +431,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
.data_in ({fncp_valid, tag_in, has_fflags_fncp, result_fncp, fflags_merged}), .data_in ({fncp_valid, tag_in, has_fflags_fncp, result_fncp, fflags_merged}),
.data_out ({per_core_valid_out[FPU_NCP], per_core_tag_out[FPU_NCP], per_core_has_fflags[FPU_NCP], per_core_result[FPU_NCP], per_core_fflags[FPU_NCP]}) .data_out ({per_core_valid_out[FPU_NCP], per_core_tag_out[FPU_NCP], per_core_has_fflags[FPU_NCP], per_core_result[FPU_NCP], per_core_fflags[FPU_NCP]})
); );
assign per_core_ready_in[FPU_NCP] = fncp_ready; assign per_core_ready_in[FPU_NCP] = fncp_ready;
end end
@ -443,15 +443,15 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
VX_stream_arb #( VX_stream_arb #(
.NUM_INPUTS (2), .NUM_INPUTS (2),
.DATAW (RSP_DATAW), .DATAW (RSP_DATAW),
.ARBITER ("R"), .ARBITER ("R"),
.OUT_BUF (0) .OUT_BUF (0)
) div_sqrt_arb ( ) div_sqrt_arb (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.valid_in ({sqrt_valid_out, div_valid_out}), .valid_in ({sqrt_valid_out, div_valid_out}),
.ready_in ({sqrt_ready_out, div_ready_out}), .ready_in ({sqrt_ready_out, div_ready_out}),
.data_in ({{sqrt_result, sqrt_has_fflags, sqrt_fflags, sqrt_tag_out}, .data_in ({{sqrt_result, sqrt_has_fflags, sqrt_fflags, sqrt_tag_out},
{div_result, div_has_fflags, div_fflags, div_tag_out}}), {div_result, div_has_fflags, div_fflags, div_tag_out}}),
.data_out ({per_core_result[FPU_DIVSQRT], per_core_has_fflags[FPU_DIVSQRT], per_core_fflags[FPU_DIVSQRT], per_core_tag_out[FPU_DIVSQRT]}), .data_out ({per_core_result[FPU_DIVSQRT], per_core_has_fflags[FPU_DIVSQRT], per_core_fflags[FPU_DIVSQRT], per_core_tag_out[FPU_DIVSQRT]}),
.valid_out (per_core_valid_out[FPU_DIVSQRT]), .valid_out (per_core_valid_out[FPU_DIVSQRT]),
@ -469,13 +469,13 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
VX_stream_arb #( VX_stream_arb #(
.NUM_INPUTS (NUM_FPC), .NUM_INPUTS (NUM_FPC),
.DATAW (RSP_DATAW), .DATAW (RSP_DATAW),
.ARBITER ("R"), .ARBITER ("F"),
.OUT_BUF (OUT_BUF) .OUT_BUF (OUT_BUF)
) rsp_arb ( ) rsp_arb (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.valid_in (per_core_valid_out), .valid_in (per_core_valid_out),
.ready_in (per_core_ready_out), .ready_in (per_core_ready_out),
.data_in (per_core_data_out), .data_in (per_core_data_out),
.data_out ({result, has_fflags, fflags, tag_out}), .data_out ({result, has_fflags, fflags, tag_out}),

View file

@ -289,14 +289,14 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
end end
wire [NUM_LANES-1:0][31:0] result_s; wire [NUM_LANES-1:0][31:0] result_s;
wire [1:0] op_ret_int_out; wire [1:0] op_ret_int_out;
`UNUSED_VAR (op_ret_int_out) `UNUSED_VAR (op_ret_int_out)
VX_stream_arb #( VX_stream_arb #(
.NUM_INPUTS (NUM_FPC), .NUM_INPUTS (NUM_FPC),
.DATAW (RSP_DATAW + 2), .DATAW (RSP_DATAW + 2),
.ARBITER ("R"), .ARBITER ("F"),
.OUT_BUF (OUT_BUF) .OUT_BUF (OUT_BUF)
) rsp_arb ( ) rsp_arb (
.clk (clk), .clk (clk),

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -21,7 +21,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
parameter TAG_WIDTH = 1 parameter TAG_WIDTH = 1
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
output wire ready_in, output wire ready_in,
input wire valid_in, input wire valid_in,
@ -29,7 +29,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
input wire [NUM_LANES-1:0] mask_in, input wire [NUM_LANES-1:0] mask_in,
input wire [TAG_WIDTH-1:0] tag_in, input wire [TAG_WIDTH-1:0] tag_in,
input wire [`INST_FRM_BITS-1:0] frm, input wire [`INST_FRM_BITS-1:0] frm,
input wire is_madd, input wire is_madd,
@ -39,7 +39,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
input wire [NUM_LANES-1:0][31:0] dataa, input wire [NUM_LANES-1:0][31:0] dataa,
input wire [NUM_LANES-1:0][31:0] datab, input wire [NUM_LANES-1:0][31:0] datab,
input wire [NUM_LANES-1:0][31:0] datac, input wire [NUM_LANES-1:0][31:0] datac,
output wire [NUM_LANES-1:0][31:0] result, output wire [NUM_LANES-1:0][31:0] result,
output wire has_fflags, output wire has_fflags,
output wire [`FP_FLAGS_BITS-1:0] fflags, output wire [`FP_FLAGS_BITS-1:0] fflags,
@ -52,11 +52,11 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
`UNUSED_VAR (frm) `UNUSED_VAR (frm)
wire [NUM_LANES-1:0][3*32-1:0] data_in; wire [NUM_LANES-1:0][3*32-1:0] data_in;
wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0] mask_out;
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out; wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out;
wire pe_enable; wire pe_enable;
wire [NUM_PES-1:0][3*32-1:0] pe_data_in; wire [NUM_PES-1:0][3*32-1:0] pe_data_in;
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
@ -66,7 +66,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
always @(*) begin always @(*) begin
if (is_madd) begin if (is_madd) begin
// MADD / MSUB / NMADD / NMSUB // MADD / MSUB / NMADD / NMSUB
a[i] = is_neg ? {~dataa[i][31], dataa[i][30:0]} : dataa[i]; a[i] = is_neg ? {~dataa[i][31], dataa[i][30:0]} : dataa[i];
b[i] = datab[i]; b[i] = datab[i];
c[i] = (is_neg ^ is_sub) ? {~datac[i][31], datac[i][30:0]} : datac[i]; c[i] = (is_neg ^ is_sub) ? {~datac[i][31], datac[i][30:0]} : datac[i];
end else begin end else begin
@ -81,7 +81,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
b[i] = dataa[i]; b[i] = dataa[i];
c[i] = is_sub ? {~datab[i][31], datab[i][30:0]} : datab[i]; c[i] = is_sub ? {~datab[i][31], datab[i][30:0]} : datab[i];
end end
end end
end end
end end
@ -90,15 +90,15 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
assign data_in[i][32 +: 32] = b[i]; assign data_in[i][32 +: 32] = b[i];
assign data_in[i][64 +: 32] = c[i]; assign data_in[i][64 +: 32] = c[i];
end end
VX_pe_serializer #( VX_pe_serializer #(
.NUM_LANES (NUM_LANES), .NUM_LANES (NUM_LANES),
.NUM_PES (NUM_PES), .NUM_PES (NUM_PES),
.LATENCY (`LATENCY_FMA), .LATENCY (`LATENCY_FMA),
.DATA_IN_WIDTH(3*32), .DATA_IN_WIDTH(3*32),
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
.TAG_WIDTH (NUM_LANES + TAG_WIDTH), .TAG_WIDTH (NUM_LANES + TAG_WIDTH),
.PE_REG (1) .PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0)
) pe_serializer ( ) pe_serializer (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@ -123,7 +123,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
fflags_t [NUM_LANES-1:0] per_lane_fflags; fflags_t [NUM_LANES-1:0] per_lane_fflags;
`ifdef QUARTUS `ifdef QUARTUS
for (genvar i = 0; i < NUM_PES; ++i) begin for (genvar i = 0; i < NUM_PES; ++i) begin
acl_fmadd fmadd ( acl_fmadd fmadd (
.clk (clk), .clk (clk),
@ -136,7 +136,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
); );
assign pe_data_out[i][32 +: `FP_FLAGS_BITS] = 'x; assign pe_data_out[i][32 +: `FP_FLAGS_BITS] = 'x;
end end
assign has_fflags = 0; assign has_fflags = 0;
assign per_lane_fflags = 'x; assign per_lane_fflags = 'x;
@ -144,7 +144,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
for (genvar i = 0; i < NUM_PES; ++i) begin for (genvar i = 0; i < NUM_PES; ++i) begin
wire [2:0] tuser; wire [2:0] tuser;
xil_fma fma ( xil_fma fma (
.aclk (clk), .aclk (clk),
.aclken (pe_enable), .aclken (pe_enable),
@ -172,15 +172,15 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
`UNUSED_VAR (r) `UNUSED_VAR (r)
fflags_t f; fflags_t f;
always @(*) begin always @(*) begin
dpi_fmadd ( dpi_fmadd (
pe_enable, pe_enable,
int'(0), int'(0),
{32'hffffffff, pe_data_in[i][0 +: 32]}, {32'hffffffff, pe_data_in[i][0 +: 32]},
{32'hffffffff, pe_data_in[i][32 +: 32]}, {32'hffffffff, pe_data_in[i][32 +: 32]},
{32'hffffffff, pe_data_in[i][64 +: 32]}, {32'hffffffff, pe_data_in[i][64 +: 32]},
frm, frm,
r, r,
f f
); );
end end

View file

@ -105,7 +105,7 @@ module VX_fpu_fpnew
`UNUSED_VAR (fmt) `UNUSED_VAR (fmt)
always @(*) begin always @(*) begin
fpu_op = 'x; fpu_op = fpnew_pkg::operation_e'('x);
fpu_rnd = frm; fpu_rnd = frm;
fpu_op_mod = 0; fpu_op_mod = 0;
fpu_has_fflags = 1; fpu_has_fflags = 1;

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -15,17 +15,14 @@
interface VX_commit_sched_if (); interface VX_commit_sched_if ();
wire [`ISSUE_WIDTH-1:0] committed; wire [`NUM_WARPS-1:0] committed_warps;
wire [`ISSUE_WIDTH-1:0][`NW_WIDTH-1:0] committed_wid;
modport master ( modport master (
output committed, output committed_warps
output committed_wid
); );
modport slave ( modport slave (
input committed, input committed_warps
input committed_wid
); );
endinterface endinterface

View file

@ -13,11 +13,14 @@
`include "VX_define.vh" `include "VX_define.vh"
interface VX_decode_if import VX_gpu_pkg::*; (); interface VX_decode_if import VX_gpu_pkg::*; #(
parameter NUM_WARPS = `NUM_WARPS,
parameter NW_WIDTH = `LOG2UP(NUM_WARPS)
);
typedef struct packed { typedef struct packed {
logic [`UUID_WIDTH-1:0] uuid; logic [`UUID_WIDTH-1:0] uuid;
logic [`NW_WIDTH-1:0] wid; logic [NW_WIDTH-1:0] wid;
logic [`NUM_THREADS-1:0] tmask; logic [`NUM_THREADS-1:0] tmask;
logic [`PC_BITS-1:0] PC; logic [`PC_BITS-1:0] PC;
logic [`EX_BITS-1:0] ex_type; logic [`EX_BITS-1:0] ex_type;
@ -34,7 +37,7 @@ interface VX_decode_if import VX_gpu_pkg::*; ();
data_t data; data_t data;
logic ready; logic ready;
`ifndef L1_ENABLE `ifndef L1_ENABLE
wire [`NUM_WARPS-1:0] ibuf_pop; wire [NUM_WARPS-1:0] ibuf_pop;
`endif `endif
modport master ( modport master (

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -13,39 +13,29 @@
`include "VX_define.vh" `include "VX_define.vh"
interface VX_pipeline_perf_if (); interface VX_pipeline_perf_if import VX_gpu_pkg::*; ();
wire [`PERF_CTR_BITS-1:0] sched_idles; sched_perf_t sched;
wire [`PERF_CTR_BITS-1:0] sched_stalls; issue_perf_t issue;
wire [`PERF_CTR_BITS-1:0] ibf_stalls;
wire [`PERF_CTR_BITS-1:0] scb_stalls;
wire [`PERF_CTR_BITS-1:0] units_uses [`NUM_EX_UNITS];
wire [`PERF_CTR_BITS-1:0] sfu_uses [`NUM_SFU_UNITS];
wire [`PERF_CTR_BITS-1:0] ifetches; wire [`PERF_CTR_BITS-1:0] ifetches;
wire [`PERF_CTR_BITS-1:0] loads; wire [`PERF_CTR_BITS-1:0] loads;
wire [`PERF_CTR_BITS-1:0] stores; wire [`PERF_CTR_BITS-1:0] stores;
wire [`PERF_CTR_BITS-1:0] ifetch_latency; wire [`PERF_CTR_BITS-1:0] ifetch_latency;
wire [`PERF_CTR_BITS-1:0] load_latency; wire [`PERF_CTR_BITS-1:0] load_latency;
modport schedule ( modport master (
output sched_idles, output sched,
output sched_stalls output issue,
); output ifetches,
output loads,
modport issue ( output stores,
output ibf_stalls, output ifetch_latency,
output scb_stalls, output load_latency
output units_uses,
output sfu_uses
); );
modport slave ( modport slave (
input sched_idles, input sched,
input sched_stalls, input issue,
input ibf_stalls,
input scb_stalls,
input units_uses,
input sfu_uses,
input ifetches, input ifetches,
input loads, input loads,
input stores, input stores,

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -14,11 +14,11 @@
`include "VX_define.vh" `include "VX_define.vh"
`TRACING_OFF `TRACING_OFF
module VX_avs_adapter #( module VX_avs_adapter #(
parameter DATA_WIDTH = 1, parameter DATA_WIDTH = 1,
parameter ADDR_WIDTH = 1, parameter ADDR_WIDTH = 1,
parameter BURST_WIDTH = 1, parameter BURST_WIDTH = 1,
parameter NUM_BANKS = 1, parameter NUM_BANKS = 1,
parameter TAG_WIDTH = 1, parameter TAG_WIDTH = 1,
parameter RD_QUEUE_SIZE = 1, parameter RD_QUEUE_SIZE = 1,
parameter REQ_OUT_BUF = 0, parameter REQ_OUT_BUF = 0,
@ -29,15 +29,15 @@ module VX_avs_adapter #(
// Memory request // Memory request
input wire mem_req_valid, input wire mem_req_valid,
input wire mem_req_rw, input wire mem_req_rw,
input wire [DATA_WIDTH/8-1:0] mem_req_byteen, input wire [DATA_WIDTH/8-1:0] mem_req_byteen,
input wire [ADDR_WIDTH-1:0] mem_req_addr, input wire [ADDR_WIDTH-1:0] mem_req_addr,
input wire [DATA_WIDTH-1:0] mem_req_data, input wire [DATA_WIDTH-1:0] mem_req_data,
input wire [TAG_WIDTH-1:0] mem_req_tag, input wire [TAG_WIDTH-1:0] mem_req_tag,
output wire mem_req_ready, output wire mem_req_ready,
// Memory response // Memory response
output wire mem_rsp_valid, output wire mem_rsp_valid,
output wire [DATA_WIDTH-1:0] mem_rsp_data, output wire [DATA_WIDTH-1:0] mem_rsp_data,
output wire [TAG_WIDTH-1:0] mem_rsp_tag, output wire [TAG_WIDTH-1:0] mem_rsp_tag,
input wire mem_rsp_ready, input wire mem_rsp_ready,
@ -60,7 +60,7 @@ module VX_avs_adapter #(
localparam BANK_OFFSETW = ADDR_WIDTH - LOG2_NUM_BANKS; localparam BANK_OFFSETW = ADDR_WIDTH - LOG2_NUM_BANKS;
// Requests handling ////////////////////////////////////////////////////// // Requests handling //////////////////////////////////////////////////////
wire [NUM_BANKS-1:0] req_queue_push, req_queue_pop; wire [NUM_BANKS-1:0] req_queue_push, req_queue_pop;
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] req_queue_tag_out; wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] req_queue_tag_out;
wire [NUM_BANKS-1:0] req_queue_going_full; wire [NUM_BANKS-1:0] req_queue_going_full;
@ -70,38 +70,40 @@ module VX_avs_adapter #(
wire [NUM_BANKS-1:0] bank_req_ready; wire [NUM_BANKS-1:0] bank_req_ready;
if (NUM_BANKS > 1) begin if (NUM_BANKS > 1) begin
assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0]; assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0];
end else begin end else begin
assign req_bank_sel = '0; assign req_bank_sel = '0;
end end
assign req_bank_off = mem_req_addr[ADDR_WIDTH-1:LOG2_NUM_BANKS]; assign req_bank_off = mem_req_addr[ADDR_WIDTH-1:LOG2_NUM_BANKS];
for (genvar i = 0; i < NUM_BANKS; ++i) begin for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign req_queue_push[i] = mem_req_valid && ~mem_req_rw && bank_req_ready[i] && (req_bank_sel == i); assign req_queue_push[i] = mem_req_valid && ~mem_req_rw && bank_req_ready[i] && (req_bank_sel == i);
end end
for (genvar i = 0; i < NUM_BANKS; ++i) begin for (genvar i = 0; i < NUM_BANKS; ++i) begin
VX_pending_size #( VX_pending_size #(
.SIZE (RD_QUEUE_SIZE) .SIZE (RD_QUEUE_SIZE)
) pending_size ( ) pending_size (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.incr (req_queue_push[i]), .incr (req_queue_push[i]),
.decr (req_queue_pop[i]), .decr (req_queue_pop[i]),
`UNUSED_PIN (empty),
`UNUSED_PIN (alm_empty),
.full (req_queue_going_full[i]), .full (req_queue_going_full[i]),
.size (req_queue_size[i]), `UNUSED_PIN (alm_full),
`UNUSED_PIN (empty) .size (req_queue_size[i])
); );
`UNUSED_VAR (req_queue_size) `UNUSED_VAR (req_queue_size)
VX_fifo_queue #( VX_fifo_queue #(
.DATAW (TAG_WIDTH), .DATAW (TAG_WIDTH),
.DEPTH (RD_QUEUE_SIZE) .DEPTH (RD_QUEUE_SIZE)
) rd_req_queue ( ) rd_req_queue (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.push (req_queue_push[i]), .push (req_queue_push[i]),
.pop (req_queue_pop[i]), .pop (req_queue_pop[i]),
.data_in (mem_req_tag), .data_in (mem_req_tag),
.data_out (req_queue_tag_out[i]), .data_out (req_queue_tag_out[i]),
@ -111,9 +113,9 @@ module VX_avs_adapter #(
`UNUSED_PIN (alm_full), `UNUSED_PIN (alm_full),
`UNUSED_PIN (size) `UNUSED_PIN (size)
); );
end end
for (genvar i = 0; i < NUM_BANKS; ++i) begin for (genvar i = 0; i < NUM_BANKS; ++i) begin
wire valid_out; wire valid_out;
wire rw_out; wire rw_out;
wire [DATA_SIZE-1:0] byteen_out; wire [DATA_SIZE-1:0] byteen_out;
@ -174,7 +176,7 @@ module VX_avs_adapter #(
.reset (reset), .reset (reset),
.push (avs_readdatavalid[i]), .push (avs_readdatavalid[i]),
.pop (req_queue_pop[i]), .pop (req_queue_pop[i]),
.data_in (avs_readdata[i]), .data_in (avs_readdata[i]),
.data_out (rsp_queue_data_out[i]), .data_out (rsp_queue_data_out[i]),
.empty (rsp_queue_empty[i]), .empty (rsp_queue_empty[i]),
`UNUSED_PIN (full), `UNUSED_PIN (full),
@ -183,7 +185,7 @@ module VX_avs_adapter #(
`UNUSED_PIN (size) `UNUSED_PIN (size)
); );
end end
for (genvar i = 0; i < NUM_BANKS; ++i) begin for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign rsp_arb_valid_in[i] = !rsp_queue_empty[i]; assign rsp_arb_valid_in[i] = !rsp_queue_empty[i];
assign rsp_arb_data_in[i] = {rsp_queue_data_out[i], req_queue_tag_out[i]}; assign rsp_arb_data_in[i] = {rsp_queue_data_out[i], req_queue_tag_out[i]};

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -16,22 +16,21 @@
`TRACING_OFF `TRACING_OFF
module VX_cyclic_arbiter #( module VX_cyclic_arbiter #(
parameter NUM_REQS = 1, parameter NUM_REQS = 1,
parameter LOCK_ENABLE = 0,
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS) parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire [NUM_REQS-1:0] requests, input wire [NUM_REQS-1:0] requests,
output wire [LOG_NUM_REQS-1:0] grant_index, output wire [LOG_NUM_REQS-1:0] grant_index,
output wire [NUM_REQS-1:0] grant_onehot, output wire [NUM_REQS-1:0] grant_onehot,
output wire grant_valid, output wire grant_valid,
input wire grant_unlock input wire grant_ready
); );
if (NUM_REQS == 1) begin if (NUM_REQS == 1) begin
`UNUSED_VAR (clk) `UNUSED_VAR (clk)
`UNUSED_VAR (reset) `UNUSED_VAR (reset)
assign grant_index = '0; assign grant_index = '0;
assign grant_onehot = requests; assign grant_onehot = requests;
assign grant_valid = requests[0]; assign grant_valid = requests[0];
@ -45,10 +44,10 @@ module VX_cyclic_arbiter #(
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
grant_index_r <= '0; grant_index_r <= '0;
end else begin end else begin
if (!IS_POW2 && grant_index_r == LOG_NUM_REQS'(NUM_REQS-1)) begin if (!IS_POW2 && grant_index_r == LOG_NUM_REQS'(NUM_REQS-1)) begin
grant_index_r <= '0; grant_index_r <= '0;
end else if (!LOCK_ENABLE || ~grant_valid || grant_unlock) begin end else if (~grant_valid || grant_ready) begin
grant_index_r <= grant_index_r + LOG_NUM_REQS'(1); grant_index_r <= grant_index_r + LOG_NUM_REQS'(1);
end end
end end
@ -60,11 +59,11 @@ module VX_cyclic_arbiter #(
grant_onehot_r[grant_index_r] = 1'b1; grant_onehot_r[grant_index_r] = 1'b1;
end end
assign grant_index = grant_index_r; assign grant_index = grant_index_r;
assign grant_onehot = grant_onehot_r; assign grant_onehot = grant_onehot_r;
assign grant_valid = requests[grant_index_r]; assign grant_valid = requests[grant_index_r];
end end
endmodule endmodule
`TRACING_ON `TRACING_ON

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -17,20 +17,21 @@
module VX_dp_ram #( module VX_dp_ram #(
parameter DATAW = 1, parameter DATAW = 1,
parameter SIZE = 1, parameter SIZE = 1,
parameter ADDR_MIN = 0,
parameter WRENW = 1, parameter WRENW = 1,
parameter OUT_REG = 0, parameter OUT_REG = 0,
parameter NO_RWCHECK = 0, parameter NO_RWCHECK = 0,
parameter LUTRAM = 0, parameter LUTRAM = 0,
parameter INIT_ENABLE = 0, parameter INIT_ENABLE = 0,
parameter INIT_FILE = "", parameter INIT_FILE = "",
parameter [DATAW-1:0] INIT_VALUE = 0, parameter [DATAW-1:0] INIT_VALUE = 0,
parameter ADDRW = `LOG2UP(SIZE) parameter ADDRW = `LOG2UP(SIZE)
) ( ) (
input wire clk, input wire clk,
input wire read, input wire read,
input wire write, input wire write,
input wire [WRENW-1:0] wren, input wire [WRENW-1:0] wren,
input wire [ADDRW-1:0] waddr, input wire [ADDRW-1:0] waddr,
input wire [DATAW-1:0] wdata, input wire [DATAW-1:0] wdata,
input wire [ADDRW-1:0] raddr, input wire [ADDRW-1:0] raddr,
output wire [DATAW-1:0] rdata output wire [DATAW-1:0] rdata
@ -48,16 +49,16 @@ module VX_dp_ram #(
ram[i] = INIT_VALUE; \ ram[i] = INIT_VALUE; \
end \ end \
end end
`UNUSED_VAR (read) `UNUSED_VAR (read)
`ifdef SYNTHESIS `ifdef SYNTHESIS
if (WRENW > 1) begin if (WRENW > 1) begin
`ifdef QUARTUS `ifdef QUARTUS
if (LUTRAM != 0) begin if (LUTRAM != 0) begin
if (OUT_REG != 0) begin if (OUT_REG != 0) begin
reg [DATAW-1:0] rdata_r; reg [DATAW-1:0] rdata_r;
`USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [SIZE-1:0]; `USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION `RAM_INITIALIZATION
always @(posedge clk) begin always @(posedge clk) begin
if (write) begin if (write) begin
@ -72,7 +73,7 @@ module VX_dp_ram #(
end end
assign rdata = rdata_r; assign rdata = rdata_r;
end else begin end else begin
`USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [SIZE-1:0]; `USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION `RAM_INITIALIZATION
always @(posedge clk) begin always @(posedge clk) begin
if (write) begin if (write) begin
@ -87,7 +88,7 @@ module VX_dp_ram #(
end else begin end else begin
if (OUT_REG != 0) begin if (OUT_REG != 0) begin
reg [DATAW-1:0] rdata_r; reg [DATAW-1:0] rdata_r;
reg [WRENW-1:0][WSELW-1:0] ram [SIZE-1:0]; reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION `RAM_INITIALIZATION
always @(posedge clk) begin always @(posedge clk) begin
if (write) begin if (write) begin
@ -103,7 +104,7 @@ module VX_dp_ram #(
assign rdata = rdata_r; assign rdata = rdata_r;
end else begin end else begin
if (NO_RWCHECK != 0) begin if (NO_RWCHECK != 0) begin
`NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [SIZE-1:0]; `NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION `RAM_INITIALIZATION
always @(posedge clk) begin always @(posedge clk) begin
if (write) begin if (write) begin
@ -115,7 +116,7 @@ module VX_dp_ram #(
end end
assign rdata = ram[raddr]; assign rdata = ram[raddr];
end else begin end else begin
reg [WRENW-1:0][WSELW-1:0] ram [SIZE-1:0]; reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION `RAM_INITIALIZATION
always @(posedge clk) begin always @(posedge clk) begin
if (write) begin if (write) begin
@ -132,9 +133,9 @@ module VX_dp_ram #(
`else `else
// default synthesis // default synthesis
if (LUTRAM != 0) begin if (LUTRAM != 0) begin
`USE_FAST_BRAM reg [DATAW-1:0] ram [SIZE-1:0]; `USE_FAST_BRAM reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION `RAM_INITIALIZATION
if (OUT_REG != 0) begin if (OUT_REG != 0) begin
reg [DATAW-1:0] rdata_r; reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin always @(posedge clk) begin
if (write) begin if (write) begin
@ -161,7 +162,7 @@ module VX_dp_ram #(
end end
end else begin end else begin
if (OUT_REG != 0) begin if (OUT_REG != 0) begin
reg [DATAW-1:0] ram [SIZE-1:0]; reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
reg [DATAW-1:0] rdata_r; reg [DATAW-1:0] rdata_r;
`RAM_INITIALIZATION `RAM_INITIALIZATION
always @(posedge clk) begin always @(posedge clk) begin
@ -178,7 +179,7 @@ module VX_dp_ram #(
assign rdata = rdata_r; assign rdata = rdata_r;
end else begin end else begin
if (NO_RWCHECK != 0) begin if (NO_RWCHECK != 0) begin
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [SIZE-1:0]; `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION `RAM_INITIALIZATION
always @(posedge clk) begin always @(posedge clk) begin
if (write) begin if (write) begin
@ -190,7 +191,7 @@ module VX_dp_ram #(
end end
assign rdata = ram[raddr]; assign rdata = ram[raddr];
end else begin end else begin
reg [DATAW-1:0] ram [SIZE-1:0]; reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION `RAM_INITIALIZATION
always @(posedge clk) begin always @(posedge clk) begin
if (write) begin if (write) begin
@ -208,9 +209,9 @@ module VX_dp_ram #(
end else begin end else begin
// (WRENW == 1) // (WRENW == 1)
if (LUTRAM != 0) begin if (LUTRAM != 0) begin
`USE_FAST_BRAM reg [DATAW-1:0] ram [SIZE-1:0]; `USE_FAST_BRAM reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION `RAM_INITIALIZATION
if (OUT_REG != 0) begin if (OUT_REG != 0) begin
reg [DATAW-1:0] rdata_r; reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin always @(posedge clk) begin
if (write) begin if (write) begin
@ -231,7 +232,7 @@ module VX_dp_ram #(
end end
end else begin end else begin
if (OUT_REG != 0) begin if (OUT_REG != 0) begin
reg [DATAW-1:0] ram [SIZE-1:0]; reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
reg [DATAW-1:0] rdata_r; reg [DATAW-1:0] rdata_r;
`RAM_INITIALIZATION `RAM_INITIALIZATION
always @(posedge clk) begin always @(posedge clk) begin
@ -245,7 +246,7 @@ module VX_dp_ram #(
assign rdata = rdata_r; assign rdata = rdata_r;
end else begin end else begin
if (NO_RWCHECK != 0) begin if (NO_RWCHECK != 0) begin
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [SIZE-1:0]; `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION `RAM_INITIALIZATION
always @(posedge clk) begin always @(posedge clk) begin
if (write) begin if (write) begin
@ -254,7 +255,7 @@ module VX_dp_ram #(
end end
assign rdata = ram[raddr]; assign rdata = ram[raddr];
end else begin end else begin
reg [DATAW-1:0] ram [SIZE-1:0]; reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION `RAM_INITIALIZATION
always @(posedge clk) begin always @(posedge clk) begin
if (write) begin if (write) begin
@ -265,10 +266,10 @@ module VX_dp_ram #(
end end
end end
end end
end end
`else `else
// RAM emulation // RAM emulation
reg [DATAW-1:0] ram [SIZE-1:0]; reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION `RAM_INITIALIZATION
wire [DATAW-1:0] ram_n; wire [DATAW-1:0] ram_n;
@ -276,8 +277,8 @@ module VX_dp_ram #(
assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW]; assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW];
end end
if (OUT_REG != 0) begin if (OUT_REG != 0) begin
reg [DATAW-1:0] rdata_r; reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin always @(posedge clk) begin
if (write) begin if (write) begin
ram[waddr] <= ram_n; ram[waddr] <= ram_n;
@ -287,7 +288,7 @@ module VX_dp_ram #(
end end
end end
assign rdata = rdata_r; assign rdata = rdata_r;
end else begin end else begin
reg [DATAW-1:0] prev_data; reg [DATAW-1:0] prev_data;
reg [ADDRW-1:0] prev_waddr; reg [ADDRW-1:0] prev_waddr;
reg prev_write; reg prev_write;
@ -298,7 +299,7 @@ module VX_dp_ram #(
prev_write <= (| wren); prev_write <= (| wren);
prev_data <= ram[waddr]; prev_data <= ram[waddr];
prev_waddr <= waddr; prev_waddr <= waddr;
end end
if (LUTRAM || !NO_RWCHECK) begin if (LUTRAM || !NO_RWCHECK) begin
`UNUSED_VAR (prev_write) `UNUSED_VAR (prev_write)
`UNUSED_VAR (prev_data) `UNUSED_VAR (prev_data)

View file

@ -0,0 +1,115 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_platform.vh"
`TRACING_OFF
// Dual-port RAM emulation model (one write port, one read port) whose
// contents are reloaded with INIT_VALUE while `reset` is asserted.
//
// Parameters:
//   DATAW       - width of one RAM word in bits.
//   SIZE        - exclusive upper bound of the storage index range.
//   ADDR_MIN    - lowest valid storage index; storage spans [ADDR_MIN, SIZE-1].
//   WRENW       - number of write-enable lanes; must divide DATAW evenly.
//   OUT_REG     - non-zero selects a registered read port gated by `read`.
//   NO_RWCHECK  - with LUTRAM == 0 and OUT_REG == 0, a read of the address
//                 captured on the previous clock edge returns the word that
//                 was stored there at that edge (see the rdata mux below).
//   LUTRAM      - non-zero forces the plain combinational read path.
//   INIT_ENABLE - non-zero enables power-up initialisation of the storage.
//   INIT_FILE   - when non-empty, hex image loaded with $readmemh at power-up.
//   INIT_VALUE  - fill value used at power-up (no INIT_FILE) and during reset.
//   ADDRW       - address port width, defaults to `LOG2UP(SIZE).
module VX_dp_ram_rst #(
    parameter DATAW       = 1,
    parameter SIZE        = 1,
    parameter ADDR_MIN    = 0,
    parameter WRENW       = 1,
    parameter OUT_REG     = 0,
    parameter NO_RWCHECK  = 0,
    parameter LUTRAM      = 0,
    parameter INIT_ENABLE = 0,
    parameter INIT_FILE   = "",
    parameter [DATAW-1:0] INIT_VALUE = 0,
    parameter ADDRW       = `LOG2UP(SIZE)
) (
    input wire               clk,
    input wire               reset,
    input wire               read,
    input wire               write,
    input wire [WRENW-1:0]   wren,
    input wire [ADDRW-1:0]   waddr,
    input wire [DATAW-1:0]   wdata,
    input wire [ADDRW-1:0]   raddr,
    output wire [DATAW-1:0]  rdata
);
    // Width of the slice controlled by a single write-enable bit.
    localparam WSELW = DATAW / WRENW;
    `STATIC_ASSERT((WRENW * WSELW == DATAW), ("invalid parameter"))

    // `read` is only consumed by the registered-output path below.
    `UNUSED_VAR (read)

    // RAM emulation
    reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];

    // Optional power-up initialisation. The fill loop starts at ADDR_MIN so
    // that it never indexes below the declared storage range.
    if (INIT_ENABLE != 0) begin
        if (INIT_FILE != "") begin
            initial $readmemh(INIT_FILE, ram);
        end else begin
            initial
                for (integer i = ADDR_MIN; i < SIZE; ++i)
                    ram[i] = INIT_VALUE;
        end
    end

    // Next value of the addressed word: each lane takes wdata when its
    // write-enable is set (or unconditionally when there is a single lane),
    // otherwise it keeps the currently stored slice.
    wire [DATAW-1:0] ram_n;
    for (genvar i = 0; i < WRENW; ++i) begin
        assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW];
    end

    if (OUT_REG != 0) begin
        // Registered read: rdata_r is updated only on cycles where `read` is set.
        reg [DATAW-1:0] rdata_r;
        always @(posedge clk) begin
            if (reset) begin
                // Reload every valid entry; bounded by the declared range.
                for (integer i = ADDR_MIN; i < SIZE; ++i) begin
                    ram[i] <= DATAW'(INIT_VALUE);
                end
                rdata_r <= '0;
            end else begin
                if (write) begin
                    ram[waddr] <= ram_n;
                end
                if (read) begin
                    rdata_r <= ram[raddr];
                end
            end
        end
        assign rdata = rdata_r;
    end else begin
        // Combinational read. prev_* remember the write-port state from the
        // previous clock edge for the NO_RWCHECK read path.
        reg [DATAW-1:0] prev_data;
        reg [ADDRW-1:0] prev_waddr;
        reg prev_write;
        always @(posedge clk) begin
            if (reset) begin
                // Reload every valid entry; bounded by the declared range.
                for (integer i = ADDR_MIN; i < SIZE; ++i) begin
                    ram[i] <= DATAW'(INIT_VALUE);
                end
                prev_write <= 0;
                prev_data  <= '0;
                prev_waddr <= '0;
            end else begin
                if (write) begin
                    ram[waddr] <= ram_n;
                end
                prev_write <= (| wren);
                prev_data  <= ram[waddr];
                prev_waddr <= waddr;
            end
        end
        if (LUTRAM || !NO_RWCHECK) begin
            `UNUSED_VAR (prev_write)
            `UNUSED_VAR (prev_data)
            `UNUSED_VAR (prev_waddr)
            assign rdata = ram[raddr];
        end else begin
            // Return the word captured before the previous edge's update when
            // the read address matches the address seen on that edge.
            assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
        end
    end

endmodule
`TRACING_ON

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -19,14 +19,14 @@ module VX_elastic_buffer #(
parameter SIZE = 1, parameter SIZE = 1,
parameter OUT_REG = 0, parameter OUT_REG = 0,
parameter LUTRAM = 0 parameter LUTRAM = 0
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire valid_in, input wire valid_in,
output wire ready_in, output wire ready_in,
input wire [DATAW-1:0] data_in, input wire [DATAW-1:0] data_in,
output wire [DATAW-1:0] data_out, output wire [DATAW-1:0] data_out,
input wire ready_out, input wire ready_out,
output wire valid_out output wire valid_out
@ -55,7 +55,7 @@ module VX_elastic_buffer #(
.ready_out (ready_out) .ready_out (ready_out)
); );
end else if (SIZE == 2) begin end else if (SIZE == 2 && LUTRAM == 0) begin
VX_skid_buffer #( VX_skid_buffer #(
.DATAW (DATAW), .DATAW (DATAW),
@ -71,9 +71,9 @@ module VX_elastic_buffer #(
.data_out (data_out), .data_out (data_out),
.ready_out (ready_out) .ready_out (ready_out)
); );
end else begin end else begin
wire empty, full; wire empty, full;
wire [DATAW-1:0] data_out_t; wire [DATAW-1:0] data_out_t;
@ -93,7 +93,7 @@ module VX_elastic_buffer #(
.push (push), .push (push),
.pop (pop), .pop (pop),
.data_in(data_in), .data_in(data_in),
.data_out(data_out_t), .data_out(data_out_t),
.empty (empty), .empty (empty),
.full (full), .full (full),
`UNUSED_PIN (alm_empty), `UNUSED_PIN (alm_empty),
@ -105,15 +105,15 @@ module VX_elastic_buffer #(
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (DATAW), .DATAW (DATAW),
.SIZE (OUT_REG == 2) .SIZE ((OUT_REG == 2) ? 1 : 0)
) out_buf ( ) out_buf (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.valid_in (~empty), .valid_in (~empty),
.data_in (data_out_t), .data_in (data_out_t),
.ready_in (ready_out_t), .ready_in (ready_out_t),
.valid_out (valid_out), .valid_out (valid_out),
.data_out (data_out), .data_out (data_out),
.ready_out (ready_out) .ready_out (ready_out)
); );

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -16,53 +16,52 @@
`TRACING_OFF `TRACING_OFF
module VX_fair_arbiter #( module VX_fair_arbiter #(
parameter NUM_REQS = 1, parameter NUM_REQS = 1,
parameter LOCK_ENABLE = 0,
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS) parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire [NUM_REQS-1:0] requests, input wire [NUM_REQS-1:0] requests,
output wire [LOG_NUM_REQS-1:0] grant_index, output wire [LOG_NUM_REQS-1:0] grant_index,
output wire [NUM_REQS-1:0] grant_onehot, output wire [NUM_REQS-1:0] grant_onehot,
output wire grant_valid, output wire grant_valid,
input wire grant_unlock input wire grant_ready
); );
if (NUM_REQS == 1) begin if (NUM_REQS == 1) begin
`UNUSED_VAR (clk) `UNUSED_VAR (clk)
`UNUSED_VAR (reset) `UNUSED_VAR (reset)
`UNUSED_VAR (grant_unlock) `UNUSED_VAR (grant_ready)
assign grant_index = '0; assign grant_index = '0;
assign grant_onehot = requests; assign grant_onehot = requests;
assign grant_valid = requests[0]; assign grant_valid = requests[0];
end else begin end else begin
reg [NUM_REQS-1:0] buffer; reg [NUM_REQS-1:0] grant_mask;
wire [NUM_REQS-1:0] buffer_qual = buffer & requests; wire [NUM_REQS-1:0] requests_rem = requests & ~grant_mask;
wire [NUM_REQS-1:0] requests_qual = (| buffer) ? buffer_qual : requests; wire rem_valid = (| requests_rem);
wire [NUM_REQS-1:0] buffer_n = requests_qual & ~grant_onehot; wire [NUM_REQS-1:0] requests_qual = rem_valid ? requests_rem : requests;
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
buffer <= '0; grant_mask <= '0;
end else if (!LOCK_ENABLE || grant_unlock) begin end else if (grant_ready) begin
buffer <= buffer_n; grant_mask <= rem_valid ? (grant_mask | grant_onehot) : grant_onehot;
end end
end end
VX_priority_arbiter #( VX_priority_arbiter #(
.NUM_REQS (NUM_REQS) .NUM_REQS (NUM_REQS)
) priority_arbiter ( ) priority_arbiter (
.requests (requests_qual), .requests (requests_qual),
.grant_index (grant_index), .grant_index (grant_index),
.grant_onehot (grant_onehot), .grant_onehot (grant_onehot),
.grant_valid (grant_valid) .grant_valid (grant_valid)
); );
end end
endmodule endmodule
`TRACING_ON `TRACING_ON

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -22,28 +22,28 @@ module VX_fifo_queue #(
parameter OUT_REG = 0, parameter OUT_REG = 0,
parameter LUTRAM = 1, parameter LUTRAM = 1,
parameter SIZEW = `CLOG2(DEPTH+1) parameter SIZEW = `CLOG2(DEPTH+1)
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire push, input wire push,
input wire pop, input wire pop,
input wire [DATAW-1:0] data_in, input wire [DATAW-1:0] data_in,
output wire [DATAW-1:0] data_out, output wire [DATAW-1:0] data_out,
output wire empty, output wire empty,
output wire alm_empty, output wire alm_empty,
output wire full, output wire full,
output wire alm_full, output wire alm_full,
output wire [SIZEW-1:0] size output wire [SIZEW-1:0] size
); );
localparam ADDRW = `CLOG2(DEPTH); localparam ADDRW = `CLOG2(DEPTH);
`STATIC_ASSERT(ALM_FULL > 0, ("alm_full must be greater than 0!")) `STATIC_ASSERT(ALM_FULL > 0, ("alm_full must be greater than 0!"))
`STATIC_ASSERT(ALM_FULL < DEPTH, ("alm_full must be smaller than size!")) `STATIC_ASSERT(ALM_FULL < DEPTH, ("alm_full must be smaller than size!"))
`STATIC_ASSERT(ALM_EMPTY > 0, ("alm_empty must be greater than 0!")) `STATIC_ASSERT(ALM_EMPTY > 0, ("alm_empty must be greater than 0!"))
`STATIC_ASSERT(ALM_EMPTY < DEPTH, ("alm_empty must be smaller than size!")) `STATIC_ASSERT(ALM_EMPTY < DEPTH, ("alm_empty must be smaller than size!"))
`STATIC_ASSERT(`IS_POW2(DEPTH), ("size must be a power of 2!")) `STATIC_ASSERT(`IS_POW2(DEPTH), ("size must be a power of 2!"))
if (DEPTH == 1) begin if (DEPTH == 1) begin
reg [DATAW-1:0] head_r; reg [DATAW-1:0] head_r;
@ -52,7 +52,7 @@ module VX_fifo_queue #(
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
head_r <= '0; head_r <= '0;
size_r <= '0; size_r <= '0;
end else begin end else begin
`ASSERT(~push || ~full, ("runtime error: writing to a full queue")); `ASSERT(~push || ~full, ("runtime error: writing to a full queue"));
`ASSERT(~pop || ~empty, ("runtime error: reading an empty queue")); `ASSERT(~pop || ~empty, ("runtime error: reading an empty queue"));
@ -63,11 +63,11 @@ module VX_fifo_queue #(
end else if (pop) begin end else if (pop) begin
size_r <= '0; size_r <= '0;
end end
if (push) begin if (push) begin
head_r <= data_in; head_r <= data_in;
end end
end end
end end
assign data_out = head_r; assign data_out = head_r;
assign empty = (size_r == 0); assign empty = (size_r == 0);
@ -77,7 +77,7 @@ module VX_fifo_queue #(
assign size = size_r; assign size = size_r;
end else begin end else begin
reg empty_r, alm_empty_r; reg empty_r, alm_empty_r;
reg full_r, alm_full_r; reg full_r, alm_full_r;
reg [ADDRW-1:0] used_r; reg [ADDRW-1:0] used_r;
@ -86,8 +86,8 @@ module VX_fifo_queue #(
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
empty_r <= 1; empty_r <= 1;
alm_empty_r <= 1; alm_empty_r <= 1;
full_r <= 0; full_r <= 0;
alm_full_r <= 0; alm_full_r <= 0;
used_r <= '0; used_r <= '0;
end else begin end else begin
@ -106,21 +106,21 @@ module VX_fifo_queue #(
end else if (pop) begin end else if (pop) begin
full_r <= 0; full_r <= 0;
if (used_r == ADDRW'(ALM_FULL)) if (used_r == ADDRW'(ALM_FULL))
alm_full_r <= 0; alm_full_r <= 0;
if (used_r == ADDRW'(1)) if (used_r == ADDRW'(1))
empty_r <= 1; empty_r <= 1;
if (used_r == ADDRW'(ALM_EMPTY+1)) if (used_r == ADDRW'(ALM_EMPTY+1))
alm_empty_r <= 1; alm_empty_r <= 1;
end end
used_r <= used_n; used_r <= used_n;
end end
end end
if (DEPTH == 2) begin if (DEPTH == 2 && LUTRAM == 0) begin
assign used_n = used_r ^ (push ^ pop); assign used_n = used_r ^ (push ^ pop);
if (0 == OUT_REG) begin if (0 == OUT_REG) begin
reg [1:0][DATAW-1:0] shift_reg; reg [1:0][DATAW-1:0] shift_reg;
@ -131,8 +131,8 @@ module VX_fifo_queue #(
end end
end end
assign data_out = shift_reg[!used_r[0]]; assign data_out = shift_reg[!used_r[0]];
end else begin end else begin
reg [DATAW-1:0] data_out_r; reg [DATAW-1:0] data_out_r;
@ -152,16 +152,16 @@ module VX_fifo_queue #(
assign data_out = data_out_r; assign data_out = data_out_r;
end end
end else begin end else begin
assign used_n = $signed(used_r) + ADDRW'($signed(2'(push) - 2'(pop))); assign used_n = $signed(used_r) + ADDRW'($signed(2'(push) - 2'(pop)));
if (0 == OUT_REG) begin if (0 == OUT_REG) begin
reg [ADDRW-1:0] rd_ptr_r; reg [ADDRW-1:0] rd_ptr_r;
reg [ADDRW-1:0] wr_ptr_r; reg [ADDRW-1:0] wr_ptr_r;
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
rd_ptr_r <= '0; rd_ptr_r <= '0;
@ -169,7 +169,7 @@ module VX_fifo_queue #(
end else begin end else begin
wr_ptr_r <= wr_ptr_r + ADDRW'(push); wr_ptr_r <= wr_ptr_r + ADDRW'(push);
rd_ptr_r <= rd_ptr_r + ADDRW'(pop); rd_ptr_r <= rd_ptr_r + ADDRW'(pop);
end end
end end
VX_dp_ram #( VX_dp_ram #(
@ -179,8 +179,8 @@ module VX_fifo_queue #(
) dp_ram ( ) dp_ram (
.clk(clk), .clk(clk),
.read (1'b1), .read (1'b1),
.write (push), .write (push),
`UNUSED_PIN (wren), `UNUSED_PIN (wren),
.waddr (wr_ptr_r), .waddr (wr_ptr_r),
.wdata (data_in), .wdata (data_in),
.raddr (rd_ptr_r), .raddr (rd_ptr_r),
@ -196,18 +196,18 @@ module VX_fifo_queue #(
reg [ADDRW-1:0] rd_ptr_n_r; reg [ADDRW-1:0] rd_ptr_n_r;
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
wr_ptr_r <= '0; wr_ptr_r <= '0;
rd_ptr_r <= '0; rd_ptr_r <= '0;
rd_ptr_n_r <= 1; rd_ptr_n_r <= 1;
end else begin end else begin
wr_ptr_r <= wr_ptr_r + ADDRW'(push); wr_ptr_r <= wr_ptr_r + ADDRW'(push);
if (pop) begin if (pop) begin
rd_ptr_r <= rd_ptr_n_r; rd_ptr_r <= rd_ptr_n_r;
if (DEPTH > 2) begin if (DEPTH > 2) begin
rd_ptr_n_r <= rd_ptr_r + ADDRW'(2); rd_ptr_n_r <= rd_ptr_r + ADDRW'(2);
end else begin // (DEPTH == 2); end else begin // (DEPTH == 2);
rd_ptr_n_r <= ~rd_ptr_n_r; rd_ptr_n_r <= ~rd_ptr_n_r;
end end
end end
end end
@ -227,13 +227,13 @@ module VX_fifo_queue #(
) dp_ram ( ) dp_ram (
.clk (clk), .clk (clk),
.read (1'b1), .read (1'b1),
.write (push), .write (push),
`UNUSED_PIN (wren), `UNUSED_PIN (wren),
.waddr (wr_ptr_r), .waddr (wr_ptr_r),
.wdata (data_in), .wdata (data_in),
.raddr (rd_ptr_n_r), .raddr (rd_ptr_n_r),
.rdata (dout) .rdata (dout)
); );
always @(posedge clk) begin always @(posedge clk) begin
if (push && (empty_r || (going_empty && pop))) begin if (push && (empty_r || (going_empty && pop))) begin
@ -246,12 +246,12 @@ module VX_fifo_queue #(
assign data_out = dout_r; assign data_out = dout_r;
end end
end end
assign empty = empty_r; assign empty = empty_r;
assign alm_empty = alm_empty_r; assign alm_empty = alm_empty_r;
assign full = full_r; assign full = full_r;
assign alm_full = alm_full_r; assign alm_full = alm_full_r;
assign size = {full_r, used_r}; assign size = {full_r, used_r};
end end
endmodule endmodule

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -16,29 +16,27 @@
`TRACING_OFF `TRACING_OFF
module VX_generic_arbiter #( module VX_generic_arbiter #(
parameter NUM_REQS = 1, parameter NUM_REQS = 1,
parameter LOCK_ENABLE = 0, parameter `STRING TYPE = "P",
parameter `STRING TYPE = "P",
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS) parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire [NUM_REQS-1:0] requests, input wire [NUM_REQS-1:0] requests,
output wire [LOG_NUM_REQS-1:0] grant_index, output wire [LOG_NUM_REQS-1:0] grant_index,
output wire [NUM_REQS-1:0] grant_onehot, output wire [NUM_REQS-1:0] grant_onehot,
output wire grant_valid, output wire grant_valid,
input wire grant_unlock input wire grant_ready
); );
if (TYPE == "P") begin if (TYPE == "P") begin
`UNUSED_PARAM (LOCK_ENABLE)
`UNUSED_VAR (clk) `UNUSED_VAR (clk)
`UNUSED_VAR (reset) `UNUSED_VAR (reset)
`UNUSED_VAR (grant_unlock) `UNUSED_VAR (grant_ready)
VX_priority_arbiter #( VX_priority_arbiter #(
.NUM_REQS (NUM_REQS) .NUM_REQS (NUM_REQS)
) priority_arbiter ( ) priority_arbiter (
.requests (requests), .requests (requests),
.grant_valid (grant_valid), .grant_valid (grant_valid),
.grant_index (grant_index), .grant_index (grant_index),
.grant_onehot (grant_onehot) .grant_onehot (grant_onehot)
@ -47,68 +45,64 @@ module VX_generic_arbiter #(
end else if (TYPE == "R") begin end else if (TYPE == "R") begin
VX_rr_arbiter #( VX_rr_arbiter #(
.NUM_REQS (NUM_REQS), .NUM_REQS (NUM_REQS)
.LOCK_ENABLE (LOCK_ENABLE)
) rr_arbiter ( ) rr_arbiter (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.requests (requests), .requests (requests),
.grant_valid (grant_valid), .grant_valid (grant_valid),
.grant_index (grant_index), .grant_index (grant_index),
.grant_onehot (grant_onehot), .grant_onehot (grant_onehot),
.grant_unlock (grant_unlock) .grant_ready (grant_ready)
); );
end else if (TYPE == "F") begin end else if (TYPE == "F") begin
VX_fair_arbiter #( VX_fair_arbiter #(
.NUM_REQS (NUM_REQS), .NUM_REQS (NUM_REQS)
.LOCK_ENABLE (LOCK_ENABLE)
) fair_arbiter ( ) fair_arbiter (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.requests (requests), .requests (requests),
.grant_valid (grant_valid), .grant_valid (grant_valid),
.grant_index (grant_index), .grant_index (grant_index),
.grant_onehot (grant_onehot), .grant_onehot (grant_onehot),
.grant_unlock (grant_unlock) .grant_ready (grant_ready)
); );
end else if (TYPE == "M") begin end else if (TYPE == "M") begin
VX_matrix_arbiter #( VX_matrix_arbiter #(
.NUM_REQS (NUM_REQS), .NUM_REQS (NUM_REQS)
.LOCK_ENABLE (LOCK_ENABLE)
) matrix_arbiter ( ) matrix_arbiter (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.requests (requests), .requests (requests),
.grant_valid (grant_valid), .grant_valid (grant_valid),
.grant_index (grant_index), .grant_index (grant_index),
.grant_onehot (grant_onehot), .grant_onehot (grant_onehot),
.grant_unlock (grant_unlock) .grant_ready (grant_ready)
); );
end else if (TYPE == "C") begin end else if (TYPE == "C") begin
VX_cyclic_arbiter #( VX_cyclic_arbiter #(
.NUM_REQS (NUM_REQS), .NUM_REQS (NUM_REQS)
.LOCK_ENABLE (LOCK_ENABLE)
) cyclic_arbiter ( ) cyclic_arbiter (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.requests (requests), .requests (requests),
.grant_valid (grant_valid), .grant_valid (grant_valid),
.grant_index (grant_index), .grant_index (grant_index),
.grant_onehot (grant_onehot), .grant_onehot (grant_onehot),
.grant_unlock (grant_unlock) .grant_ready (grant_ready)
); );
end else begin end else begin
`ERROR(("invalid parameter")); `ERROR(("invalid parameter"));
end end
endmodule endmodule
`TRACING_ON `TRACING_ON

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -16,52 +16,51 @@
`TRACING_OFF `TRACING_OFF
module VX_matrix_arbiter #( module VX_matrix_arbiter #(
parameter NUM_REQS = 1, parameter NUM_REQS = 1,
parameter LOCK_ENABLE = 0,
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS) parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire [NUM_REQS-1:0] requests, input wire [NUM_REQS-1:0] requests,
output wire [LOG_NUM_REQS-1:0] grant_index, output wire [LOG_NUM_REQS-1:0] grant_index,
output wire [NUM_REQS-1:0] grant_onehot, output wire [NUM_REQS-1:0] grant_onehot,
output wire grant_valid, output wire grant_valid,
input wire grant_unlock input wire grant_ready
); );
if (NUM_REQS == 1) begin if (NUM_REQS == 1) begin
`UNUSED_VAR (clk) `UNUSED_VAR (clk)
`UNUSED_VAR (reset) `UNUSED_VAR (reset)
`UNUSED_VAR (grant_unlock) `UNUSED_VAR (grant_ready)
assign grant_index = '0; assign grant_index = '0;
assign grant_onehot = requests; assign grant_onehot = requests;
assign grant_valid = requests[0]; assign grant_valid = requests[0];
end else begin end else begin
reg [NUM_REQS-1:1] state [NUM_REQS-1:0]; reg [NUM_REQS-1:1] state [NUM_REQS-1:0];
wire [NUM_REQS-1:0] pri [NUM_REQS-1:0]; wire [NUM_REQS-1:0] pri [NUM_REQS-1:0];
wire [NUM_REQS-1:0] grant_unqual; wire [NUM_REQS-1:0] grant_unqual;
for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar i = 0; i < NUM_REQS; ++i) begin
for (genvar j = 0; j < NUM_REQS; ++j) begin for (genvar j = 0; j < NUM_REQS; ++j) begin
if (j > i) begin if (j > i) begin
assign pri[j][i] = requests[i] && state[i][j]; assign pri[j][i] = requests[i] && state[i][j];
end end
else if (j < i) begin else if (j < i) begin
assign pri[j][i] = requests[i] && !state[j][i]; assign pri[j][i] = requests[i] && !state[j][i];
end end
else begin else begin
assign pri[j][i] = 0; assign pri[j][i] = 0;
end end
end end
assign grant_unqual[i] = requests[i] && !(| pri[i]); assign grant_unqual[i] = requests[i] && !(| pri[i]);
end end
for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar i = 0; i < NUM_REQS; ++i) begin
for (genvar j = i + 1; j < NUM_REQS; ++j) begin for (genvar j = i + 1; j < NUM_REQS; ++j) begin
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
state[i][j] <= '0; state[i][j] <= '0;
end else begin end else begin
state[i][j] <= (state[i][j] || grant_unqual[j]) && !grant_unqual[i]; state[i][j] <= (state[i][j] || grant_unqual[j]) && !grant_unqual[i];
@ -70,20 +69,15 @@ module VX_matrix_arbiter #(
end end
end end
if (LOCK_ENABLE == 0) begin reg [NUM_REQS-1:0] grant_unqual_prev;
`UNUSED_VAR (grant_unlock) always @(posedge clk) begin
assign grant_onehot = grant_unqual; if (reset) begin
end else begin grant_unqual_prev <= '0;
reg [NUM_REQS-1:0] grant_unqual_prev; end else if (grant_ready) begin
always @(posedge clk) begin grant_unqual_prev <= grant_unqual;
if (reset) begin
grant_unqual_prev <= '0;
end else if (grant_unlock) begin
grant_unqual_prev <= grant_unqual;
end
end end
assign grant_onehot = grant_unlock ? grant_unqual : grant_unqual_prev;
end end
assign grant_onehot = grant_ready ? grant_unqual : grant_unqual_prev;
VX_onehot_encoder #( VX_onehot_encoder #(
.N (NUM_REQS) .N (NUM_REQS)
@ -96,6 +90,6 @@ module VX_matrix_arbiter #(
assign grant_valid = (| requests); assign grant_valid = (| requests);
end end
endmodule endmodule
`TRACING_ON `TRACING_ON

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -24,13 +24,13 @@ module VX_mem_coalescer #(
parameter TAG_WIDTH = 8, parameter TAG_WIDTH = 8,
parameter UUID_WIDTH = 0, // upper section of the request tag contains the UUID parameter UUID_WIDTH = 0, // upper section of the request tag contains the UUID
parameter QUEUE_SIZE = 8, parameter QUEUE_SIZE = 8,
parameter DATA_IN_WIDTH = DATA_IN_SIZE * 8, parameter DATA_IN_WIDTH = DATA_IN_SIZE * 8,
parameter DATA_OUT_WIDTH= DATA_OUT_SIZE * 8, parameter DATA_OUT_WIDTH= DATA_OUT_SIZE * 8,
parameter OUT_REQS = (NUM_REQS * DATA_IN_WIDTH) / DATA_OUT_WIDTH, parameter DATA_RATIO = DATA_OUT_SIZE / DATA_IN_SIZE,
parameter BATCH_SIZE = DATA_OUT_SIZE / DATA_IN_SIZE, parameter DATA_RATIO_W = `LOG2UP(DATA_RATIO),
parameter BATCH_SIZE_W = `LOG2UP(BATCH_SIZE), parameter OUT_REQS = NUM_REQS / DATA_RATIO,
parameter OUT_ADDR_WIDTH= ADDR_WIDTH - BATCH_SIZE_W, parameter OUT_ADDR_WIDTH= ADDR_WIDTH - DATA_RATIO_W,
parameter QUEUE_ADDRW = `CLOG2(QUEUE_SIZE), parameter QUEUE_ADDRW = `CLOG2(QUEUE_SIZE),
parameter OUT_TAG_WIDTH = UUID_WIDTH + QUEUE_ADDRW parameter OUT_TAG_WIDTH = UUID_WIDTH + QUEUE_ADDRW
) ( ) (
@ -45,7 +45,7 @@ module VX_mem_coalescer #(
input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] in_req_addr, input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] in_req_addr,
input wire [NUM_REQS-1:0][ATYPE_WIDTH-1:0] in_req_atype, input wire [NUM_REQS-1:0][ATYPE_WIDTH-1:0] in_req_atype,
input wire [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_req_data, input wire [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_req_data,
input wire [TAG_WIDTH-1:0] in_req_tag, input wire [TAG_WIDTH-1:0] in_req_tag,
output wire in_req_ready, output wire in_req_ready,
// Input response // Input response
@ -58,7 +58,7 @@ module VX_mem_coalescer #(
// Output request // Output request
output wire out_req_valid, output wire out_req_valid,
output wire out_req_rw, output wire out_req_rw,
output wire [OUT_REQS-1:0] out_req_mask, output wire [OUT_REQS-1:0] out_req_mask,
output wire [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] out_req_byteen, output wire [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] out_req_byteen,
output wire [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr, output wire [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr,
output wire [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype, output wire [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype,
@ -78,27 +78,27 @@ module VX_mem_coalescer #(
`STATIC_ASSERT ((NUM_REQS * DATA_IN_WIDTH >= DATA_OUT_WIDTH), ("invalid parameter")) `STATIC_ASSERT ((NUM_REQS * DATA_IN_WIDTH >= DATA_OUT_WIDTH), ("invalid parameter"))
`RUNTIME_ASSERT ((~in_req_valid || in_req_mask != 0), ("invalid request mask")); `RUNTIME_ASSERT ((~in_req_valid || in_req_mask != 0), ("invalid request mask"));
`RUNTIME_ASSERT ((~out_rsp_valid || out_rsp_mask != 0), ("invalid request mask")); `RUNTIME_ASSERT ((~out_rsp_valid || out_rsp_mask != 0), ("invalid request mask"));
localparam TAG_ID_WIDTH = TAG_WIDTH - UUID_WIDTH; localparam TAG_ID_WIDTH = TAG_WIDTH - UUID_WIDTH;
localparam NUM_REQS_W = `LOG2UP(NUM_REQS); localparam NUM_REQS_W = `LOG2UP(NUM_REQS);
// tag + mask + offset // tag + mask + offset
localparam IBUF_DATA_WIDTH = TAG_ID_WIDTH + NUM_REQS + (NUM_REQS * BATCH_SIZE_W); localparam IBUF_DATA_WIDTH = TAG_ID_WIDTH + NUM_REQS + (NUM_REQS * DATA_RATIO_W);
localparam STATE_SETUP = 0; localparam STATE_SETUP = 0;
localparam STATE_SEND = 1; localparam STATE_SEND = 1;
logic state_r, state_n;
logic out_req_valid_r, out_req_valid_n;
logic out_req_rw_r, out_req_rw_n;
logic [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n;
logic [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] out_req_byteen_r, out_req_byteen_n;
logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n;
logic [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype_r, out_req_atype_n;
logic [OUT_REQS-1:0][DATA_OUT_WIDTH-1:0] out_req_data_r, out_req_data_n;
logic [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n;
logic in_req_ready_n; reg state_r, state_n;
reg out_req_valid_r, out_req_valid_n;
reg out_req_rw_r, out_req_rw_n;
reg [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n;
reg [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n;
reg [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype_r, out_req_atype_n;
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] out_req_byteen_r, out_req_byteen_n;
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] out_req_data_r, out_req_data_n;
reg [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n;
reg in_req_ready_n;
wire ibuf_push; wire ibuf_push;
wire ibuf_pop; wire ibuf_pop;
@ -108,33 +108,45 @@ module VX_mem_coalescer #(
wire ibuf_empty; wire ibuf_empty;
wire [IBUF_DATA_WIDTH-1:0] ibuf_din; wire [IBUF_DATA_WIDTH-1:0] ibuf_din;
wire [IBUF_DATA_WIDTH-1:0] ibuf_dout; wire [IBUF_DATA_WIDTH-1:0] ibuf_dout;
logic [OUT_REQS-1:0] batch_valid_r, batch_valid_n; logic [OUT_REQS-1:0] batch_valid_r, batch_valid_n;
logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] seed_addr_r, seed_addr_n; logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] seed_addr_r, seed_addr_n;
logic [OUT_REQS-1:0][ATYPE_WIDTH-1:0] seed_atype_r, seed_atype_n; logic [OUT_REQS-1:0][ATYPE_WIDTH-1:0] seed_atype_r, seed_atype_n;
logic [NUM_REQS-1:0] addr_matches_r, addr_matches_n;
logic [NUM_REQS-1:0] processed_mask_r, processed_mask_n; logic [NUM_REQS-1:0] processed_mask_r, processed_mask_n;
wire [OUT_REQS-1:0][NUM_REQS_W-1:0] seed_idx; wire [OUT_REQS-1:0][NUM_REQS_W-1:0] seed_idx;
wire [NUM_REQS-1:0][OUT_ADDR_WIDTH-1:0] in_addr_base; wire [NUM_REQS-1:0][OUT_ADDR_WIDTH-1:0] in_addr_base;
wire [NUM_REQS-1:0][BATCH_SIZE_W-1:0] in_addr_offset; wire [NUM_REQS-1:0][DATA_RATIO_W-1:0] in_addr_offset;
for (genvar i = 0; i < NUM_REQS; i++) begin for (genvar i = 0; i < NUM_REQS; i++) begin
assign in_addr_base[i] = in_req_addr[i][ADDR_WIDTH-1:BATCH_SIZE_W]; assign in_addr_base[i] = in_req_addr[i][ADDR_WIDTH-1:DATA_RATIO_W];
assign in_addr_offset[i] = in_req_addr[i][BATCH_SIZE_W-1:0]; assign in_addr_offset[i] = in_req_addr[i][DATA_RATIO_W-1:0];
end end
for (genvar i = 0; i < OUT_REQS; ++i) begin for (genvar i = 0; i < OUT_REQS; ++i) begin
wire [BATCH_SIZE-1:0] batch_mask = in_req_mask[BATCH_SIZE * i +: BATCH_SIZE] & ~processed_mask_r[BATCH_SIZE * i +: BATCH_SIZE]; wire [DATA_RATIO-1:0] batch_mask = in_req_mask[i * DATA_RATIO +: DATA_RATIO] & ~processed_mask_r[i * DATA_RATIO +: DATA_RATIO];
wire [BATCH_SIZE_W-1:0] batch_idx; wire [DATA_RATIO_W-1:0] batch_idx;
VX_priority_encoder #( VX_priority_encoder #(
.N (BATCH_SIZE) .N (DATA_RATIO)
) priority_encoder ( ) priority_encoder (
.data_in (batch_mask), .data_in (batch_mask),
.index (batch_idx), .index (batch_idx),
`UNUSED_PIN (onehot), `UNUSED_PIN (onehot),
.valid_out (batch_valid_n[i]) .valid_out (batch_valid_n[i])
); );
assign seed_idx[i] = NUM_REQS_W'(BATCH_SIZE * i) + NUM_REQS_W'(batch_idx); assign seed_idx[i] = NUM_REQS_W'(i * DATA_RATIO) + NUM_REQS_W'(batch_idx);
end
for (genvar i = 0; i < OUT_REQS; ++i) begin
assign seed_addr_n[i] = in_addr_base[seed_idx[i]];
assign seed_atype_n[i] = in_req_atype[seed_idx[i]];
end
for (genvar i = 0; i < OUT_REQS; ++i) begin
for (genvar j = 0; j < DATA_RATIO; ++j) begin
assign addr_matches_n[i * DATA_RATIO + j] = (in_addr_base[i * DATA_RATIO + j] == seed_addr_n[i]);
end
end end
always @(posedge clk) begin always @(posedge clk) begin
@ -144,12 +156,13 @@ module VX_mem_coalescer #(
out_req_valid_r <= 0; out_req_valid_r <= 0;
end else begin end else begin
state_r <= state_n; state_r <= state_n;
out_req_valid_r <= out_req_valid_n;
batch_valid_r <= batch_valid_n; batch_valid_r <= batch_valid_n;
seed_addr_r <= seed_addr_n; seed_addr_r <= seed_addr_n;
seed_atype_r <= seed_atype_n; seed_atype_r <= seed_atype_n;
out_req_rw_r <= out_req_rw_n; addr_matches_r <= addr_matches_n;
out_req_mask_r <= out_req_mask_n; out_req_valid_r <= out_req_valid_n;
out_req_mask_r <= out_req_mask_n;
out_req_rw_r <= out_req_rw_n;
out_req_addr_r <= out_req_addr_n; out_req_addr_r <= out_req_addr_n;
out_req_atype_r <= out_req_atype_n; out_req_atype_r <= out_req_atype_n;
out_req_byteen_r <= out_req_byteen_n; out_req_byteen_r <= out_req_byteen_n;
@ -159,84 +172,77 @@ module VX_mem_coalescer #(
end end
end end
logic [NUM_REQS-1:0] addr_matches; wire [NUM_REQS-1:0] current_pmask = in_req_mask & addr_matches_r;
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] req_byteen_merged;
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] req_data_merged;
always @(*) begin always @(*) begin
addr_matches = '0; req_byteen_merged = '0;
req_data_merged = 'x;
for (integer i = 0; i < OUT_REQS; ++i) begin for (integer i = 0; i < OUT_REQS; ++i) begin
for (integer j = 0; j < BATCH_SIZE; j++) begin for (integer j = 0; j < DATA_RATIO; ++j) begin
if (in_addr_base[BATCH_SIZE * i + j] == seed_addr_r[i]) begin if (current_pmask[i * DATA_RATIO + j]) begin
addr_matches[BATCH_SIZE * i + j] = 1; for (integer k = 0; k < DATA_IN_SIZE; ++k) begin
if (in_req_byteen[DATA_RATIO * i + j][k]) begin
req_byteen_merged[i][in_addr_offset[DATA_RATIO * i + j]][k] = 1'b1;
req_data_merged[i][in_addr_offset[DATA_RATIO * i + j]][k * 8 +: 8] = in_req_data[DATA_RATIO * i + j][k * 8 +: 8];
end
end
end end
end end
end end
end end
wire [NUM_REQS-1:0] current_pmask = in_req_mask & addr_matches; wire [OUT_REQS * DATA_RATIO - 1:0] pending_mask;
for (genvar i = 0; i < OUT_REQS * DATA_RATIO; ++i) begin
assign pending_mask[i] = in_req_mask[i] && ~addr_matches_r[i] && ~processed_mask_r[i];
end
wire batch_completed = ~(| pending_mask);
always @(*) begin always @(*) begin
state_n = state_r; state_n = state_r;
out_req_valid_n = out_req_valid_r; out_req_valid_n = out_req_valid_r;
seed_addr_n = seed_addr_r; out_req_mask_n = out_req_mask_r;
seed_atype_n = seed_atype_r; out_req_rw_n = out_req_rw_r;
out_req_rw_n = out_req_rw_r;
out_req_mask_n = out_req_mask_r;
out_req_addr_n = out_req_addr_r; out_req_addr_n = out_req_addr_r;
out_req_atype_n = out_req_atype_r; out_req_atype_n = out_req_atype_r;
out_req_byteen_n = out_req_byteen_r; out_req_byteen_n = out_req_byteen_r;
out_req_data_n = out_req_data_r; out_req_data_n = out_req_data_r;
out_req_tag_n = out_req_tag_r; out_req_tag_n = out_req_tag_r;
processed_mask_n = processed_mask_r; processed_mask_n = processed_mask_r;
in_req_ready_n = 0; in_req_ready_n = 0;
case (state_r) case (state_r)
STATE_SETUP: begin STATE_SETUP: begin
// find the next seed address
for (integer i = 0; i < OUT_REQS; ++i) begin
seed_addr_n[i] = in_addr_base[seed_idx[i]];
seed_atype_n[i] = in_req_atype[seed_idx[i]];
end
// wait for pending outgoing request to submit // wait for pending outgoing request to submit
if (out_req_valid && out_req_ready) begin if (out_req_valid && out_req_ready) begin
out_req_valid_n = 0; out_req_valid_n = 0;
end end
if (in_req_valid && ~out_req_valid_n && ~ibuf_full) begin if (in_req_valid && ~out_req_valid_n && ~ibuf_full) begin
state_n = STATE_SEND; state_n = STATE_SEND;
end end
end end
default/*STATE_SEND*/: begin default/*STATE_SEND*/: begin
out_req_valid_n = 1; out_req_valid_n = 1;
out_req_rw_n = in_req_rw; out_req_mask_n = batch_valid_r;
out_req_tag_n = {in_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr}; out_req_rw_n = in_req_rw;
in_req_ready_n = 1; out_req_addr_n = seed_addr_r;
out_req_byteen_n = '0; out_req_atype_n = seed_atype_r;
out_req_data_n = 'x; out_req_byteen_n= req_byteen_merged;
for (integer i = 0; i < OUT_REQS; ++i) begin out_req_data_n = req_data_merged;
for (integer j = 0; j < BATCH_SIZE; j++) begin out_req_tag_n = {in_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr};
if (in_req_mask[BATCH_SIZE * i + j]) begin
if (addr_matches[BATCH_SIZE * i + j]) begin in_req_ready_n = batch_completed;
for (integer k = 0; k < DATA_IN_SIZE; ++k) begin
if (in_req_byteen[BATCH_SIZE * i + j][k]) begin if (batch_completed) begin
out_req_byteen_n[i][in_addr_offset[BATCH_SIZE * i + j] * DATA_IN_SIZE + k +: 1] = 1'b1;
out_req_data_n[i][in_addr_offset[BATCH_SIZE * i + j] * DATA_IN_WIDTH + k * 8 +: 8] = in_req_data[BATCH_SIZE * i + j][k * 8 +: 8];
end
end
end else begin
if (!processed_mask_r[BATCH_SIZE * i + j]) begin
in_req_ready_n = 0;
end
end
end
end
out_req_mask_n[i] = batch_valid_r[i];
out_req_addr_n[i] = seed_addr_r[i];
out_req_atype_n[i]= seed_atype_r[i];
end
if (in_req_ready_n) begin
processed_mask_n = '0; processed_mask_n = '0;
end else begin end else begin
processed_mask_n = processed_mask_r | current_pmask; processed_mask_n = processed_mask_r | current_pmask;
end end
state_n = STATE_SETUP; state_n = STATE_SETUP;
end end
endcase endcase
@ -246,13 +252,15 @@ module VX_mem_coalescer #(
wire out_rsp_eop; wire out_rsp_eop;
assign ibuf_push = (state_r == STATE_SEND) && ~in_req_rw; wire req_sent = (state_r == STATE_SEND);
assign ibuf_push = req_sent && ~in_req_rw;
assign ibuf_pop = out_rsp_fire && out_rsp_eop; assign ibuf_pop = out_rsp_fire && out_rsp_eop;
assign ibuf_raddr = out_rsp_tag[QUEUE_ADDRW-1:0]; assign ibuf_raddr = out_rsp_tag[QUEUE_ADDRW-1:0];
wire [TAG_ID_WIDTH-1:0] ibuf_din_tag = in_req_tag[TAG_ID_WIDTH-1:0]; wire [TAG_ID_WIDTH-1:0] ibuf_din_tag = in_req_tag[TAG_ID_WIDTH-1:0];
wire [NUM_REQS-1:0][BATCH_SIZE_W-1:0] ibuf_din_offset = in_addr_offset; wire [NUM_REQS-1:0][DATA_RATIO_W-1:0] ibuf_din_offset = in_addr_offset;
wire [NUM_REQS-1:0] ibuf_din_pmask = current_pmask; wire [NUM_REQS-1:0] ibuf_din_pmask = current_pmask;
assign ibuf_din = {ibuf_din_tag, ibuf_din_pmask, ibuf_din_offset}; assign ibuf_din = {ibuf_din_tag, ibuf_din_pmask, ibuf_din_offset};
@ -286,7 +294,7 @@ module VX_mem_coalescer #(
// unmerge responses // unmerge responses
reg [QUEUE_SIZE-1:0][OUT_REQS-1:0] rsp_rem_mask; reg [QUEUE_SIZE-1:0][OUT_REQS-1:0] rsp_rem_mask;
wire [OUT_REQS-1:0] rsp_rem_mask_n = rsp_rem_mask[ibuf_raddr] & ~out_rsp_mask; wire [OUT_REQS-1:0] rsp_rem_mask_n = rsp_rem_mask[ibuf_raddr] & ~out_rsp_mask;
assign out_rsp_eop = ~(| rsp_rem_mask_n); assign out_rsp_eop = ~(| rsp_rem_mask_n);
@ -299,21 +307,19 @@ module VX_mem_coalescer #(
end end
end end
wire [NUM_REQS-1:0][BATCH_SIZE_W-1:0] ibuf_dout_offset; wire [NUM_REQS-1:0][DATA_RATIO_W-1:0] ibuf_dout_offset;
reg [NUM_REQS-1:0] ibuf_dout_pmask; wire [NUM_REQS-1:0] ibuf_dout_pmask;
wire [TAG_ID_WIDTH-1:0] ibuf_dout_tag; wire [TAG_ID_WIDTH-1:0] ibuf_dout_tag;
assign {ibuf_dout_tag, ibuf_dout_pmask, ibuf_dout_offset} = ibuf_dout; assign {ibuf_dout_tag, ibuf_dout_pmask, ibuf_dout_offset} = ibuf_dout;
logic [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_rsp_data_n;
logic [NUM_REQS-1:0] in_rsp_mask_n;
always @(*) begin wire [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_rsp_data_n;
for (integer i = 0; i < OUT_REQS; ++i) begin wire [NUM_REQS-1:0] in_rsp_mask_n;
for (integer j = 0; j < BATCH_SIZE; j++) begin
in_rsp_mask_n[BATCH_SIZE * i + j] = out_rsp_mask[i] && ibuf_dout_pmask[BATCH_SIZE * i + j]; for (genvar i = 0; i < OUT_REQS; ++i) begin
in_rsp_data_n[BATCH_SIZE * i + j] = out_rsp_data[i][ibuf_dout_offset[BATCH_SIZE * i + j] * DATA_IN_WIDTH +: DATA_IN_WIDTH]; for (genvar j = 0; j < DATA_RATIO; ++j) begin
end assign in_rsp_mask_n[i * DATA_RATIO + j] = out_rsp_mask[i] && ibuf_dout_pmask[i * DATA_RATIO + j];
assign in_rsp_data_n[i * DATA_RATIO + j] = out_rsp_data[i][ibuf_dout_offset[i * DATA_RATIO + j] * DATA_IN_WIDTH +: DATA_IN_WIDTH];
end end
end end
@ -335,11 +341,11 @@ module VX_mem_coalescer #(
assign out_rsp_uuid = '0; assign out_rsp_uuid = '0;
end end
reg [NUM_REQS-1:0][BATCH_SIZE_W-1:0] out_req_offset; reg [NUM_REQS-1:0][DATA_RATIO_W-1:0] out_req_offset;
reg [NUM_REQS-1:0] out_req_pmask; reg [NUM_REQS-1:0] out_req_pmask;
always @(posedge clk) begin always @(posedge clk) begin
if (ibuf_push) begin if (req_sent) begin
out_req_offset <= ibuf_din_offset; out_req_offset <= ibuf_din_offset;
out_req_pmask <= ibuf_din_pmask; out_req_pmask <= ibuf_din_pmask;
end end
@ -351,30 +357,30 @@ module VX_mem_coalescer #(
if (out_req_fire) begin if (out_req_fire) begin
if (out_req_rw) begin if (out_req_rw) begin
`TRACE(1, ("%d: %s-out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)); `TRACE(1, ("%d: %s-out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask));
`TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS); `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS);
`TRACE(1, (", atype=")); `TRACE(1, (", atype="));
`TRACE_ARRAY1D(1, "%b", out_req_atype, OUT_REQS); `TRACE_ARRAY1D(1, "%b", out_req_atype, OUT_REQS);
`TRACE(1, (", byteen=")); `TRACE(1, (", byteen="));
`TRACE_ARRAY1D(1, "0x%h", out_req_byteen, OUT_REQS); `TRACE_ARRAY1D(1, "0x%h", out_req_byteen, OUT_REQS);
`TRACE(1, (", data=")); `TRACE(1, (", data="));
`TRACE_ARRAY1D(1, "0x%0h", out_req_data, OUT_REQS); `TRACE_ARRAY1D(1, "0x%0h", out_req_data, OUT_REQS);
end else begin end else begin
`TRACE(1, ("%d: %s-out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)); `TRACE(1, ("%d: %s-out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask));
`TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS); `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS);
`TRACE(1, (", atype=")); `TRACE(1, (", atype="));
`TRACE_ARRAY1D(1, "%b", out_req_atype, OUT_REQS); `TRACE_ARRAY1D(1, "%b", out_req_atype, OUT_REQS);
end end
`TRACE(1, (", offset=")); `TRACE(1, (", offset="));
`TRACE_ARRAY1D(1, "%0d", out_req_offset, NUM_REQS); `TRACE_ARRAY1D(1, "%0d", out_req_offset, NUM_REQS);
`TRACE(1, (", pmask=%b, tag=0x%0h (#%0d)\n", out_req_pmask, out_req_tag, out_req_uuid)); `TRACE(1, (", pmask=%b, tag=0x%0h (#%0d)\n", out_req_pmask, out_req_tag, out_req_uuid));
if ($countones(out_req_pmask) > 1) begin if ($countones(out_req_pmask) > 1) begin
`TRACE(1, ("%t: *** %s: coalescing=%b (#%0d)\n", $time, INSTANCE_ID, out_req_pmask, out_req_uuid)); `TRACE(1, ("%t: *** %s: coalesced=%d (#%0d)\n", $time, INSTANCE_ID, $countones(out_req_pmask), out_req_uuid));
end end
end end
if (out_rsp_fire) begin if (out_rsp_fire) begin
`TRACE(1, ("%d: %s-out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask)); `TRACE(1, ("%d: %s-out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask));
`TRACE_ARRAY1D(1, "0x%0h", out_rsp_data, OUT_REQS); `TRACE_ARRAY1D(1, "0x%0h", out_rsp_data, OUT_REQS);
`TRACE(1, (", offset=")); `TRACE(1, (", offset="));
`TRACE_ARRAY1D(1, "%0d", ibuf_dout_offset, NUM_REQS); `TRACE_ARRAY1D(1, "%0d", ibuf_dout_offset, NUM_REQS);
`TRACE(1, (", eop=%b, pmask=%b, tag=0x%0h (#%0d)\n", out_rsp_eop, ibuf_dout_pmask, out_rsp_tag, out_rsp_uuid)); `TRACE(1, (", eop=%b, pmask=%b, tag=0x%0h (#%0d)\n", out_rsp_eop, ibuf_dout_pmask, out_rsp_tag, out_rsp_uuid));
end end

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -23,7 +23,7 @@ module VX_mem_scheduler #(
parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE), parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE),
parameter ATYPE_WIDTH = 1, parameter ATYPE_WIDTH = 1,
parameter TAG_WIDTH = 8, parameter TAG_WIDTH = 8,
parameter UUID_WIDTH = 0, // upper section of the request tag contains the UUID parameter UUID_WIDTH = 0, // upper section of the request tag contains the UUID
parameter CORE_QUEUE_SIZE= 8, parameter CORE_QUEUE_SIZE= 8,
parameter MEM_QUEUE_SIZE= CORE_QUEUE_SIZE, parameter MEM_QUEUE_SIZE= CORE_QUEUE_SIZE,
parameter RSP_PARTIAL = 0, parameter RSP_PARTIAL = 0,
@ -54,7 +54,7 @@ module VX_mem_scheduler #(
input wire [CORE_REQS-1:0][WORD_WIDTH-1:0] core_req_data, input wire [CORE_REQS-1:0][WORD_WIDTH-1:0] core_req_data,
input wire [TAG_WIDTH-1:0] core_req_tag, input wire [TAG_WIDTH-1:0] core_req_tag,
output wire core_req_ready, output wire core_req_ready,
output wire core_req_empty, output wire core_req_empty,
output wire core_req_sent, output wire core_req_sent,
// Core response // Core response
@ -81,7 +81,7 @@ module VX_mem_scheduler #(
input wire mem_rsp_valid, input wire mem_rsp_valid,
input wire [MEM_CHANNELS-1:0] mem_rsp_mask, input wire [MEM_CHANNELS-1:0] mem_rsp_mask,
input wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_rsp_data, input wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_rsp_data,
input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag, input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag,
output wire mem_rsp_ready output wire mem_rsp_ready
); );
localparam BATCH_SEL_WIDTH = `UP(MEM_BATCH_BITS); localparam BATCH_SEL_WIDTH = `UP(MEM_BATCH_BITS);
@ -110,7 +110,7 @@ module VX_mem_scheduler #(
wire reqq_valid; wire reqq_valid;
wire [CORE_REQS-1:0] reqq_mask; wire [CORE_REQS-1:0] reqq_mask;
wire reqq_rw; wire reqq_rw;
wire [CORE_REQS-1:0][WORD_SIZE-1:0] reqq_byteen; wire [CORE_REQS-1:0][WORD_SIZE-1:0] reqq_byteen;
wire [CORE_REQS-1:0][ADDR_WIDTH-1:0] reqq_addr; wire [CORE_REQS-1:0][ADDR_WIDTH-1:0] reqq_addr;
wire [CORE_REQS-1:0][ATYPE_WIDTH-1:0] reqq_atype; wire [CORE_REQS-1:0][ATYPE_WIDTH-1:0] reqq_atype;
@ -118,7 +118,7 @@ module VX_mem_scheduler #(
wire [REQQ_TAG_WIDTH-1:0] reqq_tag; wire [REQQ_TAG_WIDTH-1:0] reqq_tag;
wire reqq_ready; wire reqq_ready;
wire reqq_valid_s; wire reqq_valid_s;
wire [MERGED_REQS-1:0] reqq_mask_s; wire [MERGED_REQS-1:0] reqq_mask_s;
wire reqq_rw_s; wire reqq_rw_s;
wire [MERGED_REQS-1:0][LINE_SIZE-1:0] reqq_byteen_s; wire [MERGED_REQS-1:0][LINE_SIZE-1:0] reqq_byteen_s;
@ -139,9 +139,9 @@ module VX_mem_scheduler #(
wire mem_req_ready_s; wire mem_req_ready_s;
wire mem_rsp_valid_s; wire mem_rsp_valid_s;
wire [CORE_REQS-1:0] mem_rsp_mask_s; wire [CORE_CHANNELS-1:0] mem_rsp_mask_s;
wire [CORE_REQS-1:0][WORD_WIDTH-1:0] mem_rsp_data_s; wire [CORE_CHANNELS-1:0][WORD_WIDTH-1:0] mem_rsp_data_s;
wire [REQQ_TAG_WIDTH-1:0] mem_rsp_tag_s; wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_s;
wire mem_rsp_ready_s; wire mem_rsp_ready_s;
wire crsp_valid; wire crsp_valid;
@ -159,7 +159,7 @@ module VX_mem_scheduler #(
wire ibuf_ready = (core_req_rw || ~ibuf_full); wire ibuf_ready = (core_req_rw || ~ibuf_full);
wire reqq_valid_in = core_req_valid && ibuf_ready; wire reqq_valid_in = core_req_valid && ibuf_ready;
wire reqq_ready_in; wire reqq_ready_in;
wire [REQQ_TAG_WIDTH-1:0] reqq_tag_u; wire [REQQ_TAG_WIDTH-1:0] reqq_tag_u;
if (UUID_WIDTH != 0) begin if (UUID_WIDTH != 0) begin
assign reqq_tag_u = {core_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr}; assign reqq_tag_u = {core_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr};
@ -169,7 +169,7 @@ module VX_mem_scheduler #(
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + ATYPE_WIDTH + WORD_WIDTH) + REQQ_TAG_WIDTH), .DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + ATYPE_WIDTH + WORD_WIDTH) + REQQ_TAG_WIDTH),
.SIZE (CORE_QUEUE_SIZE), .SIZE (CORE_QUEUE_SIZE),
.OUT_REG (1) .OUT_REG (1)
) req_queue ( ) req_queue (
.clk (clk), .clk (clk),
@ -188,7 +188,7 @@ module VX_mem_scheduler #(
// no pending requests // no pending requests
assign core_req_empty = !reqq_valid && ibuf_empty; assign core_req_empty = !reqq_valid && ibuf_empty;
// notify request submission // notify request submission
assign core_req_sent = reqq_valid && reqq_ready; assign core_req_sent = reqq_valid && reqq_ready;
// Index buffer /////////////////////////////////////////////////////////// // Index buffer ///////////////////////////////////////////////////////////
@ -219,15 +219,15 @@ module VX_mem_scheduler #(
`UNUSED_VAR (ibuf_empty) `UNUSED_VAR (ibuf_empty)
// Handle memory coalescing /////////////////////////////////////////////// // Handle memory coalescing ///////////////////////////////////////////////
if (COALESCE_ENABLE) begin if (COALESCE_ENABLE) begin
`RESET_RELAY (coalescer_reset, reset); `RESET_RELAY (coalescer_reset, reset);
VX_mem_coalescer #( VX_mem_coalescer #(
.INSTANCE_ID ($sformatf("%s-coalescer", INSTANCE_ID)), .INSTANCE_ID ($sformatf("%s-coalescer", INSTANCE_ID)),
.NUM_REQS (CORE_REQS), .NUM_REQS (CORE_REQS),
.DATA_IN_SIZE (WORD_SIZE), .DATA_IN_SIZE (WORD_SIZE),
.DATA_OUT_SIZE (LINE_SIZE), .DATA_OUT_SIZE (LINE_SIZE),
.ADDR_WIDTH (ADDR_WIDTH), .ADDR_WIDTH (ADDR_WIDTH),
@ -238,7 +238,7 @@ module VX_mem_scheduler #(
) coalescer ( ) coalescer (
.clk (clk), .clk (clk),
.reset (coalescer_reset), .reset (coalescer_reset),
// Input request // Input request
.in_req_valid (reqq_valid), .in_req_valid (reqq_valid),
.in_req_mask (reqq_mask), .in_req_mask (reqq_mask),
@ -280,7 +280,7 @@ module VX_mem_scheduler #(
assign reqq_valid_s = reqq_valid; assign reqq_valid_s = reqq_valid;
assign reqq_mask_s = reqq_mask; assign reqq_mask_s = reqq_mask;
assign reqq_rw_s = reqq_rw; assign reqq_rw_s = reqq_rw;
assign reqq_byteen_s= reqq_byteen; assign reqq_byteen_s= reqq_byteen;
assign reqq_addr_s = reqq_addr; assign reqq_addr_s = reqq_addr;
assign reqq_atype_s = reqq_atype; assign reqq_atype_s = reqq_atype;
@ -292,18 +292,18 @@ module VX_mem_scheduler #(
assign mem_rsp_mask_s = mem_rsp_mask; assign mem_rsp_mask_s = mem_rsp_mask;
assign mem_rsp_data_s = mem_rsp_data; assign mem_rsp_data_s = mem_rsp_data;
assign mem_rsp_tag_s = mem_rsp_tag; assign mem_rsp_tag_s = mem_rsp_tag;
assign mem_rsp_ready = mem_rsp_ready_s; assign mem_rsp_ready = mem_rsp_ready_s;
end end
// Handle memory requests ///////////////////////////////////////////////// // Handle memory requests /////////////////////////////////////////////////
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0] mem_req_mask_b; wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0] mem_req_mask_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_b; wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_b; wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][ATYPE_WIDTH-1:0] mem_req_atype_b; wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][ATYPE_WIDTH-1:0] mem_req_atype_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_b; wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_b;
wire [BATCH_SEL_WIDTH-1:0] req_batch_idx; wire [BATCH_SEL_WIDTH-1:0] req_batch_idx;
for (genvar i = 0; i < MEM_BATCHES; ++i) begin for (genvar i = 0; i < MEM_BATCHES; ++i) begin
@ -331,14 +331,19 @@ module VX_mem_scheduler #(
assign mem_req_addr_s = mem_req_addr_b[req_batch_idx]; assign mem_req_addr_s = mem_req_addr_b[req_batch_idx];
assign mem_req_atype_s = mem_req_atype_b[req_batch_idx]; assign mem_req_atype_s = mem_req_atype_b[req_batch_idx];
assign mem_req_data_s = mem_req_data_b[req_batch_idx]; assign mem_req_data_s = mem_req_data_b[req_batch_idx];
if (MEM_BATCHES != 1) begin if (MEM_BATCHES != 1) begin
reg [MEM_BATCH_BITS-1:0] req_batch_idx_r; reg [MEM_BATCH_BITS-1:0] req_batch_idx_r;
wire is_degenerate_batch = ~(| mem_req_mask_s);
wire mem_req_valid_b = reqq_valid_s && ~is_degenerate_batch;
wire mem_req_ready_b = mem_req_ready_s || is_degenerate_batch;
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
req_batch_idx_r <= '0; req_batch_idx_r <= '0;
end else begin end else begin
if (reqq_valid_s && mem_req_ready_s) begin if (reqq_valid_s && mem_req_ready_b) begin
if (req_sent_all) begin if (req_sent_all) begin
req_batch_idx_r <= '0; req_batch_idx_r <= '0;
end else begin end else begin
@ -352,10 +357,10 @@ module VX_mem_scheduler #(
wire [MEM_BATCHES-1:0][MEM_BATCH_BITS-1:0] req_batch_idxs; wire [MEM_BATCHES-1:0][MEM_BATCH_BITS-1:0] req_batch_idxs;
wire [MEM_BATCH_BITS-1:0] req_batch_idx_last; wire [MEM_BATCH_BITS-1:0] req_batch_idx_last;
for (genvar i = 0; i < MEM_BATCHES; ++i) begin for (genvar i = 0; i < MEM_BATCHES; ++i) begin
assign req_batch_valids[i] = (| mem_req_mask_b[i]); assign req_batch_valids[i] = (| mem_req_mask_b[i]);
assign req_batch_idxs[i] = MEM_BATCH_BITS'(i); assign req_batch_idxs[i] = MEM_BATCH_BITS'(i);
end end
VX_find_first #( VX_find_first #(
.N (MEM_BATCHES), .N (MEM_BATCHES),
@ -368,21 +373,22 @@ module VX_mem_scheduler #(
`UNUSED_PIN (valid_out) `UNUSED_PIN (valid_out)
); );
assign req_batch_idx = req_batch_idx_r; assign mem_req_valid_s = mem_req_valid_b;
assign req_sent_all = mem_req_ready_s && (req_batch_idx_r == req_batch_idx_last); assign req_batch_idx = req_batch_idx_r;
assign req_sent_all = mem_req_ready_b && (req_batch_idx_r == req_batch_idx_last);
assign mem_req_tag_s = {reqq_tag_s, req_batch_idx}; assign mem_req_tag_s = {reqq_tag_s, req_batch_idx};
end else begin end else begin
assign mem_req_valid_s = reqq_valid_s;
assign req_batch_idx = '0; assign req_batch_idx = '0;
assign req_sent_all = mem_req_ready_s; assign req_sent_all = mem_req_ready_s;
assign mem_req_tag_s = reqq_tag_s; assign mem_req_tag_s = reqq_tag_s;
end end
assign mem_req_valid_s = reqq_valid_s;
assign reqq_ready_s = req_sent_all; assign reqq_ready_s = req_sent_all;
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (MEM_CHANNELS + 1 + MEM_CHANNELS * (LINE_SIZE + MEM_ADDR_WIDTH + ATYPE_WIDTH + LINE_WIDTH) + MEM_TAG_WIDTH), .DATAW (MEM_CHANNELS + 1 + MEM_CHANNELS * (LINE_SIZE + MEM_ADDR_WIDTH + ATYPE_WIDTH + LINE_WIDTH) + MEM_TAG_WIDTH),
.SIZE (`TO_OUT_BUF_SIZE(MEM_OUT_BUF)), .SIZE (`TO_OUT_BUF_SIZE(MEM_OUT_BUF)),
@ -415,7 +421,7 @@ module VX_mem_scheduler #(
localparam j = r % CORE_CHANNELS; localparam j = r % CORE_CHANNELS;
assign curr_mask[r] = (BATCH_SEL_WIDTH'(i) == rsp_batch_idx) && mem_rsp_mask_s[j]; assign curr_mask[r] = (BATCH_SEL_WIDTH'(i) == rsp_batch_idx) && mem_rsp_mask_s[j];
end end
assign rsp_rem_mask_n = rsp_rem_mask[ibuf_raddr] & ~curr_mask; assign rsp_rem_mask_n = rsp_rem_mask[ibuf_raddr] & ~curr_mask;
wire rsp_complete = ~(| rsp_rem_mask_n); wire rsp_complete = ~(| rsp_rem_mask_n);
@ -457,19 +463,19 @@ module VX_mem_scheduler #(
end else begin end else begin
reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0];
reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store_n; reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store_n;
reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0]; reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0];
always @(*) begin always @(*) begin
rsp_store_n = rsp_store[ibuf_raddr]; rsp_store_n = rsp_store[ibuf_raddr];
for (integer i = 0; i < CORE_CHANNELS; ++i) begin for (integer i = 0; i < CORE_CHANNELS; ++i) begin
if ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]) begin if ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]) begin
rsp_store_n[(rsp_batch_idx * CORE_CHANNELS + i) * WORD_WIDTH +: WORD_WIDTH] = mem_rsp_data_s[i]; rsp_store_n[(rsp_batch_idx * CORE_CHANNELS + i) * WORD_WIDTH +: WORD_WIDTH] = mem_rsp_data_s[i];
end end
end end
end end
always @(posedge clk) begin always @(posedge clk) begin
if (ibuf_push) begin if (ibuf_push) begin
rsp_orig_mask[ibuf_waddr] <= core_req_mask; rsp_orig_mask[ibuf_waddr] <= core_req_mask;
@ -490,10 +496,11 @@ module VX_mem_scheduler #(
end end
assign mem_rsp_ready_s = crsp_ready || ~rsp_complete; assign mem_rsp_ready_s = crsp_ready || ~rsp_complete;
end end
if (UUID_WIDTH != 0) begin if (UUID_WIDTH != 0) begin
assign crsp_tag = {mem_rsp_tag_s[REQQ_TAG_WIDTH-1 -: UUID_WIDTH], ibuf_dout}; assign crsp_tag = {mem_rsp_tag_s[MEM_TAG_WIDTH-1 -: UUID_WIDTH], ibuf_dout};
end else begin end else begin
assign crsp_tag = ibuf_dout; assign crsp_tag = ibuf_dout;
end end
@ -509,11 +516,11 @@ module VX_mem_scheduler #(
) rsp_buf ( ) rsp_buf (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.valid_in (crsp_valid), .valid_in (crsp_valid),
.ready_in (crsp_ready), .ready_in (crsp_ready),
.data_in ({crsp_mask, crsp_sop, crsp_eop, crsp_data, crsp_tag}), .data_in ({crsp_mask, crsp_sop, crsp_eop, crsp_data, crsp_tag}),
.data_out ({core_rsp_mask, core_rsp_sop, core_rsp_eop, core_rsp_data, core_rsp_tag}), .data_out ({core_rsp_mask, core_rsp_sop, core_rsp_eop, core_rsp_data, core_rsp_tag}),
.valid_out (core_rsp_valid), .valid_out (core_rsp_valid),
.ready_out (core_rsp_ready) .ready_out (core_rsp_ready)
); );
@ -541,14 +548,14 @@ module VX_mem_scheduler #(
end end
end end
if (ibuf_push) begin if (ibuf_push) begin
pending_reqs_time[ibuf_waddr] <= {req_dbg_uuid, ibuf_din, $time}; pending_reqs_time[ibuf_waddr] <= {req_dbg_uuid, ibuf_din, $time};
end end
for (integer i = 0; i < CORE_QUEUE_SIZE; ++i) begin for (integer i = 0; i < CORE_QUEUE_SIZE; ++i) begin
if (pending_reqs_valid[i]) begin if (pending_reqs_valid[i]) begin
`ASSERT(($time - pending_reqs_time[i][63:0]) < STALL_TIMEOUT, `ASSERT(($time - pending_reqs_time[i][63:0]) < STALL_TIMEOUT,
("%t: *** %s response timeout: tag=0x%0h (#%0d)", ("%t: *** %s response timeout: tag=0x%0h (#%0d)",
$time, INSTANCE_ID, pending_reqs_time[i][64 +: TAG_ID_WIDTH], pending_reqs_time[i][64+TAG_ID_WIDTH +: `UP(UUID_WIDTH)])); $time, INSTANCE_ID, pending_reqs_time[i][64 +: TAG_ID_WIDTH], pending_reqs_time[i][64+TAG_ID_WIDTH +: `UP(UUID_WIDTH)]));
end end
end end
@ -563,8 +570,8 @@ module VX_mem_scheduler #(
wire [`UP(UUID_WIDTH)-1:0] rsp_dbg_uuid; wire [`UP(UUID_WIDTH)-1:0] rsp_dbg_uuid;
if (UUID_WIDTH != 0) begin if (UUID_WIDTH != 0) begin
assign mem_req_dbg_uuid = mem_req_tag_s[REQQ_TAG_WIDTH-1 -: UUID_WIDTH]; assign mem_req_dbg_uuid = mem_req_tag_s[MEM_TAG_WIDTH-1 -: UUID_WIDTH];
assign mem_rsp_dbg_uuid = mem_rsp_tag_s[REQQ_TAG_WIDTH-1 -: UUID_WIDTH]; assign mem_rsp_dbg_uuid = mem_rsp_tag_s[MEM_TAG_WIDTH-1 -: UUID_WIDTH];
assign rsp_dbg_uuid = core_rsp_tag[TAG_WIDTH-1 -: UUID_WIDTH]; assign rsp_dbg_uuid = core_rsp_tag[TAG_WIDTH-1 -: UUID_WIDTH];
end else begin end else begin
assign mem_req_dbg_uuid = '0; assign mem_req_dbg_uuid = '0;
@ -572,25 +579,27 @@ module VX_mem_scheduler #(
assign rsp_dbg_uuid = '0; assign rsp_dbg_uuid = '0;
end end
wire [CORE_QUEUE_ADDRW-1:0] ibuf_waddr_s = mem_req_tag_s[MEM_BATCH_BITS +: CORE_QUEUE_ADDRW];
wire mem_req_fire_s = mem_req_valid_s && mem_req_ready_s; wire mem_req_fire_s = mem_req_valid_s && mem_req_ready_s;
always @(posedge clk) begin always @(posedge clk) begin
if (core_req_fire) begin if (core_req_fire) begin
if (core_req_rw) begin if (core_req_rw) begin
`TRACE(1, ("%d: %s-core-req-wr: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)); `TRACE(1, ("%d: %s-core-req-wr: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask));
`TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS); `TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS);
`TRACE(1, (", byteen=")); `TRACE(1, (", byteen="));
`TRACE_ARRAY1D(1, "0x%h", core_req_byteen, CORE_REQS); `TRACE_ARRAY1D(1, "0x%h", core_req_byteen, CORE_REQS);
`TRACE(1, (", data=")); `TRACE(1, (", data="));
`TRACE_ARRAY1D(1, "0x%0h", core_req_data, CORE_REQS); `TRACE_ARRAY1D(1, "0x%0h", core_req_data, CORE_REQS);
end else begin end else begin
`TRACE(1, ("%d: %s-core-req-rd: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)); `TRACE(1, ("%d: %s-core-req-rd: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask));
`TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS); `TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS);
end end
`TRACE(1, (", tag=0x%0h (#%0d)\n", core_req_tag, req_dbg_uuid)); `TRACE(1, (", tag=0x%0h (#%0d)\n", core_req_tag, req_dbg_uuid));
end end
if (core_rsp_valid && core_rsp_ready) begin if (core_rsp_valid && core_rsp_ready) begin
`TRACE(1, ("%d: %s-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop)); `TRACE(1, ("%d: %s-core-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop));
`TRACE_ARRAY1D(1, "0x%0h", core_rsp_data, CORE_REQS); `TRACE_ARRAY1D(1, "0x%0h", core_rsp_data, CORE_REQS);
`TRACE(1, (", tag=0x%0h (#%0d)\n", core_rsp_tag, rsp_dbg_uuid)); `TRACE(1, (", tag=0x%0h (#%0d)\n", core_rsp_tag, rsp_dbg_uuid));
end end
@ -601,20 +610,20 @@ module VX_mem_scheduler #(
`TRACE(1, (", byteen=")); `TRACE(1, (", byteen="));
`TRACE_ARRAY1D(1, "0x%h", mem_req_byteen_s, CORE_CHANNELS); `TRACE_ARRAY1D(1, "0x%h", mem_req_byteen_s, CORE_CHANNELS);
`TRACE(1, (", data=")); `TRACE(1, (", data="));
`TRACE_ARRAY1D(1, "0x%0h", mem_req_data_s, CORE_CHANNELS); `TRACE_ARRAY1D(1, "0x%0h", mem_req_data_s, CORE_CHANNELS);
end else begin end else begin
`TRACE(1, ("%d: %s-mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)); `TRACE(1, ("%d: %s-mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s));
`TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS); `TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS);
end end
`TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr, req_batch_idx, mem_req_dbg_uuid)); `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr_s, req_batch_idx, mem_req_dbg_uuid));
end end
if (mem_rsp_fire_s) begin if (mem_rsp_fire_s) begin
`TRACE(1, ("%d: %s-mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s)); `TRACE(1, ("%d: %s-mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s));
`TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data_s, CORE_CHANNELS); `TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data_s, CORE_CHANNELS);
`TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_raddr, rsp_batch_idx, mem_rsp_dbg_uuid)); `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_raddr, rsp_batch_idx, mem_rsp_dbg_uuid));
end end
end end
`endif `endif
endmodule endmodule
`TRACING_ON `TRACING_ON

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -19,131 +19,36 @@ module VX_onehot_mux #(
parameter N = 1, parameter N = 1,
parameter MODEL = 1 parameter MODEL = 1
) ( ) (
input wire [N-1:0][DATAW-1:0] data_in, input wire [N-1:0][DATAW-1:0] data_in,
input wire [N-1:0] sel_in, input wire [N-1:0] sel_in,
output wire [DATAW-1:0] data_out output wire [DATAW-1:0] data_out
); );
if (N == 1) begin if (N == 1) begin
`UNUSED_VAR (sel_in) `UNUSED_VAR (sel_in)
assign data_out = data_in; assign data_out = data_in;
end else if (N == 2) begin end else if (MODEL == 1) begin
`UNUSED_VAR (sel_in) wire [N-1:0][DATAW-1:0] mask;
assign data_out = sel_in[0] ? data_in[0] : data_in[1]; for (genvar i = 0; i < N; ++i) begin
end else if (N == 3) begin assign mask[i] = {DATAW{sel_in[i]}} & data_in[i];
end
for (genvar i = 0; i < DATAW; ++i) begin
wire [N-1:0] gather;
for (genvar j = 0; j < N; ++j) begin
assign gather[j] = mask[j][i];
end
assign data_out[i] = (| gather);
end
end else if (MODEL == 2) begin
reg [DATAW-1:0] data_out_r; reg [DATAW-1:0] data_out_r;
always @(*) begin always @(*) begin
case (sel_in) data_out_r = 'x;
3'b001: data_out_r = data_in[0]; for (integer i = 0; i < N; ++i) begin
3'b010: data_out_r = data_in[1]; if (sel_in[i]) begin
3'b100: data_out_r = data_in[2]; data_out_r = data_in[i];
default: data_out_r = 'x;
endcase
end
assign data_out = data_out_r;
end else if (N == 4) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
case (sel_in)
4'b0001: data_out_r = data_in[0];
4'b0010: data_out_r = data_in[1];
4'b0100: data_out_r = data_in[2];
4'b1000: data_out_r = data_in[3];
default: data_out_r = 'x;
endcase
end
assign data_out = data_out_r;
end else if (N == 5) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
case (sel_in)
5'b00001: data_out_r = data_in[0];
5'b00010: data_out_r = data_in[1];
5'b00100: data_out_r = data_in[2];
5'b01000: data_out_r = data_in[3];
5'b10000: data_out_r = data_in[4];
default: data_out_r = 'x;
endcase
end
assign data_out = data_out_r;
end else if (N == 6) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
case (sel_in)
6'b000001: data_out_r = data_in[0];
6'b000010: data_out_r = data_in[1];
6'b000100: data_out_r = data_in[2];
6'b001000: data_out_r = data_in[3];
6'b010000: data_out_r = data_in[4];
6'b100000: data_out_r = data_in[5];
default: data_out_r = 'x;
endcase
end
assign data_out = data_out_r;
end else if (N == 7) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
case (sel_in)
7'b0000001: data_out_r = data_in[0];
7'b0000010: data_out_r = data_in[1];
7'b0000100: data_out_r = data_in[2];
7'b0001000: data_out_r = data_in[3];
7'b0010000: data_out_r = data_in[4];
7'b0100000: data_out_r = data_in[5];
7'b1000000: data_out_r = data_in[6];
default: data_out_r = 'x;
endcase
end
assign data_out = data_out_r;
end else if (N == 8) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
case (sel_in)
8'b00000001: data_out_r = data_in[0];
8'b00000010: data_out_r = data_in[1];
8'b00000100: data_out_r = data_in[2];
8'b00001000: data_out_r = data_in[3];
8'b00010000: data_out_r = data_in[4];
8'b00100000: data_out_r = data_in[5];
8'b01000000: data_out_r = data_in[6];
8'b10000000: data_out_r = data_in[7];
default: data_out_r = 'x;
endcase
end
assign data_out = data_out_r;
end else begin
if (MODEL == 1) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
data_out_r = 'x;
for (integer i = 0; i < N; ++i) begin
if (sel_in[i]) begin
data_out_r = data_in[i];
end
end end
end end
assign data_out = data_out_r;
end else if (MODEL == 2) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
data_out_r = '0;
for (integer i = 0; i < N; ++i) begin
data_out_r |= {DATAW{sel_in[i]}} & data_in[i];
end
end
assign data_out = data_out_r;
end else if (MODEL == 3) begin
wire [N-1:0][DATAW-1:0] mask;
for (genvar i = 0; i < N; ++i) begin
assign mask[i] = {DATAW{sel_in[i]}} & data_in[i];
end
for (genvar i = 0; i < DATAW; ++i) begin
wire [N-1:0] gather;
for (genvar j = 0; j < N; ++j) begin
assign gather[j] = mask[j][i];
end
assign data_out[i] = (| gather);
end
end end
assign data_out = data_out_r;
end end
endmodule endmodule

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -15,8 +15,8 @@
`TRACING_OFF `TRACING_OFF
module VX_pe_serializer #( module VX_pe_serializer #(
parameter NUM_LANES = 1, parameter NUM_LANES = 1,
parameter NUM_PES = 1, parameter NUM_PES = 1,
parameter LATENCY = 1, parameter LATENCY = 1,
parameter DATA_IN_WIDTH = 1, parameter DATA_IN_WIDTH = 1,
parameter DATA_OUT_WIDTH = 1, parameter DATA_OUT_WIDTH = 1,
@ -28,12 +28,12 @@ module VX_pe_serializer #(
// input // input
input wire valid_in, input wire valid_in,
input wire [NUM_LANES-1:0][DATA_IN_WIDTH-1:0] data_in, input wire [NUM_LANES-1:0][DATA_IN_WIDTH-1:0] data_in,
input wire [TAG_WIDTH-1:0] tag_in, input wire [TAG_WIDTH-1:0] tag_in,
output wire ready_in, output wire ready_in,
// PE // PE
output wire pe_enable, output wire pe_enable,
output wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_in, output wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_in,
input wire [NUM_PES-1:0][DATA_OUT_WIDTH-1:0] pe_data_out, input wire [NUM_PES-1:0][DATA_OUT_WIDTH-1:0] pe_data_out,
@ -43,6 +43,7 @@ module VX_pe_serializer #(
output wire [TAG_WIDTH-1:0] tag_out, output wire [TAG_WIDTH-1:0] tag_out,
input wire ready_out input wire ready_out
); );
wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_in_s;
wire valid_out_s; wire valid_out_s;
wire [TAG_WIDTH-1:0] tag_out_s; wire [TAG_WIDTH-1:0] tag_out_s;
wire enable; wire enable;
@ -59,6 +60,17 @@ module VX_pe_serializer #(
.data_out ({valid_out_s, tag_out_s}) .data_out ({valid_out_s, tag_out_s})
); );
VX_pipe_register #(
.DATAW (NUM_PES * DATA_IN_WIDTH),
.DEPTH (PE_REG)
) pe_reg (
.clk (clk),
.reset (reset),
.enable (enable),
.data_in (pe_data_in_s),
.data_out (pe_data_in)
);
if (NUM_LANES != NUM_PES) begin if (NUM_LANES != NUM_PES) begin
localparam BATCH_SIZE = NUM_LANES / NUM_PES; localparam BATCH_SIZE = NUM_LANES / NUM_PES;
@ -67,6 +79,10 @@ module VX_pe_serializer #(
reg [BATCH_SIZEW-1:0] batch_in_idx; reg [BATCH_SIZEW-1:0] batch_in_idx;
reg [BATCH_SIZEW-1:0] batch_out_idx; reg [BATCH_SIZEW-1:0] batch_out_idx;
for (genvar i = 0; i < NUM_PES; ++i) begin
assign pe_data_in_s[i] = data_in[batch_in_idx * NUM_PES + i];
end
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
batch_in_idx <= '0; batch_in_idx <= '0;
@ -81,45 +97,29 @@ module VX_pe_serializer #(
end end
end end
wire batch_in_done = (batch_in_idx == BATCH_SIZEW'(BATCH_SIZE-1)); wire batch_in_done = (batch_in_idx == BATCH_SIZEW'(BATCH_SIZE-1));
wire batch_out_done = (batch_out_idx == BATCH_SIZEW'(BATCH_SIZE-1)); wire batch_out_done = (batch_out_idx == BATCH_SIZEW'(BATCH_SIZE-1));
wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_in_s;
for (genvar i = 0; i < NUM_PES; ++i) begin
assign pe_data_in_s[i] = data_in[batch_in_idx * NUM_PES + i];
end
VX_pipe_register #(
.DATAW (NUM_PES * DATA_IN_WIDTH),
.DEPTH (PE_REG)
) pe_reg (
.clk (clk),
.reset (reset),
.enable (enable),
.data_in (pe_data_in_s),
.data_out (pe_data_in)
);
reg valid_out_r; reg valid_out_r;
reg [BATCH_SIZE-1:0][NUM_PES-1:0][DATA_OUT_WIDTH-1:0] data_out_r; reg [BATCH_SIZE-1:0][NUM_PES-1:0][DATA_OUT_WIDTH-1:0] data_out_r;
reg [TAG_WIDTH-1:0] tag_out_r; reg [TAG_WIDTH-1:0] tag_out_r;
wire valid_out_b = valid_out_s && batch_out_done; wire valid_out_b = valid_out_s && batch_out_done;
wire enable_r = ready_out || ~valid_out; wire ready_out_b = ready_out || ~valid_out;
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
valid_out_r <= 1'b0; valid_out_r <= 1'b0;
end else if (enable_r) begin end else if (ready_out_b) begin
valid_out_r <= valid_out_b; valid_out_r <= valid_out_b;
end end
if (enable_r) begin if (ready_out_b) begin
data_out_r[batch_out_idx] <= pe_data_out; data_out_r[batch_out_idx] <= pe_data_out;
tag_out_r <= tag_out_s; tag_out_r <= tag_out_s;
end end
end end
assign enable = (enable_r || ~valid_out_b); assign enable = ready_out_b || ~valid_out_b;
assign ready_in = enable && batch_in_done; assign ready_in = enable && batch_in_done;
assign pe_enable = enable; assign pe_enable = enable;
@ -130,16 +130,17 @@ module VX_pe_serializer #(
end else begin end else begin
assign pe_data_in_s = data_in;
assign enable = ready_out || ~valid_out; assign enable = ready_out || ~valid_out;
assign ready_in = enable; assign ready_in = enable;
assign pe_enable = enable; assign pe_enable = enable;
assign pe_data_in= data_in;
assign valid_out = valid_out_s; assign valid_out = valid_out_s;
assign data_out = pe_data_out; assign data_out = pe_data_out;
assign tag_out = tag_out_s; assign tag_out = tag_out_s;
end end
endmodule endmodule

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -13,44 +13,53 @@
`include "VX_platform.vh" `include "VX_platform.vh"
`TRACING_OFF //`TRACING_OFF
module VX_pending_size #( module VX_pending_size #(
parameter SIZE = 1, parameter SIZE = 1,
parameter INCRW = 1, parameter INCRW = 1,
parameter DECRW = 1, parameter DECRW = 1,
parameter SIZEW = `CLOG2(SIZE+1) parameter ALM_FULL = (SIZE - 1),
parameter ALM_EMPTY = 1,
parameter SIZEW = `CLOG2(SIZE+1)
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire [INCRW-1:0] incr, input wire [INCRW-1:0] incr,
input wire [DECRW-1:0] decr, input wire [DECRW-1:0] decr,
output wire empty, output wire empty,
output wire alm_empty,
output wire full, output wire full,
output wire alm_full,
output wire [SIZEW-1:0] size output wire [SIZEW-1:0] size
); );
`STATIC_ASSERT(INCRW <= SIZEW, ("invalid parameter")) `STATIC_ASSERT(INCRW <= SIZEW, ("invalid parameter: %d vs %d", INCRW, SIZEW))
`STATIC_ASSERT(DECRW <= SIZEW, ("invalid parameter")) `STATIC_ASSERT(DECRW <= SIZEW, ("invalid parameter: %d vs %d", DECRW, SIZEW))
localparam ADDRW = `LOG2UP(SIZE); localparam ADDRW = `LOG2UP(SIZE);
reg empty_r; reg empty_r, alm_empty_r;
reg full_r; reg full_r, alm_full_r;
if (INCRW != 1 || DECRW != 1) begin if (INCRW != 1 || DECRW != 1) begin
reg [SIZEW-1:0] size_r; reg [SIZEW-1:0] size_r;
wire [SIZEW-1:0] size_n;
assign size_n = size_r + SIZEW'(incr) - SIZEW'(decr); wire [SIZEW-1:0] size_n = size_r + SIZEW'(incr) - SIZEW'(decr);
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
size_r <= '0; empty_r <= 1;
empty_r <= 1; alm_empty_r <= 1;
full_r <= 0; alm_full_r <= 0;
full_r <= 0;
size_r <= '0;
end else begin end else begin
size_r <= size_n; `ASSERT((SIZEW'(incr) >= SIZEW'(decr)) || (size_n >= size_r), ("runtime error: counter overflow"));
empty_r <= (size_n == SIZEW'(0)); `ASSERT((SIZEW'(incr) <= SIZEW'(decr)) || (size_n <= size_r), ("runtime error: counter underflow"));
full_r <= (size_n == SIZEW'(SIZE)); size_r <= size_n;
empty_r <= (size_n == SIZEW'(0));
alm_empty_r <= (size_n == SIZEW'(ALM_EMPTY));
full_r <= (size_n == SIZEW'(SIZE));
alm_full_r <= (size_n == SIZEW'(ALM_FULL));
end end
end end
@ -59,30 +68,47 @@ module VX_pending_size #(
end else begin end else begin
reg [ADDRW-1:0] used_r; reg [ADDRW-1:0] used_r;
wire [ADDRW-1:0] used_n;
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
used_r <= '0; empty_r <= 1;
empty_r <= 1; alm_empty_r <= 1;
full_r <= 0; full_r <= 0;
end else begin alm_full_r <= 0;
`ASSERT(~(incr && ~decr) || ~full, ("runtime error: incrementing full counter")); used_r <= '0;
`ASSERT(~(decr && ~incr) || ~empty, ("runtime error: decrementing empty counter")); end else begin
`ASSERT(~(incr && ~decr) || ~full, ("runtime error: counter overflow"));
`ASSERT(~(decr && ~incr) || ~empty, ("runtime error: counter underflow"));
if (incr) begin if (incr) begin
if (~decr) begin if (~decr) begin
empty_r <= 0; empty_r <= 0;
if (used_r == ADDRW'(ALM_EMPTY))
alm_empty_r <= 0;
if (used_r == ADDRW'(SIZE-1)) if (used_r == ADDRW'(SIZE-1))
full_r <= 1; full_r <= 1;
if (used_r == ADDRW'(ALM_FULL-1))
alm_full_r <= 1;
end end
end else if (decr) begin end else if (decr) begin
full_r <= 0;
if (used_r == ADDRW'(1)) if (used_r == ADDRW'(1))
empty_r <= 1; empty_r <= 1;
if (used_r == ADDRW'(ALM_EMPTY+1))
alm_empty_r <= 1;
full_r <= 0;
if (used_r == ADDRW'(ALM_FULL))
alm_full_r <= 0;
end end
used_r <= $signed(used_r) + ADDRW'($signed(2'(incr) - 2'(decr))); used_r <= used_n;
end end
end end
if (SIZE == 2) begin
assign used_n = used_r ^ (incr ^ decr);
end else begin
assign used_n = $signed(used_r) + ADDRW'($signed(2'(incr) - 2'(decr)));
end
if (SIZE > 1) begin if (SIZE > 1) begin
if (SIZEW > ADDRW) begin if (SIZEW > ADDRW) begin
assign size = {full_r, used_r}; assign size = {full_r, used_r};
@ -95,8 +121,10 @@ module VX_pending_size #(
end end
assign empty = empty_r; assign empty = empty_r;
assign full = full_r; assign alm_empty = alm_empty_r;
assign alm_full = alm_full_r;
assign full = full_r;
endmodule endmodule
`TRACING_ON //`TRACING_ON

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -23,8 +23,8 @@ module VX_priority_arbiter #(
output wire [NUM_REQS-1:0] grant_onehot, output wire [NUM_REQS-1:0] grant_onehot,
output wire grant_valid output wire grant_valid
); );
if (NUM_REQS == 1) begin if (NUM_REQS == 1) begin
assign grant_index = '0; assign grant_index = '0;
assign grant_onehot = requests; assign grant_onehot = requests;
assign grant_valid = requests[0]; assign grant_valid = requests[0];
@ -41,6 +41,6 @@ module VX_priority_arbiter #(
); );
end end
endmodule endmodule
`TRACING_ON `TRACING_ON

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -16,24 +16,23 @@
`TRACING_OFF `TRACING_OFF
module VX_rr_arbiter #( module VX_rr_arbiter #(
parameter NUM_REQS = 1, parameter NUM_REQS = 1,
parameter LOCK_ENABLE = 0,
parameter MODEL = 1, parameter MODEL = 1,
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS) parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire [NUM_REQS-1:0] requests, input wire [NUM_REQS-1:0] requests,
output wire [LOG_NUM_REQS-1:0] grant_index, output wire [LOG_NUM_REQS-1:0] grant_index,
output wire [NUM_REQS-1:0] grant_onehot, output wire [NUM_REQS-1:0] grant_onehot,
output wire grant_valid, output wire grant_valid,
input wire grant_unlock input wire grant_ready
); );
if (NUM_REQS == 1) begin if (NUM_REQS == 1) begin
`UNUSED_VAR (clk) `UNUSED_VAR (clk)
`UNUSED_VAR (reset) `UNUSED_VAR (reset)
`UNUSED_VAR (grant_unlock) `UNUSED_VAR (grant_ready)
assign grant_index = '0; assign grant_index = '0;
assign grant_onehot = requests; assign grant_onehot = requests;
assign grant_valid = requests[0]; assign grant_valid = requests[0];
@ -41,7 +40,7 @@ module VX_rr_arbiter #(
end else if (NUM_REQS == 2) begin end else if (NUM_REQS == 2) begin
reg [LOG_NUM_REQS-1:0] grant_index_r; reg [LOG_NUM_REQS-1:0] grant_index_r;
reg [NUM_REQS-1:0] grant_onehot_r; reg [NUM_REQS-1:0] grant_onehot_r;
reg [LOG_NUM_REQS-1:0] state; reg [LOG_NUM_REQS-1:0] state;
always @(*) begin always @(*) begin
@ -52,279 +51,279 @@ module VX_rr_arbiter #(
endcase endcase
end end
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
state <= '0; state <= '0;
end else if (!LOCK_ENABLE || grant_unlock) begin end else if (grant_ready) begin
state <= grant_index_r; state <= grant_index_r;
end end
end end
assign grant_index = grant_index_r; assign grant_index = grant_index_r;
assign grant_onehot = grant_onehot_r; assign grant_onehot = grant_onehot_r;
assign grant_valid = (| requests); assign grant_valid = (| requests);
end /*else if (NUM_REQS == 3) begin end /*else if (NUM_REQS == 3) begin
reg [LOG_NUM_REQS-1:0] grant_index_r; reg [LOG_NUM_REQS-1:0] grant_index_r;
reg [NUM_REQS-1:0] grant_onehot_r; reg [NUM_REQS-1:0] grant_onehot_r;
reg [LOG_NUM_REQS-1:0] state; reg [LOG_NUM_REQS-1:0] state;
always @(*) begin always @(*) begin
casez ({state, requests}) casez ({state, requests})
5'b00_001, 5'b00_001,
5'b01_0?1, 5'b01_0?1,
5'b10_??1: begin grant_onehot_r = 3'b001; grant_index_r = LOG_NUM_REQS'(0); end 5'b10_??1: begin grant_onehot_r = 3'b001; grant_index_r = LOG_NUM_REQS'(0); end
5'b00_?1?, 5'b00_?1?,
5'b01_010, 5'b01_010,
5'b10_?10: begin grant_onehot_r = 3'b010; grant_index_r = LOG_NUM_REQS'(1); end 5'b10_?10: begin grant_onehot_r = 3'b010; grant_index_r = LOG_NUM_REQS'(1); end
default: begin grant_onehot_r = 3'b100; grant_index_r = LOG_NUM_REQS'(2); end default: begin grant_onehot_r = 3'b100; grant_index_r = LOG_NUM_REQS'(2); end
endcase endcase
end end
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
state <= '0; state <= '0;
end else if (!LOCK_ENABLE || grant_unlock) begin end else if (grant_ready) begin
state <= grant_index_r; state <= grant_index_r;
end end
end end
assign grant_index = grant_index_r; assign grant_index = grant_index_r;
assign grant_onehot = grant_onehot_r; assign grant_onehot = grant_onehot_r;
assign grant_valid = (| requests); assign grant_valid = (| requests);
end */else if (NUM_REQS == 4) begin end */else if (NUM_REQS == 4) begin
reg [LOG_NUM_REQS-1:0] grant_index_r; reg [LOG_NUM_REQS-1:0] grant_index_r;
reg [NUM_REQS-1:0] grant_onehot_r; reg [NUM_REQS-1:0] grant_onehot_r;
reg [LOG_NUM_REQS-1:0] state; reg [LOG_NUM_REQS-1:0] state;
always @(*) begin always @(*) begin
casez ({state, requests}) casez ({state, requests})
6'b00_0001, 6'b00_0001,
6'b01_00?1, 6'b01_00?1,
6'b10_0??1, 6'b10_0??1,
6'b11_???1: begin grant_onehot_r = 4'b0001; grant_index_r = LOG_NUM_REQS'(0); end 6'b11_???1: begin grant_onehot_r = 4'b0001; grant_index_r = LOG_NUM_REQS'(0); end
6'b00_??1?, 6'b00_??1?,
6'b01_0010, 6'b01_0010,
6'b10_0?10, 6'b10_0?10,
6'b11_??10: begin grant_onehot_r = 4'b0010; grant_index_r = LOG_NUM_REQS'(1); end 6'b11_??10: begin grant_onehot_r = 4'b0010; grant_index_r = LOG_NUM_REQS'(1); end
6'b00_?10?, 6'b00_?10?,
6'b01_?1??, 6'b01_?1??,
6'b10_0100, 6'b10_0100,
6'b11_?100: begin grant_onehot_r = 4'b0100; grant_index_r = LOG_NUM_REQS'(2); end 6'b11_?100: begin grant_onehot_r = 4'b0100; grant_index_r = LOG_NUM_REQS'(2); end
default: begin grant_onehot_r = 4'b1000; grant_index_r = LOG_NUM_REQS'(3); end default: begin grant_onehot_r = 4'b1000; grant_index_r = LOG_NUM_REQS'(3); end
endcase endcase
end end
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
state <= '0; state <= '0;
end else if (!LOCK_ENABLE || grant_unlock) begin end else if (grant_ready) begin
state <= grant_index_r; state <= grant_index_r;
end end
end end
assign grant_index = grant_index_r; assign grant_index = grant_index_r;
assign grant_onehot = grant_onehot_r; assign grant_onehot = grant_onehot_r;
assign grant_valid = (| requests); assign grant_valid = (| requests);
end /*else if (NUM_REQS == 5) begin end /*else if (NUM_REQS == 5) begin
reg [LOG_NUM_REQS-1:0] grant_index_r; reg [LOG_NUM_REQS-1:0] grant_index_r;
reg [NUM_REQS-1:0] grant_onehot_r; reg [NUM_REQS-1:0] grant_onehot_r;
reg [LOG_NUM_REQS-1:0] state; reg [LOG_NUM_REQS-1:0] state;
always @(*) begin always @(*) begin
casez ({state, requests}) casez ({state, requests})
8'b000_00001, 8'b000_00001,
8'b001_000?1, 8'b001_000?1,
8'b010_00??1, 8'b010_00??1,
8'b011_0???1, 8'b011_0???1,
8'b100_????1: begin grant_onehot_r = 5'b00001; grant_index_r = LOG_NUM_REQS'(0); end 8'b100_????1: begin grant_onehot_r = 5'b00001; grant_index_r = LOG_NUM_REQS'(0); end
8'b000_???1?, 8'b000_???1?,
8'b001_00010, 8'b001_00010,
8'b010_00?10, 8'b010_00?10,
8'b011_0??10, 8'b011_0??10,
8'b100_???10: begin grant_onehot_r = 5'b00010; grant_index_r = LOG_NUM_REQS'(1); end 8'b100_???10: begin grant_onehot_r = 5'b00010; grant_index_r = LOG_NUM_REQS'(1); end
8'b000_??10?, 8'b000_??10?,
8'b001_??1??, 8'b001_??1??,
8'b010_00100, 8'b010_00100,
8'b011_0?100, 8'b011_0?100,
8'b100_??100: begin grant_onehot_r = 5'b00100; grant_index_r = LOG_NUM_REQS'(2); end 8'b100_??100: begin grant_onehot_r = 5'b00100; grant_index_r = LOG_NUM_REQS'(2); end
8'b000_?100?, 8'b000_?100?,
8'b001_?10??, 8'b001_?10??,
8'b010_?1???, 8'b010_?1???,
8'b011_01000, 8'b011_01000,
8'b100_?1000: begin grant_onehot_r = 5'b01000; grant_index_r = LOG_NUM_REQS'(3); end 8'b100_?1000: begin grant_onehot_r = 5'b01000; grant_index_r = LOG_NUM_REQS'(3); end
default: begin grant_onehot_r = 5'b10000; grant_index_r = LOG_NUM_REQS'(4); end default: begin grant_onehot_r = 5'b10000; grant_index_r = LOG_NUM_REQS'(4); end
endcase endcase
end end
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
state <= '0; state <= '0;
end else if (!LOCK_ENABLE || grant_unlock) begin end else if (grant_ready) begin
state <= grant_index_r; state <= grant_index_r;
end end
end end
assign grant_index = grant_index_r; assign grant_index = grant_index_r;
assign grant_onehot = grant_onehot_r; assign grant_onehot = grant_onehot_r;
assign grant_valid = (| requests); assign grant_valid = (| requests);
end else if (NUM_REQS == 6) begin end else if (NUM_REQS == 6) begin
reg [LOG_NUM_REQS-1:0] grant_index_r; reg [LOG_NUM_REQS-1:0] grant_index_r;
reg [NUM_REQS-1:0] grant_onehot_r; reg [NUM_REQS-1:0] grant_onehot_r;
reg [LOG_NUM_REQS-1:0] state; reg [LOG_NUM_REQS-1:0] state;
always @(*) begin always @(*) begin
casez ({state, requests}) casez ({state, requests})
9'b000_000001, 9'b000_000001,
9'b001_0000?1, 9'b001_0000?1,
9'b010_000??1, 9'b010_000??1,
9'b011_00???1, 9'b011_00???1,
9'b100_0????1, 9'b100_0????1,
9'b101_?????1: begin grant_onehot_r = 6'b000001; grant_index_r = LOG_NUM_REQS'(0); end 9'b101_?????1: begin grant_onehot_r = 6'b000001; grant_index_r = LOG_NUM_REQS'(0); end
9'b000_????1?, 9'b000_????1?,
9'b001_000010, 9'b001_000010,
9'b010_000?10, 9'b010_000?10,
9'b011_00??10, 9'b011_00??10,
9'b100_0???10, 9'b100_0???10,
9'b101_????10: begin grant_onehot_r = 6'b000010; grant_index_r = LOG_NUM_REQS'(1); end 9'b101_????10: begin grant_onehot_r = 6'b000010; grant_index_r = LOG_NUM_REQS'(1); end
9'b000_???10?, 9'b000_???10?,
9'b001_???1??, 9'b001_???1??,
9'b010_000100, 9'b010_000100,
9'b011_00?100, 9'b011_00?100,
9'b100_0??100, 9'b100_0??100,
9'b101_???100: begin grant_onehot_r = 6'b000100; grant_index_r = LOG_NUM_REQS'(2); end 9'b101_???100: begin grant_onehot_r = 6'b000100; grant_index_r = LOG_NUM_REQS'(2); end
9'b000_??100?, 9'b000_??100?,
9'b001_??10??, 9'b001_??10??,
9'b010_??1???, 9'b010_??1???,
9'b011_001000, 9'b011_001000,
9'b100_0?1000, 9'b100_0?1000,
9'b101_??1000: begin grant_onehot_r = 6'b001000; grant_index_r = LOG_NUM_REQS'(3); end 9'b101_??1000: begin grant_onehot_r = 6'b001000; grant_index_r = LOG_NUM_REQS'(3); end
9'b000_?1000?, 9'b000_?1000?,
9'b001_?100??, 9'b001_?100??,
9'b010_?10???, 9'b010_?10???,
9'b011_?1????, 9'b011_?1????,
9'b100_010000, 9'b100_010000,
9'b101_?10000: begin grant_onehot_r = 6'b010000; grant_index_r = LOG_NUM_REQS'(4); end 9'b101_?10000: begin grant_onehot_r = 6'b010000; grant_index_r = LOG_NUM_REQS'(4); end
default: begin grant_onehot_r = 6'b100000; grant_index_r = LOG_NUM_REQS'(5); end default: begin grant_onehot_r = 6'b100000; grant_index_r = LOG_NUM_REQS'(5); end
endcase endcase
end end
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
state <= '0; state <= '0;
end else if (!LOCK_ENABLE || grant_unlock) begin end else if (grant_ready) begin
state <= grant_index_r; state <= grant_index_r;
end end
end end
assign grant_index = grant_index_r; assign grant_index = grant_index_r;
assign grant_onehot = grant_onehot_r; assign grant_onehot = grant_onehot_r;
assign grant_valid = (| requests); assign grant_valid = (| requests);
end else if (NUM_REQS == 7) begin end else if (NUM_REQS == 7) begin
reg [LOG_NUM_REQS-1:0] grant_index_r; reg [LOG_NUM_REQS-1:0] grant_index_r;
reg [NUM_REQS-1:0] grant_onehot_r; reg [NUM_REQS-1:0] grant_onehot_r;
reg [LOG_NUM_REQS-1:0] state; reg [LOG_NUM_REQS-1:0] state;
always @(*) begin always @(*) begin
casez ({state, requests}) casez ({state, requests})
10'b000_000001, 10'b000_000001,
10'b001_0000?1, 10'b001_0000?1,
10'b010_000??1, 10'b010_000??1,
10'b011_00???1, 10'b011_00???1,
10'b100_00???1, 10'b100_00???1,
10'b101_0????1, 10'b101_0????1,
10'b110_?????1: begin grant_onehot_r = 7'b0000001; grant_index_r = LOG_NUM_REQS'(0); end 10'b110_?????1: begin grant_onehot_r = 7'b0000001; grant_index_r = LOG_NUM_REQS'(0); end
10'b000_?????1?, 10'b000_?????1?,
10'b001_0000010, 10'b001_0000010,
10'b010_0000?10, 10'b010_0000?10,
10'b011_000??10, 10'b011_000??10,
10'b100_00???10, 10'b100_00???10,
10'b101_0????10, 10'b101_0????10,
10'b110_?????10: begin grant_onehot_r = 7'b0000010; grant_index_r = LOG_NUM_REQS'(1); end 10'b110_?????10: begin grant_onehot_r = 7'b0000010; grant_index_r = LOG_NUM_REQS'(1); end
10'b000_????10?, 10'b000_????10?,
10'b001_????1??, 10'b001_????1??,
10'b010_0000100, 10'b010_0000100,
10'b011_000?100, 10'b011_000?100,
10'b100_00??100, 10'b100_00??100,
10'b101_0???100, 10'b101_0???100,
10'b110_????100: begin grant_onehot_r = 7'b0000100; grant_index_r = LOG_NUM_REQS'(2); end 10'b110_????100: begin grant_onehot_r = 7'b0000100; grant_index_r = LOG_NUM_REQS'(2); end
10'b000_???100?, 10'b000_???100?,
10'b001_???10??, 10'b001_???10??,
10'b010_???1???, 10'b010_???1???,
10'b011_0001000, 10'b011_0001000,
10'b100_00?1000, 10'b100_00?1000,
10'b101_0??1000, 10'b101_0??1000,
10'b110_???1000: begin grant_onehot_r = 7'b0001000; grant_index_r = LOG_NUM_REQS'(3); end 10'b110_???1000: begin grant_onehot_r = 7'b0001000; grant_index_r = LOG_NUM_REQS'(3); end
10'b000_??1000?, 10'b000_??1000?,
10'b001_??100??, 10'b001_??100??,
10'b010_??10???, 10'b010_??10???,
10'b011_??1????, 10'b011_??1????,
10'b100_0010000, 10'b100_0010000,
10'b101_0?10000, 10'b101_0?10000,
10'b110_??10000: begin grant_onehot_r = 7'b0010000; grant_index_r = LOG_NUM_REQS'(4); end 10'b110_??10000: begin grant_onehot_r = 7'b0010000; grant_index_r = LOG_NUM_REQS'(4); end
10'b000_?10000?, 10'b000_?10000?,
10'b001_?1000??, 10'b001_?1000??,
10'b010_?100???, 10'b010_?100???,
10'b011_?10????, 10'b011_?10????,
10'b100_?1?????, 10'b100_?1?????,
10'b101_0100000, 10'b101_0100000,
10'b110_?100000: begin grant_onehot_r = 7'b0100000; grant_index_r = LOG_NUM_REQS'(5); end 10'b110_?100000: begin grant_onehot_r = 7'b0100000; grant_index_r = LOG_NUM_REQS'(5); end
default: begin grant_onehot_r = 7'b1000000; grant_index_r = LOG_NUM_REQS'(6); end default: begin grant_onehot_r = 7'b1000000; grant_index_r = LOG_NUM_REQS'(6); end
endcase endcase
end end
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
state <= '0; state <= '0;
end else if (!LOCK_ENABLE || grant_unlock) begin end else if (grant_ready) begin
state <= grant_index_r; state <= grant_index_r;
end end
end end
assign grant_index = grant_index_r; assign grant_index = grant_index_r;
assign grant_onehot = grant_onehot_r; assign grant_onehot = grant_onehot_r;
assign grant_valid = (| requests); assign grant_valid = (| requests);
end */else if (NUM_REQS == 8) begin end */else if (NUM_REQS == 8) begin
reg [LOG_NUM_REQS-1:0] grant_index_r; reg [LOG_NUM_REQS-1:0] grant_index_r;
reg [NUM_REQS-1:0] grant_onehot_r; reg [NUM_REQS-1:0] grant_onehot_r;
reg [LOG_NUM_REQS-1:0] state; reg [LOG_NUM_REQS-1:0] state;
always @(*) begin always @(*) begin
casez ({state, requests}) casez ({state, requests})
11'b000_00000001, 11'b000_00000001,
11'b001_000000?1, 11'b001_000000?1,
11'b010_00000??1, 11'b010_00000??1,
11'b011_0000???1, 11'b011_0000???1,
11'b100_000????1, 11'b100_000????1,
11'b101_00?????1, 11'b101_00?????1,
11'b110_0??????1, 11'b110_0??????1,
11'b111_???????1: begin grant_onehot_r = 8'b00000001; grant_index_r = LOG_NUM_REQS'(0); end 11'b111_???????1: begin grant_onehot_r = 8'b00000001; grant_index_r = LOG_NUM_REQS'(0); end
11'b000_??????1?, 11'b000_??????1?,
11'b001_00000010, 11'b001_00000010,
11'b010_00000?10, 11'b010_00000?10,
11'b011_0000??10, 11'b011_0000??10,
11'b100_000???10, 11'b100_000???10,
11'b101_00????10, 11'b101_00????10,
11'b110_0?????10, 11'b110_0?????10,
11'b111_??????10: begin grant_onehot_r = 8'b00000010; grant_index_r = LOG_NUM_REQS'(1); end 11'b111_??????10: begin grant_onehot_r = 8'b00000010; grant_index_r = LOG_NUM_REQS'(1); end
11'b000_?????10?, 11'b000_?????10?,
11'b001_?????1??, 11'b001_?????1??,
11'b010_00000100, 11'b010_00000100,
11'b011_0000?100, 11'b011_0000?100,
11'b100_000??100, 11'b100_000??100,
11'b101_00???100, 11'b101_00???100,
11'b110_0????100, 11'b110_0????100,
11'b111_?????100: begin grant_onehot_r = 8'b00000100; grant_index_r = LOG_NUM_REQS'(2); end 11'b111_?????100: begin grant_onehot_r = 8'b00000100; grant_index_r = LOG_NUM_REQS'(2); end
11'b000_????100?, 11'b000_????100?,
11'b001_????10??, 11'b001_????10??,
@ -362,20 +361,20 @@ module VX_rr_arbiter #(
endcase endcase
end end
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
state <= '0; state <= '0;
end else if (!LOCK_ENABLE || grant_unlock) begin end else if (grant_ready) begin
state <= grant_index_r; state <= grant_index_r;
end end
end end
assign grant_index = grant_index_r; assign grant_index = grant_index_r;
assign grant_onehot = grant_onehot_r; assign grant_onehot = grant_onehot_r;
assign grant_valid = (| requests); assign grant_valid = (| requests);
end else if (MODEL == 1) begin end else if (MODEL == 1) begin
`IGNORE_UNOPTFLAT_BEGIN `IGNORE_UNOPTFLAT_BEGIN
wire [NUM_REQS-1:0] mask_higher_pri_regs, unmask_higher_pri_regs; wire [NUM_REQS-1:0] mask_higher_pri_regs, unmask_higher_pri_regs;
`IGNORE_UNOPTFLAT_END `IGNORE_UNOPTFLAT_END
@ -385,12 +384,18 @@ module VX_rr_arbiter #(
wire [NUM_REQS-1:0] req_masked = requests & pointer_reg; wire [NUM_REQS-1:0] req_masked = requests & pointer_reg;
assign mask_higher_pri_regs[NUM_REQS-1:1] = mask_higher_pri_regs[NUM_REQS-2:0] | req_masked[NUM_REQS-2:0];
assign mask_higher_pri_regs[0] = 1'b0; assign mask_higher_pri_regs[0] = 1'b0;
for (genvar i = 1; i < NUM_REQS; ++i) begin
assign mask_higher_pri_regs[i] = mask_higher_pri_regs[i-1] | req_masked[i-1];
end
assign grant_masked[NUM_REQS-1:0] = req_masked[NUM_REQS-1:0] & ~mask_higher_pri_regs[NUM_REQS-1:0]; assign grant_masked[NUM_REQS-1:0] = req_masked[NUM_REQS-1:0] & ~mask_higher_pri_regs[NUM_REQS-1:0];
assign unmask_higher_pri_regs[NUM_REQS-1:1] = unmask_higher_pri_regs[NUM_REQS-2:0] | requests[NUM_REQS-2:0];
assign unmask_higher_pri_regs[0] = 1'b0; assign unmask_higher_pri_regs[0] = 1'b0;
for (genvar i = 1; i < NUM_REQS; ++i) begin
assign unmask_higher_pri_regs[i] = unmask_higher_pri_regs[i-1] | requests[i-1];
end
assign grant_unmasked[NUM_REQS-1:0] = requests[NUM_REQS-1:0] & ~unmask_higher_pri_regs[NUM_REQS-1:0]; assign grant_unmasked[NUM_REQS-1:0] = requests[NUM_REQS-1:0] & ~unmask_higher_pri_regs[NUM_REQS-1:0];
wire no_req_masked = ~(|req_masked); wire no_req_masked = ~(|req_masked);
@ -399,7 +404,7 @@ module VX_rr_arbiter #(
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
pointer_reg <= {NUM_REQS{1'b1}}; pointer_reg <= {NUM_REQS{1'b1}};
end else if (!LOCK_ENABLE || grant_unlock) begin end else if (grant_ready) begin
if (|req_masked) begin if (|req_masked) begin
pointer_reg <= mask_higher_pri_regs; pointer_reg <= mask_higher_pri_regs;
end else if (|requests) begin end else if (|requests) begin
@ -410,22 +415,22 @@ module VX_rr_arbiter #(
end end
end end
assign grant_valid = (| requests); assign grant_valid = (| requests);
VX_onehot_encoder #( VX_onehot_encoder #(
.N (NUM_REQS) .N (NUM_REQS)
) onehot_encoder ( ) onehot_encoder (
.data_in (grant_onehot), .data_in (grant_onehot),
.data_out (grant_index), .data_out (grant_index),
`UNUSED_PIN (valid_out) `UNUSED_PIN (valid_out)
); );
end else begin end else begin
reg [LOG_NUM_REQS-1:0] grant_index_r; reg [LOG_NUM_REQS-1:0] grant_index_r;
reg [NUM_REQS-1:0] grant_onehot_r; reg [NUM_REQS-1:0] grant_onehot_r;
reg [NUM_REQS-1:0] state; reg [NUM_REQS-1:0] state;
always @(*) begin always @(*) begin
grant_index_r = 'x; grant_index_r = 'x;
grant_onehot_r = 'x; grant_onehot_r = 'x;
@ -440,18 +445,18 @@ module VX_rr_arbiter #(
end end
end end
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
state <= '0; state <= '0;
end else if (!LOCK_ENABLE || grant_unlock) begin end else if (grant_ready) begin
state <= grant_index_r; state <= grant_index_r;
end end
end end
assign grant_index = grant_index_r; assign grant_index = grant_index_r;
assign grant_onehot = grant_onehot_r; assign grant_onehot = grant_onehot_r;
assign grant_valid = (| requests); assign grant_valid = (| requests);
end end
endmodule endmodule
`TRACING_ON `TRACING_ON

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -17,17 +17,18 @@
module VX_sp_ram #( module VX_sp_ram #(
parameter DATAW = 1, parameter DATAW = 1,
parameter SIZE = 1, parameter SIZE = 1,
parameter ADDR_MIN = 0,
parameter WRENW = 1, parameter WRENW = 1,
parameter OUT_REG = 0, parameter OUT_REG = 0,
parameter NO_RWCHECK = 0, parameter NO_RWCHECK = 0,
parameter LUTRAM = 0, parameter LUTRAM = 0,
parameter INIT_ENABLE = 0, parameter INIT_ENABLE = 0,
parameter INIT_FILE = "", parameter INIT_FILE = "",
parameter [DATAW-1:0] INIT_VALUE = 0, parameter [DATAW-1:0] INIT_VALUE = 0,
parameter ADDRW = `LOG2UP(SIZE) parameter ADDRW = `LOG2UP(SIZE)
) ( ) (
input wire clk, input wire clk,
input wire read, input wire read,
input wire write, input wire write,
input wire [WRENW-1:0] wren, input wire [WRENW-1:0] wren,
input wire [ADDRW-1:0] addr, input wire [ADDRW-1:0] addr,
@ -37,6 +38,7 @@ module VX_sp_ram #(
VX_dp_ram #( VX_dp_ram #(
.DATAW (DATAW), .DATAW (DATAW),
.SIZE (SIZE), .SIZE (SIZE),
.ADDR_MIN (ADDR_MIN),
.WRENW (WRENW), .WRENW (WRENW),
.OUT_REG (OUT_REG), .OUT_REG (OUT_REG),
.NO_RWCHECK (NO_RWCHECK), .NO_RWCHECK (NO_RWCHECK),

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -20,7 +20,8 @@ module VX_stream_arb #(
parameter DATAW = 1, parameter DATAW = 1,
parameter `STRING ARBITER = "P", parameter `STRING ARBITER = "P",
parameter MAX_FANOUT = `MAX_FANOUT, parameter MAX_FANOUT = `MAX_FANOUT,
parameter OUT_BUF = 0 , parameter OUT_BUF = 0,
parameter LUTRAM = 0,
parameter NUM_REQS = `CDIV(NUM_INPUTS, NUM_OUTPUTS), parameter NUM_REQS = `CDIV(NUM_INPUTS, NUM_OUTPUTS),
parameter LOG_NUM_REQS = `CLOG2(NUM_REQS), parameter LOG_NUM_REQS = `CLOG2(NUM_REQS),
parameter NUM_REQS_W = `UP(LOG_NUM_REQS) parameter NUM_REQS_W = `UP(LOG_NUM_REQS)
@ -42,7 +43,7 @@ module VX_stream_arb #(
if (NUM_OUTPUTS > 1) begin if (NUM_OUTPUTS > 1) begin
// (#inputs > #outputs) and (#outputs > 1) // (#inputs > #outputs) and (#outputs > 1)
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
localparam BATCH_BEGIN = i * NUM_REQS; localparam BATCH_BEGIN = i * NUM_REQS;
@ -57,7 +58,8 @@ module VX_stream_arb #(
.DATAW (DATAW), .DATAW (DATAW),
.ARBITER (ARBITER), .ARBITER (ARBITER),
.MAX_FANOUT (MAX_FANOUT), .MAX_FANOUT (MAX_FANOUT),
.OUT_BUF (OUT_BUF) .OUT_BUF (OUT_BUF),
.LUTRAM (LUTRAM)
) arb_slice ( ) arb_slice (
.clk (clk), .clk (clk),
.reset (slice_reset), .reset (slice_reset),
@ -81,8 +83,8 @@ module VX_stream_arb #(
wire [NUM_BATCHES-1:0] valid_tmp; wire [NUM_BATCHES-1:0] valid_tmp;
wire [NUM_BATCHES-1:0][DATAW+LOG_NUM_REQS2-1:0] data_tmp; wire [NUM_BATCHES-1:0][DATAW+LOG_NUM_REQS2-1:0] data_tmp;
wire [NUM_BATCHES-1:0] ready_tmp; wire [NUM_BATCHES-1:0] ready_tmp;
for (genvar i = 0; i < NUM_BATCHES; ++i) begin for (genvar i = 0; i < NUM_BATCHES; ++i) begin
localparam BATCH_BEGIN = i * MAX_FANOUT; localparam BATCH_BEGIN = i * MAX_FANOUT;
@ -97,18 +99,19 @@ module VX_stream_arb #(
if (MAX_FANOUT != 1) begin if (MAX_FANOUT != 1) begin
VX_stream_arb #( VX_stream_arb #(
.NUM_INPUTS (BATCH_SIZE), .NUM_INPUTS (BATCH_SIZE),
.NUM_OUTPUTS (1), .NUM_OUTPUTS (1),
.DATAW (DATAW), .DATAW (DATAW),
.ARBITER (ARBITER), .ARBITER (ARBITER),
.MAX_FANOUT (MAX_FANOUT), .MAX_FANOUT (MAX_FANOUT),
.OUT_BUF (OUT_BUF) .OUT_BUF (3), // registered output
.LUTRAM (LUTRAM)
) fanout_slice_arb ( ) fanout_slice_arb (
.clk (clk), .clk (clk),
.reset (slice_reset), .reset (slice_reset),
.valid_in (valid_in[BATCH_END-1: BATCH_BEGIN]), .valid_in (valid_in[BATCH_END-1: BATCH_BEGIN]),
.data_in (data_in[BATCH_END-1: BATCH_BEGIN]), .data_in (data_in[BATCH_END-1: BATCH_BEGIN]),
.ready_in (ready_in[BATCH_END-1: BATCH_BEGIN]), .ready_in (ready_in[BATCH_END-1: BATCH_BEGIN]),
.valid_out (valid_tmp[i]), .valid_out (valid_tmp[i]),
.data_out (data_tmp_u), .data_out (data_tmp_u),
.sel_out (sel_tmp_u), .sel_out (sel_tmp_u),
.ready_out (ready_tmp[i]) .ready_out (ready_tmp[i])
@ -123,11 +126,12 @@ module VX_stream_arb #(
VX_stream_arb #( VX_stream_arb #(
.NUM_INPUTS (NUM_BATCHES), .NUM_INPUTS (NUM_BATCHES),
.NUM_OUTPUTS (1), .NUM_OUTPUTS (1),
.DATAW (DATAW + LOG_NUM_REQS2), .DATAW (DATAW + LOG_NUM_REQS2),
.ARBITER (ARBITER), .ARBITER (ARBITER),
.MAX_FANOUT (MAX_FANOUT), .MAX_FANOUT (MAX_FANOUT),
.OUT_BUF (OUT_BUF) .OUT_BUF (OUT_BUF),
.LUTRAM (LUTRAM)
) fanout_join_arb ( ) fanout_join_arb (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@ -150,16 +154,15 @@ module VX_stream_arb #(
wire valid_in_r; wire valid_in_r;
wire [DATAW-1:0] data_in_r; wire [DATAW-1:0] data_in_r;
wire ready_in_r; wire ready_in_r;
wire arb_valid; wire arb_valid;
wire [NUM_REQS_W-1:0] arb_index; wire [NUM_REQS_W-1:0] arb_index;
wire [NUM_REQS-1:0] arb_onehot; wire [NUM_REQS-1:0] arb_onehot;
wire arb_ready; wire arb_ready;
VX_generic_arbiter #( VX_generic_arbiter #(
.NUM_REQS (NUM_REQS), .NUM_REQS (NUM_REQS),
.LOCK_ENABLE (1), .TYPE (ARBITER)
.TYPE (ARBITER)
) arbiter ( ) arbiter (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@ -167,21 +170,30 @@ module VX_stream_arb #(
.grant_valid (arb_valid), .grant_valid (arb_valid),
.grant_index (arb_index), .grant_index (arb_index),
.grant_onehot (arb_onehot), .grant_onehot (arb_onehot),
.grant_unlock (arb_ready) .grant_ready (arb_ready)
); );
assign valid_in_r = arb_valid; assign valid_in_r = arb_valid;
assign data_in_r = data_in[arb_index];
assign arb_ready = ready_in_r; assign arb_ready = ready_in_r;
VX_onehot_mux #(
.DATAW (DATAW),
.N (NUM_REQS)
) onehot_mux (
.data_in (data_in),
.sel_in (arb_onehot),
.data_out (data_in_r)
);
for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar i = 0; i < NUM_REQS; ++i) begin
assign ready_in[i] = ready_in_r & arb_onehot[i]; assign ready_in[i] = ready_in_r && arb_onehot[i];
end end
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (LOG_NUM_REQS + DATAW), .DATAW (LOG_NUM_REQS + DATAW),
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)),
.LUTRAM (LUTRAM)
) out_buf ( ) out_buf (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@ -214,7 +226,8 @@ module VX_stream_arb #(
.DATAW (DATAW), .DATAW (DATAW),
.ARBITER (ARBITER), .ARBITER (ARBITER),
.MAX_FANOUT (MAX_FANOUT), .MAX_FANOUT (MAX_FANOUT),
.OUT_BUF (OUT_BUF) .OUT_BUF (OUT_BUF),
.LUTRAM (LUTRAM)
) arb_slice ( ) arb_slice (
.clk (clk), .clk (clk),
.reset (slice_reset), .reset (slice_reset),
@ -248,19 +261,20 @@ module VX_stream_arb #(
.DATAW (DATAW), .DATAW (DATAW),
.ARBITER (ARBITER), .ARBITER (ARBITER),
.MAX_FANOUT (MAX_FANOUT), .MAX_FANOUT (MAX_FANOUT),
.OUT_BUF (OUT_BUF) .OUT_BUF (3), // registered output
.LUTRAM (LUTRAM)
) fanout_fork_arb ( ) fanout_fork_arb (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.valid_in (valid_in), .valid_in (valid_in),
.ready_in (ready_in), .ready_in (ready_in),
.data_in (data_in), .data_in (data_in),
.data_out (data_tmp), .data_out (data_tmp),
.valid_out (valid_tmp), .valid_out (valid_tmp),
.ready_out (ready_tmp), .ready_out (ready_tmp),
`UNUSED_PIN (sel_out) `UNUSED_PIN (sel_out)
); );
for (genvar i = 0; i < NUM_BATCHES; ++i) begin for (genvar i = 0; i < NUM_BATCHES; ++i) begin
localparam BATCH_BEGIN = i * MAX_FANOUT; localparam BATCH_BEGIN = i * MAX_FANOUT;
@ -271,11 +285,12 @@ module VX_stream_arb #(
VX_stream_arb #( VX_stream_arb #(
.NUM_INPUTS (1), .NUM_INPUTS (1),
.NUM_OUTPUTS (BATCH_SIZE), .NUM_OUTPUTS (BATCH_SIZE),
.DATAW (DATAW), .DATAW (DATAW),
.ARBITER (ARBITER), .ARBITER (ARBITER),
.MAX_FANOUT (MAX_FANOUT), .MAX_FANOUT (MAX_FANOUT),
.OUT_BUF (OUT_BUF) .OUT_BUF (OUT_BUF),
.LUTRAM (LUTRAM)
) fanout_slice_arb ( ) fanout_slice_arb (
.clk (clk), .clk (clk),
.reset (slice_reset), .reset (slice_reset),
@ -293,25 +308,24 @@ module VX_stream_arb #(
// (#inputs == 1) and (#outputs <= max_fanout) // (#inputs == 1) and (#outputs <= max_fanout)
wire [NUM_OUTPUTS-1:0] ready_in_r; wire [NUM_OUTPUTS-1:0] ready_in_r;
wire [NUM_OUTPUTS-1:0] arb_requests; wire [NUM_OUTPUTS-1:0] arb_requests;
wire arb_valid; wire arb_valid;
wire [NUM_OUTPUTS-1:0] arb_onehot; wire [NUM_OUTPUTS-1:0] arb_onehot;
wire arb_ready; wire arb_ready;
VX_generic_arbiter #( VX_generic_arbiter #(
.NUM_REQS (NUM_OUTPUTS), .NUM_REQS (NUM_OUTPUTS),
.LOCK_ENABLE (1), .TYPE (ARBITER)
.TYPE (ARBITER)
) arbiter ( ) arbiter (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.requests (arb_requests), .requests (arb_requests),
.grant_valid (arb_valid), .grant_valid (arb_valid),
`UNUSED_PIN (grant_index), `UNUSED_PIN (grant_index),
.grant_onehot (arb_onehot), .grant_onehot (arb_onehot),
.grant_unlock (arb_ready) .grant_ready (arb_ready)
); );
assign arb_requests = ready_in_r; assign arb_requests = ready_in_r;
@ -320,9 +334,10 @@ module VX_stream_arb #(
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (DATAW), .DATAW (DATAW),
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)),
.LUTRAM (LUTRAM)
) out_buf ( ) out_buf (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@ -337,7 +352,7 @@ module VX_stream_arb #(
end end
assign sel_out = 0; assign sel_out = 0;
end else begin end else begin
// #Inputs == #Outputs // #Inputs == #Outputs
@ -349,7 +364,8 @@ module VX_stream_arb #(
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (DATAW), .DATAW (DATAW),
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)),
.LUTRAM (LUTRAM)
) out_buf ( ) out_buf (
.clk (clk), .clk (clk),
.reset (out_buf_reset), .reset (out_buf_reset),
@ -363,6 +379,6 @@ module VX_stream_arb #(
assign sel_out[i] = NUM_REQS_W'(i); assign sel_out[i] = NUM_REQS_W'(i);
end end
end end
endmodule endmodule
`TRACING_ON `TRACING_ON

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -15,9 +15,9 @@
`TRACING_OFF `TRACING_OFF
module VX_stream_pack #( module VX_stream_pack #(
parameter NUM_REQS = 1, parameter NUM_REQS = 1,
parameter DATA_WIDTH = 1, parameter DATA_WIDTH = 1,
parameter TAG_WIDTH = 1, parameter TAG_WIDTH = 1,
parameter TAG_SEL_BITS = 0, parameter TAG_SEL_BITS = 0,
parameter `STRING ARBITER = "P", parameter `STRING ARBITER = "P",
parameter OUT_BUF = 0 parameter OUT_BUF = 0
@ -38,47 +38,48 @@ module VX_stream_pack #(
output wire [TAG_WIDTH-1:0] tag_out, output wire [TAG_WIDTH-1:0] tag_out,
input wire ready_out input wire ready_out
); );
localparam LOG_NUM_REQS = `CLOG2(NUM_REQS);
if (NUM_REQS > 1) begin if (NUM_REQS > 1) begin
wire [LOG_NUM_REQS-1:0] grant_index; wire [NUM_REQS-1:0] grant_onehot;
wire grant_valid; wire grant_valid;
wire grant_ready; wire grant_ready;
VX_generic_arbiter #( VX_generic_arbiter #(
.NUM_REQS (NUM_REQS), .NUM_REQS (NUM_REQS),
.LOCK_ENABLE (1), .TYPE (ARBITER)
.TYPE (ARBITER)
) arbiter ( ) arbiter (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.requests (valid_in), .requests (valid_in),
.grant_valid (grant_valid), .grant_valid (grant_valid),
.grant_index (grant_index), `UNUSED_PIN (grant_index),
`UNUSED_PIN (grant_onehot), .grant_onehot(grant_onehot),
.grant_unlock(grant_ready) .grant_ready (grant_ready)
); );
reg [NUM_REQS-1:0] valid_sel; wire [TAG_WIDTH-1:0] tag_sel;
reg [NUM_REQS-1:0] ready_sel;
wire ready_unqual;
wire [TAG_WIDTH-1:0] tag_sel = tag_in[grant_index]; VX_onehot_mux #(
.DATAW (TAG_WIDTH),
always @(*) begin .N (NUM_REQS)
valid_sel = '0; ) onehot_mux (
ready_sel = '0; .data_in (tag_in),
for (integer i = 0; i < NUM_REQS; ++i) begin .sel_in (grant_onehot),
if (tag_in[i][TAG_SEL_BITS-1:0] == tag_sel[TAG_SEL_BITS-1:0]) begin .data_out (tag_sel)
valid_sel[i] = valid_in[i]; );
ready_sel[i] = ready_unqual;
end wire [NUM_REQS-1:0] tag_matches;
end
end for (genvar i = 0; i < NUM_REQS; ++i) begin
assign tag_matches[i] = (tag_in[i][TAG_SEL_BITS-1:0] == tag_sel[TAG_SEL_BITS-1:0]);
end
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign ready_in[i] = grant_ready & tag_matches[i];
end
wire [NUM_REQS-1:0] mask_sel = valid_in & tag_matches;
assign grant_ready = ready_unqual;
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (NUM_REQS + TAG_WIDTH + (NUM_REQS * DATA_WIDTH)), .DATAW (NUM_REQS + TAG_WIDTH + (NUM_REQS * DATA_WIDTH)),
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
@ -86,16 +87,14 @@ module VX_stream_pack #(
) out_buf ( ) out_buf (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.valid_in (grant_valid), .valid_in (grant_valid),
.data_in ({valid_sel, tag_sel, data_in}), .data_in ({mask_sel, tag_sel, data_in}),
.ready_in (ready_unqual), .ready_in (grant_ready),
.valid_out (valid_out), .valid_out (valid_out),
.data_out ({mask_out, tag_out, data_out}), .data_out ({mask_out, tag_out, data_out}),
.ready_out (ready_out) .ready_out (ready_out)
); );
assign ready_in = ready_sel;
end else begin end else begin
`UNUSED_VAR (clk) `UNUSED_VAR (clk)

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -22,6 +22,7 @@ module VX_stream_xbar #(
parameter OUT_WIDTH = `LOG2UP(NUM_OUTPUTS), parameter OUT_WIDTH = `LOG2UP(NUM_OUTPUTS),
parameter ARBITER = "P", parameter ARBITER = "P",
parameter OUT_BUF = 0, parameter OUT_BUF = 0,
parameter LUTRAM = 0,
parameter MAX_FANOUT = `MAX_FANOUT, parameter MAX_FANOUT = `MAX_FANOUT,
parameter PERF_CTR_BITS = `CLOG2(NUM_INPUTS+1) parameter PERF_CTR_BITS = `CLOG2(NUM_INPUTS+1)
) ( ) (
@ -36,7 +37,7 @@ module VX_stream_xbar #(
output wire [NUM_INPUTS-1:0] ready_in, output wire [NUM_INPUTS-1:0] ready_in,
output wire [NUM_OUTPUTS-1:0] valid_out, output wire [NUM_OUTPUTS-1:0] valid_out,
output wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out, output wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out,
output wire [NUM_OUTPUTS-1:0][IN_WIDTH-1:0] sel_out, output wire [NUM_OUTPUTS-1:0][IN_WIDTH-1:0] sel_out,
input wire [NUM_OUTPUTS-1:0] ready_out input wire [NUM_OUTPUTS-1:0] ready_out
); );
@ -66,7 +67,8 @@ module VX_stream_xbar #(
.DATAW (DATAW), .DATAW (DATAW),
.ARBITER (ARBITER), .ARBITER (ARBITER),
.MAX_FANOUT (MAX_FANOUT), .MAX_FANOUT (MAX_FANOUT),
.OUT_BUF (OUT_BUF) .OUT_BUF (OUT_BUF),
.LUTRAM (LUTRAM)
) xbar_arb ( ) xbar_arb (
.clk (clk), .clk (clk),
.reset (slice_reset), .reset (slice_reset),
@ -94,7 +96,8 @@ module VX_stream_xbar #(
.DATAW (DATAW), .DATAW (DATAW),
.ARBITER (ARBITER), .ARBITER (ARBITER),
.MAX_FANOUT (MAX_FANOUT), .MAX_FANOUT (MAX_FANOUT),
.OUT_BUF (OUT_BUF) .OUT_BUF (OUT_BUF),
.LUTRAM (LUTRAM)
) xbar_arb ( ) xbar_arb (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@ -124,13 +127,14 @@ module VX_stream_xbar #(
assign ready_in = ready_out_r[sel_in]; assign ready_in = ready_out_r[sel_in];
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
`RESET_RELAY (out_buf_reset, reset); `RESET_RELAY (out_buf_reset, reset);
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (DATAW), .DATAW (DATAW),
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)),
.LUTRAM (LUTRAM)
) out_buf ( ) out_buf (
.clk (clk), .clk (clk),
.reset (out_buf_reset), .reset (out_buf_reset),
@ -152,7 +156,8 @@ module VX_stream_xbar #(
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (DATAW), .DATAW (DATAW),
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)),
.LUTRAM (LUTRAM)
) out_buf ( ) out_buf (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@ -172,7 +177,7 @@ module VX_stream_xbar #(
// compute inputs collision // compute inputs collision
// we have a collision when there exists a valid transfer with multiple input candicates // we have a collision when there exists a valid transfer with multiple input candicates
// we count the unique duplicates each cycle. // we count the unique duplicates each cycle.
reg [NUM_INPUTS-1:0] per_cycle_collision, per_cycle_collision_r; reg [NUM_INPUTS-1:0] per_cycle_collision, per_cycle_collision_r;
wire [`CLOG2(NUM_INPUTS+1)-1:0] collision_count; wire [`CLOG2(NUM_INPUTS+1)-1:0] collision_count;
reg [PERF_CTR_BITS-1:0] collisions_r; reg [PERF_CTR_BITS-1:0] collisions_r;
@ -182,14 +187,14 @@ module VX_stream_xbar #(
for (integer i = 0; i < NUM_INPUTS; ++i) begin for (integer i = 0; i < NUM_INPUTS; ++i) begin
for (integer j = 1; j < (NUM_INPUTS-i); ++j) begin for (integer j = 1; j < (NUM_INPUTS-i); ++j) begin
per_cycle_collision[i] |= valid_in[i] per_cycle_collision[i] |= valid_in[i]
&& valid_in[j+i] && valid_in[j+i]
&& (sel_in[i] == sel_in[j+i]) && (sel_in[i] == sel_in[j+i])
&& (ready_in[i] | ready_in[j+i]); && (ready_in[i] | ready_in[j+i]);
end end
end end
end end
`BUFFER(per_cycle_collision_r, per_cycle_collision); `BUFFER(per_cycle_collision_r, per_cycle_collision);
`POP_COUNT(collision_count, per_cycle_collision_r); `POP_COUNT(collision_count, per_cycle_collision_r);
always @(posedge clk) begin always @(posedge clk) begin

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -17,10 +17,10 @@ module VX_local_mem import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "", parameter `STRING INSTANCE_ID = "",
// Size of cache in bytes // Size of cache in bytes
parameter SIZE = (1024*16*8), parameter SIZE = (1024*16*8),
// Number of Word requests per cycle // Number of Word requests per cycle
parameter NUM_REQS = 4, parameter NUM_REQS = 4,
// Number of banks // Number of banks
parameter NUM_BANKS = 4, parameter NUM_BANKS = 4,
@ -33,8 +33,11 @@ module VX_local_mem import VX_gpu_pkg::*; #(
parameter UUID_WIDTH = 0, parameter UUID_WIDTH = 0,
// Request tag size // Request tag size
parameter TAG_WIDTH = 16 parameter TAG_WIDTH = 16,
) (
// Response buffer
parameter OUT_BUF = 0
) (
input wire clk, input wire clk,
input wire reset, input wire reset,
@ -59,7 +62,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
localparam REQ_DATAW = 1 + BANK_ADDR_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH; localparam REQ_DATAW = 1 + BANK_ADDR_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH;
localparam RSP_DATAW = WORD_WIDTH + TAG_WIDTH; localparam RSP_DATAW = WORD_WIDTH + TAG_WIDTH;
`STATIC_ASSERT(ADDR_WIDTH == (BANK_ADDR_WIDTH + `CLOG2(NUM_BANKS)), ("invalid parameter")) `STATIC_ASSERT(ADDR_WIDTH == (BANK_ADDR_WIDTH + `CLOG2(NUM_BANKS)), ("invalid parameter"))
// bank selection // bank selection
@ -70,7 +73,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
end end
end else begin end else begin
assign req_bank_idx = 0; assign req_bank_idx = 0;
end end
// bank addressing // bank addressing
@ -83,18 +86,18 @@ module VX_local_mem import VX_gpu_pkg::*; #(
// bank requests dispatch // bank requests dispatch
wire [NUM_BANKS-1:0] per_bank_req_valid; wire [NUM_BANKS-1:0] per_bank_req_valid;
wire [NUM_BANKS-1:0] per_bank_req_rw; wire [NUM_BANKS-1:0] per_bank_req_rw;
wire [NUM_BANKS-1:0][BANK_ADDR_WIDTH-1:0] per_bank_req_addr; wire [NUM_BANKS-1:0][BANK_ADDR_WIDTH-1:0] per_bank_req_addr;
wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_req_byteen; wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_req_byteen;
wire [NUM_BANKS-1:0][WORD_WIDTH-1:0] per_bank_req_data; wire [NUM_BANKS-1:0][WORD_WIDTH-1:0] per_bank_req_data;
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_req_tag; wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_req_tag;
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_req_idx; wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_req_idx;
wire [NUM_BANKS-1:0] per_bank_req_ready; wire [NUM_BANKS-1:0] per_bank_req_ready;
wire [NUM_BANKS-1:0][REQ_DATAW-1:0] per_bank_req_data_all; wire [NUM_BANKS-1:0][REQ_DATAW-1:0] per_bank_req_data_all;
wire [NUM_REQS-1:0] req_valid_in; wire [NUM_REQS-1:0] req_valid_in;
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in; wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in;
wire [NUM_REQS-1:0] req_ready_in; wire [NUM_REQS-1:0] req_ready_in;
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
@ -104,13 +107,13 @@ module VX_local_mem import VX_gpu_pkg::*; #(
for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar i = 0; i < NUM_REQS; ++i) begin
assign req_valid_in[i] = mem_bus_if[i].req_valid; assign req_valid_in[i] = mem_bus_if[i].req_valid;
assign req_data_in[i] = { assign req_data_in[i] = {
mem_bus_if[i].req_data.rw, mem_bus_if[i].req_data.rw,
req_bank_addr[i], req_bank_addr[i],
mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.byteen,
mem_bus_if[i].req_data.data, mem_bus_if[i].req_data.data,
mem_bus_if[i].req_data.tag}; mem_bus_if[i].req_data.tag};
assign mem_bus_if[i].req_ready = req_ready_in[i]; assign mem_bus_if[i].req_ready = req_ready_in[i];
end end
VX_stream_xbar #( VX_stream_xbar #(
.NUM_INPUTS (NUM_REQS), .NUM_INPUTS (NUM_REQS),
@ -138,10 +141,10 @@ module VX_local_mem import VX_gpu_pkg::*; #(
for (genvar i = 0; i < NUM_BANKS; ++i) begin for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign { assign {
per_bank_req_rw[i], per_bank_req_rw[i],
per_bank_req_addr[i], per_bank_req_addr[i],
per_bank_req_byteen[i], per_bank_req_byteen[i],
per_bank_req_data[i], per_bank_req_data[i],
per_bank_req_tag[i]} = per_bank_req_data_all[i]; per_bank_req_tag[i]} = per_bank_req_data_all[i];
end end
@ -149,13 +152,13 @@ module VX_local_mem import VX_gpu_pkg::*; #(
wire [NUM_BANKS-1:0] per_bank_rsp_valid; wire [NUM_BANKS-1:0] per_bank_rsp_valid;
wire [NUM_BANKS-1:0][WORD_WIDTH-1:0] per_bank_rsp_data; wire [NUM_BANKS-1:0][WORD_WIDTH-1:0] per_bank_rsp_data;
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_rsp_idx; wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_rsp_idx;
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_rsp_tag; wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_rsp_tag;
wire [NUM_BANKS-1:0] per_bank_rsp_ready; wire [NUM_BANKS-1:0] per_bank_rsp_ready;
`RESET_RELAY (bank_reset, reset); `RESET_RELAY (bank_reset, reset);
for (genvar i = 0; i < NUM_BANKS; ++i) begin for (genvar i = 0; i < NUM_BANKS; ++i) begin
VX_sp_ram #( VX_sp_ram #(
.DATAW (WORD_WIDTH), .DATAW (WORD_WIDTH),
.SIZE (WORDS_PER_BANK), .SIZE (WORDS_PER_BANK),
@ -165,7 +168,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
.read (1'b1), .read (1'b1),
.write (per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i]), .write (per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i]),
.wren (per_bank_req_byteen[i]), .wren (per_bank_req_byteen[i]),
.addr (per_bank_req_addr[i]), .addr (per_bank_req_addr[i]),
.wdata (per_bank_req_data[i]), .wdata (per_bank_req_data[i]),
.rdata (per_bank_rsp_data[i]) .rdata (per_bank_rsp_data[i])
); );
@ -193,7 +196,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
// bank responses gather // bank responses gather
wire [NUM_BANKS-1:0][RSP_DATAW-1:0] per_bank_rsp_data_all; wire [NUM_BANKS-1:0][RSP_DATAW-1:0] per_bank_rsp_data_all;
for (genvar i = 0; i < NUM_BANKS; ++i) begin for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign per_bank_rsp_data_all[i] = {per_bank_rsp_data[i], per_bank_rsp_tag[i]}; assign per_bank_rsp_data_all[i] = {per_bank_rsp_data[i], per_bank_rsp_tag[i]};
end end
@ -206,7 +209,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
.NUM_INPUTS (NUM_BANKS), .NUM_INPUTS (NUM_BANKS),
.NUM_OUTPUTS (NUM_REQS), .NUM_OUTPUTS (NUM_REQS),
.DATAW (RSP_DATAW), .DATAW (RSP_DATAW),
.OUT_BUF (2) .OUT_BUF (OUT_BUF)
) rsp_xbar ( ) rsp_xbar (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@ -302,38 +305,38 @@ module VX_local_mem import VX_gpu_pkg::*; #(
assign per_bank_rsp_uuid[i] = 0; assign per_bank_rsp_uuid[i] = 0;
end end
end end
for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar i = 0; i < NUM_REQS; ++i) begin
always @(posedge clk) begin always @(posedge clk) begin
if (mem_bus_if[i].req_valid && mem_bus_if[i].req_ready) begin if (mem_bus_if[i].req_valid && mem_bus_if[i].req_ready) begin
if (mem_bus_if[i].req_data.rw) begin if (mem_bus_if[i].req_data.rw) begin
`TRACE(1, ("%d: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", `TRACE(1, ("%d: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, req_uuid[i])); $time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, req_uuid[i]));
end else begin end else begin
`TRACE(1, ("%d: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n", `TRACE(1, ("%d: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, req_uuid[i])); $time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, req_uuid[i]));
end end
end end
if (mem_bus_if[i].rsp_valid && mem_bus_if[i].rsp_ready) begin if (mem_bus_if[i].rsp_valid && mem_bus_if[i].rsp_ready) begin
`TRACE(1, ("%d: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%0h (#%0d)\n", `TRACE(1, ("%d: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data[i], rsp_uuid[i])); $time, INSTANCE_ID, i, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data[i], rsp_uuid[i]));
end end
end end
end end
for (genvar i = 0; i < NUM_BANKS; ++i) begin for (genvar i = 0; i < NUM_BANKS; ++i) begin
always @(posedge clk) begin always @(posedge clk) begin
if (per_bank_req_valid[i] && per_bank_req_ready[i]) begin if (per_bank_req_valid[i] && per_bank_req_ready[i]) begin
if (per_bank_req_rw[i]) begin if (per_bank_req_rw[i]) begin
`TRACE(2, ("%d: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", `TRACE(2, ("%d: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_byteen[i], per_bank_req_data[i], per_bank_req_uuid[i])); $time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_byteen[i], per_bank_req_data[i], per_bank_req_uuid[i]));
end else begin end else begin
`TRACE(2, ("%d: %s-bank%0d rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", `TRACE(2, ("%d: %s-bank%0d rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_uuid[i])); $time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_uuid[i]));
end end
end end
if (per_bank_rsp_valid[i] && per_bank_rsp_ready[i]) begin if (per_bank_rsp_valid[i] && per_bank_rsp_ready[i]) begin
`TRACE(2, ("%d: %s-bank%0d rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n", `TRACE(2, ("%d: %s-bank%0d rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, per_bank_rsp_tag[i], per_bank_rsp_data[i], per_bank_rsp_uuid[i])); $time, INSTANCE_ID, i, per_bank_rsp_tag[i], per_bank_rsp_data[i], per_bank_rsp_uuid[i]));
end end
end end

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -17,20 +17,19 @@ module VX_mem_switch import VX_gpu_pkg::*; #(
parameter NUM_REQS = 1, parameter NUM_REQS = 1,
parameter DATA_SIZE = 1, parameter DATA_SIZE = 1,
parameter TAG_WIDTH = 1, parameter TAG_WIDTH = 1,
parameter MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH, parameter ADDR_WIDTH = 1,
parameter REQ_OUT_BUF = 0, parameter REQ_OUT_BUF = 0,
parameter RSP_OUT_BUF = 0, parameter RSP_OUT_BUF = 0,
parameter `STRING ARBITER = "R", parameter `STRING ARBITER = "R",
parameter LOG_NUM_REQS = `CLOG2(NUM_REQS) parameter LOG_NUM_REQS = `CLOG2(NUM_REQS)
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire [`UP(LOG_NUM_REQS)-1:0] bus_sel, input wire [`UP(LOG_NUM_REQS)-1:0] bus_sel,
VX_mem_bus_if.slave bus_in_if, VX_mem_bus_if.slave bus_in_if,
VX_mem_bus_if.master bus_out_if [NUM_REQS] VX_mem_bus_if.master bus_out_if [NUM_REQS]
); );
localparam ADDR_WIDTH = (MEM_ADDR_WIDTH-`CLOG2(DATA_SIZE));
localparam DATA_WIDTH = (8 * DATA_SIZE); localparam DATA_WIDTH = (8 * DATA_SIZE);
localparam REQ_DATAW = TAG_WIDTH + ADDR_WIDTH + `ADDR_TYPE_WIDTH + 1 + DATA_SIZE + DATA_WIDTH; localparam REQ_DATAW = TAG_WIDTH + ADDR_WIDTH + `ADDR_TYPE_WIDTH + 1 + DATA_SIZE + DATA_WIDTH;
localparam RSP_DATAW = TAG_WIDTH + DATA_WIDTH; localparam RSP_DATAW = TAG_WIDTH + DATA_WIDTH;
@ -40,7 +39,7 @@ module VX_mem_switch import VX_gpu_pkg::*; #(
wire [NUM_REQS-1:0] req_valid_out; wire [NUM_REQS-1:0] req_valid_out;
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_out; wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_out;
wire [NUM_REQS-1:0] req_ready_out; wire [NUM_REQS-1:0] req_ready_out;
VX_stream_switch #( VX_stream_switch #(
.NUM_OUTPUTS (NUM_REQS), .NUM_OUTPUTS (NUM_REQS),
.DATAW (REQ_DATAW), .DATAW (REQ_DATAW),
@ -49,7 +48,7 @@ module VX_mem_switch import VX_gpu_pkg::*; #(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.sel_in (bus_sel), .sel_in (bus_sel),
.valid_in (bus_in_if.req_valid), .valid_in (bus_in_if.req_valid),
.data_in (bus_in_if.req_data), .data_in (bus_in_if.req_data),
.ready_in (bus_in_if.req_ready), .ready_in (bus_in_if.req_ready),
.valid_out (req_valid_out), .valid_out (req_valid_out),
@ -68,7 +67,7 @@ module VX_mem_switch import VX_gpu_pkg::*; #(
wire [NUM_REQS-1:0] rsp_valid_in; wire [NUM_REQS-1:0] rsp_valid_in;
wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_data_in; wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_data_in;
wire [NUM_REQS-1:0] rsp_ready_in; wire [NUM_REQS-1:0] rsp_ready_in;
for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar i = 0; i < NUM_REQS; ++i) begin
assign rsp_valid_in[i] = bus_out_if[i].rsp_valid; assign rsp_valid_in[i] = bus_out_if[i].rsp_valid;
assign rsp_data_in[i] = bus_out_if[i].rsp_data; assign rsp_data_in[i] = bus_out_if[i].rsp_data;
@ -77,15 +76,15 @@ module VX_mem_switch import VX_gpu_pkg::*; #(
VX_stream_arb #( VX_stream_arb #(
.NUM_INPUTS (NUM_REQS), .NUM_INPUTS (NUM_REQS),
.DATAW (RSP_DATAW), .DATAW (RSP_DATAW),
.ARBITER (ARBITER), .ARBITER (ARBITER),
.OUT_BUF (RSP_OUT_BUF) .OUT_BUF (RSP_OUT_BUF)
) rsp_arb ( ) rsp_arb (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.valid_in (rsp_valid_in), .valid_in (rsp_valid_in),
.data_in (rsp_data_in), .data_in (rsp_data_in),
.ready_in (rsp_ready_in), .ready_in (rsp_ready_in),
.valid_out (bus_in_if.rsp_valid), .valid_out (bus_in_if.rsp_valid),
.data_out (bus_in_if.rsp_data), .data_out (bus_in_if.rsp_data),
.ready_out (bus_in_if.rsp_ready), .ready_out (bus_in_if.rsp_ready),

View file

@ -12,7 +12,6 @@ SRC_DIR := $(VORTEX_HOME)/hw/syn/altera/opae
RTL_DIR := $(VORTEX_HOME)/hw/rtl RTL_DIR := $(VORTEX_HOME)/hw/rtl
DPI_DIR := $(VORTEX_HOME)/hw/dpi DPI_DIR := $(VORTEX_HOME)/hw/dpi
AFU_DIR := $(RTL_DIR)/afu/opae AFU_DIR := $(RTL_DIR)/afu/opae
THIRD_PARTY_DIR := $(VORTEX_HOME)/third_party
SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts
IP_CACHE_DIR := $(ROOT_DIR)/hw/syn/altera/ip_cache/$(DEVICE_FAMILY) IP_CACHE_DIR := $(ROOT_DIR)/hw/syn/altera/ip_cache/$(DEVICE_FAMILY)
@ -76,19 +75,19 @@ endif
# Debugging # Debugging
ifdef DEBUG ifdef DEBUG
ifeq ($(TARGET), fpga) ifneq ($(TARGET), fpga)
CFLAGS += -DNDEBUG -DSCOPE $(DBG_SCOPE_FLAGS) CFLAGS += -DNDEBUG
SCOPE_JSON += $(BUILD_DIR)/scope.json
else else
CFLAGS += $(DBG_TRACE_FLAGS) CFLAGS += $(DBG_TRACE_FLAGS)
endif endif
else else
CFLAGS += -DNDEBUG CFLAGS += -DNDEBUG
endif endif
# Enable scope analyzer # Enable scope analyzer
ifdef SCOPE ifdef SCOPE
CFLAGS += -DSCOPE CFLAGS += -DSCOPE $(DBG_SCOPE_FLAGS)
SCOPE_JSON += $(BUILD_DIR)/scope.json
endif endif
# Enable perf counters # Enable perf counters
@ -128,7 +127,7 @@ ifeq ($(TARGET), asesim)
afu_sim_setup -s $(BUILD_DIR)/setup.cfg $(BUILD_DIR)/synth afu_sim_setup -s $(BUILD_DIR)/setup.cfg $(BUILD_DIR)/synth
else else
afu_synth_setup -s $(BUILD_DIR)/setup.cfg $(BUILD_DIR)/synth afu_synth_setup -s $(BUILD_DIR)/setup.cfg $(BUILD_DIR)/synth
endif endif
build: ip-gen setup $(SCOPE_JSON) build: ip-gen setup $(SCOPE_JSON)
ifeq ($(TARGET), asesim) ifeq ($(TARGET), asesim)
@ -145,5 +144,5 @@ scope-json: $(BUILD_DIR)/scope.json
$(BUILD_DIR)/scope.json: $(BUILD_DIR)/vortex.xml $(BUILD_DIR)/scope.json: $(BUILD_DIR)/vortex.xml
$(SCRIPT_DIR)/scope.py $(BUILD_DIR)/vortex.xml -o $(BUILD_DIR)/scope.json $(SCRIPT_DIR)/scope.py $(BUILD_DIR)/vortex.xml -o $(BUILD_DIR)/scope.json
clean: clean:
rm -rf vortex_afu.h $(BUILD_DIR) rm -rf vortex_afu.h $(BUILD_DIR)

Some files were not shown because too many files have changed in this diff Show more