merge from master branch

This commit is contained in:
Jaewon Lee 2024-09-12 10:32:02 -04:00
commit e91eb4aed4
124 changed files with 1933 additions and 1718 deletions

View file

@ -219,7 +219,9 @@ jobs:
runs-on: ubuntu-20.04 runs-on: ubuntu-20.04
needs: build_vm needs: build_vm
strategy: strategy:
fail-fast: false
matrix: matrix:
name: [regression, opencl, cache, config1, config2, debug, stress, vm]
xlen: [32, 64] xlen: [32, 64]
steps: steps:
@ -267,4 +269,4 @@ jobs:
steps: steps:
- name: Check Completion - name: Check Completion
run: echo "All matrix jobs passed" run: echo "All matrix jobs passed"

View file

@ -44,10 +44,10 @@ clean: clean-build
$(MAKE) -C $(VORTEX_HOME)/third_party clean $(MAKE) -C $(VORTEX_HOME)/third_party clean
# Install setup # Install setup
KERNEL_INC_DST = $(PREFIX)/kernel/include KERNEL_INC_DST = $(INSTALLDIR)/kernel/include
KERNEL_LIB_DST = $(PREFIX)/kernel/lib$(XLEN) KERNEL_LIB_DST = $(INSTALLDIR)/kernel/lib$(XLEN)
RUNTIME_INC_DST = $(PREFIX)/runtime/include RUNTIME_INC_DST = $(INSTALLDIR)/runtime/include
RUNTIME_LIB_DST = $(PREFIX)/runtime/lib RUNTIME_LIB_DST = $(INSTALLDIR)/runtime/lib
KERNEL_HEADERS = $(wildcard $(VORTEX_HOME)/kernel/include/*.h) KERNEL_HEADERS = $(wildcard $(VORTEX_HOME)/kernel/include/*.h)
KERNEL_LIBS = $(wildcard kernel/*.a) KERNEL_LIBS = $(wildcard kernel/*.a)

View file

@ -1,5 +1,3 @@
[![Build Status](https://travis-ci.com/vortexgpgpu/vortex.svg?branch=master)](https://travis-ci.com/vortexgpgpu/vortex)
# Vortex GPGPU # Vortex GPGPU
Vortex is a full-stack open-source RISC-V GPGPU. Vortex is a full-stack open-source RISC-V GPGPU.
@ -47,20 +45,20 @@ More detailed build instructions can be found [here](docs/install_vortex.md).
- [Yosys](https://github.com/YosysHQ/yosys) - [Yosys](https://github.com/YosysHQ/yosys)
- [Sv2v](https://github.com/zachjs/sv2v) - [Sv2v](https://github.com/zachjs/sv2v)
### Install development tools ### Install development tools
``` ```sh
sudo apt-get install build-essential sudo apt-get install build-essential
sudo apt-get install binutils sudo apt-get install binutils
sudo apt-get install python sudo apt-get install python
sudo apt-get install uuid-dev sudo apt-get install uuid-dev
sudo apt-get install git sudo apt-get install git
``` ```
### Install Vortex codebase ### Install Vortex codebase
```sh
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
cd vortex
``` ```
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git -b vortex_vm
cd vortex
```
### Configure your build folder ### Configure your build folder
```sh
# #
# By default, the toolchain install location is the /opt folder; it can be overridden by setting --tooldir. # By default, the toolchain install location is the /opt folder; it can be overridden by setting --tooldir.
# This is the example for volvo server # This is the example for volvo server
@ -72,38 +70,45 @@ More detailed build instructions can be found [here](docs/install_vortex.md).
../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-2024-08-09 --prefix=$OUT_DIR ../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-2024-08-09 --prefix=$OUT_DIR
# Run the following instead to enable virtual memory feature in compilation # Run the following instead to enable virtual memory feature in compilation
../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-2024-08-09 --prefix=$OUT_DIR --vm_enable=1 ../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-2024-08-09 --prefix=$OUT_DIR --vm_enable=1
```
### Install prebuilt toolchain ### Install prebuilt toolchain
# We will use the precompiled tools in the volvo toolchain directory # We will use the precompiled tools in the volvo toolchain directory
### set environment variables ### set environment variables
```sh
# should always run before using the toolchain! # should always run before using the toolchain!
source ./ci/toolchain_env.sh source ./ci/toolchain_env.sh
```
### Building Vortex ### Building Vortex
make -s ```sh
make -s
```
### Quick demo running vecadd OpenCL kernel on 2 cores ### Quick demo running vecadd OpenCL kernel on 2 cores
$ ./ci/blackbox.sh --cores=2 --app=vecadd ```sh
./ci/blackbox.sh --cores=2 --app=vecadd
```
### Common Developer Tips ### Common Developer Tips
- Installing Vortex kernel and runtime libraries to use with external tools requires passing --prefix=<install-path> to the configure script. - Installing Vortex kernel and runtime libraries to use with external tools requires passing --prefix=<install-path> to the configure script.
```sh ```sh
$ ../configure --xlen=32 --tooldir=$HOME/tools --prefix=<install-path> ../configure --xlen=32 --tooldir=$HOME/tools --prefix=<install-path>
$ make -s make -s
$ make install make install
`````` ```
- Building Vortex 64-bit simply requires using --xlen=64 configure option. - Building Vortex 64-bit simply requires using --xlen=64 configure option.
```sh ```sh
$ ../configure --xlen=64 --tooldir=$HOME/tools ../configure --xlen=64 --tooldir=$HOME/tools
``` ```
- Sourcing "./ci/toolchain_env.sh" is required every time you start a new terminal. We recommend adding "source <build-path>/ci/toolchain_env.sh" to your ~/.bashrc file to automate the process at login. - Sourcing "./ci/toolchain_env.sh" is required every time you start a new terminal. We recommend adding "source <build-path>/ci/toolchain_env.sh" to your ~/.bashrc file to automate the process at login.
```sh ```sh
$ echo "source <build-path>/ci/toolchain_env.sh" >> ~/.bashrc echo "source <build-path>/ci/toolchain_env.sh" >> ~/.bashrc
``` ```
- Making changes to Makefiles in your source tree or adding new folders will require executing the "configure" script again to get it propagated into your build folder. - Making changes to Makefiles in your source tree or adding new folders will require executing the "configure" script again to get it propagated into your build folder.
```sh ```sh
$ ../configure ../configure
``` ```
- To debug the GPU, you can generate a "run.log" trace. See /docs/debugging.md for more information. - To debug the GPU, you can generate a "run.log" trace. See /docs/debugging.md for more information.
```sh ```sh
$ ./ci/blackbox.sh --app=demo --debug=3 ./ci/blackbox.sh --app=demo --debug=3
``` ```
- For additional information, check out the /docs. - For additional information, check out the /docs.

View file

@ -23,6 +23,8 @@ rm -f blackbox.*.cache
XLEN=${XLEN:=@XLEN@} XLEN=${XLEN:=@XLEN@}
XSIZE=$((XLEN / 8))
echo "Vortex Regression Test: XLEN=$XLEN" echo "Vortex Regression Test: XLEN=$XLEN"
unittest() unittest()
@ -99,11 +101,11 @@ regression()
# test global barrier # test global barrier
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tgbar" --cores=2 CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tgbar" --cores=2
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-n1 -tgbar" --cores=2 CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=opae --app=dogfood --args="-n1 -tgbar" --cores=2
# test local barrier # test local barrier
./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar" ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar"
./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-n1 -tbar" ./ci/blackbox.sh --driver=opae --app=dogfood --args="-n1 -tbar"
echo "regression tests done!" echo "regression tests done!"
} }
@ -148,32 +150,54 @@ vm(){
echo "vm tests done!" echo "vm tests done!"
} }
test_csv_trace() cache()
{ {
# test CSV trace generation echo "begin cache tests..."
make -C sim/simx clean && DEBUG=3 make -C sim/simx > /dev/null
make -C sim/rtlsim clean && DEBUG=3 CONFIGS="-DGPR_RESET" make -C sim/rtlsim > /dev/null
make -C tests/riscv/isa run-simx-32im > run_simx.log
make -C tests/riscv/isa run-rtlsim-32im > run_rtlsim.log
./ci/trace_csv.py -tsimx run_simx.log -otrace_simx.csv
./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv
diff trace_rtlsim.csv trace_simx.csv
# clean build
make -C sim/simx clean
make -C sim/rtlsim clean
}
debug() # disable local memory
{ CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo --perf=1
echo "begin debugging tests..." CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=demo --perf=1
test_csv_trace # disable L1 cache
CONFIGS="-DL1_DISABLE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DL1_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DDCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DICACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" # reduce l1 line size
./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" CONFIGS="-DL1_LINE_SIZE=$XSIZE" ./ci/blackbox.sh --driver=rtlsim --app=io_addr
./ci/blackbox.sh --driver=opae --cores=1 --scope --app=demo --args="-n1" CONFIGS="-DL1_LINE_SIZE=$XSIZE" ./ci/blackbox.sh --driver=simx --app=io_addr
CONFIGS="-DL1_LINE_SIZE=$XSIZE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DL1_LINE_SIZE=$XSIZE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx
echo "debugging tests done!" # test cache ways
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx
# test cache banking
CONFIGS="-DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
CONFIGS="-DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
# test writeback
CONFIGS="-DDCACHE_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --app=mstress
CONFIGS="-DDCACHE_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --app=mstress
CONFIGS="-DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
CONFIGS="-DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
# cache clustering
CONFIGS="-DSOCKET_SIZE=4 -DNUM_DCACHES=4 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --cores=4 --warps=1 --threads=2
# L2/L3
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=rtlsim --cores=4 --l2cache --app=diverge --args="-n1"
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=4 --l2cache --app=diverge --args="-n1"
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=diverge --args="-n1"
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=diverge --args="-n1"
echo "begin cache tests..."
} }
config1() config1()
@ -189,10 +213,12 @@ config1()
./ci/blackbox.sh --driver=simx --warps=8 --threads=16 --app=diverge ./ci/blackbox.sh --driver=simx --warps=8 --threads=16 --app=diverge
# cores clustering # cores clustering
./ci/blackbox.sh --driver=rtlsim --cores=4 --clusters=1 --app=diverge --args="-n1" ./ci/blackbox.sh --driver=rtlsim --cores=4 --app=diverge --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --app=diverge --args="-n1"
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=diverge --args="-n1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=diverge --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --clusters=1 --app=diverge --args="-n1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --app=diverge --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --app=diverge --args="-n1" CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=diverge --args="-n1"
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --app=diverge --args="-n1"
# issue width # issue width
CONFIGS="-DISSUE_WIDTH=2" ./ci/blackbox.sh --driver=rtlsim --app=diverge CONFIGS="-DISSUE_WIDTH=2" ./ci/blackbox.sh --driver=rtlsim --app=diverge
@ -212,22 +238,19 @@ config1()
CONFIGS="-DISSUE_WIDTH=2 -DNUM_FPU_BLOCK=1 -DNUM_FPU_LANES=2" ./ci/blackbox.sh --driver=simx --app=vecaddx CONFIGS="-DISSUE_WIDTH=2 -DNUM_FPU_BLOCK=1 -DNUM_FPU_LANES=2" ./ci/blackbox.sh --driver=simx --app=vecaddx
CONFIGS="-DISSUE_WIDTH=4 -DNUM_FPU_BLOCK=4 -DNUM_FPU_LANES=4" ./ci/blackbox.sh --driver=simx --app=vecaddx CONFIGS="-DISSUE_WIDTH=4 -DNUM_FPU_BLOCK=4 -DNUM_FPU_LANES=4" ./ci/blackbox.sh --driver=simx --app=vecaddx
# FPU's PE scaling
CONFIGS="-DFMA_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfmadd"
CONFIGS="-DFCVT_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tftoi"
CONFIGS="-DFDIV_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfdiv"
CONFIGS="-DFSQRT_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfsqrt"
CONFIGS="-DFNCP_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfclamp"
# LSU scaling # LSU scaling
CONFIGS="-DISSUE_WIDTH=2 -DNUM_LSU_BLOCK=1 -DNUM_LSU_LANES=2" ./ci/blackbox.sh --driver=rtlsim --app=vecaddx CONFIGS="-DISSUE_WIDTH=2 -DNUM_LSU_BLOCK=1 -DNUM_LSU_LANES=2" ./ci/blackbox.sh --driver=rtlsim --app=vecaddx
CONFIGS="-DISSUE_WIDTH=4 -DNUM_LSU_BLOCK=4 -DNUM_LSU_LANES=4" ./ci/blackbox.sh --driver=rtlsim --app=vecaddx CONFIGS="-DISSUE_WIDTH=4 -DNUM_LSU_BLOCK=4 -DNUM_LSU_LANES=4" ./ci/blackbox.sh --driver=rtlsim --app=vecaddx
CONFIGS="-DISSUE_WIDTH=2 -DNUM_LSU_BLOCK=1 -DNUM_LSU_LANES=2" ./ci/blackbox.sh --driver=simx --app=vecaddx CONFIGS="-DISSUE_WIDTH=2 -DNUM_LSU_BLOCK=1 -DNUM_LSU_LANES=2" ./ci/blackbox.sh --driver=simx --app=vecaddx
CONFIGS="-DISSUE_WIDTH=4 -DNUM_LSU_BLOCK=4 -DNUM_LSU_LANES=4" ./ci/blackbox.sh --driver=simx --app=vecaddx CONFIGS="-DISSUE_WIDTH=4 -DNUM_LSU_BLOCK=4 -DNUM_LSU_LANES=4" ./ci/blackbox.sh --driver=simx --app=vecaddx
# L2/L3
./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=diverge --args="-n1"
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l3cache --app=diverge --args="-n1"
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --l2cache --app=diverge --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --clusters=4 --l2cache --l3cache --app=diverge --args="-n1"
# multiple L1 caches per socket
CONFIGS="-DSOCKET_SIZE=4 -DNUM_DCACHES=2 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --cores=8 --warps=1 --threads=2
echo "configuration-1 tests done!" echo "configuration-1 tests done!"
} }
@ -262,55 +285,63 @@ config2()
# disabling ZICOND extension # disabling ZICOND extension
CONFIGS="-DEXT_ZICOND_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo CONFIGS="-DEXT_ZICOND_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo
# disable local memory
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo --perf=1
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=demo --perf=1
# test AXI bus # test AXI bus
AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --app=demo AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --app=mstress
# disable L1 cache
CONFIGS="-DL1_DISABLE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DL1_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DDCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DICACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
# reduce l1 line size
CONFIGS="-DL1_LINE_SIZE=$XLEN/8" ./ci/blackbox.sh --driver=rtlsim --app=io_addr
CONFIGS="-DL1_LINE_SIZE=$XLEN/8" ./ci/blackbox.sh --driver=simx --app=io_addr
CONFIGS="-DL1_LINE_SIZE=$XLEN/8 -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DL1_LINE_SIZE=$XLEN/8 -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx
# test cache ways
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx
# test cache banking
CONFIGS="-DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
CONFIGS="-DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
# test 128-bit MEM block # test 128-bit MEM block
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --app=demo CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --app=mstress
# test XLEN-bit MEM block
CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=opae --app=mstress
CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=simx --app=mstress
# test memory coalescing
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=rtlsim --app=mstress --threads=8
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8
# test single-bank DRAM # test single-bank DRAM
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --app=demo CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress
# test 27-bit DRAM address # test 27-bit DRAM address
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --app=demo CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --app=mstress
echo "configuration-2 tests done!" echo "configuration-2 tests done!"
} }
test_csv_trace()
{
# test CSV trace generation
make -C sim/simx clean && DEBUG=3 make -C sim/simx > /dev/null
make -C sim/rtlsim clean && DEBUG=3 CONFIGS="-DGPR_RESET" make -C sim/rtlsim > /dev/null
make -C tests/riscv/isa run-simx-32im > run_simx.log
make -C tests/riscv/isa run-rtlsim-32im > run_rtlsim.log
./ci/trace_csv.py -tsimx run_simx.log -otrace_simx.csv
./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv
diff trace_rtlsim.csv trace_simx.csv
# clean build
make -C sim/simx clean
make -C sim/rtlsim clean
}
debug()
{
echo "begin debugging tests..."
test_csv_trace
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
./ci/blackbox.sh --driver=opae --cores=1 --scope --app=demo --args="-n1"
echo "debugging tests done!"
}
stress() stress()
{ {
echo "begin stress tests..." echo "begin stress tests..."
# test verilator reset values # test verilator reset values
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood CONFIGS="-DVERILATOR_RESET_VALUE=1 -DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --args="-n128" --l2cache CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --args="-n128" --l2cache
echo "stress tests done!" echo "stress tests done!"
@ -329,19 +360,14 @@ synthesis()
show_usage() show_usage()
{ {
echo "Vortex Regression Test" echo "Vortex Regression Test"
echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--config1] [--config2] [--debug] [--stress] [--synthesis] [--all] [--h|--help]" echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--stress] [--synthesis] [--all] [--h|--help]"
} }
start=$SECONDS
declare -a tests=() declare -a tests=()
clean=0 clean=0
while [ "$1" != "" ]; do while [ "$1" != "" ]; do
case $1 in case $1 in
--vm )
tests+=("vm")
;;
--clean ) --clean )
clean=1 clean=1
;; ;;
@ -360,6 +386,12 @@ while [ "$1" != "" ]; do
--opencl ) --opencl )
tests+=("opencl") tests+=("opencl")
;; ;;
--cache )
tests+=("cache")
;;
--vm )
tests+=("vm")
;;
--config1 ) --config1 )
tests+=("config1") tests+=("config1")
;; ;;
@ -382,6 +414,7 @@ while [ "$1" != "" ]; do
tests+=("kernel") tests+=("kernel")
tests+=("regression") tests+=("regression")
tests+=("opencl") tests+=("opencl")
tests+=("cache")
tests+=("config1") tests+=("config1")
tests+=("config2") tests+=("config2")
tests+=("debug") tests+=("debug")
@ -405,6 +438,8 @@ then
make -s make -s
fi fi
start=$SECONDS
for test in "${tests[@]}"; do for test in "${tests[@]}"; do
$test $test
done done

View file

@ -19,6 +19,8 @@ import csv
import re import re
import inspect import inspect
configs = None
def parse_args(): def parse_args():
parser = argparse.ArgumentParser(description='CPU trace log to CSV format converter.') parser = argparse.ArgumentParser(description='CPU trace log to CSV format converter.')
parser.add_argument('-t', '--type', default='simx', help='log type (rtlsim or simx)') parser.add_argument('-t', '--type', default='simx', help='log type (rtlsim or simx)')
@ -26,6 +28,24 @@ def parse_args():
parser.add_argument('log', help='Input log file') parser.add_argument('log', help='Input log file')
return parser.parse_args() return parser.parse_args()
# Scan the log file `filename` for its "CONFIGS: ..." line and return the parsed
# settings as a dict; returns None when no line in the file matches the pattern.
def load_config(filename):
# local_mem_base is matched as hex digits after a literal "0x"; all other fields are decimal.
config_pattern = r"CONFIGS: num_threads=(\d+), num_warps=(\d+), num_cores=(\d+), num_clusters=(\d+), socket_size=(\d+), local_mem_base=0x([0-9a-fA-F]+), num_barriers=(\d+)"
with open(filename, 'r') as file:
for line in file:
config_match = re.search(config_pattern, line)
if config_match:
config = {
'num_threads': int(config_match.group(1)),
'num_warps': int(config_match.group(2)),
'num_cores': int(config_match.group(3)),
'num_clusters': int(config_match.group(4)),
'socket_size': int(config_match.group(5)),
# base 16: the capture group holds the hex digits without the "0x" prefix
'local_mem_base': int(config_match.group(6), 16),
'num_barriers': int(config_match.group(7)),
}
# only the first matching line is used; later matches are never read
return config
return None
def parse_simx(log_lines): def parse_simx(log_lines):
pc_pattern = r"PC=(0x[0-9a-fA-F]+)" pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
instr_pattern = r"Instr (0x[0-9a-fA-F]+):" instr_pattern = r"Instr (0x[0-9a-fA-F]+):"
@ -46,10 +66,10 @@ def parse_simx(log_lines):
instr_data = {} instr_data = {}
instr_data["lineno"] = lineno instr_data["lineno"] = lineno
instr_data["PC"] = re.search(pc_pattern, line).group(1) instr_data["PC"] = re.search(pc_pattern, line).group(1)
instr_data["core_id"] = re.search(core_id_pattern, line).group(1) instr_data["core_id"] = int(re.search(core_id_pattern, line).group(1))
instr_data["warp_id"] = re.search(warp_id_pattern, line).group(1) instr_data["warp_id"] = int(re.search(warp_id_pattern, line).group(1))
instr_data["tmask"] = re.search(tmask_pattern, line).group(1) instr_data["tmask"] = re.search(tmask_pattern, line).group(1)
instr_data["uuid"] = re.search(uuid_pattern, line).group(1) instr_data["uuid"] = int(re.search(uuid_pattern, line).group(1))
elif line.startswith("DEBUG Instr"): elif line.startswith("DEBUG Instr"):
instr_data["instr"] = re.search(instr_pattern, line).group(1) instr_data["instr"] = re.search(instr_pattern, line).group(1)
instr_data["opcode"] = re.search(opcode_pattern, line).group(1) instr_data["opcode"] = re.search(opcode_pattern, line).group(1)
@ -60,6 +80,7 @@ def parse_simx(log_lines):
instr_data["destination"] = re.search(destination_pattern, line).group(1) instr_data["destination"] = re.search(destination_pattern, line).group(1)
except Exception as e: except Exception as e:
print("Error at line {}: {}".format(lineno, e)) print("Error at line {}: {}".format(lineno, e))
instr_data = None
if instr_data: if instr_data:
entries.append(instr_data) entries.append(instr_data)
return entries return entries
@ -95,7 +116,7 @@ def append_value(text, reg, value, tmask_arr, sep):
return text, sep return text, sep
def parse_rtlsim(log_lines): def parse_rtlsim(log_lines):
config_pattern = r"CONFIGS: num_threads=(\d+), num_warps=(\d+), num_cores=(\d+), num_clusters=(\d+), socket_size=(\d+), local_mem_base=(\d+), num_barriers=(\d+)" global configs
line_pattern = r"\d+: cluster(\d+)-socket(\d+)-core(\d+)-(decode|issue|commit)" line_pattern = r"\d+: cluster(\d+)-socket(\d+)-core(\d+)-(decode|issue|commit)"
pc_pattern = r"PC=(0x[0-9a-fA-F]+)" pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
instr_pattern = r"instr=(0x[0-9a-fA-F]+)" instr_pattern = r"instr=(0x[0-9a-fA-F]+)"
@ -117,36 +138,20 @@ def parse_rtlsim(log_lines):
uuid_pattern = r"#(\d+)" uuid_pattern = r"#(\d+)"
entries = [] entries = []
instr_data = {} instr_data = {}
num_threads = 0 num_cores = configs['num_cores']
num_warps = 0 socket_size = configs['socket_size']
num_cores = 0 num_sockets = (num_cores + socket_size - 1) // socket_size
num_clusters = 0
socket_size = 0
local_mem_base = 0
num_barriers = 0
num_sockets = 0
for lineno, line in enumerate(log_lines, start=1): for lineno, line in enumerate(log_lines, start=1):
try: try:
config_match = re.search(config_pattern, line)
if config_match:
num_threads = int(config_match.group(1))
num_warps = int(config_match.group(2))
num_cores = int(config_match.group(3))
num_clusters = int(config_match.group(4))
socket_size = int(config_match.group(5))
local_mem_base = int(config_match.group(6))
num_barriers = int(config_match.group(7))
num_sockets = (num_cores + socket_size - 1) // socket_size
continue
line_match = re.search(line_pattern, line) line_match = re.search(line_pattern, line)
if line_match: if line_match:
PC = re.search(pc_pattern, line).group(1) PC = re.search(pc_pattern, line).group(1)
warp_id = re.search(warp_id_pattern, line).group(1) warp_id = int(re.search(warp_id_pattern, line).group(1))
tmask = re.search(tmask_pattern, line).group(1) tmask = re.search(tmask_pattern, line).group(1)
uuid = re.search(uuid_pattern, line).group(1) uuid = int(re.search(uuid_pattern, line).group(1))
cluster_id = line_match.group(1) cluster_id = int(line_match.group(1))
socket_id = line_match.group(2) socket_id = int(line_match.group(2))
core_id = line_match.group(3) core_id = int(line_match.group(3))
stage = line_match.group(4) stage = line_match.group(4)
if stage == "decode": if stage == "decode":
trace = {} trace = {}
@ -273,7 +278,9 @@ def split_log_file(log_filename):
return sublogs return sublogs
def main(): def main():
global configs
args = parse_args() args = parse_args()
configs = load_config(args.log)
sublogs = split_log_file(args.log) sublogs = split_log_file(args.log)
write_csv(sublogs, args.csv, args.type) write_csv(sublogs, args.csv, args.type)

View file

@ -19,7 +19,7 @@ TOOLDIR ?= @TOOLDIR@
OSVERSION ?= @OSVERSION@ OSVERSION ?= @OSVERSION@
PREFIX ?= @PREFIX@ INSTALLDIR ?= @INSTALLDIR@
LLVM_VORTEX ?= $(TOOLDIR)/llvm-vortex LLVM_VORTEX ?= $(TOOLDIR)/llvm-vortex

4
configure vendored
View file

@ -63,7 +63,7 @@ copy_files() {
filename_no_ext="${filename%.in}" filename_no_ext="${filename%.in}"
dest_file="$dest_dir/$filename_no_ext" dest_file="$dest_dir/$filename_no_ext"
mkdir -p "$dest_dir" mkdir -p "$dest_dir"
sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@PREFIX@|$PREFIX|g; s|@VM_ENABLE@|$VM_ENABLE|g" "$file" > "$dest_file" sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g; s|@VM_ENABLE@|$VM_ENABLE|g" "$file" > "$dest_file"
# apply permissions to bash scripts # apply permissions to bash scripts
read -r firstline < "$dest_file" read -r firstline < "$dest_file"
if [[ "$firstline" =~ ^#!.*bash ]]; then if [[ "$firstline" =~ ^#!.*bash ]]; then
@ -178,4 +178,4 @@ THIRD_PARTY_DIR=$SCRIPT_DIR/third_party
copy_files "$SCRIPT_DIR" "$CURRENT_DIR" copy_files "$SCRIPT_DIR" "$CURRENT_DIR"
echo "VM Enable: "$VM_ENABLE echo "VM Enable: "$VM_ENABLE

View file

@ -53,9 +53,9 @@ A waveform trace `trace.vcd` will be generated in the current directory during t
## Analyzing Vortex trace log ## Analyzing Vortex trace log
When debugging Vortex RTL or SimX Simulator, reading the trace run.log file can be overwhelming when the trace gets really large. When debugging Vortex RTL or SimX Simulator, reading the trace run.log file can be overwhelming when the trace gets really large.
We provide a trace sanitizer tool under ./ci/trace_csv.py that you can use to convert the large trace into a CSV file containing all the instructions that executed with their source and destination operands. To increase compatibility between traces you will need to initialize RTLSIM's GPRs to zero by defining GPR_RESET. We provide a trace sanitizer tool under ./ci/trace_csv.py that you can use to convert the large trace into a CSV file containing all the instructions that executed with their source and destination operands.
$ CONFIGS="-DGPR_RESET" ./ci/blackbox.sh --driver=rtlsim --app=demo --debug=3 --log=run_rtlsim.log $ ./ci/blackbox.sh --driver=rtlsim --app=demo --debug=3 --log=run_rtlsim.log
$ ./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv $ ./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv
$ ./ci/blackbox.sh --driver=simx --app=demo --debug=3 --log=run_simx.log $ ./ci/blackbox.sh --driver=simx --app=demo --debug=3 --log=run_simx.log

View file

@ -96,10 +96,11 @@ module VX_cluster import VX_gpu_pkg::*; #(
.CRSQ_SIZE (`L2_CRSQ_SIZE), .CRSQ_SIZE (`L2_CRSQ_SIZE),
.MSHR_SIZE (`L2_MSHR_SIZE), .MSHR_SIZE (`L2_MSHR_SIZE),
.MRSQ_SIZE (`L2_MRSQ_SIZE), .MRSQ_SIZE (`L2_MRSQ_SIZE),
.MREQ_SIZE (`L2_MREQ_SIZE), .MREQ_SIZE (`L2_WRITEBACK ? `L2_MSHR_SIZE : `L2_MREQ_SIZE),
.TAG_WIDTH (L2_TAG_WIDTH), .TAG_WIDTH (L2_TAG_WIDTH),
.WRITE_ENABLE (1), .WRITE_ENABLE (1),
.WRITEBACK (`L2_WRITEBACK), .WRITEBACK (`L2_WRITEBACK),
.DIRTY_BYTES (`L2_WRITEBACK),
.UUID_WIDTH (`UUID_WIDTH), .UUID_WIDTH (`UUID_WIDTH),
.CORE_OUT_BUF (2), .CORE_OUT_BUF (2),
.MEM_OUT_BUF (2), .MEM_OUT_BUF (2),

View file

@ -217,7 +217,7 @@
`ifndef IO_COUT_ADDR `ifndef IO_COUT_ADDR
`define IO_COUT_ADDR `IO_BASE_ADDR `define IO_COUT_ADDR `IO_BASE_ADDR
`endif `endif
`define IO_COUT_SIZE `MEM_BLOCK_SIZE `define IO_COUT_SIZE 64
`ifndef IO_MPM_ADDR `ifndef IO_MPM_ADDR
`define IO_MPM_ADDR (`IO_COUT_ADDR + `IO_COUT_SIZE) `define IO_MPM_ADDR (`IO_COUT_ADDR + `IO_COUT_SIZE)
@ -685,7 +685,7 @@
// Number of Banks // Number of Banks
`ifndef L3_NUM_BANKS `ifndef L3_NUM_BANKS
`define L3_NUM_BANKS `MIN(4, `NUM_CLUSTERS) `define L3_NUM_BANKS `MIN(8, `NUM_CLUSTERS)
`endif `endif
// Core Response Queue Size // Core Response Queue Size
@ -718,6 +718,15 @@
`define L3_WRITEBACK 0 `define L3_WRITEBACK 0
`endif `endif
`ifndef MEMORY_BANKS
`define MEMORY_BANKS 8
`endif
// Number of Memory Ports from LLC
`ifndef NUM_MEM_PORTS
`define NUM_MEM_PORTS `MIN(`MEMORY_BANKS, `L3_NUM_BANKS)
`endif
// ISA Extensions ///////////////////////////////////////////////////////////// // ISA Extensions /////////////////////////////////////////////////////////////
`ifdef EXT_A_ENABLE `ifdef EXT_A_ENABLE

View file

@ -238,11 +238,11 @@
`define RESET_RELAY(dst, src) \ `define RESET_RELAY(dst, src) \
`RESET_RELAY_EX (dst, src, 1, 0) `RESET_RELAY_EX (dst, src, 1, 0)
// size(x): 0 -> 0, 1 -> 1, 2 -> 2, 3 -> 2, 4-> 2 // size(x): 0 -> 0, 1 -> 1, 2 -> 2, 3 -> 2, 4-> 2, 5 -> 2
`define TO_OUT_BUF_SIZE(out_reg) `MIN(out_reg, 2) `define TO_OUT_BUF_SIZE(s) `MIN(s, 2)
// reg(x): 0 -> 0, 1 -> 1, 2 -> 0, 3 -> 1, 4 -> 2 // reg(x): 0 -> 0, 1 -> 1, 2 -> 0, 3 -> 1, 4 -> 2, 5 -> 3
`define TO_OUT_BUF_REG(out_reg) ((out_reg & 1) + ((out_reg >> 2) << 1)) `define TO_OUT_BUF_REG(s) ((s < 2) ? s : (s - 2))
`define REPEAT(n,f,s) `_REPEAT_``n(f,s) `define REPEAT(n,f,s) `_REPEAT_``n(f,s)
`define _REPEAT_0(f,s) `define _REPEAT_0(f,s)

View file

@ -145,11 +145,12 @@ module VX_socket import VX_gpu_pkg::*; #(
.CRSQ_SIZE (`DCACHE_CRSQ_SIZE), .CRSQ_SIZE (`DCACHE_CRSQ_SIZE),
.MSHR_SIZE (`DCACHE_MSHR_SIZE), .MSHR_SIZE (`DCACHE_MSHR_SIZE),
.MRSQ_SIZE (`DCACHE_MRSQ_SIZE), .MRSQ_SIZE (`DCACHE_MRSQ_SIZE),
.MREQ_SIZE (`DCACHE_MREQ_SIZE), .MREQ_SIZE (`DCACHE_WRITEBACK ? `DCACHE_MSHR_SIZE : `DCACHE_MREQ_SIZE),
.TAG_WIDTH (DCACHE_TAG_WIDTH), .TAG_WIDTH (DCACHE_TAG_WIDTH),
.UUID_WIDTH (`UUID_WIDTH), .UUID_WIDTH (`UUID_WIDTH),
.WRITE_ENABLE (1), .WRITE_ENABLE (1),
.WRITEBACK (`DCACHE_WRITEBACK), .WRITEBACK (`DCACHE_WRITEBACK),
.DIRTY_BYTES (`DCACHE_WRITEBACK),
.NC_ENABLE (1), .NC_ENABLE (1),
.CORE_OUT_BUF (2), .CORE_OUT_BUF (2),
.MEM_OUT_BUF (2) .MEM_OUT_BUF (2)
@ -178,8 +179,6 @@ module VX_socket import VX_gpu_pkg::*; #(
`ASSIGN_VX_MEM_BUS_IF_X (l1_mem_bus_if[0], icache_mem_bus_if, L1_MEM_TAG_WIDTH, ICACHE_MEM_TAG_WIDTH); `ASSIGN_VX_MEM_BUS_IF_X (l1_mem_bus_if[0], icache_mem_bus_if, L1_MEM_TAG_WIDTH, ICACHE_MEM_TAG_WIDTH);
`ASSIGN_VX_MEM_BUS_IF_X (l1_mem_bus_if[1], dcache_mem_bus_if, L1_MEM_TAG_WIDTH, DCACHE_MEM_TAG_WIDTH); `ASSIGN_VX_MEM_BUS_IF_X (l1_mem_bus_if[1], dcache_mem_bus_if, L1_MEM_TAG_WIDTH, DCACHE_MEM_TAG_WIDTH);
`RESET_RELAY (mem_arb_reset, reset);
VX_mem_arb #( VX_mem_arb #(
.NUM_INPUTS (2), .NUM_INPUTS (2),
.DATA_SIZE (`L1_LINE_SIZE), .DATA_SIZE (`L1_LINE_SIZE),
@ -190,7 +189,7 @@ module VX_socket import VX_gpu_pkg::*; #(
.RSP_OUT_BUF (2) .RSP_OUT_BUF (2)
) mem_arb ( ) mem_arb (
.clk (clk), .clk (clk),
.reset (mem_arb_reset), .reset (reset),
.bus_in_if (l1_mem_bus_if), .bus_in_if (l1_mem_bus_if),
.bus_out_if (l1_mem_arb_bus_if) .bus_out_if (l1_mem_arb_bus_if)
); );

View file

@ -166,6 +166,10 @@
`define VX_CSR_MPM_MEM_WRITES_H 12'hB99 `define VX_CSR_MPM_MEM_WRITES_H 12'hB99
`define VX_CSR_MPM_MEM_LT 12'hB1A // memory latency `define VX_CSR_MPM_MEM_LT 12'hB1A // memory latency
`define VX_CSR_MPM_MEM_LT_H 12'hB9A `define VX_CSR_MPM_MEM_LT_H 12'hB9A
`define VX_CSR_MPM_MEM_BANK_CNTR 12'hB1E // memory bank requests
`define VX_CSR_MPM_MEM_BANK_CNTR_H 12'hB9E
`define VX_CSR_MPM_MEM_BANK_TICK 12'hB1F // memory ticks
`define VX_CSR_MPM_MEM_BANK_TICK_H 12'hB9F
// PERF: lmem // PERF: lmem
`define VX_CSR_MPM_LMEM_READS 12'hB1B // memory reads `define VX_CSR_MPM_LMEM_READS 12'hB1B // memory reads
`define VX_CSR_MPM_LMEM_READS_H 12'hB9B `define VX_CSR_MPM_LMEM_READS_H 12'hB9B

View file

@ -80,10 +80,11 @@ module Vortex import VX_gpu_pkg::*; (
.CRSQ_SIZE (`L3_CRSQ_SIZE), .CRSQ_SIZE (`L3_CRSQ_SIZE),
.MSHR_SIZE (`L3_MSHR_SIZE), .MSHR_SIZE (`L3_MSHR_SIZE),
.MRSQ_SIZE (`L3_MRSQ_SIZE), .MRSQ_SIZE (`L3_MRSQ_SIZE),
.MREQ_SIZE (`L3_MREQ_SIZE), .MREQ_SIZE (`L3_WRITEBACK ? `L3_MSHR_SIZE : `L3_MREQ_SIZE),
.TAG_WIDTH (L2_MEM_TAG_WIDTH), .TAG_WIDTH (L2_MEM_TAG_WIDTH),
.WRITE_ENABLE (1), .WRITE_ENABLE (1),
.WRITEBACK (`L3_WRITEBACK), .WRITEBACK (`L3_WRITEBACK),
.DIRTY_BYTES (`L3_WRITEBACK),
.UUID_WIDTH (`UUID_WIDTH), .UUID_WIDTH (`UUID_WIDTH),
.CORE_OUT_BUF (2), .CORE_OUT_BUF (2),
.MEM_OUT_BUF (2), .MEM_OUT_BUF (2),
@ -192,12 +193,12 @@ module Vortex import VX_gpu_pkg::*; (
always @(posedge clk) begin always @(posedge clk) begin
if (mem_req_fire) begin if (mem_req_fire) begin
if (mem_req_rw) if (mem_req_rw)
`TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h data=0x%0h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data)); `TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data));
else else
`TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen)); `TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen));
end end
if (mem_rsp_fire) begin if (mem_rsp_fire) begin
`TRACE(1, ("%d: MEM Rsp: tag=0x%0h, data=0x%0h\n", $time, mem_rsp_tag, mem_rsp_data)); `TRACE(1, ("%d: MEM Rd Rsp: tag=0x%0h, data=0x%h\n", $time, mem_rsp_tag, mem_rsp_data));
end end
end end
`endif `endif

View file

@ -240,13 +240,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
MMIO_CMD_ARG0: begin MMIO_CMD_ARG0: begin
cmd_args[0] <= 64'(cp2af_sRxPort.c0.data); cmd_args[0] <= 64'(cp2af_sRxPort.c0.data);
`ifdef DBG_TRACE_AFU `ifdef DBG_TRACE_AFU
`TRACE(2, ("%d: MMIO_CMD_ARG0: data=0x%0h\n", $time, 64'(cp2af_sRxPort.c0.data))); `TRACE(2, ("%d: MMIO_CMD_ARG0: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data)));
`endif `endif
end end
MMIO_CMD_ARG1: begin MMIO_CMD_ARG1: begin
cmd_args[1] <= 64'(cp2af_sRxPort.c0.data); cmd_args[1] <= 64'(cp2af_sRxPort.c0.data);
`ifdef DBG_TRACE_AFU `ifdef DBG_TRACE_AFU
`TRACE(2, ("%d: MMIO_CMD_ARG1: data=0x%0h\n", $time, 64'(cp2af_sRxPort.c0.data))); `TRACE(2, ("%d: MMIO_CMD_ARG1: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data)));
`endif `endif
end end
MMIO_CMD_ARG2: begin MMIO_CMD_ARG2: begin
@ -263,13 +263,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
`ifdef SCOPE `ifdef SCOPE
MMIO_SCOPE_WRITE: begin MMIO_SCOPE_WRITE: begin
`ifdef DBG_TRACE_AFU `ifdef DBG_TRACE_AFU
`TRACE(2, ("%d: MMIO_SCOPE_WRITE: data=0x%0h\n", $time, cmd_scope_wdata)); `TRACE(2, ("%d: MMIO_SCOPE_WRITE: data=0x%h\n", $time, cmd_scope_wdata));
`endif `endif
end end
`endif `endif
default: begin default: begin
`ifdef DBG_TRACE_AFU `ifdef DBG_TRACE_AFU
`TRACE(2, ("%d: Unknown MMIO Wr: addr=0x%0h, data=0x%0h\n", $time, mmio_hdr.address, 64'(cp2af_sRxPort.c0.data))); `TRACE(2, ("%d: Unknown MMIO Wr: addr=0x%0h, data=0x%h\n", $time, mmio_hdr.address, 64'(cp2af_sRxPort.c0.data)));
`endif `endif
end end
endcase endcase
@ -305,14 +305,14 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
MMIO_SCOPE_READ: begin MMIO_SCOPE_READ: begin
mmio_tx.data <= cmd_scope_rdata; mmio_tx.data <= cmd_scope_rdata;
`ifdef DBG_TRACE_AFU `ifdef DBG_TRACE_AFU
`TRACE(2, ("%d: MMIO_SCOPE_READ: data=0x%0h\n", $time, cmd_scope_rdata)); `TRACE(2, ("%d: MMIO_SCOPE_READ: data=0x%h\n", $time, cmd_scope_rdata));
`endif `endif
end end
`endif `endif
MMIO_DEV_CAPS: begin MMIO_DEV_CAPS: begin
mmio_tx.data <= dev_caps; mmio_tx.data <= dev_caps;
`ifdef DBG_TRACE_AFU `ifdef DBG_TRACE_AFU
`TRACE(2, ("%d: MMIO_DEV_CAPS: data=0x%0h\n", $time, dev_caps)); `TRACE(2, ("%d: MMIO_DEV_CAPS: data=0x%h\n", $time, dev_caps));
`endif `endif
end end
MMIO_ISA_CAPS: begin MMIO_ISA_CAPS: begin
@ -580,8 +580,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.TAG_WIDTH (AVS_REQ_TAGW+1) .TAG_WIDTH (AVS_REQ_TAGW+1)
) mem_bus_if[1](); ) mem_bus_if[1]();
`RESET_RELAY (mem_arb_reset, reset);
VX_mem_arb #( VX_mem_arb #(
.NUM_INPUTS (2), .NUM_INPUTS (2),
.DATA_SIZE (LMEM_DATA_SIZE), .DATA_SIZE (LMEM_DATA_SIZE),
@ -592,7 +590,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.RSP_OUT_BUF (0) .RSP_OUT_BUF (0)
) mem_arb ( ) mem_arb (
.clk (clk), .clk (clk),
.reset (mem_arb_reset), .reset (reset),
.bus_in_if (cci_vx_mem_bus_if), .bus_in_if (cci_vx_mem_bus_if),
.bus_out_if (mem_bus_if) .bus_out_if (mem_bus_if)
); );
@ -760,7 +758,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
cci_mem_wr_req_addr_base <= cci_mem_wr_req_addr_base + CCI_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE); cci_mem_wr_req_addr_base <= cci_mem_wr_req_addr_base + CCI_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE);
end end
`ifdef DBG_TRACE_AFU `ifdef DBG_TRACE_AFU
`TRACE(2, ("%d: CCI Rd Rsp: idx=%0d, ctr=%0d, data=0x%0h\n", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data)); `TRACE(2, ("%d: CCI Rd Rsp: idx=%0d, ctr=%0d, data=0x%h\n", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data));
`endif `endif
end end
@ -778,14 +776,12 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
end end
end end
`RESET_RELAY (cci_rdq_reset, reset);
VX_fifo_queue #( VX_fifo_queue #(
.DATAW (CCI_RD_QUEUE_DATAW), .DATAW (CCI_RD_QUEUE_DATAW),
.DEPTH (CCI_RD_QUEUE_SIZE) .DEPTH (CCI_RD_QUEUE_SIZE)
) cci_rd_req_queue ( ) cci_rd_req_queue (
.clk (clk), .clk (clk),
.reset (cci_rdq_reset), .reset (reset),
.push (cci_rdq_push), .push (cci_rdq_push),
.pop (cci_rdq_pop), .pop (cci_rdq_pop),
.data_in (cci_rdq_din), .data_in (cci_rdq_din),
@ -906,7 +902,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
cci_wr_req_done <= 1; cci_wr_req_done <= 1;
end end
`ifdef DBG_TRACE_AFU `ifdef DBG_TRACE_AFU
`TRACE(2, ("%d: CCI Wr Req: addr=0x%0h, rem=%0d, pending=%0d, data=0x%0h\n", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data)); `TRACE(2, ("%d: CCI Wr Req: addr=0x%0h, rem=%0d, pending=%0d, data=0x%h\n", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data));
`endif `endif
end end
@ -1093,13 +1089,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
always @(posedge clk) begin always @(posedge clk) begin
for (integer i = 0; i < NUM_LOCAL_MEM_BANKS; ++i) begin for (integer i = 0; i < NUM_LOCAL_MEM_BANKS; ++i) begin
if (avs_write[i] && ~avs_waitrequest[i]) begin if (avs_write[i] && ~avs_waitrequest[i]) begin
`TRACE(2, ("%d: AVS Wr Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h, data=0x%0h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i], avs_writedata[i])); `TRACE(2, ("%d: AVS Wr Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h, data=0x%h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i], avs_writedata[i]));
end end
if (avs_read[i] && ~avs_waitrequest[i]) begin if (avs_read[i] && ~avs_waitrequest[i]) begin
`TRACE(2, ("%d: AVS Rd Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i])); `TRACE(2, ("%d: AVS Rd Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i]));
end end
if (avs_readdatavalid[i]) begin if (avs_readdatavalid[i]) begin
`TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%0h\n", $time, i, avs_readdata[i])); `TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%h\n", $time, i, avs_readdata[i]));
end end
end end
end end

View file

@ -377,13 +377,13 @@ module VX_afu_wrap #(
`TRACE(2, ("%d: AFU Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i])); `TRACE(2, ("%d: AFU Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i]));
end end
if (m_axi_mem_wvalid_a[i] && m_axi_mem_wready_a[i]) begin if (m_axi_mem_wvalid_a[i] && m_axi_mem_wready_a[i]) begin
`TRACE(2, ("%d: AFU Wr Req [%0d]: data=0x%0h\n", $time, i, m_axi_mem_wdata_a[i])); `TRACE(2, ("%d: AFU Wr Req [%0d]: data=0x%h\n", $time, i, m_axi_mem_wdata_a[i]));
end end
if (m_axi_mem_arvalid_a[i] && m_axi_mem_arready_a[i]) begin if (m_axi_mem_arvalid_a[i] && m_axi_mem_arready_a[i]) begin
`TRACE(2, ("%d: AFU Rd Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_araddr_a[i], m_axi_mem_arid_a[i])); `TRACE(2, ("%d: AFU Rd Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_araddr_a[i], m_axi_mem_arid_a[i]));
end end
if (m_axi_mem_rvalid_a[i] && m_axi_mem_rready_a[i]) begin if (m_axi_mem_rvalid_a[i] && m_axi_mem_rready_a[i]) begin
`TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i])); `TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%h, tag=0x%0h\n", $time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i]));
end end
end end
end end

View file

@ -14,6 +14,7 @@
`include "VX_cache_define.vh" `include "VX_cache_define.vh"
module VX_bank_flush #( module VX_bank_flush #(
parameter BANK_ID = 0,
// Size of cache in bytes // Size of cache in bytes
parameter CACHE_SIZE = 1024, parameter CACHE_SIZE = 1024,
// Size of line inside a bank in bytes // Size of line inside a bank in bytes
@ -27,33 +28,36 @@ module VX_bank_flush #(
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire flush_in_valid, input wire flush_begin,
output wire flush_in_ready, output wire flush_end,
output wire flush_out_init, output wire flush_init,
output wire flush_out_valid, output wire flush_valid,
output wire [`CS_LINE_SEL_BITS-1:0] flush_out_line, output wire [`CS_LINE_SEL_BITS-1:0] flush_line,
output wire [NUM_WAYS-1:0] flush_out_way, output wire [NUM_WAYS-1:0] flush_way,
input wire flush_out_ready, input wire flush_ready,
input wire mshr_empty input wire mshr_empty,
input wire bank_empty
); );
parameter CTR_WIDTH = `CS_LINE_SEL_BITS + (WRITEBACK ? `CS_WAY_SEL_BITS : 0); // ways iteration is only needed when eviction is enabled
localparam CTR_WIDTH = `CS_LINE_SEL_BITS + (WRITEBACK ? `CS_WAY_SEL_BITS : 0);
parameter STATE_IDLE = 2'd0; localparam STATE_IDLE = 0;
parameter STATE_INIT = 2'd1; localparam STATE_INIT = 1;
parameter STATE_FLUSH = 2'd2; localparam STATE_WAIT1 = 2;
localparam STATE_FLUSH = 3;
localparam STATE_WAIT2 = 4;
localparam STATE_DONE = 5;
reg [2:0] state_r, state_n;
reg [CTR_WIDTH-1:0] counter_r; reg [CTR_WIDTH-1:0] counter_r;
reg [1:0] state_r, state_n;
reg flush_in_ready_r, flush_in_ready_n;
always @(*) begin always @(*) begin
state_n = state_r; state_n = state_r;
flush_in_ready_n = 0;
case (state_r) case (state_r)
// STATE_IDLE STATE_IDLE: begin
default: begin if (flush_begin) begin
if (flush_in_valid && mshr_empty) begin state_n = STATE_WAIT1;
state_n = STATE_FLUSH;
end end
end end
STATE_INIT: begin STATE_INIT: begin
@ -61,25 +65,41 @@ module VX_bank_flush #(
state_n = STATE_IDLE; state_n = STATE_IDLE;
end end
end end
STATE_FLUSH: begin STATE_WAIT1: begin
if (counter_r == ((2 ** CTR_WIDTH)-1)) begin // wait for pending requests to complete
state_n = STATE_IDLE; if (mshr_empty) begin
flush_in_ready_n = 1; state_n = STATE_FLUSH;
end end
end end
STATE_FLUSH: begin
if (counter_r == ((2 ** CTR_WIDTH)-1) && flush_ready) begin
state_n = (BANK_ID == 0) ? STATE_DONE : STATE_WAIT2;
end
end
STATE_WAIT2: begin
// ensure the bank is empty before notifying the cache flush unit,
// because the flush request to lower caches only goes through bank0
// and it is important that this request gets sent out last.
if (bank_empty) begin
state_n = STATE_DONE;
end
end
STATE_DONE: begin
// generate a completion pulse
state_n = STATE_IDLE;
end
endcase endcase
end end
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
state_r <= STATE_INIT; state_r <= STATE_INIT;
counter_r <= '0; counter_r <= '0;
flush_in_ready_r <= '0;
end else begin end else begin
state_r <= state_n; state_r <= state_n;
flush_in_ready_r <= flush_in_ready_n;
if (state_r != STATE_IDLE) begin if (state_r != STATE_IDLE) begin
if ((state_r == STATE_INIT) || flush_out_ready) begin if ((state_r == STATE_INIT)
|| ((state_r == STATE_FLUSH) && flush_ready)) begin
counter_r <= counter_r + CTR_WIDTH'(1); counter_r <= counter_r + CTR_WIDTH'(1);
end end
end else begin end else begin
@ -88,22 +108,20 @@ module VX_bank_flush #(
end end
end end
assign flush_in_ready = flush_in_ready_r; assign flush_end = (state_r == STATE_DONE);
assign flush_init = (state_r == STATE_INIT);
assign flush_out_init = (state_r == STATE_INIT); assign flush_valid = (state_r == STATE_FLUSH);
assign flush_line = counter_r[`CS_LINE_SEL_BITS-1:0];
assign flush_out_valid = (state_r == STATE_FLUSH);
assign flush_out_line = counter_r[`CS_LINE_SEL_BITS-1:0];
if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin
reg [NUM_WAYS-1:0] flush_out_way_r; reg [NUM_WAYS-1:0] flush_way_r;
always @(*) begin always @(*) begin
flush_out_way_r = '0; flush_way_r = '0;
flush_out_way_r[counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]] = 1; flush_way_r[counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]] = 1;
end end
assign flush_out_way = flush_out_way_r; assign flush_way = flush_way_r;
end else begin end else begin
assign flush_out_way = {NUM_WAYS{1'b1}}; assign flush_way = {NUM_WAYS{1'b1}};
end end
endmodule endmodule

View file

@ -45,6 +45,9 @@ module VX_cache import VX_gpu_pkg::*; #(
// Enable cache writeback // Enable cache writeback
parameter WRITEBACK = 0, parameter WRITEBACK = 0,
// Enable dirty bytes on writeback
parameter DIRTY_BYTES = 0,
// Request debug identifier // Request debug identifier
parameter UUID_WIDTH = 0, parameter UUID_WIDTH = 0,
@ -69,8 +72,13 @@ module VX_cache import VX_gpu_pkg::*; #(
VX_mem_bus_if.master mem_bus_if VX_mem_bus_if.master mem_bus_if
); );
`STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter")) `STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter: number of banks must be power of 2"))
`STATIC_ASSERT(WRITE_ENABLE || !WRITEBACK, ("invalid parameter")) `STATIC_ASSERT(WRITE_ENABLE || !WRITEBACK, ("invalid parameter: writeback requires write enable"))
`STATIC_ASSERT(WRITEBACK || !DIRTY_BYTES, ("invalid parameter: dirty bytes require writeback"))
// In writeback mode, memory fill response may issue a new memory request to handle evicted blocks.
// We need to ensure that the memory request queue never fills up to avoid deadlock.
`STATIC_ASSERT(!WRITEBACK || (MREQ_SIZE >= MSHR_SIZE), ("invalid parameter: writeback requires MREQ_SIZE >= MSHR_SIZE"))
localparam REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS); localparam REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS);
localparam WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS); localparam WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS);
@ -101,26 +109,23 @@ module VX_cache import VX_gpu_pkg::*; #(
.TAG_WIDTH (TAG_WIDTH) .TAG_WIDTH (TAG_WIDTH)
) core_bus2_if[NUM_REQS](); ) core_bus2_if[NUM_REQS]();
wire [NUM_BANKS-1:0] per_bank_flush_valid; wire [NUM_BANKS-1:0] per_bank_flush_begin;
wire [NUM_BANKS-1:0] per_bank_flush_ready; wire [NUM_BANKS-1:0] per_bank_flush_end;
wire [NUM_BANKS-1:0] per_bank_core_req_fire; wire [NUM_BANKS-1:0] per_bank_core_req_fire;
// this reset relay is required to sync with bank initialization
`RESET_RELAY (flush_reset, reset);
VX_cache_flush #( VX_cache_flush #(
.NUM_REQS (NUM_REQS), .NUM_REQS (NUM_REQS),
.NUM_BANKS (NUM_BANKS), .NUM_BANKS (NUM_BANKS),
.BANK_SEL_LATENCY (`TO_OUT_BUF_REG(REQ_XBAR_BUF)) // bank xbar latency .BANK_SEL_LATENCY (`TO_OUT_BUF_REG(REQ_XBAR_BUF)) // bank xbar latency
) flush_unit ( ) flush_unit (
.clk (clk), .clk (clk),
.reset (flush_reset), .reset (reset),
.core_bus_in_if (core_bus_if), .core_bus_in_if (core_bus_if),
.core_bus_out_if (core_bus2_if), .core_bus_out_if (core_bus2_if),
.bank_req_fire (per_bank_core_req_fire), .bank_req_fire (per_bank_core_req_fire),
.flush_valid (per_bank_flush_valid), .flush_begin (per_bank_flush_begin),
.flush_ready (per_bank_flush_ready) .flush_end (per_bank_flush_end)
); );
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
@ -131,9 +136,9 @@ module VX_cache import VX_gpu_pkg::*; #(
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s; wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s;
wire [NUM_REQS-1:0] core_rsp_ready_s; wire [NUM_REQS-1:0] core_rsp_ready_s;
for (genvar i = 0; i < NUM_REQS; ++i) begin `RESET_RELAY_EX (core_rsp_reset, reset, NUM_REQS, `MAX_FANOUT);
`RESET_RELAY (core_rsp_reset, reset); for (genvar i = 0; i < NUM_REQS; ++i) begin
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (`CS_WORD_WIDTH + TAG_WIDTH), .DATAW (`CS_WORD_WIDTH + TAG_WIDTH),
@ -141,7 +146,7 @@ module VX_cache import VX_gpu_pkg::*; #(
.OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF)) .OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF))
) core_rsp_buf ( ) core_rsp_buf (
.clk (clk), .clk (clk),
.reset (core_rsp_reset), .reset (core_rsp_reset[i]),
.valid_in (core_rsp_valid_s[i]), .valid_in (core_rsp_valid_s[i]),
.ready_in (core_rsp_ready_s[i]), .ready_in (core_rsp_ready_s[i]),
.data_in ({core_rsp_data_s[i], core_rsp_tag_s[i]}), .data_in ({core_rsp_data_s[i], core_rsp_tag_s[i]}),
@ -165,15 +170,13 @@ module VX_cache import VX_gpu_pkg::*; #(
wire mem_bus_if_flush; wire mem_bus_if_flush;
`RESET_RELAY (mem_req_reset, reset);
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1), .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1),
.SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), .SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
) mem_req_buf ( ) mem_req_buf (
.clk (clk), .clk (clk),
.reset (mem_req_reset), .reset (reset),
.valid_in (mem_req_valid_s), .valid_in (mem_req_valid_s),
.ready_in (mem_req_ready_s), .ready_in (mem_req_ready_s),
.data_in ({mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_data_s, mem_req_tag_s, mem_req_flush_s}), .data_in ({mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_data_s, mem_req_tag_s, mem_req_flush_s}),
@ -192,15 +195,13 @@ module VX_cache import VX_gpu_pkg::*; #(
wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_s; wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_s;
wire mem_rsp_ready_s; wire mem_rsp_ready_s;
`RESET_RELAY (mem_rsp_reset, reset);
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (MEM_TAG_WIDTH + `CS_LINE_WIDTH), .DATAW (MEM_TAG_WIDTH + `CS_LINE_WIDTH),
.SIZE (MRSQ_SIZE), .SIZE (MRSQ_SIZE),
.OUT_REG (MRSQ_SIZE > 2) .OUT_REG (MRSQ_SIZE > 2)
) mem_rsp_queue ( ) mem_rsp_queue (
.clk (clk), .clk (clk),
.reset (mem_rsp_reset), .reset (reset),
.valid_in (mem_bus_if.rsp_valid), .valid_in (mem_bus_if.rsp_valid),
.ready_in (mem_bus_if.rsp_ready), .ready_in (mem_bus_if.rsp_ready),
.data_in ({mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data}), .data_in ({mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data}),
@ -316,6 +317,7 @@ module VX_cache import VX_gpu_pkg::*; #(
.NUM_OUTPUTS (NUM_BANKS), .NUM_OUTPUTS (NUM_BANKS),
.DATAW (CORE_REQ_DATAW), .DATAW (CORE_REQ_DATAW),
.PERF_CTR_BITS (`PERF_CTR_BITS), .PERF_CTR_BITS (`PERF_CTR_BITS),
.ARBITER ("F"),
.OUT_BUF (REQ_XBAR_BUF) .OUT_BUF (REQ_XBAR_BUF)
) req_xbar ( ) req_xbar (
.clk (clk), .clk (clk),
@ -373,6 +375,7 @@ module VX_cache import VX_gpu_pkg::*; #(
.MSHR_SIZE (MSHR_SIZE), .MSHR_SIZE (MSHR_SIZE),
.MREQ_SIZE (MREQ_SIZE), .MREQ_SIZE (MREQ_SIZE),
.WRITE_ENABLE (WRITE_ENABLE), .WRITE_ENABLE (WRITE_ENABLE),
.DIRTY_BYTES (DIRTY_BYTES),
.WRITEBACK (WRITEBACK), .WRITEBACK (WRITEBACK),
.UUID_WIDTH (UUID_WIDTH), .UUID_WIDTH (UUID_WIDTH),
.TAG_WIDTH (TAG_WIDTH), .TAG_WIDTH (TAG_WIDTH),
@ -423,8 +426,8 @@ module VX_cache import VX_gpu_pkg::*; #(
.mem_rsp_id (`CS_MEM_TAG_TO_REQ_ID(mem_rsp_tag_s)), .mem_rsp_id (`CS_MEM_TAG_TO_REQ_ID(mem_rsp_tag_s)),
.mem_rsp_ready (per_bank_mem_rsp_ready[bank_id]), .mem_rsp_ready (per_bank_mem_rsp_ready[bank_id]),
.flush_valid (per_bank_flush_valid[bank_id]), .flush_begin (per_bank_flush_begin[bank_id]),
.flush_ready (per_bank_flush_ready[bank_id]) .flush_end (per_bank_flush_end[bank_id])
); );
if (NUM_BANKS == 1) begin if (NUM_BANKS == 1) begin
@ -448,7 +451,8 @@ module VX_cache import VX_gpu_pkg::*; #(
VX_stream_xbar #( VX_stream_xbar #(
.NUM_INPUTS (NUM_BANKS), .NUM_INPUTS (NUM_BANKS),
.NUM_OUTPUTS (NUM_REQS), .NUM_OUTPUTS (NUM_REQS),
.DATAW (CORE_RSP_DATAW) .DATAW (CORE_RSP_DATAW),
.ARBITER ("F")
) rsp_xbar ( ) rsp_xbar (
.clk (clk), .clk (clk),
.reset (rsp_xbar_reset), .reset (rsp_xbar_reset),
@ -494,15 +498,13 @@ module VX_cache import VX_gpu_pkg::*; #(
}; };
end end
`RESET_RELAY (mem_arb_reset, reset);
VX_stream_arb #( VX_stream_arb #(
.NUM_INPUTS (NUM_BANKS), .NUM_INPUTS (NUM_BANKS),
.DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + MSHR_ADDR_WIDTH + 1), .DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + MSHR_ADDR_WIDTH + 1),
.ARBITER ("F") .ARBITER ("F")
) mem_req_arb ( ) mem_req_arb (
.clk (clk), .clk (clk),
.reset (mem_arb_reset), .reset (reset),
.valid_in (per_bank_mem_req_valid), .valid_in (per_bank_mem_req_valid),
.ready_in (per_bank_mem_req_ready), .ready_in (per_bank_mem_req_ready),
.data_in (data_in), .data_in (data_in),

View file

@ -44,6 +44,9 @@ module VX_cache_bank #(
// Enable cache writeback // Enable cache writeback
parameter WRITEBACK = 0, parameter WRITEBACK = 0,
// Enable dirty bytes on writeback
parameter DIRTY_BYTES = 0,
// Request debug identifier // Request debug identifier
parameter UUID_WIDTH = 0, parameter UUID_WIDTH = 0,
@ -105,8 +108,8 @@ module VX_cache_bank #(
output wire mem_rsp_ready, output wire mem_rsp_ready,
// flush // flush
input wire flush_valid, input wire flush_begin,
output wire flush_ready output wire flush_end
); );
localparam PIPELINE_STAGES = 2; localparam PIPELINE_STAGES = 2;
@ -117,6 +120,7 @@ module VX_cache_bank #(
wire crsp_queue_stall; wire crsp_queue_stall;
wire mshr_alm_full; wire mshr_alm_full;
wire mreq_queue_empty;
wire mreq_queue_alm_full; wire mreq_queue_alm_full;
wire [`CS_LINE_ADDR_WIDTH-1:0] mem_rsp_addr; wire [`CS_LINE_ADDR_WIDTH-1:0] mem_rsp_addr;
@ -132,11 +136,12 @@ module VX_cache_bank #(
wire [MSHR_ADDR_WIDTH-1:0] replay_id; wire [MSHR_ADDR_WIDTH-1:0] replay_id;
wire replay_ready; wire replay_ready;
wire is_init_st0; wire is_init_st0, is_init_st1;
wire is_flush_st0, is_flush_st1; wire is_flush_st0, is_flush_st1;
wire [NUM_WAYS-1:0] flush_way_st0; wire [NUM_WAYS-1:0] flush_way_st0;
wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1; wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1;
wire [`CS_LINE_SEL_BITS-1:0] line_sel_st0, line_sel_st1;
wire rw_sel, rw_st0, rw_st1; wire rw_sel, rw_st0, rw_st1;
wire [WORD_SEL_WIDTH-1:0] wsel_sel, wsel_st0, wsel_st1; wire [WORD_SEL_WIDTH-1:0] wsel_sel, wsel_st0, wsel_st1;
wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1; wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1;
@ -149,7 +154,8 @@ module VX_cache_bank #(
wire is_creq_st0, is_creq_st1; wire is_creq_st0, is_creq_st1;
wire is_fill_st0, is_fill_st1; wire is_fill_st0, is_fill_st1;
wire is_replay_st0, is_replay_st1; wire is_replay_st0, is_replay_st1;
wire creq_flush_st0, creq_flush_st1; wire creq_flush_sel, creq_flush_st0, creq_flush_st1;
wire evict_dirty_st0, evict_dirty_st1;
wire [NUM_WAYS-1:0] way_sel_st0, way_sel_st1; wire [NUM_WAYS-1:0] way_sel_st0, way_sel_st1;
wire [NUM_WAYS-1:0] tag_matches_st0; wire [NUM_WAYS-1:0] tag_matches_st0;
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0; wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0;
@ -157,73 +163,82 @@ module VX_cache_bank #(
wire mshr_pending_st0, mshr_pending_st1; wire mshr_pending_st0, mshr_pending_st1;
wire mshr_empty; wire mshr_empty;
wire line_flush_valid; wire flush_valid;
wire line_flush_init; wire init_valid;
wire [`CS_LINE_SEL_BITS-1:0] line_flush_sel; wire [`CS_LINE_SEL_BITS-1:0] flush_sel;
wire [NUM_WAYS-1:0] line_flush_way; wire [NUM_WAYS-1:0] flush_way;
wire line_flush_ready; wire flush_ready;
// ensure we have no pending memory request in the bank
wire no_pending_req = ~valid_st0 && ~valid_st1 && mreq_queue_empty;
// flush unit // flush unit
VX_bank_flush #( VX_bank_flush #(
.BANK_ID (BANK_ID),
.CACHE_SIZE (CACHE_SIZE), .CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE), .LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS), .NUM_BANKS (NUM_BANKS),
.NUM_WAYS (NUM_WAYS), .NUM_WAYS (NUM_WAYS),
.WRITEBACK (WRITEBACK) .WRITEBACK (WRITEBACK)
) flush_unit ( ) flush_unit (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.flush_in_valid (flush_valid), .flush_begin (flush_begin),
.flush_in_ready (flush_ready), .flush_end (flush_end),
.flush_out_init (line_flush_init), .flush_init (init_valid),
.flush_out_valid (line_flush_valid), .flush_valid (flush_valid),
.flush_out_line (line_flush_sel), .flush_line (flush_sel),
.flush_out_way (line_flush_way), .flush_way (flush_way),
.flush_out_ready (line_flush_ready), .flush_ready (flush_ready),
.mshr_empty (mshr_empty) .mshr_empty (mshr_empty),
.bank_empty (no_pending_req)
); );
wire rdw_hazard_st0; wire rdw_hazard1_sel;
reg rdw_hazard_st1; wire rdw_hazard2_sel;
reg rdw_hazard3_st1;
wire pipe_stall = crsp_queue_stall || rdw_hazard_st1; wire pipe_stall = crsp_queue_stall || rdw_hazard3_st1;
// inputs arbitration: // inputs arbitration:
// mshr replay has highest priority to maximize utilization since there is no miss. // mshr replay has highest priority to maximize utilization since there is no miss.
// handle memory responses next to prevent deadlock with potential memory request from a miss. // handle memory responses next to prevent deadlock with potential memory request from a miss.
// flush has precedence over core requests to ensure that the cache is in a consistent state. // flush has precedence over core requests to ensure that the cache is in a consistent state.
wire replay_grant = ~line_flush_init; wire replay_grant = ~init_valid;
wire replay_enable = replay_grant && replay_valid; wire replay_enable = replay_grant && replay_valid;
wire fill_grant = ~line_flush_init && ~replay_enable; wire fill_grant = ~init_valid && ~replay_enable;
wire fill_enable = fill_grant && mem_rsp_valid; wire fill_enable = fill_grant && mem_rsp_valid;
wire flush_grant = ~line_flush_init && ~replay_enable && ~fill_enable; wire flush_grant = ~init_valid && ~replay_enable && ~fill_enable;
wire flush_enable = flush_grant && line_flush_valid; wire flush_enable = flush_grant && flush_valid;
wire creq_grant = ~line_flush_init && ~replay_enable && ~fill_enable && ~flush_enable; wire creq_grant = ~init_valid && ~replay_enable && ~fill_enable && ~flush_enable;
wire creq_enable = creq_grant && core_req_valid; wire creq_enable = creq_grant && core_req_valid;
assign replay_ready = replay_grant assign replay_ready = replay_grant
&& ~rdw_hazard_st0 && ~rdw_hazard1_sel
&& ~pipe_stall; && ~pipe_stall;
assign mem_rsp_ready = fill_grant assign mem_rsp_ready = fill_grant
&& (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions
&& ~rdw_hazard2_sel
&& ~pipe_stall; && ~pipe_stall;
assign line_flush_ready = flush_grant assign flush_ready = flush_grant
&& ~mreq_queue_alm_full && (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions
&& ~pipe_stall; && ~rdw_hazard2_sel
&& ~pipe_stall;
assign core_req_ready = creq_grant assign core_req_ready = creq_grant
&& ~mreq_queue_alm_full && ~mreq_queue_alm_full
&& ~mshr_alm_full && ~mshr_alm_full
&& ~pipe_stall; && ~pipe_stall;
wire init_fire = line_flush_init; wire init_fire = init_valid;
wire replay_fire = replay_valid && replay_ready; wire replay_fire = replay_valid && replay_ready;
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready; wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
wire flush_fire = line_flush_valid && line_flush_ready; wire flush_fire = flush_valid && flush_ready;
wire core_req_fire = core_req_valid && core_req_ready; wire core_req_fire = core_req_valid && core_req_ready;
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || flush_fire || core_req_fire; assign valid_sel = init_fire || replay_fire || mem_rsp_fire || flush_fire || core_req_fire;
@ -232,8 +247,9 @@ module VX_cache_bank #(
assign wsel_sel = replay_valid ? replay_wsel : core_req_wsel; assign wsel_sel = replay_valid ? replay_wsel : core_req_wsel;
assign req_idx_sel = replay_valid ? replay_idx : core_req_idx; assign req_idx_sel = replay_valid ? replay_idx : core_req_idx;
assign tag_sel = replay_valid ? replay_tag : core_req_tag; assign tag_sel = replay_valid ? replay_tag : core_req_tag;
assign creq_flush_sel = core_req_valid && core_req_flush;
assign addr_sel = (line_flush_init | line_flush_valid) ? `CS_LINE_ADDR_WIDTH'(line_flush_sel) : assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) :
(replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr)); (replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr));
if (WRITE_ENABLE) begin if (WRITE_ENABLE) begin
@ -260,8 +276,8 @@ module VX_cache_bank #(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.enable (~pipe_stall), .enable (~pipe_stall),
.data_in ({valid_sel, line_flush_init, replay_enable, fill_enable, flush_enable, creq_enable, core_req_flush, line_flush_way, addr_sel, data_sel, rw_sel, byteen_sel, wsel_sel, req_idx_sel, tag_sel, replay_id}), .data_in ({valid_sel, init_valid, replay_enable, fill_enable, flush_enable, creq_enable, creq_flush_sel, flush_way, addr_sel, data_sel, rw_sel, byteen_sel, wsel_sel, req_idx_sel, tag_sel, replay_id}),
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, creq_flush_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0}) .data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, creq_flush_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0})
); );
if (UUID_WIDTH != 0) begin if (UUID_WIDTH != 0) begin
@ -273,18 +289,20 @@ module VX_cache_bank #(
wire do_init_st0 = valid_st0 && is_init_st0; wire do_init_st0 = valid_st0 && is_init_st0;
wire do_flush_st0 = valid_st0 && is_flush_st0; wire do_flush_st0 = valid_st0 && is_flush_st0;
wire do_creq_rd_st0 = valid_st0 && is_creq_st0 && ~rw_st0; wire do_creq_rd_st0 = valid_st0 && is_creq_st0 && ~rw_st0;
wire do_creq_wr_st0 = valid_st0 && is_creq_st0 && rw_st0;
wire do_replay_rd_st0 = valid_st0 && is_replay_st0 && ~rw_st0; wire do_replay_rd_st0 = valid_st0 && is_replay_st0 && ~rw_st0;
wire do_replay_wr_st0 = valid_st0 && is_replay_st0 && rw_st0;
wire do_fill_st0 = valid_st0 && is_fill_st0; wire do_fill_st0 = valid_st0 && is_fill_st0;
wire do_lookup_st0 = valid_st0 && ~(is_fill_st0 || is_init_st0);
wire do_cache_rd_st0 = do_creq_rd_st0 || do_replay_rd_st0; wire do_cache_rd_st0 = do_creq_rd_st0 || do_replay_rd_st0;
wire do_cache_wr_st0 = do_creq_wr_st0 || do_replay_wr_st0;
wire do_lookup_st0 = do_cache_rd_st0 || do_cache_wr_st0;
wire [`CS_WORD_WIDTH-1:0] write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0]; wire [`CS_WORD_WIDTH-1:0] write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0];
wire [NUM_WAYS-1:0] repl_way_st0; assign line_sel_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0];
wire [`CS_TAG_SEL_BITS-1:0] repl_tag_st0;
`RESET_RELAY (tag_reset, reset); wire [NUM_WAYS-1:0] evict_way_st0;
wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st0;
VX_cache_tags #( VX_cache_tags #(
.INSTANCE_ID($sformatf("%s-tags", INSTANCE_ID)), .INSTANCE_ID($sformatf("%s-tags", INSTANCE_ID)),
@ -294,42 +312,51 @@ module VX_cache_bank #(
.NUM_BANKS (NUM_BANKS), .NUM_BANKS (NUM_BANKS),
.NUM_WAYS (NUM_WAYS), .NUM_WAYS (NUM_WAYS),
.WORD_SIZE (WORD_SIZE), .WORD_SIZE (WORD_SIZE),
.WRITEBACK (WRITEBACK),
.UUID_WIDTH (UUID_WIDTH) .UUID_WIDTH (UUID_WIDTH)
) cache_tags ( ) cache_tags (
.clk (clk), .clk (clk),
.reset (tag_reset), .reset (reset),
.req_uuid (req_uuid_st0), .req_uuid (req_uuid_st0),
.stall (pipe_stall), .stall (pipe_stall),
// init/fill/lookup/flush // init/flush/fill/write/lookup
.init (do_init_st0 || do_flush_st0), .init (do_init_st0),
.flush (do_flush_st0),
.fill (do_fill_st0), .fill (do_fill_st0),
.write (do_cache_wr_st0),
.lookup (do_lookup_st0), .lookup (do_lookup_st0),
.line_addr (addr_st0), .line_addr (addr_st0),
.way_sel (flush_way_st0),
.tag_matches(tag_matches_st0), .tag_matches(tag_matches_st0),
// replacement // replacement
.repl_way (repl_way_st0), .evict_dirty(evict_dirty_st0),
.repl_tag (repl_tag_st0) .evict_way (evict_way_st0),
.evict_tag (evict_tag_st0)
); );
wire [`CS_LINE_ADDR_WIDTH-1:0] addr2_st0;
wire is_flush2_st0 = WRITEBACK && is_flush_st0;
assign mshr_id_st0 = is_creq_st0 ? mshr_alloc_id_st0 : replay_id_st0; assign mshr_id_st0 = is_creq_st0 ? mshr_alloc_id_st0 : replay_id_st0;
assign way_sel_st0 = is_fill_st0 ? repl_way_st0 : (is_flush_st0 ? flush_way_st0 : tag_matches_st0); assign way_sel_st0 = (is_fill_st0 || is_flush2_st0) ? evict_way_st0 : tag_matches_st0;
wire [`CS_LINE_ADDR_WIDTH-1:0] addr_r_st0 = (is_fill_st0 || is_flush_st0) ? {repl_tag_st0, addr_st0[`CS_LINE_SEL_BITS-1:0]} : addr_st0; assign addr2_st0 = (is_fill_st0 || is_flush2_st0) ? {evict_tag_st0, line_sel_st0} : addr_st0;
VX_pipe_register #( VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1), .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1),
.RESETW (1) .RESETW (1)
) pipe_reg1 ( ) pipe_reg1 (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.enable (~pipe_stall), .enable (~pipe_stall),
.data_in ({valid_st0, is_flush_st0, is_replay_st0, is_fill_st0, is_creq_st0, creq_flush_st0, rw_st0, addr_r_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, way_sel_st0, mshr_pending_st0}), .data_in ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush2_st0, is_creq_st0, creq_flush_st0, rw_st0, addr2_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, way_sel_st0, evict_dirty_st0, mshr_pending_st0}),
.data_out ({valid_st1, is_flush_st1, is_replay_st1, is_fill_st1, is_creq_st1, creq_flush_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, way_sel_st1, mshr_pending_st1}) .data_out ({valid_st1, is_init_st1, is_replay_st1, is_fill_st1, is_flush_st1, is_creq_st1, creq_flush_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, way_sel_st1, evict_dirty_st1, mshr_pending_st1})
); );
// we have a tag hit // we have a tag hit
@ -343,35 +370,40 @@ module VX_cache_bank #(
wire is_read_st1 = is_creq_st1 && ~rw_st1; wire is_read_st1 = is_creq_st1 && ~rw_st1;
wire is_write_st1 = is_creq_st1 && rw_st1; wire is_write_st1 = is_creq_st1 && rw_st1;
wire do_init_st1 = valid_st1 && is_init_st1;
wire do_fill_st1 = valid_st1 && is_fill_st1;
wire do_flush_st1 = valid_st1 && is_flush_st1;
wire do_creq_rd_st1 = valid_st1 && is_read_st1; wire do_creq_rd_st1 = valid_st1 && is_read_st1;
wire do_creq_wr_st1 = valid_st1 && is_write_st1; wire do_creq_wr_st1 = valid_st1 && is_write_st1;
wire do_fill_st1 = valid_st1 && is_fill_st1;
wire do_replay_rd_st1 = valid_st1 && is_replay_st1 && ~rw_st1; wire do_replay_rd_st1 = valid_st1 && is_replay_st1 && ~rw_st1;
wire do_replay_wr_st1 = valid_st1 && is_replay_st1 && rw_st1; wire do_replay_wr_st1 = valid_st1 && is_replay_st1 && rw_st1;
wire do_cache_rd_st1 = do_read_hit_st1 || do_replay_rd_st1;
wire do_cache_wr_st1 = do_write_hit_st1 || do_replay_wr_st1;
wire do_read_hit_st1 = do_creq_rd_st1 && is_hit_st1; wire do_read_hit_st1 = do_creq_rd_st1 && is_hit_st1;
wire do_read_miss_st1 = do_creq_rd_st1 && ~is_hit_st1; wire do_read_miss_st1 = do_creq_rd_st1 && ~is_hit_st1;
wire do_write_hit_st1 = do_creq_wr_st1 && is_hit_st1; wire do_write_hit_st1 = do_creq_wr_st1 && is_hit_st1;
wire do_write_miss_st1= do_creq_wr_st1 && ~is_hit_st1; wire do_write_miss_st1= do_creq_wr_st1 && ~is_hit_st1;
wire do_flush_st1 = valid_st1 && is_flush_st1; wire do_cache_rd_st1 = do_read_hit_st1 || do_replay_rd_st1;
wire do_cache_wr_st1 = do_write_hit_st1 || do_replay_wr_st1;
assign line_sel_st1 = addr_st1[`CS_LINE_SEL_BITS-1:0];
`UNUSED_VAR (do_write_miss_st1) `UNUSED_VAR (do_write_miss_st1)
// ensure mshr replay always get a hit // ensure mshr replay always get a hit
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("runtime error: invalid mshr replay")); `RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("missed mshr replay"));
// detect BRAM's read-during-write hazard // both tag and data stores use BRAM with no read-during-write protection.
assign rdw_hazard_st0 = do_fill_st0; // stall cycle after a fill // we need to stall the pipeline to prevent read-after-write hazards.
wire rdw_case1 = do_cache_rd_st0 && do_cache_wr_st1 && (addr_st0 == addr_st1); // standard cache access assign rdw_hazard1_sel = do_fill_st0; // stall first replay following a fill
wire rdw_case2 = WRITEBACK && (do_flush_st0 || do_fill_st0) && do_cache_wr_st1; // a writeback can evict preceding write assign rdw_hazard2_sel = WRITEBACK && do_cache_wr_st0; // a writeback can evict any preceding write
always @(posedge clk) begin // after a write to same address always @(posedge clk) begin
rdw_hazard_st1 <= (rdw_case1 || rdw_case2) // stall reads following writes to same line address
&& ~rdw_hazard_st1; // invalidate if pipeline stalled to avoid repeats rdw_hazard3_st1 <= do_cache_rd_st0 && do_cache_wr_st1 && (line_sel_st0 == line_sel_st1)
&& ~rdw_hazard3_st1; // release pipeline stall
end end
wire [`CS_LINE_WIDTH-1:0] write_data_st1 = {`CS_WORDS_PER_LINE{data_st1[`CS_WORD_WIDTH-1:0]}}; wire [`CS_LINE_WIDTH-1:0] write_data_st1 = {`CS_WORDS_PER_LINE{data_st1[`CS_WORD_WIDTH-1:0]}};
@ -380,7 +412,6 @@ module VX_cache_bank #(
wire [`CS_LINE_WIDTH-1:0] dirty_data_st1; wire [`CS_LINE_WIDTH-1:0] dirty_data_st1;
wire [LINE_SIZE-1:0] dirty_byteen_st1; wire [LINE_SIZE-1:0] dirty_byteen_st1;
wire dirty_valid_st1;
if (`CS_WORDS_PER_LINE > 1) begin if (`CS_WORDS_PER_LINE > 1) begin
reg [LINE_SIZE-1:0] write_byteen_r; reg [LINE_SIZE-1:0] write_byteen_r;
@ -393,8 +424,6 @@ module VX_cache_bank #(
assign write_byteen_st1 = byteen_st1; assign write_byteen_st1 = byteen_st1;
end end
`RESET_RELAY (data_reset, reset);
VX_cache_data #( VX_cache_data #(
.INSTANCE_ID ($sformatf("%s-data", INSTANCE_ID)), .INSTANCE_ID ($sformatf("%s-data", INSTANCE_ID)),
.BANK_ID (BANK_ID), .BANK_ID (BANK_ID),
@ -405,17 +434,19 @@ module VX_cache_bank #(
.WORD_SIZE (WORD_SIZE), .WORD_SIZE (WORD_SIZE),
.WRITE_ENABLE (WRITE_ENABLE), .WRITE_ENABLE (WRITE_ENABLE),
.WRITEBACK (WRITEBACK), .WRITEBACK (WRITEBACK),
.DIRTY_BYTES (DIRTY_BYTES),
.UUID_WIDTH (UUID_WIDTH) .UUID_WIDTH (UUID_WIDTH)
) cache_data ( ) cache_data (
.clk (clk), .clk (clk),
.reset (data_reset), .reset (reset),
.req_uuid (req_uuid_st1), .req_uuid (req_uuid_st1),
.stall (pipe_stall), .stall (pipe_stall),
.init (do_init_st1),
.read (do_cache_rd_st1), .read (do_cache_rd_st1),
.fill (do_fill_st1 && ~rdw_hazard_st1), .fill (do_fill_st1),
.flush (do_flush_st1), .flush (do_flush_st1),
.write (do_cache_wr_st1), .write (do_cache_wr_st1),
.way_sel (way_sel_st1), .way_sel (way_sel_st1),
@ -425,7 +456,6 @@ module VX_cache_bank #(
.write_data (write_data_st1), .write_data (write_data_st1),
.write_byteen(write_byteen_st1), .write_byteen(write_byteen_st1),
.read_data (read_data_st1), .read_data (read_data_st1),
.dirty_valid(dirty_valid_st1),
.dirty_data (dirty_data_st1), .dirty_data (dirty_data_st1),
.dirty_byteen(dirty_byteen_st1) .dirty_byteen(dirty_byteen_st1)
); );
@ -461,8 +491,6 @@ module VX_cache_bank #(
`UNUSED_PIN (size) `UNUSED_PIN (size)
); );
`RESET_RELAY (mshr_reset, reset);
VX_cache_mshr #( VX_cache_mshr #(
.INSTANCE_ID ($sformatf("%s-mshr", INSTANCE_ID)), .INSTANCE_ID ($sformatf("%s-mshr", INSTANCE_ID)),
.BANK_ID (BANK_ID), .BANK_ID (BANK_ID),
@ -473,7 +501,7 @@ module VX_cache_bank #(
.DATA_WIDTH (WORD_SEL_WIDTH + WORD_SIZE + `CS_WORD_WIDTH + TAG_WIDTH + REQ_SEL_WIDTH) .DATA_WIDTH (WORD_SEL_WIDTH + WORD_SIZE + `CS_WORD_WIDTH + TAG_WIDTH + REQ_SEL_WIDTH)
) cache_mshr ( ) cache_mshr (
.clk (clk), .clk (clk),
.reset (mshr_reset), .reset (reset),
.deq_req_uuid (req_uuid_sel), .deq_req_uuid (req_uuid_sel),
.lkp_req_uuid (req_uuid_st0), .lkp_req_uuid (req_uuid_st0),
@ -536,16 +564,14 @@ module VX_cache_bank #(
assign crsp_queue_data = read_data_st1; assign crsp_queue_data = read_data_st1;
assign crsp_queue_tag = tag_st1; assign crsp_queue_tag = tag_st1;
`RESET_RELAY (crsp_queue_reset, reset);
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (TAG_WIDTH + `CS_WORD_WIDTH + REQ_SEL_WIDTH), .DATAW (TAG_WIDTH + `CS_WORD_WIDTH + REQ_SEL_WIDTH),
.SIZE (CRSQ_SIZE), .SIZE (CRSQ_SIZE),
.OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF)) .OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF))
) core_rsp_queue ( ) core_rsp_queue (
.clk (clk), .clk (clk),
.reset (crsp_queue_reset), .reset (reset),
.valid_in (crsp_queue_valid && ~rdw_hazard_st1), .valid_in (crsp_queue_valid && ~rdw_hazard3_st1),
.ready_in (crsp_queue_ready), .ready_in (crsp_queue_ready),
.data_in ({crsp_queue_tag, crsp_queue_data, crsp_queue_idx}), .data_in ({crsp_queue_tag, crsp_queue_data, crsp_queue_idx}),
.data_out ({core_rsp_tag, core_rsp_data, core_rsp_idx}), .data_out ({core_rsp_tag, core_rsp_data, core_rsp_idx}),
@ -557,7 +583,7 @@ module VX_cache_bank #(
// schedule memory request // schedule memory request
wire mreq_queue_push, mreq_queue_pop, mreq_queue_empty; wire mreq_queue_push, mreq_queue_pop;
wire [`CS_LINE_WIDTH-1:0] mreq_queue_data; wire [`CS_LINE_WIDTH-1:0] mreq_queue_data;
wire [LINE_SIZE-1:0] mreq_queue_byteen; wire [LINE_SIZE-1:0] mreq_queue_byteen;
wire [`CS_LINE_ADDR_WIDTH-1:0] mreq_queue_addr; wire [`CS_LINE_ADDR_WIDTH-1:0] mreq_queue_addr;
@ -565,30 +591,42 @@ module VX_cache_bank #(
wire mreq_queue_rw; wire mreq_queue_rw;
wire mreq_queue_flush; wire mreq_queue_flush;
wire is_evict_st1 = (is_fill_st1 || is_flush_st1) && dirty_valid_st1; wire is_fill_or_flush_st1 = is_fill_st1 || is_flush_st1;
wire do_writeback_st1 = valid_st1 && is_evict_st1; wire do_fill_or_flush_st1 = valid_st1 && is_fill_or_flush_st1;
`UNUSED_VAR (do_writeback_st1) wire do_writeback_st1 = do_fill_or_flush_st1 && evict_dirty_st1;
if (WRITEBACK) begin if (WRITEBACK) begin
if (DIRTY_BYTES) begin
// ensure dirty bytes match the tag info
wire has_dirty_bytes = (| dirty_byteen_st1);
`RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID)));
end
assign mreq_queue_push = (((do_read_miss_st1 || do_write_miss_st1) && ~mshr_pending_st1) assign mreq_queue_push = (((do_read_miss_st1 || do_write_miss_st1) && ~mshr_pending_st1)
|| do_writeback_st1) || do_writeback_st1)
&& ~rdw_hazard_st1; && ~rdw_hazard3_st1;
end else begin end else begin
`UNUSED_VAR (dirty_valid_st1) `UNUSED_VAR (do_writeback_st1)
assign mreq_queue_push = ((do_read_miss_st1 && ~mshr_pending_st1) assign mreq_queue_push = ((do_read_miss_st1 && ~mshr_pending_st1)
|| do_creq_wr_st1) || do_creq_wr_st1)
&& ~rdw_hazard_st1; && ~rdw_hazard3_st1;
end end
assign mreq_queue_pop = mem_req_valid && mem_req_ready; assign mreq_queue_pop = mem_req_valid && mem_req_ready;
assign mreq_queue_rw = WRITE_ENABLE && (WRITEBACK ? is_evict_st1 : rw_st1);
assign mreq_queue_addr = addr_st1; assign mreq_queue_addr = addr_st1;
assign mreq_queue_id = mshr_id_st1; assign mreq_queue_id = mshr_id_st1;
assign mreq_queue_data = is_write_st1 ? write_data_st1 : dirty_data_st1;
assign mreq_queue_byteen = is_write_st1 ? write_byteen_st1 : dirty_byteen_st1;
assign mreq_queue_flush = creq_flush_st1; assign mreq_queue_flush = creq_flush_st1;
`RESET_RELAY (mreq_queue_reset, reset); if (WRITE_ENABLE) begin
assign mreq_queue_rw = WRITEBACK ? is_fill_or_flush_st1 : rw_st1;
assign mreq_queue_data = WRITEBACK ? dirty_data_st1 : write_data_st1;
assign mreq_queue_byteen = WRITEBACK ? dirty_byteen_st1 : write_byteen_st1;
end else begin
assign mreq_queue_rw = 0;
assign mreq_queue_data = 0;
assign mreq_queue_byteen = 0;
`UNUSED_VAR (dirty_data_st1)
`UNUSED_VAR (dirty_byteen_st1)
end
VX_fifo_queue #( VX_fifo_queue #(
.DATAW (1 + `CS_LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + 1), .DATAW (1 + `CS_LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + 1),
@ -597,7 +635,7 @@ module VX_cache_bank #(
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
) mem_req_queue ( ) mem_req_queue (
.clk (clk), .clk (clk),
.reset (mreq_queue_reset), .reset (reset),
.push (mreq_queue_push), .push (mreq_queue_push),
.pop (mreq_queue_pop), .pop (mreq_queue_pop),
.data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_id, mreq_queue_byteen, mreq_queue_data, mreq_queue_flush}), .data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_id, mreq_queue_byteen, mreq_queue_data, mreq_queue_flush}),
@ -621,32 +659,32 @@ module VX_cache_bank #(
`ifdef DBG_TRACE_CACHE `ifdef DBG_TRACE_CACHE
wire crsp_queue_fire = crsp_queue_valid && crsp_queue_ready; wire crsp_queue_fire = crsp_queue_valid && crsp_queue_ready;
wire pipeline_stall = (replay_valid || mem_rsp_valid || core_req_valid || line_flush_valid) wire input_stall = (replay_valid || mem_rsp_valid || core_req_valid || flush_valid)
&& ~(replay_fire || mem_rsp_fire || core_req_fire || line_flush_valid); && ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire);
always @(posedge clk) begin always @(posedge clk) begin
if (pipeline_stall) begin if (input_stall || pipe_stall) begin
`TRACE(3, ("%d: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw_st0=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard_st0)); `TRACE(3, ("%d: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1));
end end
if (mem_rsp_fire) begin if (mem_rsp_fire) begin
`TRACE(2, ("%d: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data)); `TRACE(2, ("%d: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data));
end end
if (replay_fire) begin if (replay_fire) begin
`TRACE(2, ("%d: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel)); `TRACE(2, ("%d: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel));
end end
if (core_req_fire) begin if (core_req_fire) begin
if (core_req_rw) if (core_req_rw)
`TRACE(2, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel)); `TRACE(2, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel));
else else
`TRACE(2, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel)); `TRACE(2, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel));
end end
if (crsp_queue_fire) begin if (crsp_queue_fire) begin
`TRACE(2, ("%d: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1)); `TRACE(2, ("%d: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1));
end end
if (mreq_queue_push) begin if (mreq_queue_push) begin
if (do_creq_wr_st1 && !WRITEBACK) if (do_creq_wr_st1 && !WRITEBACK)
`TRACE(2, ("%d: %s writethrough: addr=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)); `TRACE(2, ("%d: %s writethrough: addr=0x%0h, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1));
else if (do_writeback_st1) else if (do_writeback_st1)
`TRACE(2, ("%d: %s writeback: addr=0x%0h, byteen=%b, data=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data)); `TRACE(2, ("%d: %s writeback: addr=0x%0h, byteen=%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data));
else else
`TRACE(2, ("%d: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1)); `TRACE(2, ("%d: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1));
end end

View file

@ -49,6 +49,9 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
// Enable cache writeback // Enable cache writeback
parameter WRITEBACK = 0, parameter WRITEBACK = 0,
// Enable dirty bytes on writeback
parameter DIRTY_BYTES = 0,
// Request debug identifier // Request debug identifier
parameter UUID_WIDTH = 0, parameter UUID_WIDTH = 0,
@ -99,6 +102,8 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
.TAG_WIDTH (ARB_TAG_WIDTH) .TAG_WIDTH (ARB_TAG_WIDTH)
) arb_core_bus_if[NUM_CACHES * NUM_REQS](); ) arb_core_bus_if[NUM_CACHES * NUM_REQS]();
`RESET_RELAY_EX (cache_arb_reset, reset, NUM_REQS, `MAX_FANOUT);
for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar i = 0; i < NUM_REQS; ++i) begin
VX_mem_bus_if #( VX_mem_bus_if #(
.DATA_SIZE (WORD_SIZE), .DATA_SIZE (WORD_SIZE),
@ -114,8 +119,6 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
`ASSIGN_VX_MEM_BUS_IF (core_bus_tmp_if[j], core_bus_if[j * NUM_REQS + i]); `ASSIGN_VX_MEM_BUS_IF (core_bus_tmp_if[j], core_bus_if[j * NUM_REQS + i]);
end end
`RESET_RELAY (arb_reset, reset);
VX_mem_arb #( VX_mem_arb #(
.NUM_INPUTS (NUM_INPUTS), .NUM_INPUTS (NUM_INPUTS),
.NUM_OUTPUTS (NUM_CACHES), .NUM_OUTPUTS (NUM_CACHES),
@ -127,7 +130,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
.RSP_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? 2 : 0) .RSP_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? 2 : 0)
) cache_arb ( ) cache_arb (
.clk (clk), .clk (clk),
.reset (arb_reset), .reset (cache_arb_reset[i]),
.bus_in_if (core_bus_tmp_if), .bus_in_if (core_bus_tmp_if),
.bus_out_if (arb_core_bus_tmp_if) .bus_out_if (arb_core_bus_tmp_if)
); );
@ -155,6 +158,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
.MREQ_SIZE (MREQ_SIZE), .MREQ_SIZE (MREQ_SIZE),
.WRITE_ENABLE (WRITE_ENABLE), .WRITE_ENABLE (WRITE_ENABLE),
.WRITEBACK (WRITEBACK), .WRITEBACK (WRITEBACK),
.DIRTY_BYTES (DIRTY_BYTES),
.UUID_WIDTH (UUID_WIDTH), .UUID_WIDTH (UUID_WIDTH),
.TAG_WIDTH (ARB_TAG_WIDTH), .TAG_WIDTH (ARB_TAG_WIDTH),
.TAG_SEL_IDX (TAG_SEL_IDX), .TAG_SEL_IDX (TAG_SEL_IDX),

View file

@ -30,6 +30,8 @@ module VX_cache_data #(
parameter WRITE_ENABLE = 1, parameter WRITE_ENABLE = 1,
// Enable cache writeback // Enable cache writeback
parameter WRITEBACK = 0, parameter WRITEBACK = 0,
// Enable dirty bytes on writeback
parameter DIRTY_BYTES = 0,
// Request debug identifier // Request debug identifier
parameter UUID_WIDTH = 0 parameter UUID_WIDTH = 0
) ( ) (
@ -42,6 +44,7 @@ module VX_cache_data #(
input wire stall, input wire stall,
input wire init,
input wire read, input wire read,
input wire fill, input wire fill,
input wire flush, input wire flush,
@ -53,89 +56,88 @@ module VX_cache_data #(
input wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen, input wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen,
input wire [NUM_WAYS-1:0] way_sel, input wire [NUM_WAYS-1:0] way_sel,
output wire [`CS_WORD_WIDTH-1:0] read_data, output wire [`CS_WORD_WIDTH-1:0] read_data,
output wire dirty_valid,
output wire [`CS_LINE_WIDTH-1:0] dirty_data, output wire [`CS_LINE_WIDTH-1:0] dirty_data,
output wire [LINE_SIZE-1:0] dirty_byteen output wire [LINE_SIZE-1:0] dirty_byteen
); );
`UNUSED_SPARAM (INSTANCE_ID) `UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_PARAM (BANK_ID) `UNUSED_PARAM (BANK_ID)
`UNUSED_PARAM (WORD_SIZE) `UNUSED_PARAM (WORD_SIZE)
`UNUSED_VAR (reset)
`UNUSED_VAR (stall) `UNUSED_VAR (stall)
`UNUSED_VAR (line_addr) `UNUSED_VAR (line_addr)
`UNUSED_VAR (init)
`UNUSED_VAR (read) `UNUSED_VAR (read)
`UNUSED_VAR (flush) `UNUSED_VAR (flush)
localparam BYTEENW = (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) ? (LINE_SIZE * NUM_WAYS) : 1; localparam BYTEENW = (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) ? (LINE_SIZE * NUM_WAYS) : 1;
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0]; wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_rdata;
wire [`LOG2UP(NUM_WAYS)-1:0] way_idx; wire [`LOG2UP(NUM_WAYS)-1:0] way_idx;
if (WRITEBACK) begin if (WRITEBACK) begin
reg [`CS_LINES_PER_BANK * NUM_WAYS-1:0][LINE_SIZE-1:0] dirty_bytes_r; if (DIRTY_BYTES) begin
reg [`CS_LINES_PER_BANK * NUM_WAYS-1:0] dirty_blocks_r; wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_rdata;
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_wdata;
wire [`CLOG2(`CS_LINES_PER_BANK * NUM_WAYS)-1:0] way_addr; for (genvar i = 0; i < NUM_WAYS; ++i) begin
if (NUM_WAYS > 1) begin wire [LINE_SIZE-1:0] wdata = write ? (bs_rdata[i] | write_byteen) : ((fill || flush) ? '0 : bs_rdata[i]);
assign way_addr = {line_sel, way_idx}; assign bs_wdata[i] = init ? '0 : (way_sel[i] ? wdata : bs_rdata[i]);
end
VX_sp_ram #(
.DATAW (LINE_SIZE * NUM_WAYS),
.SIZE (`CS_LINES_PER_BANK)
) byteen_store (
.clk (clk),
.reset (reset),
.read (write || fill || flush),
.write (init || write || fill || flush),
.wren (1'b1),
.addr (line_sel),
.wdata (bs_wdata),
.rdata (bs_rdata)
);
assign dirty_byteen = bs_rdata[way_idx];
end else begin end else begin
assign way_addr = line_sel; assign dirty_byteen = {LINE_SIZE{1'b1}};
end end
always @(posedge clk) begin wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] flipped_rdata;
if (fill) begin for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
dirty_bytes_r[way_addr] <= '0; for (genvar j = 0; j < NUM_WAYS; ++j) begin
end else if (write) begin assign flipped_rdata[j][i] = line_rdata[i][j];
dirty_bytes_r[way_addr] <= dirty_bytes_r[way_addr] | write_byteen;
end end
end end
assign dirty_data = flipped_rdata[way_idx];
always @(posedge clk) begin
if (reset) begin
for (integer i = 0; i < `CS_LINES_PER_BANK * NUM_WAYS; ++i) begin
dirty_blocks_r[i] <= 0;
end
end else begin
if (fill) begin
dirty_blocks_r[way_addr] <= 0;
end else if (write) begin
dirty_blocks_r[way_addr] <= 1;
end
end
end
assign dirty_byteen = dirty_bytes_r[way_addr];
assign dirty_valid = dirty_blocks_r[way_addr];
end else begin end else begin
assign dirty_byteen = '0; assign dirty_byteen = '0;
assign dirty_valid = 0; assign dirty_data = '0;
end end
// order the data layout to perform ways multiplexing last. // order the data layout to perform ways multiplexing last.
// this allows converting way index to binary in parallel with BRAM read. // this allows converting way index to binary in parallel with BRAM read access and way selection.
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] wdata; wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
wire [BYTEENW-1:0] wren; wire [BYTEENW-1:0] line_wren;
if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
assign wdata[i] = (fill || !WRITE_ENABLE) ? {NUM_WAYS{fill_data[i]}} : {NUM_WAYS{write_data[i]}};
end
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w; wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w;
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
for (genvar j = 0; j < NUM_WAYS; ++j) begin for (genvar j = 0; j < NUM_WAYS; ++j) begin
assign line_wdata[i][j] = (fill || !WRITE_ENABLE) ? fill_data[i] : write_data[i];
assign wren_w[i][j] = ((fill || !WRITE_ENABLE) ? {WORD_SIZE{1'b1}} : write_byteen[i]) assign wren_w[i][j] = ((fill || !WRITE_ENABLE) ? {WORD_SIZE{1'b1}} : write_byteen[i])
& {WORD_SIZE{(way_sel[j] || (NUM_WAYS == 1))}}; & {WORD_SIZE{(way_sel[j] || (NUM_WAYS == 1))}};
end end
end end
assign wren = wren_w; assign line_wren = wren_w;
end else begin end else begin
`UNUSED_VAR (write) `UNUSED_VAR (write)
`UNUSED_VAR (write_byteen) `UNUSED_VAR (write_byteen)
`UNUSED_VAR (write_data) `UNUSED_VAR (write_data)
assign wdata = fill_data; assign line_wdata = fill_data;
assign wren = fill; assign line_wren = fill;
end end
VX_onehot_encoder #( VX_onehot_encoder #(
@ -146,53 +148,50 @@ module VX_cache_data #(
`UNUSED_PIN (valid_out) `UNUSED_PIN (valid_out)
); );
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] rdata; wire line_read = (read && ~stall)
|| (WRITEBACK && (fill || flush));
wire line_write = write || fill;
VX_sp_ram #( VX_sp_ram #(
.DATAW (`CS_LINE_WIDTH * NUM_WAYS), .DATAW (`CS_LINE_WIDTH * NUM_WAYS),
.SIZE (`CS_LINES_PER_BANK), .SIZE (`CS_LINES_PER_BANK),
.WRENW (BYTEENW), .WRENW (BYTEENW),
.NO_RWCHECK (1) .NO_RWCHECK (1),
.RW_ASSERT (1)
) data_store ( ) data_store (
.clk (clk), .clk (clk),
.read (1'b1), .reset (reset),
.write (write || fill), .read (line_read),
.wren (wren), .write (line_write),
.wren (line_wren),
.addr (line_sel), .addr (line_sel),
.wdata (wdata), .wdata (line_wdata),
.rdata (rdata) .rdata (line_rdata)
); );
wire [NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata; wire [NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata;
if (`CS_WORDS_PER_LINE > 1) begin if (`CS_WORDS_PER_LINE > 1) begin
assign per_way_rdata = rdata[wsel]; assign per_way_rdata = line_rdata[wsel];
end else begin end else begin
`UNUSED_VAR (wsel) `UNUSED_VAR (wsel)
assign per_way_rdata = rdata; assign per_way_rdata = line_rdata;
end end
assign read_data = per_way_rdata[way_idx]; assign read_data = per_way_rdata[way_idx];
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] dirty_data_w;
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
for (genvar j = 0; j < NUM_WAYS; ++j) begin
assign dirty_data_w[j][i] = rdata[i][j];
end
end
assign dirty_data = dirty_data_w[way_idx];
`ifdef DBG_TRACE_CACHE `ifdef DBG_TRACE_CACHE
always @(posedge clk) begin always @(posedge clk) begin
if (fill && ~stall) begin if (fill && ~stall) begin
`TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data)); `TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data));
end end
if (flush && ~stall) begin if (flush && ~stall) begin
`TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b, byteen=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_valid, dirty_byteen)); `TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_byteen, dirty_data));
end end
if (read && ~stall) begin if (read && ~stall) begin
`TRACE(3, ("%d: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid)); `TRACE(3, ("%d: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid));
end end
if (write && ~stall) begin if (write && ~stall) begin
`TRACE(3, ("%d: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid)); `TRACE(3, ("%d: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid));
end end
end end
`endif `endif

View file

@ -26,13 +26,16 @@ module VX_cache_flush #(
VX_mem_bus_if.slave core_bus_in_if [NUM_REQS], VX_mem_bus_if.slave core_bus_in_if [NUM_REQS],
VX_mem_bus_if.master core_bus_out_if [NUM_REQS], VX_mem_bus_if.master core_bus_out_if [NUM_REQS],
input wire [NUM_BANKS-1:0] bank_req_fire, input wire [NUM_BANKS-1:0] bank_req_fire,
output wire [NUM_BANKS-1:0] flush_valid, output wire [NUM_BANKS-1:0] flush_begin,
input wire [NUM_BANKS-1:0] flush_ready input wire [NUM_BANKS-1:0] flush_end
); );
localparam STATE_IDLE = 0; localparam STATE_IDLE = 0;
localparam STATE_WAIT = 1; localparam STATE_WAIT1 = 1;
localparam STATE_FLUSH = 2; localparam STATE_FLUSH = 2;
localparam STATE_DONE = 3; localparam STATE_WAIT2 = 3;
localparam STATE_DONE = 4;
reg [2:0] state, state_n;
// track in-flight core requests // track in-flight core requests
@ -76,8 +79,6 @@ module VX_cache_flush #(
`UNUSED_VAR (bank_req_fire) `UNUSED_VAR (bank_req_fire)
end end
reg [1:0] state, state_n;
reg [NUM_BANKS-1:0] flush_done, flush_done_n; reg [NUM_BANKS-1:0] flush_done, flush_done_n;
wire [NUM_REQS-1:0] flush_req_mask; wire [NUM_REQS-1:0] flush_req_mask;
@ -113,22 +114,32 @@ module VX_cache_flush #(
case (state) case (state)
STATE_IDLE: begin STATE_IDLE: begin
if (flush_req_enable) begin if (flush_req_enable) begin
state_n = (BANK_SEL_LATENCY != 0) ? STATE_WAIT : STATE_FLUSH; state_n = (BANK_SEL_LATENCY != 0) ? STATE_WAIT1 : STATE_FLUSH;
end end
end end
STATE_WAIT: begin STATE_WAIT1: begin
if (no_inflight_reqs) begin if (no_inflight_reqs) begin
state_n = STATE_FLUSH; state_n = STATE_FLUSH;
end end
end end
STATE_FLUSH: begin STATE_FLUSH: begin
flush_done_n = flush_done | flush_ready; // generate a flush request pulse
if (flush_done_n == 0) begin state_n = STATE_WAIT2;
end
STATE_WAIT2: begin
// wait for all banks to finish flushing
flush_done_n = flush_done | flush_end;
if (flush_done_n == {NUM_BANKS{1'b1}}) begin
state_n = STATE_DONE; state_n = STATE_DONE;
flush_done_n = '0;
// only release current flush requests
// and keep normal requests locked
lock_released_n = flush_req_mask; lock_released_n = flush_req_mask;
end end
end end
STATE_DONE: begin STATE_DONE: begin
// wait until released flush requests are issued
// when returning to IDLE state other requests will unlock
lock_released_n = lock_released & ~core_bus_out_ready; lock_released_n = lock_released & ~core_bus_out_ready;
if (lock_released_n == 0) begin if (lock_released_n == 0) begin
state_n = STATE_IDLE; state_n = STATE_IDLE;
@ -149,6 +160,6 @@ module VX_cache_flush #(
end end
end end
assign flush_valid = {NUM_BANKS{state == STATE_FLUSH}}; assign flush_begin = {NUM_BANKS{state == STATE_FLUSH}};
endmodule endmodule

View file

@ -1,52 +0,0 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_cache_define.vh"
// cache init unit
// After reset, steps a line counter once through every line index
// (0 .. 2**`CS_LINE_SEL_BITS - 1) and flags each index as valid on its cycle.
module VX_cache_init #(
    // Size of cache in bytes
    parameter CACHE_SIZE = 1024,
    // Size of line inside a bank in bytes
    parameter LINE_SIZE  = 16,
    // Number of banks
    parameter NUM_BANKS  = 1,
    // Number of associative ways
    parameter NUM_WAYS   = 1
    // NOTE(review): these parameters are not referenced directly in this body;
    // presumably `CS_LINE_SEL_BITS (VX_cache_define.vh) derives from them - confirm.
) (
    input wire clk,
    input wire reset,

    // current line index produced by the counter
    output wire [`CS_LINE_SEL_BITS-1:0] addr_out,
    // high while the counter is still sweeping through the line indices
    output wire valid_out
);
    reg [`CS_LINE_SEL_BITS-1:0] line_ctr;
    reg                         enabled;

    // the counter is sitting on the last line index
    wire last_line = (line_ctr == ((2 ** `CS_LINE_SEL_BITS)-1));

    always @(posedge clk) begin
        if (reset) begin
            // restart the sweep from line 0
            line_ctr <= '0;
            enabled  <= 1;
        end else if (enabled) begin
            // advance every cycle while active; the increment on the last
            // index wraps the counter back to zero
            line_ctr <= line_ctr + `CS_LINE_SEL_BITS'(1);
            if (last_line) begin
                // last index is being presented this cycle: stop afterwards
                enabled <= 0;
            end
        end
    end

    assign valid_out = enabled;
    assign addr_out  = line_ctr;

endmodule

View file

@ -232,9 +232,10 @@ module VX_cache_mshr #(
.LUTRAM (1) .LUTRAM (1)
) entries ( ) entries (
.clk (clk), .clk (clk),
.reset (reset),
.read (1'b1), .read (1'b1),
.write (allocate_valid), .write (allocate_valid),
`UNUSED_PIN (wren), .wren (1'b1),
.waddr (allocate_id_r), .waddr (allocate_id_r),
.wdata (allocate_data), .wdata (allocate_data),
.raddr (dequeue_id_r), .raddr (dequeue_id_r),

View file

@ -26,6 +26,8 @@ module VX_cache_tags #(
parameter NUM_WAYS = 1, parameter NUM_WAYS = 1,
// Size of a word in bytes // Size of a word in bytes
parameter WORD_SIZE = 1, parameter WORD_SIZE = 1,
// Enable cache writeback
parameter WRITEBACK = 0,
// Request debug identifier // Request debug identifier
parameter UUID_WIDTH = 0 parameter UUID_WIDTH = 0
) ( ) (
@ -40,74 +42,100 @@ module VX_cache_tags #(
// init/fill/lookup // init/fill/lookup
input wire init, input wire init,
input wire flush,
input wire fill, input wire fill,
input wire write,
input wire lookup, input wire lookup,
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr, input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
input wire [NUM_WAYS-1:0] way_sel,
output wire [NUM_WAYS-1:0] tag_matches, output wire [NUM_WAYS-1:0] tag_matches,
// replacement // eviction
output wire [NUM_WAYS-1:0] repl_way, output wire evict_dirty,
output wire [`CS_TAG_SEL_BITS-1:0] repl_tag output wire [NUM_WAYS-1:0] evict_way,
output wire [`CS_TAG_SEL_BITS-1:0] evict_tag
); );
`UNUSED_SPARAM (INSTANCE_ID) `UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_PARAM (BANK_ID) `UNUSED_PARAM (BANK_ID)
`UNUSED_VAR (reset)
`UNUSED_VAR (lookup) `UNUSED_VAR (lookup)
// valid, tag // valid, dirty, tag
localparam TAG_WIDTH = 1 + `CS_TAG_SEL_BITS; localparam TAG_WIDTH = 1 + WRITEBACK + `CS_TAG_SEL_BITS;
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0]; wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_ADDR_TAG(line_addr); wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_ADDR_TAG(line_addr);
wire [NUM_WAYS-1:0][`CS_TAG_SEL_BITS-1:0] read_tag; wire [NUM_WAYS-1:0][`CS_TAG_SEL_BITS-1:0] read_tag;
wire [NUM_WAYS-1:0] read_valid; wire [NUM_WAYS-1:0] read_valid;
wire [NUM_WAYS-1:0] read_dirty;
if (NUM_WAYS > 1) begin if (NUM_WAYS > 1) begin
reg [NUM_WAYS-1:0] repl_way_r; reg [NUM_WAYS-1:0] evict_way_r;
// cyclic assignment of replacement way // cyclic assignment of replacement way
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
repl_way_r <= 1; evict_way_r <= 1;
end else if (~stall) begin // hold the value on stalls prevent filling different slots twice end else if (~stall) begin // holding the value on stalls prevents filling different slots twice
repl_way_r <= {repl_way_r[NUM_WAYS-2:0], repl_way_r[NUM_WAYS-1]}; evict_way_r <= {evict_way_r[NUM_WAYS-2:0], evict_way_r[NUM_WAYS-1]};
end end
end end
assign repl_way = repl_way_r; assign evict_way = fill ? evict_way_r : way_sel;
VX_onehot_mux #( VX_onehot_mux #(
.DATAW (`CS_TAG_SEL_BITS), .DATAW (`CS_TAG_SEL_BITS),
.N (NUM_WAYS) .N (NUM_WAYS)
) repl_tag_sel ( ) evict_tag_sel (
.data_in (read_tag), .data_in (read_tag),
.sel_in (repl_way_r), .sel_in (evict_way),
.data_out (repl_tag) .data_out (evict_tag)
); );
end else begin end else begin
`UNUSED_VAR (stall) `UNUSED_VAR (stall)
assign repl_way = 1'b1; assign evict_way = 1'b1;
assign repl_tag = read_tag; assign evict_tag = read_tag;
end end
// fill and flush need to also read in writeback mode
wire fill_s = fill && (!WRITEBACK || ~stall);
wire flush_s = flush && (!WRITEBACK || ~stall);
for (genvar i = 0; i < NUM_WAYS; ++i) begin for (genvar i = 0; i < NUM_WAYS; ++i) begin
wire do_fill = fill && repl_way[i]; wire do_fill = fill_s && evict_way[i];
wire do_write = init || do_fill; wire do_flush = flush_s && (!WRITEBACK || way_sel[i]); // flush the whole line in writethrough mode
wire line_valid = ~init; wire do_write = WRITEBACK && write && tag_matches[i];
wire line_read = (WRITEBACK && (fill_s || flush_s));
wire line_write = init || do_fill || do_flush || do_write;
wire line_valid = ~(init || flush);
wire [TAG_WIDTH-1:0] line_wdata;
wire [TAG_WIDTH-1:0] line_rdata;
if (WRITEBACK) begin
assign line_wdata = {line_valid, write, line_tag};
assign {read_valid[i], read_dirty[i], read_tag[i]} = line_rdata;
end else begin
assign line_wdata = {line_valid, line_tag};
assign {read_valid[i], read_tag[i]} = line_rdata;
assign read_dirty[i] = 1'b0;
end
VX_sp_ram #( VX_sp_ram #(
.DATAW (TAG_WIDTH), .DATAW (TAG_WIDTH),
.SIZE (`CS_LINES_PER_BANK), .SIZE (`CS_LINES_PER_BANK),
.NO_RWCHECK (1) .NO_RWCHECK (1),
.RW_ASSERT (1)
) tag_store ( ) tag_store (
.clk (clk), .clk (clk),
.read (1'b1), .reset (reset),
.write (do_write), .read (line_read),
`UNUSED_PIN (wren), .write (line_write),
.wren (1'b1),
.addr (line_sel), .addr (line_sel),
.wdata ({line_valid, line_tag}), .wdata (line_wdata),
.rdata ({read_valid[i], read_tag[i]}) .rdata (line_rdata)
); );
end end
@ -115,19 +143,31 @@ module VX_cache_tags #(
assign tag_matches[i] = read_valid[i] && (line_tag == read_tag[i]); assign tag_matches[i] = read_valid[i] && (line_tag == read_tag[i]);
end end
assign evict_dirty = | (read_dirty & evict_way);
`ifdef DBG_TRACE_CACHE `ifdef DBG_TRACE_CACHE
wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_sel};
always @(posedge clk) begin always @(posedge clk) begin
if (fill && ~stall) begin if (fill && ~stall) begin
`TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), repl_way, line_sel, line_tag)); `TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_sel, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID)));
end end
if (init) begin if (init) begin
`TRACE(3, ("%d: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel)); `TRACE(3, ("%d: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel));
end end
if (flush && ~stall) begin
`TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_sel, line_sel, evict_dirty));
end
if (lookup && ~stall) begin if (lookup && ~stall) begin
if (tag_matches != 0) begin if (tag_matches != 0) begin
`TRACE(3, ("%d: %s hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)); if (write)
`TRACE(3, ("%d: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid));
else
`TRACE(3, ("%d: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid));
end else begin end else begin
`TRACE(3, ("%d: %s miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)); if (write)
`TRACE(3, ("%d: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid));
else
`TRACE(3, ("%d: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid));
end end
end end
end end

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -20,20 +20,20 @@ module VX_cache_top import VX_gpu_pkg::*; #(
parameter NUM_REQS = 4, parameter NUM_REQS = 4,
// Size of cache in bytes // Size of cache in bytes
parameter CACHE_SIZE = 16384, parameter CACHE_SIZE = 16384,
// Size of line inside a bank in bytes // Size of line inside a bank in bytes
parameter LINE_SIZE = 64, parameter LINE_SIZE = 64,
// Number of banks // Number of banks
parameter NUM_BANKS = 4, parameter NUM_BANKS = 4,
// Number of associative ways // Number of associative ways
parameter NUM_WAYS = 4, parameter NUM_WAYS = 4,
// Size of a word in bytes // Size of a word in bytes
parameter WORD_SIZE = 4, parameter WORD_SIZE = 4,
// Core Response Queue Size // Core Response Queue Size
parameter CRSQ_SIZE = 2, parameter CRSQ_SIZE = 2,
// Miss Reserv Queue Knob // Miss Reserv Queue Knob
parameter MSHR_SIZE = 16, parameter MSHR_SIZE = 16,
// Memory Response Queue Size // Memory Response Queue Size
parameter MRSQ_SIZE = 0, parameter MRSQ_SIZE = 0,
// Memory Request Queue Size // Memory Request Queue Size
@ -42,6 +42,12 @@ module VX_cache_top import VX_gpu_pkg::*; #(
// Enable cache writeable // Enable cache writeable
parameter WRITE_ENABLE = 1, parameter WRITE_ENABLE = 1,
// Enable cache writeback
parameter WRITEBACK = 0,
// Enable dirty bytes on writeback
parameter DIRTY_BYTES = 0,
// Request debug identifier // Request debug identifier
parameter UUID_WIDTH = 0, parameter UUID_WIDTH = 0,
@ -55,7 +61,7 @@ module VX_cache_top import VX_gpu_pkg::*; #(
parameter MEM_OUT_BUF = 2, parameter MEM_OUT_BUF = 2,
parameter MEM_TAG_WIDTH = `CLOG2(MSHR_SIZE) + `CLOG2(NUM_BANKS) parameter MEM_TAG_WIDTH = `CLOG2(MSHR_SIZE) + `CLOG2(NUM_BANKS)
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
@ -82,17 +88,17 @@ module VX_cache_top import VX_gpu_pkg::*; #(
// Memory request // Memory request
output wire mem_req_valid, output wire mem_req_valid,
output wire mem_req_rw, output wire mem_req_rw,
output wire [LINE_SIZE-1:0] mem_req_byteen, output wire [LINE_SIZE-1:0] mem_req_byteen,
output wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr, output wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr,
output wire [`CS_LINE_WIDTH-1:0] mem_req_data, output wire [`CS_LINE_WIDTH-1:0] mem_req_data,
output wire [MEM_TAG_WIDTH-1:0] mem_req_tag, output wire [MEM_TAG_WIDTH-1:0] mem_req_tag,
input wire mem_req_ready, input wire mem_req_ready,
// Memory response // Memory response
input wire mem_rsp_valid, input wire mem_rsp_valid,
input wire [`CS_LINE_WIDTH-1:0] mem_rsp_data, input wire [`CS_LINE_WIDTH-1:0] mem_rsp_data,
input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag, input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag,
output wire mem_rsp_ready output wire mem_rsp_ready
); );
VX_mem_bus_if #( VX_mem_bus_if #(
@ -127,18 +133,18 @@ module VX_cache_top import VX_gpu_pkg::*; #(
// Memory request // Memory request
assign mem_req_valid = mem_bus_if.req_valid; assign mem_req_valid = mem_bus_if.req_valid;
assign mem_req_rw = mem_bus_if.req_data.rw; assign mem_req_rw = mem_bus_if.req_data.rw;
assign mem_req_byteen = mem_bus_if.req_data.byteen; assign mem_req_byteen = mem_bus_if.req_data.byteen;
assign mem_req_addr = mem_bus_if.req_data.addr; assign mem_req_addr = mem_bus_if.req_data.addr;
assign mem_req_data = mem_bus_if.req_data.data; assign mem_req_data = mem_bus_if.req_data.data;
assign mem_req_tag = mem_bus_if.req_data.tag; assign mem_req_tag = mem_bus_if.req_data.tag;
assign mem_bus_if.req_ready = mem_req_ready; assign mem_bus_if.req_ready = mem_req_ready;
`UNUSED_VAR (mem_bus_if.req_data.atype) `UNUSED_VAR (mem_bus_if.req_data.atype)
// Memory response // Memory response
assign mem_bus_if.rsp_valid = mem_rsp_valid; assign mem_bus_if.rsp_valid = mem_rsp_valid;
assign mem_bus_if.rsp_data.data = mem_rsp_data; assign mem_bus_if.rsp_data.data = mem_rsp_data;
assign mem_bus_if.rsp_data.tag = mem_rsp_tag; assign mem_bus_if.rsp_data.tag = mem_rsp_tag;
assign mem_rsp_ready = mem_bus_if.rsp_ready; assign mem_rsp_ready = mem_bus_if.rsp_ready;
VX_cache #( VX_cache #(
@ -156,6 +162,8 @@ module VX_cache_top import VX_gpu_pkg::*; #(
.TAG_WIDTH (TAG_WIDTH), .TAG_WIDTH (TAG_WIDTH),
.UUID_WIDTH (UUID_WIDTH), .UUID_WIDTH (UUID_WIDTH),
.WRITE_ENABLE (WRITE_ENABLE), .WRITE_ENABLE (WRITE_ENABLE),
.WRITEBACK (WRITEBACK),
.DIRTY_BYTES (DIRTY_BYTES),
.CORE_OUT_BUF (CORE_OUT_BUF), .CORE_OUT_BUF (CORE_OUT_BUF),
.MEM_OUT_BUF (MEM_OUT_BUF) .MEM_OUT_BUF (MEM_OUT_BUF)
) cache ( ) cache (

View file

@ -48,6 +48,9 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
// Enable cache writeback // Enable cache writeback
parameter WRITEBACK = 0, parameter WRITEBACK = 0,
// Enable dirty bytes on writeback
parameter DIRTY_BYTES = 0,
// Request debug identifier // Request debug identifier
parameter UUID_WIDTH = 0, parameter UUID_WIDTH = 0,
@ -187,6 +190,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
.MREQ_SIZE (MREQ_SIZE), .MREQ_SIZE (MREQ_SIZE),
.WRITE_ENABLE (WRITE_ENABLE), .WRITE_ENABLE (WRITE_ENABLE),
.WRITEBACK (WRITEBACK), .WRITEBACK (WRITEBACK),
.DIRTY_BYTES (DIRTY_BYTES),
.UUID_WIDTH (UUID_WIDTH), .UUID_WIDTH (UUID_WIDTH),
.TAG_WIDTH (TAG_WIDTH), .TAG_WIDTH (TAG_WIDTH),
.CORE_OUT_BUF (NC_OR_BYPASS ? 1 : CORE_OUT_BUF), .CORE_OUT_BUF (NC_OR_BYPASS ? 1 : CORE_OUT_BUF),
@ -223,12 +227,12 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
always @(posedge clk) begin always @(posedge clk) begin
if (core_req_fire) begin if (core_req_fire) begin
if (core_bus_if[i].req_data.rw) if (core_bus_if[i].req_data.rw)
`TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid)); `TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid));
else else
`TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid)); `TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid));
end end
if (core_rsp_fire) begin if (core_rsp_fire) begin
`TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid)); `TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid));
end end
end end
end end
@ -250,14 +254,14 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
always @(posedge clk) begin always @(posedge clk) begin
if (mem_req_fire) begin if (mem_req_fire) begin
if (mem_bus_if.req_data.rw) if (mem_bus_if.req_data.rw)
`TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", `TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%h, data=0x%h (#%0d)\n",
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid)); $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid));
else else
`TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", `TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid)); $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid));
end end
if (mem_rsp_fire) begin if (mem_rsp_fire) begin
`TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n", `TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n",
$time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid)); $time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid));
end end
end end

View file

@ -83,7 +83,7 @@ module VX_alu_muldiv #(
.DEPTH (`LATENCY_IMUL), .DEPTH (`LATENCY_IMUL),
.RESETW (1) .RESETW (1)
) mul_shift_reg ( ) mul_shift_reg (
.clk(clk), .clk (clk),
.reset (reset), .reset (reset),
.enable (mul_ready_in), .enable (mul_ready_in),
.data_in ({mul_valid_in, execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, mul_result_tmp}), .data_in ({mul_valid_in, execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, mul_result_tmp}),
@ -324,6 +324,7 @@ module VX_alu_muldiv #(
VX_stream_arb #( VX_stream_arb #(
.NUM_INPUTS (2), .NUM_INPUTS (2),
.DATAW (TAG_WIDTH + (NUM_LANES * `XLEN)), .DATAW (TAG_WIDTH + (NUM_LANES * `XLEN)),
.ARBITER ("F"),
.OUT_BUF (1) .OUT_BUF (1)
) rsp_buf ( ) rsp_buf (
.clk (clk), .clk (clk),

View file

@ -57,7 +57,7 @@ module VX_alu_unit #(
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin
`RESET_RELAY (block_reset, reset); `RESET_RELAY_EN (block_reset, reset,(BLOCK_SIZE > 1));
wire is_muldiv_op = `EXT_M_ENABLED && (per_block_execute_if[block_idx].data.op_args.alu.xtype == `ALU_TYPE_MULDIV); wire is_muldiv_op = `EXT_M_ENABLED && (per_block_execute_if[block_idx].data.op_args.alu.xtype == `ALU_TYPE_MULDIV);
@ -72,15 +72,13 @@ module VX_alu_unit #(
assign int_execute_if.valid = per_block_execute_if[block_idx].valid && ~is_muldiv_op; assign int_execute_if.valid = per_block_execute_if[block_idx].valid && ~is_muldiv_op;
assign int_execute_if.data = per_block_execute_if[block_idx].data; assign int_execute_if.data = per_block_execute_if[block_idx].data;
`RESET_RELAY (int_reset, block_reset);
VX_alu_int #( VX_alu_int #(
.INSTANCE_ID ($sformatf("%s-int%0d", INSTANCE_ID, block_idx)), .INSTANCE_ID ($sformatf("%s-int%0d", INSTANCE_ID, block_idx)),
.BLOCK_IDX (block_idx), .BLOCK_IDX (block_idx),
.NUM_LANES (NUM_LANES) .NUM_LANES (NUM_LANES)
) alu_int ( ) alu_int (
.clk (clk), .clk (clk),
.reset (int_reset), .reset (block_reset),
.execute_if (int_execute_if), .execute_if (int_execute_if),
.branch_ctl_if (branch_ctl_if[block_idx]), .branch_ctl_if (branch_ctl_if[block_idx]),
.commit_if (int_commit_if) .commit_if (int_commit_if)
@ -99,14 +97,12 @@ module VX_alu_unit #(
assign muldiv_execute_if.valid = per_block_execute_if[block_idx].valid && is_muldiv_op; assign muldiv_execute_if.valid = per_block_execute_if[block_idx].valid && is_muldiv_op;
assign muldiv_execute_if.data = per_block_execute_if[block_idx].data; assign muldiv_execute_if.data = per_block_execute_if[block_idx].data;
`RESET_RELAY (muldiv_reset, block_reset);
VX_alu_muldiv #( VX_alu_muldiv #(
.INSTANCE_ID ($sformatf("%s-muldiv%0d", INSTANCE_ID, block_idx)), .INSTANCE_ID ($sformatf("%s-muldiv%0d", INSTANCE_ID, block_idx)),
.NUM_LANES (NUM_LANES) .NUM_LANES (NUM_LANES)
) muldiv_unit ( ) muldiv_unit (
.clk (clk), .clk (clk),
.reset (muldiv_reset), .reset (block_reset),
.execute_if (muldiv_execute_if), .execute_if (muldiv_execute_if),
.commit_if (muldiv_commit_if) .commit_if (muldiv_commit_if)
); );
@ -121,15 +117,14 @@ module VX_alu_unit #(
// send response // send response
`RESET_RELAY (arb_reset, block_reset);
VX_stream_arb #( VX_stream_arb #(
.NUM_INPUTS (RSP_ARB_SIZE), .NUM_INPUTS (RSP_ARB_SIZE),
.DATAW (RSP_ARB_DATAW), .DATAW (RSP_ARB_DATAW),
.OUT_BUF (PARTIAL_BW ? 1 : 3) .OUT_BUF (PARTIAL_BW ? 1 : 3),
.ARBITER ("F")
) rsp_arb ( ) rsp_arb (
.clk (clk), .clk (clk),
.reset (arb_reset), .reset (block_reset),
.valid_in ({ .valid_in ({
`ifdef EXT_M_ENABLE `ifdef EXT_M_ENABLE
muldiv_commit_if.valid, muldiv_commit_if.valid,

View file

@ -313,6 +313,7 @@ module VX_core import VX_gpu_pkg::*; #(
.DATA_SIZE (DCACHE_WORD_SIZE), .DATA_SIZE (DCACHE_WORD_SIZE),
.TAG_WIDTH (DCACHE_TAG_WIDTH), .TAG_WIDTH (DCACHE_TAG_WIDTH),
.TAG_SEL_BITS (DCACHE_TAG_WIDTH - `UUID_WIDTH), .TAG_SEL_BITS (DCACHE_TAG_WIDTH - `UUID_WIDTH),
.ARBITER ("P"),
.REQ_OUT_BUF (0), .REQ_OUT_BUF (0),
.RSP_OUT_BUF (0) .RSP_OUT_BUF (0)
) lsu_adapter ( ) lsu_adapter (

View file

@ -52,7 +52,7 @@ module VX_dcr_data import VX_gpu_pkg::*, VX_trace_pkg::*; (
if (dcr_bus_if.write_valid) begin if (dcr_bus_if.write_valid) begin
`TRACE(1, ("%d: base-dcr: state=", $time)); `TRACE(1, ("%d: base-dcr: state=", $time));
trace_base_dcr(1, dcr_bus_if.write_addr); trace_base_dcr(1, dcr_bus_if.write_addr);
`TRACE(1, (", data=0x%0h\n", dcr_bus_if.write_data)); `TRACE(1, (", data=0x%h\n", dcr_bus_if.write_data));
end end
end end
`endif `endif

View file

@ -40,7 +40,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
localparam ISSUE_W = `LOG2UP(`ISSUE_WIDTH); localparam ISSUE_W = `LOG2UP(`ISSUE_WIDTH);
localparam IN_DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `INST_OP_BITS + `INST_ARGS_BITS + 1 + `PC_BITS + `NR_BITS + `NT_WIDTH + (3 * `NUM_THREADS * `XLEN); localparam IN_DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `INST_OP_BITS + `INST_ARGS_BITS + 1 + `PC_BITS + `NR_BITS + `NT_WIDTH + (3 * `NUM_THREADS * `XLEN);
localparam OUT_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `INST_OP_BITS + `INST_ARGS_BITS + 1 + `PC_BITS + `NR_BITS + `NT_WIDTH + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1; localparam OUT_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `INST_OP_BITS + `INST_ARGS_BITS + 1 + `PC_BITS + `NR_BITS + `NT_WIDTH + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1;
localparam FANOUT_ENABLE= (`NUM_THREADS > MAX_FANOUT); localparam FANOUT_ENABLE= (`NUM_THREADS > (MAX_FANOUT + MAX_FANOUT /2));
localparam DATA_TMASK_OFF = IN_DATAW - (`UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS); localparam DATA_TMASK_OFF = IN_DATAW - (`UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS);
localparam DATA_REGS_OFF = 0; localparam DATA_REGS_OFF = 0;
@ -85,6 +85,8 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
wire [ISSUE_W-1:0] issue_idx = ISSUE_W'(batch_idx * BLOCK_SIZE) + ISSUE_W'(block_idx); wire [ISSUE_W-1:0] issue_idx = ISSUE_W'(batch_idx * BLOCK_SIZE) + ISSUE_W'(block_idx);
assign issue_indices[block_idx] = issue_idx; assign issue_indices[block_idx] = issue_idx;
`RESET_RELAY_EN (block_reset, reset, (BLOCK_SIZE > 1));
wire valid_p, ready_p; wire valid_p, ready_p;
if (`NUM_THREADS != NUM_LANES) begin if (`NUM_THREADS != NUM_LANES) begin
@ -100,7 +102,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
wire fire_eop = fire_p && is_last_p; wire fire_eop = fire_p && is_last_p;
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (block_reset) begin
sent_mask_p <= '0; sent_mask_p <= '0;
is_first_p <= 1; is_first_p <= 1;
end else begin end else begin
@ -215,8 +217,6 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
assign isw = block_idx; assign isw = block_idx;
end end
`RESET_RELAY(buf_out_reset, reset);
wire [`NW_WIDTH-1:0] block_wid = wis_to_wid(dispatch_data[issue_idx][DATA_TMASK_OFF+`NUM_THREADS +: ISSUE_WIS_W], isw); wire [`NW_WIDTH-1:0] block_wid = wis_to_wid(dispatch_data[issue_idx][DATA_TMASK_OFF+`NUM_THREADS +: ISSUE_WIS_W], isw);
VX_elastic_buffer #( VX_elastic_buffer #(
@ -225,7 +225,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
) buf_out ( ) buf_out (
.clk (clk), .clk (clk),
.reset (buf_out_reset), .reset (block_reset),
.valid_in (valid_p), .valid_in (valid_p),
.ready_in (ready_p), .ready_in (ready_p),
.data_in ({ .data_in ({

View file

@ -56,9 +56,10 @@ module VX_fetch import VX_gpu_pkg::*; #(
.LUTRAM (1) .LUTRAM (1)
) tag_store ( ) tag_store (
.clk (clk), .clk (clk),
.reset (reset),
.read (1'b1), .read (1'b1),
.write (icache_req_fire), .write (icache_req_fire),
`UNUSED_PIN (wren), .wren (1'b1),
.waddr (req_tag), .waddr (req_tag),
.wdata ({schedule_if.data.PC, schedule_if.data.tmask}), .wdata ({schedule_if.data.PC, schedule_if.data.tmask}),
.raddr (rsp_tag), .raddr (rsp_tag),

View file

@ -57,7 +57,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
`UNUSED_VAR (per_block_execute_if[block_idx].data.tid) `UNUSED_VAR (per_block_execute_if[block_idx].data.tid)
`UNUSED_VAR (per_block_execute_if[block_idx].data.wb) `UNUSED_VAR (per_block_execute_if[block_idx].data.wb)
`RESET_RELAY (block_reset, reset); `RESET_RELAY_EN (block_reset, reset, (BLOCK_SIZE > 1));
// Store request info // Store request info
wire fpu_req_valid, fpu_req_ready; wire fpu_req_valid, fpu_req_ready;
@ -84,14 +84,12 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
wire execute_fire = per_block_execute_if[block_idx].valid && per_block_execute_if[block_idx].ready; wire execute_fire = per_block_execute_if[block_idx].valid && per_block_execute_if[block_idx].ready;
wire fpu_rsp_fire = fpu_rsp_valid && fpu_rsp_ready; wire fpu_rsp_fire = fpu_rsp_valid && fpu_rsp_ready;
`RESET_RELAY (ibuf_reset, block_reset);
VX_index_buffer #( VX_index_buffer #(
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + PID_WIDTH + 1 + 1), .DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + PID_WIDTH + 1 + 1),
.SIZE (`FPUQ_SIZE) .SIZE (`FPUQ_SIZE)
) tag_store ( ) tag_store (
.clk (clk), .clk (clk),
.reset (ibuf_reset), .reset (block_reset),
.acquire_en (execute_fire), .acquire_en (execute_fire),
.write_addr (fpu_req_tag), .write_addr (fpu_req_tag),
.write_data ({per_block_execute_if[block_idx].data.uuid, per_block_execute_if[block_idx].data.wid, per_block_execute_if[block_idx].data.tmask, per_block_execute_if[block_idx].data.PC, per_block_execute_if[block_idx].data.rd, per_block_execute_if[block_idx].data.pid, per_block_execute_if[block_idx].data.sop, per_block_execute_if[block_idx].data.eop}), .write_data ({per_block_execute_if[block_idx].data.uuid, per_block_execute_if[block_idx].data.wid, per_block_execute_if[block_idx].data.tmask, per_block_execute_if[block_idx].data.PC, per_block_execute_if[block_idx].data.rd, per_block_execute_if[block_idx].data.pid, per_block_execute_if[block_idx].data.sop, per_block_execute_if[block_idx].data.eop}),
@ -113,8 +111,6 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
assign fpu_req_valid = per_block_execute_if[block_idx].valid && ~mdata_full; assign fpu_req_valid = per_block_execute_if[block_idx].valid && ~mdata_full;
assign per_block_execute_if[block_idx].ready = fpu_req_ready && ~mdata_full; assign per_block_execute_if[block_idx].ready = fpu_req_ready && ~mdata_full;
`RESET_RELAY (fpu_reset, block_reset);
`ifdef FPU_DPI `ifdef FPU_DPI
VX_fpu_dpi #( VX_fpu_dpi #(
@ -123,7 +119,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
.OUT_BUF (PARTIAL_BW ? 1 : 3) .OUT_BUF (PARTIAL_BW ? 1 : 3)
) fpu_dpi ( ) fpu_dpi (
.clk (clk), .clk (clk),
.reset (fpu_reset), .reset (block_reset),
.valid_in (fpu_req_valid), .valid_in (fpu_req_valid),
.mask_in (per_block_execute_if[block_idx].data.tmask), .mask_in (per_block_execute_if[block_idx].data.tmask),
@ -152,7 +148,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
.OUT_BUF (PARTIAL_BW ? 1 : 3) .OUT_BUF (PARTIAL_BW ? 1 : 3)
) fpu_fpnew ( ) fpu_fpnew (
.clk (clk), .clk (clk),
.reset (fpu_reset), .reset (block_reset),
.valid_in (fpu_req_valid), .valid_in (fpu_req_valid),
.mask_in (per_block_execute_if[block_idx].data.tmask), .mask_in (per_block_execute_if[block_idx].data.tmask),
@ -181,7 +177,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
.OUT_BUF (PARTIAL_BW ? 1 : 3) .OUT_BUF (PARTIAL_BW ? 1 : 3)
) fpu_dsp ( ) fpu_dsp (
.clk (clk), .clk (clk),
.reset (fpu_reset), .reset (block_reset),
.valid_in (fpu_req_valid), .valid_in (fpu_req_valid),
.mask_in (per_block_execute_if[block_idx].data.tmask), .mask_in (per_block_execute_if[block_idx].data.tmask),
@ -228,14 +224,12 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
// send response // send response
`RESET_RELAY (rsp_reset, block_reset);
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + (NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1), .DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + (NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1),
.SIZE (0) .SIZE (0)
) rsp_buf ( ) rsp_buf (
.clk (clk), .clk (clk),
.reset (rsp_reset), .reset (block_reset),
.valid_in (fpu_rsp_valid), .valid_in (fpu_rsp_valid),
.ready_in (fpu_rsp_ready), .ready_in (fpu_rsp_ready),
.data_in ({fpu_rsp_uuid, fpu_rsp_wid, fpu_rsp_tmask, fpu_rsp_PC, fpu_rsp_rd, fpu_rsp_result, fpu_rsp_pid, fpu_rsp_sop, fpu_rsp_eop}), .data_in ({fpu_rsp_uuid, fpu_rsp_wid, fpu_rsp_tmask, fpu_rsp_PC, fpu_rsp_rd, fpu_rsp_result, fpu_rsp_pid, fpu_rsp_sop, fpu_rsp_eop}),

View file

@ -79,15 +79,13 @@ module VX_gather_unit import VX_gpu_pkg::*; #(
.NUM_LANES (NUM_LANES) .NUM_LANES (NUM_LANES)
) commit_tmp_if(); ) commit_tmp_if();
`RESET_RELAY(commit_out_reset, reset);
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (DATAW), .DATAW (DATAW),
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
) out_buf ( ) out_buf (
.clk (clk), .clk (clk),
.reset (commit_out_reset), .reset (reset),
.valid_in (commit_out_valid[i]), .valid_in (commit_out_valid[i]),
.ready_in (commit_out_ready[i]), .ready_in (commit_out_ready[i]),
.data_in (commit_out_data[i]), .data_in (commit_out_data[i]),

View file

@ -72,9 +72,10 @@ module VX_ipdom_stack #(
.LUTRAM (OUT_REG ? 0 : 1) .LUTRAM (OUT_REG ? 0 : 1)
) store ( ) store (
.clk (clk), .clk (clk),
.reset (reset),
.read (1'b1), .read (1'b1),
.write (push), .write (push),
`UNUSED_PIN (wren), .wren (1'b1),
.waddr (wr_ptr), .waddr (wr_ptr),
.wdata ({q1, q0}), .wdata ({q1, q0}),
.raddr (rd_ptr), .raddr (rd_ptr),

View file

@ -39,6 +39,8 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
.TAG_WIDTH (LSU_TAG_WIDTH) .TAG_WIDTH (LSU_TAG_WIDTH)
) lsu_switch_if[`NUM_LSU_BLOCKS](); ) lsu_switch_if[`NUM_LSU_BLOCKS]();
`RESET_RELAY_EX (block_reset, reset, `NUM_LSU_BLOCKS, 1);
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
wire [`NUM_LSU_LANES-1:0] is_addr_local_mask; wire [`NUM_LSU_LANES-1:0] is_addr_local_mask;
@ -52,15 +54,13 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
wire req_global_ready; wire req_global_ready;
wire req_local_ready; wire req_local_ready;
`RESET_RELAY (switch_reset, reset);
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (REQ_DATAW), .DATAW (REQ_DATAW),
.SIZE (2), .SIZE (2),
.OUT_REG (1) .OUT_REG (1)
) req_global_buf ( ) req_global_buf (
.clk (clk), .clk (clk),
.reset (switch_reset), .reset (block_reset[i]),
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_global), .valid_in (lsu_mem_in_if[i].req_valid && is_addr_global),
.data_in ({ .data_in ({
lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask, lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask,
@ -91,7 +91,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
.OUT_REG (0) .OUT_REG (0)
) req_local_buf ( ) req_local_buf (
.clk (clk), .clk (clk),
.reset (switch_reset), .reset (block_reset[i]),
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_local), .valid_in (lsu_mem_in_if[i].req_valid && is_addr_local),
.data_in ({ .data_in ({
lsu_mem_in_if[i].req_data.mask & is_addr_local_mask, lsu_mem_in_if[i].req_data.mask & is_addr_local_mask,
@ -126,7 +126,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
.OUT_BUF (1) .OUT_BUF (1)
) rsp_arb ( ) rsp_arb (
.clk (clk), .clk (clk),
.reset (switch_reset), .reset (block_reset[i]),
.valid_in ({ .valid_in ({
lsu_switch_if[i].rsp_valid, lsu_switch_if[i].rsp_valid,
lsu_mem_out_if[i].rsp_valid lsu_mem_out_if[i].rsp_valid
@ -157,18 +157,17 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
.TAG_WIDTH (LSU_TAG_WIDTH) .TAG_WIDTH (LSU_TAG_WIDTH)
) lmem_bus_tmp_if[`NUM_LSU_LANES](); ) lmem_bus_tmp_if[`NUM_LSU_LANES]();
`RESET_RELAY (adapter_reset, reset);
VX_lsu_adapter #( VX_lsu_adapter #(
.NUM_LANES (`NUM_LSU_LANES), .NUM_LANES (`NUM_LSU_LANES),
.DATA_SIZE (LSU_WORD_SIZE), .DATA_SIZE (LSU_WORD_SIZE),
.TAG_WIDTH (LSU_TAG_WIDTH), .TAG_WIDTH (LSU_TAG_WIDTH),
.TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH), .TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH),
.ARBITER ("P"),
.REQ_OUT_BUF (3), .REQ_OUT_BUF (3),
.RSP_OUT_BUF (0) .RSP_OUT_BUF (0)
) lsu_adapter ( ) lsu_adapter (
.clk (clk), .clk (clk),
.reset (adapter_reset), .reset (block_reset[i]),
.lsu_mem_if (lsu_switch_if[i]), .lsu_mem_if (lsu_switch_if[i]),
.mem_bus_if (lmem_bus_tmp_if) .mem_bus_if (lmem_bus_tmp_if)
); );

View file

@ -490,6 +490,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
VX_stream_arb #( VX_stream_arb #(
.NUM_INPUTS (2), .NUM_INPUTS (2),
.DATAW (RSP_ARB_DATAW), .DATAW (RSP_ARB_DATAW),
.ARBITER ("P"), // prioritize commit_rsp_if
.OUT_BUF (3) .OUT_BUF (3)
) rsp_arb ( ) rsp_arb (
.clk (clk), .clk (clk),

View file

@ -13,6 +13,13 @@
`include "VX_define.vh" `include "VX_define.vh"
// reset all GPRs in debug mode
`ifdef SIMULATION
`ifndef NDEBUG
`define GPR_RESET
`endif
`endif
module VX_operands import VX_gpu_pkg::*; #( module VX_operands import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "", parameter `STRING INSTANCE_ID = "",
parameter NUM_BANKS = 4, parameter NUM_BANKS = 4,
@ -36,8 +43,9 @@ module VX_operands import VX_gpu_pkg::*; #(
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS); localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS); localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
localparam PER_BANK_REGS = `NUM_REGS / NUM_BANKS; localparam PER_BANK_REGS = `NUM_REGS / NUM_BANKS;
localparam METADATAW = ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + `NR_BITS; localparam META_DATAW = ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + `NR_BITS + `UUID_WIDTH;
localparam DATAW = `UUID_WIDTH + METADATAW + 3 * `NUM_THREADS * `XLEN; localparam REGS_DATAW = `XLEN * `NUM_THREADS;
localparam DATAW = META_DATAW + NUM_SRC_REGS * REGS_DATAW;
localparam RAM_ADDRW = `LOG2UP(`NUM_REGS * PER_ISSUE_WARPS); localparam RAM_ADDRW = `LOG2UP(`NUM_REGS * PER_ISSUE_WARPS);
localparam PER_BANK_ADDRW = RAM_ADDRW - BANK_SEL_BITS; localparam PER_BANK_ADDRW = RAM_ADDRW - BANK_SEL_BITS;
localparam XLEN_SIZE = `XLEN / 8; localparam XLEN_SIZE = `XLEN / 8;
@ -46,30 +54,28 @@ module VX_operands import VX_gpu_pkg::*; #(
`UNUSED_VAR (writeback_if.data.sop) `UNUSED_VAR (writeback_if.data.sop)
wire [NUM_SRC_REGS-1:0] src_valid; wire [NUM_SRC_REGS-1:0] src_valid;
wire [NUM_SRC_REGS-1:0] req_in_valid; wire [NUM_SRC_REGS-1:0] req_in_valid, req_in_ready;
wire [NUM_SRC_REGS-1:0] req_in_ready;
wire [NUM_SRC_REGS-1:0][PER_BANK_ADDRW-1:0] req_in_data; wire [NUM_SRC_REGS-1:0][PER_BANK_ADDRW-1:0] req_in_data;
wire [NUM_SRC_REGS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx; wire [NUM_SRC_REGS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx;
wire [NUM_BANKS-1:0] gpr_rd_valid_n, gpr_rd_ready; wire [NUM_BANKS-1:0] gpr_rd_valid, gpr_rd_ready;
reg [NUM_BANKS-1:0] gpr_rd_valid; wire [NUM_BANKS-1:0] gpr_rd_valid_st1, gpr_rd_valid_st2;
wire [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr_n; wire [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr, gpr_rd_addr_st1;
reg [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr; wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st1, gpr_rd_data_st2;
wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data; wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx, gpr_rd_req_idx_st1, gpr_rd_req_idx_st2;
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx_n;
reg [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx;
wire pipe_in_ready; wire pipe_valid_st1, pipe_ready_st1;
reg pipe_out_valid; wire pipe_valid_st2, pipe_ready_st2;
wire pipe_out_ready; wire [META_DATAW-1:0] pipe_data, pipe_data_st1, pipe_data_st2;
reg [`UUID_WIDTH-1:0] pipe_out_uuid;
reg [METADATAW-1:0] pipe_out_data;
reg [NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data, src_data_n; reg [NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_n;
reg [NUM_SRC_REGS-1:0] data_fetched; wire [NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_st1, src_data_st2;
reg has_collision, has_collision_n;
wire stg_in_valid, stg_in_ready; reg [NUM_SRC_REGS-1:0] data_fetched_n;
wire [NUM_SRC_REGS-1:0] data_fetched_st1;
reg has_collision_n;
wire has_collision_st1;
wire [NUM_SRC_REGS-1:0][`NR_BITS-1:0] src_regs = {scoreboard_if.data.rs3, wire [NUM_SRC_REGS-1:0][`NR_BITS-1:0] src_regs = {scoreboard_if.data.rs3,
scoreboard_if.data.rs2, scoreboard_if.data.rs2,
@ -89,7 +95,7 @@ module VX_operands import VX_gpu_pkg::*; #(
end end
for (genvar i = 0; i < NUM_SRC_REGS; ++i) begin for (genvar i = 0; i < NUM_SRC_REGS; ++i) begin
assign src_valid[i] = (src_regs[i] != 0) && ~data_fetched[i]; assign src_valid[i] = (src_regs[i] != 0) && ~data_fetched_st1[i];
end end
assign req_in_valid = {NUM_SRC_REGS{scoreboard_if.valid}} & src_valid; assign req_in_valid = {NUM_SRC_REGS{scoreboard_if.valid}} & src_valid;
@ -109,13 +115,20 @@ module VX_operands import VX_gpu_pkg::*; #(
.data_in (req_in_data), .data_in (req_in_data),
.sel_in (req_bank_idx), .sel_in (req_bank_idx),
.ready_in (req_in_ready), .ready_in (req_in_ready),
.valid_out (gpr_rd_valid_n), .valid_out (gpr_rd_valid),
.data_out (gpr_rd_addr_n), .data_out (gpr_rd_addr),
.sel_out (gpr_rd_req_idx_n), .sel_out (gpr_rd_req_idx),
.ready_out (gpr_rd_ready) .ready_out (gpr_rd_ready)
); );
assign gpr_rd_ready = {NUM_BANKS{stg_in_ready}}; wire pipe_in_ready = pipe_ready_st1 || ~pipe_valid_st1;
assign gpr_rd_ready = {NUM_BANKS{pipe_in_ready}};
assign scoreboard_if.ready = pipe_in_ready && ~has_collision_n;
wire pipe_fire_st1 = pipe_valid_st1 && pipe_ready_st1;
wire pipe_fire_st2 = pipe_valid_st2 && pipe_ready_st2;
always @(*) begin always @(*) begin
has_collision_n = 0; has_collision_n = 0;
@ -129,83 +142,82 @@ module VX_operands import VX_gpu_pkg::*; #(
end end
always @(*) begin always @(*) begin
src_data_n = src_data; data_fetched_n = data_fetched_st1;
for (integer b = 0; b < NUM_BANKS; ++b) begin if (scoreboard_if.ready) begin
if (gpr_rd_valid[b]) begin data_fetched_n = '0;
src_data_n[gpr_rd_req_idx[b]] = gpr_rd_data[b];
end
end
end
wire pipe_stall = pipe_out_valid && ~pipe_out_ready;
assign pipe_in_ready = ~pipe_stall;
assign scoreboard_if.ready = pipe_in_ready && ~has_collision_n;
wire stg_in_fire = stg_in_valid && stg_in_ready;
always @(posedge clk) begin
if (reset) begin
pipe_out_valid <= 0;
gpr_rd_valid <= '0;
data_fetched <= '0;
src_data <= '0;
end else begin end else begin
if (~pipe_stall) begin data_fetched_n = data_fetched_st1 | req_in_ready;
pipe_out_valid <= scoreboard_if.valid;
gpr_rd_valid <= gpr_rd_valid_n;
if (scoreboard_if.ready) begin
data_fetched <= '0;
end else begin
data_fetched <= data_fetched | req_in_ready;
end
if (stg_in_fire) begin
src_data <= '0;
end else begin
src_data <= src_data_n;
end
end
end
if (~pipe_stall) begin
pipe_out_uuid <= scoreboard_if.data.uuid;
pipe_out_data <= {
scoreboard_if.data.wis,
scoreboard_if.data.tmask,
scoreboard_if.data.PC,
scoreboard_if.data.wb,
scoreboard_if.data.ex_type,
scoreboard_if.data.op_type,
scoreboard_if.data.op_args,
scoreboard_if.data.rd
};
has_collision <= has_collision_n;
gpr_rd_addr <= gpr_rd_addr_n;
gpr_rd_req_idx <= gpr_rd_req_idx_n;
end end
end end
assign pipe_out_ready = stg_in_ready; assign pipe_data = {
assign stg_in_valid = pipe_out_valid && ~has_collision; scoreboard_if.data.wis,
scoreboard_if.data.tmask,
scoreboard_if.data.PC,
scoreboard_if.data.wb,
scoreboard_if.data.ex_type,
scoreboard_if.data.op_type,
scoreboard_if.data.op_args,
scoreboard_if.data.rd,
scoreboard_if.data.uuid
};
VX_pipe_register #(
.DATAW (1 + NUM_SRC_REGS + NUM_BANKS + META_DATAW + 1 + NUM_BANKS * (PER_BANK_ADDRW + REQ_SEL_WIDTH)),
.RESETW (1 + NUM_SRC_REGS)
) pipe_reg1 (
.clk (clk),
.reset (reset),
.enable (pipe_in_ready),
.data_in ({scoreboard_if.valid, data_fetched_n, gpr_rd_valid, pipe_data, has_collision_n, gpr_rd_addr, gpr_rd_req_idx}),
.data_out ({pipe_valid_st1, data_fetched_st1, gpr_rd_valid_st1, pipe_data_st1, has_collision_st1, gpr_rd_addr_st1, gpr_rd_req_idx_st1})
);
assign pipe_ready_st1 = pipe_ready_st2 || ~pipe_valid_st2;
assign src_data_st1 = pipe_fire_st2 ? '0 : src_data_n;
wire pipe_valid2_st1 = pipe_valid_st1 && ~has_collision_st1;
`RESET_RELAY (pipe2_reset, reset); // needed for pipe_reg2's wide RESETW
VX_pipe_register #(
.DATAW (1 + NUM_SRC_REGS * REGS_DATAW + NUM_BANKS + NUM_BANKS * REGS_DATAW + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH),
.RESETW (1 + NUM_SRC_REGS * REGS_DATAW)
) pipe_reg2 (
.clk (clk),
.reset (pipe2_reset),
.enable (pipe_ready_st1),
.data_in ({pipe_valid2_st1, src_data_st1, gpr_rd_valid_st1, gpr_rd_data_st1, pipe_data_st1, gpr_rd_req_idx_st1}),
.data_out ({pipe_valid_st2, src_data_st2, gpr_rd_valid_st2, gpr_rd_data_st2, pipe_data_st2, gpr_rd_req_idx_st2})
);
always @(*) begin
src_data_n = src_data_st2;
for (integer b = 0; b < NUM_BANKS; ++b) begin
if (gpr_rd_valid_st2[b]) begin
src_data_n[gpr_rd_req_idx_st2[b]] = gpr_rd_data_st2[b];
end
end
end
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (DATAW), .DATAW (DATAW),
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)),
.LUTRAM (1) .LUTRAM (1)
) out_buffer ( ) out_buf (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.valid_in (stg_in_valid), .valid_in (pipe_valid_st2),
.ready_in (stg_in_ready), .ready_in (pipe_ready_st2),
.data_in ({ .data_in ({
pipe_out_uuid, pipe_data_st2,
pipe_out_data,
src_data_n[0], src_data_n[0],
src_data_n[1], src_data_n[1],
src_data_n[2] src_data_n[2]
}), }),
.data_out ({ .data_out ({
operands_if.data.uuid,
operands_if.data.wis, operands_if.data.wis,
operands_if.data.tmask, operands_if.data.tmask,
operands_if.data.PC, operands_if.data.PC,
@ -214,6 +226,7 @@ module VX_operands import VX_gpu_pkg::*; #(
operands_if.data.op_type, operands_if.data.op_type,
operands_if.data.op_args, operands_if.data.op_args,
operands_if.data.rd, operands_if.data.rd,
operands_if.data.uuid,
operands_if.data.rs1_data, operands_if.data.rs1_data,
operands_if.data.rs2_data, operands_if.data.rs2_data,
operands_if.data.rs3_data operands_if.data.rs3_data
@ -262,27 +275,24 @@ module VX_operands import VX_gpu_pkg::*; #(
assign wren[i*XLEN_SIZE+:XLEN_SIZE] = {XLEN_SIZE{writeback_if.data.tmask[i]}}; assign wren[i*XLEN_SIZE+:XLEN_SIZE] = {XLEN_SIZE{writeback_if.data.tmask[i]}};
end end
`ifdef GPR_RESET
VX_dp_ram_rst #(
`else
VX_dp_ram #( VX_dp_ram #(
`endif .DATAW (REGS_DATAW),
.DATAW (`XLEN * `NUM_THREADS),
.SIZE (PER_BANK_REGS * PER_ISSUE_WARPS), .SIZE (PER_BANK_REGS * PER_ISSUE_WARPS),
.WRENW (BYTEENW), .WRENW (BYTEENW),
`ifdef GPR_RESET
.RESET_RAM (1),
`endif
.NO_RWCHECK (1) .NO_RWCHECK (1)
) gpr_ram ( ) gpr_ram (
.clk (clk), .clk (clk),
`ifdef GPR_RESET
.reset (reset), .reset (reset),
`endif .read (pipe_fire_st1),
.read (1'b1),
.wren (wren), .wren (wren),
.write (gpr_wr_enabled), .write (gpr_wr_enabled),
.waddr (gpr_wr_addr), .waddr (gpr_wr_addr),
.wdata (writeback_if.data.data), .wdata (writeback_if.data.data),
.raddr (gpr_rd_addr[b]), .raddr (gpr_rd_addr_st1[b]),
.rdata (gpr_rd_data[b]) .rdata (gpr_rd_data_st1[b])
); );
end end

View file

@ -383,16 +383,16 @@ module VX_schedule import VX_gpu_pkg::*; #(
wire [`NUM_WARPS-1:0] pending_warp_empty; wire [`NUM_WARPS-1:0] pending_warp_empty;
wire [`NUM_WARPS-1:0] pending_warp_alm_empty; wire [`NUM_WARPS-1:0] pending_warp_alm_empty;
for (genvar i = 0; i < `NUM_WARPS; ++i) begin `RESET_RELAY_EX (pending_instr_reset, reset, `NUM_WARPS, `MAX_FANOUT);
`RESET_RELAY (pending_instr_reset, reset); for (genvar i = 0; i < `NUM_WARPS; ++i) begin
VX_pending_size #( VX_pending_size #(
.SIZE (4096), .SIZE (4096),
.ALM_EMPTY (1) .ALM_EMPTY (1)
) counter ( ) counter (
.clk (clk), .clk (clk),
.reset (pending_instr_reset), .reset (pending_instr_reset[i]),
.incr (per_warp_incr[i]), .incr (per_warp_incr[i]),
.decr (commit_sched_if.committed_warps[i]), .decr (commit_sched_if.committed_warps[i]),
.empty (pending_warp_empty[i]), .empty (pending_warp_empty[i]),

View file

@ -179,7 +179,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
VX_gather_unit #( VX_gather_unit #(
.BLOCK_SIZE (BLOCK_SIZE), .BLOCK_SIZE (BLOCK_SIZE),
.NUM_LANES (NUM_LANES), .NUM_LANES (NUM_LANES),
.OUT_BUF (1) .OUT_BUF (3)
) gather_unit ( ) gather_unit (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -21,7 +21,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #(
parameter TAG_WIDTH = 1 parameter TAG_WIDTH = 1
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
output wire ready_in, output wire ready_in,
input wire valid_in, input wire valid_in,
@ -36,7 +36,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #(
input wire is_signed, input wire is_signed,
input wire [NUM_LANES-1:0][31:0] dataa, input wire [NUM_LANES-1:0][31:0] dataa,
output wire [NUM_LANES-1:0][31:0] result, output wire [NUM_LANES-1:0][31:0] result,
output wire has_fflags, output wire has_fflags,
output wire [`FP_FLAGS_BITS-1:0] fflags, output wire [`FP_FLAGS_BITS-1:0] fflags,
@ -45,25 +45,26 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #(
input wire ready_out, input wire ready_out,
output wire valid_out output wire valid_out
); );
`UNUSED_VAR (frm) `UNUSED_VAR (frm)
wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0] mask_out;
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
fflags_t [NUM_LANES-1:0] fflags_out; fflags_t [NUM_LANES-1:0] fflags_out;
wire pe_enable; wire pe_enable;
wire [NUM_PES-1:0][31:0] pe_data_in; wire [NUM_PES-1:0][31:0] pe_data_in;
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
VX_pe_serializer #( VX_pe_serializer #(
.NUM_LANES (NUM_LANES), .NUM_LANES (NUM_LANES),
.NUM_PES (NUM_PES), .NUM_PES (NUM_PES),
.LATENCY (`LATENCY_FCVT), .LATENCY (`LATENCY_FCVT),
.DATA_IN_WIDTH(32), .DATA_IN_WIDTH(32),
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
.TAG_WIDTH (NUM_LANES + TAG_WIDTH), .TAG_WIDTH (NUM_LANES + TAG_WIDTH),
.PE_REG (0) .PE_REG (0),
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
) pe_serializer ( ) pe_serializer (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@ -94,7 +95,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #(
.enable (pe_enable), .enable (pe_enable),
.frm (frm), .frm (frm),
.is_itof (is_itof), .is_itof (is_itof),
.is_signed (is_signed), .is_signed (is_signed),
.dataa (pe_data_in[i][0 +: 32]), .dataa (pe_data_in[i][0 +: 32]),
.result (pe_data_out[i][0 +: 32]), .result (pe_data_out[i][0 +: 32]),
.fflags (pe_data_out[i][32 +: `FP_FLAGS_BITS]) .fflags (pe_data_out[i][32 +: `FP_FLAGS_BITS])

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -21,7 +21,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
parameter TAG_WIDTH = 1 parameter TAG_WIDTH = 1
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire valid_in, input wire valid_in,
output wire ready_in, output wire ready_in,
@ -31,10 +31,10 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
input wire [TAG_WIDTH-1:0] tag_in, input wire [TAG_WIDTH-1:0] tag_in,
input wire [`INST_FRM_BITS-1:0] frm, input wire [`INST_FRM_BITS-1:0] frm,
input wire [NUM_LANES-1:0][31:0] dataa, input wire [NUM_LANES-1:0][31:0] dataa,
input wire [NUM_LANES-1:0][31:0] datab, input wire [NUM_LANES-1:0][31:0] datab,
output wire [NUM_LANES-1:0][31:0] result, output wire [NUM_LANES-1:0][31:0] result,
output wire has_fflags, output wire has_fflags,
output wire [`FP_FLAGS_BITS-1:0] fflags, output wire [`FP_FLAGS_BITS-1:0] fflags,
@ -47,27 +47,28 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
`UNUSED_VAR (frm) `UNUSED_VAR (frm)
wire [NUM_LANES-1:0][2*32-1:0] data_in; wire [NUM_LANES-1:0][2*32-1:0] data_in;
wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0] mask_out;
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out; wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out;
wire pe_enable; wire pe_enable;
wire [NUM_PES-1:0][2*32-1:0] pe_data_in; wire [NUM_PES-1:0][2*32-1:0] pe_data_in;
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
for (genvar i = 0; i < NUM_LANES; ++i) begin for (genvar i = 0; i < NUM_LANES; ++i) begin
assign data_in[i][0 +: 32] = dataa[i]; assign data_in[i][0 +: 32] = dataa[i];
assign data_in[i][32 +: 32] = datab[i]; assign data_in[i][32 +: 32] = datab[i];
end end
VX_pe_serializer #( VX_pe_serializer #(
.NUM_LANES (NUM_LANES), .NUM_LANES (NUM_LANES),
.NUM_PES (NUM_PES), .NUM_PES (NUM_PES),
.LATENCY (`LATENCY_FDIV), .LATENCY (`LATENCY_FDIV),
.DATA_IN_WIDTH(2*32), .DATA_IN_WIDTH(2*32),
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
.TAG_WIDTH (NUM_LANES + TAG_WIDTH), .TAG_WIDTH (NUM_LANES + TAG_WIDTH),
.PE_REG (0) .PE_REG (0),
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
) pe_serializer ( ) pe_serializer (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@ -92,7 +93,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
fflags_t [NUM_LANES-1:0] per_lane_fflags; fflags_t [NUM_LANES-1:0] per_lane_fflags;
`ifdef QUARTUS `ifdef QUARTUS
for (genvar i = 0; i < NUM_PES; ++i) begin for (genvar i = 0; i < NUM_PES; ++i) begin
acl_fdiv fdiv ( acl_fdiv fdiv (
.clk (clk), .clk (clk),
@ -103,8 +104,8 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
.q (pe_data_out[i][0 +: 32]) .q (pe_data_out[i][0 +: 32])
); );
assign pe_data_out[i][32 +: `FP_FLAGS_BITS] = 'x; assign pe_data_out[i][32 +: `FP_FLAGS_BITS] = 'x;
end end
assign has_fflags = 0; assign has_fflags = 0;
assign per_lane_fflags = 'x; assign per_lane_fflags = 'x;
`UNUSED_VAR (fflags_out) `UNUSED_VAR (fflags_out)
@ -131,21 +132,21 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
assign has_fflags = 1; assign has_fflags = 1;
assign per_lane_fflags = fflags_out; assign per_lane_fflags = fflags_out;
`else `else
for (genvar i = 0; i < NUM_PES; ++i) begin for (genvar i = 0; i < NUM_PES; ++i) begin
reg [63:0] r; reg [63:0] r;
`UNUSED_VAR (r) `UNUSED_VAR (r)
fflags_t f; fflags_t f;
always @(*) begin always @(*) begin
dpi_fdiv ( dpi_fdiv (
pe_enable, pe_enable,
int'(0), int'(0),
{32'hffffffff, pe_data_in[i][0 +: 32]}, {32'hffffffff, pe_data_in[i][0 +: 32]},
{32'hffffffff, pe_data_in[i][32 +: 32]}, {32'hffffffff, pe_data_in[i][32 +: 32]},
frm, frm,
r, r,
f f
); );
end end

View file

@ -98,7 +98,8 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
.DATA_IN_WIDTH(3*32), .DATA_IN_WIDTH(3*32),
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
.TAG_WIDTH (NUM_LANES + TAG_WIDTH), .TAG_WIDTH (NUM_LANES + TAG_WIDTH),
.PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0) .PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0), // must be registered for DSPs
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
) pe_serializer ( ) pe_serializer (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -35,7 +35,7 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #(
input wire [NUM_LANES-1:0][31:0] dataa, input wire [NUM_LANES-1:0][31:0] dataa,
input wire [NUM_LANES-1:0][31:0] datab, input wire [NUM_LANES-1:0][31:0] datab,
output wire [NUM_LANES-1:0][31:0] result, output wire [NUM_LANES-1:0][31:0] result,
output wire has_fflags, output wire has_fflags,
output wire [`FP_FLAGS_BITS-1:0] fflags, output wire [`FP_FLAGS_BITS-1:0] fflags,
@ -44,15 +44,15 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #(
input wire ready_out, input wire ready_out,
output wire valid_out output wire valid_out
); );
`UNUSED_VAR (frm) `UNUSED_VAR (frm)
wire [NUM_LANES-1:0][2*32-1:0] data_in; wire [NUM_LANES-1:0][2*32-1:0] data_in;
wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0] mask_out;
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
fflags_t [NUM_LANES-1:0] fflags_out; fflags_t [NUM_LANES-1:0] fflags_out;
wire pe_enable; wire pe_enable;
wire [NUM_PES-1:0][2*32-1:0] pe_data_in; wire [NUM_PES-1:0][2*32-1:0] pe_data_in;
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
@ -60,15 +60,16 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #(
assign data_in[i][0 +: 32] = dataa[i]; assign data_in[i][0 +: 32] = dataa[i];
assign data_in[i][32 +: 32] = datab[i]; assign data_in[i][32 +: 32] = datab[i];
end end
VX_pe_serializer #( VX_pe_serializer #(
.NUM_LANES (NUM_LANES), .NUM_LANES (NUM_LANES),
.NUM_PES (NUM_PES), .NUM_PES (NUM_PES),
.LATENCY (`LATENCY_FNCP), .LATENCY (`LATENCY_FNCP),
.DATA_IN_WIDTH(2*32), .DATA_IN_WIDTH(2*32),
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
.TAG_WIDTH (NUM_LANES + TAG_WIDTH), .TAG_WIDTH (NUM_LANES + TAG_WIDTH),
.PE_REG (0) .PE_REG (0),
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
) pe_serializer ( ) pe_serializer (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@ -97,8 +98,8 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.enable (pe_enable), .enable (pe_enable),
.frm (frm), .frm (frm),
.op_type (op_type), .op_type (op_type),
.dataa (pe_data_in[i][0 +: 32]), .dataa (pe_data_in[i][0 +: 32]),
.datab (pe_data_in[i][32 +: 32]), .datab (pe_data_in[i][32 +: 32]),
.result (pe_data_out[i][0 +: 32]), .result (pe_data_out[i][0 +: 32]),

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -18,10 +18,10 @@
module VX_fpu_sqrt import VX_fpu_pkg::*; #( module VX_fpu_sqrt import VX_fpu_pkg::*; #(
parameter NUM_LANES = 1, parameter NUM_LANES = 1,
parameter NUM_PES = `UP(NUM_LANES /`FSQRT_PE_RATIO), parameter NUM_PES = `UP(NUM_LANES /`FSQRT_PE_RATIO),
parameter TAG_WIDTH = 1 parameter TAG_WIDTH = 1
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
output wire ready_in, output wire ready_in,
input wire valid_in, input wire valid_in,
@ -29,11 +29,11 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #(
input wire [NUM_LANES-1:0] mask_in, input wire [NUM_LANES-1:0] mask_in,
input wire [TAG_WIDTH-1:0] tag_in, input wire [TAG_WIDTH-1:0] tag_in,
input wire [`INST_FRM_BITS-1:0] frm, input wire [`INST_FRM_BITS-1:0] frm,
input wire [NUM_LANES-1:0][31:0] dataa, input wire [NUM_LANES-1:0][31:0] dataa,
output wire [NUM_LANES-1:0][31:0] result, output wire [NUM_LANES-1:0][31:0] result,
output wire has_fflags, output wire has_fflags,
output wire [`FP_FLAGS_BITS-1:0] fflags, output wire [`FP_FLAGS_BITS-1:0] fflags,
@ -46,22 +46,23 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #(
`UNUSED_VAR (frm) `UNUSED_VAR (frm)
wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0] mask_out;
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out; wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out;
wire pe_enable; wire pe_enable;
wire [NUM_PES-1:0][31:0] pe_data_in; wire [NUM_PES-1:0][31:0] pe_data_in;
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
VX_pe_serializer #( VX_pe_serializer #(
.NUM_LANES (NUM_LANES), .NUM_LANES (NUM_LANES),
.NUM_PES (NUM_PES), .NUM_PES (NUM_PES),
.LATENCY (`LATENCY_FSQRT), .LATENCY (`LATENCY_FSQRT),
.DATA_IN_WIDTH(32), .DATA_IN_WIDTH(32),
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
.TAG_WIDTH (NUM_LANES + TAG_WIDTH), .TAG_WIDTH (NUM_LANES + TAG_WIDTH),
.PE_REG (0) .PE_REG (0),
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
) pe_serializer ( ) pe_serializer (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@ -83,10 +84,10 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #(
assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS];
end end
fflags_t [NUM_LANES-1:0] per_lane_fflags; fflags_t [NUM_LANES-1:0] per_lane_fflags;
`ifdef QUARTUS `ifdef QUARTUS
for (genvar i = 0; i < NUM_PES; ++i) begin for (genvar i = 0; i < NUM_PES; ++i) begin
acl_fsqrt fsqrt ( acl_fsqrt fsqrt (
.clk (clk), .clk (clk),
@ -105,7 +106,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #(
`elsif VIVADO `elsif VIVADO
for (genvar i = 0; i < NUM_PES; ++i) begin for (genvar i = 0; i < NUM_PES; ++i) begin
wire tuser; wire tuser;
xil_fsqrt fsqrt ( xil_fsqrt fsqrt (
.aclk (clk), .aclk (clk),
@ -130,17 +131,17 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #(
`UNUSED_VAR (r) `UNUSED_VAR (r)
fflags_t f; fflags_t f;
always @(*) begin always @(*) begin
dpi_fsqrt ( dpi_fsqrt (
pe_enable, pe_enable,
int'(0), int'(0),
{32'hffffffff, pe_data_in[i]}, {32'hffffffff, pe_data_in[i]},
frm, frm,
r, r,
f f
); );
end end
VX_shift_register #( VX_shift_register #(
.DATAW (32 + $bits(fflags_t)), .DATAW (32 + $bits(fflags_t)),
.DEPTH (`LATENCY_FSQRT) .DEPTH (`LATENCY_FSQRT)

View file

@ -81,12 +81,15 @@ module VX_avs_adapter #(
assign req_queue_push[i] = mem_req_valid && ~mem_req_rw && bank_req_ready[i] && (req_bank_sel == i); assign req_queue_push[i] = mem_req_valid && ~mem_req_rw && bank_req_ready[i] && (req_bank_sel == i);
end end
`RESET_RELAY_EX (bank_reset, reset, NUM_BANKS, 1);
for (genvar i = 0; i < NUM_BANKS; ++i) begin for (genvar i = 0; i < NUM_BANKS; ++i) begin
VX_pending_size #( VX_pending_size #(
.SIZE (RD_QUEUE_SIZE) .SIZE (RD_QUEUE_SIZE)
) pending_size ( ) pending_size (
.clk (clk), .clk (clk),
.reset (reset), .reset (bank_reset[i]),
.incr (req_queue_push[i]), .incr (req_queue_push[i]),
.decr (req_queue_pop[i]), .decr (req_queue_pop[i]),
`UNUSED_PIN (empty), `UNUSED_PIN (empty),
@ -102,7 +105,7 @@ module VX_avs_adapter #(
.DEPTH (RD_QUEUE_SIZE) .DEPTH (RD_QUEUE_SIZE)
) rd_req_queue ( ) rd_req_queue (
.clk (clk), .clk (clk),
.reset (reset), .reset (bank_reset[i]),
.push (req_queue_push[i]), .push (req_queue_push[i]),
.pop (req_queue_pop[i]), .pop (req_queue_pop[i]),
.data_in (mem_req_tag), .data_in (mem_req_tag),
@ -132,7 +135,7 @@ module VX_avs_adapter #(
.OUT_REG (`TO_OUT_BUF_REG(REQ_OUT_BUF)) .OUT_REG (`TO_OUT_BUF_REG(REQ_OUT_BUF))
) req_out_buf ( ) req_out_buf (
.clk (clk), .clk (clk),
.reset (reset), .reset (bank_reset[i]),
.valid_in (valid_out_w), .valid_in (valid_out_w),
.ready_in (ready_out_w), .ready_in (ready_out_w),
.data_in ({mem_req_rw, mem_req_byteen, req_bank_off, mem_req_data}), .data_in ({mem_req_rw, mem_req_byteen, req_bank_off, mem_req_data}),
@ -168,12 +171,13 @@ module VX_avs_adapter #(
wire [NUM_BANKS-1:0] rsp_queue_empty; wire [NUM_BANKS-1:0] rsp_queue_empty;
for (genvar i = 0; i < NUM_BANKS; ++i) begin for (genvar i = 0; i < NUM_BANKS; ++i) begin
VX_fifo_queue #( VX_fifo_queue #(
.DATAW (DATA_WIDTH), .DATAW (DATA_WIDTH),
.DEPTH (RD_QUEUE_SIZE) .DEPTH (RD_QUEUE_SIZE)
) rd_rsp_queue ( ) rd_rsp_queue (
.clk (clk), .clk (clk),
.reset (reset), .reset (bank_reset[i]),
.push (avs_readdatavalid[i]), .push (avs_readdatavalid[i]),
.pop (req_queue_pop[i]), .pop (req_queue_pop[i]),
.data_in (avs_readdata[i]), .data_in (avs_readdata[i]),
@ -195,7 +199,7 @@ module VX_avs_adapter #(
VX_stream_arb #( VX_stream_arb #(
.NUM_INPUTS (NUM_BANKS), .NUM_INPUTS (NUM_BANKS),
.DATAW (DATA_WIDTH + TAG_WIDTH), .DATAW (DATA_WIDTH + TAG_WIDTH),
.ARBITER ("R"), .ARBITER ("F"),
.OUT_BUF (RSP_OUT_BUF) .OUT_BUF (RSP_OUT_BUF)
) rsp_arb ( ) rsp_arb (
.clk (clk), .clk (clk),

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -15,10 +15,10 @@
`TRACING_OFF `TRACING_OFF
module VX_axi_adapter #( module VX_axi_adapter #(
parameter DATA_WIDTH = 512, parameter DATA_WIDTH = 512,
parameter ADDR_WIDTH = 32, parameter ADDR_WIDTH = 32,
parameter TAG_WIDTH = 8, parameter TAG_WIDTH = 8,
parameter NUM_BANKS = 1, parameter NUM_BANKS = 1,
parameter AVS_ADDR_WIDTH = (ADDR_WIDTH - `CLOG2(DATA_WIDTH/8)), parameter AVS_ADDR_WIDTH = (ADDR_WIDTH - `CLOG2(DATA_WIDTH/8)),
parameter RSP_OUT_BUF = 0 parameter RSP_OUT_BUF = 0
) ( ) (
@ -34,13 +34,13 @@ module VX_axi_adapter #(
input wire [TAG_WIDTH-1:0] mem_req_tag, input wire [TAG_WIDTH-1:0] mem_req_tag,
output wire mem_req_ready, output wire mem_req_ready,
// Vortex response // Vortex response
output wire mem_rsp_valid, output wire mem_rsp_valid,
output wire [DATA_WIDTH-1:0] mem_rsp_data, output wire [DATA_WIDTH-1:0] mem_rsp_data,
output wire [TAG_WIDTH-1:0] mem_rsp_tag, output wire [TAG_WIDTH-1:0] mem_rsp_tag,
input wire mem_rsp_ready, input wire mem_rsp_ready,
// AXI write request address channel // AXI write request address channel
output wire m_axi_awvalid [NUM_BANKS], output wire m_axi_awvalid [NUM_BANKS],
input wire m_axi_awready [NUM_BANKS], input wire m_axi_awready [NUM_BANKS],
output wire [ADDR_WIDTH-1:0] m_axi_awaddr [NUM_BANKS], output wire [ADDR_WIDTH-1:0] m_axi_awaddr [NUM_BANKS],
@ -54,7 +54,7 @@ module VX_axi_adapter #(
output wire [3:0] m_axi_awqos [NUM_BANKS], output wire [3:0] m_axi_awqos [NUM_BANKS],
output wire [3:0] m_axi_awregion [NUM_BANKS], output wire [3:0] m_axi_awregion [NUM_BANKS],
// AXI write request data channel // AXI write request data channel
output wire m_axi_wvalid [NUM_BANKS], output wire m_axi_wvalid [NUM_BANKS],
input wire m_axi_wready [NUM_BANKS], input wire m_axi_wready [NUM_BANKS],
output wire [DATA_WIDTH-1:0] m_axi_wdata [NUM_BANKS], output wire [DATA_WIDTH-1:0] m_axi_wdata [NUM_BANKS],
@ -66,7 +66,7 @@ module VX_axi_adapter #(
output wire m_axi_bready [NUM_BANKS], output wire m_axi_bready [NUM_BANKS],
input wire [TAG_WIDTH-1:0] m_axi_bid [NUM_BANKS], input wire [TAG_WIDTH-1:0] m_axi_bid [NUM_BANKS],
input wire [1:0] m_axi_bresp [NUM_BANKS], input wire [1:0] m_axi_bresp [NUM_BANKS],
// AXI read address channel // AXI read address channel
output wire m_axi_arvalid [NUM_BANKS], output wire m_axi_arvalid [NUM_BANKS],
input wire m_axi_arready [NUM_BANKS], input wire m_axi_arready [NUM_BANKS],
@ -74,13 +74,13 @@ module VX_axi_adapter #(
output wire [TAG_WIDTH-1:0] m_axi_arid [NUM_BANKS], output wire [TAG_WIDTH-1:0] m_axi_arid [NUM_BANKS],
output wire [7:0] m_axi_arlen [NUM_BANKS], output wire [7:0] m_axi_arlen [NUM_BANKS],
output wire [2:0] m_axi_arsize [NUM_BANKS], output wire [2:0] m_axi_arsize [NUM_BANKS],
output wire [1:0] m_axi_arburst [NUM_BANKS], output wire [1:0] m_axi_arburst [NUM_BANKS],
output wire [1:0] m_axi_arlock [NUM_BANKS], output wire [1:0] m_axi_arlock [NUM_BANKS],
output wire [3:0] m_axi_arcache [NUM_BANKS], output wire [3:0] m_axi_arcache [NUM_BANKS],
output wire [2:0] m_axi_arprot [NUM_BANKS], output wire [2:0] m_axi_arprot [NUM_BANKS],
output wire [3:0] m_axi_arqos [NUM_BANKS], output wire [3:0] m_axi_arqos [NUM_BANKS],
output wire [3:0] m_axi_arregion [NUM_BANKS], output wire [3:0] m_axi_arregion [NUM_BANKS],
// AXI read response channel // AXI read response channel
input wire m_axi_rvalid [NUM_BANKS], input wire m_axi_rvalid [NUM_BANKS],
output wire m_axi_rready [NUM_BANKS], output wire m_axi_rready [NUM_BANKS],
@ -88,15 +88,15 @@ module VX_axi_adapter #(
input wire m_axi_rlast [NUM_BANKS], input wire m_axi_rlast [NUM_BANKS],
input wire [TAG_WIDTH-1:0] m_axi_rid [NUM_BANKS], input wire [TAG_WIDTH-1:0] m_axi_rid [NUM_BANKS],
input wire [1:0] m_axi_rresp [NUM_BANKS] input wire [1:0] m_axi_rresp [NUM_BANKS]
); );
localparam AXSIZE = `CLOG2(DATA_WIDTH/8); localparam AXSIZE = `CLOG2(DATA_WIDTH/8);
localparam BANK_ADDRW = `LOG2UP(NUM_BANKS); localparam BANK_ADDRW = `LOG2UP(NUM_BANKS);
localparam LOG2_NUM_BANKS = `CLOG2(NUM_BANKS); localparam LOG2_NUM_BANKS = `CLOG2(NUM_BANKS);
wire [BANK_ADDRW-1:0] req_bank_sel; wire [BANK_ADDRW-1:0] req_bank_sel;
if (NUM_BANKS > 1) begin if (NUM_BANKS > 1) begin
assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0]; assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0];
end else begin end else begin
assign req_bank_sel = '0; assign req_bank_sel = '0;
end end
@ -108,12 +108,12 @@ module VX_axi_adapter #(
for (genvar i = 0; i < NUM_BANKS; ++i) begin for (genvar i = 0; i < NUM_BANKS; ++i) begin
wire m_axi_aw_fire = m_axi_awvalid[i] && m_axi_awready[i]; wire m_axi_aw_fire = m_axi_awvalid[i] && m_axi_awready[i];
wire m_axi_w_fire = m_axi_wvalid[i] && m_axi_wready[i]; wire m_axi_w_fire = m_axi_wvalid[i] && m_axi_wready[i];
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
m_axi_aw_ack[i] <= 0; m_axi_aw_ack[i] <= 0;
m_axi_w_ack[i] <= 0; m_axi_w_ack[i] <= 0;
end else begin end else begin
if (mem_req_fire && (req_bank_sel == i)) begin if (mem_req_fire && (req_bank_sel == i)) begin
m_axi_aw_ack[i] <= 0; m_axi_aw_ack[i] <= 0;
m_axi_w_ack[i] <= 0; m_axi_w_ack[i] <= 0;
@ -127,10 +127,10 @@ module VX_axi_adapter #(
end end
end end
wire axi_write_ready [NUM_BANKS]; wire axi_write_ready [NUM_BANKS];
for (genvar i = 0; i < NUM_BANKS; ++i) begin for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign axi_write_ready[i] = (m_axi_awready[i] || m_axi_aw_ack[i]) assign axi_write_ready[i] = (m_axi_awready[i] || m_axi_aw_ack[i])
&& (m_axi_wready[i] || m_axi_w_ack[i]); && (m_axi_wready[i] || m_axi_w_ack[i]);
end end
@ -141,17 +141,17 @@ module VX_axi_adapter #(
assign mem_req_ready = mem_req_rw ? axi_write_ready[0] : m_axi_arready[0]; assign mem_req_ready = mem_req_rw ? axi_write_ready[0] : m_axi_arready[0];
end end
// AXI write request address channel // AXI write request address channel
for (genvar i = 0; i < NUM_BANKS; ++i) begin for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i]; assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i];
assign m_axi_awaddr[i] = (ADDR_WIDTH'(mem_req_addr) >> LOG2_NUM_BANKS) << AXSIZE; assign m_axi_awaddr[i] = (ADDR_WIDTH'(mem_req_addr) >> LOG2_NUM_BANKS) << AXSIZE;
assign m_axi_awid[i] = mem_req_tag; assign m_axi_awid[i] = mem_req_tag;
assign m_axi_awlen[i] = 8'b00000000; assign m_axi_awlen[i] = 8'b00000000;
assign m_axi_awsize[i] = 3'(AXSIZE); assign m_axi_awsize[i] = 3'(AXSIZE);
assign m_axi_awburst[i] = 2'b00; assign m_axi_awburst[i] = 2'b00;
assign m_axi_awlock[i] = 2'b00; assign m_axi_awlock[i] = 2'b00;
assign m_axi_awcache[i] = 4'b0000; assign m_axi_awcache[i] = 4'b0000;
assign m_axi_awprot[i] = 3'b000; assign m_axi_awprot[i] = 3'b000;
assign m_axi_awqos[i] = 4'b0000; assign m_axi_awqos[i] = 4'b0000;
assign m_axi_awregion[i]= 4'b0000; assign m_axi_awregion[i]= 4'b0000;
end end
@ -170,31 +170,31 @@ module VX_axi_adapter #(
`UNUSED_VAR (m_axi_bid[i]) `UNUSED_VAR (m_axi_bid[i])
`UNUSED_VAR (m_axi_bresp[i]) `UNUSED_VAR (m_axi_bresp[i])
assign m_axi_bready[i] = 1'b1; assign m_axi_bready[i] = 1'b1;
`RUNTIME_ASSERT(~m_axi_bvalid[i] || m_axi_bresp[i] == 0, ("%t: *** AXI response error", $time)); `RUNTIME_ASSERT(~m_axi_bvalid[i] || m_axi_bresp[i] == 0, ("%t: *** AXI response error", $time));
end end
// AXI read request channel // AXI read request channel
for (genvar i = 0; i < NUM_BANKS; ++i) begin for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i); assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i);
assign m_axi_araddr[i] = (ADDR_WIDTH'(mem_req_addr) >> LOG2_NUM_BANKS) << AXSIZE; assign m_axi_araddr[i] = (ADDR_WIDTH'(mem_req_addr) >> LOG2_NUM_BANKS) << AXSIZE;
assign m_axi_arid[i] = mem_req_tag; assign m_axi_arid[i] = mem_req_tag;
assign m_axi_arlen[i] = 8'b00000000; assign m_axi_arlen[i] = 8'b00000000;
assign m_axi_arsize[i] = 3'(AXSIZE); assign m_axi_arsize[i] = 3'(AXSIZE);
assign m_axi_arburst[i] = 2'b00; assign m_axi_arburst[i] = 2'b00;
assign m_axi_arlock[i] = 2'b00; assign m_axi_arlock[i] = 2'b00;
assign m_axi_arcache[i] = 4'b0000; assign m_axi_arcache[i] = 4'b0000;
assign m_axi_arprot[i] = 3'b000; assign m_axi_arprot[i] = 3'b000;
assign m_axi_arqos[i] = 4'b0000; assign m_axi_arqos[i] = 4'b0000;
assign m_axi_arregion[i]= 4'b0000; assign m_axi_arregion[i]= 4'b0000;
end end
// AXI read response channel // AXI read response channel
wire [NUM_BANKS-1:0] rsp_arb_valid_in; wire [NUM_BANKS-1:0] rsp_arb_valid_in;
wire [NUM_BANKS-1:0][DATA_WIDTH+TAG_WIDTH-1:0] rsp_arb_data_in; wire [NUM_BANKS-1:0][DATA_WIDTH+TAG_WIDTH-1:0] rsp_arb_data_in;
wire [NUM_BANKS-1:0] rsp_arb_ready_in; wire [NUM_BANKS-1:0] rsp_arb_ready_in;
`UNUSED_VAR (m_axi_rlast) `UNUSED_VAR (m_axi_rlast)
for (genvar i = 0; i < NUM_BANKS; ++i) begin for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign rsp_arb_valid_in[i] = m_axi_rvalid[i]; assign rsp_arb_valid_in[i] = m_axi_rvalid[i];
@ -203,11 +203,11 @@ module VX_axi_adapter #(
`RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rlast[i] == 1, ("%t: *** AXI response error", $time)); `RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rlast[i] == 1, ("%t: *** AXI response error", $time));
`RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rresp[i] == 0, ("%t: *** AXI response error", $time)); `RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rresp[i] == 0, ("%t: *** AXI response error", $time));
end end
VX_stream_arb #( VX_stream_arb #(
.NUM_INPUTS (NUM_BANKS), .NUM_INPUTS (NUM_BANKS),
.DATAW (DATA_WIDTH + TAG_WIDTH), .DATAW (DATA_WIDTH + TAG_WIDTH),
.ARBITER ("R"), .ARBITER ("F"),
.OUT_BUF (RSP_OUT_BUF) .OUT_BUF (RSP_OUT_BUF)
) rsp_arb ( ) rsp_arb (
.clk (clk), .clk (clk),

View file

@ -22,12 +22,16 @@ module VX_dp_ram #(
parameter OUT_REG = 0, parameter OUT_REG = 0,
parameter NO_RWCHECK = 0, parameter NO_RWCHECK = 0,
parameter LUTRAM = 0, parameter LUTRAM = 0,
parameter RW_ASSERT = 0,
parameter RESET_RAM = 0,
parameter READ_ENABLE = 0,
parameter INIT_ENABLE = 0, parameter INIT_ENABLE = 0,
parameter INIT_FILE = "", parameter INIT_FILE = "",
parameter [DATAW-1:0] INIT_VALUE = 0, parameter [DATAW-1:0] INIT_VALUE = 0,
parameter ADDRW = `LOG2UP(SIZE) parameter ADDRW = `LOG2UP(SIZE)
) ( ) (
input wire clk, input wire clk,
input wire reset,
input wire read, input wire read,
input wire write, input wire write,
input wire [WRENW-1:0] wren, input wire [WRENW-1:0] wren,
@ -50,44 +54,44 @@ module VX_dp_ram #(
end \ end \
end end
`UNUSED_PARAM (RW_ASSERT)
`UNUSED_VAR (read) `UNUSED_VAR (read)
if (WRENW > 1) begin
`RUNTIME_ASSERT(~write || (| wren), ("invalid write enable mask"));
end
wire [DATAW-1:0] rdata_w;
`ifdef SYNTHESIS `ifdef SYNTHESIS
if (WRENW > 1) begin if (WRENW > 1) begin
`ifdef QUARTUS `ifdef QUARTUS
if (LUTRAM != 0) begin if (LUTRAM != 0) begin
if (OUT_REG != 0) begin `USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
reg [DATAW-1:0] rdata_r; `RAM_INITIALIZATION
`USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1]; always @(posedge clk) begin
`RAM_INITIALIZATION if (write) begin
always @(posedge clk) begin for (integer i = 0; i < WRENW; ++i) begin
if (write) begin if (wren[i])
for (integer i = 0; i < WRENW; ++i) begin ram[waddr][i] <= wdata[i * WSELW +: WSELW];
if (wren[i])
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
end
end
if (read) begin
rdata_r <= ram[raddr];
end end
end end
assign rdata = rdata_r;
end else begin
`USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
for (integer i = 0; i < WRENW; ++i) begin
if (wren[i])
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
end
end
end
assign rdata = ram[raddr];
end end
assign rdata_w = ram[raddr];
end else begin end else begin
if (OUT_REG != 0) begin if (NO_RWCHECK != 0) begin
reg [DATAW-1:0] rdata_r; `NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
for (integer i = 0; i < WRENW; ++i) begin
if (wren[i])
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
end
end
end
assign rdata_w = ram[raddr];
end else begin
reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1]; reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION `RAM_INITIALIZATION
always @(posedge clk) begin always @(posedge clk) begin
@ -97,37 +101,8 @@ module VX_dp_ram #(
ram[waddr][i] <= wdata[i * WSELW +: WSELW]; ram[waddr][i] <= wdata[i * WSELW +: WSELW];
end end
end end
if (read) begin
rdata_r <= ram[raddr];
end
end
assign rdata = rdata_r;
end else begin
if (NO_RWCHECK != 0) begin
`NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
for (integer i = 0; i < WRENW; ++i) begin
if (wren[i])
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
end
end
end
assign rdata = ram[raddr];
end else begin
reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
for (integer i = 0; i < WRENW; ++i) begin
if (wren[i])
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
end
end
end
assign rdata = ram[raddr];
end end
assign rdata_w = ram[raddr];
end end
end end
`else `else
@ -135,35 +110,18 @@ module VX_dp_ram #(
if (LUTRAM != 0) begin if (LUTRAM != 0) begin
`USE_FAST_BRAM reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; `USE_FAST_BRAM reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION `RAM_INITIALIZATION
if (OUT_REG != 0) begin always @(posedge clk) begin
reg [DATAW-1:0] rdata_r; if (write) begin
always @(posedge clk) begin for (integer i = 0; i < WRENW; ++i) begin
if (write) begin if (wren[i])
for (integer i = 0; i < WRENW; ++i) begin ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
if (wren[i])
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
end
end
if (read) begin
rdata_r <= ram[raddr];
end end
end end
assign rdata = rdata_r;
end else begin
always @(posedge clk) begin
if (write) begin
for (integer i = 0; i < WRENW; ++i) begin
if (wren[i])
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
end
end
end
assign rdata = ram[raddr];
end end
assign rdata_w = ram[raddr];
end else begin end else begin
if (OUT_REG != 0) begin if (NO_RWCHECK != 0) begin
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
reg [DATAW-1:0] rdata_r;
`RAM_INITIALIZATION `RAM_INITIALIZATION
always @(posedge clk) begin always @(posedge clk) begin
if (write) begin if (write) begin
@ -172,37 +130,20 @@ module VX_dp_ram #(
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
end end
end end
if (read) begin
rdata_r <= ram[raddr];
end
end end
assign rdata = rdata_r; assign rdata_w = ram[raddr];
end else begin end else begin
if (NO_RWCHECK != 0) begin reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; `RAM_INITIALIZATION
`RAM_INITIALIZATION always @(posedge clk) begin
always @(posedge clk) begin if (write) begin
if (write) begin for (integer i = 0; i < WRENW; ++i) begin
for (integer i = 0; i < WRENW; ++i) begin if (wren[i])
if (wren[i]) ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
end
end end
end end
assign rdata = ram[raddr];
end else begin
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
for (integer i = 0; i < WRENW; ++i) begin
if (wren[i])
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
end
end
end
assign rdata = ram[raddr];
end end
assign rdata_w = ram[raddr];
end end
end end
`endif `endif
@ -211,64 +152,36 @@ module VX_dp_ram #(
if (LUTRAM != 0) begin if (LUTRAM != 0) begin
`USE_FAST_BRAM reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; `USE_FAST_BRAM reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION `RAM_INITIALIZATION
if (OUT_REG != 0) begin always @(posedge clk) begin
reg [DATAW-1:0] rdata_r; if (write) begin
always @(posedge clk) begin ram[waddr] <= wdata;
if (write) begin
ram[waddr] <= wdata;
end
if (read) begin
rdata_r <= ram[raddr];
end
end end
assign rdata = rdata_r;
end else begin
always @(posedge clk) begin
if (write) begin
ram[waddr] <= wdata;
end
end
assign rdata = ram[raddr];
end end
assign rdata_w = ram[raddr];
end else begin end else begin
if (OUT_REG != 0) begin if (NO_RWCHECK != 0) begin
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
reg [DATAW-1:0] rdata_r;
`RAM_INITIALIZATION `RAM_INITIALIZATION
always @(posedge clk) begin always @(posedge clk) begin
if (write) begin if (write) begin
ram[waddr] <= wdata; ram[waddr] <= wdata;
end end
if (read) begin
rdata_r <= ram[raddr];
end
end end
assign rdata = rdata_r; assign rdata_w = ram[raddr];
end else begin end else begin
if (NO_RWCHECK != 0) begin reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; `RAM_INITIALIZATION
`RAM_INITIALIZATION always @(posedge clk) begin
always @(posedge clk) begin if (write) begin
if (write) begin ram[waddr] <= wdata;
ram[waddr] <= wdata;
end
end end
assign rdata = ram[raddr];
end else begin
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
ram[waddr] <= wdata;
end
end
assign rdata = ram[raddr];
end end
assign rdata_w = ram[raddr];
end end
end end
end end
`else `else
// RAM emulation // simulation
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
`RAM_INITIALIZATION `RAM_INITIALIZATION
@ -277,39 +190,57 @@ module VX_dp_ram #(
assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW]; assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW];
end end
if (OUT_REG != 0) begin reg [DATAW-1:0] prev_data;
reg [DATAW-1:0] rdata_r; reg [ADDRW-1:0] prev_waddr;
always @(posedge clk) begin reg prev_write;
always @(posedge clk) begin
if (RESET_RAM && reset) begin
for (integer i = 0; i < SIZE; ++i) begin
ram[i] <= DATAW'(INIT_VALUE);
end
end else begin
if (write) begin if (write) begin
ram[waddr] <= ram_n; ram[waddr] <= ram_n;
end end
if (read) begin
rdata_r <= ram[raddr];
end
end end
assign rdata = rdata_r; if (reset) begin
end else begin prev_write <= 0;
reg [DATAW-1:0] prev_data; prev_data <= '0;
reg [ADDRW-1:0] prev_waddr; prev_waddr <= '0;
reg prev_write; end else begin
always @(posedge clk) begin prev_write <= write;
if (write) begin
ram[waddr] <= ram_n;
end
prev_write <= (| wren);
prev_data <= ram[waddr]; prev_data <= ram[waddr];
prev_waddr <= waddr; prev_waddr <= waddr;
end end
if (LUTRAM || !NO_RWCHECK) begin end
`UNUSED_VAR (prev_write)
`UNUSED_VAR (prev_data) if (LUTRAM || !NO_RWCHECK) begin
`UNUSED_VAR (prev_waddr) `UNUSED_VAR (prev_write)
assign rdata = ram[raddr]; `UNUSED_VAR (prev_data)
end else begin `UNUSED_VAR (prev_waddr)
assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr]; assign rdata_w = ram[raddr];
end else begin
assign rdata_w = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
if (RW_ASSERT) begin
`RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("read after write hazard"));
end end
end end
`endif `endif
if (OUT_REG != 0) begin
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (READ_ENABLE && reset) begin
rdata_r <= '0;
end else if (!READ_ENABLE || read) begin
rdata_r <= rdata_w;
end
end
assign rdata = rdata_r;
end else begin
assign rdata = rdata_w;
end
endmodule endmodule
`TRACING_ON `TRACING_ON

View file

@ -1,115 +0,0 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_platform.vh"
`TRACING_OFF
// Behavioral dual-port RAM (one write port, one read port) whose contents
// are rewritten with INIT_VALUE on every clock edge where reset is high.
//
// Parameters:
//   DATAW        width of one data word in bits.
//   SIZE         exclusive upper bound of the word index; storage is
//                declared as [ADDR_MIN:SIZE-1].
//   ADDR_MIN     lowest word index of the storage array.
//   WRENW        number of write-enable lanes; DATAW must be a multiple of
//                it (checked by the static assert below).
//   OUT_REG      non-zero: rdata comes from a register that is loaded only
//                on cycles where read is high. Zero: rdata is combinational.
//   NO_RWCHECK   only used when OUT_REG == 0. Non-zero (with LUTRAM == 0):
//                a read of the address written on the previous clock edge
//                returns the word that was stored before that write.
//   LUTRAM       only used when OUT_REG == 0. Non-zero disables the bypass
//                above, so rdata is always ram[raddr].
//   INIT_ENABLE  non-zero: preload the array in an initial block.
//   INIT_FILE    if not empty, preload with $readmemh from this file;
//                otherwise every word is preloaded with INIT_VALUE.
//   INIT_VALUE   preload value and reset value of every word.
//   ADDRW        address width, defaults to LOG2UP(SIZE).
//
// Ports:
//   clk, reset   clock and synchronous, active-high reset.
//   read         read enable; only consulted when OUT_REG != 0.
//   write        write enable for the whole word.
//   wren         per-lane write mask; ignored when WRENW == 1, where write
//                alone decides whether wdata is stored.
//   waddr/wdata  write address and data.
//   raddr/rdata  read address and data.
module VX_dp_ram_rst #(
    parameter DATAW       = 1,
    parameter SIZE        = 1,
    parameter ADDR_MIN    = 0,
    parameter WRENW       = 1,
    parameter OUT_REG     = 0,
    parameter NO_RWCHECK  = 0,
    parameter LUTRAM      = 0,
    parameter INIT_ENABLE = 0,
    parameter INIT_FILE   = "",
    parameter [DATAW-1:0] INIT_VALUE = 0,
    parameter ADDRW       = `LOG2UP(SIZE)
) (
    input wire               clk,
    input wire               reset,
    input wire               read,
    input wire               write,
    input wire [WRENW-1:0]   wren,
    input wire [ADDRW-1:0]   waddr,
    input wire [DATAW-1:0]   wdata,
    input wire [ADDRW-1:0]   raddr,
    output wire [DATAW-1:0]  rdata
);
    // Width of the slice controlled by one write-enable lane.
    localparam WSELW = DATAW / WRENW;
    `STATIC_ASSERT((WRENW * WSELW == DATAW), ("invalid parameter"))

// Optional power-up preload of the array, from INIT_FILE when one is given
// and from INIT_VALUE otherwise. Expands to nothing when INIT_ENABLE == 0.
`define RAM_INITIALIZATION \
    if (INIT_ENABLE != 0) begin \
        if (INIT_FILE != "") begin \
            initial $readmemh(INIT_FILE, ram); \
        end else begin \
            initial \
                for (integer i = 0; i < SIZE; ++i) \
                    ram[i] = INIT_VALUE; \
        end \
    end

    `UNUSED_VAR (read)

    // RAM emulation
    // NOTE(review): the preload and reset loops iterate from 0 although the
    // array starts at ADDR_MIN; confirm no instance uses ADDR_MIN > 0.
    reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
    `RAM_INITIALIZATION

    // Next value of the addressed word: enabled lanes take wdata, the other
    // lanes keep their stored bits. With a single lane wren is not consulted.
    wire [DATAW-1:0] ram_n;
    for (genvar i = 0; i < WRENW; ++i) begin
        assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW];
    end

    if (OUT_REG != 0) begin
        // Registered read port.
        reg [DATAW-1:0] rdata_r;
        always @(posedge clk) begin
            if (reset) begin
                for (integer i = 0; i < SIZE; ++i) begin
                    ram[i] <= DATAW'(INIT_VALUE);
                end
                rdata_r <= '0;
            end else begin
                if (write) begin
                    ram[waddr] <= ram_n;
                end
                if (read) begin
                    rdata_r <= ram[raddr];
                end
            end
        end
        assign rdata = rdata_r;
    end else begin
        // Combinational read port. prev_* remember the last write so the
        // bypass below can return the pre-write word one cycle later.
        reg [DATAW-1:0] prev_data;
        reg [ADDRW-1:0] prev_waddr;
        reg prev_write;
        always @(posedge clk) begin
            if (reset) begin
                for (integer i = 0; i < SIZE; ++i) begin
                    ram[i] <= DATAW'(INIT_VALUE);
                end
                prev_write <= 0;
                prev_data  <= '0;
                prev_waddr <= '0;
            end else begin
                if (write) begin
                    ram[waddr] <= ram_n;
                end
                // Track the write strobe itself, not the lane mask: with
                // WRENW == 1 the store above happens regardless of wren, so
                // (| wren) could miss a real write (or be X when wren is
                // left unconnected) and switch the bypass off incorrectly.
                prev_write <= write;
                prev_data  <= ram[waddr];
                prev_waddr <= waddr;
            end
        end
        if (LUTRAM || !NO_RWCHECK) begin
            `UNUSED_VAR (prev_write)
            `UNUSED_VAR (prev_data)
            `UNUSED_VAR (prev_waddr)
            assign rdata = ram[raddr];
        end else begin
            // Same-address read right after a write sees the old word.
            assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
        end
    end

endmodule
`TRACING_ON

View file

@ -18,7 +18,8 @@ module VX_elastic_buffer #(
parameter DATAW = 1, parameter DATAW = 1,
parameter SIZE = 1, parameter SIZE = 1,
parameter OUT_REG = 0, parameter OUT_REG = 0,
parameter LUTRAM = 0 parameter LUTRAM = 0,
parameter MAX_FANOUT = 0
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
@ -40,6 +41,43 @@ module VX_elastic_buffer #(
assign data_out = data_in; assign data_out = data_in;
assign ready_in = ready_out; assign ready_in = ready_out;
end else if (MAX_FANOUT != 0 && (DATAW > (MAX_FANOUT + MAX_FANOUT/2))) begin
localparam NUM_SLICES = `CDIV(DATAW, MAX_FANOUT);
localparam N_DATAW = DATAW / NUM_SLICES;
for (genvar i = 0; i < NUM_SLICES; ++i) begin
localparam S_DATAW = (i == NUM_SLICES-1) ? (DATAW - i * N_DATAW) : N_DATAW;
wire valid_out_t, ready_in_t;
`UNUSED_VAR (valid_out_t)
`UNUSED_VAR (ready_in_t)
`RESET_RELAY (slice_reset, reset);
VX_elastic_buffer #(
.DATAW (S_DATAW),
.SIZE (SIZE),
.OUT_REG (OUT_REG),
.LUTRAM (LUTRAM)
) buffer_slice (
.clk (clk),
.reset (slice_reset),
.valid_in (valid_in),
.data_in (data_in[i * N_DATAW +: S_DATAW]),
.ready_in (ready_in_t),
.valid_out (valid_out_t),
.data_out (data_out[i * N_DATAW +: S_DATAW]),
.ready_out (ready_out)
);
if (i == 0) begin
assign ready_in = ready_in_t;
assign valid_out = valid_out_t;
end
end
end else if (SIZE == 1) begin end else if (SIZE == 1) begin
VX_pipe_buffer #( VX_pipe_buffer #(
@ -103,9 +141,9 @@ module VX_elastic_buffer #(
assign ready_in = ~full; assign ready_in = ~full;
VX_elastic_buffer #( VX_pipe_buffer #(
.DATAW (DATAW), .DATAW (DATAW),
.SIZE ((OUT_REG == 2) ? 1 : 0) .DEPTH ((OUT_REG > 0) ? (OUT_REG-1) : 0)
) out_buf ( ) out_buf (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),

View file

@ -38,17 +38,16 @@ module VX_fair_arbiter #(
end else begin end else begin
reg [NUM_REQS-1:0] grant_mask; reg [NUM_REQS-1:0] requests_r;
wire [NUM_REQS-1:0] requests_rem = requests & ~grant_mask; wire [NUM_REQS-1:0] requests_sel = requests_r & requests;
wire rem_valid = (| requests_rem); wire [NUM_REQS-1:0] requests_qual = (| requests_sel) ? requests_sel : requests;
wire [NUM_REQS-1:0] requests_qual = rem_valid ? requests_rem : requests;
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
grant_mask <= '0; requests_r <= '0;
end else if (grant_ready) begin end else if (grant_ready) begin
grant_mask <= rem_valid ? (grant_mask | grant_onehot) : grant_onehot; requests_r <= requests_qual & ~grant_onehot;
end end
end end

View file

@ -177,10 +177,11 @@ module VX_fifo_queue #(
.SIZE (DEPTH), .SIZE (DEPTH),
.LUTRAM (LUTRAM) .LUTRAM (LUTRAM)
) dp_ram ( ) dp_ram (
.clk(clk), .clk (clk),
.reset (reset),
.read (1'b1), .read (1'b1),
.write (push), .write (push),
`UNUSED_PIN (wren), .wren (1'b1),
.waddr (wr_ptr_r), .waddr (wr_ptr_r),
.wdata (data_in), .wdata (data_in),
.raddr (rd_ptr_r), .raddr (rd_ptr_r),
@ -226,9 +227,10 @@ module VX_fifo_queue #(
.LUTRAM (LUTRAM) .LUTRAM (LUTRAM)
) dp_ram ( ) dp_ram (
.clk (clk), .clk (clk),
.reset (reset),
.read (1'b1), .read (1'b1),
.write (push), .write (push),
`UNUSED_PIN (wren), .wren (1'b1),
.waddr (wr_ptr_r), .waddr (wr_ptr_r),
.wdata (data_in), .wdata (data_in),
.raddr (rd_ptr_n_r), .raddr (rd_ptr_n_r),

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -17,10 +17,10 @@
module VX_find_first #( module VX_find_first #(
parameter N = 1, parameter N = 1,
parameter DATAW = 1, parameter DATAW = 1,
parameter REVERSE = 0 parameter REVERSE = 0
) ( ) (
input wire [N-1:0][DATAW-1:0] data_in, input wire [N-1:0][DATAW-1:0] data_in,
input wire [N-1:0] valid_in, input wire [N-1:0] valid_in,
output wire [DATAW-1:0] data_out, output wire [DATAW-1:0] data_out,
output wire valid_out output wire valid_out
); );
@ -37,10 +37,12 @@ module VX_find_first #(
assign s_n[TL+i] = REVERSE ? valid_in[N-1-i] : valid_in[i]; assign s_n[TL+i] = REVERSE ? valid_in[N-1-i] : valid_in[i];
assign d_n[TL+i] = REVERSE ? data_in[N-1-i] : data_in[i]; assign d_n[TL+i] = REVERSE ? data_in[N-1-i] : data_in[i];
end end
for (genvar i = TL+N; i < TN; ++i) begin if (TL < (TN-N)) begin
assign s_n[i] = 0; for (genvar i = TL+N; i < TN; ++i) begin
assign d_n[i] = '0; assign s_n[i] = 0;
assign d_n[i] = '0;
end
end end
for (genvar j = 0; j < LOGN; ++j) begin for (genvar j = 0; j < LOGN; ++j) begin
@ -48,10 +50,10 @@ module VX_find_first #(
assign s_n[2**j-1+i] = s_n[2**(j+1)-1+i*2] | s_n[2**(j+1)-1+i*2+1]; assign s_n[2**j-1+i] = s_n[2**(j+1)-1+i*2] | s_n[2**(j+1)-1+i*2+1];
assign d_n[2**j-1+i] = s_n[2**(j+1)-1+i*2] ? d_n[2**(j+1)-1+i*2] : d_n[2**(j+1)-1+i*2+1]; assign d_n[2**j-1+i] = s_n[2**(j+1)-1+i*2] ? d_n[2**(j+1)-1+i*2] : d_n[2**(j+1)-1+i*2+1];
end end
end end
assign valid_out = s_n[0]; assign valid_out = s_n[0];
assign data_out = d_n[0]; assign data_out = d_n[0];
endmodule endmodule
`TRACING_ON `TRACING_ON

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -24,17 +24,17 @@ module VX_index_buffer #(
input wire reset, input wire reset,
output wire [ADDRW-1:0] write_addr, output wire [ADDRW-1:0] write_addr,
input wire [DATAW-1:0] write_data, input wire [DATAW-1:0] write_data,
input wire acquire_en, input wire acquire_en,
input wire [ADDRW-1:0] read_addr, input wire [ADDRW-1:0] read_addr,
output wire [DATAW-1:0] read_data, output wire [DATAW-1:0] read_data,
input wire release_en, input wire release_en,
output wire empty, output wire empty,
output wire full output wire full
); );
VX_allocator #( VX_allocator #(
.SIZE (SIZE) .SIZE (SIZE)
) allocator ( ) allocator (
@ -43,9 +43,9 @@ module VX_index_buffer #(
.acquire_en (acquire_en), .acquire_en (acquire_en),
.acquire_addr (write_addr), .acquire_addr (write_addr),
.release_en (release_en), .release_en (release_en),
.release_addr (read_addr), .release_addr (read_addr),
.empty (empty), .empty (empty),
.full (full) .full (full)
); );
VX_dp_ram #( VX_dp_ram #(
@ -54,14 +54,15 @@ module VX_index_buffer #(
.LUTRAM (LUTRAM) .LUTRAM (LUTRAM)
) data_table ( ) data_table (
.clk (clk), .clk (clk),
.reset (reset),
.read (1'b1), .read (1'b1),
.write (acquire_en), .write (acquire_en),
`UNUSED_PIN (wren), .wren (1'b1),
.waddr (write_addr), .waddr (write_addr),
.wdata (write_data), .wdata (write_data),
.raddr (read_addr), .raddr (read_addr),
.rdata (read_data) .rdata (read_data)
); );
endmodule endmodule
`TRACING_ON `TRACING_ON

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -15,10 +15,10 @@
`TRACING_OFF `TRACING_OFF
module VX_mem_adapter #( module VX_mem_adapter #(
parameter SRC_DATA_WIDTH = 1, parameter SRC_DATA_WIDTH = 1,
parameter SRC_ADDR_WIDTH = 1, parameter SRC_ADDR_WIDTH = 1,
parameter DST_DATA_WIDTH = 1, parameter DST_DATA_WIDTH = 1,
parameter DST_ADDR_WIDTH = 1, parameter DST_ADDR_WIDTH = 1,
parameter SRC_TAG_WIDTH = 1, parameter SRC_TAG_WIDTH = 1,
parameter DST_TAG_WIDTH = 1, parameter DST_TAG_WIDTH = 1,
parameter REQ_OUT_BUF = 0, parameter REQ_OUT_BUF = 0,
@ -35,9 +35,9 @@ module VX_mem_adapter #(
input wire [SRC_TAG_WIDTH-1:0] mem_req_tag_in, input wire [SRC_TAG_WIDTH-1:0] mem_req_tag_in,
output wire mem_req_ready_in, output wire mem_req_ready_in,
output wire mem_rsp_valid_in, output wire mem_rsp_valid_in,
output wire [SRC_DATA_WIDTH-1:0] mem_rsp_data_in, output wire [SRC_DATA_WIDTH-1:0] mem_rsp_data_in,
output wire [SRC_TAG_WIDTH-1:0] mem_rsp_tag_in, output wire [SRC_TAG_WIDTH-1:0] mem_rsp_tag_in,
input wire mem_rsp_ready_in, input wire mem_rsp_ready_in,
output wire mem_req_valid_out, output wire mem_req_valid_out,
@ -48,12 +48,12 @@ module VX_mem_adapter #(
output wire [DST_TAG_WIDTH-1:0] mem_req_tag_out, output wire [DST_TAG_WIDTH-1:0] mem_req_tag_out,
input wire mem_req_ready_out, input wire mem_req_ready_out,
input wire mem_rsp_valid_out, input wire mem_rsp_valid_out,
input wire [DST_DATA_WIDTH-1:0] mem_rsp_data_out, input wire [DST_DATA_WIDTH-1:0] mem_rsp_data_out,
input wire [DST_TAG_WIDTH-1:0] mem_rsp_tag_out, input wire [DST_TAG_WIDTH-1:0] mem_rsp_tag_out,
output wire mem_rsp_ready_out output wire mem_rsp_ready_out
); );
`STATIC_ASSERT ((DST_TAG_WIDTH >= SRC_TAG_WIDTH), ("oops!")) `STATIC_ASSERT ((DST_TAG_WIDTH >= SRC_TAG_WIDTH), ("oops!"))
localparam DST_DATA_SIZE = (DST_DATA_WIDTH / 8); localparam DST_DATA_SIZE = (DST_DATA_WIDTH / 8);
localparam DST_LDATAW = `CLOG2(DST_DATA_WIDTH); localparam DST_LDATAW = `CLOG2(DST_DATA_WIDTH);
@ -69,7 +69,7 @@ module VX_mem_adapter #(
wire [DST_TAG_WIDTH-1:0] mem_req_tag_out_w; wire [DST_TAG_WIDTH-1:0] mem_req_tag_out_w;
wire mem_req_ready_out_w; wire mem_req_ready_out_w;
wire mem_rsp_valid_in_w; wire mem_rsp_valid_in_w;
wire [SRC_DATA_WIDTH-1:0] mem_rsp_data_in_w; wire [SRC_DATA_WIDTH-1:0] mem_rsp_data_in_w;
wire [SRC_TAG_WIDTH-1:0] mem_rsp_tag_in_w; wire [SRC_TAG_WIDTH-1:0] mem_rsp_tag_in_w;
wire mem_rsp_ready_in_w; wire mem_rsp_ready_in_w;
@ -80,7 +80,7 @@ module VX_mem_adapter #(
`UNUSED_VAR (clk) `UNUSED_VAR (clk)
`UNUSED_VAR (reset) `UNUSED_VAR (reset)
wire [D-1:0] req_idx = mem_req_addr_in[D-1:0]; wire [D-1:0] req_idx = mem_req_addr_in[D-1:0];
wire [D-1:0] rsp_idx = mem_rsp_tag_out[D-1:0]; wire [D-1:0] rsp_idx = mem_rsp_tag_out[D-1:0];
@ -99,31 +99,31 @@ module VX_mem_adapter #(
assign mem_req_valid_out_w = mem_req_valid_in; assign mem_req_valid_out_w = mem_req_valid_in;
assign mem_req_rw_out_w = mem_req_rw_in; assign mem_req_rw_out_w = mem_req_rw_in;
assign mem_req_byteen_out_w = DST_DATA_SIZE'(mem_req_byteen_in) << ((DST_LDATAW-3)'(req_idx) << (SRC_LDATAW-3)); assign mem_req_byteen_out_w = DST_DATA_SIZE'(mem_req_byteen_in) << ((DST_LDATAW-3)'(req_idx) << (SRC_LDATAW-3));
assign mem_req_data_out_w = DST_DATA_WIDTH'(mem_req_data_in) << ((DST_LDATAW'(req_idx)) << SRC_LDATAW); assign mem_req_data_out_w = DST_DATA_WIDTH'(mem_req_data_in) << ((DST_LDATAW'(req_idx)) << SRC_LDATAW);
assign mem_req_tag_out_w = DST_TAG_WIDTH'({mem_req_tag_in, req_idx}); assign mem_req_tag_out_w = DST_TAG_WIDTH'({mem_req_tag_in, req_idx});
assign mem_req_ready_in = mem_req_ready_out_w; assign mem_req_ready_in = mem_req_ready_out_w;
assign mem_rsp_valid_in_w = mem_rsp_valid_out; assign mem_rsp_valid_in_w = mem_rsp_valid_out;
assign mem_rsp_data_in_w = mem_rsp_data_out_w[rsp_idx]; assign mem_rsp_data_in_w = mem_rsp_data_out_w[rsp_idx];
assign mem_rsp_tag_in_w = SRC_TAG_WIDTH'(mem_rsp_tag_out[SRC_TAG_WIDTH+D-1:D]); assign mem_rsp_tag_in_w = SRC_TAG_WIDTH'(mem_rsp_tag_out[SRC_TAG_WIDTH+D-1:D]);
assign mem_rsp_ready_out = mem_rsp_ready_in_w; assign mem_rsp_ready_out = mem_rsp_ready_in_w;
end else if (DST_LDATAW < SRC_LDATAW) begin end else if (DST_LDATAW < SRC_LDATAW) begin
reg [D-1:0] req_ctr, rsp_ctr; reg [D-1:0] req_ctr, rsp_ctr;
reg [P-1:0][DST_DATA_WIDTH-1:0] mem_rsp_data_out_r, mem_rsp_data_out_n; reg [P-1:0][DST_DATA_WIDTH-1:0] mem_rsp_data_out_r, mem_rsp_data_out_n;
wire mem_req_out_fire = mem_req_valid_out && mem_req_ready_out; wire mem_req_out_fire = mem_req_valid_out && mem_req_ready_out;
wire mem_rsp_in_fire = mem_rsp_valid_out && mem_rsp_ready_out; wire mem_rsp_in_fire = mem_rsp_valid_out && mem_rsp_ready_out;
wire [P-1:0][DST_DATA_WIDTH-1:0] mem_req_data_in_w = mem_req_data_in; wire [P-1:0][DST_DATA_WIDTH-1:0] mem_req_data_in_w = mem_req_data_in;
wire [P-1:0][DST_DATA_SIZE-1:0] mem_req_byteen_in_w = mem_req_byteen_in; wire [P-1:0][DST_DATA_SIZE-1:0] mem_req_byteen_in_w = mem_req_byteen_in;
always @(*) begin always @(*) begin
mem_rsp_data_out_n = mem_rsp_data_out_r; mem_rsp_data_out_n = mem_rsp_data_out_r;
if (mem_rsp_in_fire) begin if (mem_rsp_in_fire) begin
mem_rsp_data_out_n[rsp_ctr] = mem_rsp_data_out; mem_rsp_data_out_n[rsp_ctr] = mem_rsp_data_out;
end end
end end
@ -139,24 +139,24 @@ module VX_mem_adapter #(
if (mem_rsp_in_fire) begin if (mem_rsp_in_fire) begin
rsp_ctr <= rsp_ctr + 1; rsp_ctr <= rsp_ctr + 1;
end end
end end
mem_rsp_data_out_r <= mem_rsp_data_out_n; mem_rsp_data_out_r <= mem_rsp_data_out_n;
end end
reg [DST_TAG_WIDTH-1:0] mem_rsp_tag_in_r; reg [DST_TAG_WIDTH-1:0] mem_rsp_tag_in_r;
wire [DST_TAG_WIDTH-1:0] mem_rsp_tag_in_x; wire [DST_TAG_WIDTH-1:0] mem_rsp_tag_in_x;
always @(posedge clk) begin always @(posedge clk) begin
if (mem_rsp_in_fire) begin if (mem_rsp_in_fire) begin
mem_rsp_tag_in_r <= mem_rsp_tag_out; mem_rsp_tag_in_r <= mem_rsp_tag_out;
end end
end end
assign mem_rsp_tag_in_x = (rsp_ctr != 0) ? mem_rsp_tag_in_r : mem_rsp_tag_out; assign mem_rsp_tag_in_x = (rsp_ctr != 0) ? mem_rsp_tag_in_r : mem_rsp_tag_out;
`RUNTIME_ASSERT(!mem_rsp_in_fire || (mem_rsp_tag_in_x == mem_rsp_tag_out), `RUNTIME_ASSERT(!mem_rsp_in_fire || (mem_rsp_tag_in_x == mem_rsp_tag_out),
("%t: *** out-of-order memory reponse! cur=%d, expected=%d", $time, mem_rsp_tag_in_x, mem_rsp_tag_out)) ("%t: *** out-of-order memory reponse! cur=%d, expected=%d", $time, mem_rsp_tag_in_x, mem_rsp_tag_out))
wire [SRC_ADDR_WIDTH+D-1:0] mem_req_addr_in_qual = {mem_req_addr_in, req_ctr}; wire [SRC_ADDR_WIDTH+D-1:0] mem_req_addr_in_qual = {mem_req_addr_in, req_ctr};
if (DST_ADDR_WIDTH < (SRC_ADDR_WIDTH + D)) begin if (DST_ADDR_WIDTH < (SRC_ADDR_WIDTH + D)) begin
`UNUSED_VAR (mem_req_addr_in_qual) `UNUSED_VAR (mem_req_addr_in_qual)
assign mem_req_addr_out_w = mem_req_addr_in_qual[DST_ADDR_WIDTH-1:0]; assign mem_req_addr_out_w = mem_req_addr_in_qual[DST_ADDR_WIDTH-1:0];
@ -181,8 +181,8 @@ module VX_mem_adapter #(
end else begin end else begin
`UNUSED_VAR (clk) `UNUSED_VAR (clk)
`UNUSED_VAR (reset) `UNUSED_VAR (reset)
if (DST_ADDR_WIDTH < SRC_ADDR_WIDTH) begin if (DST_ADDR_WIDTH < SRC_ADDR_WIDTH) begin
`UNUSED_VAR (mem_req_addr_in) `UNUSED_VAR (mem_req_addr_in)
assign mem_req_addr_out_w = mem_req_addr_in[DST_ADDR_WIDTH-1:0]; assign mem_req_addr_out_w = mem_req_addr_in[DST_ADDR_WIDTH-1:0];

View file

@ -87,16 +87,16 @@ module VX_mem_coalescer #(
localparam STATE_SETUP = 0; localparam STATE_SETUP = 0;
localparam STATE_SEND = 1; localparam STATE_SEND = 1;
reg state_r, state_n; logic state_r, state_n;
reg out_req_valid_r, out_req_valid_n; logic out_req_valid_r, out_req_valid_n;
reg out_req_rw_r, out_req_rw_n; logic out_req_rw_r, out_req_rw_n;
reg [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n; logic [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n;
reg [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n; logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n;
reg [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype_r, out_req_atype_n; logic [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype_r, out_req_atype_n;
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] out_req_byteen_r, out_req_byteen_n; logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] out_req_byteen_r, out_req_byteen_n;
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] out_req_data_r, out_req_data_n; logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] out_req_data_r, out_req_data_n;
reg [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n; logic [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n;
reg in_req_ready_n; reg in_req_ready_n;
@ -135,7 +135,11 @@ module VX_mem_coalescer #(
`UNUSED_PIN (onehot), `UNUSED_PIN (onehot),
.valid_out (batch_valid_n[i]) .valid_out (batch_valid_n[i])
); );
assign seed_idx[i] = NUM_REQS_W'(i * DATA_RATIO) + NUM_REQS_W'(batch_idx); if (OUT_REQS > 1) begin
assign seed_idx[i] = {(NUM_REQS_W-DATA_RATIO_W)'(i), batch_idx};
end else begin
assign seed_idx[i] = batch_idx;
end
end end
for (genvar i = 0; i < OUT_REQS; ++i) begin for (genvar i = 0; i < OUT_REQS; ++i) begin
@ -149,29 +153,6 @@ module VX_mem_coalescer #(
end end
end end
always @(posedge clk) begin
if (reset) begin
state_r <= STATE_SETUP;
processed_mask_r <= '0;
out_req_valid_r <= 0;
end else begin
state_r <= state_n;
batch_valid_r <= batch_valid_n;
seed_addr_r <= seed_addr_n;
seed_atype_r <= seed_atype_n;
addr_matches_r <= addr_matches_n;
out_req_valid_r <= out_req_valid_n;
out_req_mask_r <= out_req_mask_n;
out_req_rw_r <= out_req_rw_n;
out_req_addr_r <= out_req_addr_n;
out_req_atype_r <= out_req_atype_n;
out_req_byteen_r <= out_req_byteen_n;
out_req_data_r <= out_req_data_n;
out_req_tag_r <= out_req_tag_n;
processed_mask_r <= processed_mask_n;
end
end
wire [NUM_REQS-1:0] current_pmask = in_req_mask & addr_matches_r; wire [NUM_REQS-1:0] current_pmask = in_req_mask & addr_matches_r;
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] req_byteen_merged; reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] req_byteen_merged;
@ -248,6 +229,17 @@ module VX_mem_coalescer #(
endcase endcase
end end
VX_pipe_register #(
.DATAW (1 + NUM_REQS + 1 + 1 + NUM_REQS + OUT_REQS * (1 + 1 + OUT_ADDR_WIDTH + ATYPE_WIDTH + OUT_ADDR_WIDTH + ATYPE_WIDTH + DATA_OUT_SIZE + DATA_OUT_WIDTH) + OUT_TAG_WIDTH),
.RESETW (1 + NUM_REQS + 1)
) pipe_reg (
.clk (clk),
.reset (reset),
.enable (1'b1),
.data_in ({state_n, processed_mask_n, out_req_valid_n, out_req_rw_n, addr_matches_n, batch_valid_n, out_req_mask_n, seed_addr_n, seed_atype_n, out_req_addr_n, out_req_atype_n, out_req_byteen_n, out_req_data_n, out_req_tag_n}),
.data_out ({state_r, processed_mask_r, out_req_valid_r, out_req_rw_r, addr_matches_r, batch_valid_r, out_req_mask_r, seed_addr_r, seed_atype_r, out_req_addr_r, out_req_atype_r, out_req_byteen_r, out_req_data_r, out_req_tag_r})
);
wire out_rsp_fire = out_rsp_valid && out_rsp_ready; wire out_rsp_fire = out_rsp_valid && out_rsp_ready;
wire out_rsp_eop; wire out_rsp_eop;

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -23,13 +23,13 @@ module VX_onehot_encoder #(
parameter MODEL = 1, parameter MODEL = 1,
parameter LN = `LOG2UP(N) parameter LN = `LOG2UP(N)
) ( ) (
input wire [N-1:0] data_in, input wire [N-1:0] data_in,
output wire [LN-1:0] data_out, output wire [LN-1:0] data_out,
output wire valid_out output wire valid_out
); );
if (N == 1) begin if (N == 1) begin
assign data_out = data_in; assign data_out = 0;
assign valid_out = data_in; assign valid_out = data_in;
end else if (N == 2) begin end else if (N == 2) begin
@ -37,43 +37,43 @@ module VX_onehot_encoder #(
assign data_out = data_in[!REVERSE]; assign data_out = data_in[!REVERSE];
assign valid_out = (| data_in); assign valid_out = (| data_in);
end else if (MODEL == 1) begin end else if (MODEL == 1) begin
localparam M = 1 << LN; localparam M = 1 << LN;
`IGNORE_UNOPTFLAT_BEGIN `IGNORE_UNOPTFLAT_BEGIN
wire [LN-1:0][M-1:0] addr; wire [LN-1:0][M-1:0] addr;
wire [LN:0][M-1:0] v; wire [LN:0][M-1:0] v;
`IGNORE_UNOPTFLAT_END `IGNORE_UNOPTFLAT_END
// base case, also handle padding for non-power of two inputs // base case, also handle padding for non-power of two inputs
assign v[0] = REVERSE ? (M'(data_in) << (M - N)) : M'(data_in); assign v[0] = REVERSE ? (M'(data_in) << (M - N)) : M'(data_in);
for (genvar lvl = 1; lvl < (LN+1); ++lvl) begin for (genvar lvl = 1; lvl < (LN+1); ++lvl) begin
localparam SN = 1 << (LN - lvl); localparam SN = 1 << (LN - lvl);
localparam SI = M / SN; localparam SI = M / SN;
localparam SW = lvl; localparam SW = lvl;
for (genvar s = 0; s < SN; ++s) begin for (genvar s = 0; s < SN; ++s) begin
`IGNORE_UNOPTFLAT_BEGIN `IGNORE_UNOPTFLAT_BEGIN
wire [1:0] vs = {v[lvl-1][s*SI+(SI>>1)], v[lvl-1][s*SI]}; wire [1:0] vs = {v[lvl-1][s*SI+(SI>>1)], v[lvl-1][s*SI]};
`IGNORE_UNOPTFLAT_END `IGNORE_UNOPTFLAT_END
assign v[lvl][s*SI] = (| vs); assign v[lvl][s*SI] = (| vs);
if (lvl == 1) begin if (lvl == 1) begin
assign addr[lvl-1][s*SI +: SW] = vs[!REVERSE]; assign addr[lvl-1][s*SI +: SW] = vs[!REVERSE];
end else begin end else begin
assign addr[lvl-1][s*SI +: SW] = { assign addr[lvl-1][s*SI +: SW] = {
vs[!REVERSE], vs[!REVERSE],
addr[lvl-2][s*SI +: SW-1] | addr[lvl-2][s*SI+(SI>>1) +: SW-1] addr[lvl-2][s*SI +: SW-1] | addr[lvl-2][s*SI+(SI>>1) +: SW-1]
}; };
end end
end end
end end
assign data_out = addr[LN-1][LN-1:0]; assign data_out = addr[LN-1][LN-1:0];
assign valid_out = v[LN][0]; assign valid_out = v[LN][0];
end else if (MODEL == 2 && REVERSE == 0) begin end else if (MODEL == 2 && REVERSE == 0) begin
for (genvar j = 0; j < LN; ++j) begin for (genvar j = 0; j < LN; ++j) begin
wire [N-1:0] mask; wire [N-1:0] mask;
@ -90,19 +90,19 @@ module VX_onehot_encoder #(
reg [LN-1:0] index_r; reg [LN-1:0] index_r;
if (REVERSE != 0) begin if (REVERSE != 0) begin
always @(*) begin always @(*) begin
index_r = 'x; index_r = 'x;
for (integer i = N-1; i >= 0; --i) begin for (integer i = N-1; i >= 0; --i) begin
if (data_in[i]) begin if (data_in[i]) begin
index_r = LN'(N-1-i); index_r = LN'(N-1-i);
end end
end end
end end
end else begin end else begin
always @(*) begin always @(*) begin
index_r = 'x; index_r = 'x;
for (integer i = 0; i < N; ++i) begin for (integer i = 0; i < N; ++i) begin
if (data_in[i]) begin if (data_in[i]) begin
index_r = LN'(i); index_r = LN'(i);
end end
end end

View file

@ -17,7 +17,8 @@
module VX_onehot_mux #( module VX_onehot_mux #(
parameter DATAW = 1, parameter DATAW = 1,
parameter N = 1, parameter N = 1,
parameter MODEL = 1 parameter MODEL = 1,
parameter LUT_OPT = 0
) ( ) (
input wire [N-1:0][DATAW-1:0] data_in, input wire [N-1:0][DATAW-1:0] data_in,
input wire [N-1:0] sel_in, input wire [N-1:0] sel_in,
@ -26,6 +27,90 @@ module VX_onehot_mux #(
if (N == 1) begin if (N == 1) begin
`UNUSED_VAR (sel_in) `UNUSED_VAR (sel_in)
assign data_out = data_in; assign data_out = data_in;
end else if (LUT_OPT && N == 2) begin
`UNUSED_VAR (sel_in)
assign data_out = sel_in[0] ? data_in[0] : data_in[1];
end else if (LUT_OPT && N == 3) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
case (sel_in)
3'b001: data_out_r = data_in[0];
3'b010: data_out_r = data_in[1];
3'b100: data_out_r = data_in[2];
default: data_out_r = 'x;
endcase
end
assign data_out = data_out_r;
end else if (LUT_OPT && N == 4) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
case (sel_in)
4'b0001: data_out_r = data_in[0];
4'b0010: data_out_r = data_in[1];
4'b0100: data_out_r = data_in[2];
4'b1000: data_out_r = data_in[3];
default: data_out_r = 'x;
endcase
end
assign data_out = data_out_r;
end else if (LUT_OPT && N == 5) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
case (sel_in)
5'b00001: data_out_r = data_in[0];
5'b00010: data_out_r = data_in[1];
5'b00100: data_out_r = data_in[2];
5'b01000: data_out_r = data_in[3];
5'b10000: data_out_r = data_in[4];
default: data_out_r = 'x;
endcase
end
assign data_out = data_out_r;
end else if (LUT_OPT && N == 6) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
case (sel_in)
6'b000001: data_out_r = data_in[0];
6'b000010: data_out_r = data_in[1];
6'b000100: data_out_r = data_in[2];
6'b001000: data_out_r = data_in[3];
6'b010000: data_out_r = data_in[4];
6'b100000: data_out_r = data_in[5];
default: data_out_r = 'x;
endcase
end
assign data_out = data_out_r;
end else if (LUT_OPT && N == 7) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
case (sel_in)
7'b0000001: data_out_r = data_in[0];
7'b0000010: data_out_r = data_in[1];
7'b0000100: data_out_r = data_in[2];
7'b0001000: data_out_r = data_in[3];
7'b0010000: data_out_r = data_in[4];
7'b0100000: data_out_r = data_in[5];
7'b1000000: data_out_r = data_in[6];
default: data_out_r = 'x;
endcase
end
assign data_out = data_out_r;
end else if (LUT_OPT && N == 8) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
case (sel_in)
8'b00000001: data_out_r = data_in[0];
8'b00000010: data_out_r = data_in[1];
8'b00000100: data_out_r = data_in[2];
8'b00001000: data_out_r = data_in[3];
8'b00010000: data_out_r = data_in[4];
8'b00100000: data_out_r = data_in[5];
8'b01000000: data_out_r = data_in[6];
8'b10000000: data_out_r = data_in[7];
default: data_out_r = 'x;
endcase
end
assign data_out = data_out_r;
end else if (MODEL == 1) begin end else if (MODEL == 1) begin
wire [N-1:0][DATAW-1:0] mask; wire [N-1:0][DATAW-1:0] mask;
for (genvar i = 0; i < N; ++i) begin for (genvar i = 0; i < N; ++i) begin

View file

@ -21,7 +21,8 @@ module VX_pe_serializer #(
parameter DATA_IN_WIDTH = 1, parameter DATA_IN_WIDTH = 1,
parameter DATA_OUT_WIDTH = 1, parameter DATA_OUT_WIDTH = 1,
parameter TAG_WIDTH = 0, parameter TAG_WIDTH = 0,
parameter PE_REG = 0 parameter PE_REG = 0,
parameter OUT_BUF = 0
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
@ -43,6 +44,11 @@ module VX_pe_serializer #(
output wire [TAG_WIDTH-1:0] tag_out, output wire [TAG_WIDTH-1:0] tag_out,
input wire ready_out input wire ready_out
); );
wire valid_out_u;
wire [NUM_LANES-1:0][DATA_OUT_WIDTH-1:0] data_out_u;
wire [TAG_WIDTH-1:0] tag_out_u;
wire ready_out_u;
wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_in_s; wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_in_s;
wire valid_out_s; wire valid_out_s;
wire [TAG_WIDTH-1:0] tag_out_s; wire [TAG_WIDTH-1:0] tag_out_s;
@ -105,7 +111,7 @@ module VX_pe_serializer #(
reg [TAG_WIDTH-1:0] tag_out_r; reg [TAG_WIDTH-1:0] tag_out_r;
wire valid_out_b = valid_out_s && batch_out_done; wire valid_out_b = valid_out_s && batch_out_done;
wire ready_out_b = ready_out || ~valid_out; wire ready_out_b = ready_out_u || ~valid_out_u;
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
@ -119,29 +125,42 @@ module VX_pe_serializer #(
end end
end end
assign enable = ready_out_b || ~valid_out_b; assign enable = ready_out_b || ~valid_out_b;
assign ready_in = enable && batch_in_done; assign ready_in = enable && batch_in_done;
assign pe_enable = enable;
assign pe_enable = enable; assign valid_out_u = valid_out_r;
assign data_out_u = data_out_r;
assign valid_out = valid_out_r; assign tag_out_u = tag_out_r;
assign data_out = data_out_r;
assign tag_out = tag_out_r;
end else begin end else begin
assign pe_data_in_s = data_in; assign pe_data_in_s = data_in;
assign enable = ready_out || ~valid_out; assign enable = ready_out_u || ~valid_out_u;
assign ready_in = enable; assign ready_in = enable;
assign pe_enable = enable;
assign pe_enable = enable; assign valid_out_u = valid_out_s;
assign data_out_u = pe_data_out;
assign valid_out = valid_out_s; assign tag_out_u = tag_out_s;
assign data_out = pe_data_out;
assign tag_out = tag_out_s;
end end
VX_elastic_buffer #(
.DATAW (NUM_LANES * DATA_OUT_WIDTH + TAG_WIDTH),
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
) out_buf (
.clk (clk),
.reset (reset),
.valid_in (valid_out_u),
.ready_in (ready_out_u),
.data_in ({data_out_u, tag_out_u}),
.data_out ({data_out, tag_out}),
.valid_out (valid_out),
.ready_out (ready_out)
);
endmodule endmodule
`TRACING_ON `TRACING_ON

View file

@ -1,11 +1,11 @@
// Copyright 2019-2023 // Copyright 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// //
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -24,39 +24,53 @@
`TRACING_OFF `TRACING_OFF
module VX_pipe_buffer #( module VX_pipe_buffer #(
parameter DATAW = 1, parameter DATAW = 1,
parameter PASSTHRU = 0 parameter DEPTH = 1
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire valid_in, input wire valid_in,
output wire ready_in, output wire ready_in,
input wire [DATAW-1:0] data_in, input wire [DATAW-1:0] data_in,
output wire [DATAW-1:0] data_out, output wire [DATAW-1:0] data_out,
input wire ready_out, input wire ready_out,
output wire valid_out output wire valid_out
); );
if (PASSTHRU != 0) begin if (DEPTH == 0) begin
`UNUSED_VAR (clk) `UNUSED_VAR (clk)
`UNUSED_VAR (reset) `UNUSED_VAR (reset)
assign ready_in = ready_out; assign ready_in = ready_out;
assign valid_out = valid_in; assign valid_out = valid_in;
assign data_out = data_in; assign data_out = data_in;
end else begin end else begin
wire stall = valid_out && ~ready_out; wire [DEPTH:0] valid;
`IGNORE_UNOPTFLAT_BEGIN
wire [DEPTH:0] ready;
`IGNORE_UNOPTFLAT_END
wire [DEPTH:0][DATAW-1:0] data;
VX_pipe_register #( assign valid[0] = valid_in;
.DATAW (1 + DATAW), assign data[0] = data_in;
.RESETW (1) assign ready_in = ready[0];
) pipe_register (
.clk (clk), for (genvar i = 0; i < DEPTH; ++i) begin
.reset (reset), assign ready[i] = (ready[i+1] || ~valid[i+1]);
.enable (~stall), VX_pipe_register #(
.data_in ({valid_in, data_in}), .DATAW (1 + DATAW),
.data_out ({valid_out, data_out}) .RESETW (1)
); ) pipe_register (
.clk (clk),
.reset (reset),
.enable (ready[i]),
.data_in ({valid[i], data[i]}),
.data_out ({valid[i+1], data[i+1]})
);
end
assign valid_out = valid[DEPTH];
assign data_out = data[DEPTH];
assign ready[DEPTH] = ready_out;
assign ready_in = ~stall;
end end
endmodule endmodule

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -14,10 +14,11 @@
`include "VX_platform.vh" `include "VX_platform.vh"
`TRACING_OFF `TRACING_OFF
module VX_pipe_register #( module VX_pipe_register #(
parameter DATAW = 1, parameter DATAW = 1,
parameter RESETW = 0, parameter RESETW = 0,
parameter DEPTH = 1 parameter DEPTH = 1,
parameter MAX_FANOUT = 0
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
@ -25,54 +26,76 @@ module VX_pipe_register #(
input wire [DATAW-1:0] data_in, input wire [DATAW-1:0] data_in,
output wire [DATAW-1:0] data_out output wire [DATAW-1:0] data_out
); );
if (DEPTH == 0) begin if (DEPTH == 0) begin
`UNUSED_VAR (clk) `UNUSED_VAR (clk)
`UNUSED_VAR (reset) `UNUSED_VAR (reset)
`UNUSED_VAR (enable) `UNUSED_VAR (enable)
assign data_out = data_in; assign data_out = data_in;
end else if (DEPTH == 1) begin end else if (DEPTH == 1) begin
if (RESETW == 0) begin if (MAX_FANOUT != 0 && (DATAW > (MAX_FANOUT + MAX_FANOUT/2))) begin
`UNUSED_VAR (reset) localparam NUM_SLICES = `CDIV(DATAW, MAX_FANOUT);
reg [DATAW-1:0] value; localparam N_DATAW = DATAW / NUM_SLICES;
for (genvar i = 0; i < NUM_SLICES; ++i) begin
always @(posedge clk) begin localparam SLICE_START = i * N_DATAW;
if (enable) begin localparam SLICE_END = SLICE_START + S_DATAW - 1;
value <= data_in; localparam S_DATAW = (i == NUM_SLICES-1) ? (DATAW - SLICE_START) : N_DATAW;
end localparam S_RESETW = (SLICE_END >= (DATAW - RESETW)) ?
((SLICE_START >= (DATAW - RESETW)) ? S_DATAW : (SLICE_END - (DATAW - RESETW) + 1)) : 0;
VX_pipe_register #(
.DATAW (S_DATAW),
.RESETW (S_RESETW)
) pipe_register_slice (
.clk (clk),
.reset (reset),
.enable (enable),
.data_in (data_in[i * N_DATAW +: S_DATAW]),
.data_out (data_out[i * N_DATAW +: S_DATAW])
);
end end
assign data_out = value;
end else if (RESETW == DATAW) begin
reg [DATAW-1:0] value;
always @(posedge clk) begin
if (reset) begin
value <= RESETW'(0);
end else if (enable) begin
value <= data_in;
end
end
assign data_out = value;
end else begin end else begin
reg [DATAW-RESETW-1:0] value_d; if (RESETW == 0) begin
reg [RESETW-1:0] value_r; `UNUSED_VAR (reset)
reg [DATAW-1:0] value;
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (enable) begin
value_r <= RESETW'(0); value <= data_in;
end else if (enable) begin end
value_r <= data_in[DATAW-1:DATAW-RESETW];
end end
assign data_out = value;
end else if (RESETW == DATAW) begin
reg [DATAW-1:0] value;
always @(posedge clk) begin
if (reset) begin
value <= RESETW'(0);
end else if (enable) begin
value <= data_in;
end
end
assign data_out = value;
end else begin
reg [DATAW-RESETW-1:0] value_d;
reg [RESETW-1:0] value_r;
always @(posedge clk) begin
if (reset) begin
value_r <= RESETW'(0);
end else if (enable) begin
value_r <= data_in[DATAW-1:DATAW-RESETW];
end
end
always @(posedge clk) begin
if (enable) begin
value_d <= data_in[DATAW-RESETW-1:0];
end
end
assign data_out = {value_r, value_d};
end end
always @(posedge clk) begin
if (enable) begin
value_d <= data_in[DATAW-RESETW-1:0];
end
end
assign data_out = {value_r, value_d};
end end
end else begin end else begin
wire [DEPTH:0][DATAW-1:0] data_delayed; wire [DEPTH:0][DATAW-1:0] data_delayed;
assign data_delayed[0] = data_in; assign data_delayed[0] = data_in;
for (genvar i = 1; i <= DEPTH; ++i) begin for (genvar i = 1; i <= DEPTH; ++i) begin
VX_pipe_register #( VX_pipe_register #(

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -21,8 +21,8 @@ module VX_reset_relay #(
input wire clk, input wire clk,
input wire reset, input wire reset,
output wire [N-1:0] reset_o output wire [N-1:0] reset_o
); );
if (MAX_FANOUT >= 0 && N > MAX_FANOUT) begin if (MAX_FANOUT >= 0 && N > (MAX_FANOUT + MAX_FANOUT/2)) begin
localparam F = `UP(MAX_FANOUT); localparam F = `UP(MAX_FANOUT);
localparam R = N / F; localparam R = N / F;
`PRESERVE_NET reg [R-1:0] reset_r; `PRESERVE_NET reg [R-1:0] reset_r;
@ -38,6 +38,6 @@ module VX_reset_relay #(
`UNUSED_VAR (clk) `UNUSED_VAR (clk)
assign reset_o = {N{reset}}; assign reset_o = {N{reset}};
end end
endmodule endmodule
`TRACING_ON `TRACING_ON

View file

@ -15,9 +15,10 @@
`TRACING_OFF `TRACING_OFF
module VX_rr_arbiter #( module VX_rr_arbiter #(
parameter NUM_REQS = 1, parameter NUM_REQS = 1,
parameter MODEL = 1, parameter MODEL = 1,
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS) parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS),
parameter LUT_OPT = 0
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
@ -37,7 +38,7 @@ module VX_rr_arbiter #(
assign grant_onehot = requests; assign grant_onehot = requests;
assign grant_valid = requests[0]; assign grant_valid = requests[0];
end else if (NUM_REQS == 2) begin end else if (LUT_OPT && NUM_REQS == 2) begin
reg [LOG_NUM_REQS-1:0] grant_index_r; reg [LOG_NUM_REQS-1:0] grant_index_r;
reg [NUM_REQS-1:0] grant_onehot_r; reg [NUM_REQS-1:0] grant_onehot_r;
@ -63,7 +64,7 @@ module VX_rr_arbiter #(
assign grant_onehot = grant_onehot_r; assign grant_onehot = grant_onehot_r;
assign grant_valid = (| requests); assign grant_valid = (| requests);
end /*else if (NUM_REQS == 3) begin end else if (LUT_OPT && NUM_REQS == 3) begin
reg [LOG_NUM_REQS-1:0] grant_index_r; reg [LOG_NUM_REQS-1:0] grant_index_r;
reg [NUM_REQS-1:0] grant_onehot_r; reg [NUM_REQS-1:0] grant_onehot_r;
@ -93,7 +94,7 @@ module VX_rr_arbiter #(
assign grant_onehot = grant_onehot_r; assign grant_onehot = grant_onehot_r;
assign grant_valid = (| requests); assign grant_valid = (| requests);
end */else if (NUM_REQS == 4) begin end else if (LUT_OPT && NUM_REQS == 4) begin
reg [LOG_NUM_REQS-1:0] grant_index_r; reg [LOG_NUM_REQS-1:0] grant_index_r;
reg [NUM_REQS-1:0] grant_onehot_r; reg [NUM_REQS-1:0] grant_onehot_r;
@ -129,7 +130,7 @@ module VX_rr_arbiter #(
assign grant_onehot = grant_onehot_r; assign grant_onehot = grant_onehot_r;
assign grant_valid = (| requests); assign grant_valid = (| requests);
end /*else if (NUM_REQS == 5) begin end else if (LUT_OPT && NUM_REQS == 5) begin
reg [LOG_NUM_REQS-1:0] grant_index_r; reg [LOG_NUM_REQS-1:0] grant_index_r;
reg [NUM_REQS-1:0] grant_onehot_r; reg [NUM_REQS-1:0] grant_onehot_r;
@ -173,7 +174,7 @@ module VX_rr_arbiter #(
assign grant_onehot = grant_onehot_r; assign grant_onehot = grant_onehot_r;
assign grant_valid = (| requests); assign grant_valid = (| requests);
end else if (NUM_REQS == 6) begin end else if (LUT_OPT && NUM_REQS == 6) begin
reg [LOG_NUM_REQS-1:0] grant_index_r; reg [LOG_NUM_REQS-1:0] grant_index_r;
reg [NUM_REQS-1:0] grant_onehot_r; reg [NUM_REQS-1:0] grant_onehot_r;
@ -227,7 +228,7 @@ module VX_rr_arbiter #(
assign grant_onehot = grant_onehot_r; assign grant_onehot = grant_onehot_r;
assign grant_valid = (| requests); assign grant_valid = (| requests);
end else if (NUM_REQS == 7) begin end else if (LUT_OPT && NUM_REQS == 7) begin
reg [LOG_NUM_REQS-1:0] grant_index_r; reg [LOG_NUM_REQS-1:0] grant_index_r;
reg [NUM_REQS-1:0] grant_onehot_r; reg [NUM_REQS-1:0] grant_onehot_r;
@ -293,7 +294,7 @@ module VX_rr_arbiter #(
assign grant_onehot = grant_onehot_r; assign grant_onehot = grant_onehot_r;
assign grant_valid = (| requests); assign grant_valid = (| requests);
end */else if (NUM_REQS == 8) begin end else if (LUT_OPT && NUM_REQS == 8) begin
reg [LOG_NUM_REQS-1:0] grant_index_r; reg [LOG_NUM_REQS-1:0] grant_index_r;
reg [NUM_REQS-1:0] grant_onehot_r; reg [NUM_REQS-1:0] grant_onehot_r;

View file

@ -21,13 +21,16 @@ module VX_sp_ram #(
parameter WRENW = 1, parameter WRENW = 1,
parameter OUT_REG = 0, parameter OUT_REG = 0,
parameter NO_RWCHECK = 0, parameter NO_RWCHECK = 0,
parameter RW_ASSERT = 0,
parameter LUTRAM = 0, parameter LUTRAM = 0,
parameter RESET_RAM = 0,
parameter INIT_ENABLE = 0, parameter INIT_ENABLE = 0,
parameter INIT_FILE = "", parameter INIT_FILE = "",
parameter [DATAW-1:0] INIT_VALUE = 0, parameter [DATAW-1:0] INIT_VALUE = 0,
parameter ADDRW = `LOG2UP(SIZE) parameter ADDRW = `LOG2UP(SIZE)
) ( ) (
input wire clk, input wire clk,
input wire reset,
input wire read, input wire read,
input wire write, input wire write,
input wire [WRENW-1:0] wren, input wire [WRENW-1:0] wren,
@ -42,13 +45,16 @@ module VX_sp_ram #(
.WRENW (WRENW), .WRENW (WRENW),
.OUT_REG (OUT_REG), .OUT_REG (OUT_REG),
.NO_RWCHECK (NO_RWCHECK), .NO_RWCHECK (NO_RWCHECK),
.RW_ASSERT (RW_ASSERT),
.LUTRAM (LUTRAM), .LUTRAM (LUTRAM),
.RESET_RAM (RESET_RAM),
.INIT_ENABLE (INIT_ENABLE), .INIT_ENABLE (INIT_ENABLE),
.INIT_FILE (INIT_FILE), .INIT_FILE (INIT_FILE),
.INIT_VALUE (INIT_VALUE), .INIT_VALUE (INIT_VALUE),
.ADDRW (ADDRW) .ADDRW (ADDRW)
) dp_ram ( ) dp_ram (
.clk (clk), .clk (clk),
.reset (reset),
.read (read), .read (read),
.write (write), .write (write),
.wren (wren), .wren (wren),

View file

@ -18,7 +18,7 @@ module VX_stream_arb #(
parameter NUM_INPUTS = 1, parameter NUM_INPUTS = 1,
parameter NUM_OUTPUTS = 1, parameter NUM_OUTPUTS = 1,
parameter DATAW = 1, parameter DATAW = 1,
parameter `STRING ARBITER = "P", parameter `STRING ARBITER = "R",
parameter MAX_FANOUT = `MAX_FANOUT, parameter MAX_FANOUT = `MAX_FANOUT,
parameter OUT_BUF = 0, parameter OUT_BUF = 0,
parameter LUTRAM = 0, parameter LUTRAM = 0,
@ -46,14 +46,14 @@ module VX_stream_arb #(
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
localparam BATCH_BEGIN = i * NUM_REQS; localparam SLICE_BEGIN = i * NUM_REQS;
localparam BATCH_END = `MIN(BATCH_BEGIN + NUM_REQS, NUM_INPUTS); localparam SLICE_END = `MIN(SLICE_BEGIN + NUM_REQS, NUM_INPUTS);
localparam BATCH_SIZE = BATCH_END - BATCH_BEGIN; localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN;
`RESET_RELAY (slice_reset, reset); `RESET_RELAY (slice_reset, reset);
VX_stream_arb #( VX_stream_arb #(
.NUM_INPUTS (BATCH_SIZE), .NUM_INPUTS (SLICE_SIZE),
.NUM_OUTPUTS (1), .NUM_OUTPUTS (1),
.DATAW (DATAW), .DATAW (DATAW),
.ARBITER (ARBITER), .ARBITER (ARBITER),
@ -63,9 +63,9 @@ module VX_stream_arb #(
) arb_slice ( ) arb_slice (
.clk (clk), .clk (clk),
.reset (slice_reset), .reset (slice_reset),
.valid_in (valid_in[BATCH_END-1: BATCH_BEGIN]), .valid_in (valid_in[SLICE_END-1: SLICE_BEGIN]),
.ready_in (ready_in[BATCH_END-1: BATCH_BEGIN]), .ready_in (ready_in[SLICE_END-1: SLICE_BEGIN]),
.data_in (data_in[BATCH_END-1: BATCH_BEGIN]), .data_in (data_in[SLICE_END-1: SLICE_BEGIN]),
.data_out (data_out[i]), .data_out (data_out[i]),
.sel_out (sel_out[i]), .sel_out (sel_out[i]),
.valid_out (valid_out[i]), .valid_out (valid_out[i]),
@ -73,32 +73,32 @@ module VX_stream_arb #(
); );
end end
end else if (MAX_FANOUT != 0 && (NUM_INPUTS > MAX_FANOUT)) begin end else if (MAX_FANOUT != 0 && (NUM_INPUTS > (MAX_FANOUT + MAX_FANOUT /2))) begin
// (#inputs > max_fanout) and (#outputs == 1) // (#inputs > max_fanout) and (#outputs == 1)
localparam NUM_BATCHES = `CDIV(NUM_INPUTS, MAX_FANOUT); localparam NUM_SLICES = `CDIV(NUM_INPUTS, MAX_FANOUT);
localparam LOG_NUM_REQS2 = `CLOG2(MAX_FANOUT); localparam LOG_NUM_REQS2 = `CLOG2(MAX_FANOUT);
localparam LOG_NUM_REQS3 = `CLOG2(NUM_BATCHES); localparam LOG_NUM_REQS3 = `CLOG2(NUM_SLICES);
wire [NUM_BATCHES-1:0] valid_tmp; wire [NUM_SLICES-1:0] valid_tmp;
wire [NUM_BATCHES-1:0][DATAW+LOG_NUM_REQS2-1:0] data_tmp; wire [NUM_SLICES-1:0][DATAW+LOG_NUM_REQS2-1:0] data_tmp;
wire [NUM_BATCHES-1:0] ready_tmp; wire [NUM_SLICES-1:0] ready_tmp;
for (genvar i = 0; i < NUM_BATCHES; ++i) begin for (genvar i = 0; i < NUM_SLICES; ++i) begin
localparam BATCH_BEGIN = i * MAX_FANOUT; localparam SLICE_BEGIN = i * MAX_FANOUT;
localparam BATCH_END = `MIN(BATCH_BEGIN + MAX_FANOUT, NUM_INPUTS); localparam SLICE_END = `MIN(SLICE_BEGIN + MAX_FANOUT, NUM_INPUTS);
localparam BATCH_SIZE = BATCH_END - BATCH_BEGIN; localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN;
wire [DATAW-1:0] data_tmp_u; wire [DATAW-1:0] data_tmp_u;
wire [`LOG2UP(BATCH_SIZE)-1:0] sel_tmp_u; wire [`LOG2UP(SLICE_SIZE)-1:0] sel_tmp_u;
`RESET_RELAY (slice_reset, reset); `RESET_RELAY (slice_reset, reset);
if (MAX_FANOUT != 1) begin if (MAX_FANOUT != 1) begin
VX_stream_arb #( VX_stream_arb #(
.NUM_INPUTS (BATCH_SIZE), .NUM_INPUTS (SLICE_SIZE),
.NUM_OUTPUTS (1), .NUM_OUTPUTS (1),
.DATAW (DATAW), .DATAW (DATAW),
.ARBITER (ARBITER), .ARBITER (ARBITER),
@ -108,9 +108,9 @@ module VX_stream_arb #(
) fanout_slice_arb ( ) fanout_slice_arb (
.clk (clk), .clk (clk),
.reset (slice_reset), .reset (slice_reset),
.valid_in (valid_in[BATCH_END-1: BATCH_BEGIN]), .valid_in (valid_in[SLICE_END-1: SLICE_BEGIN]),
.data_in (data_in[BATCH_END-1: BATCH_BEGIN]), .data_in (data_in[SLICE_END-1: SLICE_BEGIN]),
.ready_in (ready_in[BATCH_END-1: BATCH_BEGIN]), .ready_in (ready_in[SLICE_END-1: SLICE_BEGIN]),
.valid_out (valid_tmp[i]), .valid_out (valid_tmp[i]),
.data_out (data_tmp_u), .data_out (data_tmp_u),
.sel_out (sel_tmp_u), .sel_out (sel_tmp_u),
@ -125,7 +125,7 @@ module VX_stream_arb #(
wire [LOG_NUM_REQS3-1:0] sel_out_u; wire [LOG_NUM_REQS3-1:0] sel_out_u;
VX_stream_arb #( VX_stream_arb #(
.NUM_INPUTS (NUM_BATCHES), .NUM_INPUTS (NUM_SLICES),
.NUM_OUTPUTS (1), .NUM_OUTPUTS (1),
.DATAW (DATAW + LOG_NUM_REQS2), .DATAW (DATAW + LOG_NUM_REQS2),
.ARBITER (ARBITER), .ARBITER (ARBITER),
@ -174,17 +174,9 @@ module VX_stream_arb #(
); );
assign valid_in_r = arb_valid; assign valid_in_r = arb_valid;
assign data_in_r = data_in[arb_index];
assign arb_ready = ready_in_r; assign arb_ready = ready_in_r;
VX_onehot_mux #(
.DATAW (DATAW),
.N (NUM_REQS)
) onehot_mux (
.data_in (data_in),
.sel_in (arb_onehot),
.data_out (data_in_r)
);
for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar i = 0; i < NUM_REQS; ++i) begin
assign ready_in[i] = ready_in_r && arb_onehot[i]; assign ready_in[i] = ready_in_r && arb_onehot[i];
end end
@ -214,15 +206,15 @@ module VX_stream_arb #(
for (genvar i = 0; i < NUM_INPUTS; ++i) begin for (genvar i = 0; i < NUM_INPUTS; ++i) begin
localparam BATCH_BEGIN = i * NUM_REQS; localparam SLICE_BEGIN = i * NUM_REQS;
localparam BATCH_END = `MIN(BATCH_BEGIN + NUM_REQS, NUM_OUTPUTS); localparam SLICE_END = `MIN(SLICE_BEGIN + NUM_REQS, NUM_OUTPUTS);
localparam BATCH_SIZE = BATCH_END - BATCH_BEGIN; localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN;
`RESET_RELAY (slice_reset, reset); `RESET_RELAY (slice_reset, reset);
VX_stream_arb #( VX_stream_arb #(
.NUM_INPUTS (1), .NUM_INPUTS (1),
.NUM_OUTPUTS (BATCH_SIZE), .NUM_OUTPUTS (SLICE_SIZE),
.DATAW (DATAW), .DATAW (DATAW),
.ARBITER (ARBITER), .ARBITER (ARBITER),
.MAX_FANOUT (MAX_FANOUT), .MAX_FANOUT (MAX_FANOUT),
@ -234,30 +226,30 @@ module VX_stream_arb #(
.valid_in (valid_in[i]), .valid_in (valid_in[i]),
.ready_in (ready_in[i]), .ready_in (ready_in[i]),
.data_in (data_in[i]), .data_in (data_in[i]),
.data_out (data_out[BATCH_END-1: BATCH_BEGIN]), .data_out (data_out[SLICE_END-1: SLICE_BEGIN]),
.valid_out (valid_out[BATCH_END-1: BATCH_BEGIN]), .valid_out (valid_out[SLICE_END-1: SLICE_BEGIN]),
.ready_out (ready_out[BATCH_END-1: BATCH_BEGIN]), .ready_out (ready_out[SLICE_END-1: SLICE_BEGIN]),
`UNUSED_PIN (sel_out) `UNUSED_PIN (sel_out)
); );
for (genvar j = BATCH_BEGIN; j < BATCH_END; ++j) begin for (genvar j = SLICE_BEGIN; j < SLICE_END; ++j) begin
assign sel_out[j] = i; assign sel_out[j] = i;
end end
end end
end else if (MAX_FANOUT != 0 && (NUM_OUTPUTS > MAX_FANOUT)) begin end else if (MAX_FANOUT != 0 && (NUM_OUTPUTS > (MAX_FANOUT + MAX_FANOUT /2))) begin
// (#inputs == 1) and (#outputs > max_fanout) // (#inputs == 1) and (#outputs > max_fanout)
localparam NUM_BATCHES = `CDIV(NUM_OUTPUTS, MAX_FANOUT); localparam NUM_SLICES = `CDIV(NUM_OUTPUTS, MAX_FANOUT);
wire [NUM_BATCHES-1:0] valid_tmp; wire [NUM_SLICES-1:0] valid_tmp;
wire [NUM_BATCHES-1:0][DATAW-1:0] data_tmp; wire [NUM_SLICES-1:0][DATAW-1:0] data_tmp;
wire [NUM_BATCHES-1:0] ready_tmp; wire [NUM_SLICES-1:0] ready_tmp;
VX_stream_arb #( VX_stream_arb #(
.NUM_INPUTS (1), .NUM_INPUTS (1),
.NUM_OUTPUTS (NUM_BATCHES), .NUM_OUTPUTS (NUM_SLICES),
.DATAW (DATAW), .DATAW (DATAW),
.ARBITER (ARBITER), .ARBITER (ARBITER),
.MAX_FANOUT (MAX_FANOUT), .MAX_FANOUT (MAX_FANOUT),
@ -275,17 +267,17 @@ module VX_stream_arb #(
`UNUSED_PIN (sel_out) `UNUSED_PIN (sel_out)
); );
for (genvar i = 0; i < NUM_BATCHES; ++i) begin for (genvar i = 0; i < NUM_SLICES; ++i) begin
localparam BATCH_BEGIN = i * MAX_FANOUT; localparam SLICE_BEGIN = i * MAX_FANOUT;
localparam BATCH_END = `MIN(BATCH_BEGIN + MAX_FANOUT, NUM_OUTPUTS); localparam SLICE_END = `MIN(SLICE_BEGIN + MAX_FANOUT, NUM_OUTPUTS);
localparam BATCH_SIZE = BATCH_END - BATCH_BEGIN; localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN;
`RESET_RELAY (slice_reset, reset); `RESET_RELAY (slice_reset, reset);
VX_stream_arb #( VX_stream_arb #(
.NUM_INPUTS (1), .NUM_INPUTS (1),
.NUM_OUTPUTS (BATCH_SIZE), .NUM_OUTPUTS (SLICE_SIZE),
.DATAW (DATAW), .DATAW (DATAW),
.ARBITER (ARBITER), .ARBITER (ARBITER),
.MAX_FANOUT (MAX_FANOUT), .MAX_FANOUT (MAX_FANOUT),
@ -297,9 +289,9 @@ module VX_stream_arb #(
.valid_in (valid_tmp[i]), .valid_in (valid_tmp[i]),
.ready_in (ready_tmp[i]), .ready_in (ready_tmp[i]),
.data_in (data_tmp[i]), .data_in (data_tmp[i]),
.data_out (data_out[BATCH_END-1: BATCH_BEGIN]), .data_out (data_out[SLICE_END-1: SLICE_BEGIN]),
.valid_out (valid_out[BATCH_END-1: BATCH_BEGIN]), .valid_out (valid_out[SLICE_END-1: SLICE_BEGIN]),
.ready_out (ready_out[BATCH_END-1: BATCH_BEGIN]), .ready_out (ready_out[SLICE_END-1: SLICE_BEGIN]),
`UNUSED_PIN (sel_out) `UNUSED_PIN (sel_out)
); );
end end
@ -357,9 +349,9 @@ module VX_stream_arb #(
// #Inputs == #Outputs // #Inputs == #Outputs
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin `RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT);
`RESET_RELAY_EN (out_buf_reset, reset, (NUM_OUTPUTS > 1)); for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (DATAW), .DATAW (DATAW),
@ -368,7 +360,7 @@ module VX_stream_arb #(
.LUTRAM (LUTRAM) .LUTRAM (LUTRAM)
) out_buf ( ) out_buf (
.clk (clk), .clk (clk),
.reset (out_buf_reset), .reset (out_buf_reset[i]),
.valid_in (valid_in[i]), .valid_in (valid_in[i]),
.ready_in (ready_in[i]), .ready_in (ready_in[i]),
.data_in (data_in[i]), .data_in (data_in[i]),

View file

@ -39,8 +39,9 @@ module VX_stream_pack #(
input wire ready_out input wire ready_out
); );
if (NUM_REQS > 1) begin if (NUM_REQS > 1) begin
localparam LOG_NUM_REQS = `CLOG2(NUM_REQS);
wire [NUM_REQS-1:0] grant_onehot; wire [LOG_NUM_REQS-1:0] grant_index;
wire grant_valid; wire grant_valid;
wire grant_ready; wire grant_ready;
@ -52,21 +53,12 @@ module VX_stream_pack #(
.reset (reset), .reset (reset),
.requests (valid_in), .requests (valid_in),
.grant_valid (grant_valid), .grant_valid (grant_valid),
`UNUSED_PIN (grant_index), .grant_index (grant_index),
.grant_onehot(grant_onehot), `UNUSED_PIN (grant_onehot),
.grant_ready (grant_ready) .grant_ready (grant_ready)
); );
wire [TAG_WIDTH-1:0] tag_sel; wire [TAG_WIDTH-1:0] tag_sel = tag_in[grant_index];
VX_onehot_mux #(
.DATAW (TAG_WIDTH),
.N (NUM_REQS)
) onehot_mux (
.data_in (tag_in),
.sel_in (grant_onehot),
.data_out (tag_sel)
);
wire [NUM_REQS-1:0] tag_matches; wire [NUM_REQS-1:0] tag_matches;

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023 // Copyright © 2019-2023
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -33,7 +33,7 @@ module VX_stream_switch #(
output wire [NUM_INPUTS-1:0] ready_in, output wire [NUM_INPUTS-1:0] ready_in,
output wire [NUM_OUTPUTS-1:0] valid_out, output wire [NUM_OUTPUTS-1:0] valid_out,
output wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out, output wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out,
input wire [NUM_OUTPUTS-1:0] ready_out input wire [NUM_OUTPUTS-1:0] ready_out
); );
if (NUM_INPUTS > NUM_OUTPUTS) begin if (NUM_INPUTS > NUM_OUTPUTS) begin
@ -52,7 +52,7 @@ module VX_stream_switch #(
assign data_in_r[i][j] = '0; assign data_in_r[i][j] = '0;
end end
end end
end end
wire [NUM_OUTPUTS-1:0] valid_out_r; wire [NUM_OUTPUTS-1:0] valid_out_r;
wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out_r; wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out_r;
@ -65,25 +65,24 @@ module VX_stream_switch #(
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
for (genvar j = 0; j < NUM_REQS; ++j) begin for (genvar j = 0; j < NUM_REQS; ++j) begin
localparam ii = i * NUM_REQS + j; localparam ii = i * NUM_REQS + j;
if (ii < NUM_INPUTS) begin if (ii < NUM_INPUTS) begin
assign ready_in[ii] = ready_out_r[i] & (sel_in[i] == LOG_NUM_REQS'(j)); assign ready_in[ii] = ready_out_r[i] & (sel_in[i] == LOG_NUM_REQS'(j));
end end
end end
end end
`RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT);
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
`RESET_RELAY_EN (out_buf_reset, reset, (NUM_OUTPUTS > 1));
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (DATAW), .DATAW (DATAW),
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
) out_buf ( ) out_buf (
.clk (clk), .clk (clk),
.reset (out_buf_reset), .reset (out_buf_reset[i]),
.valid_in (valid_out_r[i]), .valid_in (valid_out_r[i]),
.ready_in (ready_out_r[i]), .ready_in (ready_out_r[i]),
.data_in (data_out_r[i]), .data_in (data_out_r[i]),
.data_out (data_out[i]), .data_out (data_out[i]),
@ -93,7 +92,7 @@ module VX_stream_switch #(
end end
end else if (NUM_OUTPUTS > NUM_INPUTS) begin end else if (NUM_OUTPUTS > NUM_INPUTS) begin
wire [NUM_INPUTS-1:0][NUM_REQS-1:0] valid_out_r; wire [NUM_INPUTS-1:0][NUM_REQS-1:0] valid_out_r;
wire [NUM_INPUTS-1:0][NUM_REQS-1:0] ready_out_r; wire [NUM_INPUTS-1:0][NUM_REQS-1:0] ready_out_r;
@ -104,51 +103,50 @@ module VX_stream_switch #(
assign ready_in[i] = ready_out_r[i][sel_in[i]]; assign ready_in[i] = ready_out_r[i][sel_in[i]];
end end
`RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT);
for (genvar i = 0; i < NUM_INPUTS; ++i) begin for (genvar i = 0; i < NUM_INPUTS; ++i) begin
for (genvar j = 0; j < NUM_REQS; ++j) begin for (genvar j = 0; j < NUM_REQS; ++j) begin
localparam ii = i * NUM_REQS + j; localparam ii = i * NUM_REQS + j;
if (ii < NUM_OUTPUTS) begin if (ii < NUM_OUTPUTS) begin
`RESET_RELAY (out_buf_reset, reset);
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (DATAW), .DATAW (DATAW),
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
) out_buf ( ) out_buf (
.clk (clk), .clk (clk),
.reset (out_buf_reset), .reset (out_buf_reset[ii]),
.valid_in (valid_out_r[i][j]), .valid_in (valid_out_r[i][j]),
.ready_in (ready_out_r[i][j]), .ready_in (ready_out_r[i][j]),
.data_in (data_in[i]), .data_in (data_in[i]),
.data_out (data_out[ii]), .data_out (data_out[ii]),
.valid_out (valid_out[ii]), .valid_out (valid_out[ii]),
.ready_out (ready_out[ii]) .ready_out (ready_out[ii])
); );
end else begin end else begin
`UNUSED_VAR (out_buf_reset[ii])
`UNUSED_VAR (valid_out_r[i][j]) `UNUSED_VAR (valid_out_r[i][j])
assign ready_out_r[i][j] = '0; assign ready_out_r[i][j] = '0;
end end
end end
end end
end else begin end else begin
// #Inputs == #Outputs // #Inputs == #Outputs
`UNUSED_VAR (sel_in) `UNUSED_VAR (sel_in)
`RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT);
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
`RESET_RELAY_EN (out_buf_reset, reset, (NUM_OUTPUTS > 1));
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (DATAW), .DATAW (DATAW),
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
) out_buf ( ) out_buf (
.clk (clk), .clk (clk),
.reset (out_buf_reset), .reset (out_buf_reset[i]),
.valid_in (valid_in[i]), .valid_in (valid_in[i]),
.ready_in (ready_in[i]), .ready_in (ready_in[i]),
.data_in (data_in[i]), .data_in (data_in[i]),
@ -159,6 +157,6 @@ module VX_stream_switch #(
end end
end end
endmodule endmodule
`TRACING_ON `TRACING_ON

View file

@ -20,7 +20,7 @@ module VX_stream_xbar #(
parameter DATAW = 4, parameter DATAW = 4,
parameter IN_WIDTH = `LOG2UP(NUM_INPUTS), parameter IN_WIDTH = `LOG2UP(NUM_INPUTS),
parameter OUT_WIDTH = `LOG2UP(NUM_OUTPUTS), parameter OUT_WIDTH = `LOG2UP(NUM_OUTPUTS),
parameter ARBITER = "P", parameter ARBITER = "R",
parameter OUT_BUF = 0, parameter OUT_BUF = 0,
parameter LUTRAM = 0, parameter LUTRAM = 0,
parameter MAX_FANOUT = `MAX_FANOUT, parameter MAX_FANOUT = `MAX_FANOUT,
@ -126,10 +126,9 @@ module VX_stream_xbar #(
assign data_out_r = {NUM_OUTPUTS{data_in}}; assign data_out_r = {NUM_OUTPUTS{data_in}};
assign ready_in = ready_out_r[sel_in]; assign ready_in = ready_out_r[sel_in];
`RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT);
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
`RESET_RELAY (out_buf_reset, reset);
VX_elastic_buffer #( VX_elastic_buffer #(
.DATAW (DATAW), .DATAW (DATAW),
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
@ -137,7 +136,7 @@ module VX_stream_xbar #(
.LUTRAM (LUTRAM) .LUTRAM (LUTRAM)
) out_buf ( ) out_buf (
.clk (clk), .clk (clk),
.reset (out_buf_reset), .reset (out_buf_reset[i]),
.valid_in (valid_out_r[i]), .valid_in (valid_out_r[i]),
.ready_in (ready_out_r[i]), .ready_in (ready_out_r[i]),
.data_in (data_out_r[i]), .data_in (data_out_r[i]),

View file

@ -94,7 +94,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_req_idx; wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_req_idx;
wire [NUM_BANKS-1:0] per_bank_req_ready; wire [NUM_BANKS-1:0] per_bank_req_ready;
wire [NUM_BANKS-1:0][REQ_DATAW-1:0] per_bank_req_data_all; wire [NUM_BANKS-1:0][REQ_DATAW-1:0] per_bank_req_data_aos;
wire [NUM_REQS-1:0] req_valid_in; wire [NUM_REQS-1:0] req_valid_in;
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in; wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in;
@ -111,7 +111,8 @@ module VX_local_mem import VX_gpu_pkg::*; #(
req_bank_addr[i], req_bank_addr[i],
mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.byteen,
mem_bus_if[i].req_data.data, mem_bus_if[i].req_data.data,
mem_bus_if[i].req_data.tag}; mem_bus_if[i].req_data.tag
};
assign mem_bus_if[i].req_ready = req_ready_in[i]; assign mem_bus_if[i].req_ready = req_ready_in[i];
end end
@ -120,6 +121,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
.NUM_OUTPUTS (NUM_BANKS), .NUM_OUTPUTS (NUM_BANKS),
.DATAW (REQ_DATAW), .DATAW (REQ_DATAW),
.PERF_CTR_BITS (`PERF_CTR_BITS), .PERF_CTR_BITS (`PERF_CTR_BITS),
.ARBITER ("F"),
.OUT_BUF (3) // output should be registered for the data_store addressing .OUT_BUF (3) // output should be registered for the data_store addressing
) req_xbar ( ) req_xbar (
.clk (clk), .clk (clk),
@ -134,7 +136,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
.sel_in (req_bank_idx), .sel_in (req_bank_idx),
.ready_in (req_ready_in), .ready_in (req_ready_in),
.valid_out (per_bank_req_valid), .valid_out (per_bank_req_valid),
.data_out (per_bank_req_data_all), .data_out (per_bank_req_data_aos),
.sel_out (per_bank_req_idx), .sel_out (per_bank_req_idx),
.ready_out (per_bank_req_ready) .ready_out (per_bank_req_ready)
); );
@ -145,7 +147,8 @@ module VX_local_mem import VX_gpu_pkg::*; #(
per_bank_req_addr[i], per_bank_req_addr[i],
per_bank_req_byteen[i], per_bank_req_byteen[i],
per_bank_req_data[i], per_bank_req_data[i],
per_bank_req_tag[i]} = per_bank_req_data_all[i]; per_bank_req_tag[i]
} = per_bank_req_data_aos[i];
end end
// banks access // banks access
@ -156,38 +159,55 @@ module VX_local_mem import VX_gpu_pkg::*; #(
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_rsp_tag; wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_rsp_tag;
wire [NUM_BANKS-1:0] per_bank_rsp_ready; wire [NUM_BANKS-1:0] per_bank_rsp_ready;
`RESET_RELAY (bank_reset, reset);
for (genvar i = 0; i < NUM_BANKS; ++i) begin for (genvar i = 0; i < NUM_BANKS; ++i) begin
wire bank_rsp_valid, bank_rsp_ready;
wire [WORD_WIDTH-1:0] bank_rsp_data;
`RESET_RELAY_EN (bram_reset, reset, (NUM_BANKS > 1));
VX_sp_ram #( VX_sp_ram #(
.DATAW (WORD_WIDTH), .DATAW (WORD_WIDTH),
.SIZE (WORDS_PER_BANK), .SIZE (WORDS_PER_BANK),
.WRENW (WORD_SIZE) .WRENW (WORD_SIZE),
.NO_RWCHECK (1)
) data_store ( ) data_store (
.clk (clk), .clk (clk),
.read (1'b1), .reset (bram_reset),
.read (per_bank_req_valid[i] && per_bank_req_ready[i] && ~per_bank_req_rw[i]),
.write (per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i]), .write (per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i]),
.wren (per_bank_req_byteen[i]), .wren (per_bank_req_byteen[i]),
.addr (per_bank_req_addr[i]), .addr (per_bank_req_addr[i]),
.wdata (per_bank_req_data[i]), .wdata (per_bank_req_data[i]),
.rdata (per_bank_rsp_data[i]) .rdata (bank_rsp_data)
); );
// drop write response // read-during-write hazard detection
wire per_bank_req_valid_w, per_bank_req_ready_w; reg [BANK_ADDR_WIDTH-1:0] last_wr_addr;
assign per_bank_req_valid_w = per_bank_req_valid[i] && ~per_bank_req_rw[i]; reg last_wr_valid;
assign per_bank_req_ready[i] = per_bank_req_ready_w || per_bank_req_rw[i]; always @(posedge clk) begin
if (bram_reset) begin
last_wr_valid <= 0;
end else begin
last_wr_valid <= per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i];
end
last_wr_addr <= per_bank_req_addr[i];
end
wire is_rdw_hazard = last_wr_valid && ~per_bank_req_rw[i] && (per_bank_req_addr[i] == last_wr_addr);
VX_elastic_buffer #( // drop write response and stall on read-during-write hazard
.DATAW (REQ_SEL_WIDTH + TAG_WIDTH), assign bank_rsp_valid = per_bank_req_valid[i] && ~per_bank_req_rw[i] && ~is_rdw_hazard;
.SIZE (0) assign per_bank_req_ready[i] = (bank_rsp_ready || per_bank_req_rw[i]) && ~is_rdw_hazard;
) bank_buf (
// register BRAM output
VX_pipe_buffer #(
.DATAW (REQ_SEL_WIDTH + WORD_WIDTH + TAG_WIDTH)
) bram_buf (
.clk (clk), .clk (clk),
.reset (bank_reset), .reset (bram_reset),
.valid_in (per_bank_req_valid_w), .valid_in (bank_rsp_valid),
.ready_in (per_bank_req_ready_w), .ready_in (bank_rsp_ready),
.data_in ({per_bank_req_idx[i], per_bank_req_tag[i]}), .data_in ({per_bank_req_idx[i], bank_rsp_data, per_bank_req_tag[i]}),
.data_out ({per_bank_rsp_idx[i], per_bank_rsp_tag[i]}), .data_out ({per_bank_rsp_idx[i], per_bank_rsp_data[i], per_bank_rsp_tag[i]}),
.valid_out (per_bank_rsp_valid[i]), .valid_out (per_bank_rsp_valid[i]),
.ready_out (per_bank_rsp_ready[i]) .ready_out (per_bank_rsp_ready[i])
); );
@ -195,10 +215,10 @@ module VX_local_mem import VX_gpu_pkg::*; #(
// bank responses gather // bank responses gather
wire [NUM_BANKS-1:0][RSP_DATAW-1:0] per_bank_rsp_data_all; wire [NUM_BANKS-1:0][RSP_DATAW-1:0] per_bank_rsp_data_aos;
for (genvar i = 0; i < NUM_BANKS; ++i) begin for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign per_bank_rsp_data_all[i] = {per_bank_rsp_data[i], per_bank_rsp_tag[i]}; assign per_bank_rsp_data_aos[i] = {per_bank_rsp_data[i], per_bank_rsp_tag[i]};
end end
wire [NUM_REQS-1:0] rsp_valid_out; wire [NUM_REQS-1:0] rsp_valid_out;
@ -209,6 +229,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
.NUM_INPUTS (NUM_BANKS), .NUM_INPUTS (NUM_BANKS),
.NUM_OUTPUTS (NUM_REQS), .NUM_OUTPUTS (NUM_REQS),
.DATAW (RSP_DATAW), .DATAW (RSP_DATAW),
.ARBITER ("P"), // this priority arbiter has negligible impact on performance
.OUT_BUF (OUT_BUF) .OUT_BUF (OUT_BUF)
) rsp_xbar ( ) rsp_xbar (
.clk (clk), .clk (clk),
@ -216,7 +237,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
`UNUSED_PIN (collisions), `UNUSED_PIN (collisions),
.sel_in (per_bank_rsp_idx), .sel_in (per_bank_rsp_idx),
.valid_in (per_bank_rsp_valid), .valid_in (per_bank_rsp_valid),
.data_in (per_bank_rsp_data_all), .data_in (per_bank_rsp_data_aos),
.ready_in (per_bank_rsp_ready), .ready_in (per_bank_rsp_ready),
.valid_out (rsp_valid_out), .valid_out (rsp_valid_out),
.data_out (rsp_data_out), .data_out (rsp_data_out),
@ -310,7 +331,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
always @(posedge clk) begin always @(posedge clk) begin
if (mem_bus_if[i].req_valid && mem_bus_if[i].req_ready) begin if (mem_bus_if[i].req_valid && mem_bus_if[i].req_ready) begin
if (mem_bus_if[i].req_data.rw) begin if (mem_bus_if[i].req_data.rw) begin
`TRACE(1, ("%d: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", `TRACE(1, ("%d: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=%h, data=0x%h (#%0d)\n",
$time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, req_uuid[i])); $time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, req_uuid[i]));
end else begin end else begin
`TRACE(1, ("%d: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n", `TRACE(1, ("%d: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n",
@ -318,7 +339,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
end end
end end
if (mem_bus_if[i].rsp_valid && mem_bus_if[i].rsp_ready) begin if (mem_bus_if[i].rsp_valid && mem_bus_if[i].rsp_ready) begin
`TRACE(1, ("%d: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%0h (#%0d)\n", `TRACE(1, ("%d: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%h (#%0d)\n",
$time, INSTANCE_ID, i, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data[i], rsp_uuid[i])); $time, INSTANCE_ID, i, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data[i], rsp_uuid[i]));
end end
end end
@ -328,7 +349,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
always @(posedge clk) begin always @(posedge clk) begin
if (per_bank_req_valid[i] && per_bank_req_ready[i]) begin if (per_bank_req_valid[i] && per_bank_req_ready[i]) begin
if (per_bank_req_rw[i]) begin if (per_bank_req_rw[i]) begin
`TRACE(2, ("%d: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", `TRACE(2, ("%d: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=%h, data=0x%h (#%0d)\n",
$time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_byteen[i], per_bank_req_data[i], per_bank_req_uuid[i])); $time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_byteen[i], per_bank_req_data[i], per_bank_req_uuid[i]));
end else begin end else begin
`TRACE(2, ("%d: %s-bank%0d rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", `TRACE(2, ("%d: %s-bank%0d rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
@ -336,7 +357,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
end end
end end
if (per_bank_rsp_valid[i] && per_bank_rsp_ready[i]) begin if (per_bank_rsp_valid[i] && per_bank_rsp_ready[i]) begin
`TRACE(2, ("%d: %s-bank%0d rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n", `TRACE(2, ("%d: %s-bank%0d rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n",
$time, INSTANCE_ID, i, per_bank_rsp_tag[i], per_bank_rsp_data[i], per_bank_rsp_uuid[i])); $time, INSTANCE_ID, i, per_bank_rsp_tag[i], per_bank_rsp_data[i], per_bank_rsp_uuid[i]));
end end
end end

View file

@ -73,12 +73,12 @@ ifneq ($(TARGET), fpga)
CFLAGS += -DSIMULATION CFLAGS += -DSIMULATION
endif endif
# Debugigng # Debugging
ifdef DEBUG ifdef DEBUG
ifneq ($(TARGET), fpga) ifneq ($(TARGET), fpga)
CFLAGS += -DNDEBUG CFLAGS += -DDEBUG_LEVEL=$(DEBUG) $(DBG_TRACE_FLAGS)
else else
CFLAGS += $(DBG_TRACE_FLAGS) CFLAGS += -DNDEBUG
endif endif
else else
CFLAGS += -DNDEBUG CFLAGS += -DNDEBUG

View file

@ -1 +1 @@
create_clock -name {clk} -period "220 MHz" -waveform { 0.000 1.0 } [get_ports {clk}] create_clock -name {clk} -period "200 MHz" -waveform { 0.000 1.0 } [get_ports {clk}]

View file

@ -45,6 +45,7 @@ FPGA_BIN_DIR=<bin_dir> XRT_DEVICE_INDEX=1 TARGET=hw ./ci/blackbox.sh --driver=xr
# build report logs # build report logs
<build_dir>/bin/vortex_afu.xclbin.info <build_dir>/bin/vortex_afu.xclbin.info
<build_dir>/_x/logs/link/vivado.log # search for keyword "Very high fanout"
<build_dir>/_x/reports/link/link/imp/impl_1_full_util_routed.rpt <build_dir>/_x/reports/link/link/imp/impl_1_full_util_routed.rpt
<build_dir>/_x/reports/link/imp/impl_1_hw_bb_locked_timing_summary_routed.rpt # search for keyword "VIOLATED" <build_dir>/_x/reports/link/imp/impl_1_hw_bb_locked_timing_summary_routed.rpt # search for keyword "VIOLATED"
<build_dir>/_x/logs/link/syn/ulp_vortex_afu_1_0_synth_1_runme.log <build_dir>/_x/logs/link/syn/ulp_vortex_afu_1_0_synth_1_runme.log

View file

@ -111,14 +111,14 @@ ifeq ($(TARGET), hw_emu)
CFLAGS += -DSIMULATION CFLAGS += -DSIMULATION
endif endif
# Debugigng # Debugging
ifdef DEBUG ifdef DEBUG
VPP_FLAGS += -g --debug.protocol all VPP_FLAGS += -g --debug.protocol all
ifneq ($(TARGET), hw) ifneq ($(TARGET), hw)
CFLAGS += -DNDEBUG
else
VPP_FLAGS += --vivado.prop fileset.sim_1.xsim.elaborate.debug_level=all VPP_FLAGS += --vivado.prop fileset.sim_1.xsim.elaborate.debug_level=all
CFLAGS += $(DBG_TRACE_FLAGS) CFLAGS += -DDEBUG_LEVEL=$(DEBUG) $(DBG_TRACE_FLAGS)
else
CFLAGS += -DNDEBUG
endif endif
else else
VPP_FLAGS += --optimize 3 VPP_FLAGS += --optimize 3

View file

@ -49,7 +49,7 @@ endif
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache
RTL_INCLUDE += $(FPU_INCLUDE) RTL_INCLUDE += $(FPU_INCLUDE)
# Debugigng # Debugging
ifdef DEBUG ifdef DEBUG
CFLAGS += $(DBG_TRACE_FLAGS) CFLAGS += $(DBG_TRACE_FLAGS)
else else

View file

@ -29,7 +29,7 @@ THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count()
VL_FLAGS += -j $(THREADS) VL_FLAGS += -j $(THREADS)
#VL_FLAGS += --threads $(THREADS) #VL_FLAGS += --threads $(THREADS)
# Debugigng # Debugging
ifdef DEBUG ifdef DEBUG
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS) VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
CXXFLAGS += -g -O0 $(DBG_FLAGS) CXXFLAGS += -g -O0 $(DBG_FLAGS)

View file

@ -13,6 +13,7 @@
#include <VX_config.h> #include <VX_config.h>
#include <VX_types.h> #include <VX_types.h>
#include <newlib.h>
#include "common.h" #include "common.h"
.section .init, "ax" .section .init, "ax"
@ -51,12 +52,10 @@ _start:
# la t0, trap_entry # la t0, trap_entry
# csrw mtvec, t0 # csrw mtvec, t0
# register global termination functions #ifdef HAVE_INITFINI_ARRAY
la a0, __libc_fini_array
call atexit
# run global initialization functions # run global initialization functions
call __libc_init_array call __libc_init_array
#endif
# call main program routine # call main program routine
call main call main

View file

@ -119,70 +119,13 @@ void __libc_fini_array (void) {
} }
#endif #endif
/* // This function will be called by LIBC at program exit.
#define MAX_CORES 64 // Since this platform only support statically linked programs,
volatile int g_cxa_locks[MAX_CORES] = {0}; // it is not required to support LIBC's exit functions registration via atexit().
*/ void __funcs_on_exit (void) {
#ifdef HAVE_INITFINI_ARRAY
void __cxa_lock() { __libc_fini_array();
/*int core_id = vx_core_id(); #endif
g_cxa_locks[core_id] = 1;
vx_fence();
for (int i = 1; i < MAX_CORES; ++i) {
int other = (core_id + i) % MAX_CORES;
while (g_cxa_locks[other]) {
vx_fence(); // cache coherence not supported, so we need to flush the caches
}
}*/
}
void __cxa_unlock() {
/*vx_fence();
int core_id = vx_core_id();
g_cxa_locks[core_id] = 0;*/
}
#define MAX_FEXITS 64
typedef struct {
void (*f[MAX_FEXITS])(void*);
void *a[MAX_FEXITS];
} fexit_list_t;
static fexit_list_t g_fexit_list;
static int g_num_fexits = 0;
void __funcs_on_exit() {
void (*func)(void *), *arg;
fexit_list_t* fexit_list = &g_fexit_list;
for (int i = 0; i < g_num_fexits; ++i) {
func = fexit_list->f[i];
arg = fexit_list->a[i];
func(arg);
}
}
void __cxa_finalize(void *dso) {}
int __cxa_atexit(void (*func)(void *), void *arg, void *dso) {
__cxa_lock();
int num_fexits = g_num_fexits;
if (num_fexits >= MAX_FEXITS)
return -1;
fexit_list_t* fexit_list = &g_fexit_list;
fexit_list->f[num_fexits] = func;
fexit_list->a[num_fexits] = arg;
g_num_fexits = num_fexits + 1;
__cxa_unlock();
return 0;
}
static void call(void *p) {
((void (*)(void))(uintptr_t)p)();
}
int atexit(void (*func)(void)) {
return __cxa_atexit(call, (void*)(uintptr_t)func, 0);
} }
#ifdef __cplusplus #ifdef __cplusplus

View file

@ -21,6 +21,7 @@
#include <cstdint> #include <cstdint>
#include <unordered_map> #include <unordered_map>
#include <array>
#define CACHE_BLOCK_SIZE 64 #define CACHE_BLOCK_SIZE 64

View file

@ -34,6 +34,7 @@ typedef void* vx_buffer_h;
#define VX_CAPS_GLOBAL_MEM_SIZE 0x5 #define VX_CAPS_GLOBAL_MEM_SIZE 0x5
#define VX_CAPS_LOCAL_MEM_SIZE 0x6 #define VX_CAPS_LOCAL_MEM_SIZE 0x6
#define VX_CAPS_ISA_FLAGS 0x7 #define VX_CAPS_ISA_FLAGS 0x7
#define VX_CAPS_NUM_MEM_BANKS 0x8
// device isa flags // device isa flags
#define VX_ISA_STD_A (1ull << ISA_STD_A) #define VX_ISA_STD_A (1ull << ISA_STD_A)

View file

@ -30,7 +30,7 @@ else
CXXFLAGS += -I$(SYN_DIR) CXXFLAGS += -I$(SYN_DIR)
endif endif
# Debugigng # Debugging
ifdef DEBUG ifdef DEBUG
CXXFLAGS += -g -O0 CXXFLAGS += -g -O0
else else

View file

@ -232,6 +232,9 @@ public:
case VX_CAPS_ISA_FLAGS: case VX_CAPS_ISA_FLAGS:
_value = isa_caps_; _value = isa_caps_;
break; break;
case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS;
break;
default: default:
fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id); fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id);
std::abort(); std::abort();

View file

@ -19,7 +19,7 @@ LDFLAGS += -L$(DESTDIR) -lrtlsim
SRCS := $(SRC_DIR)/vortex.cpp SRCS := $(SRC_DIR)/vortex.cpp
# Debugigng # Debugging
ifdef DEBUG ifdef DEBUG
CXXFLAGS += -g -O0 CXXFLAGS += -g -O0
else else

View file

@ -77,6 +77,9 @@ public:
case VX_CAPS_ISA_FLAGS: case VX_CAPS_ISA_FLAGS:
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD; _value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
break; break;
case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS;
break;
default: default:
std::cout << "invalid caps id: " << caps_id << std::endl; std::cout << "invalid caps id: " << caps_id << std::endl;
std::abort(); std::abort();

View file

@ -19,7 +19,7 @@ LDFLAGS += -L$(DESTDIR) -lsimx
SRCS := $(SRC_DIR)/vortex.cpp SRCS := $(SRC_DIR)/vortex.cpp
# Debugigng # Debugging
ifdef DEBUG ifdef DEBUG
CXXFLAGS += -g -O0 CXXFLAGS += -g -O0
else else

View file

@ -105,6 +105,9 @@ public:
case VX_CAPS_ISA_FLAGS: case VX_CAPS_ISA_FLAGS:
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD; _value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
break; break;
case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS;
break;
default: default:
std::cout << "invalid caps id: " << caps_id << std::endl; std::cout << "invalid caps id: " << caps_id << std::endl;
std::abort(); std::abort();

View file

@ -12,7 +12,7 @@ LDFLAGS += -shared -pthread -ldl
SRCS := $(SRC_DIR)/vortex.cpp $(SRC_DIR)/utils.cpp SRCS := $(SRC_DIR)/vortex.cpp $(SRC_DIR)/utils.cpp
# Debugigng # Debugging
ifdef DEBUG ifdef DEBUG
CXXFLAGS += -g -O0 CXXFLAGS += -g -O0
else else

View file

@ -211,6 +211,8 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
uint64_t mem_reads = 0; uint64_t mem_reads = 0;
uint64_t mem_writes = 0; uint64_t mem_writes = 0;
uint64_t mem_lat = 0; uint64_t mem_lat = 0;
uint64_t mem_req_counter = 0;
uint64_t mem_ticks = 0;
uint64_t num_cores; uint64_t num_cores;
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_CORES, &num_cores), { CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_CORES, &num_cores), {
@ -221,6 +223,11 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_ISA_FLAGS, &isa_flags), { CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_ISA_FLAGS, &isa_flags), {
return err; return err;
}); });
uint64_t num_mem_bank_ports;
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_MEM_BANKS, &num_mem_bank_ports), {
return err;
});
bool icache_enable = isa_flags & VX_ISA_EXT_ICACHE; bool icache_enable = isa_flags & VX_ISA_EXT_ICACHE;
bool dcache_enable = isa_flags & VX_ISA_EXT_DCACHE; bool dcache_enable = isa_flags & VX_ISA_EXT_DCACHE;
@ -314,7 +321,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
if (num_cores > 1) { if (num_cores > 1) {
uint64_t scrb_total = scrb_alu_per_core + scrb_fpu_per_core + scrb_lsu_per_core + scrb_csrs_per_core + scrb_wctl_per_core; uint64_t scrb_total = scrb_alu_per_core + scrb_fpu_per_core + scrb_lsu_per_core + scrb_csrs_per_core + scrb_wctl_per_core;
int scrb_percent_per_core = calcAvgPercent(scrb_stalls_per_core, cycles_per_core); int scrb_percent_per_core = calcAvgPercent(scrb_stalls_per_core, cycles_per_core);
fprintf(stream, "PERF: core%d: scoreboard stalls=%ld (%d%%) (alu=%d%%, fpu=%d%%, lsu=%d%%, scrs=%d%%, wctl=%d%%)\n" fprintf(stream, "PERF: core%d: scoreboard stalls=%ld (%d%%) (alu=%d%%, fpu=%d%%, lsu=%d%%, csrs=%d%%, wctl=%d%%)\n"
, core_id , core_id
, scrb_stalls_per_core , scrb_stalls_per_core
, scrb_percent_per_core , scrb_percent_per_core
@ -533,6 +540,12 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_LT, core_id, &mem_lat), { CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_LT, core_id, &mem_lat), {
return err; return err;
}); });
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_BANK_CNTR, core_id, &mem_req_counter), {
return err;
});
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_BANK_TICK, core_id, &mem_ticks), {
return err;
});
} }
} break; } break;
default: default:
@ -559,7 +572,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
fprintf(stream, "PERF: scheduler idle=%ld (%d%%)\n", sched_idles, sched_idles_percent); fprintf(stream, "PERF: scheduler idle=%ld (%d%%)\n", sched_idles, sched_idles_percent);
fprintf(stream, "PERF: scheduler stalls=%ld (%d%%)\n", sched_stalls, sched_stalls_percent); fprintf(stream, "PERF: scheduler stalls=%ld (%d%%)\n", sched_stalls, sched_stalls_percent);
fprintf(stream, "PERF: ibuffer stalls=%ld (%d%%)\n", ibuffer_stalls, ibuffer_percent); fprintf(stream, "PERF: ibuffer stalls=%ld (%d%%)\n", ibuffer_stalls, ibuffer_percent);
fprintf(stream, "PERF: scoreboard stalls=%ld (%d%%) (alu=%d%%, fpu=%d%%, lsu=%d%%, scrs=%d%%, wctl=%d%%)\n" fprintf(stream, "PERF: scoreboard stalls=%ld (%d%%) (alu=%d%%, fpu=%d%%, lsu=%d%%, csrs=%d%%, wctl=%d%%)\n"
, scrb_stalls , scrb_stalls
, scrb_percent , scrb_percent
, calcAvgPercent(scrb_alu, scrb_total) , calcAvgPercent(scrb_alu, scrb_total)
@ -599,7 +612,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
int read_hit_ratio = calcRatio(l3cache_read_misses, l3cache_reads); int read_hit_ratio = calcRatio(l3cache_read_misses, l3cache_reads);
int write_hit_ratio = calcRatio(l3cache_write_misses, l3cache_writes); int write_hit_ratio = calcRatio(l3cache_write_misses, l3cache_writes);
int bank_utilization = calcAvgPercent(l3cache_reads + l3cache_writes, l3cache_reads + l3cache_writes + l3cache_bank_stalls); int bank_utilization = calcAvgPercent(l3cache_reads + l3cache_writes, l3cache_reads + l3cache_writes + l3cache_bank_stalls);
int mshr_utilization = calcAvgPercent(l3cache_read_misses + l3cache_write_misses, l3cache_read_misses + l3cache_write_misses + l3cache_mshr_stalls); int mshr_utilization = calcAvgPercent(l3cache_read_misses + l3cache_write_misses, l3cache_read_misses + l3cache_write_misses + l3cache_mshr_stalls);
fprintf(stream, "PERF: l3cache reads=%ld\n", l3cache_reads); fprintf(stream, "PERF: l3cache reads=%ld\n", l3cache_reads);
fprintf(stream, "PERF: l3cache writes=%ld\n", l3cache_writes); fprintf(stream, "PERF: l3cache writes=%ld\n", l3cache_writes);
fprintf(stream, "PERF: l3cache read misses=%ld (hit ratio=%d%%)\n", l3cache_read_misses, read_hit_ratio); fprintf(stream, "PERF: l3cache read misses=%ld (hit ratio=%d%%)\n", l3cache_read_misses, read_hit_ratio);
@ -609,8 +622,10 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
} }
int mem_avg_lat = caclAverage(mem_lat, mem_reads); int mem_avg_lat = caclAverage(mem_lat, mem_reads);
int memory_bank_port_utilization = calcAvgPercent(mem_req_counter, (mem_ticks * num_mem_bank_ports));
fprintf(stream, "PERF: memory requests=%ld (reads=%ld, writes=%ld)\n", (mem_reads + mem_writes), mem_reads, mem_writes); fprintf(stream, "PERF: memory requests=%ld (reads=%ld, writes=%ld)\n", (mem_reads + mem_writes), mem_reads, mem_writes);
fprintf(stream, "PERF: memory latency=%d cycles\n", mem_avg_lat); fprintf(stream, "PERF: memory latency=%d cycles\n", mem_avg_lat);
fprintf(stream, "PERF: memory bank port utilization=%d%%\n", memory_bank_port_utilization);
} break; } break;
default: default:
break; break;

View file

@ -26,7 +26,7 @@ endif
PROJECT := libvortex-xrt.so PROJECT := libvortex-xrt.so
# Debugigng # Debugging
ifdef DEBUG ifdef DEBUG
CXXFLAGS += -g -O0 CXXFLAGS += -g -O0
else else

View file

@ -404,6 +404,9 @@ public:
case VX_CAPS_ISA_FLAGS: case VX_CAPS_ISA_FLAGS:
_value = isa_caps_; _value = isa_caps_;
break; break;
case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS;
break;
default: default:
fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id); fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id);
std::abort(); std::abort();

View file

@ -41,11 +41,11 @@ public:
dram_config["MemorySystem"]["DRAM"]["impl"] = "HBM2"; dram_config["MemorySystem"]["DRAM"]["impl"] = "HBM2";
dram_config["MemorySystem"]["DRAM"]["org"]["preset"] = "HBM2_8Gb"; dram_config["MemorySystem"]["DRAM"]["org"]["preset"] = "HBM2_8Gb";
dram_config["MemorySystem"]["DRAM"]["org"]["density"] = 8192; dram_config["MemorySystem"]["DRAM"]["org"]["density"] = 8192;
dram_config["MemorySystem"]["DRAM"]["org"]["channel"] = 8;
dram_config["MemorySystem"]["DRAM"]["timing"]["preset"] = "HBM2_2Gbps"; dram_config["MemorySystem"]["DRAM"]["timing"]["preset"] = "HBM2_2Gbps";
dram_config["MemorySystem"]["Controller"]["impl"] = "Generic"; dram_config["MemorySystem"]["Controller"]["impl"] = "Generic";
dram_config["MemorySystem"]["Controller"]["Scheduler"]["impl"] = "FRFCFS"; dram_config["MemorySystem"]["Controller"]["Scheduler"]["impl"] = "FRFCFS";
dram_config["MemorySystem"]["Controller"]["RefreshManager"]["impl"] = "AllBank"; dram_config["MemorySystem"]["Controller"]["RefreshManager"]["impl"] = "AllBank";
dram_config["MemorySystem"]["Controller"]["RefreshManager"]["impl"] = "AllBank";
dram_config["MemorySystem"]["Controller"]["RowPolicy"]["impl"] = "OpenRowPolicy"; dram_config["MemorySystem"]["Controller"]["RowPolicy"]["impl"] = "OpenRowPolicy";
{ {
YAML::Node draw_plugin; YAML::Node draw_plugin;
@ -66,7 +66,7 @@ public:
auto original_buf = std::cout.rdbuf(); auto original_buf = std::cout.rdbuf();
std::cout.rdbuf(nullstream.rdbuf()); std::cout.rdbuf(nullstream.rdbuf());
ramulator_frontend_->finalize(); ramulator_frontend_->finalize();
ramulator_memorysystem_->finalize(); ramulator_memorysystem_->finalize();
std::cout.rdbuf(original_buf); std::cout.rdbuf(original_buf);
} }

View file

@ -59,7 +59,7 @@ void RamMemDevice::read(void* data, uint64_t addr, uint64_t size) {
if ((addr & (wordSize_-1)) if ((addr & (wordSize_-1))
|| (addr_end & (wordSize_-1)) || (addr_end & (wordSize_-1))
|| (addr_end <= contents_.size())) { || (addr_end <= contents_.size())) {
std::cout << "lookup of 0x" << std::hex << (addr_end-1) << " failed.\n"; std::cout << "lookup of 0x" << std::hex << (addr_end-1) << std::dec << " failed.\n";
throw BadAddress(); throw BadAddress();
} }
@ -74,7 +74,7 @@ void RamMemDevice::write(const void* data, uint64_t addr, uint64_t size) {
if ((addr & (wordSize_-1)) if ((addr & (wordSize_-1))
|| (addr_end & (wordSize_-1)) || (addr_end & (wordSize_-1))
|| (addr_end <= contents_.size())) { || (addr_end <= contents_.size())) {
std::cout << "lookup of 0x" << std::hex << (addr_end-1) << " failed.\n"; std::cout << "lookup of 0x" << std::hex << (addr_end-1) << std::dec << " failed.\n";
throw BadAddress(); throw BadAddress();
} }
@ -115,8 +115,7 @@ void MemoryUnit::ADecoder::map(uint64_t start, uint64_t end, MemDevice &md) {
void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) { void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) {
mem_accessor_t ma; mem_accessor_t ma;
if (!this->lookup(addr, size, &ma)) { if (!this->lookup(addr, size, &ma)) {
assert(0); std::cout << "lookup of 0x" << std::hex << addr << std::dec << " failed.\n";
std::cout << "lookup of 0x" << std::hex << addr << " failed.\n";
throw BadAddress(); throw BadAddress();
} }
ma.md->read(data, ma.addr, size); ma.md->read(data, ma.addr, size);
@ -125,8 +124,7 @@ void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) {
void MemoryUnit::ADecoder::write(const void* data, uint64_t addr, uint64_t size) { void MemoryUnit::ADecoder::write(const void* data, uint64_t addr, uint64_t size) {
mem_accessor_t ma; mem_accessor_t ma;
if (!this->lookup(addr, size, &ma)) { if (!this->lookup(addr, size, &ma)) {
assert(0); std::cout << "lookup of 0x" << std::hex << addr << std::dec << " failed.\n";
std::cout << "lookup of 0x" << std::hex << addr << " failed.\n";
throw BadAddress(); throw BadAddress();
} }
ma.md->write(data, ma.addr, size); ma.md->write(data, ma.addr, size);
@ -408,7 +406,7 @@ bool ACLManager::check(uint64_t addr, uint64_t size, int flags) const {
while (it != acl_map_.end() && it->first < end) { while (it != acl_map_.end() && it->first < end) {
if (it->second.end > addr) { if (it->second.end > addr) {
if ((it->second.flags & flags) != flags) { if ((it->second.flags & flags) != flags) {
std::cout << "Memory access violation from 0x" << std::hex << addr << " to 0x" << end << ", curent flags=" << it->second.flags << ", access flags=" << flags << std::endl; std::cout << "Memory access violation from 0x" << std::hex << addr << " to 0x" << end << ", curent flags=" << it->second.flags << ", access flags=" << flags << std::dec << std::endl;
return false; // Overlapping entry is missing at least one required flag bit return false; // Overlapping entry is missing at least one required flag bit
} }
addr = it->second.end; // Move to the end of the current matching range addr = it->second.end; // Move to the end of the current matching range
@ -759,4 +757,4 @@ std::pair<uint64_t, uint8_t> MemoryUnit::page_table_walk(uint64_t vAddr_bits, AC
return std::make_pair(cur_base_ppn, flags); return std::make_pair(cur_base_ppn, flags);
} }
#endif #endif

View file

@ -168,23 +168,23 @@ public:
{} {}
void* operator new(size_t /*size*/) { void* operator new(size_t /*size*/) {
return allocator().allocate(); return allocator_.allocate();
} }
void operator delete(void* ptr) { void operator delete(void* ptr) {
allocator().deallocate(ptr); allocator_.deallocate(ptr);
} }
protected: protected:
Func func_; Func func_;
Pkt pkt_; Pkt pkt_;
static MemoryPool<SimCallEvent<Pkt>>& allocator() { static MemoryPool<SimCallEvent<Pkt>> allocator_;
static MemoryPool<SimCallEvent<Pkt>> instance(64);
return instance;
}
}; };
template <typename Pkt>
MemoryPool<SimCallEvent<Pkt>> SimCallEvent<Pkt>::allocator_(64);
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
template <typename Pkt> template <typename Pkt>
@ -201,23 +201,23 @@ public:
{} {}
void* operator new(size_t /*size*/) { void* operator new(size_t /*size*/) {
return allocator().allocate(); return allocator_.allocate();
} }
void operator delete(void* ptr) { void operator delete(void* ptr) {
allocator().deallocate(ptr); allocator_.deallocate(ptr);
} }
protected: protected:
const SimPort<Pkt>* port_; const SimPort<Pkt>* port_;
Pkt pkt_; Pkt pkt_;
static MemoryPool<SimPortEvent<Pkt>>& allocator() { static MemoryPool<SimPortEvent<Pkt>> allocator_;
static MemoryPool<SimPortEvent<Pkt>> instance(64);
return instance;
}
}; };
template <typename Pkt>
MemoryPool<SimPortEvent<Pkt>> SimPortEvent<Pkt>::allocator_(64);
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
class SimContext; class SimContext;

View file

@ -70,4 +70,28 @@ const char* fileExtension(const char* filepath);
#endif #endif
void *aligned_malloc(size_t size, size_t alignment); void *aligned_malloc(size_t size, size_t alignment);
void aligned_free(void *ptr); void aligned_free(void *ptr);
namespace vortex {

// Helper for obtaining a raw pointer of type R to the storage behind a
// Verilator-generated signal object.
//
//   R - pointer type to return (e.g. void*, uint8_t*).
//   W - size selector; call sites pass the block size in bytes
//       (MEM_BLOCK_SIZE). NOTE(review): presumably values above 8 correspond
//       to signals Verilator emits as wide array-like objects and values up
//       to 8 to plain scalar types - confirm against the generated headers.
//
// The primary template is only declared; one of the two partial
// specializations below is selected through the SFINAE 'Enable' parameter.
template <typename R, size_t W, typename Enable = void>
class VDataCast;

// W > 8: the signal object exposes its storage through a data() accessor.
template <typename R, size_t W>
class VDataCast<R, W, typename std::enable_if<(W > 8)>::type> {
public:
  // Returns signal.data() reinterpreted as R.
  template <typename Signal>
  static R get(Signal& signal) {
    auto storage = signal.data();
    return reinterpret_cast<R>(storage);
  }
};

// W <= 8: the signal object itself is the storage, so its address is used.
template <typename R, size_t W>
class VDataCast<R, W, typename std::enable_if<(W <= 8)>::type> {
public:
  // Returns the address of signal reinterpreted as R.
  template <typename Signal>
  static R get(Signal& signal) {
    Signal* storage = &signal;
    return reinterpret_cast<R>(storage);
  }
};

} // namespace vortex

View file

@ -83,13 +83,13 @@ THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count()
VL_FLAGS += -j $(THREADS) VL_FLAGS += -j $(THREADS)
#VL_FLAGS += --threads $(THREADS) #VL_FLAGS += --threads $(THREADS)
# Debugigng # Debugging
ifdef DEBUG ifdef DEBUG
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS) VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
CXXFLAGS += -g -O0 $(DBG_FLAGS) CXXFLAGS += -g -O0 $(DBG_FLAGS)
else else
VL_FLAGS += -DNDEBUG VL_FLAGS += -DNDEBUG
CXXFLAGS += -O3 -DNDEBUG CXXFLAGS += -O2 -DNDEBUG
endif endif
# Enable scope analyzer # Enable scope analyzer
@ -123,7 +123,7 @@ $(DESTDIR)/vortex_afu.h : $(AFU_DIR)/vortex_afu.vh
$(SCRIPT_DIR)/gen_config.py -i $^ -o $@ $(SCRIPT_DIR)/gen_config.py -i $^ -o $@
$(DESTDIR)/$(PROJECT): $(SRCS) $(DESTDIR)/vortex_afu.h $(SCOPE_JSON) $(DESTDIR)/$(PROJECT): $(SRCS) $(DESTDIR)/vortex_afu.h $(SCOPE_JSON)
verilator --build --exe -O3 $(VL_FLAGS) --cc $(TOP) --top-module $(TOP) $(SRCS) -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)' --Mdir $@.obj_dir -o $@ verilator --build --exe $(VL_FLAGS) --cc $(TOP) --top-module $(TOP) $(SRCS) -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)' --Mdir $@.obj_dir -o $@
clean: clean:
rm -rf $(DESTDIR)/$(PROJECT).obj_dir rm -rf $(DESTDIR)/$(PROJECT).obj_dir

View file

@ -35,13 +35,13 @@
#include <unordered_map> #include <unordered_map>
#include <util.h> #include <util.h>
#ifndef MEMORY_BANKS //#ifndef MEMORY_BANKS
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS #ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
#define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS #define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS
#else #else
#define MEMORY_BANKS 2 #define MEMORY_BANKS 2
#endif #endif
#endif //#endif
#ifndef MEM_CLOCK_RATIO #ifndef MEM_CLOCK_RATIO
#define MEM_CLOCK_RATIO 1 #define MEM_CLOCK_RATIO 1
@ -380,7 +380,7 @@ private:
device_->vcp2af_sRxPort_c0_hdr_resp_type = 0; device_->vcp2af_sRxPort_c0_hdr_resp_type = 0;
memcpy(device_->vcp2af_sRxPort_c0_data, cci_rd_it->data.data(), CACHE_BLOCK_SIZE); memcpy(device_->vcp2af_sRxPort_c0_data, cci_rd_it->data.data(), CACHE_BLOCK_SIZE);
device_->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata; device_->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata;
/*printf("%0ld: [sim] CCI Rd Rsp: addr=%ld, mdata=%d, data=", timestamp, cci_rd_it->addr, cci_rd_it->mdata); /*printf("%0ld: [sim] CCI Rd Rsp: addr=0x%lx, mdata=0x%x, data=0x", timestamp, cci_rd_it->addr, cci_rd_it->mdata);
for (int i = 0; i < CACHE_BLOCK_SIZE; ++i) for (int i = 0; i < CACHE_BLOCK_SIZE; ++i)
printf("%02x", cci_rd_it->data[CACHE_BLOCK_SIZE-1-i]); printf("%02x", cci_rd_it->data[CACHE_BLOCK_SIZE-1-i]);
printf("\n");*/ printf("\n");*/
@ -398,7 +398,7 @@ private:
cci_req.mdata = device_->af2cp_sTxPort_c0_hdr_mdata; cci_req.mdata = device_->af2cp_sTxPort_c0_hdr_mdata;
auto host_ptr = (uint64_t*)(device_->af2cp_sTxPort_c0_hdr_address * CACHE_BLOCK_SIZE); auto host_ptr = (uint64_t*)(device_->af2cp_sTxPort_c0_hdr_address * CACHE_BLOCK_SIZE);
memcpy(cci_req.data.data(), host_ptr, CACHE_BLOCK_SIZE); memcpy(cci_req.data.data(), host_ptr, CACHE_BLOCK_SIZE);
//printf("%0ld: [sim] CCI Rd Req: addr=%ld, mdata=%d\n", timestamp, device_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata); //printf("%0ld: [sim] CCI Rd Req: addr=0x%lx, mdata=0x%x\n", timestamp, device_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata);
cci_reads_.emplace_back(cci_req); cci_reads_.emplace_back(cci_req);
} }
@ -453,7 +453,7 @@ private:
} }
} }
/*printf("%0ld: [sim] MEM Wr Req: bank=%d, addr=%x, data=", timestamp, b, byte_addr); /*printf("%0ld: [sim] MEM Wr Req: bank=%d, 0x%x, data=0x", timestamp, b, byte_addr);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) { for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]); printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]);
} }

View file

@ -65,7 +65,7 @@ THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count()
VL_FLAGS += -j $(THREADS) VL_FLAGS += -j $(THREADS)
#VL_FLAGS += --threads $(THREADS) #VL_FLAGS += --threads $(THREADS)
# Debugigng # Debugging
ifdef DEBUG ifdef DEBUG
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS) VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
CXXFLAGS += -g -O0 $(DBG_FLAGS) CXXFLAGS += -g -O0 $(DBG_FLAGS)

View file

@ -39,6 +39,7 @@ typedef VVortex Device;
#include <unordered_map> #include <unordered_map>
#include <dram_sim.h> #include <dram_sim.h>
#include <util.h>
#ifndef MEMORY_BANKS #ifndef MEMORY_BANKS
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS #ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
@ -316,11 +317,11 @@ private:
auto mem_rsp_it = pending_mem_reqs_.begin(); auto mem_rsp_it = pending_mem_reqs_.begin();
auto mem_rsp = *mem_rsp_it; auto mem_rsp = *mem_rsp_it;
/* /*
printf("%0ld: [sim] MEM Rd Rsp: addr=%0lx, data=", timestamp, mem_rsp->addr); printf("%0ld: [sim] MEM Rd Rsp: addr=0x%0lx, data=0x", timestamp, mem_rsp->addr);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) { for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
printf("%02x", mem_rsp->block[(MEM_BLOCK_SIZE-1)-i]); printf("%02x", mem_rsp->block[i]);
} }
printf("\n"); printf("\n");
*/ */
device_->m_axi_rvalid[0] = 1; device_->m_axi_rvalid[0] = 1;
device_->m_axi_rid[0] = mem_rsp->tag; device_->m_axi_rid[0] = mem_rsp->tag;
@ -347,7 +348,7 @@ private:
auto mem_rsp_it = pending_mem_reqs_.begin(); auto mem_rsp_it = pending_mem_reqs_.begin();
auto mem_rsp = *mem_rsp_it; auto mem_rsp = *mem_rsp_it;
/* /*
printf("%0ld: [sim] MEM Wr Rsp: addr=%0lx\n", timestamp, mem_rsp->addr); printf("%0ld: [sim] MEM Wr Rsp: addr=0x%0lx\n", timestamp, mem_rsp->addr);
*/ */
device_->m_axi_bvalid[0] = 1; device_->m_axi_bvalid[0] = 1;
device_->m_axi_bid[0] = mem_rsp->tag; device_->m_axi_bid[0] = mem_rsp->tag;
@ -387,11 +388,15 @@ private:
} else { } else {
// process writes // process writes
/* /*
printf("%0ld: [sim] MEM Wr: addr=%0x, byteen=%0lx, data=", timestamp, base_addr, byteen); printf("%0ld: [sim] MEM Wr: addr=0x%0lx, byteen=0x", timestamp, base_addr);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) { for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) {
printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]); printf("%x", (int)((byteen >> (4 * i)) & 0xf));
} }
printf("\n"); printf(", data=0x");
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
printf("%02x", data[i]);
}
printf("\n");
*/ */
for (int i = 0; i < MEM_BLOCK_SIZE; i++) { for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) { if ((byteen >> i) & 0x1) {
@ -459,13 +464,13 @@ private:
auto mem_rsp_it = pending_mem_reqs_.begin(); auto mem_rsp_it = pending_mem_reqs_.begin();
auto mem_rsp = *mem_rsp_it; auto mem_rsp = *mem_rsp_it;
/* /*
printf("%0ld: [sim] MEM Rd: tag=%0lx, addr=%0lx, data=", timestamp, mem_rsp->tag, mem_rsp->addr); printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) { for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
printf("%02x", mem_rsp->block[(MEM_BLOCK_SIZE-1)-i]); printf("%02x", mem_rsp->block[i]);
} }
printf("\n"); printf("\n");
*/ */
memcpy(device_->mem_rsp_data.data(), mem_rsp->block.data(), MEM_BLOCK_SIZE); memcpy(VDataCast<void*, MEM_BLOCK_SIZE>::get(device_->mem_rsp_data), mem_rsp->block.data(), MEM_BLOCK_SIZE);
device_->mem_rsp_tag = mem_rsp->tag; device_->mem_rsp_tag = mem_rsp->tag;
pending_mem_reqs_.erase(mem_rsp_it); pending_mem_reqs_.erase(mem_rsp_it);
mem_rd_rsp_active_ = true; mem_rd_rsp_active_ = true;
@ -480,7 +485,7 @@ private:
uint64_t byte_addr = (device_->mem_req_addr * MEM_BLOCK_SIZE); uint64_t byte_addr = (device_->mem_req_addr * MEM_BLOCK_SIZE);
if (device_->mem_req_rw) { if (device_->mem_req_rw) {
auto byteen = device_->mem_req_byteen; auto byteen = device_->mem_req_byteen;
auto data = (uint8_t*)(device_->mem_req_data.data()); auto data = VDataCast<uint8_t*, MEM_BLOCK_SIZE>::get(device_->mem_req_data);
if (byte_addr >= uint64_t(IO_COUT_ADDR) if (byte_addr >= uint64_t(IO_COUT_ADDR)
&& byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { && byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
@ -499,11 +504,15 @@ private:
} else { } else {
// process writes // process writes
/* /*
printf("%0ld: [sim] MEM Wr: tag=%0lx, addr=%0x, byteen=%0lx, data=", timestamp, device_->mem_req_tag, byte_addr, byteen); printf("%0ld: [sim] MEM Wr Req: tag=0x%0lx, addr=0x%0lx, byteen=0x", timestamp, device_->mem_req_tag, byte_addr);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) { for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) {
printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]); printf("%x", (int)((byteen >> (4 * i)) & 0xf));
} }
printf("\n"); printf(", data=0x");
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
printf("%d=%02x,", i, data[i]);
}
printf("\n");
*/ */
for (int i = 0; i < MEM_BLOCK_SIZE; i++) { for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) { if ((byteen >> i) & 0x1) {
@ -530,7 +539,7 @@ private:
ram_->read(mem_req->block.data(), byte_addr, MEM_BLOCK_SIZE); ram_->read(mem_req->block.data(), byte_addr, MEM_BLOCK_SIZE);
pending_mem_reqs_.emplace_back(mem_req); pending_mem_reqs_.emplace_back(mem_req);
//printf("%0ld: [sim] MEM Rd Req: addr=%0x, tag=%0lx\n", timestamp, byte_addr, device_->mem_req_tag); //printf("%0ld: [sim] MEM Rd Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag);
// send dram request // send dram request
dram_queue_.push(mem_req); dram_queue_.push(mem_req);

View file

@ -24,7 +24,7 @@ LDFLAGS += -Wl,-rpath,$(THIRD_PARTY_DIR)/ramulator -L$(THIRD_PARTY_DIR)/ramulato
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
SRCS += $(SRC_DIR)/processor.cpp $(SRC_DIR)/cluster.cpp $(SRC_DIR)/socket.cpp $(SRC_DIR)/core.cpp $(SRC_DIR)/emulator.cpp $(SRC_DIR)/decode.cpp $(SRC_DIR)/execute.cpp $(SRC_DIR)/func_unit.cpp $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp SRCS += $(SRC_DIR)/processor.cpp $(SRC_DIR)/cluster.cpp $(SRC_DIR)/socket.cpp $(SRC_DIR)/core.cpp $(SRC_DIR)/emulator.cpp $(SRC_DIR)/decode.cpp $(SRC_DIR)/execute.cpp $(SRC_DIR)/func_unit.cpp $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp
# Debugigng # Debugging
ifdef DEBUG ifdef DEBUG
CXXFLAGS += -g -O0 -DDEBUG_LEVEL=$(DEBUG) CXXFLAGS += -g -O0 -DDEBUG_LEVEL=$(DEBUG)
#CXXFLAGS += -g -O0 -DDEBUG_LEVEL=$(DEBUG) -fsanitize=address -fno-omit-frame-pointer #CXXFLAGS += -g -O0 -DDEBUG_LEVEL=$(DEBUG) -fsanitize=address -fno-omit-frame-pointer

View file

@ -77,8 +77,8 @@ public:
caches_.at(i)->CoreRspPorts.at(j).bind(&mem_arbs.at(j)->RspOut.at(i)); caches_.at(i)->CoreRspPorts.at(j).bind(&mem_arbs.at(j)->RspOut.at(i));
} }
caches_.at(i)->MemReqPort.bind(&cache_arb->ReqIn.at(i)); caches_.at(i)->MemReqPorts.at(0).bind(&cache_arb->ReqIn.at(i));
cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPort); cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPorts.at(0));
} }
cache_arb->ReqOut.at(0).bind(&this->MemReqPort); cache_arb->ReqOut.at(0).bind(&this->MemReqPort);

Some files were not shown because too many files have changed in this diff Show more