mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-25 06:17:38 -04:00
merge from master branch
This commit is contained in:
commit
e91eb4aed4
124 changed files with 1933 additions and 1718 deletions
2
.github/workflows/ci.yml
vendored
2
.github/workflows/ci.yml
vendored
|
@ -219,7 +219,9 @@ jobs:
|
||||||
runs-on: ubuntu-20.04
|
runs-on: ubuntu-20.04
|
||||||
needs: build_vm
|
needs: build_vm
|
||||||
strategy:
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
|
name: [regression, opencl, cache, config1, config2, debug, stress, vm]
|
||||||
xlen: [32, 64]
|
xlen: [32, 64]
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
|
|
|
@ -44,10 +44,10 @@ clean: clean-build
|
||||||
$(MAKE) -C $(VORTEX_HOME)/third_party clean
|
$(MAKE) -C $(VORTEX_HOME)/third_party clean
|
||||||
|
|
||||||
# Install setup
|
# Install setup
|
||||||
KERNEL_INC_DST = $(PREFIX)/kernel/include
|
KERNEL_INC_DST = $(INSTALLDIR)/kernel/include
|
||||||
KERNEL_LIB_DST = $(PREFIX)/kernel/lib$(XLEN)
|
KERNEL_LIB_DST = $(INSTALLDIR)/kernel/lib$(XLEN)
|
||||||
RUNTIME_INC_DST = $(PREFIX)/runtime/include
|
RUNTIME_INC_DST = $(INSTALLDIR)/runtime/include
|
||||||
RUNTIME_LIB_DST = $(PREFIX)/runtime/lib
|
RUNTIME_LIB_DST = $(INSTALLDIR)/runtime/lib
|
||||||
|
|
||||||
KERNEL_HEADERS = $(wildcard $(VORTEX_HOME)/kernel/include/*.h)
|
KERNEL_HEADERS = $(wildcard $(VORTEX_HOME)/kernel/include/*.h)
|
||||||
KERNEL_LIBS = $(wildcard kernel/*.a)
|
KERNEL_LIBS = $(wildcard kernel/*.a)
|
||||||
|
|
69
README.md
69
README.md
|
@ -1,5 +1,3 @@
|
||||||
[](https://travis-ci.com/vortexgpgpu/vortex)
|
|
||||||
|
|
||||||
# Vortex GPGPU
|
# Vortex GPGPU
|
||||||
|
|
||||||
Vortex is a full-stack open-source RISC-V GPGPU.
|
Vortex is a full-stack open-source RISC-V GPGPU.
|
||||||
|
@ -47,20 +45,20 @@ More detailed build instructions can be found [here](docs/install_vortex.md).
|
||||||
- [Yosys](https://github.com/YosysHQ/yosys)
|
- [Yosys](https://github.com/YosysHQ/yosys)
|
||||||
- [Sv2v](https://github.com/zachjs/sv2v)
|
- [Sv2v](https://github.com/zachjs/sv2v)
|
||||||
### Install development tools
|
### Install development tools
|
||||||
```
|
```sh
|
||||||
sudo apt-get install build-essential
|
sudo apt-get install build-essential
|
||||||
sudo apt-get install binutils
|
sudo apt-get install binutils
|
||||||
sudo apt-get install python
|
sudo apt-get install python
|
||||||
sudo apt-get install uuid-dev
|
sudo apt-get install uuid-dev
|
||||||
sudo apt-get install git
|
sudo apt-get install git
|
||||||
```
|
```
|
||||||
### Install Vortex codebase
|
### Install Vortex codebase
|
||||||
|
```sh
|
||||||
|
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
|
||||||
|
cd vortex
|
||||||
```
|
```
|
||||||
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git -b vortex_vm
|
|
||||||
cd vortex
|
|
||||||
```
|
|
||||||
|
|
||||||
### Configure your build folder
|
### Configure your build folder
|
||||||
|
```sh
|
||||||
#
|
#
|
||||||
# By default, the toolchain is installed under the /opt folder; this location can be overridden by setting --tooldir.
|
# By default, the toolchain is installed under the /opt folder; this location can be overridden by setting --tooldir.
|
||||||
# This is the example for volvo server
|
# This is the example for volvo server
|
||||||
|
@ -72,38 +70,45 @@ More detailed build instructions can be found [here](docs/install_vortex.md).
|
||||||
../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-2024-08-09 --prefix=$OUT_DIR
|
../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-2024-08-09 --prefix=$OUT_DIR
|
||||||
# Run the following instead to enable virtual memory feature in compilation
|
# Run the following instead to enable virtual memory feature in compilation
|
||||||
../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-2024-08-09 --prefix=$OUT_DIR --vm_enable=1
|
../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-2024-08-09 --prefix=$OUT_DIR --vm_enable=1
|
||||||
|
```
|
||||||
### Install prebuilt toolchain
|
### Install prebuilt toolchain
|
||||||
# We will use the precompiled tools in the volvo toolchain directory
|
# We will use the precompiled tools in the volvo toolchain directory
|
||||||
### set environment variables
|
### set environment variables
|
||||||
|
```sh
|
||||||
# should always run before using the toolchain!
|
# should always run before using the toolchain!
|
||||||
source ./ci/toolchain_env.sh
|
source ./ci/toolchain_env.sh
|
||||||
|
```
|
||||||
### Building Vortex
|
### Building Vortex
|
||||||
make -s
|
```sh
|
||||||
|
make -s
|
||||||
|
```
|
||||||
|
|
||||||
### Quick demo running vecadd OpenCL kernel on 2 cores
|
### Quick demo running vecadd OpenCL kernel on 2 cores
|
||||||
$ ./ci/blackbox.sh --cores=2 --app=vecadd
|
```sh
|
||||||
|
./ci/blackbox.sh --cores=2 --app=vecadd
|
||||||
|
```
|
||||||
|
|
||||||
### Common Developer Tips
|
### Common Developer Tips
|
||||||
- Installing Vortex kernel and runtime libraries to use with external tools requires passing --prefix=<install-path> to the configure script.
|
- Installing Vortex kernel and runtime libraries to use with external tools requires passing --prefix=<install-path> to the configure script.
|
||||||
```sh
|
```sh
|
||||||
$ ../configure --xlen=32 --tooldir=$HOME/tools --prefix=<install-path>
|
../configure --xlen=32 --tooldir=$HOME/tools --prefix=<install-path>
|
||||||
$ make -s
|
make -s
|
||||||
$ make install
|
make install
|
||||||
``````
|
```
|
||||||
- Building Vortex 64-bit simply requires using --xlen=64 configure option.
|
- Building Vortex 64-bit simply requires using --xlen=64 configure option.
|
||||||
```sh
|
```sh
|
||||||
$ ../configure --xlen=64 --tooldir=$HOME/tools
|
../configure --xlen=64 --tooldir=$HOME/tools
|
||||||
```
|
```
|
||||||
- Sourcing "./ci/toolchain_env.sh" is required every time you start a new terminal. We recommend adding "source <build-path>/ci/toolchain_env.sh" to your ~/.bashrc file to automate the process at login.
|
- Sourcing "./ci/toolchain_env.sh" is required every time you start a new terminal. We recommend adding "source <build-path>/ci/toolchain_env.sh" to your ~/.bashrc file to automate the process at login.
|
||||||
```sh
|
```sh
|
||||||
$ echo "source <build-path>/ci/toolchain_env.sh" >> ~/.bashrc
|
echo "source <build-path>/ci/toolchain_env.sh" >> ~/.bashrc
|
||||||
```
|
```
|
||||||
- Making changes to Makefiles in your source tree or adding new folders will require executing the "configure" script again to get it propagated into your build folder.
|
- Making changes to Makefiles in your source tree or adding new folders will require executing the "configure" script again to get it propagated into your build folder.
|
||||||
```sh
|
```sh
|
||||||
$ ../configure
|
../configure
|
||||||
```
|
```
|
||||||
- To debug the GPU, you can generate a "run.log" trace. See /docs/debugging.md for more information.
|
- To debug the GPU, you can generate a "run.log" trace. See /docs/debugging.md for more information.
|
||||||
```sh
|
```sh
|
||||||
$ ./ci/blackbox.sh --app=demo --debug=3
|
./ci/blackbox.sh --app=demo --debug=3
|
||||||
```
|
```
|
||||||
- For additional information, check out the /docs.
|
- For additional information, check out the /docs.
|
||||||
|
|
|
@ -23,6 +23,8 @@ rm -f blackbox.*.cache
|
||||||
|
|
||||||
XLEN=${XLEN:=@XLEN@}
|
XLEN=${XLEN:=@XLEN@}
|
||||||
|
|
||||||
|
XSIZE=$((XLEN / 8))
|
||||||
|
|
||||||
echo "Vortex Regression Test: XLEN=$XLEN"
|
echo "Vortex Regression Test: XLEN=$XLEN"
|
||||||
|
|
||||||
unittest()
|
unittest()
|
||||||
|
@ -99,11 +101,11 @@ regression()
|
||||||
|
|
||||||
# test global barrier
|
# test global barrier
|
||||||
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tgbar" --cores=2
|
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tgbar" --cores=2
|
||||||
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-n1 -tgbar" --cores=2
|
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=opae --app=dogfood --args="-n1 -tgbar" --cores=2
|
||||||
|
|
||||||
# test local barrier
|
# test local barrier
|
||||||
./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar"
|
./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar"
|
||||||
./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-n1 -tbar"
|
./ci/blackbox.sh --driver=opae --app=dogfood --args="-n1 -tbar"
|
||||||
|
|
||||||
echo "regression tests done!"
|
echo "regression tests done!"
|
||||||
}
|
}
|
||||||
|
@ -148,32 +150,54 @@ vm(){
|
||||||
echo "vm tests done!"
|
echo "vm tests done!"
|
||||||
}
|
}
|
||||||
|
|
||||||
test_csv_trace()
|
cache()
|
||||||
{
|
{
|
||||||
# test CSV trace generation
|
echo "begin cache tests..."
|
||||||
make -C sim/simx clean && DEBUG=3 make -C sim/simx > /dev/null
|
|
||||||
make -C sim/rtlsim clean && DEBUG=3 CONFIGS="-DGPR_RESET" make -C sim/rtlsim > /dev/null
|
|
||||||
make -C tests/riscv/isa run-simx-32im > run_simx.log
|
|
||||||
make -C tests/riscv/isa run-rtlsim-32im > run_rtlsim.log
|
|
||||||
./ci/trace_csv.py -tsimx run_simx.log -otrace_simx.csv
|
|
||||||
./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv
|
|
||||||
diff trace_rtlsim.csv trace_simx.csv
|
|
||||||
# clean build
|
|
||||||
make -C sim/simx clean
|
|
||||||
make -C sim/rtlsim clean
|
|
||||||
}
|
|
||||||
|
|
||||||
debug()
|
# disable local memory
|
||||||
{
|
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo --perf=1
|
||||||
echo "begin debugging tests..."
|
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=demo --perf=1
|
||||||
|
|
||||||
test_csv_trace
|
# disable L1 cache
|
||||||
|
CONFIGS="-DL1_DISABLE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||||
|
CONFIGS="-DL1_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||||
|
CONFIGS="-DDCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||||
|
CONFIGS="-DICACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||||
|
|
||||||
./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
|
# reduce l1 line size
|
||||||
./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
|
CONFIGS="-DL1_LINE_SIZE=$XSIZE" ./ci/blackbox.sh --driver=rtlsim --app=io_addr
|
||||||
./ci/blackbox.sh --driver=opae --cores=1 --scope --app=demo --args="-n1"
|
CONFIGS="-DL1_LINE_SIZE=$XSIZE" ./ci/blackbox.sh --driver=simx --app=io_addr
|
||||||
|
CONFIGS="-DL1_LINE_SIZE=$XSIZE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||||
|
CONFIGS="-DL1_LINE_SIZE=$XSIZE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||||
|
|
||||||
echo "debugging tests done!"
|
# test cache ways
|
||||||
|
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||||
|
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||||
|
|
||||||
|
# test cache banking
|
||||||
|
CONFIGS="-DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||||
|
CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||||
|
CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||||
|
CONFIGS="-DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||||
|
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||||
|
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||||
|
|
||||||
|
# test writeback
|
||||||
|
CONFIGS="-DDCACHE_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --app=mstress
|
||||||
|
CONFIGS="-DDCACHE_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --app=mstress
|
||||||
|
CONFIGS="-DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
|
||||||
|
CONFIGS="-DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
|
||||||
|
|
||||||
|
# cache clustering
|
||||||
|
CONFIGS="-DSOCKET_SIZE=4 -DNUM_DCACHES=4 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --cores=4 --warps=1 --threads=2
|
||||||
|
|
||||||
|
# L2/L3
|
||||||
|
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=rtlsim --cores=4 --l2cache --app=diverge --args="-n1"
|
||||||
|
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=4 --l2cache --app=diverge --args="-n1"
|
||||||
|
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=diverge --args="-n1"
|
||||||
|
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=diverge --args="-n1"
|
||||||
|
|
||||||
|
echo "begin cache tests..."
|
||||||
}
|
}
|
||||||
|
|
||||||
config1()
|
config1()
|
||||||
|
@ -189,10 +213,12 @@ config1()
|
||||||
./ci/blackbox.sh --driver=simx --warps=8 --threads=16 --app=diverge
|
./ci/blackbox.sh --driver=simx --warps=8 --threads=16 --app=diverge
|
||||||
|
|
||||||
# cores clustering
|
# cores clustering
|
||||||
./ci/blackbox.sh --driver=rtlsim --cores=4 --clusters=1 --app=diverge --args="-n1"
|
./ci/blackbox.sh --driver=rtlsim --cores=4 --app=diverge --args="-n1"
|
||||||
|
./ci/blackbox.sh --driver=simx --cores=4 --app=diverge --args="-n1"
|
||||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=diverge --args="-n1"
|
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=diverge --args="-n1"
|
||||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=1 --app=diverge --args="-n1"
|
./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --app=diverge --args="-n1"
|
||||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --app=diverge --args="-n1"
|
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=diverge --args="-n1"
|
||||||
|
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --app=diverge --args="-n1"
|
||||||
|
|
||||||
# issue width
|
# issue width
|
||||||
CONFIGS="-DISSUE_WIDTH=2" ./ci/blackbox.sh --driver=rtlsim --app=diverge
|
CONFIGS="-DISSUE_WIDTH=2" ./ci/blackbox.sh --driver=rtlsim --app=diverge
|
||||||
|
@ -212,22 +238,19 @@ config1()
|
||||||
CONFIGS="-DISSUE_WIDTH=2 -DNUM_FPU_BLOCK=1 -DNUM_FPU_LANES=2" ./ci/blackbox.sh --driver=simx --app=vecaddx
|
CONFIGS="-DISSUE_WIDTH=2 -DNUM_FPU_BLOCK=1 -DNUM_FPU_LANES=2" ./ci/blackbox.sh --driver=simx --app=vecaddx
|
||||||
CONFIGS="-DISSUE_WIDTH=4 -DNUM_FPU_BLOCK=4 -DNUM_FPU_LANES=4" ./ci/blackbox.sh --driver=simx --app=vecaddx
|
CONFIGS="-DISSUE_WIDTH=4 -DNUM_FPU_BLOCK=4 -DNUM_FPU_LANES=4" ./ci/blackbox.sh --driver=simx --app=vecaddx
|
||||||
|
|
||||||
|
# FPU's PE scaling
|
||||||
|
CONFIGS="-DFMA_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfmadd"
|
||||||
|
CONFIGS="-DFCVT_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tftoi"
|
||||||
|
CONFIGS="-DFDIV_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfdiv"
|
||||||
|
CONFIGS="-DFSQRT_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfsqrt"
|
||||||
|
CONFIGS="-DFNCP_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfclamp"
|
||||||
|
|
||||||
# LSU scaling
|
# LSU scaling
|
||||||
CONFIGS="-DISSUE_WIDTH=2 -DNUM_LSU_BLOCK=1 -DNUM_LSU_LANES=2" ./ci/blackbox.sh --driver=rtlsim --app=vecaddx
|
CONFIGS="-DISSUE_WIDTH=2 -DNUM_LSU_BLOCK=1 -DNUM_LSU_LANES=2" ./ci/blackbox.sh --driver=rtlsim --app=vecaddx
|
||||||
CONFIGS="-DISSUE_WIDTH=4 -DNUM_LSU_BLOCK=4 -DNUM_LSU_LANES=4" ./ci/blackbox.sh --driver=rtlsim --app=vecaddx
|
CONFIGS="-DISSUE_WIDTH=4 -DNUM_LSU_BLOCK=4 -DNUM_LSU_LANES=4" ./ci/blackbox.sh --driver=rtlsim --app=vecaddx
|
||||||
CONFIGS="-DISSUE_WIDTH=2 -DNUM_LSU_BLOCK=1 -DNUM_LSU_LANES=2" ./ci/blackbox.sh --driver=simx --app=vecaddx
|
CONFIGS="-DISSUE_WIDTH=2 -DNUM_LSU_BLOCK=1 -DNUM_LSU_LANES=2" ./ci/blackbox.sh --driver=simx --app=vecaddx
|
||||||
CONFIGS="-DISSUE_WIDTH=4 -DNUM_LSU_BLOCK=4 -DNUM_LSU_LANES=4" ./ci/blackbox.sh --driver=simx --app=vecaddx
|
CONFIGS="-DISSUE_WIDTH=4 -DNUM_LSU_BLOCK=4 -DNUM_LSU_LANES=4" ./ci/blackbox.sh --driver=simx --app=vecaddx
|
||||||
|
|
||||||
# L2/L3
|
|
||||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=diverge --args="-n1"
|
|
||||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l3cache --app=diverge --args="-n1"
|
|
||||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr --args="-n1"
|
|
||||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --l2cache --app=diverge --args="-n1"
|
|
||||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=4 --l2cache --l3cache --app=diverge --args="-n1"
|
|
||||||
|
|
||||||
# multiple L1 caches per socket
|
|
||||||
CONFIGS="-DSOCKET_SIZE=4 -DNUM_DCACHES=2 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --cores=8 --warps=1 --threads=2
|
|
||||||
|
|
||||||
echo "configuration-1 tests done!"
|
echo "configuration-1 tests done!"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -262,55 +285,63 @@ config2()
|
||||||
# disabling ZICOND extension
|
# disabling ZICOND extension
|
||||||
CONFIGS="-DEXT_ZICOND_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo
|
CONFIGS="-DEXT_ZICOND_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo
|
||||||
|
|
||||||
# disable local memory
|
|
||||||
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo --perf=1
|
|
||||||
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=demo --perf=1
|
|
||||||
|
|
||||||
# test AXI bus
|
# test AXI bus
|
||||||
AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --app=demo
|
AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --app=mstress
|
||||||
|
|
||||||
# disable L1 cache
|
|
||||||
CONFIGS="-DL1_DISABLE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
|
||||||
CONFIGS="-DL1_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
|
||||||
CONFIGS="-DDCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
|
||||||
CONFIGS="-DICACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
|
||||||
|
|
||||||
# reduce l1 line size
|
|
||||||
CONFIGS="-DL1_LINE_SIZE=$XLEN/8" ./ci/blackbox.sh --driver=rtlsim --app=io_addr
|
|
||||||
CONFIGS="-DL1_LINE_SIZE=$XLEN/8" ./ci/blackbox.sh --driver=simx --app=io_addr
|
|
||||||
CONFIGS="-DL1_LINE_SIZE=$XLEN/8 -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
|
||||||
CONFIGS="-DL1_LINE_SIZE=$XLEN/8 -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
|
||||||
|
|
||||||
# test cache ways
|
|
||||||
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
|
||||||
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
|
||||||
|
|
||||||
# test cache banking
|
|
||||||
CONFIGS="-DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
|
||||||
CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
|
||||||
CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
|
||||||
CONFIGS="-DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
|
||||||
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
|
||||||
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
|
||||||
|
|
||||||
# test 128-bit MEM block
|
# test 128-bit MEM block
|
||||||
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --app=demo
|
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||||
|
|
||||||
|
# test XLEN-bit MEM block
|
||||||
|
CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||||
|
CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=simx --app=mstress
|
||||||
|
|
||||||
|
# test memory coalescing
|
||||||
|
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=rtlsim --app=mstress --threads=8
|
||||||
|
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8
|
||||||
|
|
||||||
# test single-bank DRAM
|
# test single-bank DRAM
|
||||||
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --app=demo
|
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||||
|
|
||||||
# test 27-bit DRAM address
|
# test 27-bit DRAM address
|
||||||
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --app=demo
|
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||||
|
|
||||||
echo "configuration-2 tests done!"
|
echo "configuration-2 tests done!"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test_csv_trace()
|
||||||
|
{
|
||||||
|
# test CSV trace generation
|
||||||
|
make -C sim/simx clean && DEBUG=3 make -C sim/simx > /dev/null
|
||||||
|
make -C sim/rtlsim clean && DEBUG=3 CONFIGS="-DGPR_RESET" make -C sim/rtlsim > /dev/null
|
||||||
|
make -C tests/riscv/isa run-simx-32im > run_simx.log
|
||||||
|
make -C tests/riscv/isa run-rtlsim-32im > run_rtlsim.log
|
||||||
|
./ci/trace_csv.py -tsimx run_simx.log -otrace_simx.csv
|
||||||
|
./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv
|
||||||
|
diff trace_rtlsim.csv trace_simx.csv
|
||||||
|
# clean build
|
||||||
|
make -C sim/simx clean
|
||||||
|
make -C sim/rtlsim clean
|
||||||
|
}
|
||||||
|
|
||||||
|
debug()
|
||||||
|
{
|
||||||
|
echo "begin debugging tests..."
|
||||||
|
|
||||||
|
test_csv_trace
|
||||||
|
|
||||||
|
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
|
||||||
|
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
|
||||||
|
./ci/blackbox.sh --driver=opae --cores=1 --scope --app=demo --args="-n1"
|
||||||
|
|
||||||
|
echo "debugging tests done!"
|
||||||
|
}
|
||||||
|
|
||||||
stress()
|
stress()
|
||||||
{
|
{
|
||||||
echo "begin stress tests..."
|
echo "begin stress tests..."
|
||||||
|
|
||||||
# test verilator reset values
|
# test verilator reset values
|
||||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood
|
CONFIGS="-DVERILATOR_RESET_VALUE=1 -DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood
|
||||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --args="-n128" --l2cache
|
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --args="-n128" --l2cache
|
||||||
|
|
||||||
echo "stress tests done!"
|
echo "stress tests done!"
|
||||||
|
@ -329,19 +360,14 @@ synthesis()
|
||||||
show_usage()
|
show_usage()
|
||||||
{
|
{
|
||||||
echo "Vortex Regression Test"
|
echo "Vortex Regression Test"
|
||||||
echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--config1] [--config2] [--debug] [--stress] [--synthesis] [--all] [--h|--help]"
|
echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--stress] [--synthesis] [--all] [--h|--help]"
|
||||||
}
|
}
|
||||||
|
|
||||||
start=$SECONDS
|
|
||||||
|
|
||||||
declare -a tests=()
|
declare -a tests=()
|
||||||
clean=0
|
clean=0
|
||||||
|
|
||||||
while [ "$1" != "" ]; do
|
while [ "$1" != "" ]; do
|
||||||
case $1 in
|
case $1 in
|
||||||
--vm )
|
|
||||||
tests+=("vm")
|
|
||||||
;;
|
|
||||||
--clean )
|
--clean )
|
||||||
clean=1
|
clean=1
|
||||||
;;
|
;;
|
||||||
|
@ -360,6 +386,12 @@ while [ "$1" != "" ]; do
|
||||||
--opencl )
|
--opencl )
|
||||||
tests+=("opencl")
|
tests+=("opencl")
|
||||||
;;
|
;;
|
||||||
|
--cache )
|
||||||
|
tests+=("cache")
|
||||||
|
;;
|
||||||
|
--vm )
|
||||||
|
tests+=("vm")
|
||||||
|
;;
|
||||||
--config1 )
|
--config1 )
|
||||||
tests+=("config1")
|
tests+=("config1")
|
||||||
;;
|
;;
|
||||||
|
@ -382,6 +414,7 @@ while [ "$1" != "" ]; do
|
||||||
tests+=("kernel")
|
tests+=("kernel")
|
||||||
tests+=("regression")
|
tests+=("regression")
|
||||||
tests+=("opencl")
|
tests+=("opencl")
|
||||||
|
tests+=("cache")
|
||||||
tests+=("config1")
|
tests+=("config1")
|
||||||
tests+=("config2")
|
tests+=("config2")
|
||||||
tests+=("debug")
|
tests+=("debug")
|
||||||
|
@ -405,6 +438,8 @@ then
|
||||||
make -s
|
make -s
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
start=$SECONDS
|
||||||
|
|
||||||
for test in "${tests[@]}"; do
|
for test in "${tests[@]}"; do
|
||||||
$test
|
$test
|
||||||
done
|
done
|
||||||
|
|
|
@ -19,6 +19,8 @@ import csv
|
||||||
import re
|
import re
|
||||||
import inspect
|
import inspect
|
||||||
|
|
||||||
|
configs = None
|
||||||
|
|
||||||
def parse_args():
|
def parse_args():
|
||||||
parser = argparse.ArgumentParser(description='CPU trace log to CSV format converter.')
|
parser = argparse.ArgumentParser(description='CPU trace log to CSV format converter.')
|
||||||
parser.add_argument('-t', '--type', default='simx', help='log type (rtlsim or simx)')
|
parser.add_argument('-t', '--type', default='simx', help='log type (rtlsim or simx)')
|
||||||
|
@ -26,6 +28,24 @@ def parse_args():
|
||||||
parser.add_argument('log', help='Input log file')
|
parser.add_argument('log', help='Input log file')
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
|
def load_config(filename):
|
||||||
|
config_pattern = r"CONFIGS: num_threads=(\d+), num_warps=(\d+), num_cores=(\d+), num_clusters=(\d+), socket_size=(\d+), local_mem_base=0x([0-9a-fA-F]+), num_barriers=(\d+)"
|
||||||
|
with open(filename, 'r') as file:
|
||||||
|
for line in file:
|
||||||
|
config_match = re.search(config_pattern, line)
|
||||||
|
if config_match:
|
||||||
|
config = {
|
||||||
|
'num_threads': int(config_match.group(1)),
|
||||||
|
'num_warps': int(config_match.group(2)),
|
||||||
|
'num_cores': int(config_match.group(3)),
|
||||||
|
'num_clusters': int(config_match.group(4)),
|
||||||
|
'socket_size': int(config_match.group(5)),
|
||||||
|
'local_mem_base': int(config_match.group(6), 16),
|
||||||
|
'num_barriers': int(config_match.group(7)),
|
||||||
|
}
|
||||||
|
return config
|
||||||
|
return None
|
||||||
|
|
||||||
def parse_simx(log_lines):
|
def parse_simx(log_lines):
|
||||||
pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
|
pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
|
||||||
instr_pattern = r"Instr (0x[0-9a-fA-F]+):"
|
instr_pattern = r"Instr (0x[0-9a-fA-F]+):"
|
||||||
|
@ -46,10 +66,10 @@ def parse_simx(log_lines):
|
||||||
instr_data = {}
|
instr_data = {}
|
||||||
instr_data["lineno"] = lineno
|
instr_data["lineno"] = lineno
|
||||||
instr_data["PC"] = re.search(pc_pattern, line).group(1)
|
instr_data["PC"] = re.search(pc_pattern, line).group(1)
|
||||||
instr_data["core_id"] = re.search(core_id_pattern, line).group(1)
|
instr_data["core_id"] = int(re.search(core_id_pattern, line).group(1))
|
||||||
instr_data["warp_id"] = re.search(warp_id_pattern, line).group(1)
|
instr_data["warp_id"] = int(re.search(warp_id_pattern, line).group(1))
|
||||||
instr_data["tmask"] = re.search(tmask_pattern, line).group(1)
|
instr_data["tmask"] = re.search(tmask_pattern, line).group(1)
|
||||||
instr_data["uuid"] = re.search(uuid_pattern, line).group(1)
|
instr_data["uuid"] = int(re.search(uuid_pattern, line).group(1))
|
||||||
elif line.startswith("DEBUG Instr"):
|
elif line.startswith("DEBUG Instr"):
|
||||||
instr_data["instr"] = re.search(instr_pattern, line).group(1)
|
instr_data["instr"] = re.search(instr_pattern, line).group(1)
|
||||||
instr_data["opcode"] = re.search(opcode_pattern, line).group(1)
|
instr_data["opcode"] = re.search(opcode_pattern, line).group(1)
|
||||||
|
@ -60,6 +80,7 @@ def parse_simx(log_lines):
|
||||||
instr_data["destination"] = re.search(destination_pattern, line).group(1)
|
instr_data["destination"] = re.search(destination_pattern, line).group(1)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Error at line {}: {}".format(lineno, e))
|
print("Error at line {}: {}".format(lineno, e))
|
||||||
|
instr_data = None
|
||||||
if instr_data:
|
if instr_data:
|
||||||
entries.append(instr_data)
|
entries.append(instr_data)
|
||||||
return entries
|
return entries
|
||||||
|
@ -95,7 +116,7 @@ def append_value(text, reg, value, tmask_arr, sep):
|
||||||
return text, sep
|
return text, sep
|
||||||
|
|
||||||
def parse_rtlsim(log_lines):
|
def parse_rtlsim(log_lines):
|
||||||
config_pattern = r"CONFIGS: num_threads=(\d+), num_warps=(\d+), num_cores=(\d+), num_clusters=(\d+), socket_size=(\d+), local_mem_base=(\d+), num_barriers=(\d+)"
|
global configs
|
||||||
line_pattern = r"\d+: cluster(\d+)-socket(\d+)-core(\d+)-(decode|issue|commit)"
|
line_pattern = r"\d+: cluster(\d+)-socket(\d+)-core(\d+)-(decode|issue|commit)"
|
||||||
pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
|
pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
|
||||||
instr_pattern = r"instr=(0x[0-9a-fA-F]+)"
|
instr_pattern = r"instr=(0x[0-9a-fA-F]+)"
|
||||||
|
@ -117,36 +138,20 @@ def parse_rtlsim(log_lines):
|
||||||
uuid_pattern = r"#(\d+)"
|
uuid_pattern = r"#(\d+)"
|
||||||
entries = []
|
entries = []
|
||||||
instr_data = {}
|
instr_data = {}
|
||||||
num_threads = 0
|
num_cores = configs['num_cores']
|
||||||
num_warps = 0
|
socket_size = configs['socket_size']
|
||||||
num_cores = 0
|
num_sockets = (num_cores + socket_size - 1) // socket_size
|
||||||
num_clusters = 0
|
|
||||||
socket_size = 0
|
|
||||||
local_mem_base = 0
|
|
||||||
num_barriers = 0
|
|
||||||
num_sockets = 0
|
|
||||||
for lineno, line in enumerate(log_lines, start=1):
|
for lineno, line in enumerate(log_lines, start=1):
|
||||||
try:
|
try:
|
||||||
config_match = re.search(config_pattern, line)
|
|
||||||
if config_match:
|
|
||||||
num_threads = int(config_match.group(1))
|
|
||||||
num_warps = int(config_match.group(2))
|
|
||||||
num_cores = int(config_match.group(3))
|
|
||||||
num_clusters = int(config_match.group(4))
|
|
||||||
socket_size = int(config_match.group(5))
|
|
||||||
local_mem_base = int(config_match.group(6))
|
|
||||||
num_barriers = int(config_match.group(7))
|
|
||||||
num_sockets = (num_cores + socket_size - 1) // socket_size
|
|
||||||
continue
|
|
||||||
line_match = re.search(line_pattern, line)
|
line_match = re.search(line_pattern, line)
|
||||||
if line_match:
|
if line_match:
|
||||||
PC = re.search(pc_pattern, line).group(1)
|
PC = re.search(pc_pattern, line).group(1)
|
||||||
warp_id = re.search(warp_id_pattern, line).group(1)
|
warp_id = int(re.search(warp_id_pattern, line).group(1))
|
||||||
tmask = re.search(tmask_pattern, line).group(1)
|
tmask = re.search(tmask_pattern, line).group(1)
|
||||||
uuid = re.search(uuid_pattern, line).group(1)
|
uuid = int(re.search(uuid_pattern, line).group(1))
|
||||||
cluster_id = line_match.group(1)
|
cluster_id = int(line_match.group(1))
|
||||||
socket_id = line_match.group(2)
|
socket_id = int(line_match.group(2))
|
||||||
core_id = line_match.group(3)
|
core_id = int(line_match.group(3))
|
||||||
stage = line_match.group(4)
|
stage = line_match.group(4)
|
||||||
if stage == "decode":
|
if stage == "decode":
|
||||||
trace = {}
|
trace = {}
|
||||||
|
@ -273,7 +278,9 @@ def split_log_file(log_filename):
|
||||||
return sublogs
|
return sublogs
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
global configs
|
||||||
args = parse_args()
|
args = parse_args()
|
||||||
|
configs = load_config(args.log)
|
||||||
sublogs = split_log_file(args.log)
|
sublogs = split_log_file(args.log)
|
||||||
write_csv(sublogs, args.csv, args.type)
|
write_csv(sublogs, args.csv, args.type)
|
||||||
|
|
||||||
|
|
|
@ -19,7 +19,7 @@ TOOLDIR ?= @TOOLDIR@
|
||||||
|
|
||||||
OSVERSION ?= @OSVERSION@
|
OSVERSION ?= @OSVERSION@
|
||||||
|
|
||||||
PREFIX ?= @PREFIX@
|
INSTALLDIR ?= @INSTALLDIR@
|
||||||
|
|
||||||
LLVM_VORTEX ?= $(TOOLDIR)/llvm-vortex
|
LLVM_VORTEX ?= $(TOOLDIR)/llvm-vortex
|
||||||
|
|
||||||
|
|
2
configure
vendored
2
configure
vendored
|
@ -63,7 +63,7 @@ copy_files() {
|
||||||
filename_no_ext="${filename%.in}"
|
filename_no_ext="${filename%.in}"
|
||||||
dest_file="$dest_dir/$filename_no_ext"
|
dest_file="$dest_dir/$filename_no_ext"
|
||||||
mkdir -p "$dest_dir"
|
mkdir -p "$dest_dir"
|
||||||
sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@PREFIX@|$PREFIX|g; s|@VM_ENABLE@|$VM_ENABLE|g" "$file" > "$dest_file"
|
sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g; s|@VM_ENABLE@|$VM_ENABLE|g" "$file" > "$dest_file"
|
||||||
# apply permissions to bash scripts
|
# apply permissions to bash scripts
|
||||||
read -r firstline < "$dest_file"
|
read -r firstline < "$dest_file"
|
||||||
if [[ "$firstline" =~ ^#!.*bash ]]; then
|
if [[ "$firstline" =~ ^#!.*bash ]]; then
|
||||||
|
|
|
@ -53,9 +53,9 @@ A waveform trace `trace.vcd` will be generated in the current directory during t
|
||||||
## Analyzing Vortex trace log
|
## Analyzing Vortex trace log
|
||||||
|
|
||||||
When debugging Vortex RTL or SimX Simulator, reading the trace run.log file can be overwhelming when the trace gets really large.
|
When debugging Vortex RTL or SimX Simulator, reading the trace run.log file can be overwhelming when the trace gets really large.
|
||||||
We provide a trace sanitizer tool under ./hw/scripts/trace_csv.py that you can use to convert the large trace into a CSV file containing all the instructions that executed with their source and destination operands. To increase compatibility between traces you will need to initialize RTLSIM's GPRs to zero by defining GPR_RESET.
|
We provide a trace sanitizer tool under ./hw/scripts/trace_csv.py that you can use to convert the large trace into a CSV file containing all the instructions that executed with their source and destination operands.
|
||||||
|
|
||||||
$ CONFIGS="-DGPR_RESET" ./ci/blackbox.sh --driver=rtlsim --app=demo --debug=3 --log=run_rtlsim.log
|
$ ./ci/blackbox.sh --driver=rtlsim --app=demo --debug=3 --log=run_rtlsim.log
|
||||||
$ ./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv
|
$ ./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv
|
||||||
|
|
||||||
$ ./ci/blackbox.sh --driver=simx --app=demo --debug=3 --log=run_simx.log
|
$ ./ci/blackbox.sh --driver=simx --app=demo --debug=3 --log=run_simx.log
|
||||||
|
|
|
@ -96,10 +96,11 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
||||||
.CRSQ_SIZE (`L2_CRSQ_SIZE),
|
.CRSQ_SIZE (`L2_CRSQ_SIZE),
|
||||||
.MSHR_SIZE (`L2_MSHR_SIZE),
|
.MSHR_SIZE (`L2_MSHR_SIZE),
|
||||||
.MRSQ_SIZE (`L2_MRSQ_SIZE),
|
.MRSQ_SIZE (`L2_MRSQ_SIZE),
|
||||||
.MREQ_SIZE (`L2_MREQ_SIZE),
|
.MREQ_SIZE (`L2_WRITEBACK ? `L2_MSHR_SIZE : `L2_MREQ_SIZE),
|
||||||
.TAG_WIDTH (L2_TAG_WIDTH),
|
.TAG_WIDTH (L2_TAG_WIDTH),
|
||||||
.WRITE_ENABLE (1),
|
.WRITE_ENABLE (1),
|
||||||
.WRITEBACK (`L2_WRITEBACK),
|
.WRITEBACK (`L2_WRITEBACK),
|
||||||
|
.DIRTY_BYTES (`L2_WRITEBACK),
|
||||||
.UUID_WIDTH (`UUID_WIDTH),
|
.UUID_WIDTH (`UUID_WIDTH),
|
||||||
.CORE_OUT_BUF (2),
|
.CORE_OUT_BUF (2),
|
||||||
.MEM_OUT_BUF (2),
|
.MEM_OUT_BUF (2),
|
||||||
|
|
|
@ -217,7 +217,7 @@
|
||||||
`ifndef IO_COUT_ADDR
|
`ifndef IO_COUT_ADDR
|
||||||
`define IO_COUT_ADDR `IO_BASE_ADDR
|
`define IO_COUT_ADDR `IO_BASE_ADDR
|
||||||
`endif
|
`endif
|
||||||
`define IO_COUT_SIZE `MEM_BLOCK_SIZE
|
`define IO_COUT_SIZE 64
|
||||||
|
|
||||||
`ifndef IO_MPM_ADDR
|
`ifndef IO_MPM_ADDR
|
||||||
`define IO_MPM_ADDR (`IO_COUT_ADDR + `IO_COUT_SIZE)
|
`define IO_MPM_ADDR (`IO_COUT_ADDR + `IO_COUT_SIZE)
|
||||||
|
@ -685,7 +685,7 @@
|
||||||
|
|
||||||
// Number of Banks
|
// Number of Banks
|
||||||
`ifndef L3_NUM_BANKS
|
`ifndef L3_NUM_BANKS
|
||||||
`define L3_NUM_BANKS `MIN(4, `NUM_CLUSTERS)
|
`define L3_NUM_BANKS `MIN(8, `NUM_CLUSTERS)
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
// Core Response Queue Size
|
// Core Response Queue Size
|
||||||
|
@ -718,6 +718,15 @@
|
||||||
`define L3_WRITEBACK 0
|
`define L3_WRITEBACK 0
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
|
`ifndef MEMORY_BANKS
|
||||||
|
`define MEMORY_BANKS 8
|
||||||
|
`endif
|
||||||
|
|
||||||
|
// Number of Memory Ports from LLC
|
||||||
|
`ifndef NUM_MEM_PORTS
|
||||||
|
`define NUM_MEM_PORTS `MIN(`MEMORY_BANKS, `L3_NUM_BANKS)
|
||||||
|
`endif
|
||||||
|
|
||||||
// ISA Extensions /////////////////////////////////////////////////////////////
|
// ISA Extensions /////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`ifdef EXT_A_ENABLE
|
`ifdef EXT_A_ENABLE
|
||||||
|
|
|
@ -238,11 +238,11 @@
|
||||||
`define RESET_RELAY(dst, src) \
|
`define RESET_RELAY(dst, src) \
|
||||||
`RESET_RELAY_EX (dst, src, 1, 0)
|
`RESET_RELAY_EX (dst, src, 1, 0)
|
||||||
|
|
||||||
// size(x): 0 -> 0, 1 -> 1, 2 -> 2, 3 -> 2, 4-> 2
|
// size(x): 0 -> 0, 1 -> 1, 2 -> 2, 3 -> 2, 4-> 2, 5 -> 2
|
||||||
`define TO_OUT_BUF_SIZE(out_reg) `MIN(out_reg, 2)
|
`define TO_OUT_BUF_SIZE(s) `MIN(s, 2)
|
||||||
|
|
||||||
// reg(x): 0 -> 0, 1 -> 1, 2 -> 0, 3 -> 1, 4 -> 2
|
// reg(x): 0 -> 0, 1 -> 1, 2 -> 0, 3 -> 1, 4 -> 2, 5 -> 3
|
||||||
`define TO_OUT_BUF_REG(out_reg) ((out_reg & 1) + ((out_reg >> 2) << 1))
|
`define TO_OUT_BUF_REG(s) ((s < 2) ? s : (s - 2))
|
||||||
|
|
||||||
`define REPEAT(n,f,s) `_REPEAT_``n(f,s)
|
`define REPEAT(n,f,s) `_REPEAT_``n(f,s)
|
||||||
`define _REPEAT_0(f,s)
|
`define _REPEAT_0(f,s)
|
||||||
|
|
|
@ -145,11 +145,12 @@ module VX_socket import VX_gpu_pkg::*; #(
|
||||||
.CRSQ_SIZE (`DCACHE_CRSQ_SIZE),
|
.CRSQ_SIZE (`DCACHE_CRSQ_SIZE),
|
||||||
.MSHR_SIZE (`DCACHE_MSHR_SIZE),
|
.MSHR_SIZE (`DCACHE_MSHR_SIZE),
|
||||||
.MRSQ_SIZE (`DCACHE_MRSQ_SIZE),
|
.MRSQ_SIZE (`DCACHE_MRSQ_SIZE),
|
||||||
.MREQ_SIZE (`DCACHE_MREQ_SIZE),
|
.MREQ_SIZE (`DCACHE_WRITEBACK ? `DCACHE_MSHR_SIZE : `DCACHE_MREQ_SIZE),
|
||||||
.TAG_WIDTH (DCACHE_TAG_WIDTH),
|
.TAG_WIDTH (DCACHE_TAG_WIDTH),
|
||||||
.UUID_WIDTH (`UUID_WIDTH),
|
.UUID_WIDTH (`UUID_WIDTH),
|
||||||
.WRITE_ENABLE (1),
|
.WRITE_ENABLE (1),
|
||||||
.WRITEBACK (`DCACHE_WRITEBACK),
|
.WRITEBACK (`DCACHE_WRITEBACK),
|
||||||
|
.DIRTY_BYTES (`DCACHE_WRITEBACK),
|
||||||
.NC_ENABLE (1),
|
.NC_ENABLE (1),
|
||||||
.CORE_OUT_BUF (2),
|
.CORE_OUT_BUF (2),
|
||||||
.MEM_OUT_BUF (2)
|
.MEM_OUT_BUF (2)
|
||||||
|
@ -178,8 +179,6 @@ module VX_socket import VX_gpu_pkg::*; #(
|
||||||
`ASSIGN_VX_MEM_BUS_IF_X (l1_mem_bus_if[0], icache_mem_bus_if, L1_MEM_TAG_WIDTH, ICACHE_MEM_TAG_WIDTH);
|
`ASSIGN_VX_MEM_BUS_IF_X (l1_mem_bus_if[0], icache_mem_bus_if, L1_MEM_TAG_WIDTH, ICACHE_MEM_TAG_WIDTH);
|
||||||
`ASSIGN_VX_MEM_BUS_IF_X (l1_mem_bus_if[1], dcache_mem_bus_if, L1_MEM_TAG_WIDTH, DCACHE_MEM_TAG_WIDTH);
|
`ASSIGN_VX_MEM_BUS_IF_X (l1_mem_bus_if[1], dcache_mem_bus_if, L1_MEM_TAG_WIDTH, DCACHE_MEM_TAG_WIDTH);
|
||||||
|
|
||||||
`RESET_RELAY (mem_arb_reset, reset);
|
|
||||||
|
|
||||||
VX_mem_arb #(
|
VX_mem_arb #(
|
||||||
.NUM_INPUTS (2),
|
.NUM_INPUTS (2),
|
||||||
.DATA_SIZE (`L1_LINE_SIZE),
|
.DATA_SIZE (`L1_LINE_SIZE),
|
||||||
|
@ -190,7 +189,7 @@ module VX_socket import VX_gpu_pkg::*; #(
|
||||||
.RSP_OUT_BUF (2)
|
.RSP_OUT_BUF (2)
|
||||||
) mem_arb (
|
) mem_arb (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (mem_arb_reset),
|
.reset (reset),
|
||||||
.bus_in_if (l1_mem_bus_if),
|
.bus_in_if (l1_mem_bus_if),
|
||||||
.bus_out_if (l1_mem_arb_bus_if)
|
.bus_out_if (l1_mem_arb_bus_if)
|
||||||
);
|
);
|
||||||
|
|
|
@ -166,6 +166,10 @@
|
||||||
`define VX_CSR_MPM_MEM_WRITES_H 12'hB99
|
`define VX_CSR_MPM_MEM_WRITES_H 12'hB99
|
||||||
`define VX_CSR_MPM_MEM_LT 12'hB1A // memory latency
|
`define VX_CSR_MPM_MEM_LT 12'hB1A // memory latency
|
||||||
`define VX_CSR_MPM_MEM_LT_H 12'hB9A
|
`define VX_CSR_MPM_MEM_LT_H 12'hB9A
|
||||||
|
`define VX_CSR_MPM_MEM_BANK_CNTR 12'hB1E // memory bank requests
|
||||||
|
`define VX_CSR_MPM_MEM_BANK_CNTR_H 12'hB9E
|
||||||
|
`define VX_CSR_MPM_MEM_BANK_TICK 12'hB1F // memory ticks
|
||||||
|
`define VX_CSR_MPM_MEM_BANK_TICK_H 12'hB9F
|
||||||
// PERF: lmem
|
// PERF: lmem
|
||||||
`define VX_CSR_MPM_LMEM_READS 12'hB1B // memory reads
|
`define VX_CSR_MPM_LMEM_READS 12'hB1B // memory reads
|
||||||
`define VX_CSR_MPM_LMEM_READS_H 12'hB9B
|
`define VX_CSR_MPM_LMEM_READS_H 12'hB9B
|
||||||
|
|
|
@ -80,10 +80,11 @@ module Vortex import VX_gpu_pkg::*; (
|
||||||
.CRSQ_SIZE (`L3_CRSQ_SIZE),
|
.CRSQ_SIZE (`L3_CRSQ_SIZE),
|
||||||
.MSHR_SIZE (`L3_MSHR_SIZE),
|
.MSHR_SIZE (`L3_MSHR_SIZE),
|
||||||
.MRSQ_SIZE (`L3_MRSQ_SIZE),
|
.MRSQ_SIZE (`L3_MRSQ_SIZE),
|
||||||
.MREQ_SIZE (`L3_MREQ_SIZE),
|
.MREQ_SIZE (`L3_WRITEBACK ? `L3_MSHR_SIZE : `L3_MREQ_SIZE),
|
||||||
.TAG_WIDTH (L2_MEM_TAG_WIDTH),
|
.TAG_WIDTH (L2_MEM_TAG_WIDTH),
|
||||||
.WRITE_ENABLE (1),
|
.WRITE_ENABLE (1),
|
||||||
.WRITEBACK (`L3_WRITEBACK),
|
.WRITEBACK (`L3_WRITEBACK),
|
||||||
|
.DIRTY_BYTES (`L3_WRITEBACK),
|
||||||
.UUID_WIDTH (`UUID_WIDTH),
|
.UUID_WIDTH (`UUID_WIDTH),
|
||||||
.CORE_OUT_BUF (2),
|
.CORE_OUT_BUF (2),
|
||||||
.MEM_OUT_BUF (2),
|
.MEM_OUT_BUF (2),
|
||||||
|
@ -192,12 +193,12 @@ module Vortex import VX_gpu_pkg::*; (
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (mem_req_fire) begin
|
if (mem_req_fire) begin
|
||||||
if (mem_req_rw)
|
if (mem_req_rw)
|
||||||
`TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h data=0x%0h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data));
|
`TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data));
|
||||||
else
|
else
|
||||||
`TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen));
|
`TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen));
|
||||||
end
|
end
|
||||||
if (mem_rsp_fire) begin
|
if (mem_rsp_fire) begin
|
||||||
`TRACE(1, ("%d: MEM Rsp: tag=0x%0h, data=0x%0h\n", $time, mem_rsp_tag, mem_rsp_data));
|
`TRACE(1, ("%d: MEM Rd Rsp: tag=0x%0h, data=0x%h\n", $time, mem_rsp_tag, mem_rsp_data));
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
|
|
@ -240,13 +240,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
||||||
MMIO_CMD_ARG0: begin
|
MMIO_CMD_ARG0: begin
|
||||||
cmd_args[0] <= 64'(cp2af_sRxPort.c0.data);
|
cmd_args[0] <= 64'(cp2af_sRxPort.c0.data);
|
||||||
`ifdef DBG_TRACE_AFU
|
`ifdef DBG_TRACE_AFU
|
||||||
`TRACE(2, ("%d: MMIO_CMD_ARG0: data=0x%0h\n", $time, 64'(cp2af_sRxPort.c0.data)));
|
`TRACE(2, ("%d: MMIO_CMD_ARG0: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data)));
|
||||||
`endif
|
`endif
|
||||||
end
|
end
|
||||||
MMIO_CMD_ARG1: begin
|
MMIO_CMD_ARG1: begin
|
||||||
cmd_args[1] <= 64'(cp2af_sRxPort.c0.data);
|
cmd_args[1] <= 64'(cp2af_sRxPort.c0.data);
|
||||||
`ifdef DBG_TRACE_AFU
|
`ifdef DBG_TRACE_AFU
|
||||||
`TRACE(2, ("%d: MMIO_CMD_ARG1: data=0x%0h\n", $time, 64'(cp2af_sRxPort.c0.data)));
|
`TRACE(2, ("%d: MMIO_CMD_ARG1: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data)));
|
||||||
`endif
|
`endif
|
||||||
end
|
end
|
||||||
MMIO_CMD_ARG2: begin
|
MMIO_CMD_ARG2: begin
|
||||||
|
@ -263,13 +263,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
||||||
`ifdef SCOPE
|
`ifdef SCOPE
|
||||||
MMIO_SCOPE_WRITE: begin
|
MMIO_SCOPE_WRITE: begin
|
||||||
`ifdef DBG_TRACE_AFU
|
`ifdef DBG_TRACE_AFU
|
||||||
`TRACE(2, ("%d: MMIO_SCOPE_WRITE: data=0x%0h\n", $time, cmd_scope_wdata));
|
`TRACE(2, ("%d: MMIO_SCOPE_WRITE: data=0x%h\n", $time, cmd_scope_wdata));
|
||||||
`endif
|
`endif
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
default: begin
|
default: begin
|
||||||
`ifdef DBG_TRACE_AFU
|
`ifdef DBG_TRACE_AFU
|
||||||
`TRACE(2, ("%d: Unknown MMIO Wr: addr=0x%0h, data=0x%0h\n", $time, mmio_hdr.address, 64'(cp2af_sRxPort.c0.data)));
|
`TRACE(2, ("%d: Unknown MMIO Wr: addr=0x%0h, data=0x%h\n", $time, mmio_hdr.address, 64'(cp2af_sRxPort.c0.data)));
|
||||||
`endif
|
`endif
|
||||||
end
|
end
|
||||||
endcase
|
endcase
|
||||||
|
@ -305,14 +305,14 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
||||||
MMIO_SCOPE_READ: begin
|
MMIO_SCOPE_READ: begin
|
||||||
mmio_tx.data <= cmd_scope_rdata;
|
mmio_tx.data <= cmd_scope_rdata;
|
||||||
`ifdef DBG_TRACE_AFU
|
`ifdef DBG_TRACE_AFU
|
||||||
`TRACE(2, ("%d: MMIO_SCOPE_READ: data=0x%0h\n", $time, cmd_scope_rdata));
|
`TRACE(2, ("%d: MMIO_SCOPE_READ: data=0x%h\n", $time, cmd_scope_rdata));
|
||||||
`endif
|
`endif
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
MMIO_DEV_CAPS: begin
|
MMIO_DEV_CAPS: begin
|
||||||
mmio_tx.data <= dev_caps;
|
mmio_tx.data <= dev_caps;
|
||||||
`ifdef DBG_TRACE_AFU
|
`ifdef DBG_TRACE_AFU
|
||||||
`TRACE(2, ("%d: MMIO_DEV_CAPS: data=0x%0h\n", $time, dev_caps));
|
`TRACE(2, ("%d: MMIO_DEV_CAPS: data=0x%h\n", $time, dev_caps));
|
||||||
`endif
|
`endif
|
||||||
end
|
end
|
||||||
MMIO_ISA_CAPS: begin
|
MMIO_ISA_CAPS: begin
|
||||||
|
@ -580,8 +580,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
||||||
.TAG_WIDTH (AVS_REQ_TAGW+1)
|
.TAG_WIDTH (AVS_REQ_TAGW+1)
|
||||||
) mem_bus_if[1]();
|
) mem_bus_if[1]();
|
||||||
|
|
||||||
`RESET_RELAY (mem_arb_reset, reset);
|
|
||||||
|
|
||||||
VX_mem_arb #(
|
VX_mem_arb #(
|
||||||
.NUM_INPUTS (2),
|
.NUM_INPUTS (2),
|
||||||
.DATA_SIZE (LMEM_DATA_SIZE),
|
.DATA_SIZE (LMEM_DATA_SIZE),
|
||||||
|
@ -592,7 +590,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
||||||
.RSP_OUT_BUF (0)
|
.RSP_OUT_BUF (0)
|
||||||
) mem_arb (
|
) mem_arb (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (mem_arb_reset),
|
.reset (reset),
|
||||||
.bus_in_if (cci_vx_mem_bus_if),
|
.bus_in_if (cci_vx_mem_bus_if),
|
||||||
.bus_out_if (mem_bus_if)
|
.bus_out_if (mem_bus_if)
|
||||||
);
|
);
|
||||||
|
@ -760,7 +758,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
||||||
cci_mem_wr_req_addr_base <= cci_mem_wr_req_addr_base + CCI_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE);
|
cci_mem_wr_req_addr_base <= cci_mem_wr_req_addr_base + CCI_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE);
|
||||||
end
|
end
|
||||||
`ifdef DBG_TRACE_AFU
|
`ifdef DBG_TRACE_AFU
|
||||||
`TRACE(2, ("%d: CCI Rd Rsp: idx=%0d, ctr=%0d, data=0x%0h\n", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data));
|
`TRACE(2, ("%d: CCI Rd Rsp: idx=%0d, ctr=%0d, data=0x%h\n", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data));
|
||||||
`endif
|
`endif
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -778,14 +776,12 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
`RESET_RELAY (cci_rdq_reset, reset);
|
|
||||||
|
|
||||||
VX_fifo_queue #(
|
VX_fifo_queue #(
|
||||||
.DATAW (CCI_RD_QUEUE_DATAW),
|
.DATAW (CCI_RD_QUEUE_DATAW),
|
||||||
.DEPTH (CCI_RD_QUEUE_SIZE)
|
.DEPTH (CCI_RD_QUEUE_SIZE)
|
||||||
) cci_rd_req_queue (
|
) cci_rd_req_queue (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (cci_rdq_reset),
|
.reset (reset),
|
||||||
.push (cci_rdq_push),
|
.push (cci_rdq_push),
|
||||||
.pop (cci_rdq_pop),
|
.pop (cci_rdq_pop),
|
||||||
.data_in (cci_rdq_din),
|
.data_in (cci_rdq_din),
|
||||||
|
@ -906,7 +902,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
||||||
cci_wr_req_done <= 1;
|
cci_wr_req_done <= 1;
|
||||||
end
|
end
|
||||||
`ifdef DBG_TRACE_AFU
|
`ifdef DBG_TRACE_AFU
|
||||||
`TRACE(2, ("%d: CCI Wr Req: addr=0x%0h, rem=%0d, pending=%0d, data=0x%0h\n", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data));
|
`TRACE(2, ("%d: CCI Wr Req: addr=0x%0h, rem=%0d, pending=%0d, data=0x%h\n", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data));
|
||||||
`endif
|
`endif
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -1093,13 +1089,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
for (integer i = 0; i < NUM_LOCAL_MEM_BANKS; ++i) begin
|
for (integer i = 0; i < NUM_LOCAL_MEM_BANKS; ++i) begin
|
||||||
if (avs_write[i] && ~avs_waitrequest[i]) begin
|
if (avs_write[i] && ~avs_waitrequest[i]) begin
|
||||||
`TRACE(2, ("%d: AVS Wr Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h, data=0x%0h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i], avs_writedata[i]));
|
`TRACE(2, ("%d: AVS Wr Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h, data=0x%h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i], avs_writedata[i]));
|
||||||
end
|
end
|
||||||
if (avs_read[i] && ~avs_waitrequest[i]) begin
|
if (avs_read[i] && ~avs_waitrequest[i]) begin
|
||||||
`TRACE(2, ("%d: AVS Rd Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i]));
|
`TRACE(2, ("%d: AVS Rd Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i]));
|
||||||
end
|
end
|
||||||
if (avs_readdatavalid[i]) begin
|
if (avs_readdatavalid[i]) begin
|
||||||
`TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%0h\n", $time, i, avs_readdata[i]));
|
`TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%h\n", $time, i, avs_readdata[i]));
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -377,13 +377,13 @@ module VX_afu_wrap #(
|
||||||
`TRACE(2, ("%d: AFU Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i]));
|
`TRACE(2, ("%d: AFU Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i]));
|
||||||
end
|
end
|
||||||
if (m_axi_mem_wvalid_a[i] && m_axi_mem_wready_a[i]) begin
|
if (m_axi_mem_wvalid_a[i] && m_axi_mem_wready_a[i]) begin
|
||||||
`TRACE(2, ("%d: AFU Wr Req [%0d]: data=0x%0h\n", $time, i, m_axi_mem_wdata_a[i]));
|
`TRACE(2, ("%d: AFU Wr Req [%0d]: data=0x%h\n", $time, i, m_axi_mem_wdata_a[i]));
|
||||||
end
|
end
|
||||||
if (m_axi_mem_arvalid_a[i] && m_axi_mem_arready_a[i]) begin
|
if (m_axi_mem_arvalid_a[i] && m_axi_mem_arready_a[i]) begin
|
||||||
`TRACE(2, ("%d: AFU Rd Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_araddr_a[i], m_axi_mem_arid_a[i]));
|
`TRACE(2, ("%d: AFU Rd Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_araddr_a[i], m_axi_mem_arid_a[i]));
|
||||||
end
|
end
|
||||||
if (m_axi_mem_rvalid_a[i] && m_axi_mem_rready_a[i]) begin
|
if (m_axi_mem_rvalid_a[i] && m_axi_mem_rready_a[i]) begin
|
||||||
`TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i]));
|
`TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%h, tag=0x%0h\n", $time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i]));
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
94
hw/rtl/cache/VX_bank_flush.sv
vendored
94
hw/rtl/cache/VX_bank_flush.sv
vendored
|
@ -14,6 +14,7 @@
|
||||||
`include "VX_cache_define.vh"
|
`include "VX_cache_define.vh"
|
||||||
|
|
||||||
module VX_bank_flush #(
|
module VX_bank_flush #(
|
||||||
|
parameter BANK_ID = 0,
|
||||||
// Size of cache in bytes
|
// Size of cache in bytes
|
||||||
parameter CACHE_SIZE = 1024,
|
parameter CACHE_SIZE = 1024,
|
||||||
// Size of line inside a bank in bytes
|
// Size of line inside a bank in bytes
|
||||||
|
@ -27,33 +28,36 @@ module VX_bank_flush #(
|
||||||
) (
|
) (
|
||||||
input wire clk,
|
input wire clk,
|
||||||
input wire reset,
|
input wire reset,
|
||||||
input wire flush_in_valid,
|
input wire flush_begin,
|
||||||
output wire flush_in_ready,
|
output wire flush_end,
|
||||||
output wire flush_out_init,
|
output wire flush_init,
|
||||||
output wire flush_out_valid,
|
output wire flush_valid,
|
||||||
output wire [`CS_LINE_SEL_BITS-1:0] flush_out_line,
|
output wire [`CS_LINE_SEL_BITS-1:0] flush_line,
|
||||||
output wire [NUM_WAYS-1:0] flush_out_way,
|
output wire [NUM_WAYS-1:0] flush_way,
|
||||||
input wire flush_out_ready,
|
input wire flush_ready,
|
||||||
input wire mshr_empty
|
input wire mshr_empty,
|
||||||
|
input wire bank_empty
|
||||||
);
|
);
|
||||||
parameter CTR_WIDTH = `CS_LINE_SEL_BITS + (WRITEBACK ? `CS_WAY_SEL_BITS : 0);
|
// ways iteration is only needed when eviction is enabled
|
||||||
|
localparam CTR_WIDTH = `CS_LINE_SEL_BITS + (WRITEBACK ? `CS_WAY_SEL_BITS : 0);
|
||||||
|
|
||||||
parameter STATE_IDLE = 2'd0;
|
localparam STATE_IDLE = 0;
|
||||||
parameter STATE_INIT = 2'd1;
|
localparam STATE_INIT = 1;
|
||||||
parameter STATE_FLUSH = 2'd2;
|
localparam STATE_WAIT1 = 2;
|
||||||
|
localparam STATE_FLUSH = 3;
|
||||||
|
localparam STATE_WAIT2 = 4;
|
||||||
|
localparam STATE_DONE = 5;
|
||||||
|
|
||||||
|
reg [2:0] state_r, state_n;
|
||||||
|
|
||||||
reg [CTR_WIDTH-1:0] counter_r;
|
reg [CTR_WIDTH-1:0] counter_r;
|
||||||
reg [1:0] state_r, state_n;
|
|
||||||
reg flush_in_ready_r, flush_in_ready_n;
|
|
||||||
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
state_n = state_r;
|
state_n = state_r;
|
||||||
flush_in_ready_n = 0;
|
|
||||||
case (state_r)
|
case (state_r)
|
||||||
// STATE_IDLE
|
STATE_IDLE: begin
|
||||||
default: begin
|
if (flush_begin) begin
|
||||||
if (flush_in_valid && mshr_empty) begin
|
state_n = STATE_WAIT1;
|
||||||
state_n = STATE_FLUSH;
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
STATE_INIT: begin
|
STATE_INIT: begin
|
||||||
|
@ -61,25 +65,41 @@ module VX_bank_flush #(
|
||||||
state_n = STATE_IDLE;
|
state_n = STATE_IDLE;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
STATE_FLUSH: begin
|
STATE_WAIT1: begin
|
||||||
if (counter_r == ((2 ** CTR_WIDTH)-1)) begin
|
// wait for pending requests to complete
|
||||||
state_n = STATE_IDLE;
|
if (mshr_empty) begin
|
||||||
flush_in_ready_n = 1;
|
state_n = STATE_FLUSH;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
STATE_FLUSH: begin
|
||||||
|
if (counter_r == ((2 ** CTR_WIDTH)-1) && flush_ready) begin
|
||||||
|
state_n = (BANK_ID == 0) ? STATE_DONE : STATE_WAIT2;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
STATE_WAIT2: begin
|
||||||
|
// ensure the bank is empty before notifying the cache flush unit,
|
||||||
|
// because the flush request to lower caches only goes through bank0
|
||||||
|
// and it is important that request gets sent out last.
|
||||||
|
if (bank_empty) begin
|
||||||
|
state_n = STATE_DONE;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
STATE_DONE: begin
|
||||||
|
// generate a completion pulse
|
||||||
|
state_n = STATE_IDLE;
|
||||||
|
end
|
||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
state_r <= STATE_INIT;
|
state_r <= STATE_INIT;
|
||||||
counter_r <= '0;
|
counter_r <= '0;
|
||||||
flush_in_ready_r <= '0;
|
|
||||||
end else begin
|
end else begin
|
||||||
state_r <= state_n;
|
state_r <= state_n;
|
||||||
flush_in_ready_r <= flush_in_ready_n;
|
|
||||||
if (state_r != STATE_IDLE) begin
|
if (state_r != STATE_IDLE) begin
|
||||||
if ((state_r == STATE_INIT) || flush_out_ready) begin
|
if ((state_r == STATE_INIT)
|
||||||
|
|| ((state_r == STATE_FLUSH) && flush_ready)) begin
|
||||||
counter_r <= counter_r + CTR_WIDTH'(1);
|
counter_r <= counter_r + CTR_WIDTH'(1);
|
||||||
end
|
end
|
||||||
end else begin
|
end else begin
|
||||||
|
@ -88,22 +108,20 @@ module VX_bank_flush #(
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
assign flush_in_ready = flush_in_ready_r;
|
assign flush_end = (state_r == STATE_DONE);
|
||||||
|
assign flush_init = (state_r == STATE_INIT);
|
||||||
assign flush_out_init = (state_r == STATE_INIT);
|
assign flush_valid = (state_r == STATE_FLUSH);
|
||||||
|
assign flush_line = counter_r[`CS_LINE_SEL_BITS-1:0];
|
||||||
assign flush_out_valid = (state_r == STATE_FLUSH);
|
|
||||||
assign flush_out_line = counter_r[`CS_LINE_SEL_BITS-1:0];
|
|
||||||
|
|
||||||
if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin
|
if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin
|
||||||
reg [NUM_WAYS-1:0] flush_out_way_r;
|
reg [NUM_WAYS-1:0] flush_way_r;
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
flush_out_way_r = '0;
|
flush_way_r = '0;
|
||||||
flush_out_way_r[counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]] = 1;
|
flush_way_r[counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]] = 1;
|
||||||
end
|
end
|
||||||
assign flush_out_way = flush_out_way_r;
|
assign flush_way = flush_way_r;
|
||||||
end else begin
|
end else begin
|
||||||
assign flush_out_way = {NUM_WAYS{1'b1}};
|
assign flush_way = {NUM_WAYS{1'b1}};
|
||||||
end
|
end
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
|
|
52
hw/rtl/cache/VX_cache.sv
vendored
52
hw/rtl/cache/VX_cache.sv
vendored
|
@ -45,6 +45,9 @@ module VX_cache import VX_gpu_pkg::*; #(
|
||||||
// Enable cache writeback
|
// Enable cache writeback
|
||||||
parameter WRITEBACK = 0,
|
parameter WRITEBACK = 0,
|
||||||
|
|
||||||
|
// Enable dirty bytes on writeback
|
||||||
|
parameter DIRTY_BYTES = 0,
|
||||||
|
|
||||||
// Request debug identifier
|
// Request debug identifier
|
||||||
parameter UUID_WIDTH = 0,
|
parameter UUID_WIDTH = 0,
|
||||||
|
|
||||||
|
@ -69,8 +72,13 @@ module VX_cache import VX_gpu_pkg::*; #(
|
||||||
VX_mem_bus_if.master mem_bus_if
|
VX_mem_bus_if.master mem_bus_if
|
||||||
);
|
);
|
||||||
|
|
||||||
`STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter"))
|
`STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter: number of banks must be power of 2"))
|
||||||
`STATIC_ASSERT(WRITE_ENABLE || !WRITEBACK, ("invalid parameter"))
|
`STATIC_ASSERT(WRITE_ENABLE || !WRITEBACK, ("invalid parameter: writeback requires write enable"))
|
||||||
|
`STATIC_ASSERT(WRITEBACK || !DIRTY_BYTES, ("invalid parameter: dirty bytes require writeback"))
|
||||||
|
|
||||||
|
// In writeback mode, memory fill response may issue a new memory request to handle evicted blocks.
|
||||||
|
// We need to ensure that the memory request queue never fills up to avoid deadlock.
|
||||||
|
`STATIC_ASSERT(!WRITEBACK || (MREQ_SIZE >= MSHR_SIZE), ("invalid parameter: writeback requires MREQ_SIZE >= MSHR_SIZE"))
|
||||||
|
|
||||||
localparam REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS);
|
localparam REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS);
|
||||||
localparam WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS);
|
localparam WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS);
|
||||||
|
@ -101,26 +109,23 @@ module VX_cache import VX_gpu_pkg::*; #(
|
||||||
.TAG_WIDTH (TAG_WIDTH)
|
.TAG_WIDTH (TAG_WIDTH)
|
||||||
) core_bus2_if[NUM_REQS]();
|
) core_bus2_if[NUM_REQS]();
|
||||||
|
|
||||||
wire [NUM_BANKS-1:0] per_bank_flush_valid;
|
wire [NUM_BANKS-1:0] per_bank_flush_begin;
|
||||||
wire [NUM_BANKS-1:0] per_bank_flush_ready;
|
wire [NUM_BANKS-1:0] per_bank_flush_end;
|
||||||
|
|
||||||
wire [NUM_BANKS-1:0] per_bank_core_req_fire;
|
wire [NUM_BANKS-1:0] per_bank_core_req_fire;
|
||||||
|
|
||||||
// this reset relay is required to sync with bank initialization
|
|
||||||
`RESET_RELAY (flush_reset, reset);
|
|
||||||
|
|
||||||
VX_cache_flush #(
|
VX_cache_flush #(
|
||||||
.NUM_REQS (NUM_REQS),
|
.NUM_REQS (NUM_REQS),
|
||||||
.NUM_BANKS (NUM_BANKS),
|
.NUM_BANKS (NUM_BANKS),
|
||||||
.BANK_SEL_LATENCY (`TO_OUT_BUF_REG(REQ_XBAR_BUF)) // bank xbar latency
|
.BANK_SEL_LATENCY (`TO_OUT_BUF_REG(REQ_XBAR_BUF)) // bank xbar latency
|
||||||
) flush_unit (
|
) flush_unit (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (flush_reset),
|
.reset (reset),
|
||||||
.core_bus_in_if (core_bus_if),
|
.core_bus_in_if (core_bus_if),
|
||||||
.core_bus_out_if (core_bus2_if),
|
.core_bus_out_if (core_bus2_if),
|
||||||
.bank_req_fire (per_bank_core_req_fire),
|
.bank_req_fire (per_bank_core_req_fire),
|
||||||
.flush_valid (per_bank_flush_valid),
|
.flush_begin (per_bank_flush_begin),
|
||||||
.flush_ready (per_bank_flush_ready)
|
.flush_end (per_bank_flush_end)
|
||||||
);
|
);
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -131,9 +136,9 @@ module VX_cache import VX_gpu_pkg::*; #(
|
||||||
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s;
|
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s;
|
||||||
wire [NUM_REQS-1:0] core_rsp_ready_s;
|
wire [NUM_REQS-1:0] core_rsp_ready_s;
|
||||||
|
|
||||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
`RESET_RELAY_EX (core_rsp_reset, reset, NUM_REQS, `MAX_FANOUT);
|
||||||
|
|
||||||
`RESET_RELAY (core_rsp_reset, reset);
|
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||||
|
|
||||||
VX_elastic_buffer #(
|
VX_elastic_buffer #(
|
||||||
.DATAW (`CS_WORD_WIDTH + TAG_WIDTH),
|
.DATAW (`CS_WORD_WIDTH + TAG_WIDTH),
|
||||||
|
@ -141,7 +146,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
||||||
.OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF))
|
.OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF))
|
||||||
) core_rsp_buf (
|
) core_rsp_buf (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (core_rsp_reset),
|
.reset (core_rsp_reset[i]),
|
||||||
.valid_in (core_rsp_valid_s[i]),
|
.valid_in (core_rsp_valid_s[i]),
|
||||||
.ready_in (core_rsp_ready_s[i]),
|
.ready_in (core_rsp_ready_s[i]),
|
||||||
.data_in ({core_rsp_data_s[i], core_rsp_tag_s[i]}),
|
.data_in ({core_rsp_data_s[i], core_rsp_tag_s[i]}),
|
||||||
|
@ -165,15 +170,13 @@ module VX_cache import VX_gpu_pkg::*; #(
|
||||||
|
|
||||||
wire mem_bus_if_flush;
|
wire mem_bus_if_flush;
|
||||||
|
|
||||||
`RESET_RELAY (mem_req_reset, reset);
|
|
||||||
|
|
||||||
VX_elastic_buffer #(
|
VX_elastic_buffer #(
|
||||||
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1),
|
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1),
|
||||||
.SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
|
.SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
|
||||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||||
) mem_req_buf (
|
) mem_req_buf (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (mem_req_reset),
|
.reset (reset),
|
||||||
.valid_in (mem_req_valid_s),
|
.valid_in (mem_req_valid_s),
|
||||||
.ready_in (mem_req_ready_s),
|
.ready_in (mem_req_ready_s),
|
||||||
.data_in ({mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_data_s, mem_req_tag_s, mem_req_flush_s}),
|
.data_in ({mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_data_s, mem_req_tag_s, mem_req_flush_s}),
|
||||||
|
@ -192,15 +195,13 @@ module VX_cache import VX_gpu_pkg::*; #(
|
||||||
wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_s;
|
wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_s;
|
||||||
wire mem_rsp_ready_s;
|
wire mem_rsp_ready_s;
|
||||||
|
|
||||||
`RESET_RELAY (mem_rsp_reset, reset);
|
|
||||||
|
|
||||||
VX_elastic_buffer #(
|
VX_elastic_buffer #(
|
||||||
.DATAW (MEM_TAG_WIDTH + `CS_LINE_WIDTH),
|
.DATAW (MEM_TAG_WIDTH + `CS_LINE_WIDTH),
|
||||||
.SIZE (MRSQ_SIZE),
|
.SIZE (MRSQ_SIZE),
|
||||||
.OUT_REG (MRSQ_SIZE > 2)
|
.OUT_REG (MRSQ_SIZE > 2)
|
||||||
) mem_rsp_queue (
|
) mem_rsp_queue (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (mem_rsp_reset),
|
.reset (reset),
|
||||||
.valid_in (mem_bus_if.rsp_valid),
|
.valid_in (mem_bus_if.rsp_valid),
|
||||||
.ready_in (mem_bus_if.rsp_ready),
|
.ready_in (mem_bus_if.rsp_ready),
|
||||||
.data_in ({mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data}),
|
.data_in ({mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data}),
|
||||||
|
@ -316,6 +317,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
||||||
.NUM_OUTPUTS (NUM_BANKS),
|
.NUM_OUTPUTS (NUM_BANKS),
|
||||||
.DATAW (CORE_REQ_DATAW),
|
.DATAW (CORE_REQ_DATAW),
|
||||||
.PERF_CTR_BITS (`PERF_CTR_BITS),
|
.PERF_CTR_BITS (`PERF_CTR_BITS),
|
||||||
|
.ARBITER ("F"),
|
||||||
.OUT_BUF (REQ_XBAR_BUF)
|
.OUT_BUF (REQ_XBAR_BUF)
|
||||||
) req_xbar (
|
) req_xbar (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
|
@ -373,6 +375,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
||||||
.MSHR_SIZE (MSHR_SIZE),
|
.MSHR_SIZE (MSHR_SIZE),
|
||||||
.MREQ_SIZE (MREQ_SIZE),
|
.MREQ_SIZE (MREQ_SIZE),
|
||||||
.WRITE_ENABLE (WRITE_ENABLE),
|
.WRITE_ENABLE (WRITE_ENABLE),
|
||||||
|
.DIRTY_BYTES (DIRTY_BYTES),
|
||||||
.WRITEBACK (WRITEBACK),
|
.WRITEBACK (WRITEBACK),
|
||||||
.UUID_WIDTH (UUID_WIDTH),
|
.UUID_WIDTH (UUID_WIDTH),
|
||||||
.TAG_WIDTH (TAG_WIDTH),
|
.TAG_WIDTH (TAG_WIDTH),
|
||||||
|
@ -423,8 +426,8 @@ module VX_cache import VX_gpu_pkg::*; #(
|
||||||
.mem_rsp_id (`CS_MEM_TAG_TO_REQ_ID(mem_rsp_tag_s)),
|
.mem_rsp_id (`CS_MEM_TAG_TO_REQ_ID(mem_rsp_tag_s)),
|
||||||
.mem_rsp_ready (per_bank_mem_rsp_ready[bank_id]),
|
.mem_rsp_ready (per_bank_mem_rsp_ready[bank_id]),
|
||||||
|
|
||||||
.flush_valid (per_bank_flush_valid[bank_id]),
|
.flush_begin (per_bank_flush_begin[bank_id]),
|
||||||
.flush_ready (per_bank_flush_ready[bank_id])
|
.flush_end (per_bank_flush_end[bank_id])
|
||||||
);
|
);
|
||||||
|
|
||||||
if (NUM_BANKS == 1) begin
|
if (NUM_BANKS == 1) begin
|
||||||
|
@ -448,7 +451,8 @@ module VX_cache import VX_gpu_pkg::*; #(
|
||||||
VX_stream_xbar #(
|
VX_stream_xbar #(
|
||||||
.NUM_INPUTS (NUM_BANKS),
|
.NUM_INPUTS (NUM_BANKS),
|
||||||
.NUM_OUTPUTS (NUM_REQS),
|
.NUM_OUTPUTS (NUM_REQS),
|
||||||
.DATAW (CORE_RSP_DATAW)
|
.DATAW (CORE_RSP_DATAW),
|
||||||
|
.ARBITER ("F")
|
||||||
) rsp_xbar (
|
) rsp_xbar (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (rsp_xbar_reset),
|
.reset (rsp_xbar_reset),
|
||||||
|
@ -494,15 +498,13 @@ module VX_cache import VX_gpu_pkg::*; #(
|
||||||
};
|
};
|
||||||
end
|
end
|
||||||
|
|
||||||
`RESET_RELAY (mem_arb_reset, reset);
|
|
||||||
|
|
||||||
VX_stream_arb #(
|
VX_stream_arb #(
|
||||||
.NUM_INPUTS (NUM_BANKS),
|
.NUM_INPUTS (NUM_BANKS),
|
||||||
.DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + MSHR_ADDR_WIDTH + 1),
|
.DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + MSHR_ADDR_WIDTH + 1),
|
||||||
.ARBITER ("F")
|
.ARBITER ("F")
|
||||||
) mem_req_arb (
|
) mem_req_arb (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (mem_arb_reset),
|
.reset (reset),
|
||||||
.valid_in (per_bank_mem_req_valid),
|
.valid_in (per_bank_mem_req_valid),
|
||||||
.ready_in (per_bank_mem_req_ready),
|
.ready_in (per_bank_mem_req_ready),
|
||||||
.data_in (data_in),
|
.data_in (data_in),
|
||||||
|
|
238
hw/rtl/cache/VX_cache_bank.sv
vendored
238
hw/rtl/cache/VX_cache_bank.sv
vendored
|
@ -44,6 +44,9 @@ module VX_cache_bank #(
|
||||||
// Enable cache writeback
|
// Enable cache writeback
|
||||||
parameter WRITEBACK = 0,
|
parameter WRITEBACK = 0,
|
||||||
|
|
||||||
|
// Enable dirty bytes on writeback
|
||||||
|
parameter DIRTY_BYTES = 0,
|
||||||
|
|
||||||
// Request debug identifier
|
// Request debug identifier
|
||||||
parameter UUID_WIDTH = 0,
|
parameter UUID_WIDTH = 0,
|
||||||
|
|
||||||
|
@ -105,8 +108,8 @@ module VX_cache_bank #(
|
||||||
output wire mem_rsp_ready,
|
output wire mem_rsp_ready,
|
||||||
|
|
||||||
// flush
|
// flush
|
||||||
input wire flush_valid,
|
input wire flush_begin,
|
||||||
output wire flush_ready
|
output wire flush_end
|
||||||
);
|
);
|
||||||
|
|
||||||
localparam PIPELINE_STAGES = 2;
|
localparam PIPELINE_STAGES = 2;
|
||||||
|
@ -117,6 +120,7 @@ module VX_cache_bank #(
|
||||||
|
|
||||||
wire crsp_queue_stall;
|
wire crsp_queue_stall;
|
||||||
wire mshr_alm_full;
|
wire mshr_alm_full;
|
||||||
|
wire mreq_queue_empty;
|
||||||
wire mreq_queue_alm_full;
|
wire mreq_queue_alm_full;
|
||||||
|
|
||||||
wire [`CS_LINE_ADDR_WIDTH-1:0] mem_rsp_addr;
|
wire [`CS_LINE_ADDR_WIDTH-1:0] mem_rsp_addr;
|
||||||
|
@ -132,11 +136,12 @@ module VX_cache_bank #(
|
||||||
wire [MSHR_ADDR_WIDTH-1:0] replay_id;
|
wire [MSHR_ADDR_WIDTH-1:0] replay_id;
|
||||||
wire replay_ready;
|
wire replay_ready;
|
||||||
|
|
||||||
wire is_init_st0;
|
wire is_init_st0, is_init_st1;
|
||||||
wire is_flush_st0, is_flush_st1;
|
wire is_flush_st0, is_flush_st1;
|
||||||
wire [NUM_WAYS-1:0] flush_way_st0;
|
wire [NUM_WAYS-1:0] flush_way_st0;
|
||||||
|
|
||||||
wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1;
|
wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1;
|
||||||
|
wire [`CS_LINE_SEL_BITS-1:0] line_sel_st0, line_sel_st1;
|
||||||
wire rw_sel, rw_st0, rw_st1;
|
wire rw_sel, rw_st0, rw_st1;
|
||||||
wire [WORD_SEL_WIDTH-1:0] wsel_sel, wsel_st0, wsel_st1;
|
wire [WORD_SEL_WIDTH-1:0] wsel_sel, wsel_st0, wsel_st1;
|
||||||
wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1;
|
wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1;
|
||||||
|
@ -149,7 +154,8 @@ module VX_cache_bank #(
|
||||||
wire is_creq_st0, is_creq_st1;
|
wire is_creq_st0, is_creq_st1;
|
||||||
wire is_fill_st0, is_fill_st1;
|
wire is_fill_st0, is_fill_st1;
|
||||||
wire is_replay_st0, is_replay_st1;
|
wire is_replay_st0, is_replay_st1;
|
||||||
wire creq_flush_st0, creq_flush_st1;
|
wire creq_flush_sel, creq_flush_st0, creq_flush_st1;
|
||||||
|
wire evict_dirty_st0, evict_dirty_st1;
|
||||||
wire [NUM_WAYS-1:0] way_sel_st0, way_sel_st1;
|
wire [NUM_WAYS-1:0] way_sel_st0, way_sel_st1;
|
||||||
wire [NUM_WAYS-1:0] tag_matches_st0;
|
wire [NUM_WAYS-1:0] tag_matches_st0;
|
||||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0;
|
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0;
|
||||||
|
@ -157,73 +163,82 @@ module VX_cache_bank #(
|
||||||
wire mshr_pending_st0, mshr_pending_st1;
|
wire mshr_pending_st0, mshr_pending_st1;
|
||||||
wire mshr_empty;
|
wire mshr_empty;
|
||||||
|
|
||||||
wire line_flush_valid;
|
wire flush_valid;
|
||||||
wire line_flush_init;
|
wire init_valid;
|
||||||
wire [`CS_LINE_SEL_BITS-1:0] line_flush_sel;
|
wire [`CS_LINE_SEL_BITS-1:0] flush_sel;
|
||||||
wire [NUM_WAYS-1:0] line_flush_way;
|
wire [NUM_WAYS-1:0] flush_way;
|
||||||
wire line_flush_ready;
|
wire flush_ready;
|
||||||
|
|
||||||
|
// ensure we have no pending memory request in the bank
|
||||||
|
wire no_pending_req = ~valid_st0 && ~valid_st1 && mreq_queue_empty;
|
||||||
|
|
||||||
// flush unit
|
// flush unit
|
||||||
VX_bank_flush #(
|
VX_bank_flush #(
|
||||||
|
.BANK_ID (BANK_ID),
|
||||||
.CACHE_SIZE (CACHE_SIZE),
|
.CACHE_SIZE (CACHE_SIZE),
|
||||||
.LINE_SIZE (LINE_SIZE),
|
.LINE_SIZE (LINE_SIZE),
|
||||||
.NUM_BANKS (NUM_BANKS),
|
.NUM_BANKS (NUM_BANKS),
|
||||||
.NUM_WAYS (NUM_WAYS),
|
.NUM_WAYS (NUM_WAYS),
|
||||||
.WRITEBACK (WRITEBACK)
|
.WRITEBACK (WRITEBACK)
|
||||||
) flush_unit (
|
) flush_unit (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.flush_in_valid (flush_valid),
|
.flush_begin (flush_begin),
|
||||||
.flush_in_ready (flush_ready),
|
.flush_end (flush_end),
|
||||||
.flush_out_init (line_flush_init),
|
.flush_init (init_valid),
|
||||||
.flush_out_valid (line_flush_valid),
|
.flush_valid (flush_valid),
|
||||||
.flush_out_line (line_flush_sel),
|
.flush_line (flush_sel),
|
||||||
.flush_out_way (line_flush_way),
|
.flush_way (flush_way),
|
||||||
.flush_out_ready (line_flush_ready),
|
.flush_ready (flush_ready),
|
||||||
.mshr_empty (mshr_empty)
|
.mshr_empty (mshr_empty),
|
||||||
|
.bank_empty (no_pending_req)
|
||||||
);
|
);
|
||||||
|
|
||||||
wire rdw_hazard_st0;
|
wire rdw_hazard1_sel;
|
||||||
reg rdw_hazard_st1;
|
wire rdw_hazard2_sel;
|
||||||
|
reg rdw_hazard3_st1;
|
||||||
|
|
||||||
wire pipe_stall = crsp_queue_stall || rdw_hazard_st1;
|
wire pipe_stall = crsp_queue_stall || rdw_hazard3_st1;
|
||||||
|
|
||||||
// inputs arbitration:
|
// inputs arbitration:
|
||||||
// mshr replay has highest priority to maximize utilization since there is no miss.
|
// mshr replay has highest priority to maximize utilization since there is no miss.
|
||||||
// handle memory responses next to prevent deadlock with potential memory request from a miss.
|
// handle memory responses next to prevent deadlock with potential memory request from a miss.
|
||||||
// flush has precedence over core requests to ensure that the cache is in a consistent state.
|
// flush has precedence over core requests to ensure that the cache is in a consistent state.
|
||||||
wire replay_grant = ~line_flush_init;
|
wire replay_grant = ~init_valid;
|
||||||
wire replay_enable = replay_grant && replay_valid;
|
wire replay_enable = replay_grant && replay_valid;
|
||||||
|
|
||||||
wire fill_grant = ~line_flush_init && ~replay_enable;
|
wire fill_grant = ~init_valid && ~replay_enable;
|
||||||
wire fill_enable = fill_grant && mem_rsp_valid;
|
wire fill_enable = fill_grant && mem_rsp_valid;
|
||||||
|
|
||||||
wire flush_grant = ~line_flush_init && ~replay_enable && ~fill_enable;
|
wire flush_grant = ~init_valid && ~replay_enable && ~fill_enable;
|
||||||
wire flush_enable = flush_grant && line_flush_valid;
|
wire flush_enable = flush_grant && flush_valid;
|
||||||
|
|
||||||
wire creq_grant = ~line_flush_init && ~replay_enable && ~fill_enable && ~flush_enable;
|
wire creq_grant = ~init_valid && ~replay_enable && ~fill_enable && ~flush_enable;
|
||||||
wire creq_enable = creq_grant && core_req_valid;
|
wire creq_enable = creq_grant && core_req_valid;
|
||||||
|
|
||||||
assign replay_ready = replay_grant
|
assign replay_ready = replay_grant
|
||||||
&& ~rdw_hazard_st0
|
&& ~rdw_hazard1_sel
|
||||||
&& ~pipe_stall;
|
&& ~pipe_stall;
|
||||||
|
|
||||||
assign mem_rsp_ready = fill_grant
|
assign mem_rsp_ready = fill_grant
|
||||||
|
&& (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions
|
||||||
|
&& ~rdw_hazard2_sel
|
||||||
&& ~pipe_stall;
|
&& ~pipe_stall;
|
||||||
|
|
||||||
assign line_flush_ready = flush_grant
|
assign flush_ready = flush_grant
|
||||||
&& ~mreq_queue_alm_full
|
&& (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions
|
||||||
&& ~pipe_stall;
|
&& ~rdw_hazard2_sel
|
||||||
|
&& ~pipe_stall;
|
||||||
|
|
||||||
assign core_req_ready = creq_grant
|
assign core_req_ready = creq_grant
|
||||||
&& ~mreq_queue_alm_full
|
&& ~mreq_queue_alm_full
|
||||||
&& ~mshr_alm_full
|
&& ~mshr_alm_full
|
||||||
&& ~pipe_stall;
|
&& ~pipe_stall;
|
||||||
|
|
||||||
wire init_fire = line_flush_init;
|
wire init_fire = init_valid;
|
||||||
wire replay_fire = replay_valid && replay_ready;
|
wire replay_fire = replay_valid && replay_ready;
|
||||||
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
|
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
|
||||||
wire flush_fire = line_flush_valid && line_flush_ready;
|
wire flush_fire = flush_valid && flush_ready;
|
||||||
wire core_req_fire = core_req_valid && core_req_ready;
|
wire core_req_fire = core_req_valid && core_req_ready;
|
||||||
|
|
||||||
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || flush_fire || core_req_fire;
|
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || flush_fire || core_req_fire;
|
||||||
|
@ -232,8 +247,9 @@ module VX_cache_bank #(
|
||||||
assign wsel_sel = replay_valid ? replay_wsel : core_req_wsel;
|
assign wsel_sel = replay_valid ? replay_wsel : core_req_wsel;
|
||||||
assign req_idx_sel = replay_valid ? replay_idx : core_req_idx;
|
assign req_idx_sel = replay_valid ? replay_idx : core_req_idx;
|
||||||
assign tag_sel = replay_valid ? replay_tag : core_req_tag;
|
assign tag_sel = replay_valid ? replay_tag : core_req_tag;
|
||||||
|
assign creq_flush_sel = core_req_valid && core_req_flush;
|
||||||
|
|
||||||
assign addr_sel = (line_flush_init | line_flush_valid) ? `CS_LINE_ADDR_WIDTH'(line_flush_sel) :
|
assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) :
|
||||||
(replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr));
|
(replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr));
|
||||||
|
|
||||||
if (WRITE_ENABLE) begin
|
if (WRITE_ENABLE) begin
|
||||||
|
@ -260,8 +276,8 @@ module VX_cache_bank #(
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.enable (~pipe_stall),
|
.enable (~pipe_stall),
|
||||||
.data_in ({valid_sel, line_flush_init, replay_enable, fill_enable, flush_enable, creq_enable, core_req_flush, line_flush_way, addr_sel, data_sel, rw_sel, byteen_sel, wsel_sel, req_idx_sel, tag_sel, replay_id}),
|
.data_in ({valid_sel, init_valid, replay_enable, fill_enable, flush_enable, creq_enable, creq_flush_sel, flush_way, addr_sel, data_sel, rw_sel, byteen_sel, wsel_sel, req_idx_sel, tag_sel, replay_id}),
|
||||||
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, creq_flush_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0})
|
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, creq_flush_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0})
|
||||||
);
|
);
|
||||||
|
|
||||||
if (UUID_WIDTH != 0) begin
|
if (UUID_WIDTH != 0) begin
|
||||||
|
@ -273,18 +289,20 @@ module VX_cache_bank #(
|
||||||
wire do_init_st0 = valid_st0 && is_init_st0;
|
wire do_init_st0 = valid_st0 && is_init_st0;
|
||||||
wire do_flush_st0 = valid_st0 && is_flush_st0;
|
wire do_flush_st0 = valid_st0 && is_flush_st0;
|
||||||
wire do_creq_rd_st0 = valid_st0 && is_creq_st0 && ~rw_st0;
|
wire do_creq_rd_st0 = valid_st0 && is_creq_st0 && ~rw_st0;
|
||||||
|
wire do_creq_wr_st0 = valid_st0 && is_creq_st0 && rw_st0;
|
||||||
wire do_replay_rd_st0 = valid_st0 && is_replay_st0 && ~rw_st0;
|
wire do_replay_rd_st0 = valid_st0 && is_replay_st0 && ~rw_st0;
|
||||||
|
wire do_replay_wr_st0 = valid_st0 && is_replay_st0 && rw_st0;
|
||||||
wire do_fill_st0 = valid_st0 && is_fill_st0;
|
wire do_fill_st0 = valid_st0 && is_fill_st0;
|
||||||
wire do_lookup_st0 = valid_st0 && ~(is_fill_st0 || is_init_st0);
|
|
||||||
|
|
||||||
wire do_cache_rd_st0 = do_creq_rd_st0 || do_replay_rd_st0;
|
wire do_cache_rd_st0 = do_creq_rd_st0 || do_replay_rd_st0;
|
||||||
|
wire do_cache_wr_st0 = do_creq_wr_st0 || do_replay_wr_st0;
|
||||||
|
wire do_lookup_st0 = do_cache_rd_st0 || do_cache_wr_st0;
|
||||||
|
|
||||||
wire [`CS_WORD_WIDTH-1:0] write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0];
|
wire [`CS_WORD_WIDTH-1:0] write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0];
|
||||||
|
|
||||||
wire [NUM_WAYS-1:0] repl_way_st0;
|
assign line_sel_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0];
|
||||||
wire [`CS_TAG_SEL_BITS-1:0] repl_tag_st0;
|
|
||||||
|
|
||||||
`RESET_RELAY (tag_reset, reset);
|
wire [NUM_WAYS-1:0] evict_way_st0;
|
||||||
|
wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st0;
|
||||||
|
|
||||||
VX_cache_tags #(
|
VX_cache_tags #(
|
||||||
.INSTANCE_ID($sformatf("%s-tags", INSTANCE_ID)),
|
.INSTANCE_ID($sformatf("%s-tags", INSTANCE_ID)),
|
||||||
|
@ -294,42 +312,51 @@ module VX_cache_bank #(
|
||||||
.NUM_BANKS (NUM_BANKS),
|
.NUM_BANKS (NUM_BANKS),
|
||||||
.NUM_WAYS (NUM_WAYS),
|
.NUM_WAYS (NUM_WAYS),
|
||||||
.WORD_SIZE (WORD_SIZE),
|
.WORD_SIZE (WORD_SIZE),
|
||||||
|
.WRITEBACK (WRITEBACK),
|
||||||
.UUID_WIDTH (UUID_WIDTH)
|
.UUID_WIDTH (UUID_WIDTH)
|
||||||
) cache_tags (
|
) cache_tags (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (tag_reset),
|
.reset (reset),
|
||||||
|
|
||||||
.req_uuid (req_uuid_st0),
|
.req_uuid (req_uuid_st0),
|
||||||
|
|
||||||
.stall (pipe_stall),
|
.stall (pipe_stall),
|
||||||
|
|
||||||
// init/fill/lookup/flush
|
// init/flush/fill/write/lookup
|
||||||
.init (do_init_st0 || do_flush_st0),
|
.init (do_init_st0),
|
||||||
|
.flush (do_flush_st0),
|
||||||
.fill (do_fill_st0),
|
.fill (do_fill_st0),
|
||||||
|
.write (do_cache_wr_st0),
|
||||||
.lookup (do_lookup_st0),
|
.lookup (do_lookup_st0),
|
||||||
.line_addr (addr_st0),
|
.line_addr (addr_st0),
|
||||||
|
.way_sel (flush_way_st0),
|
||||||
.tag_matches(tag_matches_st0),
|
.tag_matches(tag_matches_st0),
|
||||||
|
|
||||||
// replacement
|
// replacement
|
||||||
.repl_way (repl_way_st0),
|
.evict_dirty(evict_dirty_st0),
|
||||||
.repl_tag (repl_tag_st0)
|
.evict_way (evict_way_st0),
|
||||||
|
.evict_tag (evict_tag_st0)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
wire [`CS_LINE_ADDR_WIDTH-1:0] addr2_st0;
|
||||||
|
|
||||||
|
wire is_flush2_st0 = WRITEBACK && is_flush_st0;
|
||||||
|
|
||||||
assign mshr_id_st0 = is_creq_st0 ? mshr_alloc_id_st0 : replay_id_st0;
|
assign mshr_id_st0 = is_creq_st0 ? mshr_alloc_id_st0 : replay_id_st0;
|
||||||
|
|
||||||
assign way_sel_st0 = is_fill_st0 ? repl_way_st0 : (is_flush_st0 ? flush_way_st0 : tag_matches_st0);
|
assign way_sel_st0 = (is_fill_st0 || is_flush2_st0) ? evict_way_st0 : tag_matches_st0;
|
||||||
|
|
||||||
wire [`CS_LINE_ADDR_WIDTH-1:0] addr_r_st0 = (is_fill_st0 || is_flush_st0) ? {repl_tag_st0, addr_st0[`CS_LINE_SEL_BITS-1:0]} : addr_st0;
|
assign addr2_st0 = (is_fill_st0 || is_flush2_st0) ? {evict_tag_st0, line_sel_st0} : addr_st0;
|
||||||
|
|
||||||
VX_pipe_register #(
|
VX_pipe_register #(
|
||||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1),
|
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1),
|
||||||
.RESETW (1)
|
.RESETW (1)
|
||||||
) pipe_reg1 (
|
) pipe_reg1 (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.enable (~pipe_stall),
|
.enable (~pipe_stall),
|
||||||
.data_in ({valid_st0, is_flush_st0, is_replay_st0, is_fill_st0, is_creq_st0, creq_flush_st0, rw_st0, addr_r_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, way_sel_st0, mshr_pending_st0}),
|
.data_in ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush2_st0, is_creq_st0, creq_flush_st0, rw_st0, addr2_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, way_sel_st0, evict_dirty_st0, mshr_pending_st0}),
|
||||||
.data_out ({valid_st1, is_flush_st1, is_replay_st1, is_fill_st1, is_creq_st1, creq_flush_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, way_sel_st1, mshr_pending_st1})
|
.data_out ({valid_st1, is_init_st1, is_replay_st1, is_fill_st1, is_flush_st1, is_creq_st1, creq_flush_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, way_sel_st1, evict_dirty_st1, mshr_pending_st1})
|
||||||
);
|
);
|
||||||
|
|
||||||
// we have a tag hit
|
// we have a tag hit
|
||||||
|
@ -343,35 +370,40 @@ module VX_cache_bank #(
|
||||||
|
|
||||||
wire is_read_st1 = is_creq_st1 && ~rw_st1;
|
wire is_read_st1 = is_creq_st1 && ~rw_st1;
|
||||||
wire is_write_st1 = is_creq_st1 && rw_st1;
|
wire is_write_st1 = is_creq_st1 && rw_st1;
|
||||||
|
|
||||||
|
wire do_init_st1 = valid_st1 && is_init_st1;
|
||||||
|
wire do_fill_st1 = valid_st1 && is_fill_st1;
|
||||||
|
wire do_flush_st1 = valid_st1 && is_flush_st1;
|
||||||
|
|
||||||
wire do_creq_rd_st1 = valid_st1 && is_read_st1;
|
wire do_creq_rd_st1 = valid_st1 && is_read_st1;
|
||||||
wire do_creq_wr_st1 = valid_st1 && is_write_st1;
|
wire do_creq_wr_st1 = valid_st1 && is_write_st1;
|
||||||
wire do_fill_st1 = valid_st1 && is_fill_st1;
|
|
||||||
wire do_replay_rd_st1 = valid_st1 && is_replay_st1 && ~rw_st1;
|
wire do_replay_rd_st1 = valid_st1 && is_replay_st1 && ~rw_st1;
|
||||||
wire do_replay_wr_st1 = valid_st1 && is_replay_st1 && rw_st1;
|
wire do_replay_wr_st1 = valid_st1 && is_replay_st1 && rw_st1;
|
||||||
|
|
||||||
wire do_cache_rd_st1 = do_read_hit_st1 || do_replay_rd_st1;
|
|
||||||
wire do_cache_wr_st1 = do_write_hit_st1 || do_replay_wr_st1;
|
|
||||||
|
|
||||||
wire do_read_hit_st1 = do_creq_rd_st1 && is_hit_st1;
|
wire do_read_hit_st1 = do_creq_rd_st1 && is_hit_st1;
|
||||||
wire do_read_miss_st1 = do_creq_rd_st1 && ~is_hit_st1;
|
wire do_read_miss_st1 = do_creq_rd_st1 && ~is_hit_st1;
|
||||||
|
|
||||||
wire do_write_hit_st1 = do_creq_wr_st1 && is_hit_st1;
|
wire do_write_hit_st1 = do_creq_wr_st1 && is_hit_st1;
|
||||||
wire do_write_miss_st1= do_creq_wr_st1 && ~is_hit_st1;
|
wire do_write_miss_st1= do_creq_wr_st1 && ~is_hit_st1;
|
||||||
|
|
||||||
wire do_flush_st1 = valid_st1 && is_flush_st1;
|
wire do_cache_rd_st1 = do_read_hit_st1 || do_replay_rd_st1;
|
||||||
|
wire do_cache_wr_st1 = do_write_hit_st1 || do_replay_wr_st1;
|
||||||
|
|
||||||
|
assign line_sel_st1 = addr_st1[`CS_LINE_SEL_BITS-1:0];
|
||||||
|
|
||||||
`UNUSED_VAR (do_write_miss_st1)
|
`UNUSED_VAR (do_write_miss_st1)
|
||||||
|
|
||||||
// ensure mshr replay always get a hit
|
// ensure mshr replay always get a hit
|
||||||
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("runtime error: invalid mshr replay"));
|
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("missed mshr replay"));
|
||||||
|
|
||||||
// detect BRAM's read-during-write hazard
|
// both tag and data stores use BRAM with no read-during-write protection.
|
||||||
assign rdw_hazard_st0 = do_fill_st0; // stall cycle after a fill
|
// we need to stall the pipeline to prevent read-after-write hazards.
|
||||||
wire rdw_case1 = do_cache_rd_st0 && do_cache_wr_st1 && (addr_st0 == addr_st1); // standard cache access
|
assign rdw_hazard1_sel = do_fill_st0; // stall first replay following a fill
|
||||||
wire rdw_case2 = WRITEBACK && (do_flush_st0 || do_fill_st0) && do_cache_wr_st1; // a writeback can evict preceeding write
|
assign rdw_hazard2_sel = WRITEBACK && do_cache_wr_st0; // a writeback can evict any preceeding write
|
||||||
always @(posedge clk) begin // after a write to same address
|
always @(posedge clk) begin
|
||||||
rdw_hazard_st1 <= (rdw_case1 || rdw_case2)
|
// stall reads following writes to same line address
|
||||||
&& ~rdw_hazard_st1; // invalidate if pipeline stalled to avoid repeats
|
rdw_hazard3_st1 <= do_cache_rd_st0 && do_cache_wr_st1 && (line_sel_st0 == line_sel_st1)
|
||||||
|
&& ~rdw_hazard3_st1; // release pipeline stall
|
||||||
end
|
end
|
||||||
|
|
||||||
wire [`CS_LINE_WIDTH-1:0] write_data_st1 = {`CS_WORDS_PER_LINE{data_st1[`CS_WORD_WIDTH-1:0]}};
|
wire [`CS_LINE_WIDTH-1:0] write_data_st1 = {`CS_WORDS_PER_LINE{data_st1[`CS_WORD_WIDTH-1:0]}};
|
||||||
|
@ -380,7 +412,6 @@ module VX_cache_bank #(
|
||||||
|
|
||||||
wire [`CS_LINE_WIDTH-1:0] dirty_data_st1;
|
wire [`CS_LINE_WIDTH-1:0] dirty_data_st1;
|
||||||
wire [LINE_SIZE-1:0] dirty_byteen_st1;
|
wire [LINE_SIZE-1:0] dirty_byteen_st1;
|
||||||
wire dirty_valid_st1;
|
|
||||||
|
|
||||||
if (`CS_WORDS_PER_LINE > 1) begin
|
if (`CS_WORDS_PER_LINE > 1) begin
|
||||||
reg [LINE_SIZE-1:0] write_byteen_r;
|
reg [LINE_SIZE-1:0] write_byteen_r;
|
||||||
|
@ -393,8 +424,6 @@ module VX_cache_bank #(
|
||||||
assign write_byteen_st1 = byteen_st1;
|
assign write_byteen_st1 = byteen_st1;
|
||||||
end
|
end
|
||||||
|
|
||||||
`RESET_RELAY (data_reset, reset);
|
|
||||||
|
|
||||||
VX_cache_data #(
|
VX_cache_data #(
|
||||||
.INSTANCE_ID ($sformatf("%s-data", INSTANCE_ID)),
|
.INSTANCE_ID ($sformatf("%s-data", INSTANCE_ID)),
|
||||||
.BANK_ID (BANK_ID),
|
.BANK_ID (BANK_ID),
|
||||||
|
@ -405,17 +434,19 @@ module VX_cache_bank #(
|
||||||
.WORD_SIZE (WORD_SIZE),
|
.WORD_SIZE (WORD_SIZE),
|
||||||
.WRITE_ENABLE (WRITE_ENABLE),
|
.WRITE_ENABLE (WRITE_ENABLE),
|
||||||
.WRITEBACK (WRITEBACK),
|
.WRITEBACK (WRITEBACK),
|
||||||
|
.DIRTY_BYTES (DIRTY_BYTES),
|
||||||
.UUID_WIDTH (UUID_WIDTH)
|
.UUID_WIDTH (UUID_WIDTH)
|
||||||
) cache_data (
|
) cache_data (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (data_reset),
|
.reset (reset),
|
||||||
|
|
||||||
.req_uuid (req_uuid_st1),
|
.req_uuid (req_uuid_st1),
|
||||||
|
|
||||||
.stall (pipe_stall),
|
.stall (pipe_stall),
|
||||||
|
|
||||||
|
.init (do_init_st1),
|
||||||
.read (do_cache_rd_st1),
|
.read (do_cache_rd_st1),
|
||||||
.fill (do_fill_st1 && ~rdw_hazard_st1),
|
.fill (do_fill_st1),
|
||||||
.flush (do_flush_st1),
|
.flush (do_flush_st1),
|
||||||
.write (do_cache_wr_st1),
|
.write (do_cache_wr_st1),
|
||||||
.way_sel (way_sel_st1),
|
.way_sel (way_sel_st1),
|
||||||
|
@ -425,7 +456,6 @@ module VX_cache_bank #(
|
||||||
.write_data (write_data_st1),
|
.write_data (write_data_st1),
|
||||||
.write_byteen(write_byteen_st1),
|
.write_byteen(write_byteen_st1),
|
||||||
.read_data (read_data_st1),
|
.read_data (read_data_st1),
|
||||||
.dirty_valid(dirty_valid_st1),
|
|
||||||
.dirty_data (dirty_data_st1),
|
.dirty_data (dirty_data_st1),
|
||||||
.dirty_byteen(dirty_byteen_st1)
|
.dirty_byteen(dirty_byteen_st1)
|
||||||
);
|
);
|
||||||
|
@ -461,8 +491,6 @@ module VX_cache_bank #(
|
||||||
`UNUSED_PIN (size)
|
`UNUSED_PIN (size)
|
||||||
);
|
);
|
||||||
|
|
||||||
`RESET_RELAY (mshr_reset, reset);
|
|
||||||
|
|
||||||
VX_cache_mshr #(
|
VX_cache_mshr #(
|
||||||
.INSTANCE_ID ($sformatf("%s-mshr", INSTANCE_ID)),
|
.INSTANCE_ID ($sformatf("%s-mshr", INSTANCE_ID)),
|
||||||
.BANK_ID (BANK_ID),
|
.BANK_ID (BANK_ID),
|
||||||
|
@ -473,7 +501,7 @@ module VX_cache_bank #(
|
||||||
.DATA_WIDTH (WORD_SEL_WIDTH + WORD_SIZE + `CS_WORD_WIDTH + TAG_WIDTH + REQ_SEL_WIDTH)
|
.DATA_WIDTH (WORD_SEL_WIDTH + WORD_SIZE + `CS_WORD_WIDTH + TAG_WIDTH + REQ_SEL_WIDTH)
|
||||||
) cache_mshr (
|
) cache_mshr (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (mshr_reset),
|
.reset (reset),
|
||||||
|
|
||||||
.deq_req_uuid (req_uuid_sel),
|
.deq_req_uuid (req_uuid_sel),
|
||||||
.lkp_req_uuid (req_uuid_st0),
|
.lkp_req_uuid (req_uuid_st0),
|
||||||
|
@ -536,16 +564,14 @@ module VX_cache_bank #(
|
||||||
assign crsp_queue_data = read_data_st1;
|
assign crsp_queue_data = read_data_st1;
|
||||||
assign crsp_queue_tag = tag_st1;
|
assign crsp_queue_tag = tag_st1;
|
||||||
|
|
||||||
`RESET_RELAY (crsp_queue_reset, reset);
|
|
||||||
|
|
||||||
VX_elastic_buffer #(
|
VX_elastic_buffer #(
|
||||||
.DATAW (TAG_WIDTH + `CS_WORD_WIDTH + REQ_SEL_WIDTH),
|
.DATAW (TAG_WIDTH + `CS_WORD_WIDTH + REQ_SEL_WIDTH),
|
||||||
.SIZE (CRSQ_SIZE),
|
.SIZE (CRSQ_SIZE),
|
||||||
.OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF))
|
.OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF))
|
||||||
) core_rsp_queue (
|
) core_rsp_queue (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (crsp_queue_reset),
|
.reset (reset),
|
||||||
.valid_in (crsp_queue_valid && ~rdw_hazard_st1),
|
.valid_in (crsp_queue_valid && ~rdw_hazard3_st1),
|
||||||
.ready_in (crsp_queue_ready),
|
.ready_in (crsp_queue_ready),
|
||||||
.data_in ({crsp_queue_tag, crsp_queue_data, crsp_queue_idx}),
|
.data_in ({crsp_queue_tag, crsp_queue_data, crsp_queue_idx}),
|
||||||
.data_out ({core_rsp_tag, core_rsp_data, core_rsp_idx}),
|
.data_out ({core_rsp_tag, core_rsp_data, core_rsp_idx}),
|
||||||
|
@ -557,7 +583,7 @@ module VX_cache_bank #(
|
||||||
|
|
||||||
// schedule memory request
|
// schedule memory request
|
||||||
|
|
||||||
wire mreq_queue_push, mreq_queue_pop, mreq_queue_empty;
|
wire mreq_queue_push, mreq_queue_pop;
|
||||||
wire [`CS_LINE_WIDTH-1:0] mreq_queue_data;
|
wire [`CS_LINE_WIDTH-1:0] mreq_queue_data;
|
||||||
wire [LINE_SIZE-1:0] mreq_queue_byteen;
|
wire [LINE_SIZE-1:0] mreq_queue_byteen;
|
||||||
wire [`CS_LINE_ADDR_WIDTH-1:0] mreq_queue_addr;
|
wire [`CS_LINE_ADDR_WIDTH-1:0] mreq_queue_addr;
|
||||||
|
@ -565,30 +591,42 @@ module VX_cache_bank #(
|
||||||
wire mreq_queue_rw;
|
wire mreq_queue_rw;
|
||||||
wire mreq_queue_flush;
|
wire mreq_queue_flush;
|
||||||
|
|
||||||
wire is_evict_st1 = (is_fill_st1 || is_flush_st1) && dirty_valid_st1;
|
wire is_fill_or_flush_st1 = is_fill_st1 || is_flush_st1;
|
||||||
wire do_writeback_st1 = valid_st1 && is_evict_st1;
|
wire do_fill_or_flush_st1 = valid_st1 && is_fill_or_flush_st1;
|
||||||
`UNUSED_VAR (do_writeback_st1)
|
wire do_writeback_st1 = do_fill_or_flush_st1 && evict_dirty_st1;
|
||||||
|
|
||||||
if (WRITEBACK) begin
|
if (WRITEBACK) begin
|
||||||
|
if (DIRTY_BYTES) begin
|
||||||
|
// ensure dirty bytes match the tag info
|
||||||
|
wire has_dirty_bytes = (| dirty_byteen_st1);
|
||||||
|
`RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID)));
|
||||||
|
end
|
||||||
assign mreq_queue_push = (((do_read_miss_st1 || do_write_miss_st1) && ~mshr_pending_st1)
|
assign mreq_queue_push = (((do_read_miss_st1 || do_write_miss_st1) && ~mshr_pending_st1)
|
||||||
|| do_writeback_st1)
|
|| do_writeback_st1)
|
||||||
&& ~rdw_hazard_st1;
|
&& ~rdw_hazard3_st1;
|
||||||
end else begin
|
end else begin
|
||||||
`UNUSED_VAR (dirty_valid_st1)
|
`UNUSED_VAR (do_writeback_st1)
|
||||||
assign mreq_queue_push = ((do_read_miss_st1 && ~mshr_pending_st1)
|
assign mreq_queue_push = ((do_read_miss_st1 && ~mshr_pending_st1)
|
||||||
|| do_creq_wr_st1)
|
|| do_creq_wr_st1)
|
||||||
&& ~rdw_hazard_st1;
|
&& ~rdw_hazard3_st1;
|
||||||
end
|
end
|
||||||
|
|
||||||
assign mreq_queue_pop = mem_req_valid && mem_req_ready;
|
assign mreq_queue_pop = mem_req_valid && mem_req_ready;
|
||||||
assign mreq_queue_rw = WRITE_ENABLE && (WRITEBACK ? is_evict_st1 : rw_st1);
|
|
||||||
assign mreq_queue_addr = addr_st1;
|
assign mreq_queue_addr = addr_st1;
|
||||||
assign mreq_queue_id = mshr_id_st1;
|
assign mreq_queue_id = mshr_id_st1;
|
||||||
assign mreq_queue_data = is_write_st1 ? write_data_st1 : dirty_data_st1;
|
|
||||||
assign mreq_queue_byteen = is_write_st1 ? write_byteen_st1 : dirty_byteen_st1;
|
|
||||||
assign mreq_queue_flush = creq_flush_st1;
|
assign mreq_queue_flush = creq_flush_st1;
|
||||||
|
|
||||||
`RESET_RELAY (mreq_queue_reset, reset);
|
if (WRITE_ENABLE) begin
|
||||||
|
assign mreq_queue_rw = WRITEBACK ? is_fill_or_flush_st1 : rw_st1;
|
||||||
|
assign mreq_queue_data = WRITEBACK ? dirty_data_st1 : write_data_st1;
|
||||||
|
assign mreq_queue_byteen = WRITEBACK ? dirty_byteen_st1 : write_byteen_st1;
|
||||||
|
end else begin
|
||||||
|
assign mreq_queue_rw = 0;
|
||||||
|
assign mreq_queue_data = 0;
|
||||||
|
assign mreq_queue_byteen = 0;
|
||||||
|
`UNUSED_VAR (dirty_data_st1)
|
||||||
|
`UNUSED_VAR (dirty_byteen_st1)
|
||||||
|
end
|
||||||
|
|
||||||
VX_fifo_queue #(
|
VX_fifo_queue #(
|
||||||
.DATAW (1 + `CS_LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + 1),
|
.DATAW (1 + `CS_LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + 1),
|
||||||
|
@ -597,7 +635,7 @@ module VX_cache_bank #(
|
||||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||||
) mem_req_queue (
|
) mem_req_queue (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (mreq_queue_reset),
|
.reset (reset),
|
||||||
.push (mreq_queue_push),
|
.push (mreq_queue_push),
|
||||||
.pop (mreq_queue_pop),
|
.pop (mreq_queue_pop),
|
||||||
.data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_id, mreq_queue_byteen, mreq_queue_data, mreq_queue_flush}),
|
.data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_id, mreq_queue_byteen, mreq_queue_data, mreq_queue_flush}),
|
||||||
|
@ -621,32 +659,32 @@ module VX_cache_bank #(
|
||||||
|
|
||||||
`ifdef DBG_TRACE_CACHE
|
`ifdef DBG_TRACE_CACHE
|
||||||
wire crsp_queue_fire = crsp_queue_valid && crsp_queue_ready;
|
wire crsp_queue_fire = crsp_queue_valid && crsp_queue_ready;
|
||||||
wire pipeline_stall = (replay_valid || mem_rsp_valid || core_req_valid || line_flush_valid)
|
wire input_stall = (replay_valid || mem_rsp_valid || core_req_valid || flush_valid)
|
||||||
&& ~(replay_fire || mem_rsp_fire || core_req_fire || line_flush_valid);
|
&& ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire);
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (pipeline_stall) begin
|
if (input_stall || pipe_stall) begin
|
||||||
`TRACE(3, ("%d: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw_st0=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard_st0));
|
`TRACE(3, ("%d: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1));
|
||||||
end
|
end
|
||||||
if (mem_rsp_fire) begin
|
if (mem_rsp_fire) begin
|
||||||
`TRACE(2, ("%d: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data));
|
`TRACE(2, ("%d: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data));
|
||||||
end
|
end
|
||||||
if (replay_fire) begin
|
if (replay_fire) begin
|
||||||
`TRACE(2, ("%d: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel));
|
`TRACE(2, ("%d: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel));
|
||||||
end
|
end
|
||||||
if (core_req_fire) begin
|
if (core_req_fire) begin
|
||||||
if (core_req_rw)
|
if (core_req_rw)
|
||||||
`TRACE(2, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel));
|
`TRACE(2, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel));
|
||||||
else
|
else
|
||||||
`TRACE(2, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel));
|
`TRACE(2, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel));
|
||||||
end
|
end
|
||||||
if (crsp_queue_fire) begin
|
if (crsp_queue_fire) begin
|
||||||
`TRACE(2, ("%d: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1));
|
`TRACE(2, ("%d: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1));
|
||||||
end
|
end
|
||||||
if (mreq_queue_push) begin
|
if (mreq_queue_push) begin
|
||||||
if (do_creq_wr_st1 && !WRITEBACK)
|
if (do_creq_wr_st1 && !WRITEBACK)
|
||||||
`TRACE(2, ("%d: %s writethrough: addr=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1));
|
`TRACE(2, ("%d: %s writethrough: addr=0x%0h, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1));
|
||||||
else if (do_writeback_st1)
|
else if (do_writeback_st1)
|
||||||
`TRACE(2, ("%d: %s writeback: addr=0x%0h, byteen=%b, data=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data));
|
`TRACE(2, ("%d: %s writeback: addr=0x%0h, byteen=%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data));
|
||||||
else
|
else
|
||||||
`TRACE(2, ("%d: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1));
|
`TRACE(2, ("%d: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1));
|
||||||
end
|
end
|
||||||
|
|
10
hw/rtl/cache/VX_cache_cluster.sv
vendored
10
hw/rtl/cache/VX_cache_cluster.sv
vendored
|
@ -49,6 +49,9 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
||||||
// Enable cache writeback
|
// Enable cache writeback
|
||||||
parameter WRITEBACK = 0,
|
parameter WRITEBACK = 0,
|
||||||
|
|
||||||
|
// Enable dirty bytes on writeback
|
||||||
|
parameter DIRTY_BYTES = 0,
|
||||||
|
|
||||||
// Request debug identifier
|
// Request debug identifier
|
||||||
parameter UUID_WIDTH = 0,
|
parameter UUID_WIDTH = 0,
|
||||||
|
|
||||||
|
@ -99,6 +102,8 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
||||||
.TAG_WIDTH (ARB_TAG_WIDTH)
|
.TAG_WIDTH (ARB_TAG_WIDTH)
|
||||||
) arb_core_bus_if[NUM_CACHES * NUM_REQS]();
|
) arb_core_bus_if[NUM_CACHES * NUM_REQS]();
|
||||||
|
|
||||||
|
`RESET_RELAY_EX (cache_arb_reset, reset, NUM_REQS, `MAX_FANOUT);
|
||||||
|
|
||||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||||
VX_mem_bus_if #(
|
VX_mem_bus_if #(
|
||||||
.DATA_SIZE (WORD_SIZE),
|
.DATA_SIZE (WORD_SIZE),
|
||||||
|
@ -114,8 +119,6 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
||||||
`ASSIGN_VX_MEM_BUS_IF (core_bus_tmp_if[j], core_bus_if[j * NUM_REQS + i]);
|
`ASSIGN_VX_MEM_BUS_IF (core_bus_tmp_if[j], core_bus_if[j * NUM_REQS + i]);
|
||||||
end
|
end
|
||||||
|
|
||||||
`RESET_RELAY (arb_reset, reset);
|
|
||||||
|
|
||||||
VX_mem_arb #(
|
VX_mem_arb #(
|
||||||
.NUM_INPUTS (NUM_INPUTS),
|
.NUM_INPUTS (NUM_INPUTS),
|
||||||
.NUM_OUTPUTS (NUM_CACHES),
|
.NUM_OUTPUTS (NUM_CACHES),
|
||||||
|
@ -127,7 +130,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
||||||
.RSP_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? 2 : 0)
|
.RSP_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? 2 : 0)
|
||||||
) cache_arb (
|
) cache_arb (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (arb_reset),
|
.reset (cache_arb_reset[i]),
|
||||||
.bus_in_if (core_bus_tmp_if),
|
.bus_in_if (core_bus_tmp_if),
|
||||||
.bus_out_if (arb_core_bus_tmp_if)
|
.bus_out_if (arb_core_bus_tmp_if)
|
||||||
);
|
);
|
||||||
|
@ -155,6 +158,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
||||||
.MREQ_SIZE (MREQ_SIZE),
|
.MREQ_SIZE (MREQ_SIZE),
|
||||||
.WRITE_ENABLE (WRITE_ENABLE),
|
.WRITE_ENABLE (WRITE_ENABLE),
|
||||||
.WRITEBACK (WRITEBACK),
|
.WRITEBACK (WRITEBACK),
|
||||||
|
.DIRTY_BYTES (DIRTY_BYTES),
|
||||||
.UUID_WIDTH (UUID_WIDTH),
|
.UUID_WIDTH (UUID_WIDTH),
|
||||||
.TAG_WIDTH (ARB_TAG_WIDTH),
|
.TAG_WIDTH (ARB_TAG_WIDTH),
|
||||||
.TAG_SEL_IDX (TAG_SEL_IDX),
|
.TAG_SEL_IDX (TAG_SEL_IDX),
|
||||||
|
|
123
hw/rtl/cache/VX_cache_data.sv
vendored
123
hw/rtl/cache/VX_cache_data.sv
vendored
|
@ -30,6 +30,8 @@ module VX_cache_data #(
|
||||||
parameter WRITE_ENABLE = 1,
|
parameter WRITE_ENABLE = 1,
|
||||||
// Enable cache writeback
|
// Enable cache writeback
|
||||||
parameter WRITEBACK = 0,
|
parameter WRITEBACK = 0,
|
||||||
|
// Enable dirty bytes on writeback
|
||||||
|
parameter DIRTY_BYTES = 0,
|
||||||
// Request debug identifier
|
// Request debug identifier
|
||||||
parameter UUID_WIDTH = 0
|
parameter UUID_WIDTH = 0
|
||||||
) (
|
) (
|
||||||
|
@ -42,6 +44,7 @@ module VX_cache_data #(
|
||||||
|
|
||||||
input wire stall,
|
input wire stall,
|
||||||
|
|
||||||
|
input wire init,
|
||||||
input wire read,
|
input wire read,
|
||||||
input wire fill,
|
input wire fill,
|
||||||
input wire flush,
|
input wire flush,
|
||||||
|
@ -53,89 +56,88 @@ module VX_cache_data #(
|
||||||
input wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen,
|
input wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen,
|
||||||
input wire [NUM_WAYS-1:0] way_sel,
|
input wire [NUM_WAYS-1:0] way_sel,
|
||||||
output wire [`CS_WORD_WIDTH-1:0] read_data,
|
output wire [`CS_WORD_WIDTH-1:0] read_data,
|
||||||
output wire dirty_valid,
|
|
||||||
output wire [`CS_LINE_WIDTH-1:0] dirty_data,
|
output wire [`CS_LINE_WIDTH-1:0] dirty_data,
|
||||||
output wire [LINE_SIZE-1:0] dirty_byteen
|
output wire [LINE_SIZE-1:0] dirty_byteen
|
||||||
);
|
);
|
||||||
`UNUSED_SPARAM (INSTANCE_ID)
|
`UNUSED_SPARAM (INSTANCE_ID)
|
||||||
`UNUSED_PARAM (BANK_ID)
|
`UNUSED_PARAM (BANK_ID)
|
||||||
`UNUSED_PARAM (WORD_SIZE)
|
`UNUSED_PARAM (WORD_SIZE)
|
||||||
`UNUSED_VAR (reset)
|
|
||||||
`UNUSED_VAR (stall)
|
`UNUSED_VAR (stall)
|
||||||
`UNUSED_VAR (line_addr)
|
`UNUSED_VAR (line_addr)
|
||||||
|
`UNUSED_VAR (init)
|
||||||
`UNUSED_VAR (read)
|
`UNUSED_VAR (read)
|
||||||
`UNUSED_VAR (flush)
|
`UNUSED_VAR (flush)
|
||||||
|
|
||||||
localparam BYTEENW = (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) ? (LINE_SIZE * NUM_WAYS) : 1;
|
localparam BYTEENW = (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) ? (LINE_SIZE * NUM_WAYS) : 1;
|
||||||
|
|
||||||
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
|
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
|
||||||
|
|
||||||
|
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_rdata;
|
||||||
wire [`LOG2UP(NUM_WAYS)-1:0] way_idx;
|
wire [`LOG2UP(NUM_WAYS)-1:0] way_idx;
|
||||||
|
|
||||||
if (WRITEBACK) begin
|
if (WRITEBACK) begin
|
||||||
reg [`CS_LINES_PER_BANK * NUM_WAYS-1:0][LINE_SIZE-1:0] dirty_bytes_r;
|
if (DIRTY_BYTES) begin
|
||||||
reg [`CS_LINES_PER_BANK * NUM_WAYS-1:0] dirty_blocks_r;
|
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_rdata;
|
||||||
|
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_wdata;
|
||||||
|
|
||||||
wire [`CLOG2(`CS_LINES_PER_BANK * NUM_WAYS)-1:0] way_addr;
|
for (genvar i = 0; i < NUM_WAYS; ++i) begin
|
||||||
if (NUM_WAYS > 1) begin
|
wire [LINE_SIZE-1:0] wdata = write ? (bs_rdata[i] | write_byteen) : ((fill || flush) ? '0 : bs_rdata[i]);
|
||||||
assign way_addr = {line_sel, way_idx};
|
assign bs_wdata[i] = init ? '0 : (way_sel[i] ? wdata : bs_rdata[i]);
|
||||||
|
end
|
||||||
|
|
||||||
|
VX_sp_ram #(
|
||||||
|
.DATAW (LINE_SIZE * NUM_WAYS),
|
||||||
|
.SIZE (`CS_LINES_PER_BANK)
|
||||||
|
) byteen_store (
|
||||||
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
|
.read (write || fill || flush),
|
||||||
|
.write (init || write || fill || flush),
|
||||||
|
.wren (1'b1),
|
||||||
|
.addr (line_sel),
|
||||||
|
.wdata (bs_wdata),
|
||||||
|
.rdata (bs_rdata)
|
||||||
|
);
|
||||||
|
|
||||||
|
assign dirty_byteen = bs_rdata[way_idx];
|
||||||
end else begin
|
end else begin
|
||||||
assign way_addr = line_sel;
|
assign dirty_byteen = {LINE_SIZE{1'b1}};
|
||||||
end
|
end
|
||||||
|
|
||||||
always @(posedge clk) begin
|
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] flipped_rdata;
|
||||||
if (fill) begin
|
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
|
||||||
dirty_bytes_r[way_addr] <= '0;
|
for (genvar j = 0; j < NUM_WAYS; ++j) begin
|
||||||
end else if (write) begin
|
assign flipped_rdata[j][i] = line_rdata[i][j];
|
||||||
dirty_bytes_r[way_addr] <= dirty_bytes_r[way_addr] | write_byteen;
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
assign dirty_data = flipped_rdata[way_idx];
|
||||||
always @(posedge clk) begin
|
|
||||||
if (reset) begin
|
|
||||||
for (integer i = 0; i < `CS_LINES_PER_BANK * NUM_WAYS; ++i) begin
|
|
||||||
dirty_blocks_r[i] <= 0;
|
|
||||||
end
|
|
||||||
end else begin
|
|
||||||
if (fill) begin
|
|
||||||
dirty_blocks_r[way_addr] <= 0;
|
|
||||||
end else if (write) begin
|
|
||||||
dirty_blocks_r[way_addr] <= 1;
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
assign dirty_byteen = dirty_bytes_r[way_addr];
|
|
||||||
assign dirty_valid = dirty_blocks_r[way_addr];
|
|
||||||
end else begin
|
end else begin
|
||||||
assign dirty_byteen = '0;
|
assign dirty_byteen = '0;
|
||||||
assign dirty_valid = 0;
|
assign dirty_data = '0;
|
||||||
end
|
end
|
||||||
|
|
||||||
// order the data layout to perform ways multiplexing last.
|
// order the data layout to perform ways multiplexing last.
|
||||||
// this allows converting way index to binary in parallel with BRAM read.
|
// this allows converting way index to binary in parallel with BRAM readaccess and way selection.
|
||||||
|
|
||||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] wdata;
|
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
|
||||||
wire [BYTEENW-1:0] wren;
|
wire [BYTEENW-1:0] line_wren;
|
||||||
|
|
||||||
if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin
|
if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin
|
||||||
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
|
|
||||||
assign wdata[i] = (fill || !WRITE_ENABLE) ? {NUM_WAYS{fill_data[i]}} : {NUM_WAYS{write_data[i]}};
|
|
||||||
end
|
|
||||||
|
|
||||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w;
|
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w;
|
||||||
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
|
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
|
||||||
for (genvar j = 0; j < NUM_WAYS; ++j) begin
|
for (genvar j = 0; j < NUM_WAYS; ++j) begin
|
||||||
|
assign line_wdata[i][j] = (fill || !WRITE_ENABLE) ? fill_data[i] : write_data[i];
|
||||||
assign wren_w[i][j] = ((fill || !WRITE_ENABLE) ? {WORD_SIZE{1'b1}} : write_byteen[i])
|
assign wren_w[i][j] = ((fill || !WRITE_ENABLE) ? {WORD_SIZE{1'b1}} : write_byteen[i])
|
||||||
& {WORD_SIZE{(way_sel[j] || (NUM_WAYS == 1))}};
|
& {WORD_SIZE{(way_sel[j] || (NUM_WAYS == 1))}};
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
assign wren = wren_w;
|
assign line_wren = wren_w;
|
||||||
end else begin
|
end else begin
|
||||||
`UNUSED_VAR (write)
|
`UNUSED_VAR (write)
|
||||||
`UNUSED_VAR (write_byteen)
|
`UNUSED_VAR (write_byteen)
|
||||||
`UNUSED_VAR (write_data)
|
`UNUSED_VAR (write_data)
|
||||||
assign wdata = fill_data;
|
assign line_wdata = fill_data;
|
||||||
assign wren = fill;
|
assign line_wren = fill;
|
||||||
end
|
end
|
||||||
|
|
||||||
VX_onehot_encoder #(
|
VX_onehot_encoder #(
|
||||||
|
@ -146,53 +148,50 @@ module VX_cache_data #(
|
||||||
`UNUSED_PIN (valid_out)
|
`UNUSED_PIN (valid_out)
|
||||||
);
|
);
|
||||||
|
|
||||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] rdata;
|
wire line_read = (read && ~stall)
|
||||||
|
|| (WRITEBACK && (fill || flush));
|
||||||
|
|
||||||
|
wire line_write = write || fill;
|
||||||
|
|
||||||
VX_sp_ram #(
|
VX_sp_ram #(
|
||||||
.DATAW (`CS_LINE_WIDTH * NUM_WAYS),
|
.DATAW (`CS_LINE_WIDTH * NUM_WAYS),
|
||||||
.SIZE (`CS_LINES_PER_BANK),
|
.SIZE (`CS_LINES_PER_BANK),
|
||||||
.WRENW (BYTEENW),
|
.WRENW (BYTEENW),
|
||||||
.NO_RWCHECK (1)
|
.NO_RWCHECK (1),
|
||||||
|
.RW_ASSERT (1)
|
||||||
) data_store (
|
) data_store (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.read (1'b1),
|
.reset (reset),
|
||||||
.write (write || fill),
|
.read (line_read),
|
||||||
.wren (wren),
|
.write (line_write),
|
||||||
|
.wren (line_wren),
|
||||||
.addr (line_sel),
|
.addr (line_sel),
|
||||||
.wdata (wdata),
|
.wdata (line_wdata),
|
||||||
.rdata (rdata)
|
.rdata (line_rdata)
|
||||||
);
|
);
|
||||||
|
|
||||||
wire [NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata;
|
wire [NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata;
|
||||||
if (`CS_WORDS_PER_LINE > 1) begin
|
if (`CS_WORDS_PER_LINE > 1) begin
|
||||||
assign per_way_rdata = rdata[wsel];
|
assign per_way_rdata = line_rdata[wsel];
|
||||||
end else begin
|
end else begin
|
||||||
`UNUSED_VAR (wsel)
|
`UNUSED_VAR (wsel)
|
||||||
assign per_way_rdata = rdata;
|
assign per_way_rdata = line_rdata;
|
||||||
end
|
end
|
||||||
assign read_data = per_way_rdata[way_idx];
|
assign read_data = per_way_rdata[way_idx];
|
||||||
|
|
||||||
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] dirty_data_w;
|
|
||||||
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
|
|
||||||
for (genvar j = 0; j < NUM_WAYS; ++j) begin
|
|
||||||
assign dirty_data_w[j][i] = rdata[i][j];
|
|
||||||
end
|
|
||||||
end
|
|
||||||
assign dirty_data = dirty_data_w[way_idx];
|
|
||||||
|
|
||||||
`ifdef DBG_TRACE_CACHE
|
`ifdef DBG_TRACE_CACHE
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (fill && ~stall) begin
|
if (fill && ~stall) begin
|
||||||
`TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data));
|
`TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data));
|
||||||
end
|
end
|
||||||
if (flush && ~stall) begin
|
if (flush && ~stall) begin
|
||||||
`TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b, byteen=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_valid, dirty_byteen));
|
`TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_byteen, dirty_data));
|
||||||
end
|
end
|
||||||
if (read && ~stall) begin
|
if (read && ~stall) begin
|
||||||
`TRACE(3, ("%d: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid));
|
`TRACE(3, ("%d: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid));
|
||||||
end
|
end
|
||||||
if (write && ~stall) begin
|
if (write && ~stall) begin
|
||||||
`TRACE(3, ("%d: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid));
|
`TRACE(3, ("%d: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid));
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
|
33
hw/rtl/cache/VX_cache_flush.sv
vendored
33
hw/rtl/cache/VX_cache_flush.sv
vendored
|
@ -26,13 +26,16 @@ module VX_cache_flush #(
|
||||||
VX_mem_bus_if.slave core_bus_in_if [NUM_REQS],
|
VX_mem_bus_if.slave core_bus_in_if [NUM_REQS],
|
||||||
VX_mem_bus_if.master core_bus_out_if [NUM_REQS],
|
VX_mem_bus_if.master core_bus_out_if [NUM_REQS],
|
||||||
input wire [NUM_BANKS-1:0] bank_req_fire,
|
input wire [NUM_BANKS-1:0] bank_req_fire,
|
||||||
output wire [NUM_BANKS-1:0] flush_valid,
|
output wire [NUM_BANKS-1:0] flush_begin,
|
||||||
input wire [NUM_BANKS-1:0] flush_ready
|
input wire [NUM_BANKS-1:0] flush_end
|
||||||
);
|
);
|
||||||
localparam STATE_IDLE = 0;
|
localparam STATE_IDLE = 0;
|
||||||
localparam STATE_WAIT = 1;
|
localparam STATE_WAIT1 = 1;
|
||||||
localparam STATE_FLUSH = 2;
|
localparam STATE_FLUSH = 2;
|
||||||
localparam STATE_DONE = 3;
|
localparam STATE_WAIT2 = 3;
|
||||||
|
localparam STATE_DONE = 4;
|
||||||
|
|
||||||
|
reg [2:0] state, state_n;
|
||||||
|
|
||||||
// track in-flight core requests
|
// track in-flight core requests
|
||||||
|
|
||||||
|
@ -76,8 +79,6 @@ module VX_cache_flush #(
|
||||||
`UNUSED_VAR (bank_req_fire)
|
`UNUSED_VAR (bank_req_fire)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
reg [1:0] state, state_n;
|
|
||||||
reg [NUM_BANKS-1:0] flush_done, flush_done_n;
|
reg [NUM_BANKS-1:0] flush_done, flush_done_n;
|
||||||
|
|
||||||
wire [NUM_REQS-1:0] flush_req_mask;
|
wire [NUM_REQS-1:0] flush_req_mask;
|
||||||
|
@ -113,22 +114,32 @@ module VX_cache_flush #(
|
||||||
case (state)
|
case (state)
|
||||||
STATE_IDLE: begin
|
STATE_IDLE: begin
|
||||||
if (flush_req_enable) begin
|
if (flush_req_enable) begin
|
||||||
state_n = (BANK_SEL_LATENCY != 0) ? STATE_WAIT : STATE_FLUSH;
|
state_n = (BANK_SEL_LATENCY != 0) ? STATE_WAIT1 : STATE_FLUSH;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
STATE_WAIT: begin
|
STATE_WAIT1: begin
|
||||||
if (no_inflight_reqs) begin
|
if (no_inflight_reqs) begin
|
||||||
state_n = STATE_FLUSH;
|
state_n = STATE_FLUSH;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
STATE_FLUSH: begin
|
STATE_FLUSH: begin
|
||||||
flush_done_n = flush_done | flush_ready;
|
// generate a flush request pulse
|
||||||
if (flush_done_n == 0) begin
|
state_n = STATE_WAIT2;
|
||||||
|
end
|
||||||
|
STATE_WAIT2: begin
|
||||||
|
// wait for all banks to finish flushing
|
||||||
|
flush_done_n = flush_done | flush_end;
|
||||||
|
if (flush_done_n == {NUM_BANKS{1'b1}}) begin
|
||||||
state_n = STATE_DONE;
|
state_n = STATE_DONE;
|
||||||
|
flush_done_n = '0;
|
||||||
|
// only release current flush requests
|
||||||
|
// and keep normal requests locked
|
||||||
lock_released_n = flush_req_mask;
|
lock_released_n = flush_req_mask;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
STATE_DONE: begin
|
STATE_DONE: begin
|
||||||
|
// wait until released flush requests are issued
|
||||||
|
// when returning to IDLE state other requests will unlock
|
||||||
lock_released_n = lock_released & ~core_bus_out_ready;
|
lock_released_n = lock_released & ~core_bus_out_ready;
|
||||||
if (lock_released_n == 0) begin
|
if (lock_released_n == 0) begin
|
||||||
state_n = STATE_IDLE;
|
state_n = STATE_IDLE;
|
||||||
|
@ -149,6 +160,6 @@ module VX_cache_flush #(
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
assign flush_valid = {NUM_BANKS{state == STATE_FLUSH}};
|
assign flush_begin = {NUM_BANKS{state == STATE_FLUSH}};
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
|
|
52
hw/rtl/cache/VX_cache_init.sv
vendored
52
hw/rtl/cache/VX_cache_init.sv
vendored
|
@ -1,52 +0,0 @@
|
||||||
// Copyright © 2019-2023
|
|
||||||
//
|
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
// you may not use this file except in compliance with the License.
|
|
||||||
// You may obtain a copy of the License at
|
|
||||||
// http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
//
|
|
||||||
// Unless required by applicable law or agreed to in writing, software
|
|
||||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
// See the License for the specific language governing permissions and
|
|
||||||
// limitations under the License.
|
|
||||||
|
|
||||||
`include "VX_cache_define.vh"
|
|
||||||
|
|
||||||
// cache flush unit
|
|
||||||
module VX_cache_init #(
|
|
||||||
// Size of cache in bytes
|
|
||||||
parameter CACHE_SIZE = 1024,
|
|
||||||
// Size of line inside a bank in bytes
|
|
||||||
parameter LINE_SIZE = 16,
|
|
||||||
// Number of banks
|
|
||||||
parameter NUM_BANKS = 1,
|
|
||||||
// Number of associative ways
|
|
||||||
parameter NUM_WAYS = 1
|
|
||||||
) (
|
|
||||||
input wire clk,
|
|
||||||
input wire reset,
|
|
||||||
output wire [`CS_LINE_SEL_BITS-1:0] addr_out,
|
|
||||||
output wire valid_out
|
|
||||||
);
|
|
||||||
reg enabled;
|
|
||||||
reg [`CS_LINE_SEL_BITS-1:0] line_ctr;
|
|
||||||
|
|
||||||
always @(posedge clk) begin
|
|
||||||
if (reset) begin
|
|
||||||
enabled <= 1;
|
|
||||||
line_ctr <= '0;
|
|
||||||
end else begin
|
|
||||||
if (enabled) begin
|
|
||||||
if (line_ctr == ((2 ** `CS_LINE_SEL_BITS)-1)) begin
|
|
||||||
enabled <= 0;
|
|
||||||
end
|
|
||||||
line_ctr <= line_ctr + `CS_LINE_SEL_BITS'(1);
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
assign addr_out = line_ctr;
|
|
||||||
assign valid_out = enabled;
|
|
||||||
|
|
||||||
endmodule
|
|
3
hw/rtl/cache/VX_cache_mshr.sv
vendored
3
hw/rtl/cache/VX_cache_mshr.sv
vendored
|
@ -232,9 +232,10 @@ module VX_cache_mshr #(
|
||||||
.LUTRAM (1)
|
.LUTRAM (1)
|
||||||
) entries (
|
) entries (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
.read (1'b1),
|
.read (1'b1),
|
||||||
.write (allocate_valid),
|
.write (allocate_valid),
|
||||||
`UNUSED_PIN (wren),
|
.wren (1'b1),
|
||||||
.waddr (allocate_id_r),
|
.waddr (allocate_id_r),
|
||||||
.wdata (allocate_data),
|
.wdata (allocate_data),
|
||||||
.raddr (dequeue_id_r),
|
.raddr (dequeue_id_r),
|
||||||
|
|
96
hw/rtl/cache/VX_cache_tags.sv
vendored
96
hw/rtl/cache/VX_cache_tags.sv
vendored
|
@ -26,6 +26,8 @@ module VX_cache_tags #(
|
||||||
parameter NUM_WAYS = 1,
|
parameter NUM_WAYS = 1,
|
||||||
// Size of a word in bytes
|
// Size of a word in bytes
|
||||||
parameter WORD_SIZE = 1,
|
parameter WORD_SIZE = 1,
|
||||||
|
// Enable cache writeback
|
||||||
|
parameter WRITEBACK = 0,
|
||||||
// Request debug identifier
|
// Request debug identifier
|
||||||
parameter UUID_WIDTH = 0
|
parameter UUID_WIDTH = 0
|
||||||
) (
|
) (
|
||||||
|
@ -40,74 +42,100 @@ module VX_cache_tags #(
|
||||||
|
|
||||||
// init/fill/lookup
|
// init/fill/lookup
|
||||||
input wire init,
|
input wire init,
|
||||||
|
input wire flush,
|
||||||
input wire fill,
|
input wire fill,
|
||||||
|
input wire write,
|
||||||
input wire lookup,
|
input wire lookup,
|
||||||
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
|
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
|
||||||
|
input wire [NUM_WAYS-1:0] way_sel,
|
||||||
output wire [NUM_WAYS-1:0] tag_matches,
|
output wire [NUM_WAYS-1:0] tag_matches,
|
||||||
|
|
||||||
// replacement
|
// eviction
|
||||||
output wire [NUM_WAYS-1:0] repl_way,
|
output wire evict_dirty,
|
||||||
output wire [`CS_TAG_SEL_BITS-1:0] repl_tag
|
output wire [NUM_WAYS-1:0] evict_way,
|
||||||
|
output wire [`CS_TAG_SEL_BITS-1:0] evict_tag
|
||||||
);
|
);
|
||||||
`UNUSED_SPARAM (INSTANCE_ID)
|
`UNUSED_SPARAM (INSTANCE_ID)
|
||||||
`UNUSED_PARAM (BANK_ID)
|
`UNUSED_PARAM (BANK_ID)
|
||||||
`UNUSED_VAR (reset)
|
|
||||||
`UNUSED_VAR (lookup)
|
`UNUSED_VAR (lookup)
|
||||||
|
|
||||||
// valid, tag
|
// valid, dirty, tag
|
||||||
localparam TAG_WIDTH = 1 + `CS_TAG_SEL_BITS;
|
localparam TAG_WIDTH = 1 + WRITEBACK + `CS_TAG_SEL_BITS;
|
||||||
|
|
||||||
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
|
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
|
||||||
wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_ADDR_TAG(line_addr);
|
wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_ADDR_TAG(line_addr);
|
||||||
|
|
||||||
wire [NUM_WAYS-1:0][`CS_TAG_SEL_BITS-1:0] read_tag;
|
wire [NUM_WAYS-1:0][`CS_TAG_SEL_BITS-1:0] read_tag;
|
||||||
wire [NUM_WAYS-1:0] read_valid;
|
wire [NUM_WAYS-1:0] read_valid;
|
||||||
|
wire [NUM_WAYS-1:0] read_dirty;
|
||||||
|
|
||||||
if (NUM_WAYS > 1) begin
|
if (NUM_WAYS > 1) begin
|
||||||
reg [NUM_WAYS-1:0] repl_way_r;
|
reg [NUM_WAYS-1:0] evict_way_r;
|
||||||
// cyclic assignment of replacement way
|
// cyclic assignment of replacement way
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
repl_way_r <= 1;
|
evict_way_r <= 1;
|
||||||
end else if (~stall) begin // hold the value on stalls prevent filling different slots twice
|
end else if (~stall) begin // holding the value on stalls prevents filling different slots twice
|
||||||
repl_way_r <= {repl_way_r[NUM_WAYS-2:0], repl_way_r[NUM_WAYS-1]};
|
evict_way_r <= {evict_way_r[NUM_WAYS-2:0], evict_way_r[NUM_WAYS-1]};
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
assign repl_way = repl_way_r;
|
assign evict_way = fill ? evict_way_r : way_sel;
|
||||||
|
|
||||||
VX_onehot_mux #(
|
VX_onehot_mux #(
|
||||||
.DATAW (`CS_TAG_SEL_BITS),
|
.DATAW (`CS_TAG_SEL_BITS),
|
||||||
.N (NUM_WAYS)
|
.N (NUM_WAYS)
|
||||||
) repl_tag_sel (
|
) evict_tag_sel (
|
||||||
.data_in (read_tag),
|
.data_in (read_tag),
|
||||||
.sel_in (repl_way_r),
|
.sel_in (evict_way),
|
||||||
.data_out (repl_tag)
|
.data_out (evict_tag)
|
||||||
);
|
);
|
||||||
end else begin
|
end else begin
|
||||||
`UNUSED_VAR (stall)
|
`UNUSED_VAR (stall)
|
||||||
assign repl_way = 1'b1;
|
assign evict_way = 1'b1;
|
||||||
assign repl_tag = read_tag;
|
assign evict_tag = read_tag;
|
||||||
end
|
end
|
||||||
|
|
||||||
|
// fill and flush need to also read in writeback mode
|
||||||
|
wire fill_s = fill && (!WRITEBACK || ~stall);
|
||||||
|
wire flush_s = flush && (!WRITEBACK || ~stall);
|
||||||
|
|
||||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin
|
for (genvar i = 0; i < NUM_WAYS; ++i) begin
|
||||||
|
|
||||||
wire do_fill = fill && repl_way[i];
|
wire do_fill = fill_s && evict_way[i];
|
||||||
wire do_write = init || do_fill;
|
wire do_flush = flush_s && (!WRITEBACK || way_sel[i]); // flush the whole line in writethrough mode
|
||||||
wire line_valid = ~init;
|
wire do_write = WRITEBACK && write && tag_matches[i];
|
||||||
|
|
||||||
|
wire line_read = (WRITEBACK && (fill_s || flush_s));
|
||||||
|
wire line_write = init || do_fill || do_flush || do_write;
|
||||||
|
wire line_valid = ~(init || flush);
|
||||||
|
|
||||||
|
wire [TAG_WIDTH-1:0] line_wdata;
|
||||||
|
wire [TAG_WIDTH-1:0] line_rdata;
|
||||||
|
|
||||||
|
if (WRITEBACK) begin
|
||||||
|
assign line_wdata = {line_valid, write, line_tag};
|
||||||
|
assign {read_valid[i], read_dirty[i], read_tag[i]} = line_rdata;
|
||||||
|
end else begin
|
||||||
|
assign line_wdata = {line_valid, line_tag};
|
||||||
|
assign {read_valid[i], read_tag[i]} = line_rdata;
|
||||||
|
assign read_dirty[i] = 1'b0;
|
||||||
|
end
|
||||||
|
|
||||||
VX_sp_ram #(
|
VX_sp_ram #(
|
||||||
.DATAW (TAG_WIDTH),
|
.DATAW (TAG_WIDTH),
|
||||||
.SIZE (`CS_LINES_PER_BANK),
|
.SIZE (`CS_LINES_PER_BANK),
|
||||||
.NO_RWCHECK (1)
|
.NO_RWCHECK (1),
|
||||||
|
.RW_ASSERT (1)
|
||||||
) tag_store (
|
) tag_store (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.read (1'b1),
|
.reset (reset),
|
||||||
.write (do_write),
|
.read (line_read),
|
||||||
`UNUSED_PIN (wren),
|
.write (line_write),
|
||||||
|
.wren (1'b1),
|
||||||
.addr (line_sel),
|
.addr (line_sel),
|
||||||
.wdata ({line_valid, line_tag}),
|
.wdata (line_wdata),
|
||||||
.rdata ({read_valid[i], read_tag[i]})
|
.rdata (line_rdata)
|
||||||
);
|
);
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -115,19 +143,31 @@ module VX_cache_tags #(
|
||||||
assign tag_matches[i] = read_valid[i] && (line_tag == read_tag[i]);
|
assign tag_matches[i] = read_valid[i] && (line_tag == read_tag[i]);
|
||||||
end
|
end
|
||||||
|
|
||||||
|
assign evict_dirty = | (read_dirty & evict_way);
|
||||||
|
|
||||||
`ifdef DBG_TRACE_CACHE
|
`ifdef DBG_TRACE_CACHE
|
||||||
|
wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_sel};
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (fill && ~stall) begin
|
if (fill && ~stall) begin
|
||||||
`TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), repl_way, line_sel, line_tag));
|
`TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_sel, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID)));
|
||||||
end
|
end
|
||||||
if (init) begin
|
if (init) begin
|
||||||
`TRACE(3, ("%d: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel));
|
`TRACE(3, ("%d: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel));
|
||||||
end
|
end
|
||||||
|
if (flush && ~stall) begin
|
||||||
|
`TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_sel, line_sel, evict_dirty));
|
||||||
|
end
|
||||||
if (lookup && ~stall) begin
|
if (lookup && ~stall) begin
|
||||||
if (tag_matches != 0) begin
|
if (tag_matches != 0) begin
|
||||||
`TRACE(3, ("%d: %s hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid));
|
if (write)
|
||||||
|
`TRACE(3, ("%d: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid));
|
||||||
|
else
|
||||||
|
`TRACE(3, ("%d: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid));
|
||||||
end else begin
|
end else begin
|
||||||
`TRACE(3, ("%d: %s miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid));
|
if (write)
|
||||||
|
`TRACE(3, ("%d: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid));
|
||||||
|
else
|
||||||
|
`TRACE(3, ("%d: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid));
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
8
hw/rtl/cache/VX_cache_top.sv
vendored
8
hw/rtl/cache/VX_cache_top.sv
vendored
|
@ -42,6 +42,12 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
||||||
// Enable cache writeable
|
// Enable cache writeable
|
||||||
parameter WRITE_ENABLE = 1,
|
parameter WRITE_ENABLE = 1,
|
||||||
|
|
||||||
|
// Enable cache writeback
|
||||||
|
parameter WRITEBACK = 0,
|
||||||
|
|
||||||
|
// Enable dirty bytes on writeback
|
||||||
|
parameter DIRTY_BYTES = 0,
|
||||||
|
|
||||||
// Request debug identifier
|
// Request debug identifier
|
||||||
parameter UUID_WIDTH = 0,
|
parameter UUID_WIDTH = 0,
|
||||||
|
|
||||||
|
@ -156,6 +162,8 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
||||||
.TAG_WIDTH (TAG_WIDTH),
|
.TAG_WIDTH (TAG_WIDTH),
|
||||||
.UUID_WIDTH (UUID_WIDTH),
|
.UUID_WIDTH (UUID_WIDTH),
|
||||||
.WRITE_ENABLE (WRITE_ENABLE),
|
.WRITE_ENABLE (WRITE_ENABLE),
|
||||||
|
.WRITEBACK (WRITEBACK),
|
||||||
|
.DIRTY_BYTES (DIRTY_BYTES),
|
||||||
.CORE_OUT_BUF (CORE_OUT_BUF),
|
.CORE_OUT_BUF (CORE_OUT_BUF),
|
||||||
.MEM_OUT_BUF (MEM_OUT_BUF)
|
.MEM_OUT_BUF (MEM_OUT_BUF)
|
||||||
) cache (
|
) cache (
|
||||||
|
|
12
hw/rtl/cache/VX_cache_wrap.sv
vendored
12
hw/rtl/cache/VX_cache_wrap.sv
vendored
|
@ -48,6 +48,9 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
||||||
// Enable cache writeback
|
// Enable cache writeback
|
||||||
parameter WRITEBACK = 0,
|
parameter WRITEBACK = 0,
|
||||||
|
|
||||||
|
// Enable dirty bytes on writeback
|
||||||
|
parameter DIRTY_BYTES = 0,
|
||||||
|
|
||||||
// Request debug identifier
|
// Request debug identifier
|
||||||
parameter UUID_WIDTH = 0,
|
parameter UUID_WIDTH = 0,
|
||||||
|
|
||||||
|
@ -187,6 +190,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
||||||
.MREQ_SIZE (MREQ_SIZE),
|
.MREQ_SIZE (MREQ_SIZE),
|
||||||
.WRITE_ENABLE (WRITE_ENABLE),
|
.WRITE_ENABLE (WRITE_ENABLE),
|
||||||
.WRITEBACK (WRITEBACK),
|
.WRITEBACK (WRITEBACK),
|
||||||
|
.DIRTY_BYTES (DIRTY_BYTES),
|
||||||
.UUID_WIDTH (UUID_WIDTH),
|
.UUID_WIDTH (UUID_WIDTH),
|
||||||
.TAG_WIDTH (TAG_WIDTH),
|
.TAG_WIDTH (TAG_WIDTH),
|
||||||
.CORE_OUT_BUF (NC_OR_BYPASS ? 1 : CORE_OUT_BUF),
|
.CORE_OUT_BUF (NC_OR_BYPASS ? 1 : CORE_OUT_BUF),
|
||||||
|
@ -223,12 +227,12 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (core_req_fire) begin
|
if (core_req_fire) begin
|
||||||
if (core_bus_if[i].req_data.rw)
|
if (core_bus_if[i].req_data.rw)
|
||||||
`TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid));
|
`TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid));
|
||||||
else
|
else
|
||||||
`TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid));
|
`TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid));
|
||||||
end
|
end
|
||||||
if (core_rsp_fire) begin
|
if (core_rsp_fire) begin
|
||||||
`TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid));
|
`TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid));
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -250,14 +254,14 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (mem_req_fire) begin
|
if (mem_req_fire) begin
|
||||||
if (mem_bus_if.req_data.rw)
|
if (mem_bus_if.req_data.rw)
|
||||||
`TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
|
`TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%h, data=0x%h (#%0d)\n",
|
||||||
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid));
|
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid));
|
||||||
else
|
else
|
||||||
`TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
|
`TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
|
||||||
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid));
|
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid));
|
||||||
end
|
end
|
||||||
if (mem_rsp_fire) begin
|
if (mem_rsp_fire) begin
|
||||||
`TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n",
|
`TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n",
|
||||||
$time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid));
|
$time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid));
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -83,7 +83,7 @@ module VX_alu_muldiv #(
|
||||||
.DEPTH (`LATENCY_IMUL),
|
.DEPTH (`LATENCY_IMUL),
|
||||||
.RESETW (1)
|
.RESETW (1)
|
||||||
) mul_shift_reg (
|
) mul_shift_reg (
|
||||||
.clk(clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.enable (mul_ready_in),
|
.enable (mul_ready_in),
|
||||||
.data_in ({mul_valid_in, execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, mul_result_tmp}),
|
.data_in ({mul_valid_in, execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, mul_result_tmp}),
|
||||||
|
@ -324,6 +324,7 @@ module VX_alu_muldiv #(
|
||||||
VX_stream_arb #(
|
VX_stream_arb #(
|
||||||
.NUM_INPUTS (2),
|
.NUM_INPUTS (2),
|
||||||
.DATAW (TAG_WIDTH + (NUM_LANES * `XLEN)),
|
.DATAW (TAG_WIDTH + (NUM_LANES * `XLEN)),
|
||||||
|
.ARBITER ("F"),
|
||||||
.OUT_BUF (1)
|
.OUT_BUF (1)
|
||||||
) rsp_buf (
|
) rsp_buf (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
|
|
|
@ -57,7 +57,7 @@ module VX_alu_unit #(
|
||||||
|
|
||||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin
|
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin
|
||||||
|
|
||||||
`RESET_RELAY (block_reset, reset);
|
`RESET_RELAY_EN (block_reset, reset,(BLOCK_SIZE > 1));
|
||||||
|
|
||||||
wire is_muldiv_op = `EXT_M_ENABLED && (per_block_execute_if[block_idx].data.op_args.alu.xtype == `ALU_TYPE_MULDIV);
|
wire is_muldiv_op = `EXT_M_ENABLED && (per_block_execute_if[block_idx].data.op_args.alu.xtype == `ALU_TYPE_MULDIV);
|
||||||
|
|
||||||
|
@ -72,15 +72,13 @@ module VX_alu_unit #(
|
||||||
assign int_execute_if.valid = per_block_execute_if[block_idx].valid && ~is_muldiv_op;
|
assign int_execute_if.valid = per_block_execute_if[block_idx].valid && ~is_muldiv_op;
|
||||||
assign int_execute_if.data = per_block_execute_if[block_idx].data;
|
assign int_execute_if.data = per_block_execute_if[block_idx].data;
|
||||||
|
|
||||||
`RESET_RELAY (int_reset, block_reset);
|
|
||||||
|
|
||||||
VX_alu_int #(
|
VX_alu_int #(
|
||||||
.INSTANCE_ID ($sformatf("%s-int%0d", INSTANCE_ID, block_idx)),
|
.INSTANCE_ID ($sformatf("%s-int%0d", INSTANCE_ID, block_idx)),
|
||||||
.BLOCK_IDX (block_idx),
|
.BLOCK_IDX (block_idx),
|
||||||
.NUM_LANES (NUM_LANES)
|
.NUM_LANES (NUM_LANES)
|
||||||
) alu_int (
|
) alu_int (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (int_reset),
|
.reset (block_reset),
|
||||||
.execute_if (int_execute_if),
|
.execute_if (int_execute_if),
|
||||||
.branch_ctl_if (branch_ctl_if[block_idx]),
|
.branch_ctl_if (branch_ctl_if[block_idx]),
|
||||||
.commit_if (int_commit_if)
|
.commit_if (int_commit_if)
|
||||||
|
@ -99,14 +97,12 @@ module VX_alu_unit #(
|
||||||
assign muldiv_execute_if.valid = per_block_execute_if[block_idx].valid && is_muldiv_op;
|
assign muldiv_execute_if.valid = per_block_execute_if[block_idx].valid && is_muldiv_op;
|
||||||
assign muldiv_execute_if.data = per_block_execute_if[block_idx].data;
|
assign muldiv_execute_if.data = per_block_execute_if[block_idx].data;
|
||||||
|
|
||||||
`RESET_RELAY (muldiv_reset, block_reset);
|
|
||||||
|
|
||||||
VX_alu_muldiv #(
|
VX_alu_muldiv #(
|
||||||
.INSTANCE_ID ($sformatf("%s-muldiv%0d", INSTANCE_ID, block_idx)),
|
.INSTANCE_ID ($sformatf("%s-muldiv%0d", INSTANCE_ID, block_idx)),
|
||||||
.NUM_LANES (NUM_LANES)
|
.NUM_LANES (NUM_LANES)
|
||||||
) muldiv_unit (
|
) muldiv_unit (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (muldiv_reset),
|
.reset (block_reset),
|
||||||
.execute_if (muldiv_execute_if),
|
.execute_if (muldiv_execute_if),
|
||||||
.commit_if (muldiv_commit_if)
|
.commit_if (muldiv_commit_if)
|
||||||
);
|
);
|
||||||
|
@ -121,15 +117,14 @@ module VX_alu_unit #(
|
||||||
|
|
||||||
// send response
|
// send response
|
||||||
|
|
||||||
`RESET_RELAY (arb_reset, block_reset);
|
|
||||||
|
|
||||||
VX_stream_arb #(
|
VX_stream_arb #(
|
||||||
.NUM_INPUTS (RSP_ARB_SIZE),
|
.NUM_INPUTS (RSP_ARB_SIZE),
|
||||||
.DATAW (RSP_ARB_DATAW),
|
.DATAW (RSP_ARB_DATAW),
|
||||||
.OUT_BUF (PARTIAL_BW ? 1 : 3)
|
.OUT_BUF (PARTIAL_BW ? 1 : 3),
|
||||||
|
.ARBITER ("F")
|
||||||
) rsp_arb (
|
) rsp_arb (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (arb_reset),
|
.reset (block_reset),
|
||||||
.valid_in ({
|
.valid_in ({
|
||||||
`ifdef EXT_M_ENABLE
|
`ifdef EXT_M_ENABLE
|
||||||
muldiv_commit_if.valid,
|
muldiv_commit_if.valid,
|
||||||
|
|
|
@ -313,6 +313,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
||||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||||
.TAG_WIDTH (DCACHE_TAG_WIDTH),
|
.TAG_WIDTH (DCACHE_TAG_WIDTH),
|
||||||
.TAG_SEL_BITS (DCACHE_TAG_WIDTH - `UUID_WIDTH),
|
.TAG_SEL_BITS (DCACHE_TAG_WIDTH - `UUID_WIDTH),
|
||||||
|
.ARBITER ("P"),
|
||||||
.REQ_OUT_BUF (0),
|
.REQ_OUT_BUF (0),
|
||||||
.RSP_OUT_BUF (0)
|
.RSP_OUT_BUF (0)
|
||||||
) lsu_adapter (
|
) lsu_adapter (
|
||||||
|
|
|
@ -52,7 +52,7 @@ module VX_dcr_data import VX_gpu_pkg::*, VX_trace_pkg::*; (
|
||||||
if (dcr_bus_if.write_valid) begin
|
if (dcr_bus_if.write_valid) begin
|
||||||
`TRACE(1, ("%d: base-dcr: state=", $time));
|
`TRACE(1, ("%d: base-dcr: state=", $time));
|
||||||
trace_base_dcr(1, dcr_bus_if.write_addr);
|
trace_base_dcr(1, dcr_bus_if.write_addr);
|
||||||
`TRACE(1, (", data=0x%0h\n", dcr_bus_if.write_data));
|
`TRACE(1, (", data=0x%h\n", dcr_bus_if.write_data));
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
|
|
@ -40,7 +40,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
||||||
localparam ISSUE_W = `LOG2UP(`ISSUE_WIDTH);
|
localparam ISSUE_W = `LOG2UP(`ISSUE_WIDTH);
|
||||||
localparam IN_DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `INST_OP_BITS + `INST_ARGS_BITS + 1 + `PC_BITS + `NR_BITS + `NT_WIDTH + (3 * `NUM_THREADS * `XLEN);
|
localparam IN_DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `INST_OP_BITS + `INST_ARGS_BITS + 1 + `PC_BITS + `NR_BITS + `NT_WIDTH + (3 * `NUM_THREADS * `XLEN);
|
||||||
localparam OUT_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `INST_OP_BITS + `INST_ARGS_BITS + 1 + `PC_BITS + `NR_BITS + `NT_WIDTH + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1;
|
localparam OUT_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `INST_OP_BITS + `INST_ARGS_BITS + 1 + `PC_BITS + `NR_BITS + `NT_WIDTH + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1;
|
||||||
localparam FANOUT_ENABLE= (`NUM_THREADS > MAX_FANOUT);
|
localparam FANOUT_ENABLE= (`NUM_THREADS > (MAX_FANOUT + MAX_FANOUT /2));
|
||||||
|
|
||||||
localparam DATA_TMASK_OFF = IN_DATAW - (`UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS);
|
localparam DATA_TMASK_OFF = IN_DATAW - (`UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS);
|
||||||
localparam DATA_REGS_OFF = 0;
|
localparam DATA_REGS_OFF = 0;
|
||||||
|
@ -85,6 +85,8 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
||||||
wire [ISSUE_W-1:0] issue_idx = ISSUE_W'(batch_idx * BLOCK_SIZE) + ISSUE_W'(block_idx);
|
wire [ISSUE_W-1:0] issue_idx = ISSUE_W'(batch_idx * BLOCK_SIZE) + ISSUE_W'(block_idx);
|
||||||
assign issue_indices[block_idx] = issue_idx;
|
assign issue_indices[block_idx] = issue_idx;
|
||||||
|
|
||||||
|
`RESET_RELAY_EN (block_reset, reset, (BLOCK_SIZE > 1));
|
||||||
|
|
||||||
wire valid_p, ready_p;
|
wire valid_p, ready_p;
|
||||||
|
|
||||||
if (`NUM_THREADS != NUM_LANES) begin
|
if (`NUM_THREADS != NUM_LANES) begin
|
||||||
|
@ -100,7 +102,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
||||||
wire fire_eop = fire_p && is_last_p;
|
wire fire_eop = fire_p && is_last_p;
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (block_reset) begin
|
||||||
sent_mask_p <= '0;
|
sent_mask_p <= '0;
|
||||||
is_first_p <= 1;
|
is_first_p <= 1;
|
||||||
end else begin
|
end else begin
|
||||||
|
@ -215,8 +217,6 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
||||||
assign isw = block_idx;
|
assign isw = block_idx;
|
||||||
end
|
end
|
||||||
|
|
||||||
`RESET_RELAY(buf_out_reset, reset);
|
|
||||||
|
|
||||||
wire [`NW_WIDTH-1:0] block_wid = wis_to_wid(dispatch_data[issue_idx][DATA_TMASK_OFF+`NUM_THREADS +: ISSUE_WIS_W], isw);
|
wire [`NW_WIDTH-1:0] block_wid = wis_to_wid(dispatch_data[issue_idx][DATA_TMASK_OFF+`NUM_THREADS +: ISSUE_WIS_W], isw);
|
||||||
|
|
||||||
VX_elastic_buffer #(
|
VX_elastic_buffer #(
|
||||||
|
@ -225,7 +225,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
||||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
||||||
) buf_out (
|
) buf_out (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (buf_out_reset),
|
.reset (block_reset),
|
||||||
.valid_in (valid_p),
|
.valid_in (valid_p),
|
||||||
.ready_in (ready_p),
|
.ready_in (ready_p),
|
||||||
.data_in ({
|
.data_in ({
|
||||||
|
|
|
@ -56,9 +56,10 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
||||||
.LUTRAM (1)
|
.LUTRAM (1)
|
||||||
) tag_store (
|
) tag_store (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
.read (1'b1),
|
.read (1'b1),
|
||||||
.write (icache_req_fire),
|
.write (icache_req_fire),
|
||||||
`UNUSED_PIN (wren),
|
.wren (1'b1),
|
||||||
.waddr (req_tag),
|
.waddr (req_tag),
|
||||||
.wdata ({schedule_if.data.PC, schedule_if.data.tmask}),
|
.wdata ({schedule_if.data.PC, schedule_if.data.tmask}),
|
||||||
.raddr (rsp_tag),
|
.raddr (rsp_tag),
|
||||||
|
|
|
@ -57,7 +57,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
||||||
`UNUSED_VAR (per_block_execute_if[block_idx].data.tid)
|
`UNUSED_VAR (per_block_execute_if[block_idx].data.tid)
|
||||||
`UNUSED_VAR (per_block_execute_if[block_idx].data.wb)
|
`UNUSED_VAR (per_block_execute_if[block_idx].data.wb)
|
||||||
|
|
||||||
`RESET_RELAY (block_reset, reset);
|
`RESET_RELAY_EN (block_reset, reset, (BLOCK_SIZE > 1));
|
||||||
|
|
||||||
// Store request info
|
// Store request info
|
||||||
wire fpu_req_valid, fpu_req_ready;
|
wire fpu_req_valid, fpu_req_ready;
|
||||||
|
@ -84,14 +84,12 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
||||||
wire execute_fire = per_block_execute_if[block_idx].valid && per_block_execute_if[block_idx].ready;
|
wire execute_fire = per_block_execute_if[block_idx].valid && per_block_execute_if[block_idx].ready;
|
||||||
wire fpu_rsp_fire = fpu_rsp_valid && fpu_rsp_ready;
|
wire fpu_rsp_fire = fpu_rsp_valid && fpu_rsp_ready;
|
||||||
|
|
||||||
`RESET_RELAY (ibuf_reset, block_reset);
|
|
||||||
|
|
||||||
VX_index_buffer #(
|
VX_index_buffer #(
|
||||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + PID_WIDTH + 1 + 1),
|
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + PID_WIDTH + 1 + 1),
|
||||||
.SIZE (`FPUQ_SIZE)
|
.SIZE (`FPUQ_SIZE)
|
||||||
) tag_store (
|
) tag_store (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (ibuf_reset),
|
.reset (block_reset),
|
||||||
.acquire_en (execute_fire),
|
.acquire_en (execute_fire),
|
||||||
.write_addr (fpu_req_tag),
|
.write_addr (fpu_req_tag),
|
||||||
.write_data ({per_block_execute_if[block_idx].data.uuid, per_block_execute_if[block_idx].data.wid, per_block_execute_if[block_idx].data.tmask, per_block_execute_if[block_idx].data.PC, per_block_execute_if[block_idx].data.rd, per_block_execute_if[block_idx].data.pid, per_block_execute_if[block_idx].data.sop, per_block_execute_if[block_idx].data.eop}),
|
.write_data ({per_block_execute_if[block_idx].data.uuid, per_block_execute_if[block_idx].data.wid, per_block_execute_if[block_idx].data.tmask, per_block_execute_if[block_idx].data.PC, per_block_execute_if[block_idx].data.rd, per_block_execute_if[block_idx].data.pid, per_block_execute_if[block_idx].data.sop, per_block_execute_if[block_idx].data.eop}),
|
||||||
|
@ -113,8 +111,6 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
||||||
assign fpu_req_valid = per_block_execute_if[block_idx].valid && ~mdata_full;
|
assign fpu_req_valid = per_block_execute_if[block_idx].valid && ~mdata_full;
|
||||||
assign per_block_execute_if[block_idx].ready = fpu_req_ready && ~mdata_full;
|
assign per_block_execute_if[block_idx].ready = fpu_req_ready && ~mdata_full;
|
||||||
|
|
||||||
`RESET_RELAY (fpu_reset, block_reset);
|
|
||||||
|
|
||||||
`ifdef FPU_DPI
|
`ifdef FPU_DPI
|
||||||
|
|
||||||
VX_fpu_dpi #(
|
VX_fpu_dpi #(
|
||||||
|
@ -123,7 +119,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
||||||
.OUT_BUF (PARTIAL_BW ? 1 : 3)
|
.OUT_BUF (PARTIAL_BW ? 1 : 3)
|
||||||
) fpu_dpi (
|
) fpu_dpi (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (fpu_reset),
|
.reset (block_reset),
|
||||||
|
|
||||||
.valid_in (fpu_req_valid),
|
.valid_in (fpu_req_valid),
|
||||||
.mask_in (per_block_execute_if[block_idx].data.tmask),
|
.mask_in (per_block_execute_if[block_idx].data.tmask),
|
||||||
|
@ -152,7 +148,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
||||||
.OUT_BUF (PARTIAL_BW ? 1 : 3)
|
.OUT_BUF (PARTIAL_BW ? 1 : 3)
|
||||||
) fpu_fpnew (
|
) fpu_fpnew (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (fpu_reset),
|
.reset (block_reset),
|
||||||
|
|
||||||
.valid_in (fpu_req_valid),
|
.valid_in (fpu_req_valid),
|
||||||
.mask_in (per_block_execute_if[block_idx].data.tmask),
|
.mask_in (per_block_execute_if[block_idx].data.tmask),
|
||||||
|
@ -181,7 +177,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
||||||
.OUT_BUF (PARTIAL_BW ? 1 : 3)
|
.OUT_BUF (PARTIAL_BW ? 1 : 3)
|
||||||
) fpu_dsp (
|
) fpu_dsp (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (fpu_reset),
|
.reset (block_reset),
|
||||||
|
|
||||||
.valid_in (fpu_req_valid),
|
.valid_in (fpu_req_valid),
|
||||||
.mask_in (per_block_execute_if[block_idx].data.tmask),
|
.mask_in (per_block_execute_if[block_idx].data.tmask),
|
||||||
|
@ -228,14 +224,12 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
||||||
|
|
||||||
// send response
|
// send response
|
||||||
|
|
||||||
`RESET_RELAY (rsp_reset, block_reset);
|
|
||||||
|
|
||||||
VX_elastic_buffer #(
|
VX_elastic_buffer #(
|
||||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + (NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1),
|
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + (NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1),
|
||||||
.SIZE (0)
|
.SIZE (0)
|
||||||
) rsp_buf (
|
) rsp_buf (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (rsp_reset),
|
.reset (block_reset),
|
||||||
.valid_in (fpu_rsp_valid),
|
.valid_in (fpu_rsp_valid),
|
||||||
.ready_in (fpu_rsp_ready),
|
.ready_in (fpu_rsp_ready),
|
||||||
.data_in ({fpu_rsp_uuid, fpu_rsp_wid, fpu_rsp_tmask, fpu_rsp_PC, fpu_rsp_rd, fpu_rsp_result, fpu_rsp_pid, fpu_rsp_sop, fpu_rsp_eop}),
|
.data_in ({fpu_rsp_uuid, fpu_rsp_wid, fpu_rsp_tmask, fpu_rsp_PC, fpu_rsp_rd, fpu_rsp_result, fpu_rsp_pid, fpu_rsp_sop, fpu_rsp_eop}),
|
||||||
|
|
|
@ -79,15 +79,13 @@ module VX_gather_unit import VX_gpu_pkg::*; #(
|
||||||
.NUM_LANES (NUM_LANES)
|
.NUM_LANES (NUM_LANES)
|
||||||
) commit_tmp_if();
|
) commit_tmp_if();
|
||||||
|
|
||||||
`RESET_RELAY(commit_out_reset, reset);
|
|
||||||
|
|
||||||
VX_elastic_buffer #(
|
VX_elastic_buffer #(
|
||||||
.DATAW (DATAW),
|
.DATAW (DATAW),
|
||||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
||||||
) out_buf (
|
) out_buf (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (commit_out_reset),
|
.reset (reset),
|
||||||
.valid_in (commit_out_valid[i]),
|
.valid_in (commit_out_valid[i]),
|
||||||
.ready_in (commit_out_ready[i]),
|
.ready_in (commit_out_ready[i]),
|
||||||
.data_in (commit_out_data[i]),
|
.data_in (commit_out_data[i]),
|
||||||
|
|
|
@ -72,9 +72,10 @@ module VX_ipdom_stack #(
|
||||||
.LUTRAM (OUT_REG ? 0 : 1)
|
.LUTRAM (OUT_REG ? 0 : 1)
|
||||||
) store (
|
) store (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
.read (1'b1),
|
.read (1'b1),
|
||||||
.write (push),
|
.write (push),
|
||||||
`UNUSED_PIN (wren),
|
.wren (1'b1),
|
||||||
.waddr (wr_ptr),
|
.waddr (wr_ptr),
|
||||||
.wdata ({q1, q0}),
|
.wdata ({q1, q0}),
|
||||||
.raddr (rd_ptr),
|
.raddr (rd_ptr),
|
||||||
|
|
|
@ -39,6 +39,8 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
||||||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||||
) lsu_switch_if[`NUM_LSU_BLOCKS]();
|
) lsu_switch_if[`NUM_LSU_BLOCKS]();
|
||||||
|
|
||||||
|
`RESET_RELAY_EX (block_reset, reset, `NUM_LSU_BLOCKS, 1);
|
||||||
|
|
||||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||||
|
|
||||||
wire [`NUM_LSU_LANES-1:0] is_addr_local_mask;
|
wire [`NUM_LSU_LANES-1:0] is_addr_local_mask;
|
||||||
|
@ -52,15 +54,13 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
||||||
wire req_global_ready;
|
wire req_global_ready;
|
||||||
wire req_local_ready;
|
wire req_local_ready;
|
||||||
|
|
||||||
`RESET_RELAY (switch_reset, reset);
|
|
||||||
|
|
||||||
VX_elastic_buffer #(
|
VX_elastic_buffer #(
|
||||||
.DATAW (REQ_DATAW),
|
.DATAW (REQ_DATAW),
|
||||||
.SIZE (2),
|
.SIZE (2),
|
||||||
.OUT_REG (1)
|
.OUT_REG (1)
|
||||||
) req_global_buf (
|
) req_global_buf (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (switch_reset),
|
.reset (block_reset[i]),
|
||||||
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_global),
|
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_global),
|
||||||
.data_in ({
|
.data_in ({
|
||||||
lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask,
|
lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask,
|
||||||
|
@ -91,7 +91,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
||||||
.OUT_REG (0)
|
.OUT_REG (0)
|
||||||
) req_local_buf (
|
) req_local_buf (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (switch_reset),
|
.reset (block_reset[i]),
|
||||||
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_local),
|
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_local),
|
||||||
.data_in ({
|
.data_in ({
|
||||||
lsu_mem_in_if[i].req_data.mask & is_addr_local_mask,
|
lsu_mem_in_if[i].req_data.mask & is_addr_local_mask,
|
||||||
|
@ -126,7 +126,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
||||||
.OUT_BUF (1)
|
.OUT_BUF (1)
|
||||||
) rsp_arb (
|
) rsp_arb (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (switch_reset),
|
.reset (block_reset[i]),
|
||||||
.valid_in ({
|
.valid_in ({
|
||||||
lsu_switch_if[i].rsp_valid,
|
lsu_switch_if[i].rsp_valid,
|
||||||
lsu_mem_out_if[i].rsp_valid
|
lsu_mem_out_if[i].rsp_valid
|
||||||
|
@ -157,18 +157,17 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
||||||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||||
) lmem_bus_tmp_if[`NUM_LSU_LANES]();
|
) lmem_bus_tmp_if[`NUM_LSU_LANES]();
|
||||||
|
|
||||||
`RESET_RELAY (adapter_reset, reset);
|
|
||||||
|
|
||||||
VX_lsu_adapter #(
|
VX_lsu_adapter #(
|
||||||
.NUM_LANES (`NUM_LSU_LANES),
|
.NUM_LANES (`NUM_LSU_LANES),
|
||||||
.DATA_SIZE (LSU_WORD_SIZE),
|
.DATA_SIZE (LSU_WORD_SIZE),
|
||||||
.TAG_WIDTH (LSU_TAG_WIDTH),
|
.TAG_WIDTH (LSU_TAG_WIDTH),
|
||||||
.TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH),
|
.TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH),
|
||||||
|
.ARBITER ("P"),
|
||||||
.REQ_OUT_BUF (3),
|
.REQ_OUT_BUF (3),
|
||||||
.RSP_OUT_BUF (0)
|
.RSP_OUT_BUF (0)
|
||||||
) lsu_adapter (
|
) lsu_adapter (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (adapter_reset),
|
.reset (block_reset[i]),
|
||||||
.lsu_mem_if (lsu_switch_if[i]),
|
.lsu_mem_if (lsu_switch_if[i]),
|
||||||
.mem_bus_if (lmem_bus_tmp_if)
|
.mem_bus_if (lmem_bus_tmp_if)
|
||||||
);
|
);
|
||||||
|
|
|
@ -490,6 +490,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
||||||
VX_stream_arb #(
|
VX_stream_arb #(
|
||||||
.NUM_INPUTS (2),
|
.NUM_INPUTS (2),
|
||||||
.DATAW (RSP_ARB_DATAW),
|
.DATAW (RSP_ARB_DATAW),
|
||||||
|
.ARBITER ("P"), // prioritize commit_rsp_if
|
||||||
.OUT_BUF (3)
|
.OUT_BUF (3)
|
||||||
) rsp_arb (
|
) rsp_arb (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
|
|
|
@ -13,6 +13,13 @@
|
||||||
|
|
||||||
`include "VX_define.vh"
|
`include "VX_define.vh"
|
||||||
|
|
||||||
|
// reset all GPRs in debug mode
|
||||||
|
`ifdef SIMULATION
|
||||||
|
`ifndef NDEBUG
|
||||||
|
`define GPR_RESET
|
||||||
|
`endif
|
||||||
|
`endif
|
||||||
|
|
||||||
module VX_operands import VX_gpu_pkg::*; #(
|
module VX_operands import VX_gpu_pkg::*; #(
|
||||||
parameter `STRING INSTANCE_ID = "",
|
parameter `STRING INSTANCE_ID = "",
|
||||||
parameter NUM_BANKS = 4,
|
parameter NUM_BANKS = 4,
|
||||||
|
@ -36,8 +43,9 @@ module VX_operands import VX_gpu_pkg::*; #(
|
||||||
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
|
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
|
||||||
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
|
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
|
||||||
localparam PER_BANK_REGS = `NUM_REGS / NUM_BANKS;
|
localparam PER_BANK_REGS = `NUM_REGS / NUM_BANKS;
|
||||||
localparam METADATAW = ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + `NR_BITS;
|
localparam META_DATAW = ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + `NR_BITS + `UUID_WIDTH;
|
||||||
localparam DATAW = `UUID_WIDTH + METADATAW + 3 * `NUM_THREADS * `XLEN;
|
localparam REGS_DATAW = `XLEN * `NUM_THREADS;
|
||||||
|
localparam DATAW = META_DATAW + NUM_SRC_REGS * REGS_DATAW;
|
||||||
localparam RAM_ADDRW = `LOG2UP(`NUM_REGS * PER_ISSUE_WARPS);
|
localparam RAM_ADDRW = `LOG2UP(`NUM_REGS * PER_ISSUE_WARPS);
|
||||||
localparam PER_BANK_ADDRW = RAM_ADDRW - BANK_SEL_BITS;
|
localparam PER_BANK_ADDRW = RAM_ADDRW - BANK_SEL_BITS;
|
||||||
localparam XLEN_SIZE = `XLEN / 8;
|
localparam XLEN_SIZE = `XLEN / 8;
|
||||||
|
@ -46,30 +54,28 @@ module VX_operands import VX_gpu_pkg::*; #(
|
||||||
`UNUSED_VAR (writeback_if.data.sop)
|
`UNUSED_VAR (writeback_if.data.sop)
|
||||||
|
|
||||||
wire [NUM_SRC_REGS-1:0] src_valid;
|
wire [NUM_SRC_REGS-1:0] src_valid;
|
||||||
wire [NUM_SRC_REGS-1:0] req_in_valid;
|
wire [NUM_SRC_REGS-1:0] req_in_valid, req_in_ready;
|
||||||
wire [NUM_SRC_REGS-1:0] req_in_ready;
|
|
||||||
wire [NUM_SRC_REGS-1:0][PER_BANK_ADDRW-1:0] req_in_data;
|
wire [NUM_SRC_REGS-1:0][PER_BANK_ADDRW-1:0] req_in_data;
|
||||||
wire [NUM_SRC_REGS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx;
|
wire [NUM_SRC_REGS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx;
|
||||||
|
|
||||||
wire [NUM_BANKS-1:0] gpr_rd_valid_n, gpr_rd_ready;
|
wire [NUM_BANKS-1:0] gpr_rd_valid, gpr_rd_ready;
|
||||||
reg [NUM_BANKS-1:0] gpr_rd_valid;
|
wire [NUM_BANKS-1:0] gpr_rd_valid_st1, gpr_rd_valid_st2;
|
||||||
wire [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr_n;
|
wire [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr, gpr_rd_addr_st1;
|
||||||
reg [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr;
|
wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st1, gpr_rd_data_st2;
|
||||||
wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data;
|
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx, gpr_rd_req_idx_st1, gpr_rd_req_idx_st2;
|
||||||
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx_n;
|
|
||||||
reg [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx;
|
|
||||||
|
|
||||||
wire pipe_in_ready;
|
wire pipe_valid_st1, pipe_ready_st1;
|
||||||
reg pipe_out_valid;
|
wire pipe_valid_st2, pipe_ready_st2;
|
||||||
wire pipe_out_ready;
|
wire [META_DATAW-1:0] pipe_data, pipe_data_st1, pipe_data_st2;
|
||||||
reg [`UUID_WIDTH-1:0] pipe_out_uuid;
|
|
||||||
reg [METADATAW-1:0] pipe_out_data;
|
|
||||||
|
|
||||||
reg [NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data, src_data_n;
|
reg [NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_n;
|
||||||
reg [NUM_SRC_REGS-1:0] data_fetched;
|
wire [NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_st1, src_data_st2;
|
||||||
reg has_collision, has_collision_n;
|
|
||||||
|
|
||||||
wire stg_in_valid, stg_in_ready;
|
reg [NUM_SRC_REGS-1:0] data_fetched_n;
|
||||||
|
wire [NUM_SRC_REGS-1:0] data_fetched_st1;
|
||||||
|
|
||||||
|
reg has_collision_n;
|
||||||
|
wire has_collision_st1;
|
||||||
|
|
||||||
wire [NUM_SRC_REGS-1:0][`NR_BITS-1:0] src_regs = {scoreboard_if.data.rs3,
|
wire [NUM_SRC_REGS-1:0][`NR_BITS-1:0] src_regs = {scoreboard_if.data.rs3,
|
||||||
scoreboard_if.data.rs2,
|
scoreboard_if.data.rs2,
|
||||||
|
@ -89,7 +95,7 @@ module VX_operands import VX_gpu_pkg::*; #(
|
||||||
end
|
end
|
||||||
|
|
||||||
for (genvar i = 0; i < NUM_SRC_REGS; ++i) begin
|
for (genvar i = 0; i < NUM_SRC_REGS; ++i) begin
|
||||||
assign src_valid[i] = (src_regs[i] != 0) && ~data_fetched[i];
|
assign src_valid[i] = (src_regs[i] != 0) && ~data_fetched_st1[i];
|
||||||
end
|
end
|
||||||
|
|
||||||
assign req_in_valid = {NUM_SRC_REGS{scoreboard_if.valid}} & src_valid;
|
assign req_in_valid = {NUM_SRC_REGS{scoreboard_if.valid}} & src_valid;
|
||||||
|
@ -109,13 +115,20 @@ module VX_operands import VX_gpu_pkg::*; #(
|
||||||
.data_in (req_in_data),
|
.data_in (req_in_data),
|
||||||
.sel_in (req_bank_idx),
|
.sel_in (req_bank_idx),
|
||||||
.ready_in (req_in_ready),
|
.ready_in (req_in_ready),
|
||||||
.valid_out (gpr_rd_valid_n),
|
.valid_out (gpr_rd_valid),
|
||||||
.data_out (gpr_rd_addr_n),
|
.data_out (gpr_rd_addr),
|
||||||
.sel_out (gpr_rd_req_idx_n),
|
.sel_out (gpr_rd_req_idx),
|
||||||
.ready_out (gpr_rd_ready)
|
.ready_out (gpr_rd_ready)
|
||||||
);
|
);
|
||||||
|
|
||||||
assign gpr_rd_ready = {NUM_BANKS{stg_in_ready}};
|
wire pipe_in_ready = pipe_ready_st1 || ~pipe_valid_st1;
|
||||||
|
|
||||||
|
assign gpr_rd_ready = {NUM_BANKS{pipe_in_ready}};
|
||||||
|
|
||||||
|
assign scoreboard_if.ready = pipe_in_ready && ~has_collision_n;
|
||||||
|
|
||||||
|
wire pipe_fire_st1 = pipe_valid_st1 && pipe_ready_st1;
|
||||||
|
wire pipe_fire_st2 = pipe_valid_st2 && pipe_ready_st2;
|
||||||
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
has_collision_n = 0;
|
has_collision_n = 0;
|
||||||
|
@ -129,83 +142,82 @@ module VX_operands import VX_gpu_pkg::*; #(
|
||||||
end
|
end
|
||||||
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
src_data_n = src_data;
|
data_fetched_n = data_fetched_st1;
|
||||||
for (integer b = 0; b < NUM_BANKS; ++b) begin
|
if (scoreboard_if.ready) begin
|
||||||
if (gpr_rd_valid[b]) begin
|
data_fetched_n = '0;
|
||||||
src_data_n[gpr_rd_req_idx[b]] = gpr_rd_data[b];
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
wire pipe_stall = pipe_out_valid && ~pipe_out_ready;
|
|
||||||
assign pipe_in_ready = ~pipe_stall;
|
|
||||||
|
|
||||||
assign scoreboard_if.ready = pipe_in_ready && ~has_collision_n;
|
|
||||||
|
|
||||||
wire stg_in_fire = stg_in_valid && stg_in_ready;
|
|
||||||
|
|
||||||
always @(posedge clk) begin
|
|
||||||
if (reset) begin
|
|
||||||
pipe_out_valid <= 0;
|
|
||||||
gpr_rd_valid <= '0;
|
|
||||||
data_fetched <= '0;
|
|
||||||
src_data <= '0;
|
|
||||||
end else begin
|
end else begin
|
||||||
if (~pipe_stall) begin
|
data_fetched_n = data_fetched_st1 | req_in_ready;
|
||||||
pipe_out_valid <= scoreboard_if.valid;
|
|
||||||
gpr_rd_valid <= gpr_rd_valid_n;
|
|
||||||
if (scoreboard_if.ready) begin
|
|
||||||
data_fetched <= '0;
|
|
||||||
end else begin
|
|
||||||
data_fetched <= data_fetched | req_in_ready;
|
|
||||||
end
|
|
||||||
if (stg_in_fire) begin
|
|
||||||
src_data <= '0;
|
|
||||||
end else begin
|
|
||||||
src_data <= src_data_n;
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
if (~pipe_stall) begin
|
|
||||||
pipe_out_uuid <= scoreboard_if.data.uuid;
|
|
||||||
pipe_out_data <= {
|
|
||||||
scoreboard_if.data.wis,
|
|
||||||
scoreboard_if.data.tmask,
|
|
||||||
scoreboard_if.data.PC,
|
|
||||||
scoreboard_if.data.wb,
|
|
||||||
scoreboard_if.data.ex_type,
|
|
||||||
scoreboard_if.data.op_type,
|
|
||||||
scoreboard_if.data.op_args,
|
|
||||||
scoreboard_if.data.rd
|
|
||||||
};
|
|
||||||
has_collision <= has_collision_n;
|
|
||||||
gpr_rd_addr <= gpr_rd_addr_n;
|
|
||||||
gpr_rd_req_idx <= gpr_rd_req_idx_n;
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
assign pipe_out_ready = stg_in_ready;
|
assign pipe_data = {
|
||||||
assign stg_in_valid = pipe_out_valid && ~has_collision;
|
scoreboard_if.data.wis,
|
||||||
|
scoreboard_if.data.tmask,
|
||||||
|
scoreboard_if.data.PC,
|
||||||
|
scoreboard_if.data.wb,
|
||||||
|
scoreboard_if.data.ex_type,
|
||||||
|
scoreboard_if.data.op_type,
|
||||||
|
scoreboard_if.data.op_args,
|
||||||
|
scoreboard_if.data.rd,
|
||||||
|
scoreboard_if.data.uuid
|
||||||
|
};
|
||||||
|
|
||||||
|
VX_pipe_register #(
|
||||||
|
.DATAW (1 + NUM_SRC_REGS + NUM_BANKS + META_DATAW + 1 + NUM_BANKS * (PER_BANK_ADDRW + REQ_SEL_WIDTH)),
|
||||||
|
.RESETW (1 + NUM_SRC_REGS)
|
||||||
|
) pipe_reg1 (
|
||||||
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
|
.enable (pipe_in_ready),
|
||||||
|
.data_in ({scoreboard_if.valid, data_fetched_n, gpr_rd_valid, pipe_data, has_collision_n, gpr_rd_addr, gpr_rd_req_idx}),
|
||||||
|
.data_out ({pipe_valid_st1, data_fetched_st1, gpr_rd_valid_st1, pipe_data_st1, has_collision_st1, gpr_rd_addr_st1, gpr_rd_req_idx_st1})
|
||||||
|
);
|
||||||
|
|
||||||
|
assign pipe_ready_st1 = pipe_ready_st2 || ~pipe_valid_st2;
|
||||||
|
|
||||||
|
assign src_data_st1 = pipe_fire_st2 ? '0 : src_data_n;
|
||||||
|
|
||||||
|
wire pipe_valid2_st1 = pipe_valid_st1 && ~has_collision_st1;
|
||||||
|
|
||||||
|
`RESET_RELAY (pipe2_reset, reset); // needed for pipe_reg2's wide RESETW
|
||||||
|
|
||||||
|
VX_pipe_register #(
|
||||||
|
.DATAW (1 + NUM_SRC_REGS * REGS_DATAW + NUM_BANKS + NUM_BANKS * REGS_DATAW + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH),
|
||||||
|
.RESETW (1 + NUM_SRC_REGS * REGS_DATAW)
|
||||||
|
) pipe_reg2 (
|
||||||
|
.clk (clk),
|
||||||
|
.reset (pipe2_reset),
|
||||||
|
.enable (pipe_ready_st1),
|
||||||
|
.data_in ({pipe_valid2_st1, src_data_st1, gpr_rd_valid_st1, gpr_rd_data_st1, pipe_data_st1, gpr_rd_req_idx_st1}),
|
||||||
|
.data_out ({pipe_valid_st2, src_data_st2, gpr_rd_valid_st2, gpr_rd_data_st2, pipe_data_st2, gpr_rd_req_idx_st2})
|
||||||
|
);
|
||||||
|
|
||||||
|
always @(*) begin
|
||||||
|
src_data_n = src_data_st2;
|
||||||
|
for (integer b = 0; b < NUM_BANKS; ++b) begin
|
||||||
|
if (gpr_rd_valid_st2[b]) begin
|
||||||
|
src_data_n[gpr_rd_req_idx_st2[b]] = gpr_rd_data_st2[b];
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
VX_elastic_buffer #(
|
VX_elastic_buffer #(
|
||||||
.DATAW (DATAW),
|
.DATAW (DATAW),
|
||||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)),
|
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)),
|
||||||
.LUTRAM (1)
|
.LUTRAM (1)
|
||||||
) out_buffer (
|
) out_buf (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.valid_in (stg_in_valid),
|
.valid_in (pipe_valid_st2),
|
||||||
.ready_in (stg_in_ready),
|
.ready_in (pipe_ready_st2),
|
||||||
.data_in ({
|
.data_in ({
|
||||||
pipe_out_uuid,
|
pipe_data_st2,
|
||||||
pipe_out_data,
|
|
||||||
src_data_n[0],
|
src_data_n[0],
|
||||||
src_data_n[1],
|
src_data_n[1],
|
||||||
src_data_n[2]
|
src_data_n[2]
|
||||||
}),
|
}),
|
||||||
.data_out ({
|
.data_out ({
|
||||||
operands_if.data.uuid,
|
|
||||||
operands_if.data.wis,
|
operands_if.data.wis,
|
||||||
operands_if.data.tmask,
|
operands_if.data.tmask,
|
||||||
operands_if.data.PC,
|
operands_if.data.PC,
|
||||||
|
@ -214,6 +226,7 @@ module VX_operands import VX_gpu_pkg::*; #(
|
||||||
operands_if.data.op_type,
|
operands_if.data.op_type,
|
||||||
operands_if.data.op_args,
|
operands_if.data.op_args,
|
||||||
operands_if.data.rd,
|
operands_if.data.rd,
|
||||||
|
operands_if.data.uuid,
|
||||||
operands_if.data.rs1_data,
|
operands_if.data.rs1_data,
|
||||||
operands_if.data.rs2_data,
|
operands_if.data.rs2_data,
|
||||||
operands_if.data.rs3_data
|
operands_if.data.rs3_data
|
||||||
|
@ -262,27 +275,24 @@ module VX_operands import VX_gpu_pkg::*; #(
|
||||||
assign wren[i*XLEN_SIZE+:XLEN_SIZE] = {XLEN_SIZE{writeback_if.data.tmask[i]}};
|
assign wren[i*XLEN_SIZE+:XLEN_SIZE] = {XLEN_SIZE{writeback_if.data.tmask[i]}};
|
||||||
end
|
end
|
||||||
|
|
||||||
`ifdef GPR_RESET
|
|
||||||
VX_dp_ram_rst #(
|
|
||||||
`else
|
|
||||||
VX_dp_ram #(
|
VX_dp_ram #(
|
||||||
`endif
|
.DATAW (REGS_DATAW),
|
||||||
.DATAW (`XLEN * `NUM_THREADS),
|
|
||||||
.SIZE (PER_BANK_REGS * PER_ISSUE_WARPS),
|
.SIZE (PER_BANK_REGS * PER_ISSUE_WARPS),
|
||||||
.WRENW (BYTEENW),
|
.WRENW (BYTEENW),
|
||||||
|
`ifdef GPR_RESET
|
||||||
|
.RESET_RAM (1),
|
||||||
|
`endif
|
||||||
.NO_RWCHECK (1)
|
.NO_RWCHECK (1)
|
||||||
) gpr_ram (
|
) gpr_ram (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
`ifdef GPR_RESET
|
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
`endif
|
.read (pipe_fire_st1),
|
||||||
.read (1'b1),
|
|
||||||
.wren (wren),
|
.wren (wren),
|
||||||
.write (gpr_wr_enabled),
|
.write (gpr_wr_enabled),
|
||||||
.waddr (gpr_wr_addr),
|
.waddr (gpr_wr_addr),
|
||||||
.wdata (writeback_if.data.data),
|
.wdata (writeback_if.data.data),
|
||||||
.raddr (gpr_rd_addr[b]),
|
.raddr (gpr_rd_addr_st1[b]),
|
||||||
.rdata (gpr_rd_data[b])
|
.rdata (gpr_rd_data_st1[b])
|
||||||
);
|
);
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -383,16 +383,16 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
||||||
wire [`NUM_WARPS-1:0] pending_warp_empty;
|
wire [`NUM_WARPS-1:0] pending_warp_empty;
|
||||||
wire [`NUM_WARPS-1:0] pending_warp_alm_empty;
|
wire [`NUM_WARPS-1:0] pending_warp_alm_empty;
|
||||||
|
|
||||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
`RESET_RELAY_EX (pending_instr_reset, reset, `NUM_WARPS, `MAX_FANOUT);
|
||||||
|
|
||||||
`RESET_RELAY (pending_instr_reset, reset);
|
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||||
|
|
||||||
VX_pending_size #(
|
VX_pending_size #(
|
||||||
.SIZE (4096),
|
.SIZE (4096),
|
||||||
.ALM_EMPTY (1)
|
.ALM_EMPTY (1)
|
||||||
) counter (
|
) counter (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (pending_instr_reset),
|
.reset (pending_instr_reset[i]),
|
||||||
.incr (per_warp_incr[i]),
|
.incr (per_warp_incr[i]),
|
||||||
.decr (commit_sched_if.committed_warps[i]),
|
.decr (commit_sched_if.committed_warps[i]),
|
||||||
.empty (pending_warp_empty[i]),
|
.empty (pending_warp_empty[i]),
|
||||||
|
|
|
@ -179,7 +179,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
||||||
VX_gather_unit #(
|
VX_gather_unit #(
|
||||||
.BLOCK_SIZE (BLOCK_SIZE),
|
.BLOCK_SIZE (BLOCK_SIZE),
|
||||||
.NUM_LANES (NUM_LANES),
|
.NUM_LANES (NUM_LANES),
|
||||||
.OUT_BUF (1)
|
.OUT_BUF (3)
|
||||||
) gather_unit (
|
) gather_unit (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
|
|
|
@ -63,7 +63,8 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #(
|
||||||
.DATA_IN_WIDTH(32),
|
.DATA_IN_WIDTH(32),
|
||||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||||
.PE_REG (0)
|
.PE_REG (0),
|
||||||
|
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
|
||||||
) pe_serializer (
|
) pe_serializer (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
|
|
|
@ -67,7 +67,8 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
||||||
.DATA_IN_WIDTH(2*32),
|
.DATA_IN_WIDTH(2*32),
|
||||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||||
.PE_REG (0)
|
.PE_REG (0),
|
||||||
|
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
|
||||||
) pe_serializer (
|
) pe_serializer (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
|
|
|
@ -98,7 +98,8 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
||||||
.DATA_IN_WIDTH(3*32),
|
.DATA_IN_WIDTH(3*32),
|
||||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||||
.PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0)
|
.PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0), // must be registered for DSPs
|
||||||
|
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
|
||||||
) pe_serializer (
|
) pe_serializer (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
|
|
|
@ -68,7 +68,8 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #(
|
||||||
.DATA_IN_WIDTH(2*32),
|
.DATA_IN_WIDTH(2*32),
|
||||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||||
.PE_REG (0)
|
.PE_REG (0),
|
||||||
|
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
|
||||||
) pe_serializer (
|
) pe_serializer (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
|
|
|
@ -61,7 +61,8 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #(
|
||||||
.DATA_IN_WIDTH(32),
|
.DATA_IN_WIDTH(32),
|
||||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||||
.PE_REG (0)
|
.PE_REG (0),
|
||||||
|
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
|
||||||
) pe_serializer (
|
) pe_serializer (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
|
|
|
@ -81,12 +81,15 @@ module VX_avs_adapter #(
|
||||||
assign req_queue_push[i] = mem_req_valid && ~mem_req_rw && bank_req_ready[i] && (req_bank_sel == i);
|
assign req_queue_push[i] = mem_req_valid && ~mem_req_rw && bank_req_ready[i] && (req_bank_sel == i);
|
||||||
end
|
end
|
||||||
|
|
||||||
|
`RESET_RELAY_EX (bank_reset, reset, NUM_BANKS, 1);
|
||||||
|
|
||||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||||
|
|
||||||
VX_pending_size #(
|
VX_pending_size #(
|
||||||
.SIZE (RD_QUEUE_SIZE)
|
.SIZE (RD_QUEUE_SIZE)
|
||||||
) pending_size (
|
) pending_size (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (bank_reset[i]),
|
||||||
.incr (req_queue_push[i]),
|
.incr (req_queue_push[i]),
|
||||||
.decr (req_queue_pop[i]),
|
.decr (req_queue_pop[i]),
|
||||||
`UNUSED_PIN (empty),
|
`UNUSED_PIN (empty),
|
||||||
|
@ -102,7 +105,7 @@ module VX_avs_adapter #(
|
||||||
.DEPTH (RD_QUEUE_SIZE)
|
.DEPTH (RD_QUEUE_SIZE)
|
||||||
) rd_req_queue (
|
) rd_req_queue (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (bank_reset[i]),
|
||||||
.push (req_queue_push[i]),
|
.push (req_queue_push[i]),
|
||||||
.pop (req_queue_pop[i]),
|
.pop (req_queue_pop[i]),
|
||||||
.data_in (mem_req_tag),
|
.data_in (mem_req_tag),
|
||||||
|
@ -132,7 +135,7 @@ module VX_avs_adapter #(
|
||||||
.OUT_REG (`TO_OUT_BUF_REG(REQ_OUT_BUF))
|
.OUT_REG (`TO_OUT_BUF_REG(REQ_OUT_BUF))
|
||||||
) req_out_buf (
|
) req_out_buf (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (bank_reset[i]),
|
||||||
.valid_in (valid_out_w),
|
.valid_in (valid_out_w),
|
||||||
.ready_in (ready_out_w),
|
.ready_in (ready_out_w),
|
||||||
.data_in ({mem_req_rw, mem_req_byteen, req_bank_off, mem_req_data}),
|
.data_in ({mem_req_rw, mem_req_byteen, req_bank_off, mem_req_data}),
|
||||||
|
@ -168,12 +171,13 @@ module VX_avs_adapter #(
|
||||||
wire [NUM_BANKS-1:0] rsp_queue_empty;
|
wire [NUM_BANKS-1:0] rsp_queue_empty;
|
||||||
|
|
||||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||||
|
|
||||||
VX_fifo_queue #(
|
VX_fifo_queue #(
|
||||||
.DATAW (DATA_WIDTH),
|
.DATAW (DATA_WIDTH),
|
||||||
.DEPTH (RD_QUEUE_SIZE)
|
.DEPTH (RD_QUEUE_SIZE)
|
||||||
) rd_rsp_queue (
|
) rd_rsp_queue (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (bank_reset[i]),
|
||||||
.push (avs_readdatavalid[i]),
|
.push (avs_readdatavalid[i]),
|
||||||
.pop (req_queue_pop[i]),
|
.pop (req_queue_pop[i]),
|
||||||
.data_in (avs_readdata[i]),
|
.data_in (avs_readdata[i]),
|
||||||
|
@ -195,7 +199,7 @@ module VX_avs_adapter #(
|
||||||
VX_stream_arb #(
|
VX_stream_arb #(
|
||||||
.NUM_INPUTS (NUM_BANKS),
|
.NUM_INPUTS (NUM_BANKS),
|
||||||
.DATAW (DATA_WIDTH + TAG_WIDTH),
|
.DATAW (DATA_WIDTH + TAG_WIDTH),
|
||||||
.ARBITER ("R"),
|
.ARBITER ("F"),
|
||||||
.OUT_BUF (RSP_OUT_BUF)
|
.OUT_BUF (RSP_OUT_BUF)
|
||||||
) rsp_arb (
|
) rsp_arb (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
|
|
|
@ -207,7 +207,7 @@ module VX_axi_adapter #(
|
||||||
VX_stream_arb #(
|
VX_stream_arb #(
|
||||||
.NUM_INPUTS (NUM_BANKS),
|
.NUM_INPUTS (NUM_BANKS),
|
||||||
.DATAW (DATA_WIDTH + TAG_WIDTH),
|
.DATAW (DATA_WIDTH + TAG_WIDTH),
|
||||||
.ARBITER ("R"),
|
.ARBITER ("F"),
|
||||||
.OUT_BUF (RSP_OUT_BUF)
|
.OUT_BUF (RSP_OUT_BUF)
|
||||||
) rsp_arb (
|
) rsp_arb (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
|
|
|
@ -22,12 +22,16 @@ module VX_dp_ram #(
|
||||||
parameter OUT_REG = 0,
|
parameter OUT_REG = 0,
|
||||||
parameter NO_RWCHECK = 0,
|
parameter NO_RWCHECK = 0,
|
||||||
parameter LUTRAM = 0,
|
parameter LUTRAM = 0,
|
||||||
|
parameter RW_ASSERT = 0,
|
||||||
|
parameter RESET_RAM = 0,
|
||||||
|
parameter READ_ENABLE = 0,
|
||||||
parameter INIT_ENABLE = 0,
|
parameter INIT_ENABLE = 0,
|
||||||
parameter INIT_FILE = "",
|
parameter INIT_FILE = "",
|
||||||
parameter [DATAW-1:0] INIT_VALUE = 0,
|
parameter [DATAW-1:0] INIT_VALUE = 0,
|
||||||
parameter ADDRW = `LOG2UP(SIZE)
|
parameter ADDRW = `LOG2UP(SIZE)
|
||||||
) (
|
) (
|
||||||
input wire clk,
|
input wire clk,
|
||||||
|
input wire reset,
|
||||||
input wire read,
|
input wire read,
|
||||||
input wire write,
|
input wire write,
|
||||||
input wire [WRENW-1:0] wren,
|
input wire [WRENW-1:0] wren,
|
||||||
|
@ -50,44 +54,44 @@ module VX_dp_ram #(
|
||||||
end \
|
end \
|
||||||
end
|
end
|
||||||
|
|
||||||
|
`UNUSED_PARAM (RW_ASSERT)
|
||||||
`UNUSED_VAR (read)
|
`UNUSED_VAR (read)
|
||||||
|
|
||||||
|
if (WRENW > 1) begin
|
||||||
|
`RUNTIME_ASSERT(~write || (| wren), ("invalid write enable mask"));
|
||||||
|
end
|
||||||
|
|
||||||
|
wire [DATAW-1:0] rdata_w;
|
||||||
|
|
||||||
`ifdef SYNTHESIS
|
`ifdef SYNTHESIS
|
||||||
if (WRENW > 1) begin
|
if (WRENW > 1) begin
|
||||||
`ifdef QUARTUS
|
`ifdef QUARTUS
|
||||||
if (LUTRAM != 0) begin
|
if (LUTRAM != 0) begin
|
||||||
if (OUT_REG != 0) begin
|
`USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||||
reg [DATAW-1:0] rdata_r;
|
`RAM_INITIALIZATION
|
||||||
`USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
|
always @(posedge clk) begin
|
||||||
`RAM_INITIALIZATION
|
if (write) begin
|
||||||
always @(posedge clk) begin
|
for (integer i = 0; i < WRENW; ++i) begin
|
||||||
if (write) begin
|
if (wren[i])
|
||||||
for (integer i = 0; i < WRENW; ++i) begin
|
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
|
||||||
if (wren[i])
|
|
||||||
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
|
|
||||||
end
|
|
||||||
end
|
|
||||||
if (read) begin
|
|
||||||
rdata_r <= ram[raddr];
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
assign rdata = rdata_r;
|
|
||||||
end else begin
|
|
||||||
`USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
|
|
||||||
`RAM_INITIALIZATION
|
|
||||||
always @(posedge clk) begin
|
|
||||||
if (write) begin
|
|
||||||
for (integer i = 0; i < WRENW; ++i) begin
|
|
||||||
if (wren[i])
|
|
||||||
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
assign rdata = ram[raddr];
|
|
||||||
end
|
end
|
||||||
|
assign rdata_w = ram[raddr];
|
||||||
end else begin
|
end else begin
|
||||||
if (OUT_REG != 0) begin
|
if (NO_RWCHECK != 0) begin
|
||||||
reg [DATAW-1:0] rdata_r;
|
`NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||||
|
`RAM_INITIALIZATION
|
||||||
|
always @(posedge clk) begin
|
||||||
|
if (write) begin
|
||||||
|
for (integer i = 0; i < WRENW; ++i) begin
|
||||||
|
if (wren[i])
|
||||||
|
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
assign rdata_w = ram[raddr];
|
||||||
|
end else begin
|
||||||
reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
|
reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||||
`RAM_INITIALIZATION
|
`RAM_INITIALIZATION
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
|
@ -97,37 +101,8 @@ module VX_dp_ram #(
|
||||||
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
|
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
if (read) begin
|
|
||||||
rdata_r <= ram[raddr];
|
|
||||||
end
|
|
||||||
end
|
|
||||||
assign rdata = rdata_r;
|
|
||||||
end else begin
|
|
||||||
if (NO_RWCHECK != 0) begin
|
|
||||||
`NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
|
|
||||||
`RAM_INITIALIZATION
|
|
||||||
always @(posedge clk) begin
|
|
||||||
if (write) begin
|
|
||||||
for (integer i = 0; i < WRENW; ++i) begin
|
|
||||||
if (wren[i])
|
|
||||||
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
assign rdata = ram[raddr];
|
|
||||||
end else begin
|
|
||||||
reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
|
|
||||||
`RAM_INITIALIZATION
|
|
||||||
always @(posedge clk) begin
|
|
||||||
if (write) begin
|
|
||||||
for (integer i = 0; i < WRENW; ++i) begin
|
|
||||||
if (wren[i])
|
|
||||||
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
assign rdata = ram[raddr];
|
|
||||||
end
|
end
|
||||||
|
assign rdata_w = ram[raddr];
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
`else
|
`else
|
||||||
|
@ -135,35 +110,18 @@ module VX_dp_ram #(
|
||||||
if (LUTRAM != 0) begin
|
if (LUTRAM != 0) begin
|
||||||
`USE_FAST_BRAM reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
`USE_FAST_BRAM reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||||
`RAM_INITIALIZATION
|
`RAM_INITIALIZATION
|
||||||
if (OUT_REG != 0) begin
|
always @(posedge clk) begin
|
||||||
reg [DATAW-1:0] rdata_r;
|
if (write) begin
|
||||||
always @(posedge clk) begin
|
for (integer i = 0; i < WRENW; ++i) begin
|
||||||
if (write) begin
|
if (wren[i])
|
||||||
for (integer i = 0; i < WRENW; ++i) begin
|
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
||||||
if (wren[i])
|
|
||||||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
|
||||||
end
|
|
||||||
end
|
|
||||||
if (read) begin
|
|
||||||
rdata_r <= ram[raddr];
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
assign rdata = rdata_r;
|
|
||||||
end else begin
|
|
||||||
always @(posedge clk) begin
|
|
||||||
if (write) begin
|
|
||||||
for (integer i = 0; i < WRENW; ++i) begin
|
|
||||||
if (wren[i])
|
|
||||||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
assign rdata = ram[raddr];
|
|
||||||
end
|
end
|
||||||
|
assign rdata_w = ram[raddr];
|
||||||
end else begin
|
end else begin
|
||||||
if (OUT_REG != 0) begin
|
if (NO_RWCHECK != 0) begin
|
||||||
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||||
reg [DATAW-1:0] rdata_r;
|
|
||||||
`RAM_INITIALIZATION
|
`RAM_INITIALIZATION
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (write) begin
|
if (write) begin
|
||||||
|
@ -172,37 +130,20 @@ module VX_dp_ram #(
|
||||||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
if (read) begin
|
|
||||||
rdata_r <= ram[raddr];
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
assign rdata = rdata_r;
|
assign rdata_w = ram[raddr];
|
||||||
end else begin
|
end else begin
|
||||||
if (NO_RWCHECK != 0) begin
|
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||||
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
`RAM_INITIALIZATION
|
||||||
`RAM_INITIALIZATION
|
always @(posedge clk) begin
|
||||||
always @(posedge clk) begin
|
if (write) begin
|
||||||
if (write) begin
|
for (integer i = 0; i < WRENW; ++i) begin
|
||||||
for (integer i = 0; i < WRENW; ++i) begin
|
if (wren[i])
|
||||||
if (wren[i])
|
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
||||||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
assign rdata = ram[raddr];
|
|
||||||
end else begin
|
|
||||||
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
|
||||||
`RAM_INITIALIZATION
|
|
||||||
always @(posedge clk) begin
|
|
||||||
if (write) begin
|
|
||||||
for (integer i = 0; i < WRENW; ++i) begin
|
|
||||||
if (wren[i])
|
|
||||||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
assign rdata = ram[raddr];
|
|
||||||
end
|
end
|
||||||
|
assign rdata_w = ram[raddr];
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
@ -211,64 +152,36 @@ module VX_dp_ram #(
|
||||||
if (LUTRAM != 0) begin
|
if (LUTRAM != 0) begin
|
||||||
`USE_FAST_BRAM reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
`USE_FAST_BRAM reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||||
`RAM_INITIALIZATION
|
`RAM_INITIALIZATION
|
||||||
if (OUT_REG != 0) begin
|
always @(posedge clk) begin
|
||||||
reg [DATAW-1:0] rdata_r;
|
if (write) begin
|
||||||
always @(posedge clk) begin
|
ram[waddr] <= wdata;
|
||||||
if (write) begin
|
|
||||||
ram[waddr] <= wdata;
|
|
||||||
end
|
|
||||||
if (read) begin
|
|
||||||
rdata_r <= ram[raddr];
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
assign rdata = rdata_r;
|
|
||||||
end else begin
|
|
||||||
always @(posedge clk) begin
|
|
||||||
if (write) begin
|
|
||||||
ram[waddr] <= wdata;
|
|
||||||
end
|
|
||||||
end
|
|
||||||
assign rdata = ram[raddr];
|
|
||||||
end
|
end
|
||||||
|
assign rdata_w = ram[raddr];
|
||||||
end else begin
|
end else begin
|
||||||
if (OUT_REG != 0) begin
|
if (NO_RWCHECK != 0) begin
|
||||||
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||||
reg [DATAW-1:0] rdata_r;
|
|
||||||
`RAM_INITIALIZATION
|
`RAM_INITIALIZATION
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (write) begin
|
if (write) begin
|
||||||
ram[waddr] <= wdata;
|
ram[waddr] <= wdata;
|
||||||
end
|
end
|
||||||
if (read) begin
|
|
||||||
rdata_r <= ram[raddr];
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
assign rdata = rdata_r;
|
assign rdata_w = ram[raddr];
|
||||||
end else begin
|
end else begin
|
||||||
if (NO_RWCHECK != 0) begin
|
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||||
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
`RAM_INITIALIZATION
|
||||||
`RAM_INITIALIZATION
|
always @(posedge clk) begin
|
||||||
always @(posedge clk) begin
|
if (write) begin
|
||||||
if (write) begin
|
ram[waddr] <= wdata;
|
||||||
ram[waddr] <= wdata;
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
assign rdata = ram[raddr];
|
|
||||||
end else begin
|
|
||||||
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
|
||||||
`RAM_INITIALIZATION
|
|
||||||
always @(posedge clk) begin
|
|
||||||
if (write) begin
|
|
||||||
ram[waddr] <= wdata;
|
|
||||||
end
|
|
||||||
end
|
|
||||||
assign rdata = ram[raddr];
|
|
||||||
end
|
end
|
||||||
|
assign rdata_w = ram[raddr];
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
`else
|
`else
|
||||||
// RAM emulation
|
// simulation
|
||||||
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||||
`RAM_INITIALIZATION
|
`RAM_INITIALIZATION
|
||||||
|
|
||||||
|
@ -277,39 +190,57 @@ module VX_dp_ram #(
|
||||||
assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW];
|
assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW];
|
||||||
end
|
end
|
||||||
|
|
||||||
if (OUT_REG != 0) begin
|
reg [DATAW-1:0] prev_data;
|
||||||
reg [DATAW-1:0] rdata_r;
|
reg [ADDRW-1:0] prev_waddr;
|
||||||
always @(posedge clk) begin
|
reg prev_write;
|
||||||
|
|
||||||
|
always @(posedge clk) begin
|
||||||
|
if (RESET_RAM && reset) begin
|
||||||
|
for (integer i = 0; i < SIZE; ++i) begin
|
||||||
|
ram[i] <= DATAW'(INIT_VALUE);
|
||||||
|
end
|
||||||
|
end else begin
|
||||||
if (write) begin
|
if (write) begin
|
||||||
ram[waddr] <= ram_n;
|
ram[waddr] <= ram_n;
|
||||||
end
|
end
|
||||||
if (read) begin
|
|
||||||
rdata_r <= ram[raddr];
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
assign rdata = rdata_r;
|
if (reset) begin
|
||||||
end else begin
|
prev_write <= 0;
|
||||||
reg [DATAW-1:0] prev_data;
|
prev_data <= '0;
|
||||||
reg [ADDRW-1:0] prev_waddr;
|
prev_waddr <= '0;
|
||||||
reg prev_write;
|
end else begin
|
||||||
always @(posedge clk) begin
|
prev_write <= write;
|
||||||
if (write) begin
|
|
||||||
ram[waddr] <= ram_n;
|
|
||||||
end
|
|
||||||
prev_write <= (| wren);
|
|
||||||
prev_data <= ram[waddr];
|
prev_data <= ram[waddr];
|
||||||
prev_waddr <= waddr;
|
prev_waddr <= waddr;
|
||||||
end
|
end
|
||||||
if (LUTRAM || !NO_RWCHECK) begin
|
end
|
||||||
`UNUSED_VAR (prev_write)
|
|
||||||
`UNUSED_VAR (prev_data)
|
if (LUTRAM || !NO_RWCHECK) begin
|
||||||
`UNUSED_VAR (prev_waddr)
|
`UNUSED_VAR (prev_write)
|
||||||
assign rdata = ram[raddr];
|
`UNUSED_VAR (prev_data)
|
||||||
end else begin
|
`UNUSED_VAR (prev_waddr)
|
||||||
assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
|
assign rdata_w = ram[raddr];
|
||||||
|
end else begin
|
||||||
|
assign rdata_w = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
|
||||||
|
if (RW_ASSERT) begin
|
||||||
|
`RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("read after write hazard"));
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
|
if (OUT_REG != 0) begin
|
||||||
|
reg [DATAW-1:0] rdata_r;
|
||||||
|
always @(posedge clk) begin
|
||||||
|
if (READ_ENABLE && reset) begin
|
||||||
|
rdata_r <= '0;
|
||||||
|
end else if (!READ_ENABLE || read) begin
|
||||||
|
rdata_r <= rdata_w;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
assign rdata = rdata_r;
|
||||||
|
end else begin
|
||||||
|
assign rdata = rdata_w;
|
||||||
|
end
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
`TRACING_ON
|
`TRACING_ON
|
||||||
|
|
|
@ -1,115 +0,0 @@
|
||||||
// Copyright © 2019-2023
|
|
||||||
//
|
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
// you may not use this file except in compliance with the License.
|
|
||||||
// You may obtain a copy of the License at
|
|
||||||
// http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
//
|
|
||||||
// Unless required by applicable law or agreed to in writing, software
|
|
||||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
// See the License for the specific language governing permissions and
|
|
||||||
// limitations under the License.
|
|
||||||
|
|
||||||
`include "VX_platform.vh"
|
|
||||||
|
|
||||||
`TRACING_OFF
|
|
||||||
// Behavioral dual-port RAM (one write port, one read port) whose whole
// contents are rewritten with INIT_VALUE while `reset` is asserted on a
// rising clock edge.
//
// Parameters:
//   DATAW       - width of one RAM word in bits.
//   SIZE        - exclusive upper bound of the word index range.
//   ADDR_MIN    - lowest valid word index; the array spans [ADDR_MIN:SIZE-1].
//   WRENW       - number of write-enable lanes; each lane covers
//                 DATAW / WRENW bits (must divide DATAW exactly).
//   OUT_REG     - non-zero: rdata comes from a register loaded when `read`
//                 is high; zero: rdata is driven combinationally.
//   NO_RWCHECK  - with OUT_REG == 0 and LUTRAM == 0, a non-zero value makes a
//                 read of the address written in the previous cycle return
//                 the word as it was before that write.
//   LUTRAM      - non-zero disables the NO_RWCHECK bypass (plain
//                 combinational read).
//   INIT_ENABLE - non-zero enables power-up initialization through an
//                 `initial` block.
//   INIT_FILE   - when non-empty, hex image loaded with $readmemh.
//   INIT_VALUE  - fill value used at power-up (when INIT_FILE is empty) and
//                 on every synchronous reset.
//   ADDRW       - address width, derived from SIZE.
//
// Ports:
//   clk, reset  - clock and synchronous, active-high reset.
//   read        - read enable; only consumed when OUT_REG != 0.
//   write       - write enable for the whole word.
//   wren        - per-lane write enable; ignored when WRENW == 1.
//   waddr/wdata - write address and data.
//   raddr/rdata - read address and data.
module VX_dp_ram_rst #(
    parameter DATAW       = 1,
    parameter SIZE        = 1,
    parameter ADDR_MIN    = 0,
    parameter WRENW       = 1,
    parameter OUT_REG     = 0,
    parameter NO_RWCHECK  = 0,
    parameter LUTRAM      = 0,
    parameter INIT_ENABLE = 0,
    parameter INIT_FILE   = "",
    parameter [DATAW-1:0] INIT_VALUE = 0,
    parameter ADDRW       = `LOG2UP(SIZE)
) (
    input wire               clk,
    input wire               reset,
    input wire               read,
    input wire               write,
    input wire [WRENW-1:0]   wren,
    input wire [ADDRW-1:0]   waddr,
    input wire [DATAW-1:0]   wdata,
    input wire [ADDRW-1:0]   raddr,
    output wire [DATAW-1:0]  rdata
);
    // Width of the slice controlled by a single wren bit.
    localparam WSELW = DATAW / WRENW;
    `STATIC_ASSERT((WRENW * WSELW == DATAW), ("invalid parameter"))

    // Power-up initialization: either load INIT_FILE or fill every declared
    // entry with INIT_VALUE. The fill starts at ADDR_MIN so that it never
    // indexes below the declared range of `ram`.
`define RAM_INITIALIZATION                                  \
    if (INIT_ENABLE != 0) begin                             \
        if (INIT_FILE != "") begin                          \
            initial $readmemh(INIT_FILE, ram);              \
        end else begin                                      \
            initial                                         \
                for (integer i = ADDR_MIN; i < SIZE; ++i)   \
                    ram[i] = INIT_VALUE;                    \
        end                                                 \
    end

    // `read` is only referenced in the OUT_REG != 0 branch below.
    `UNUSED_VAR (read)

    // RAM emulation
    reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
    `RAM_INITIALIZATION

    // Next value of the addressed word: lanes with their wren bit set (or the
    // whole word when WRENW == 1) take wdata, the others keep the stored bits.
    wire [DATAW-1:0] ram_n;
    for (genvar i = 0; i < WRENW; ++i) begin
        assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW];
    end

    if (OUT_REG != 0) begin
        // Registered read port.
        reg [DATAW-1:0] rdata_r;
        always @(posedge clk) begin
            if (reset) begin
                // Clear only the declared entries [ADDR_MIN:SIZE-1].
                for (integer i = ADDR_MIN; i < SIZE; ++i) begin
                    ram[i] <= DATAW'(INIT_VALUE);
                end
                rdata_r <= '0;
            end else begin
                if (write) begin
                    ram[waddr] <= ram_n;
                end
                if (read) begin
                    rdata_r <= ram[raddr];
                end
            end
        end
        assign rdata = rdata_r;
    end else begin
        // Combinational read port. The prev_* registers remember the last
        // cycle's write address, whether any wren bit was set, and the word
        // stored at that address before the write took effect.
        reg [DATAW-1:0] prev_data;
        reg [ADDRW-1:0] prev_waddr;
        reg prev_write;
        always @(posedge clk) begin
            if (reset) begin
                // Clear only the declared entries [ADDR_MIN:SIZE-1].
                for (integer i = ADDR_MIN; i < SIZE; ++i) begin
                    ram[i] <= DATAW'(INIT_VALUE);
                end
                prev_write <= 0;
                prev_data  <= '0;
                prev_waddr <= '0;
            end else begin
                if (write) begin
                    ram[waddr] <= ram_n;
                end
                prev_write <= (| wren);
                prev_data  <= ram[waddr];
                prev_waddr <= waddr;
            end
        end
        if (LUTRAM || !NO_RWCHECK) begin
            // Plain read: the bypass registers are not consulted.
            `UNUSED_VAR (prev_write)
            `UNUSED_VAR (prev_data)
            `UNUSED_VAR (prev_waddr)
            assign rdata = ram[raddr];
        end else begin
            // Reading the address written last cycle returns the pre-write
            // word captured in prev_data instead of the updated entry.
            assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
        end
    end

endmodule
|
|
||||||
`TRACING_ON
|
|
|
@ -18,7 +18,8 @@ module VX_elastic_buffer #(
|
||||||
parameter DATAW = 1,
|
parameter DATAW = 1,
|
||||||
parameter SIZE = 1,
|
parameter SIZE = 1,
|
||||||
parameter OUT_REG = 0,
|
parameter OUT_REG = 0,
|
||||||
parameter LUTRAM = 0
|
parameter LUTRAM = 0,
|
||||||
|
parameter MAX_FANOUT = 0
|
||||||
) (
|
) (
|
||||||
input wire clk,
|
input wire clk,
|
||||||
input wire reset,
|
input wire reset,
|
||||||
|
@ -40,6 +41,43 @@ module VX_elastic_buffer #(
|
||||||
assign data_out = data_in;
|
assign data_out = data_in;
|
||||||
assign ready_in = ready_out;
|
assign ready_in = ready_out;
|
||||||
|
|
||||||
|
end else if (MAX_FANOUT != 0 && (DATAW > (MAX_FANOUT + MAX_FANOUT/2))) begin
|
||||||
|
|
||||||
|
localparam NUM_SLICES = `CDIV(DATAW, MAX_FANOUT);
|
||||||
|
localparam N_DATAW = DATAW / NUM_SLICES;
|
||||||
|
|
||||||
|
for (genvar i = 0; i < NUM_SLICES; ++i) begin
|
||||||
|
|
||||||
|
localparam S_DATAW = (i == NUM_SLICES-1) ? (DATAW - i * N_DATAW) : N_DATAW;
|
||||||
|
|
||||||
|
wire valid_out_t, ready_in_t;
|
||||||
|
`UNUSED_VAR (valid_out_t)
|
||||||
|
`UNUSED_VAR (ready_in_t)
|
||||||
|
|
||||||
|
`RESET_RELAY (slice_reset, reset);
|
||||||
|
|
||||||
|
VX_elastic_buffer #(
|
||||||
|
.DATAW (S_DATAW),
|
||||||
|
.SIZE (SIZE),
|
||||||
|
.OUT_REG (OUT_REG),
|
||||||
|
.LUTRAM (LUTRAM)
|
||||||
|
) buffer_slice (
|
||||||
|
.clk (clk),
|
||||||
|
.reset (slice_reset),
|
||||||
|
.valid_in (valid_in),
|
||||||
|
.data_in (data_in[i * N_DATAW +: S_DATAW]),
|
||||||
|
.ready_in (ready_in_t),
|
||||||
|
.valid_out (valid_out_t),
|
||||||
|
.data_out (data_out[i * N_DATAW +: S_DATAW]),
|
||||||
|
.ready_out (ready_out)
|
||||||
|
);
|
||||||
|
|
||||||
|
if (i == 0) begin
|
||||||
|
assign ready_in = ready_in_t;
|
||||||
|
assign valid_out = valid_out_t;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
end else if (SIZE == 1) begin
|
end else if (SIZE == 1) begin
|
||||||
|
|
||||||
VX_pipe_buffer #(
|
VX_pipe_buffer #(
|
||||||
|
@ -103,9 +141,9 @@ module VX_elastic_buffer #(
|
||||||
|
|
||||||
assign ready_in = ~full;
|
assign ready_in = ~full;
|
||||||
|
|
||||||
VX_elastic_buffer #(
|
VX_pipe_buffer #(
|
||||||
.DATAW (DATAW),
|
.DATAW (DATAW),
|
||||||
.SIZE ((OUT_REG == 2) ? 1 : 0)
|
.DEPTH ((OUT_REG > 0) ? (OUT_REG-1) : 0)
|
||||||
) out_buf (
|
) out_buf (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
|
|
|
@ -38,17 +38,16 @@ module VX_fair_arbiter #(
|
||||||
|
|
||||||
end else begin
|
end else begin
|
||||||
|
|
||||||
reg [NUM_REQS-1:0] grant_mask;
|
reg [NUM_REQS-1:0] requests_r;
|
||||||
|
|
||||||
wire [NUM_REQS-1:0] requests_rem = requests & ~grant_mask;
|
wire [NUM_REQS-1:0] requests_sel = requests_r & requests;
|
||||||
wire rem_valid = (| requests_rem);
|
wire [NUM_REQS-1:0] requests_qual = (| requests_sel) ? requests_sel : requests;
|
||||||
wire [NUM_REQS-1:0] requests_qual = rem_valid ? requests_rem : requests;
|
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
grant_mask <= '0;
|
requests_r <= '0;
|
||||||
end else if (grant_ready) begin
|
end else if (grant_ready) begin
|
||||||
grant_mask <= rem_valid ? (grant_mask | grant_onehot) : grant_onehot;
|
requests_r <= requests_qual & ~grant_onehot;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -177,10 +177,11 @@ module VX_fifo_queue #(
|
||||||
.SIZE (DEPTH),
|
.SIZE (DEPTH),
|
||||||
.LUTRAM (LUTRAM)
|
.LUTRAM (LUTRAM)
|
||||||
) dp_ram (
|
) dp_ram (
|
||||||
.clk(clk),
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
.read (1'b1),
|
.read (1'b1),
|
||||||
.write (push),
|
.write (push),
|
||||||
`UNUSED_PIN (wren),
|
.wren (1'b1),
|
||||||
.waddr (wr_ptr_r),
|
.waddr (wr_ptr_r),
|
||||||
.wdata (data_in),
|
.wdata (data_in),
|
||||||
.raddr (rd_ptr_r),
|
.raddr (rd_ptr_r),
|
||||||
|
@ -226,9 +227,10 @@ module VX_fifo_queue #(
|
||||||
.LUTRAM (LUTRAM)
|
.LUTRAM (LUTRAM)
|
||||||
) dp_ram (
|
) dp_ram (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
.read (1'b1),
|
.read (1'b1),
|
||||||
.write (push),
|
.write (push),
|
||||||
`UNUSED_PIN (wren),
|
.wren (1'b1),
|
||||||
.waddr (wr_ptr_r),
|
.waddr (wr_ptr_r),
|
||||||
.wdata (data_in),
|
.wdata (data_in),
|
||||||
.raddr (rd_ptr_n_r),
|
.raddr (rd_ptr_n_r),
|
||||||
|
|
|
@ -38,9 +38,11 @@ module VX_find_first #(
|
||||||
assign d_n[TL+i] = REVERSE ? data_in[N-1-i] : data_in[i];
|
assign d_n[TL+i] = REVERSE ? data_in[N-1-i] : data_in[i];
|
||||||
end
|
end
|
||||||
|
|
||||||
for (genvar i = TL+N; i < TN; ++i) begin
|
if (TL < (TN-N)) begin
|
||||||
assign s_n[i] = 0;
|
for (genvar i = TL+N; i < TN; ++i) begin
|
||||||
assign d_n[i] = '0;
|
assign s_n[i] = 0;
|
||||||
|
assign d_n[i] = '0;
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
for (genvar j = 0; j < LOGN; ++j) begin
|
for (genvar j = 0; j < LOGN; ++j) begin
|
||||||
|
|
|
@ -54,9 +54,10 @@ module VX_index_buffer #(
|
||||||
.LUTRAM (LUTRAM)
|
.LUTRAM (LUTRAM)
|
||||||
) data_table (
|
) data_table (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
.read (1'b1),
|
.read (1'b1),
|
||||||
.write (acquire_en),
|
.write (acquire_en),
|
||||||
`UNUSED_PIN (wren),
|
.wren (1'b1),
|
||||||
.waddr (write_addr),
|
.waddr (write_addr),
|
||||||
.wdata (write_data),
|
.wdata (write_data),
|
||||||
.raddr (read_addr),
|
.raddr (read_addr),
|
||||||
|
|
|
@ -87,16 +87,16 @@ module VX_mem_coalescer #(
|
||||||
localparam STATE_SETUP = 0;
|
localparam STATE_SETUP = 0;
|
||||||
localparam STATE_SEND = 1;
|
localparam STATE_SEND = 1;
|
||||||
|
|
||||||
reg state_r, state_n;
|
logic state_r, state_n;
|
||||||
|
|
||||||
reg out_req_valid_r, out_req_valid_n;
|
logic out_req_valid_r, out_req_valid_n;
|
||||||
reg out_req_rw_r, out_req_rw_n;
|
logic out_req_rw_r, out_req_rw_n;
|
||||||
reg [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n;
|
logic [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n;
|
||||||
reg [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n;
|
logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n;
|
||||||
reg [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype_r, out_req_atype_n;
|
logic [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype_r, out_req_atype_n;
|
||||||
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] out_req_byteen_r, out_req_byteen_n;
|
logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] out_req_byteen_r, out_req_byteen_n;
|
||||||
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] out_req_data_r, out_req_data_n;
|
logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] out_req_data_r, out_req_data_n;
|
||||||
reg [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n;
|
logic [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n;
|
||||||
|
|
||||||
reg in_req_ready_n;
|
reg in_req_ready_n;
|
||||||
|
|
||||||
|
@ -135,7 +135,11 @@ module VX_mem_coalescer #(
|
||||||
`UNUSED_PIN (onehot),
|
`UNUSED_PIN (onehot),
|
||||||
.valid_out (batch_valid_n[i])
|
.valid_out (batch_valid_n[i])
|
||||||
);
|
);
|
||||||
assign seed_idx[i] = NUM_REQS_W'(i * DATA_RATIO) + NUM_REQS_W'(batch_idx);
|
if (OUT_REQS > 1) begin
|
||||||
|
assign seed_idx[i] = {(NUM_REQS_W-DATA_RATIO_W)'(i), batch_idx};
|
||||||
|
end else begin
|
||||||
|
assign seed_idx[i] = batch_idx;
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
for (genvar i = 0; i < OUT_REQS; ++i) begin
|
for (genvar i = 0; i < OUT_REQS; ++i) begin
|
||||||
|
@ -149,29 +153,6 @@ module VX_mem_coalescer #(
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
always @(posedge clk) begin
|
|
||||||
if (reset) begin
|
|
||||||
state_r <= STATE_SETUP;
|
|
||||||
processed_mask_r <= '0;
|
|
||||||
out_req_valid_r <= 0;
|
|
||||||
end else begin
|
|
||||||
state_r <= state_n;
|
|
||||||
batch_valid_r <= batch_valid_n;
|
|
||||||
seed_addr_r <= seed_addr_n;
|
|
||||||
seed_atype_r <= seed_atype_n;
|
|
||||||
addr_matches_r <= addr_matches_n;
|
|
||||||
out_req_valid_r <= out_req_valid_n;
|
|
||||||
out_req_mask_r <= out_req_mask_n;
|
|
||||||
out_req_rw_r <= out_req_rw_n;
|
|
||||||
out_req_addr_r <= out_req_addr_n;
|
|
||||||
out_req_atype_r <= out_req_atype_n;
|
|
||||||
out_req_byteen_r <= out_req_byteen_n;
|
|
||||||
out_req_data_r <= out_req_data_n;
|
|
||||||
out_req_tag_r <= out_req_tag_n;
|
|
||||||
processed_mask_r <= processed_mask_n;
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
wire [NUM_REQS-1:0] current_pmask = in_req_mask & addr_matches_r;
|
wire [NUM_REQS-1:0] current_pmask = in_req_mask & addr_matches_r;
|
||||||
|
|
||||||
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] req_byteen_merged;
|
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] req_byteen_merged;
|
||||||
|
@ -248,6 +229,17 @@ module VX_mem_coalescer #(
|
||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
|
|
||||||
|
VX_pipe_register #(
|
||||||
|
.DATAW (1 + NUM_REQS + 1 + 1 + NUM_REQS + OUT_REQS * (1 + 1 + OUT_ADDR_WIDTH + ATYPE_WIDTH + OUT_ADDR_WIDTH + ATYPE_WIDTH + DATA_OUT_SIZE + DATA_OUT_WIDTH) + OUT_TAG_WIDTH),
|
||||||
|
.RESETW (1 + NUM_REQS + 1)
|
||||||
|
) pipe_reg (
|
||||||
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
|
.enable (1'b1),
|
||||||
|
.data_in ({state_n, processed_mask_n, out_req_valid_n, out_req_rw_n, addr_matches_n, batch_valid_n, out_req_mask_n, seed_addr_n, seed_atype_n, out_req_addr_n, out_req_atype_n, out_req_byteen_n, out_req_data_n, out_req_tag_n}),
|
||||||
|
.data_out ({state_r, processed_mask_r, out_req_valid_r, out_req_rw_r, addr_matches_r, batch_valid_r, out_req_mask_r, seed_addr_r, seed_atype_r, out_req_addr_r, out_req_atype_r, out_req_byteen_r, out_req_data_r, out_req_tag_r})
|
||||||
|
);
|
||||||
|
|
||||||
wire out_rsp_fire = out_rsp_valid && out_rsp_ready;
|
wire out_rsp_fire = out_rsp_valid && out_rsp_ready;
|
||||||
|
|
||||||
wire out_rsp_eop;
|
wire out_rsp_eop;
|
||||||
|
|
|
@ -29,7 +29,7 @@ module VX_onehot_encoder #(
|
||||||
);
|
);
|
||||||
if (N == 1) begin
|
if (N == 1) begin
|
||||||
|
|
||||||
assign data_out = data_in;
|
assign data_out = 0;
|
||||||
assign valid_out = data_in;
|
assign valid_out = data_in;
|
||||||
|
|
||||||
end else if (N == 2) begin
|
end else if (N == 2) begin
|
||||||
|
|
|
@ -17,7 +17,8 @@
|
||||||
module VX_onehot_mux #(
|
module VX_onehot_mux #(
|
||||||
parameter DATAW = 1,
|
parameter DATAW = 1,
|
||||||
parameter N = 1,
|
parameter N = 1,
|
||||||
parameter MODEL = 1
|
parameter MODEL = 1,
|
||||||
|
parameter LUT_OPT = 0
|
||||||
) (
|
) (
|
||||||
input wire [N-1:0][DATAW-1:0] data_in,
|
input wire [N-1:0][DATAW-1:0] data_in,
|
||||||
input wire [N-1:0] sel_in,
|
input wire [N-1:0] sel_in,
|
||||||
|
@ -26,6 +27,90 @@ module VX_onehot_mux #(
|
||||||
if (N == 1) begin
|
if (N == 1) begin
|
||||||
`UNUSED_VAR (sel_in)
|
`UNUSED_VAR (sel_in)
|
||||||
assign data_out = data_in;
|
assign data_out = data_in;
|
||||||
|
end else if (LUT_OPT && N == 2) begin
|
||||||
|
`UNUSED_VAR (sel_in)
|
||||||
|
assign data_out = sel_in[0] ? data_in[0] : data_in[1];
|
||||||
|
end else if (LUT_OPT && N == 3) begin
|
||||||
|
reg [DATAW-1:0] data_out_r;
|
||||||
|
always @(*) begin
|
||||||
|
case (sel_in)
|
||||||
|
3'b001: data_out_r = data_in[0];
|
||||||
|
3'b010: data_out_r = data_in[1];
|
||||||
|
3'b100: data_out_r = data_in[2];
|
||||||
|
default: data_out_r = 'x;
|
||||||
|
endcase
|
||||||
|
end
|
||||||
|
assign data_out = data_out_r;
|
||||||
|
end else if (LUT_OPT && N == 4) begin
|
||||||
|
reg [DATAW-1:0] data_out_r;
|
||||||
|
always @(*) begin
|
||||||
|
case (sel_in)
|
||||||
|
4'b0001: data_out_r = data_in[0];
|
||||||
|
4'b0010: data_out_r = data_in[1];
|
||||||
|
4'b0100: data_out_r = data_in[2];
|
||||||
|
4'b1000: data_out_r = data_in[3];
|
||||||
|
default: data_out_r = 'x;
|
||||||
|
endcase
|
||||||
|
end
|
||||||
|
assign data_out = data_out_r;
|
||||||
|
end else if (LUT_OPT && N == 5) begin
|
||||||
|
reg [DATAW-1:0] data_out_r;
|
||||||
|
always @(*) begin
|
||||||
|
case (sel_in)
|
||||||
|
5'b00001: data_out_r = data_in[0];
|
||||||
|
5'b00010: data_out_r = data_in[1];
|
||||||
|
5'b00100: data_out_r = data_in[2];
|
||||||
|
5'b01000: data_out_r = data_in[3];
|
||||||
|
5'b10000: data_out_r = data_in[4];
|
||||||
|
default: data_out_r = 'x;
|
||||||
|
endcase
|
||||||
|
end
|
||||||
|
assign data_out = data_out_r;
|
||||||
|
end else if (LUT_OPT && N == 6) begin
|
||||||
|
reg [DATAW-1:0] data_out_r;
|
||||||
|
always @(*) begin
|
||||||
|
case (sel_in)
|
||||||
|
6'b000001: data_out_r = data_in[0];
|
||||||
|
6'b000010: data_out_r = data_in[1];
|
||||||
|
6'b000100: data_out_r = data_in[2];
|
||||||
|
6'b001000: data_out_r = data_in[3];
|
||||||
|
6'b010000: data_out_r = data_in[4];
|
||||||
|
6'b100000: data_out_r = data_in[5];
|
||||||
|
default: data_out_r = 'x;
|
||||||
|
endcase
|
||||||
|
end
|
||||||
|
assign data_out = data_out_r;
|
||||||
|
end else if (LUT_OPT && N == 7) begin
|
||||||
|
reg [DATAW-1:0] data_out_r;
|
||||||
|
always @(*) begin
|
||||||
|
case (sel_in)
|
||||||
|
7'b0000001: data_out_r = data_in[0];
|
||||||
|
7'b0000010: data_out_r = data_in[1];
|
||||||
|
7'b0000100: data_out_r = data_in[2];
|
||||||
|
7'b0001000: data_out_r = data_in[3];
|
||||||
|
7'b0010000: data_out_r = data_in[4];
|
||||||
|
7'b0100000: data_out_r = data_in[5];
|
||||||
|
7'b1000000: data_out_r = data_in[6];
|
||||||
|
default: data_out_r = 'x;
|
||||||
|
endcase
|
||||||
|
end
|
||||||
|
assign data_out = data_out_r;
|
||||||
|
end else if (LUT_OPT && N == 8) begin
|
||||||
|
reg [DATAW-1:0] data_out_r;
|
||||||
|
always @(*) begin
|
||||||
|
case (sel_in)
|
||||||
|
8'b00000001: data_out_r = data_in[0];
|
||||||
|
8'b00000010: data_out_r = data_in[1];
|
||||||
|
8'b00000100: data_out_r = data_in[2];
|
||||||
|
8'b00001000: data_out_r = data_in[3];
|
||||||
|
8'b00010000: data_out_r = data_in[4];
|
||||||
|
8'b00100000: data_out_r = data_in[5];
|
||||||
|
8'b01000000: data_out_r = data_in[6];
|
||||||
|
8'b10000000: data_out_r = data_in[7];
|
||||||
|
default: data_out_r = 'x;
|
||||||
|
endcase
|
||||||
|
end
|
||||||
|
assign data_out = data_out_r;
|
||||||
end else if (MODEL == 1) begin
|
end else if (MODEL == 1) begin
|
||||||
wire [N-1:0][DATAW-1:0] mask;
|
wire [N-1:0][DATAW-1:0] mask;
|
||||||
for (genvar i = 0; i < N; ++i) begin
|
for (genvar i = 0; i < N; ++i) begin
|
||||||
|
|
|
@ -21,7 +21,8 @@ module VX_pe_serializer #(
|
||||||
parameter DATA_IN_WIDTH = 1,
|
parameter DATA_IN_WIDTH = 1,
|
||||||
parameter DATA_OUT_WIDTH = 1,
|
parameter DATA_OUT_WIDTH = 1,
|
||||||
parameter TAG_WIDTH = 0,
|
parameter TAG_WIDTH = 0,
|
||||||
parameter PE_REG = 0
|
parameter PE_REG = 0,
|
||||||
|
parameter OUT_BUF = 0
|
||||||
) (
|
) (
|
||||||
input wire clk,
|
input wire clk,
|
||||||
input wire reset,
|
input wire reset,
|
||||||
|
@ -43,6 +44,11 @@ module VX_pe_serializer #(
|
||||||
output wire [TAG_WIDTH-1:0] tag_out,
|
output wire [TAG_WIDTH-1:0] tag_out,
|
||||||
input wire ready_out
|
input wire ready_out
|
||||||
);
|
);
|
||||||
|
wire valid_out_u;
|
||||||
|
wire [NUM_LANES-1:0][DATA_OUT_WIDTH-1:0] data_out_u;
|
||||||
|
wire [TAG_WIDTH-1:0] tag_out_u;
|
||||||
|
wire ready_out_u;
|
||||||
|
|
||||||
wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_in_s;
|
wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_in_s;
|
||||||
wire valid_out_s;
|
wire valid_out_s;
|
||||||
wire [TAG_WIDTH-1:0] tag_out_s;
|
wire [TAG_WIDTH-1:0] tag_out_s;
|
||||||
|
@ -105,7 +111,7 @@ module VX_pe_serializer #(
|
||||||
reg [TAG_WIDTH-1:0] tag_out_r;
|
reg [TAG_WIDTH-1:0] tag_out_r;
|
||||||
|
|
||||||
wire valid_out_b = valid_out_s && batch_out_done;
|
wire valid_out_b = valid_out_s && batch_out_done;
|
||||||
wire ready_out_b = ready_out || ~valid_out;
|
wire ready_out_b = ready_out_u || ~valid_out_u;
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
|
@ -119,29 +125,42 @@ module VX_pe_serializer #(
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
assign enable = ready_out_b || ~valid_out_b;
|
assign enable = ready_out_b || ~valid_out_b;
|
||||||
assign ready_in = enable && batch_in_done;
|
assign ready_in = enable && batch_in_done;
|
||||||
|
assign pe_enable = enable;
|
||||||
|
|
||||||
assign pe_enable = enable;
|
assign valid_out_u = valid_out_r;
|
||||||
|
assign data_out_u = data_out_r;
|
||||||
assign valid_out = valid_out_r;
|
assign tag_out_u = tag_out_r;
|
||||||
assign data_out = data_out_r;
|
|
||||||
assign tag_out = tag_out_r;
|
|
||||||
|
|
||||||
end else begin
|
end else begin
|
||||||
|
|
||||||
assign pe_data_in_s = data_in;
|
assign pe_data_in_s = data_in;
|
||||||
|
|
||||||
assign enable = ready_out || ~valid_out;
|
assign enable = ready_out_u || ~valid_out_u;
|
||||||
assign ready_in = enable;
|
assign ready_in = enable;
|
||||||
|
assign pe_enable = enable;
|
||||||
|
|
||||||
assign pe_enable = enable;
|
assign valid_out_u = valid_out_s;
|
||||||
|
assign data_out_u = pe_data_out;
|
||||||
assign valid_out = valid_out_s;
|
assign tag_out_u = tag_out_s;
|
||||||
assign data_out = pe_data_out;
|
|
||||||
assign tag_out = tag_out_s;
|
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
VX_elastic_buffer #(
|
||||||
|
.DATAW (NUM_LANES * DATA_OUT_WIDTH + TAG_WIDTH),
|
||||||
|
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||||
|
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
||||||
|
) out_buf (
|
||||||
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
|
.valid_in (valid_out_u),
|
||||||
|
.ready_in (ready_out_u),
|
||||||
|
.data_in ({data_out_u, tag_out_u}),
|
||||||
|
.data_out ({data_out, tag_out}),
|
||||||
|
.valid_out (valid_out),
|
||||||
|
.ready_out (ready_out)
|
||||||
|
);
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
`TRACING_ON
|
`TRACING_ON
|
||||||
|
|
|
@ -24,8 +24,8 @@
|
||||||
|
|
||||||
`TRACING_OFF
|
`TRACING_OFF
|
||||||
module VX_pipe_buffer #(
|
module VX_pipe_buffer #(
|
||||||
parameter DATAW = 1,
|
parameter DATAW = 1,
|
||||||
parameter PASSTHRU = 0
|
parameter DEPTH = 1
|
||||||
) (
|
) (
|
||||||
input wire clk,
|
input wire clk,
|
||||||
input wire reset,
|
input wire reset,
|
||||||
|
@ -36,27 +36,41 @@ module VX_pipe_buffer #(
|
||||||
input wire ready_out,
|
input wire ready_out,
|
||||||
output wire valid_out
|
output wire valid_out
|
||||||
);
|
);
|
||||||
if (PASSTHRU != 0) begin
|
if (DEPTH == 0) begin
|
||||||
`UNUSED_VAR (clk)
|
`UNUSED_VAR (clk)
|
||||||
`UNUSED_VAR (reset)
|
`UNUSED_VAR (reset)
|
||||||
assign ready_in = ready_out;
|
assign ready_in = ready_out;
|
||||||
assign valid_out = valid_in;
|
assign valid_out = valid_in;
|
||||||
assign data_out = data_in;
|
assign data_out = data_in;
|
||||||
end else begin
|
end else begin
|
||||||
wire stall = valid_out && ~ready_out;
|
wire [DEPTH:0] valid;
|
||||||
|
`IGNORE_UNOPTFLAT_BEGIN
|
||||||
|
wire [DEPTH:0] ready;
|
||||||
|
`IGNORE_UNOPTFLAT_END
|
||||||
|
wire [DEPTH:0][DATAW-1:0] data;
|
||||||
|
|
||||||
VX_pipe_register #(
|
assign valid[0] = valid_in;
|
||||||
.DATAW (1 + DATAW),
|
assign data[0] = data_in;
|
||||||
.RESETW (1)
|
assign ready_in = ready[0];
|
||||||
) pipe_register (
|
|
||||||
.clk (clk),
|
for (genvar i = 0; i < DEPTH; ++i) begin
|
||||||
.reset (reset),
|
assign ready[i] = (ready[i+1] || ~valid[i+1]);
|
||||||
.enable (~stall),
|
VX_pipe_register #(
|
||||||
.data_in ({valid_in, data_in}),
|
.DATAW (1 + DATAW),
|
||||||
.data_out ({valid_out, data_out})
|
.RESETW (1)
|
||||||
);
|
) pipe_register (
|
||||||
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
|
.enable (ready[i]),
|
||||||
|
.data_in ({valid[i], data[i]}),
|
||||||
|
.data_out ({valid[i+1], data[i+1]})
|
||||||
|
);
|
||||||
|
end
|
||||||
|
|
||||||
|
assign valid_out = valid[DEPTH];
|
||||||
|
assign data_out = data[DEPTH];
|
||||||
|
assign ready[DEPTH] = ready_out;
|
||||||
|
|
||||||
assign ready_in = ~stall;
|
|
||||||
end
|
end
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
|
|
|
@ -17,7 +17,8 @@
|
||||||
module VX_pipe_register #(
|
module VX_pipe_register #(
|
||||||
parameter DATAW = 1,
|
parameter DATAW = 1,
|
||||||
parameter RESETW = 0,
|
parameter RESETW = 0,
|
||||||
parameter DEPTH = 1
|
parameter DEPTH = 1,
|
||||||
|
parameter MAX_FANOUT = 0
|
||||||
) (
|
) (
|
||||||
input wire clk,
|
input wire clk,
|
||||||
input wire reset,
|
input wire reset,
|
||||||
|
@ -31,45 +32,67 @@ module VX_pipe_register #(
|
||||||
`UNUSED_VAR (enable)
|
`UNUSED_VAR (enable)
|
||||||
assign data_out = data_in;
|
assign data_out = data_in;
|
||||||
end else if (DEPTH == 1) begin
|
end else if (DEPTH == 1) begin
|
||||||
if (RESETW == 0) begin
|
if (MAX_FANOUT != 0 && (DATAW > (MAX_FANOUT + MAX_FANOUT/2))) begin
|
||||||
`UNUSED_VAR (reset)
|
localparam NUM_SLICES = `CDIV(DATAW, MAX_FANOUT);
|
||||||
reg [DATAW-1:0] value;
|
localparam N_DATAW = DATAW / NUM_SLICES;
|
||||||
|
for (genvar i = 0; i < NUM_SLICES; ++i) begin
|
||||||
always @(posedge clk) begin
|
localparam SLICE_START = i * N_DATAW;
|
||||||
if (enable) begin
|
localparam SLICE_END = SLICE_START + S_DATAW - 1;
|
||||||
value <= data_in;
|
localparam S_DATAW = (i == NUM_SLICES-1) ? (DATAW - SLICE_START) : N_DATAW;
|
||||||
end
|
localparam S_RESETW = (SLICE_END >= (DATAW - RESETW)) ?
|
||||||
|
((SLICE_START >= (DATAW - RESETW)) ? S_DATAW : (SLICE_END - (DATAW - RESETW) + 1)) : 0;
|
||||||
|
VX_pipe_register #(
|
||||||
|
.DATAW (S_DATAW),
|
||||||
|
.RESETW (S_RESETW)
|
||||||
|
) pipe_register_slice (
|
||||||
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
|
.enable (enable),
|
||||||
|
.data_in (data_in[i * N_DATAW +: S_DATAW]),
|
||||||
|
.data_out (data_out[i * N_DATAW +: S_DATAW])
|
||||||
|
);
|
||||||
end
|
end
|
||||||
assign data_out = value;
|
|
||||||
end else if (RESETW == DATAW) begin
|
|
||||||
reg [DATAW-1:0] value;
|
|
||||||
|
|
||||||
always @(posedge clk) begin
|
|
||||||
if (reset) begin
|
|
||||||
value <= RESETW'(0);
|
|
||||||
end else if (enable) begin
|
|
||||||
value <= data_in;
|
|
||||||
end
|
|
||||||
end
|
|
||||||
assign data_out = value;
|
|
||||||
end else begin
|
end else begin
|
||||||
reg [DATAW-RESETW-1:0] value_d;
|
if (RESETW == 0) begin
|
||||||
reg [RESETW-1:0] value_r;
|
`UNUSED_VAR (reset)
|
||||||
|
reg [DATAW-1:0] value;
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (enable) begin
|
||||||
value_r <= RESETW'(0);
|
value <= data_in;
|
||||||
end else if (enable) begin
|
end
|
||||||
value_r <= data_in[DATAW-1:DATAW-RESETW];
|
|
||||||
end
|
end
|
||||||
end
|
assign data_out = value;
|
||||||
|
end else if (RESETW == DATAW) begin
|
||||||
|
reg [DATAW-1:0] value;
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (enable) begin
|
if (reset) begin
|
||||||
value_d <= data_in[DATAW-RESETW-1:0];
|
value <= RESETW'(0);
|
||||||
|
end else if (enable) begin
|
||||||
|
value <= data_in;
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
assign data_out = value;
|
||||||
|
end else begin
|
||||||
|
reg [DATAW-RESETW-1:0] value_d;
|
||||||
|
reg [RESETW-1:0] value_r;
|
||||||
|
|
||||||
|
always @(posedge clk) begin
|
||||||
|
if (reset) begin
|
||||||
|
value_r <= RESETW'(0);
|
||||||
|
end else if (enable) begin
|
||||||
|
value_r <= data_in[DATAW-1:DATAW-RESETW];
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
always @(posedge clk) begin
|
||||||
|
if (enable) begin
|
||||||
|
value_d <= data_in[DATAW-RESETW-1:0];
|
||||||
|
end
|
||||||
|
end
|
||||||
|
assign data_out = {value_r, value_d};
|
||||||
end
|
end
|
||||||
assign data_out = {value_r, value_d};
|
|
||||||
end
|
end
|
||||||
end else begin
|
end else begin
|
||||||
wire [DEPTH:0][DATAW-1:0] data_delayed;
|
wire [DEPTH:0][DATAW-1:0] data_delayed;
|
||||||
|
|
|
@ -22,7 +22,7 @@ module VX_reset_relay #(
|
||||||
input wire reset,
|
input wire reset,
|
||||||
output wire [N-1:0] reset_o
|
output wire [N-1:0] reset_o
|
||||||
);
|
);
|
||||||
if (MAX_FANOUT >= 0 && N > MAX_FANOUT) begin
|
if (MAX_FANOUT >= 0 && N > (MAX_FANOUT + MAX_FANOUT/2)) begin
|
||||||
localparam F = `UP(MAX_FANOUT);
|
localparam F = `UP(MAX_FANOUT);
|
||||||
localparam R = N / F;
|
localparam R = N / F;
|
||||||
`PRESERVE_NET reg [R-1:0] reset_r;
|
`PRESERVE_NET reg [R-1:0] reset_r;
|
||||||
|
|
|
@ -15,9 +15,10 @@
|
||||||
|
|
||||||
`TRACING_OFF
|
`TRACING_OFF
|
||||||
module VX_rr_arbiter #(
|
module VX_rr_arbiter #(
|
||||||
parameter NUM_REQS = 1,
|
parameter NUM_REQS = 1,
|
||||||
parameter MODEL = 1,
|
parameter MODEL = 1,
|
||||||
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
|
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS),
|
||||||
|
parameter LUT_OPT = 0
|
||||||
) (
|
) (
|
||||||
input wire clk,
|
input wire clk,
|
||||||
input wire reset,
|
input wire reset,
|
||||||
|
@ -37,7 +38,7 @@ module VX_rr_arbiter #(
|
||||||
assign grant_onehot = requests;
|
assign grant_onehot = requests;
|
||||||
assign grant_valid = requests[0];
|
assign grant_valid = requests[0];
|
||||||
|
|
||||||
end else if (NUM_REQS == 2) begin
|
end else if (LUT_OPT && NUM_REQS == 2) begin
|
||||||
|
|
||||||
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
||||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||||
|
@ -63,7 +64,7 @@ module VX_rr_arbiter #(
|
||||||
assign grant_onehot = grant_onehot_r;
|
assign grant_onehot = grant_onehot_r;
|
||||||
assign grant_valid = (| requests);
|
assign grant_valid = (| requests);
|
||||||
|
|
||||||
end /*else if (NUM_REQS == 3) begin
|
end else if (LUT_OPT && NUM_REQS == 3) begin
|
||||||
|
|
||||||
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
||||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||||
|
@ -93,7 +94,7 @@ module VX_rr_arbiter #(
|
||||||
assign grant_onehot = grant_onehot_r;
|
assign grant_onehot = grant_onehot_r;
|
||||||
assign grant_valid = (| requests);
|
assign grant_valid = (| requests);
|
||||||
|
|
||||||
end */else if (NUM_REQS == 4) begin
|
end else if (LUT_OPT && NUM_REQS == 4) begin
|
||||||
|
|
||||||
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
||||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||||
|
@ -129,7 +130,7 @@ module VX_rr_arbiter #(
|
||||||
assign grant_onehot = grant_onehot_r;
|
assign grant_onehot = grant_onehot_r;
|
||||||
assign grant_valid = (| requests);
|
assign grant_valid = (| requests);
|
||||||
|
|
||||||
end /*else if (NUM_REQS == 5) begin
|
end else if (LUT_OPT && NUM_REQS == 5) begin
|
||||||
|
|
||||||
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
||||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||||
|
@ -173,7 +174,7 @@ module VX_rr_arbiter #(
|
||||||
assign grant_onehot = grant_onehot_r;
|
assign grant_onehot = grant_onehot_r;
|
||||||
assign grant_valid = (| requests);
|
assign grant_valid = (| requests);
|
||||||
|
|
||||||
end else if (NUM_REQS == 6) begin
|
end else if (LUT_OPT && NUM_REQS == 6) begin
|
||||||
|
|
||||||
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
||||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||||
|
@ -227,7 +228,7 @@ module VX_rr_arbiter #(
|
||||||
assign grant_onehot = grant_onehot_r;
|
assign grant_onehot = grant_onehot_r;
|
||||||
assign grant_valid = (| requests);
|
assign grant_valid = (| requests);
|
||||||
|
|
||||||
end else if (NUM_REQS == 7) begin
|
end else if (LUT_OPT && NUM_REQS == 7) begin
|
||||||
|
|
||||||
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
||||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||||
|
@ -293,7 +294,7 @@ module VX_rr_arbiter #(
|
||||||
assign grant_onehot = grant_onehot_r;
|
assign grant_onehot = grant_onehot_r;
|
||||||
assign grant_valid = (| requests);
|
assign grant_valid = (| requests);
|
||||||
|
|
||||||
end */else if (NUM_REQS == 8) begin
|
end else if (LUT_OPT && NUM_REQS == 8) begin
|
||||||
|
|
||||||
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
||||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||||
|
|
|
@ -21,13 +21,16 @@ module VX_sp_ram #(
|
||||||
parameter WRENW = 1,
|
parameter WRENW = 1,
|
||||||
parameter OUT_REG = 0,
|
parameter OUT_REG = 0,
|
||||||
parameter NO_RWCHECK = 0,
|
parameter NO_RWCHECK = 0,
|
||||||
|
parameter RW_ASSERT = 0,
|
||||||
parameter LUTRAM = 0,
|
parameter LUTRAM = 0,
|
||||||
|
parameter RESET_RAM = 0,
|
||||||
parameter INIT_ENABLE = 0,
|
parameter INIT_ENABLE = 0,
|
||||||
parameter INIT_FILE = "",
|
parameter INIT_FILE = "",
|
||||||
parameter [DATAW-1:0] INIT_VALUE = 0,
|
parameter [DATAW-1:0] INIT_VALUE = 0,
|
||||||
parameter ADDRW = `LOG2UP(SIZE)
|
parameter ADDRW = `LOG2UP(SIZE)
|
||||||
) (
|
) (
|
||||||
input wire clk,
|
input wire clk,
|
||||||
|
input wire reset,
|
||||||
input wire read,
|
input wire read,
|
||||||
input wire write,
|
input wire write,
|
||||||
input wire [WRENW-1:0] wren,
|
input wire [WRENW-1:0] wren,
|
||||||
|
@ -42,13 +45,16 @@ module VX_sp_ram #(
|
||||||
.WRENW (WRENW),
|
.WRENW (WRENW),
|
||||||
.OUT_REG (OUT_REG),
|
.OUT_REG (OUT_REG),
|
||||||
.NO_RWCHECK (NO_RWCHECK),
|
.NO_RWCHECK (NO_RWCHECK),
|
||||||
|
.RW_ASSERT (RW_ASSERT),
|
||||||
.LUTRAM (LUTRAM),
|
.LUTRAM (LUTRAM),
|
||||||
|
.RESET_RAM (RESET_RAM),
|
||||||
.INIT_ENABLE (INIT_ENABLE),
|
.INIT_ENABLE (INIT_ENABLE),
|
||||||
.INIT_FILE (INIT_FILE),
|
.INIT_FILE (INIT_FILE),
|
||||||
.INIT_VALUE (INIT_VALUE),
|
.INIT_VALUE (INIT_VALUE),
|
||||||
.ADDRW (ADDRW)
|
.ADDRW (ADDRW)
|
||||||
) dp_ram (
|
) dp_ram (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
.read (read),
|
.read (read),
|
||||||
.write (write),
|
.write (write),
|
||||||
.wren (wren),
|
.wren (wren),
|
||||||
|
|
|
@ -18,7 +18,7 @@ module VX_stream_arb #(
|
||||||
parameter NUM_INPUTS = 1,
|
parameter NUM_INPUTS = 1,
|
||||||
parameter NUM_OUTPUTS = 1,
|
parameter NUM_OUTPUTS = 1,
|
||||||
parameter DATAW = 1,
|
parameter DATAW = 1,
|
||||||
parameter `STRING ARBITER = "P",
|
parameter `STRING ARBITER = "R",
|
||||||
parameter MAX_FANOUT = `MAX_FANOUT,
|
parameter MAX_FANOUT = `MAX_FANOUT,
|
||||||
parameter OUT_BUF = 0,
|
parameter OUT_BUF = 0,
|
||||||
parameter LUTRAM = 0,
|
parameter LUTRAM = 0,
|
||||||
|
@ -46,14 +46,14 @@ module VX_stream_arb #(
|
||||||
|
|
||||||
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
|
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
|
||||||
|
|
||||||
localparam BATCH_BEGIN = i * NUM_REQS;
|
localparam SLICE_BEGIN = i * NUM_REQS;
|
||||||
localparam BATCH_END = `MIN(BATCH_BEGIN + NUM_REQS, NUM_INPUTS);
|
localparam SLICE_END = `MIN(SLICE_BEGIN + NUM_REQS, NUM_INPUTS);
|
||||||
localparam BATCH_SIZE = BATCH_END - BATCH_BEGIN;
|
localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN;
|
||||||
|
|
||||||
`RESET_RELAY (slice_reset, reset);
|
`RESET_RELAY (slice_reset, reset);
|
||||||
|
|
||||||
VX_stream_arb #(
|
VX_stream_arb #(
|
||||||
.NUM_INPUTS (BATCH_SIZE),
|
.NUM_INPUTS (SLICE_SIZE),
|
||||||
.NUM_OUTPUTS (1),
|
.NUM_OUTPUTS (1),
|
||||||
.DATAW (DATAW),
|
.DATAW (DATAW),
|
||||||
.ARBITER (ARBITER),
|
.ARBITER (ARBITER),
|
||||||
|
@ -63,9 +63,9 @@ module VX_stream_arb #(
|
||||||
) arb_slice (
|
) arb_slice (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (slice_reset),
|
.reset (slice_reset),
|
||||||
.valid_in (valid_in[BATCH_END-1: BATCH_BEGIN]),
|
.valid_in (valid_in[SLICE_END-1: SLICE_BEGIN]),
|
||||||
.ready_in (ready_in[BATCH_END-1: BATCH_BEGIN]),
|
.ready_in (ready_in[SLICE_END-1: SLICE_BEGIN]),
|
||||||
.data_in (data_in[BATCH_END-1: BATCH_BEGIN]),
|
.data_in (data_in[SLICE_END-1: SLICE_BEGIN]),
|
||||||
.data_out (data_out[i]),
|
.data_out (data_out[i]),
|
||||||
.sel_out (sel_out[i]),
|
.sel_out (sel_out[i]),
|
||||||
.valid_out (valid_out[i]),
|
.valid_out (valid_out[i]),
|
||||||
|
@ -73,32 +73,32 @@ module VX_stream_arb #(
|
||||||
);
|
);
|
||||||
end
|
end
|
||||||
|
|
||||||
end else if (MAX_FANOUT != 0 && (NUM_INPUTS > MAX_FANOUT)) begin
|
end else if (MAX_FANOUT != 0 && (NUM_INPUTS > (MAX_FANOUT + MAX_FANOUT /2))) begin
|
||||||
|
|
||||||
// (#inputs > max_fanout) and (#outputs == 1)
|
// (#inputs > max_fanout) and (#outputs == 1)
|
||||||
|
|
||||||
localparam NUM_BATCHES = `CDIV(NUM_INPUTS, MAX_FANOUT);
|
localparam NUM_SLICES = `CDIV(NUM_INPUTS, MAX_FANOUT);
|
||||||
localparam LOG_NUM_REQS2 = `CLOG2(MAX_FANOUT);
|
localparam LOG_NUM_REQS2 = `CLOG2(MAX_FANOUT);
|
||||||
localparam LOG_NUM_REQS3 = `CLOG2(NUM_BATCHES);
|
localparam LOG_NUM_REQS3 = `CLOG2(NUM_SLICES);
|
||||||
|
|
||||||
wire [NUM_BATCHES-1:0] valid_tmp;
|
wire [NUM_SLICES-1:0] valid_tmp;
|
||||||
wire [NUM_BATCHES-1:0][DATAW+LOG_NUM_REQS2-1:0] data_tmp;
|
wire [NUM_SLICES-1:0][DATAW+LOG_NUM_REQS2-1:0] data_tmp;
|
||||||
wire [NUM_BATCHES-1:0] ready_tmp;
|
wire [NUM_SLICES-1:0] ready_tmp;
|
||||||
|
|
||||||
for (genvar i = 0; i < NUM_BATCHES; ++i) begin
|
for (genvar i = 0; i < NUM_SLICES; ++i) begin
|
||||||
|
|
||||||
localparam BATCH_BEGIN = i * MAX_FANOUT;
|
localparam SLICE_BEGIN = i * MAX_FANOUT;
|
||||||
localparam BATCH_END = `MIN(BATCH_BEGIN + MAX_FANOUT, NUM_INPUTS);
|
localparam SLICE_END = `MIN(SLICE_BEGIN + MAX_FANOUT, NUM_INPUTS);
|
||||||
localparam BATCH_SIZE = BATCH_END - BATCH_BEGIN;
|
localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN;
|
||||||
|
|
||||||
wire [DATAW-1:0] data_tmp_u;
|
wire [DATAW-1:0] data_tmp_u;
|
||||||
wire [`LOG2UP(BATCH_SIZE)-1:0] sel_tmp_u;
|
wire [`LOG2UP(SLICE_SIZE)-1:0] sel_tmp_u;
|
||||||
|
|
||||||
`RESET_RELAY (slice_reset, reset);
|
`RESET_RELAY (slice_reset, reset);
|
||||||
|
|
||||||
if (MAX_FANOUT != 1) begin
|
if (MAX_FANOUT != 1) begin
|
||||||
VX_stream_arb #(
|
VX_stream_arb #(
|
||||||
.NUM_INPUTS (BATCH_SIZE),
|
.NUM_INPUTS (SLICE_SIZE),
|
||||||
.NUM_OUTPUTS (1),
|
.NUM_OUTPUTS (1),
|
||||||
.DATAW (DATAW),
|
.DATAW (DATAW),
|
||||||
.ARBITER (ARBITER),
|
.ARBITER (ARBITER),
|
||||||
|
@ -108,9 +108,9 @@ module VX_stream_arb #(
|
||||||
) fanout_slice_arb (
|
) fanout_slice_arb (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (slice_reset),
|
.reset (slice_reset),
|
||||||
.valid_in (valid_in[BATCH_END-1: BATCH_BEGIN]),
|
.valid_in (valid_in[SLICE_END-1: SLICE_BEGIN]),
|
||||||
.data_in (data_in[BATCH_END-1: BATCH_BEGIN]),
|
.data_in (data_in[SLICE_END-1: SLICE_BEGIN]),
|
||||||
.ready_in (ready_in[BATCH_END-1: BATCH_BEGIN]),
|
.ready_in (ready_in[SLICE_END-1: SLICE_BEGIN]),
|
||||||
.valid_out (valid_tmp[i]),
|
.valid_out (valid_tmp[i]),
|
||||||
.data_out (data_tmp_u),
|
.data_out (data_tmp_u),
|
||||||
.sel_out (sel_tmp_u),
|
.sel_out (sel_tmp_u),
|
||||||
|
@ -125,7 +125,7 @@ module VX_stream_arb #(
|
||||||
wire [LOG_NUM_REQS3-1:0] sel_out_u;
|
wire [LOG_NUM_REQS3-1:0] sel_out_u;
|
||||||
|
|
||||||
VX_stream_arb #(
|
VX_stream_arb #(
|
||||||
.NUM_INPUTS (NUM_BATCHES),
|
.NUM_INPUTS (NUM_SLICES),
|
||||||
.NUM_OUTPUTS (1),
|
.NUM_OUTPUTS (1),
|
||||||
.DATAW (DATAW + LOG_NUM_REQS2),
|
.DATAW (DATAW + LOG_NUM_REQS2),
|
||||||
.ARBITER (ARBITER),
|
.ARBITER (ARBITER),
|
||||||
|
@ -174,17 +174,9 @@ module VX_stream_arb #(
|
||||||
);
|
);
|
||||||
|
|
||||||
assign valid_in_r = arb_valid;
|
assign valid_in_r = arb_valid;
|
||||||
|
assign data_in_r = data_in[arb_index];
|
||||||
assign arb_ready = ready_in_r;
|
assign arb_ready = ready_in_r;
|
||||||
|
|
||||||
VX_onehot_mux #(
|
|
||||||
.DATAW (DATAW),
|
|
||||||
.N (NUM_REQS)
|
|
||||||
) onehot_mux (
|
|
||||||
.data_in (data_in),
|
|
||||||
.sel_in (arb_onehot),
|
|
||||||
.data_out (data_in_r)
|
|
||||||
);
|
|
||||||
|
|
||||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||||
assign ready_in[i] = ready_in_r && arb_onehot[i];
|
assign ready_in[i] = ready_in_r && arb_onehot[i];
|
||||||
end
|
end
|
||||||
|
@ -214,15 +206,15 @@ module VX_stream_arb #(
|
||||||
|
|
||||||
for (genvar i = 0; i < NUM_INPUTS; ++i) begin
|
for (genvar i = 0; i < NUM_INPUTS; ++i) begin
|
||||||
|
|
||||||
localparam BATCH_BEGIN = i * NUM_REQS;
|
localparam SLICE_BEGIN = i * NUM_REQS;
|
||||||
localparam BATCH_END = `MIN(BATCH_BEGIN + NUM_REQS, NUM_OUTPUTS);
|
localparam SLICE_END = `MIN(SLICE_BEGIN + NUM_REQS, NUM_OUTPUTS);
|
||||||
localparam BATCH_SIZE = BATCH_END - BATCH_BEGIN;
|
localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN;
|
||||||
|
|
||||||
`RESET_RELAY (slice_reset, reset);
|
`RESET_RELAY (slice_reset, reset);
|
||||||
|
|
||||||
VX_stream_arb #(
|
VX_stream_arb #(
|
||||||
.NUM_INPUTS (1),
|
.NUM_INPUTS (1),
|
||||||
.NUM_OUTPUTS (BATCH_SIZE),
|
.NUM_OUTPUTS (SLICE_SIZE),
|
||||||
.DATAW (DATAW),
|
.DATAW (DATAW),
|
||||||
.ARBITER (ARBITER),
|
.ARBITER (ARBITER),
|
||||||
.MAX_FANOUT (MAX_FANOUT),
|
.MAX_FANOUT (MAX_FANOUT),
|
||||||
|
@ -234,30 +226,30 @@ module VX_stream_arb #(
|
||||||
.valid_in (valid_in[i]),
|
.valid_in (valid_in[i]),
|
||||||
.ready_in (ready_in[i]),
|
.ready_in (ready_in[i]),
|
||||||
.data_in (data_in[i]),
|
.data_in (data_in[i]),
|
||||||
.data_out (data_out[BATCH_END-1: BATCH_BEGIN]),
|
.data_out (data_out[SLICE_END-1: SLICE_BEGIN]),
|
||||||
.valid_out (valid_out[BATCH_END-1: BATCH_BEGIN]),
|
.valid_out (valid_out[SLICE_END-1: SLICE_BEGIN]),
|
||||||
.ready_out (ready_out[BATCH_END-1: BATCH_BEGIN]),
|
.ready_out (ready_out[SLICE_END-1: SLICE_BEGIN]),
|
||||||
`UNUSED_PIN (sel_out)
|
`UNUSED_PIN (sel_out)
|
||||||
);
|
);
|
||||||
|
|
||||||
for (genvar j = BATCH_BEGIN; j < BATCH_END; ++j) begin
|
for (genvar j = SLICE_BEGIN; j < SLICE_END; ++j) begin
|
||||||
assign sel_out[j] = i;
|
assign sel_out[j] = i;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
end else if (MAX_FANOUT != 0 && (NUM_OUTPUTS > MAX_FANOUT)) begin
|
end else if (MAX_FANOUT != 0 && (NUM_OUTPUTS > (MAX_FANOUT + MAX_FANOUT /2))) begin
|
||||||
|
|
||||||
// (#inputs == 1) and (#outputs > max_fanout)
|
// (#inputs == 1) and (#outputs > max_fanout)
|
||||||
|
|
||||||
localparam NUM_BATCHES = `CDIV(NUM_OUTPUTS, MAX_FANOUT);
|
localparam NUM_SLICES = `CDIV(NUM_OUTPUTS, MAX_FANOUT);
|
||||||
|
|
||||||
wire [NUM_BATCHES-1:0] valid_tmp;
|
wire [NUM_SLICES-1:0] valid_tmp;
|
||||||
wire [NUM_BATCHES-1:0][DATAW-1:0] data_tmp;
|
wire [NUM_SLICES-1:0][DATAW-1:0] data_tmp;
|
||||||
wire [NUM_BATCHES-1:0] ready_tmp;
|
wire [NUM_SLICES-1:0] ready_tmp;
|
||||||
|
|
||||||
VX_stream_arb #(
|
VX_stream_arb #(
|
||||||
.NUM_INPUTS (1),
|
.NUM_INPUTS (1),
|
||||||
.NUM_OUTPUTS (NUM_BATCHES),
|
.NUM_OUTPUTS (NUM_SLICES),
|
||||||
.DATAW (DATAW),
|
.DATAW (DATAW),
|
||||||
.ARBITER (ARBITER),
|
.ARBITER (ARBITER),
|
||||||
.MAX_FANOUT (MAX_FANOUT),
|
.MAX_FANOUT (MAX_FANOUT),
|
||||||
|
@ -275,17 +267,17 @@ module VX_stream_arb #(
|
||||||
`UNUSED_PIN (sel_out)
|
`UNUSED_PIN (sel_out)
|
||||||
);
|
);
|
||||||
|
|
||||||
for (genvar i = 0; i < NUM_BATCHES; ++i) begin
|
for (genvar i = 0; i < NUM_SLICES; ++i) begin
|
||||||
|
|
||||||
localparam BATCH_BEGIN = i * MAX_FANOUT;
|
localparam SLICE_BEGIN = i * MAX_FANOUT;
|
||||||
localparam BATCH_END = `MIN(BATCH_BEGIN + MAX_FANOUT, NUM_OUTPUTS);
|
localparam SLICE_END = `MIN(SLICE_BEGIN + MAX_FANOUT, NUM_OUTPUTS);
|
||||||
localparam BATCH_SIZE = BATCH_END - BATCH_BEGIN;
|
localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN;
|
||||||
|
|
||||||
`RESET_RELAY (slice_reset, reset);
|
`RESET_RELAY (slice_reset, reset);
|
||||||
|
|
||||||
VX_stream_arb #(
|
VX_stream_arb #(
|
||||||
.NUM_INPUTS (1),
|
.NUM_INPUTS (1),
|
||||||
.NUM_OUTPUTS (BATCH_SIZE),
|
.NUM_OUTPUTS (SLICE_SIZE),
|
||||||
.DATAW (DATAW),
|
.DATAW (DATAW),
|
||||||
.ARBITER (ARBITER),
|
.ARBITER (ARBITER),
|
||||||
.MAX_FANOUT (MAX_FANOUT),
|
.MAX_FANOUT (MAX_FANOUT),
|
||||||
|
@ -297,9 +289,9 @@ module VX_stream_arb #(
|
||||||
.valid_in (valid_tmp[i]),
|
.valid_in (valid_tmp[i]),
|
||||||
.ready_in (ready_tmp[i]),
|
.ready_in (ready_tmp[i]),
|
||||||
.data_in (data_tmp[i]),
|
.data_in (data_tmp[i]),
|
||||||
.data_out (data_out[BATCH_END-1: BATCH_BEGIN]),
|
.data_out (data_out[SLICE_END-1: SLICE_BEGIN]),
|
||||||
.valid_out (valid_out[BATCH_END-1: BATCH_BEGIN]),
|
.valid_out (valid_out[SLICE_END-1: SLICE_BEGIN]),
|
||||||
.ready_out (ready_out[BATCH_END-1: BATCH_BEGIN]),
|
.ready_out (ready_out[SLICE_END-1: SLICE_BEGIN]),
|
||||||
`UNUSED_PIN (sel_out)
|
`UNUSED_PIN (sel_out)
|
||||||
);
|
);
|
||||||
end
|
end
|
||||||
|
@ -357,9 +349,9 @@ module VX_stream_arb #(
|
||||||
|
|
||||||
// #Inputs == #Outputs
|
// #Inputs == #Outputs
|
||||||
|
|
||||||
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
|
`RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT);
|
||||||
|
|
||||||
`RESET_RELAY_EN (out_buf_reset, reset, (NUM_OUTPUTS > 1));
|
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
|
||||||
|
|
||||||
VX_elastic_buffer #(
|
VX_elastic_buffer #(
|
||||||
.DATAW (DATAW),
|
.DATAW (DATAW),
|
||||||
|
@ -368,7 +360,7 @@ module VX_stream_arb #(
|
||||||
.LUTRAM (LUTRAM)
|
.LUTRAM (LUTRAM)
|
||||||
) out_buf (
|
) out_buf (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (out_buf_reset),
|
.reset (out_buf_reset[i]),
|
||||||
.valid_in (valid_in[i]),
|
.valid_in (valid_in[i]),
|
||||||
.ready_in (ready_in[i]),
|
.ready_in (ready_in[i]),
|
||||||
.data_in (data_in[i]),
|
.data_in (data_in[i]),
|
||||||
|
|
|
@ -39,8 +39,9 @@ module VX_stream_pack #(
|
||||||
input wire ready_out
|
input wire ready_out
|
||||||
);
|
);
|
||||||
if (NUM_REQS > 1) begin
|
if (NUM_REQS > 1) begin
|
||||||
|
localparam LOG_NUM_REQS = `CLOG2(NUM_REQS);
|
||||||
|
|
||||||
wire [NUM_REQS-1:0] grant_onehot;
|
wire [LOG_NUM_REQS-1:0] grant_index;
|
||||||
wire grant_valid;
|
wire grant_valid;
|
||||||
wire grant_ready;
|
wire grant_ready;
|
||||||
|
|
||||||
|
@ -52,21 +53,12 @@ module VX_stream_pack #(
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.requests (valid_in),
|
.requests (valid_in),
|
||||||
.grant_valid (grant_valid),
|
.grant_valid (grant_valid),
|
||||||
`UNUSED_PIN (grant_index),
|
.grant_index (grant_index),
|
||||||
.grant_onehot(grant_onehot),
|
`UNUSED_PIN (grant_onehot),
|
||||||
.grant_ready (grant_ready)
|
.grant_ready (grant_ready)
|
||||||
);
|
);
|
||||||
|
|
||||||
wire [TAG_WIDTH-1:0] tag_sel;
|
wire [TAG_WIDTH-1:0] tag_sel = tag_in[grant_index];
|
||||||
|
|
||||||
VX_onehot_mux #(
|
|
||||||
.DATAW (TAG_WIDTH),
|
|
||||||
.N (NUM_REQS)
|
|
||||||
) onehot_mux (
|
|
||||||
.data_in (tag_in),
|
|
||||||
.sel_in (grant_onehot),
|
|
||||||
.data_out (tag_sel)
|
|
||||||
);
|
|
||||||
|
|
||||||
wire [NUM_REQS-1:0] tag_matches;
|
wire [NUM_REQS-1:0] tag_matches;
|
||||||
|
|
||||||
|
|
|
@ -72,17 +72,16 @@ module VX_stream_switch #(
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
`RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT);
|
||||||
|
|
||||||
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
|
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
|
||||||
|
|
||||||
`RESET_RELAY_EN (out_buf_reset, reset, (NUM_OUTPUTS > 1));
|
|
||||||
|
|
||||||
VX_elastic_buffer #(
|
VX_elastic_buffer #(
|
||||||
.DATAW (DATAW),
|
.DATAW (DATAW),
|
||||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
||||||
) out_buf (
|
) out_buf (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (out_buf_reset),
|
.reset (out_buf_reset[i]),
|
||||||
.valid_in (valid_out_r[i]),
|
.valid_in (valid_out_r[i]),
|
||||||
.ready_in (ready_out_r[i]),
|
.ready_in (ready_out_r[i]),
|
||||||
.data_in (data_out_r[i]),
|
.data_in (data_out_r[i]),
|
||||||
|
@ -104,20 +103,19 @@ module VX_stream_switch #(
|
||||||
assign ready_in[i] = ready_out_r[i][sel_in[i]];
|
assign ready_in[i] = ready_out_r[i][sel_in[i]];
|
||||||
end
|
end
|
||||||
|
|
||||||
|
`RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT);
|
||||||
|
|
||||||
for (genvar i = 0; i < NUM_INPUTS; ++i) begin
|
for (genvar i = 0; i < NUM_INPUTS; ++i) begin
|
||||||
for (genvar j = 0; j < NUM_REQS; ++j) begin
|
for (genvar j = 0; j < NUM_REQS; ++j) begin
|
||||||
localparam ii = i * NUM_REQS + j;
|
localparam ii = i * NUM_REQS + j;
|
||||||
if (ii < NUM_OUTPUTS) begin
|
if (ii < NUM_OUTPUTS) begin
|
||||||
|
|
||||||
`RESET_RELAY (out_buf_reset, reset);
|
|
||||||
|
|
||||||
VX_elastic_buffer #(
|
VX_elastic_buffer #(
|
||||||
.DATAW (DATAW),
|
.DATAW (DATAW),
|
||||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
||||||
) out_buf (
|
) out_buf (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (out_buf_reset),
|
.reset (out_buf_reset[ii]),
|
||||||
.valid_in (valid_out_r[i][j]),
|
.valid_in (valid_out_r[i][j]),
|
||||||
.ready_in (ready_out_r[i][j]),
|
.ready_in (ready_out_r[i][j]),
|
||||||
.data_in (data_in[i]),
|
.data_in (data_in[i]),
|
||||||
|
@ -126,6 +124,7 @@ module VX_stream_switch #(
|
||||||
.ready_out (ready_out[ii])
|
.ready_out (ready_out[ii])
|
||||||
);
|
);
|
||||||
end else begin
|
end else begin
|
||||||
|
`UNUSED_VAR (out_buf_reset[ii])
|
||||||
`UNUSED_VAR (valid_out_r[i][j])
|
`UNUSED_VAR (valid_out_r[i][j])
|
||||||
assign ready_out_r[i][j] = '0;
|
assign ready_out_r[i][j] = '0;
|
||||||
end
|
end
|
||||||
|
@ -138,17 +137,16 @@ module VX_stream_switch #(
|
||||||
|
|
||||||
`UNUSED_VAR (sel_in)
|
`UNUSED_VAR (sel_in)
|
||||||
|
|
||||||
|
`RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT);
|
||||||
|
|
||||||
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
|
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
|
||||||
|
|
||||||
`RESET_RELAY_EN (out_buf_reset, reset, (NUM_OUTPUTS > 1));
|
|
||||||
|
|
||||||
VX_elastic_buffer #(
|
VX_elastic_buffer #(
|
||||||
.DATAW (DATAW),
|
.DATAW (DATAW),
|
||||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
||||||
) out_buf (
|
) out_buf (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (out_buf_reset),
|
.reset (out_buf_reset[i]),
|
||||||
.valid_in (valid_in[i]),
|
.valid_in (valid_in[i]),
|
||||||
.ready_in (ready_in[i]),
|
.ready_in (ready_in[i]),
|
||||||
.data_in (data_in[i]),
|
.data_in (data_in[i]),
|
||||||
|
|
|
@ -20,7 +20,7 @@ module VX_stream_xbar #(
|
||||||
parameter DATAW = 4,
|
parameter DATAW = 4,
|
||||||
parameter IN_WIDTH = `LOG2UP(NUM_INPUTS),
|
parameter IN_WIDTH = `LOG2UP(NUM_INPUTS),
|
||||||
parameter OUT_WIDTH = `LOG2UP(NUM_OUTPUTS),
|
parameter OUT_WIDTH = `LOG2UP(NUM_OUTPUTS),
|
||||||
parameter ARBITER = "P",
|
parameter ARBITER = "R",
|
||||||
parameter OUT_BUF = 0,
|
parameter OUT_BUF = 0,
|
||||||
parameter LUTRAM = 0,
|
parameter LUTRAM = 0,
|
||||||
parameter MAX_FANOUT = `MAX_FANOUT,
|
parameter MAX_FANOUT = `MAX_FANOUT,
|
||||||
|
@ -126,10 +126,9 @@ module VX_stream_xbar #(
|
||||||
assign data_out_r = {NUM_OUTPUTS{data_in}};
|
assign data_out_r = {NUM_OUTPUTS{data_in}};
|
||||||
assign ready_in = ready_out_r[sel_in];
|
assign ready_in = ready_out_r[sel_in];
|
||||||
|
|
||||||
|
`RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT);
|
||||||
|
|
||||||
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
|
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
|
||||||
|
|
||||||
`RESET_RELAY (out_buf_reset, reset);
|
|
||||||
|
|
||||||
VX_elastic_buffer #(
|
VX_elastic_buffer #(
|
||||||
.DATAW (DATAW),
|
.DATAW (DATAW),
|
||||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||||
|
@ -137,7 +136,7 @@ module VX_stream_xbar #(
|
||||||
.LUTRAM (LUTRAM)
|
.LUTRAM (LUTRAM)
|
||||||
) out_buf (
|
) out_buf (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (out_buf_reset),
|
.reset (out_buf_reset[i]),
|
||||||
.valid_in (valid_out_r[i]),
|
.valid_in (valid_out_r[i]),
|
||||||
.ready_in (ready_out_r[i]),
|
.ready_in (ready_out_r[i]),
|
||||||
.data_in (data_out_r[i]),
|
.data_in (data_out_r[i]),
|
||||||
|
|
|
@ -94,7 +94,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
||||||
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_req_idx;
|
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_req_idx;
|
||||||
wire [NUM_BANKS-1:0] per_bank_req_ready;
|
wire [NUM_BANKS-1:0] per_bank_req_ready;
|
||||||
|
|
||||||
wire [NUM_BANKS-1:0][REQ_DATAW-1:0] per_bank_req_data_all;
|
wire [NUM_BANKS-1:0][REQ_DATAW-1:0] per_bank_req_data_aos;
|
||||||
|
|
||||||
wire [NUM_REQS-1:0] req_valid_in;
|
wire [NUM_REQS-1:0] req_valid_in;
|
||||||
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in;
|
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in;
|
||||||
|
@ -111,7 +111,8 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
||||||
req_bank_addr[i],
|
req_bank_addr[i],
|
||||||
mem_bus_if[i].req_data.byteen,
|
mem_bus_if[i].req_data.byteen,
|
||||||
mem_bus_if[i].req_data.data,
|
mem_bus_if[i].req_data.data,
|
||||||
mem_bus_if[i].req_data.tag};
|
mem_bus_if[i].req_data.tag
|
||||||
|
};
|
||||||
assign mem_bus_if[i].req_ready = req_ready_in[i];
|
assign mem_bus_if[i].req_ready = req_ready_in[i];
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -120,6 +121,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
||||||
.NUM_OUTPUTS (NUM_BANKS),
|
.NUM_OUTPUTS (NUM_BANKS),
|
||||||
.DATAW (REQ_DATAW),
|
.DATAW (REQ_DATAW),
|
||||||
.PERF_CTR_BITS (`PERF_CTR_BITS),
|
.PERF_CTR_BITS (`PERF_CTR_BITS),
|
||||||
|
.ARBITER ("F"),
|
||||||
.OUT_BUF (3) // output should be registered for the data_store addressing
|
.OUT_BUF (3) // output should be registered for the data_store addressing
|
||||||
) req_xbar (
|
) req_xbar (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
|
@ -134,7 +136,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
||||||
.sel_in (req_bank_idx),
|
.sel_in (req_bank_idx),
|
||||||
.ready_in (req_ready_in),
|
.ready_in (req_ready_in),
|
||||||
.valid_out (per_bank_req_valid),
|
.valid_out (per_bank_req_valid),
|
||||||
.data_out (per_bank_req_data_all),
|
.data_out (per_bank_req_data_aos),
|
||||||
.sel_out (per_bank_req_idx),
|
.sel_out (per_bank_req_idx),
|
||||||
.ready_out (per_bank_req_ready)
|
.ready_out (per_bank_req_ready)
|
||||||
);
|
);
|
||||||
|
@ -145,7 +147,8 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
||||||
per_bank_req_addr[i],
|
per_bank_req_addr[i],
|
||||||
per_bank_req_byteen[i],
|
per_bank_req_byteen[i],
|
||||||
per_bank_req_data[i],
|
per_bank_req_data[i],
|
||||||
per_bank_req_tag[i]} = per_bank_req_data_all[i];
|
per_bank_req_tag[i]
|
||||||
|
} = per_bank_req_data_aos[i];
|
||||||
end
|
end
|
||||||
|
|
||||||
// banks access
|
// banks access
|
||||||
|
@ -156,38 +159,55 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
||||||
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_rsp_tag;
|
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_rsp_tag;
|
||||||
wire [NUM_BANKS-1:0] per_bank_rsp_ready;
|
wire [NUM_BANKS-1:0] per_bank_rsp_ready;
|
||||||
|
|
||||||
`RESET_RELAY (bank_reset, reset);
|
|
||||||
|
|
||||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||||
|
wire bank_rsp_valid, bank_rsp_ready;
|
||||||
|
wire [WORD_WIDTH-1:0] bank_rsp_data;
|
||||||
|
|
||||||
|
`RESET_RELAY_EN (bram_reset, reset, (NUM_BANKS > 1));
|
||||||
|
|
||||||
VX_sp_ram #(
|
VX_sp_ram #(
|
||||||
.DATAW (WORD_WIDTH),
|
.DATAW (WORD_WIDTH),
|
||||||
.SIZE (WORDS_PER_BANK),
|
.SIZE (WORDS_PER_BANK),
|
||||||
.WRENW (WORD_SIZE)
|
.WRENW (WORD_SIZE),
|
||||||
|
.NO_RWCHECK (1)
|
||||||
) data_store (
|
) data_store (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.read (1'b1),
|
.reset (bram_reset),
|
||||||
|
.read (per_bank_req_valid[i] && per_bank_req_ready[i] && ~per_bank_req_rw[i]),
|
||||||
.write (per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i]),
|
.write (per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i]),
|
||||||
.wren (per_bank_req_byteen[i]),
|
.wren (per_bank_req_byteen[i]),
|
||||||
.addr (per_bank_req_addr[i]),
|
.addr (per_bank_req_addr[i]),
|
||||||
.wdata (per_bank_req_data[i]),
|
.wdata (per_bank_req_data[i]),
|
||||||
.rdata (per_bank_rsp_data[i])
|
.rdata (bank_rsp_data)
|
||||||
);
|
);
|
||||||
|
|
||||||
// drop write response
|
// read-during-write hazard detection
|
||||||
wire per_bank_req_valid_w, per_bank_req_ready_w;
|
reg [BANK_ADDR_WIDTH-1:0] last_wr_addr;
|
||||||
assign per_bank_req_valid_w = per_bank_req_valid[i] && ~per_bank_req_rw[i];
|
reg last_wr_valid;
|
||||||
assign per_bank_req_ready[i] = per_bank_req_ready_w || per_bank_req_rw[i];
|
always @(posedge clk) begin
|
||||||
|
if (bram_reset) begin
|
||||||
|
last_wr_valid <= 0;
|
||||||
|
end else begin
|
||||||
|
last_wr_valid <= per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i];
|
||||||
|
end
|
||||||
|
last_wr_addr <= per_bank_req_addr[i];
|
||||||
|
end
|
||||||
|
wire is_rdw_hazard = last_wr_valid && ~per_bank_req_rw[i] && (per_bank_req_addr[i] == last_wr_addr);
|
||||||
|
|
||||||
VX_elastic_buffer #(
|
// drop write response and stall on read-during-write hazard
|
||||||
.DATAW (REQ_SEL_WIDTH + TAG_WIDTH),
|
assign bank_rsp_valid = per_bank_req_valid[i] && ~per_bank_req_rw[i] && ~is_rdw_hazard;
|
||||||
.SIZE (0)
|
assign per_bank_req_ready[i] = (bank_rsp_ready || per_bank_req_rw[i]) && ~is_rdw_hazard;
|
||||||
) bank_buf (
|
|
||||||
|
// register BRAM output
|
||||||
|
VX_pipe_buffer #(
|
||||||
|
.DATAW (REQ_SEL_WIDTH + WORD_WIDTH + TAG_WIDTH)
|
||||||
|
) bram_buf (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (bank_reset),
|
.reset (bram_reset),
|
||||||
.valid_in (per_bank_req_valid_w),
|
.valid_in (bank_rsp_valid),
|
||||||
.ready_in (per_bank_req_ready_w),
|
.ready_in (bank_rsp_ready),
|
||||||
.data_in ({per_bank_req_idx[i], per_bank_req_tag[i]}),
|
.data_in ({per_bank_req_idx[i], bank_rsp_data, per_bank_req_tag[i]}),
|
||||||
.data_out ({per_bank_rsp_idx[i], per_bank_rsp_tag[i]}),
|
.data_out ({per_bank_rsp_idx[i], per_bank_rsp_data[i], per_bank_rsp_tag[i]}),
|
||||||
.valid_out (per_bank_rsp_valid[i]),
|
.valid_out (per_bank_rsp_valid[i]),
|
||||||
.ready_out (per_bank_rsp_ready[i])
|
.ready_out (per_bank_rsp_ready[i])
|
||||||
);
|
);
|
||||||
|
@ -195,10 +215,10 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
||||||
|
|
||||||
// bank responses gather
|
// bank responses gather
|
||||||
|
|
||||||
wire [NUM_BANKS-1:0][RSP_DATAW-1:0] per_bank_rsp_data_all;
|
wire [NUM_BANKS-1:0][RSP_DATAW-1:0] per_bank_rsp_data_aos;
|
||||||
|
|
||||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||||
assign per_bank_rsp_data_all[i] = {per_bank_rsp_data[i], per_bank_rsp_tag[i]};
|
assign per_bank_rsp_data_aos[i] = {per_bank_rsp_data[i], per_bank_rsp_tag[i]};
|
||||||
end
|
end
|
||||||
|
|
||||||
wire [NUM_REQS-1:0] rsp_valid_out;
|
wire [NUM_REQS-1:0] rsp_valid_out;
|
||||||
|
@ -209,6 +229,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
||||||
.NUM_INPUTS (NUM_BANKS),
|
.NUM_INPUTS (NUM_BANKS),
|
||||||
.NUM_OUTPUTS (NUM_REQS),
|
.NUM_OUTPUTS (NUM_REQS),
|
||||||
.DATAW (RSP_DATAW),
|
.DATAW (RSP_DATAW),
|
||||||
|
.ARBITER ("P"), // this priority arbiter has negligible impact on performance
|
||||||
.OUT_BUF (OUT_BUF)
|
.OUT_BUF (OUT_BUF)
|
||||||
) rsp_xbar (
|
) rsp_xbar (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
|
@ -216,7 +237,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
||||||
`UNUSED_PIN (collisions),
|
`UNUSED_PIN (collisions),
|
||||||
.sel_in (per_bank_rsp_idx),
|
.sel_in (per_bank_rsp_idx),
|
||||||
.valid_in (per_bank_rsp_valid),
|
.valid_in (per_bank_rsp_valid),
|
||||||
.data_in (per_bank_rsp_data_all),
|
.data_in (per_bank_rsp_data_aos),
|
||||||
.ready_in (per_bank_rsp_ready),
|
.ready_in (per_bank_rsp_ready),
|
||||||
.valid_out (rsp_valid_out),
|
.valid_out (rsp_valid_out),
|
||||||
.data_out (rsp_data_out),
|
.data_out (rsp_data_out),
|
||||||
|
@ -310,7 +331,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (mem_bus_if[i].req_valid && mem_bus_if[i].req_ready) begin
|
if (mem_bus_if[i].req_valid && mem_bus_if[i].req_ready) begin
|
||||||
if (mem_bus_if[i].req_data.rw) begin
|
if (mem_bus_if[i].req_data.rw) begin
|
||||||
`TRACE(1, ("%d: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
|
`TRACE(1, ("%d: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=%h, data=0x%h (#%0d)\n",
|
||||||
$time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, req_uuid[i]));
|
$time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, req_uuid[i]));
|
||||||
end else begin
|
end else begin
|
||||||
`TRACE(1, ("%d: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n",
|
`TRACE(1, ("%d: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n",
|
||||||
|
@ -318,7 +339,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
if (mem_bus_if[i].rsp_valid && mem_bus_if[i].rsp_ready) begin
|
if (mem_bus_if[i].rsp_valid && mem_bus_if[i].rsp_ready) begin
|
||||||
`TRACE(1, ("%d: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%0h (#%0d)\n",
|
`TRACE(1, ("%d: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%h (#%0d)\n",
|
||||||
$time, INSTANCE_ID, i, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data[i], rsp_uuid[i]));
|
$time, INSTANCE_ID, i, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data[i], rsp_uuid[i]));
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -328,7 +349,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (per_bank_req_valid[i] && per_bank_req_ready[i]) begin
|
if (per_bank_req_valid[i] && per_bank_req_ready[i]) begin
|
||||||
if (per_bank_req_rw[i]) begin
|
if (per_bank_req_rw[i]) begin
|
||||||
`TRACE(2, ("%d: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
|
`TRACE(2, ("%d: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=%h, data=0x%h (#%0d)\n",
|
||||||
$time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_byteen[i], per_bank_req_data[i], per_bank_req_uuid[i]));
|
$time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_byteen[i], per_bank_req_data[i], per_bank_req_uuid[i]));
|
||||||
end else begin
|
end else begin
|
||||||
`TRACE(2, ("%d: %s-bank%0d rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
|
`TRACE(2, ("%d: %s-bank%0d rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
|
||||||
|
@ -336,7 +357,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
if (per_bank_rsp_valid[i] && per_bank_rsp_ready[i]) begin
|
if (per_bank_rsp_valid[i] && per_bank_rsp_ready[i]) begin
|
||||||
`TRACE(2, ("%d: %s-bank%0d rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n",
|
`TRACE(2, ("%d: %s-bank%0d rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n",
|
||||||
$time, INSTANCE_ID, i, per_bank_rsp_tag[i], per_bank_rsp_data[i], per_bank_rsp_uuid[i]));
|
$time, INSTANCE_ID, i, per_bank_rsp_tag[i], per_bank_rsp_data[i], per_bank_rsp_uuid[i]));
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -73,12 +73,12 @@ ifneq ($(TARGET), fpga)
|
||||||
CFLAGS += -DSIMULATION
|
CFLAGS += -DSIMULATION
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# Debugigng
|
# Debugging
|
||||||
ifdef DEBUG
|
ifdef DEBUG
|
||||||
ifneq ($(TARGET), fpga)
|
ifneq ($(TARGET), fpga)
|
||||||
CFLAGS += -DNDEBUG
|
CFLAGS += -DDEBUG_LEVEL=$(DEBUG) $(DBG_TRACE_FLAGS)
|
||||||
else
|
else
|
||||||
CFLAGS += $(DBG_TRACE_FLAGS)
|
CFLAGS += -DNDEBUG
|
||||||
endif
|
endif
|
||||||
else
|
else
|
||||||
CFLAGS += -DNDEBUG
|
CFLAGS += -DNDEBUG
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
create_clock -name {clk} -period "220 MHz" -waveform { 0.000 1.0 } [get_ports {clk}]
|
create_clock -name {clk} -period "200 MHz" -waveform { 0.000 1.0 } [get_ports {clk}]
|
|
@ -45,6 +45,7 @@ FPGA_BIN_DIR=<bin_dir> XRT_DEVICE_INDEX=1 TARGET=hw ./ci/blackbox.sh --driver=xr
|
||||||
|
|
||||||
# build report logs
|
# build report logs
|
||||||
<build_dir>/bin/vortex_afu.xclbin.info
|
<build_dir>/bin/vortex_afu.xclbin.info
|
||||||
|
<build_dir>/_x/logs/link/vivado.log # search for keyword "Very high fanout"
|
||||||
<build_dir>/_x/reports/link/link/imp/impl_1_full_util_routed.rpt
|
<build_dir>/_x/reports/link/link/imp/impl_1_full_util_routed.rpt
|
||||||
<build_dir>/_x/reports/link/imp/impl_1_hw_bb_locked_timing_summary_routed.rpt # search for keyword "VIOLATED"
|
<build_dir>/_x/reports/link/imp/impl_1_hw_bb_locked_timing_summary_routed.rpt # search for keyword "VIOLATED"
|
||||||
<build_dir>/_x/logs/link/syn/ulp_vortex_afu_1_0_synth_1_runme.log
|
<build_dir>/_x/logs/link/syn/ulp_vortex_afu_1_0_synth_1_runme.log
|
||||||
|
|
|
@ -111,14 +111,14 @@ ifeq ($(TARGET), hw_emu)
|
||||||
CFLAGS += -DSIMULATION
|
CFLAGS += -DSIMULATION
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# Debugigng
|
# Debugging
|
||||||
ifdef DEBUG
|
ifdef DEBUG
|
||||||
VPP_FLAGS += -g --debug.protocol all
|
VPP_FLAGS += -g --debug.protocol all
|
||||||
ifneq ($(TARGET), hw)
|
ifneq ($(TARGET), hw)
|
||||||
CFLAGS += -DNDEBUG
|
|
||||||
else
|
|
||||||
VPP_FLAGS += --vivado.prop fileset.sim_1.xsim.elaborate.debug_level=all
|
VPP_FLAGS += --vivado.prop fileset.sim_1.xsim.elaborate.debug_level=all
|
||||||
CFLAGS += $(DBG_TRACE_FLAGS)
|
CFLAGS += -DDEBUG_LEVEL=$(DEBUG) $(DBG_TRACE_FLAGS)
|
||||||
|
else
|
||||||
|
CFLAGS += -DNDEBUG
|
||||||
endif
|
endif
|
||||||
else
|
else
|
||||||
VPP_FLAGS += --optimize 3
|
VPP_FLAGS += --optimize 3
|
||||||
|
|
|
@ -49,7 +49,7 @@ endif
|
||||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache
|
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache
|
||||||
RTL_INCLUDE += $(FPU_INCLUDE)
|
RTL_INCLUDE += $(FPU_INCLUDE)
|
||||||
|
|
||||||
# Debugigng
|
# Debugging
|
||||||
ifdef DEBUG
|
ifdef DEBUG
|
||||||
CFLAGS += $(DBG_TRACE_FLAGS)
|
CFLAGS += $(DBG_TRACE_FLAGS)
|
||||||
else
|
else
|
||||||
|
|
|
@ -29,7 +29,7 @@ THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count()
|
||||||
VL_FLAGS += -j $(THREADS)
|
VL_FLAGS += -j $(THREADS)
|
||||||
#VL_FLAGS += --threads $(THREADS)
|
#VL_FLAGS += --threads $(THREADS)
|
||||||
|
|
||||||
# Debugigng
|
# Debugging
|
||||||
ifdef DEBUG
|
ifdef DEBUG
|
||||||
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
|
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
|
||||||
CXXFLAGS += -g -O0 $(DBG_FLAGS)
|
CXXFLAGS += -g -O0 $(DBG_FLAGS)
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
|
|
||||||
#include <VX_config.h>
|
#include <VX_config.h>
|
||||||
#include <VX_types.h>
|
#include <VX_types.h>
|
||||||
|
#include <newlib.h>
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
.section .init, "ax"
|
.section .init, "ax"
|
||||||
|
@ -51,12 +52,10 @@ _start:
|
||||||
# la t0, trap_entry
|
# la t0, trap_entry
|
||||||
# csrw mtvec, t0
|
# csrw mtvec, t0
|
||||||
|
|
||||||
# register global termination functions
|
#ifdef HAVE_INITFINI_ARRAY
|
||||||
la a0, __libc_fini_array
|
|
||||||
call atexit
|
|
||||||
|
|
||||||
# run global initialization functions
|
# run global initialization functions
|
||||||
call __libc_init_array
|
call __libc_init_array
|
||||||
|
#endif
|
||||||
|
|
||||||
# call main program routine
|
# call main program routine
|
||||||
call main
|
call main
|
||||||
|
|
|
@ -119,70 +119,13 @@ void __libc_fini_array (void) {
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
// This function will be called by LIBC at program exit.
|
||||||
#define MAX_CORES 64
|
// Since this platform only support statically linked programs,
|
||||||
volatile int g_cxa_locks[MAX_CORES] = {0};
|
// it is not required to support LIBC's exit functions registration via atexit().
|
||||||
*/
|
void __funcs_on_exit (void) {
|
||||||
|
#ifdef HAVE_INITFINI_ARRAY
|
||||||
void __cxa_lock() {
|
__libc_fini_array();
|
||||||
/*int core_id = vx_core_id();
|
#endif
|
||||||
g_cxa_locks[core_id] = 1;
|
|
||||||
vx_fence();
|
|
||||||
for (int i = 1; i < MAX_CORES; ++i) {
|
|
||||||
int other = (core_id + i) % MAX_CORES;
|
|
||||||
while (g_cxa_locks[other]) {
|
|
||||||
vx_fence(); // cache coherence not supported, so we need to flush the caches
|
|
||||||
}
|
|
||||||
}*/
|
|
||||||
}
|
|
||||||
|
|
||||||
void __cxa_unlock() {
|
|
||||||
/*vx_fence();
|
|
||||||
int core_id = vx_core_id();
|
|
||||||
g_cxa_locks[core_id] = 0;*/
|
|
||||||
}
|
|
||||||
|
|
||||||
#define MAX_FEXITS 64
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
void (*f[MAX_FEXITS])(void*);
|
|
||||||
void *a[MAX_FEXITS];
|
|
||||||
} fexit_list_t;
|
|
||||||
|
|
||||||
static fexit_list_t g_fexit_list;
|
|
||||||
static int g_num_fexits = 0;
|
|
||||||
|
|
||||||
void __funcs_on_exit() {
|
|
||||||
void (*func)(void *), *arg;
|
|
||||||
fexit_list_t* fexit_list = &g_fexit_list;
|
|
||||||
for (int i = 0; i < g_num_fexits; ++i) {
|
|
||||||
func = fexit_list->f[i];
|
|
||||||
arg = fexit_list->a[i];
|
|
||||||
func(arg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void __cxa_finalize(void *dso) {}
|
|
||||||
|
|
||||||
int __cxa_atexit(void (*func)(void *), void *arg, void *dso) {
|
|
||||||
__cxa_lock();
|
|
||||||
int num_fexits = g_num_fexits;
|
|
||||||
if (num_fexits >= MAX_FEXITS)
|
|
||||||
return -1;
|
|
||||||
fexit_list_t* fexit_list = &g_fexit_list;
|
|
||||||
fexit_list->f[num_fexits] = func;
|
|
||||||
fexit_list->a[num_fexits] = arg;
|
|
||||||
g_num_fexits = num_fexits + 1;
|
|
||||||
__cxa_unlock();
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void call(void *p) {
|
|
||||||
((void (*)(void))(uintptr_t)p)();
|
|
||||||
}
|
|
||||||
|
|
||||||
int atexit(void (*func)(void)) {
|
|
||||||
return __cxa_atexit(call, (void*)(uintptr_t)func, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
#include <array>
|
||||||
|
|
||||||
#define CACHE_BLOCK_SIZE 64
|
#define CACHE_BLOCK_SIZE 64
|
||||||
|
|
||||||
|
|
|
@ -34,6 +34,7 @@ typedef void* vx_buffer_h;
|
||||||
#define VX_CAPS_GLOBAL_MEM_SIZE 0x5
|
#define VX_CAPS_GLOBAL_MEM_SIZE 0x5
|
||||||
#define VX_CAPS_LOCAL_MEM_SIZE 0x6
|
#define VX_CAPS_LOCAL_MEM_SIZE 0x6
|
||||||
#define VX_CAPS_ISA_FLAGS 0x7
|
#define VX_CAPS_ISA_FLAGS 0x7
|
||||||
|
#define VX_CAPS_NUM_MEM_BANKS 0x8
|
||||||
|
|
||||||
// device isa flags
|
// device isa flags
|
||||||
#define VX_ISA_STD_A (1ull << ISA_STD_A)
|
#define VX_ISA_STD_A (1ull << ISA_STD_A)
|
||||||
|
|
|
@ -30,7 +30,7 @@ else
|
||||||
CXXFLAGS += -I$(SYN_DIR)
|
CXXFLAGS += -I$(SYN_DIR)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# Debugigng
|
# Debugging
|
||||||
ifdef DEBUG
|
ifdef DEBUG
|
||||||
CXXFLAGS += -g -O0
|
CXXFLAGS += -g -O0
|
||||||
else
|
else
|
||||||
|
|
|
@ -232,6 +232,9 @@ public:
|
||||||
case VX_CAPS_ISA_FLAGS:
|
case VX_CAPS_ISA_FLAGS:
|
||||||
_value = isa_caps_;
|
_value = isa_caps_;
|
||||||
break;
|
break;
|
||||||
|
case VX_CAPS_NUM_MEM_BANKS:
|
||||||
|
_value = MEMORY_BANKS;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id);
|
fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id);
|
||||||
std::abort();
|
std::abort();
|
||||||
|
|
|
@ -19,7 +19,7 @@ LDFLAGS += -L$(DESTDIR) -lrtlsim
|
||||||
|
|
||||||
SRCS := $(SRC_DIR)/vortex.cpp
|
SRCS := $(SRC_DIR)/vortex.cpp
|
||||||
|
|
||||||
# Debugigng
|
# Debugging
|
||||||
ifdef DEBUG
|
ifdef DEBUG
|
||||||
CXXFLAGS += -g -O0
|
CXXFLAGS += -g -O0
|
||||||
else
|
else
|
||||||
|
|
|
@ -77,6 +77,9 @@ public:
|
||||||
case VX_CAPS_ISA_FLAGS:
|
case VX_CAPS_ISA_FLAGS:
|
||||||
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
|
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
|
||||||
break;
|
break;
|
||||||
|
case VX_CAPS_NUM_MEM_BANKS:
|
||||||
|
_value = MEMORY_BANKS;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
std::cout << "invalid caps id: " << caps_id << std::endl;
|
std::cout << "invalid caps id: " << caps_id << std::endl;
|
||||||
std::abort();
|
std::abort();
|
||||||
|
|
|
@ -19,7 +19,7 @@ LDFLAGS += -L$(DESTDIR) -lsimx
|
||||||
|
|
||||||
SRCS := $(SRC_DIR)/vortex.cpp
|
SRCS := $(SRC_DIR)/vortex.cpp
|
||||||
|
|
||||||
# Debugigng
|
# Debugging
|
||||||
ifdef DEBUG
|
ifdef DEBUG
|
||||||
CXXFLAGS += -g -O0
|
CXXFLAGS += -g -O0
|
||||||
else
|
else
|
||||||
|
|
|
@ -105,6 +105,9 @@ public:
|
||||||
case VX_CAPS_ISA_FLAGS:
|
case VX_CAPS_ISA_FLAGS:
|
||||||
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
|
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
|
||||||
break;
|
break;
|
||||||
|
case VX_CAPS_NUM_MEM_BANKS:
|
||||||
|
_value = MEMORY_BANKS;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
std::cout << "invalid caps id: " << caps_id << std::endl;
|
std::cout << "invalid caps id: " << caps_id << std::endl;
|
||||||
std::abort();
|
std::abort();
|
||||||
|
|
|
@ -12,7 +12,7 @@ LDFLAGS += -shared -pthread -ldl
|
||||||
|
|
||||||
SRCS := $(SRC_DIR)/vortex.cpp $(SRC_DIR)/utils.cpp
|
SRCS := $(SRC_DIR)/vortex.cpp $(SRC_DIR)/utils.cpp
|
||||||
|
|
||||||
# Debugigng
|
# Debugging
|
||||||
ifdef DEBUG
|
ifdef DEBUG
|
||||||
CXXFLAGS += -g -O0
|
CXXFLAGS += -g -O0
|
||||||
else
|
else
|
||||||
|
|
|
@ -211,6 +211,8 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
|
||||||
uint64_t mem_reads = 0;
|
uint64_t mem_reads = 0;
|
||||||
uint64_t mem_writes = 0;
|
uint64_t mem_writes = 0;
|
||||||
uint64_t mem_lat = 0;
|
uint64_t mem_lat = 0;
|
||||||
|
uint64_t mem_req_counter = 0;
|
||||||
|
uint64_t mem_ticks = 0;
|
||||||
|
|
||||||
uint64_t num_cores;
|
uint64_t num_cores;
|
||||||
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_CORES, &num_cores), {
|
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_CORES, &num_cores), {
|
||||||
|
@ -222,6 +224,11 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
|
||||||
return err;
|
return err;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
uint64_t num_mem_bank_ports;
|
||||||
|
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_MEM_BANKS, &num_mem_bank_ports), {
|
||||||
|
return err;
|
||||||
|
});
|
||||||
|
|
||||||
bool icache_enable = isa_flags & VX_ISA_EXT_ICACHE;
|
bool icache_enable = isa_flags & VX_ISA_EXT_ICACHE;
|
||||||
bool dcache_enable = isa_flags & VX_ISA_EXT_DCACHE;
|
bool dcache_enable = isa_flags & VX_ISA_EXT_DCACHE;
|
||||||
bool l2cache_enable = isa_flags & VX_ISA_EXT_L2CACHE;
|
bool l2cache_enable = isa_flags & VX_ISA_EXT_L2CACHE;
|
||||||
|
@ -314,7 +321,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
|
||||||
if (num_cores > 1) {
|
if (num_cores > 1) {
|
||||||
uint64_t scrb_total = scrb_alu_per_core + scrb_fpu_per_core + scrb_lsu_per_core + scrb_csrs_per_core + scrb_wctl_per_core;
|
uint64_t scrb_total = scrb_alu_per_core + scrb_fpu_per_core + scrb_lsu_per_core + scrb_csrs_per_core + scrb_wctl_per_core;
|
||||||
int scrb_percent_per_core = calcAvgPercent(scrb_stalls_per_core, cycles_per_core);
|
int scrb_percent_per_core = calcAvgPercent(scrb_stalls_per_core, cycles_per_core);
|
||||||
fprintf(stream, "PERF: core%d: scoreboard stalls=%ld (%d%%) (alu=%d%%, fpu=%d%%, lsu=%d%%, scrs=%d%%, wctl=%d%%)\n"
|
fprintf(stream, "PERF: core%d: scoreboard stalls=%ld (%d%%) (alu=%d%%, fpu=%d%%, lsu=%d%%, csrs=%d%%, wctl=%d%%)\n"
|
||||||
, core_id
|
, core_id
|
||||||
, scrb_stalls_per_core
|
, scrb_stalls_per_core
|
||||||
, scrb_percent_per_core
|
, scrb_percent_per_core
|
||||||
|
@ -533,6 +540,12 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
|
||||||
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_LT, core_id, &mem_lat), {
|
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_LT, core_id, &mem_lat), {
|
||||||
return err;
|
return err;
|
||||||
});
|
});
|
||||||
|
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_BANK_CNTR, core_id, &mem_req_counter), {
|
||||||
|
return err;
|
||||||
|
});
|
||||||
|
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_BANK_TICK, core_id, &mem_ticks), {
|
||||||
|
return err;
|
||||||
|
});
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
default:
|
default:
|
||||||
|
@ -559,7 +572,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
|
||||||
fprintf(stream, "PERF: scheduler idle=%ld (%d%%)\n", sched_idles, sched_idles_percent);
|
fprintf(stream, "PERF: scheduler idle=%ld (%d%%)\n", sched_idles, sched_idles_percent);
|
||||||
fprintf(stream, "PERF: scheduler stalls=%ld (%d%%)\n", sched_stalls, sched_stalls_percent);
|
fprintf(stream, "PERF: scheduler stalls=%ld (%d%%)\n", sched_stalls, sched_stalls_percent);
|
||||||
fprintf(stream, "PERF: ibuffer stalls=%ld (%d%%)\n", ibuffer_stalls, ibuffer_percent);
|
fprintf(stream, "PERF: ibuffer stalls=%ld (%d%%)\n", ibuffer_stalls, ibuffer_percent);
|
||||||
fprintf(stream, "PERF: scoreboard stalls=%ld (%d%%) (alu=%d%%, fpu=%d%%, lsu=%d%%, scrs=%d%%, wctl=%d%%)\n"
|
fprintf(stream, "PERF: scoreboard stalls=%ld (%d%%) (alu=%d%%, fpu=%d%%, lsu=%d%%, csrs=%d%%, wctl=%d%%)\n"
|
||||||
, scrb_stalls
|
, scrb_stalls
|
||||||
, scrb_percent
|
, scrb_percent
|
||||||
, calcAvgPercent(scrb_alu, scrb_total)
|
, calcAvgPercent(scrb_alu, scrb_total)
|
||||||
|
@ -609,8 +622,10 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
|
||||||
}
|
}
|
||||||
|
|
||||||
int mem_avg_lat = caclAverage(mem_lat, mem_reads);
|
int mem_avg_lat = caclAverage(mem_lat, mem_reads);
|
||||||
|
int memory_bank_port_utilization = calcAvgPercent(mem_req_counter, (mem_ticks * num_mem_bank_ports));
|
||||||
fprintf(stream, "PERF: memory requests=%ld (reads=%ld, writes=%ld)\n", (mem_reads + mem_writes), mem_reads, mem_writes);
|
fprintf(stream, "PERF: memory requests=%ld (reads=%ld, writes=%ld)\n", (mem_reads + mem_writes), mem_reads, mem_writes);
|
||||||
fprintf(stream, "PERF: memory latency=%d cycles\n", mem_avg_lat);
|
fprintf(stream, "PERF: memory latency=%d cycles\n", mem_avg_lat);
|
||||||
|
fprintf(stream, "PERF: memory bank port utilization=%d%%\n", memory_bank_port_utilization);
|
||||||
} break;
|
} break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -26,7 +26,7 @@ endif
|
||||||
|
|
||||||
PROJECT := libvortex-xrt.so
|
PROJECT := libvortex-xrt.so
|
||||||
|
|
||||||
# Debugigng
|
# Debugging
|
||||||
ifdef DEBUG
|
ifdef DEBUG
|
||||||
CXXFLAGS += -g -O0
|
CXXFLAGS += -g -O0
|
||||||
else
|
else
|
||||||
|
|
|
@ -404,6 +404,9 @@ public:
|
||||||
case VX_CAPS_ISA_FLAGS:
|
case VX_CAPS_ISA_FLAGS:
|
||||||
_value = isa_caps_;
|
_value = isa_caps_;
|
||||||
break;
|
break;
|
||||||
|
case VX_CAPS_NUM_MEM_BANKS:
|
||||||
|
_value = MEMORY_BANKS;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id);
|
fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id);
|
||||||
std::abort();
|
std::abort();
|
||||||
|
|
|
@ -41,11 +41,11 @@ public:
|
||||||
dram_config["MemorySystem"]["DRAM"]["impl"] = "HBM2";
|
dram_config["MemorySystem"]["DRAM"]["impl"] = "HBM2";
|
||||||
dram_config["MemorySystem"]["DRAM"]["org"]["preset"] = "HBM2_8Gb";
|
dram_config["MemorySystem"]["DRAM"]["org"]["preset"] = "HBM2_8Gb";
|
||||||
dram_config["MemorySystem"]["DRAM"]["org"]["density"] = 8192;
|
dram_config["MemorySystem"]["DRAM"]["org"]["density"] = 8192;
|
||||||
|
dram_config["MemorySystem"]["DRAM"]["org"]["channel"] = 8;
|
||||||
dram_config["MemorySystem"]["DRAM"]["timing"]["preset"] = "HBM2_2Gbps";
|
dram_config["MemorySystem"]["DRAM"]["timing"]["preset"] = "HBM2_2Gbps";
|
||||||
dram_config["MemorySystem"]["Controller"]["impl"] = "Generic";
|
dram_config["MemorySystem"]["Controller"]["impl"] = "Generic";
|
||||||
dram_config["MemorySystem"]["Controller"]["Scheduler"]["impl"] = "FRFCFS";
|
dram_config["MemorySystem"]["Controller"]["Scheduler"]["impl"] = "FRFCFS";
|
||||||
dram_config["MemorySystem"]["Controller"]["RefreshManager"]["impl"] = "AllBank";
|
dram_config["MemorySystem"]["Controller"]["RefreshManager"]["impl"] = "AllBank";
|
||||||
dram_config["MemorySystem"]["Controller"]["RefreshManager"]["impl"] = "AllBank";
|
|
||||||
dram_config["MemorySystem"]["Controller"]["RowPolicy"]["impl"] = "OpenRowPolicy";
|
dram_config["MemorySystem"]["Controller"]["RowPolicy"]["impl"] = "OpenRowPolicy";
|
||||||
{
|
{
|
||||||
YAML::Node draw_plugin;
|
YAML::Node draw_plugin;
|
||||||
|
@ -66,7 +66,7 @@ public:
|
||||||
auto original_buf = std::cout.rdbuf();
|
auto original_buf = std::cout.rdbuf();
|
||||||
std::cout.rdbuf(nullstream.rdbuf());
|
std::cout.rdbuf(nullstream.rdbuf());
|
||||||
ramulator_frontend_->finalize();
|
ramulator_frontend_->finalize();
|
||||||
ramulator_memorysystem_->finalize();
|
ramulator_memorysystem_->finalize();
|
||||||
std::cout.rdbuf(original_buf);
|
std::cout.rdbuf(original_buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -59,7 +59,7 @@ void RamMemDevice::read(void* data, uint64_t addr, uint64_t size) {
|
||||||
if ((addr & (wordSize_-1))
|
if ((addr & (wordSize_-1))
|
||||||
|| (addr_end & (wordSize_-1))
|
|| (addr_end & (wordSize_-1))
|
||||||
|| (addr_end <= contents_.size())) {
|
|| (addr_end <= contents_.size())) {
|
||||||
std::cout << "lookup of 0x" << std::hex << (addr_end-1) << " failed.\n";
|
std::cout << "lookup of 0x" << std::hex << (addr_end-1) << std::dec << " failed.\n";
|
||||||
throw BadAddress();
|
throw BadAddress();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -74,7 +74,7 @@ void RamMemDevice::write(const void* data, uint64_t addr, uint64_t size) {
|
||||||
if ((addr & (wordSize_-1))
|
if ((addr & (wordSize_-1))
|
||||||
|| (addr_end & (wordSize_-1))
|
|| (addr_end & (wordSize_-1))
|
||||||
|| (addr_end <= contents_.size())) {
|
|| (addr_end <= contents_.size())) {
|
||||||
std::cout << "lookup of 0x" << std::hex << (addr_end-1) << " failed.\n";
|
std::cout << "lookup of 0x" << std::hex << (addr_end-1) << std::dec << " failed.\n";
|
||||||
throw BadAddress();
|
throw BadAddress();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -115,8 +115,7 @@ void MemoryUnit::ADecoder::map(uint64_t start, uint64_t end, MemDevice &md) {
|
||||||
void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) {
|
void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) {
|
||||||
mem_accessor_t ma;
|
mem_accessor_t ma;
|
||||||
if (!this->lookup(addr, size, &ma)) {
|
if (!this->lookup(addr, size, &ma)) {
|
||||||
assert(0);
|
std::cout << "lookup of 0x" << std::hex << addr << std::dec << " failed.\n";
|
||||||
std::cout << "lookup of 0x" << std::hex << addr << " failed.\n";
|
|
||||||
throw BadAddress();
|
throw BadAddress();
|
||||||
}
|
}
|
||||||
ma.md->read(data, ma.addr, size);
|
ma.md->read(data, ma.addr, size);
|
||||||
|
@ -125,8 +124,7 @@ void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) {
|
||||||
void MemoryUnit::ADecoder::write(const void* data, uint64_t addr, uint64_t size) {
|
void MemoryUnit::ADecoder::write(const void* data, uint64_t addr, uint64_t size) {
|
||||||
mem_accessor_t ma;
|
mem_accessor_t ma;
|
||||||
if (!this->lookup(addr, size, &ma)) {
|
if (!this->lookup(addr, size, &ma)) {
|
||||||
assert(0);
|
std::cout << "lookup of 0x" << std::hex << addr << std::dec << " failed.\n";
|
||||||
std::cout << "lookup of 0x" << std::hex << addr << " failed.\n";
|
|
||||||
throw BadAddress();
|
throw BadAddress();
|
||||||
}
|
}
|
||||||
ma.md->write(data, ma.addr, size);
|
ma.md->write(data, ma.addr, size);
|
||||||
|
@ -408,7 +406,7 @@ bool ACLManager::check(uint64_t addr, uint64_t size, int flags) const {
|
||||||
while (it != acl_map_.end() && it->first < end) {
|
while (it != acl_map_.end() && it->first < end) {
|
||||||
if (it->second.end > addr) {
|
if (it->second.end > addr) {
|
||||||
if ((it->second.flags & flags) != flags) {
|
if ((it->second.flags & flags) != flags) {
|
||||||
std::cout << "Memory access violation from 0x" << std::hex << addr << " to 0x" << end << ", curent flags=" << it->second.flags << ", access flags=" << flags << std::endl;
|
std::cout << "Memory access violation from 0x" << std::hex << addr << " to 0x" << end << ", curent flags=" << it->second.flags << ", access flags=" << flags << std::dec << std::endl;
|
||||||
return false; // Overlapping entry is missing at least one required flag bit
|
return false; // Overlapping entry is missing at least one required flag bit
|
||||||
}
|
}
|
||||||
addr = it->second.end; // Move to the end of the current matching range
|
addr = it->second.end; // Move to the end of the current matching range
|
||||||
|
|
|
@ -168,23 +168,23 @@ public:
|
||||||
{}
|
{}
|
||||||
|
|
||||||
void* operator new(size_t /*size*/) {
|
void* operator new(size_t /*size*/) {
|
||||||
return allocator().allocate();
|
return allocator_.allocate();
|
||||||
}
|
}
|
||||||
|
|
||||||
void operator delete(void* ptr) {
|
void operator delete(void* ptr) {
|
||||||
allocator().deallocate(ptr);
|
allocator_.deallocate(ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
Func func_;
|
Func func_;
|
||||||
Pkt pkt_;
|
Pkt pkt_;
|
||||||
|
|
||||||
static MemoryPool<SimCallEvent<Pkt>>& allocator() {
|
static MemoryPool<SimCallEvent<Pkt>> allocator_;
|
||||||
static MemoryPool<SimCallEvent<Pkt>> instance(64);
|
|
||||||
return instance;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename Pkt>
|
||||||
|
MemoryPool<SimCallEvent<Pkt>> SimCallEvent<Pkt>::allocator_(64);
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
template <typename Pkt>
|
template <typename Pkt>
|
||||||
|
@ -201,23 +201,23 @@ public:
|
||||||
{}
|
{}
|
||||||
|
|
||||||
void* operator new(size_t /*size*/) {
|
void* operator new(size_t /*size*/) {
|
||||||
return allocator().allocate();
|
return allocator_.allocate();
|
||||||
}
|
}
|
||||||
|
|
||||||
void operator delete(void* ptr) {
|
void operator delete(void* ptr) {
|
||||||
allocator().deallocate(ptr);
|
allocator_.deallocate(ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
const SimPort<Pkt>* port_;
|
const SimPort<Pkt>* port_;
|
||||||
Pkt pkt_;
|
Pkt pkt_;
|
||||||
|
|
||||||
static MemoryPool<SimPortEvent<Pkt>>& allocator() {
|
static MemoryPool<SimPortEvent<Pkt>> allocator_;
|
||||||
static MemoryPool<SimPortEvent<Pkt>> instance(64);
|
|
||||||
return instance;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename Pkt>
|
||||||
|
MemoryPool<SimPortEvent<Pkt>> SimPortEvent<Pkt>::allocator_(64);
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
class SimContext;
|
class SimContext;
|
||||||
|
|
|
@ -71,3 +71,27 @@ const char* fileExtension(const char* filepath);
|
||||||
|
|
||||||
void *aligned_malloc(size_t size, size_t alignment);
|
void *aligned_malloc(size_t size, size_t alignment);
|
||||||
void aligned_free(void *ptr);
|
void aligned_free(void *ptr);
|
||||||
|
|
||||||
|
namespace vortex {
|
||||||
|
|
||||||
|
// Verilator data type casting
|
||||||
|
template <typename R, size_t W, typename Enable = void>
|
||||||
|
class VDataCast;
|
||||||
|
template <typename R, size_t W>
|
||||||
|
class VDataCast<R, W, typename std::enable_if<(W > 8)>::type> {
|
||||||
|
public:
|
||||||
|
template <typename T>
|
||||||
|
static R get(T& obj) {
|
||||||
|
return reinterpret_cast<R>(obj.data());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template <typename R, size_t W>
|
||||||
|
class VDataCast<R, W, typename std::enable_if<(W <= 8)>::type> {
|
||||||
|
public:
|
||||||
|
template <typename T>
|
||||||
|
static R get(T& obj) {
|
||||||
|
return reinterpret_cast<R>(&obj);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
|
@ -83,13 +83,13 @@ THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count()
|
||||||
VL_FLAGS += -j $(THREADS)
|
VL_FLAGS += -j $(THREADS)
|
||||||
#VL_FLAGS += --threads $(THREADS)
|
#VL_FLAGS += --threads $(THREADS)
|
||||||
|
|
||||||
# Debugigng
|
# Debugging
|
||||||
ifdef DEBUG
|
ifdef DEBUG
|
||||||
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
|
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
|
||||||
CXXFLAGS += -g -O0 $(DBG_FLAGS)
|
CXXFLAGS += -g -O0 $(DBG_FLAGS)
|
||||||
else
|
else
|
||||||
VL_FLAGS += -DNDEBUG
|
VL_FLAGS += -DNDEBUG
|
||||||
CXXFLAGS += -O3 -DNDEBUG
|
CXXFLAGS += -O2 -DNDEBUG
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# Enable scope analyzer
|
# Enable scope analyzer
|
||||||
|
@ -123,7 +123,7 @@ $(DESTDIR)/vortex_afu.h : $(AFU_DIR)/vortex_afu.vh
|
||||||
$(SCRIPT_DIR)/gen_config.py -i $^ -o $@
|
$(SCRIPT_DIR)/gen_config.py -i $^ -o $@
|
||||||
|
|
||||||
$(DESTDIR)/$(PROJECT): $(SRCS) $(DESTDIR)/vortex_afu.h $(SCOPE_JSON)
|
$(DESTDIR)/$(PROJECT): $(SRCS) $(DESTDIR)/vortex_afu.h $(SCOPE_JSON)
|
||||||
verilator --build --exe -O3 $(VL_FLAGS) --cc $(TOP) --top-module $(TOP) $(SRCS) -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)' --Mdir $@.obj_dir -o $@
|
verilator --build --exe $(VL_FLAGS) --cc $(TOP) --top-module $(TOP) $(SRCS) -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)' --Mdir $@.obj_dir -o $@
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf $(DESTDIR)/$(PROJECT).obj_dir
|
rm -rf $(DESTDIR)/$(PROJECT).obj_dir
|
||||||
|
|
|
@ -35,13 +35,13 @@
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <util.h>
|
#include <util.h>
|
||||||
|
|
||||||
#ifndef MEMORY_BANKS
|
//#ifndef MEMORY_BANKS
|
||||||
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
|
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
|
||||||
#define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS
|
#define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS
|
||||||
#else
|
#else
|
||||||
#define MEMORY_BANKS 2
|
#define MEMORY_BANKS 2
|
||||||
#endif
|
#endif
|
||||||
#endif
|
//#endif
|
||||||
|
|
||||||
#ifndef MEM_CLOCK_RATIO
|
#ifndef MEM_CLOCK_RATIO
|
||||||
#define MEM_CLOCK_RATIO 1
|
#define MEM_CLOCK_RATIO 1
|
||||||
|
@ -380,7 +380,7 @@ private:
|
||||||
device_->vcp2af_sRxPort_c0_hdr_resp_type = 0;
|
device_->vcp2af_sRxPort_c0_hdr_resp_type = 0;
|
||||||
memcpy(device_->vcp2af_sRxPort_c0_data, cci_rd_it->data.data(), CACHE_BLOCK_SIZE);
|
memcpy(device_->vcp2af_sRxPort_c0_data, cci_rd_it->data.data(), CACHE_BLOCK_SIZE);
|
||||||
device_->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata;
|
device_->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata;
|
||||||
/*printf("%0ld: [sim] CCI Rd Rsp: addr=%ld, mdata=%d, data=", timestamp, cci_rd_it->addr, cci_rd_it->mdata);
|
/*printf("%0ld: [sim] CCI Rd Rsp: addr=0x%lx, mdata=0x%x, data=0x", timestamp, cci_rd_it->addr, cci_rd_it->mdata);
|
||||||
for (int i = 0; i < CACHE_BLOCK_SIZE; ++i)
|
for (int i = 0; i < CACHE_BLOCK_SIZE; ++i)
|
||||||
printf("%02x", cci_rd_it->data[CACHE_BLOCK_SIZE-1-i]);
|
printf("%02x", cci_rd_it->data[CACHE_BLOCK_SIZE-1-i]);
|
||||||
printf("\n");*/
|
printf("\n");*/
|
||||||
|
@ -398,7 +398,7 @@ private:
|
||||||
cci_req.mdata = device_->af2cp_sTxPort_c0_hdr_mdata;
|
cci_req.mdata = device_->af2cp_sTxPort_c0_hdr_mdata;
|
||||||
auto host_ptr = (uint64_t*)(device_->af2cp_sTxPort_c0_hdr_address * CACHE_BLOCK_SIZE);
|
auto host_ptr = (uint64_t*)(device_->af2cp_sTxPort_c0_hdr_address * CACHE_BLOCK_SIZE);
|
||||||
memcpy(cci_req.data.data(), host_ptr, CACHE_BLOCK_SIZE);
|
memcpy(cci_req.data.data(), host_ptr, CACHE_BLOCK_SIZE);
|
||||||
//printf("%0ld: [sim] CCI Rd Req: addr=%ld, mdata=%d\n", timestamp, device_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata);
|
//printf("%0ld: [sim] CCI Rd Req: addr=0x%lx, mdata=0x%x\n", timestamp, device_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata);
|
||||||
cci_reads_.emplace_back(cci_req);
|
cci_reads_.emplace_back(cci_req);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -453,7 +453,7 @@ private:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*printf("%0ld: [sim] MEM Wr Req: bank=%d, addr=%x, data=", timestamp, b, byte_addr);
|
/*printf("%0ld: [sim] MEM Wr Req: bank=%d, 0x%x, data=0x", timestamp, b, byte_addr);
|
||||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||||
printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]);
|
printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]);
|
||||||
}
|
}
|
||||||
|
|
|
@ -65,7 +65,7 @@ THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count()
|
||||||
VL_FLAGS += -j $(THREADS)
|
VL_FLAGS += -j $(THREADS)
|
||||||
#VL_FLAGS += --threads $(THREADS)
|
#VL_FLAGS += --threads $(THREADS)
|
||||||
|
|
||||||
# Debugigng
|
# Debugging
|
||||||
ifdef DEBUG
|
ifdef DEBUG
|
||||||
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
|
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
|
||||||
CXXFLAGS += -g -O0 $(DBG_FLAGS)
|
CXXFLAGS += -g -O0 $(DBG_FLAGS)
|
||||||
|
|
|
@ -39,6 +39,7 @@ typedef VVortex Device;
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
|
||||||
#include <dram_sim.h>
|
#include <dram_sim.h>
|
||||||
|
#include <util.h>
|
||||||
|
|
||||||
#ifndef MEMORY_BANKS
|
#ifndef MEMORY_BANKS
|
||||||
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
|
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
|
||||||
|
@ -316,11 +317,11 @@ private:
|
||||||
auto mem_rsp_it = pending_mem_reqs_.begin();
|
auto mem_rsp_it = pending_mem_reqs_.begin();
|
||||||
auto mem_rsp = *mem_rsp_it;
|
auto mem_rsp = *mem_rsp_it;
|
||||||
/*
|
/*
|
||||||
printf("%0ld: [sim] MEM Rd Rsp: addr=%0lx, data=", timestamp, mem_rsp->addr);
|
printf("%0ld: [sim] MEM Rd Rsp: addr=0x%0lx, data=0x", timestamp, mem_rsp->addr);
|
||||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
|
||||||
printf("%02x", mem_rsp->block[(MEM_BLOCK_SIZE-1)-i]);
|
printf("%02x", mem_rsp->block[i]);
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
*/
|
*/
|
||||||
device_->m_axi_rvalid[0] = 1;
|
device_->m_axi_rvalid[0] = 1;
|
||||||
device_->m_axi_rid[0] = mem_rsp->tag;
|
device_->m_axi_rid[0] = mem_rsp->tag;
|
||||||
|
@ -347,7 +348,7 @@ private:
|
||||||
auto mem_rsp_it = pending_mem_reqs_.begin();
|
auto mem_rsp_it = pending_mem_reqs_.begin();
|
||||||
auto mem_rsp = *mem_rsp_it;
|
auto mem_rsp = *mem_rsp_it;
|
||||||
/*
|
/*
|
||||||
printf("%0ld: [sim] MEM Wr Rsp: addr=%0lx\n", timestamp, mem_rsp->addr);
|
printf("%0ld: [sim] MEM Wr Rsp: addr=0x%0lx\n", timestamp, mem_rsp->addr);
|
||||||
*/
|
*/
|
||||||
device_->m_axi_bvalid[0] = 1;
|
device_->m_axi_bvalid[0] = 1;
|
||||||
device_->m_axi_bid[0] = mem_rsp->tag;
|
device_->m_axi_bid[0] = mem_rsp->tag;
|
||||||
|
@ -387,11 +388,15 @@ private:
|
||||||
} else {
|
} else {
|
||||||
// process writes
|
// process writes
|
||||||
/*
|
/*
|
||||||
printf("%0ld: [sim] MEM Wr: addr=%0x, byteen=%0lx, data=", timestamp, base_addr, byteen);
|
printf("%0ld: [sim] MEM Wr: addr=0x%0lx, byteen=0x", timestamp, base_addr);
|
||||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) {
|
||||||
printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]);
|
printf("%x", (int)((byteen >> (4 * i)) & 0xf));
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf(", data=0x");
|
||||||
|
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
|
||||||
|
printf("%02x", data[i]);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
*/
|
*/
|
||||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||||
if ((byteen >> i) & 0x1) {
|
if ((byteen >> i) & 0x1) {
|
||||||
|
@ -459,13 +464,13 @@ private:
|
||||||
auto mem_rsp_it = pending_mem_reqs_.begin();
|
auto mem_rsp_it = pending_mem_reqs_.begin();
|
||||||
auto mem_rsp = *mem_rsp_it;
|
auto mem_rsp = *mem_rsp_it;
|
||||||
/*
|
/*
|
||||||
printf("%0ld: [sim] MEM Rd: tag=%0lx, addr=%0lx, data=", timestamp, mem_rsp->tag, mem_rsp->addr);
|
printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr);
|
||||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
|
||||||
printf("%02x", mem_rsp->block[(MEM_BLOCK_SIZE-1)-i]);
|
printf("%02x", mem_rsp->block[i]);
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
*/
|
*/
|
||||||
memcpy(device_->mem_rsp_data.data(), mem_rsp->block.data(), MEM_BLOCK_SIZE);
|
memcpy(VDataCast<void*, MEM_BLOCK_SIZE>::get(device_->mem_rsp_data), mem_rsp->block.data(), MEM_BLOCK_SIZE);
|
||||||
device_->mem_rsp_tag = mem_rsp->tag;
|
device_->mem_rsp_tag = mem_rsp->tag;
|
||||||
pending_mem_reqs_.erase(mem_rsp_it);
|
pending_mem_reqs_.erase(mem_rsp_it);
|
||||||
mem_rd_rsp_active_ = true;
|
mem_rd_rsp_active_ = true;
|
||||||
|
@ -480,7 +485,7 @@ private:
|
||||||
uint64_t byte_addr = (device_->mem_req_addr * MEM_BLOCK_SIZE);
|
uint64_t byte_addr = (device_->mem_req_addr * MEM_BLOCK_SIZE);
|
||||||
if (device_->mem_req_rw) {
|
if (device_->mem_req_rw) {
|
||||||
auto byteen = device_->mem_req_byteen;
|
auto byteen = device_->mem_req_byteen;
|
||||||
auto data = (uint8_t*)(device_->mem_req_data.data());
|
auto data = VDataCast<uint8_t*, MEM_BLOCK_SIZE>::get(device_->mem_req_data);
|
||||||
|
|
||||||
if (byte_addr >= uint64_t(IO_COUT_ADDR)
|
if (byte_addr >= uint64_t(IO_COUT_ADDR)
|
||||||
&& byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
|
&& byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
|
||||||
|
@ -499,11 +504,15 @@ private:
|
||||||
} else {
|
} else {
|
||||||
// process writes
|
// process writes
|
||||||
/*
|
/*
|
||||||
printf("%0ld: [sim] MEM Wr: tag=%0lx, addr=%0x, byteen=%0lx, data=", timestamp, device_->mem_req_tag, byte_addr, byteen);
|
printf("%0ld: [sim] MEM Wr Req: tag=0x%0lx, addr=0x%0lx, byteen=0x", timestamp, device_->mem_req_tag, byte_addr);
|
||||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) {
|
||||||
printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]);
|
printf("%x", (int)((byteen >> (4 * i)) & 0xf));
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf(", data=0x");
|
||||||
|
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
|
||||||
|
printf("%d=%02x,", i, data[i]);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
*/
|
*/
|
||||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||||
if ((byteen >> i) & 0x1) {
|
if ((byteen >> i) & 0x1) {
|
||||||
|
@ -530,7 +539,7 @@ private:
|
||||||
ram_->read(mem_req->block.data(), byte_addr, MEM_BLOCK_SIZE);
|
ram_->read(mem_req->block.data(), byte_addr, MEM_BLOCK_SIZE);
|
||||||
pending_mem_reqs_.emplace_back(mem_req);
|
pending_mem_reqs_.emplace_back(mem_req);
|
||||||
|
|
||||||
//printf("%0ld: [sim] MEM Rd Req: addr=%0x, tag=%0lx\n", timestamp, byte_addr, device_->mem_req_tag);
|
//printf("%0ld: [sim] MEM Rd Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag);
|
||||||
|
|
||||||
// send dram request
|
// send dram request
|
||||||
dram_queue_.push(mem_req);
|
dram_queue_.push(mem_req);
|
||||||
|
|
|
@ -24,7 +24,7 @@ LDFLAGS += -Wl,-rpath,$(THIRD_PARTY_DIR)/ramulator -L$(THIRD_PARTY_DIR)/ramulato
|
||||||
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
|
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
|
||||||
SRCS += $(SRC_DIR)/processor.cpp $(SRC_DIR)/cluster.cpp $(SRC_DIR)/socket.cpp $(SRC_DIR)/core.cpp $(SRC_DIR)/emulator.cpp $(SRC_DIR)/decode.cpp $(SRC_DIR)/execute.cpp $(SRC_DIR)/func_unit.cpp $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp
|
SRCS += $(SRC_DIR)/processor.cpp $(SRC_DIR)/cluster.cpp $(SRC_DIR)/socket.cpp $(SRC_DIR)/core.cpp $(SRC_DIR)/emulator.cpp $(SRC_DIR)/decode.cpp $(SRC_DIR)/execute.cpp $(SRC_DIR)/func_unit.cpp $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp
|
||||||
|
|
||||||
# Debugigng
|
# Debugging
|
||||||
ifdef DEBUG
|
ifdef DEBUG
|
||||||
CXXFLAGS += -g -O0 -DDEBUG_LEVEL=$(DEBUG)
|
CXXFLAGS += -g -O0 -DDEBUG_LEVEL=$(DEBUG)
|
||||||
#CXXFLAGS += -g -O0 -DDEBUG_LEVEL=$(DEBUG) -fsanitize=address -fno-omit-frame-pointer
|
#CXXFLAGS += -g -O0 -DDEBUG_LEVEL=$(DEBUG) -fsanitize=address -fno-omit-frame-pointer
|
||||||
|
|
|
@ -77,8 +77,8 @@ public:
|
||||||
caches_.at(i)->CoreRspPorts.at(j).bind(&mem_arbs.at(j)->RspOut.at(i));
|
caches_.at(i)->CoreRspPorts.at(j).bind(&mem_arbs.at(j)->RspOut.at(i));
|
||||||
}
|
}
|
||||||
|
|
||||||
caches_.at(i)->MemReqPort.bind(&cache_arb->ReqIn.at(i));
|
caches_.at(i)->MemReqPorts.at(0).bind(&cache_arb->ReqIn.at(i));
|
||||||
cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPort);
|
cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPorts.at(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
cache_arb->ReqOut.at(0).bind(&this->MemReqPort);
|
cache_arb->ReqOut.at(0).bind(&this->MemReqPort);
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <list>
|
#include <list>
|
||||||
#include <queue>
|
#include <queue>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
using namespace vortex;
|
using namespace vortex;
|
||||||
|
|
||||||
|
@ -315,27 +316,75 @@ public:
|
||||||
simobject->CoreReqPorts.at(i).bind(&bypass_switch_->ReqIn.at(i));
|
simobject->CoreReqPorts.at(i).bind(&bypass_switch_->ReqIn.at(i));
|
||||||
bypass_switch_->RspIn.at(i).bind(&simobject->CoreRspPorts.at(i));
|
bypass_switch_->RspIn.at(i).bind(&simobject->CoreRspPorts.at(i));
|
||||||
}
|
}
|
||||||
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPort);
|
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
|
||||||
simobject->MemRspPort.bind(&bypass_switch_->RspOut.at(0));
|
simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2);
|
if (strcmp(simobject->name().c_str(), "l3cache")) {
|
||||||
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPort);
|
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2);
|
||||||
simobject->MemRspPort.bind(&bypass_switch_->RspOut.at(0));
|
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
|
||||||
|
simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0));
|
||||||
|
|
||||||
if (config.B != 0) {
|
if (config.B != 0) {
|
||||||
snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str());
|
snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str());
|
||||||
bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B));
|
bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B));
|
||||||
for (uint32_t i = 0, n = (1 << config.B); i < n; ++i) {
|
for (uint32_t i = 0, n = (1 << config.B); i < n; ++i) {
|
||||||
mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i));
|
mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i));
|
||||||
bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
|
bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
|
||||||
|
}
|
||||||
|
bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0));
|
||||||
|
bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0));
|
||||||
|
} else {
|
||||||
|
mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0));
|
||||||
|
bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
|
||||||
}
|
}
|
||||||
bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0));
|
|
||||||
bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0));
|
|
||||||
} else {
|
} else {
|
||||||
mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0));
|
// TODO: Change this into a crossbar
|
||||||
bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
|
uint32_t max = MAX(2, config_.num_inputs);
|
||||||
|
//printf("%s connecting\n", simobject_->name().c_str());
|
||||||
|
//3
|
||||||
|
if (config.B != 0) {
|
||||||
|
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, max, max);
|
||||||
|
for (uint32_t i = 0; i < max; ++i) {
|
||||||
|
//printf("%s connecting input=%d to MemPorts\n", simobject_->name().c_str(), i);
|
||||||
|
bypass_switch_->ReqOut.at(i).bind(&simobject->MemReqPorts.at(i % (1 << config.B)));
|
||||||
|
simobject->MemRspPorts.at(i % (1 << config.B)).bind(&bypass_switch_->RspOut.at(i));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2);
|
||||||
|
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
|
||||||
|
simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (config.B != 0)
|
||||||
|
{
|
||||||
|
snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str());
|
||||||
|
bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B), (1 << config.B));
|
||||||
|
for (uint32_t i = 0, n = (1 << config.B); i < n; ++i)
|
||||||
|
{
|
||||||
|
//1
|
||||||
|
//printf("%s Connecting memory ports to bank=%d\n", simobject_->name().c_str(), i);
|
||||||
|
mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i));
|
||||||
|
bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
|
||||||
|
}
|
||||||
|
//2
|
||||||
|
if (config_.num_inputs > 1) {
|
||||||
|
for (uint32_t i = 0; i < max; ++i) {
|
||||||
|
//printf("%s connecting bank and bypass port=%d\n", simobject_->name().c_str(), i);
|
||||||
|
bank_switch_->ReqOut.at(i % (1 << config.B)).bind(&bypass_switch_->ReqIn.at(i));
|
||||||
|
bypass_switch_->RspIn.at(i).bind(&bank_switch_->RspOut.at(i % (1 << config.B)));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0));
|
||||||
|
bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0));
|
||||||
|
bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// calculate cache initialization cycles
|
// calculate cache initialization cycles
|
||||||
|
@ -673,8 +722,8 @@ CacheSim::CacheSim(const SimContext& ctx, const char* name, const Config& config
|
||||||
: SimObject<CacheSim>(ctx, name)
|
: SimObject<CacheSim>(ctx, name)
|
||||||
, CoreReqPorts(config.num_inputs, this)
|
, CoreReqPorts(config.num_inputs, this)
|
||||||
, CoreRspPorts(config.num_inputs, this)
|
, CoreRspPorts(config.num_inputs, this)
|
||||||
, MemReqPort(this)
|
, MemReqPorts(NUM_MEM_PORTS, this)
|
||||||
, MemRspPort(this)
|
, MemRspPorts(NUM_MEM_PORTS, this)
|
||||||
, impl_(new Impl(this, config))
|
, impl_(new Impl(this, config))
|
||||||
{}
|
{}
|
||||||
|
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue