mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
merge from master branch
This commit is contained in:
commit
e91eb4aed4
124 changed files with 1933 additions and 1718 deletions
4
.github/workflows/ci.yml
vendored
4
.github/workflows/ci.yml
vendored
|
@ -219,7 +219,9 @@ jobs:
|
|||
runs-on: ubuntu-20.04
|
||||
needs: build_vm
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
name: [regression, opencl, cache, config1, config2, debug, stress, vm]
|
||||
xlen: [32, 64]
|
||||
|
||||
steps:
|
||||
|
@ -267,4 +269,4 @@ jobs:
|
|||
|
||||
steps:
|
||||
- name: Check Completion
|
||||
run: echo "All matrix jobs passed"
|
||||
run: echo "All matrix jobs passed"
|
||||
|
|
|
@ -44,10 +44,10 @@ clean: clean-build
|
|||
$(MAKE) -C $(VORTEX_HOME)/third_party clean
|
||||
|
||||
# Install setup
|
||||
KERNEL_INC_DST = $(PREFIX)/kernel/include
|
||||
KERNEL_LIB_DST = $(PREFIX)/kernel/lib$(XLEN)
|
||||
RUNTIME_INC_DST = $(PREFIX)/runtime/include
|
||||
RUNTIME_LIB_DST = $(PREFIX)/runtime/lib
|
||||
KERNEL_INC_DST = $(INSTALLDIR)/kernel/include
|
||||
KERNEL_LIB_DST = $(INSTALLDIR)/kernel/lib$(XLEN)
|
||||
RUNTIME_INC_DST = $(INSTALLDIR)/runtime/include
|
||||
RUNTIME_LIB_DST = $(INSTALLDIR)/runtime/lib
|
||||
|
||||
KERNEL_HEADERS = $(wildcard $(VORTEX_HOME)/kernel/include/*.h)
|
||||
KERNEL_LIBS = $(wildcard kernel/*.a)
|
||||
|
|
69
README.md
69
README.md
|
@ -1,5 +1,3 @@
|
|||
[](https://travis-ci.com/vortexgpgpu/vortex)
|
||||
|
||||
# Vortex GPGPU
|
||||
|
||||
Vortex is a full-stack open-source RISC-V GPGPU.
|
||||
|
@ -47,20 +45,20 @@ More detailed build instructions can be found [here](docs/install_vortex.md).
|
|||
- [Yosys](https://github.com/YosysHQ/yosys)
|
||||
- [Sv2v](https://github.com/zachjs/sv2v)
|
||||
### Install development tools
|
||||
```
|
||||
sudo apt-get install build-essential
|
||||
sudo apt-get install binutils
|
||||
sudo apt-get install python
|
||||
sudo apt-get install uuid-dev
|
||||
sudo apt-get install git
|
||||
```sh
|
||||
sudo apt-get install build-essential
|
||||
sudo apt-get install binutils
|
||||
sudo apt-get install python
|
||||
sudo apt-get install uuid-dev
|
||||
sudo apt-get install git
|
||||
```
|
||||
### Install Vortex codebase
|
||||
```sh
|
||||
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
|
||||
cd vortex
|
||||
```
|
||||
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git -b vortex_vm
|
||||
cd vortex
|
||||
```
|
||||
|
||||
### Configure your build folder
|
||||
```sh
|
||||
#
|
||||
# By default, the toolchain default install location is the /opt folder and can be overridden by setting --tooldir.
|
||||
# This is the example for volvo server
|
||||
|
@ -72,38 +70,45 @@ More detailed build instructions can be found [here](docs/install_vortex.md).
|
|||
../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-2024-08-09 --prefix=$OUT_DIR
|
||||
# Run the following instead to enable virtual memory feature in compilation
|
||||
../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-2024-08-09 --prefix=$OUT_DIR --vm_enable=1
|
||||
|
||||
```
|
||||
### Install prebuilt toolchain
|
||||
# We will use the precompiled tools in the volvo toolchain directory
|
||||
### set environment variables
|
||||
```sh
|
||||
# should always run before using the toolchain!
|
||||
source ./ci/toolchain_env.sh
|
||||
```
|
||||
### Building Vortex
|
||||
make -s
|
||||
```sh
|
||||
make -s
|
||||
```
|
||||
|
||||
### Quick demo running vecadd OpenCL kernel on 2 cores
|
||||
$ ./ci/blackbox.sh --cores=2 --app=vecadd
|
||||
```sh
|
||||
./ci/blackbox.sh --cores=2 --app=vecadd
|
||||
```
|
||||
|
||||
### Common Developer Tips
|
||||
- Installing Vortex kernel and runtime libraries to use with external tools requires passing --prefix=<install-path> to the configure script.
|
||||
```sh
|
||||
$ ../configure --xlen=32 --tooldir=$HOME/tools --prefix=<install-path>
|
||||
$ make -s
|
||||
$ make install
|
||||
``````
|
||||
```sh
|
||||
../configure --xlen=32 --tooldir=$HOME/tools --prefix=<install-path>
|
||||
make -s
|
||||
make install
|
||||
```
|
||||
- Building Vortex 64-bit simply requires using --xlen=64 configure option.
|
||||
```sh
|
||||
$ ../configure --xlen=32 --tooldir=$HOME/tools
|
||||
```
|
||||
```sh
|
||||
../configure --xlen=64 --tooldir=$HOME/tools
|
||||
```
|
||||
- Sourcing "./ci/toolchain_env.sh" is required every time you start a new terminal. We recommend adding "source <build-path>/ci/toolchain_env.sh" to your ~/.bashrc file to automate the process at login.
|
||||
```sh
|
||||
$ echo "source <build-path>/ci/toolchain_env.sh" >> ~/.bashrc
|
||||
```
|
||||
```sh
|
||||
echo "source <build-path>/ci/toolchain_env.sh" >> ~/.bashrc
|
||||
```
|
||||
- Making changes to Makefiles in your source tree or adding new folders will require executing the "configure" script again to get it propagated into your build folder.
|
||||
```sh
|
||||
$ ../configure
|
||||
```
|
||||
```sh
|
||||
../configure
|
||||
```
|
||||
- To debug the GPU, you can generate a "run.log" trace. See /docs/debugging.md for more information.
|
||||
```sh
|
||||
$ ./ci/blackbox.sh --app=demo --debug=3
|
||||
```
|
||||
```sh
|
||||
./ci/blackbox.sh --app=demo --debug=3
|
||||
```
|
||||
- For additional information, check out the /docs.
|
||||
|
|
|
@ -23,6 +23,8 @@ rm -f blackbox.*.cache
|
|||
|
||||
XLEN=${XLEN:=@XLEN@}
|
||||
|
||||
XSIZE=$((XLEN / 8))
|
||||
|
||||
echo "Vortex Regression Test: XLEN=$XLEN"
|
||||
|
||||
unittest()
|
||||
|
@ -99,11 +101,11 @@ regression()
|
|||
|
||||
# test global barrier
|
||||
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tgbar" --cores=2
|
||||
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-n1 -tgbar" --cores=2
|
||||
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=opae --app=dogfood --args="-n1 -tgbar" --cores=2
|
||||
|
||||
# test local barrier
|
||||
./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar"
|
||||
./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-n1 -tbar"
|
||||
./ci/blackbox.sh --driver=opae --app=dogfood --args="-n1 -tbar"
|
||||
|
||||
echo "regression tests done!"
|
||||
}
|
||||
|
@ -148,32 +150,54 @@ vm(){
|
|||
echo "vm tests done!"
|
||||
}
|
||||
|
||||
test_csv_trace()
|
||||
cache()
|
||||
{
|
||||
# test CSV trace generation
|
||||
make -C sim/simx clean && DEBUG=3 make -C sim/simx > /dev/null
|
||||
make -C sim/rtlsim clean && DEBUG=3 CONFIGS="-DGPR_RESET" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-simx-32im > run_simx.log
|
||||
make -C tests/riscv/isa run-rtlsim-32im > run_rtlsim.log
|
||||
./ci/trace_csv.py -tsimx run_simx.log -otrace_simx.csv
|
||||
./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv
|
||||
diff trace_rtlsim.csv trace_simx.csv
|
||||
# clean build
|
||||
make -C sim/simx clean
|
||||
make -C sim/rtlsim clean
|
||||
}
|
||||
echo "begin cache tests..."
|
||||
|
||||
debug()
|
||||
{
|
||||
echo "begin debugging tests..."
|
||||
# disable local memory
|
||||
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo --perf=1
|
||||
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=demo --perf=1
|
||||
|
||||
test_csv_trace
|
||||
# disable L1 cache
|
||||
CONFIGS="-DL1_DISABLE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DL1_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DDCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DICACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
|
||||
./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=opae --cores=1 --scope --app=demo --args="-n1"
|
||||
# reduce l1 line size
|
||||
CONFIGS="-DL1_LINE_SIZE=$XSIZE" ./ci/blackbox.sh --driver=rtlsim --app=io_addr
|
||||
CONFIGS="-DL1_LINE_SIZE=$XSIZE" ./ci/blackbox.sh --driver=simx --app=io_addr
|
||||
CONFIGS="-DL1_LINE_SIZE=$XSIZE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DL1_LINE_SIZE=$XSIZE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
|
||||
echo "debugging tests done!"
|
||||
# test cache ways
|
||||
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
|
||||
# test cache banking
|
||||
CONFIGS="-DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
|
||||
# test writeback
|
||||
CONFIGS="-DDCACHE_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --app=mstress
|
||||
CONFIGS="-DDCACHE_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --app=mstress
|
||||
CONFIGS="-DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
|
||||
CONFIGS="-DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
|
||||
|
||||
# cache clustering
|
||||
CONFIGS="-DSOCKET_SIZE=4 -DNUM_DCACHES=4 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --cores=4 --warps=1 --threads=2
|
||||
|
||||
# L2/L3
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=rtlsim --cores=4 --l2cache --app=diverge --args="-n1"
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=4 --l2cache --app=diverge --args="-n1"
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=diverge --args="-n1"
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=diverge --args="-n1"
|
||||
|
||||
echo "begin cache tests..."
|
||||
}
|
||||
|
||||
config1()
|
||||
|
@ -189,10 +213,12 @@ config1()
|
|||
./ci/blackbox.sh --driver=simx --warps=8 --threads=16 --app=diverge
|
||||
|
||||
# cores clustering
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=4 --clusters=1 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=4 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=1 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --app=diverge --args="-n1"
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=diverge --args="-n1"
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --app=diverge --args="-n1"
|
||||
|
||||
# issue width
|
||||
CONFIGS="-DISSUE_WIDTH=2" ./ci/blackbox.sh --driver=rtlsim --app=diverge
|
||||
|
@ -212,22 +238,19 @@ config1()
|
|||
CONFIGS="-DISSUE_WIDTH=2 -DNUM_FPU_BLOCK=1 -DNUM_FPU_LANES=2" ./ci/blackbox.sh --driver=simx --app=vecaddx
|
||||
CONFIGS="-DISSUE_WIDTH=4 -DNUM_FPU_BLOCK=4 -DNUM_FPU_LANES=4" ./ci/blackbox.sh --driver=simx --app=vecaddx
|
||||
|
||||
# FPU's PE scaling
|
||||
CONFIGS="-DFMA_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfmadd"
|
||||
CONFIGS="-DFCVT_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tftoi"
|
||||
CONFIGS="-DFDIV_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfdiv"
|
||||
CONFIGS="-DFSQRT_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfsqrt"
|
||||
CONFIGS="-DFNCP_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfclamp"
|
||||
|
||||
# LSU scaling
|
||||
CONFIGS="-DISSUE_WIDTH=2 -DNUM_LSU_BLOCK=1 -DNUM_LSU_LANES=2" ./ci/blackbox.sh --driver=rtlsim --app=vecaddx
|
||||
CONFIGS="-DISSUE_WIDTH=4 -DNUM_LSU_BLOCK=4 -DNUM_LSU_LANES=4" ./ci/blackbox.sh --driver=rtlsim --app=vecaddx
|
||||
CONFIGS="-DISSUE_WIDTH=2 -DNUM_LSU_BLOCK=1 -DNUM_LSU_LANES=2" ./ci/blackbox.sh --driver=simx --app=vecaddx
|
||||
CONFIGS="-DISSUE_WIDTH=4 -DNUM_LSU_BLOCK=4 -DNUM_LSU_LANES=4" ./ci/blackbox.sh --driver=simx --app=vecaddx
|
||||
|
||||
# L2/L3
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l3cache --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --l2cache --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=4 --l2cache --l3cache --app=diverge --args="-n1"
|
||||
|
||||
# multiple L1 caches per socket
|
||||
CONFIGS="-DSOCKET_SIZE=4 -DNUM_DCACHES=2 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --cores=8 --warps=1 --threads=2
|
||||
|
||||
echo "configuration-1 tests done!"
|
||||
}
|
||||
|
||||
|
@ -262,55 +285,63 @@ config2()
|
|||
# disabling ZICOND extension
|
||||
CONFIGS="-DEXT_ZICOND_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo
|
||||
|
||||
# disable local memory
|
||||
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo --perf=1
|
||||
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=demo --perf=1
|
||||
|
||||
# test AXI bus
|
||||
AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --app=demo
|
||||
|
||||
# disable L1 cache
|
||||
CONFIGS="-DL1_DISABLE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DL1_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DDCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DICACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
|
||||
# reduce l1 line size
|
||||
CONFIGS="-DL1_LINE_SIZE=$XLEN/8" ./ci/blackbox.sh --driver=rtlsim --app=io_addr
|
||||
CONFIGS="-DL1_LINE_SIZE=$XLEN/8" ./ci/blackbox.sh --driver=simx --app=io_addr
|
||||
CONFIGS="-DL1_LINE_SIZE=$XLEN/8 -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DL1_LINE_SIZE=$XLEN/8 -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
|
||||
# test cache ways
|
||||
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
|
||||
# test cache banking
|
||||
CONFIGS="-DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --app=mstress
|
||||
|
||||
# test 128-bit MEM block
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --app=demo
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
|
||||
# test XLEN-bit MEM block
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=simx --app=mstress
|
||||
|
||||
# test memory coalescing
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=rtlsim --app=mstress --threads=8
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8
|
||||
|
||||
# test single-bank DRAM
|
||||
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --app=demo
|
||||
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
|
||||
# test 27-bit DRAM address
|
||||
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --app=demo
|
||||
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
|
||||
echo "configuration-2 tests done!"
|
||||
}
|
||||
|
||||
test_csv_trace()
|
||||
{
|
||||
# test CSV trace generation
|
||||
make -C sim/simx clean && DEBUG=3 make -C sim/simx > /dev/null
|
||||
make -C sim/rtlsim clean && DEBUG=3 CONFIGS="-DGPR_RESET" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-simx-32im > run_simx.log
|
||||
make -C tests/riscv/isa run-rtlsim-32im > run_rtlsim.log
|
||||
./ci/trace_csv.py -tsimx run_simx.log -otrace_simx.csv
|
||||
./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv
|
||||
diff trace_rtlsim.csv trace_simx.csv
|
||||
# clean build
|
||||
make -C sim/simx clean
|
||||
make -C sim/rtlsim clean
|
||||
}
|
||||
|
||||
debug()
|
||||
{
|
||||
echo "begin debugging tests..."
|
||||
|
||||
test_csv_trace
|
||||
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=opae --cores=1 --scope --app=demo --args="-n1"
|
||||
|
||||
echo "debugging tests done!"
|
||||
}
|
||||
|
||||
stress()
|
||||
{
|
||||
echo "begin stress tests..."
|
||||
|
||||
# test verilator reset values
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1 -DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --args="-n128" --l2cache
|
||||
|
||||
echo "stress tests done!"
|
||||
|
@ -329,19 +360,14 @@ synthesis()
|
|||
show_usage()
|
||||
{
|
||||
echo "Vortex Regression Test"
|
||||
echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--config1] [--config2] [--debug] [--stress] [--synthesis] [--all] [--h|--help]"
|
||||
echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--stress] [--synthesis] [--all] [--h|--help]"
|
||||
}
|
||||
|
||||
start=$SECONDS
|
||||
|
||||
declare -a tests=()
|
||||
clean=0
|
||||
|
||||
while [ "$1" != "" ]; do
|
||||
case $1 in
|
||||
--vm )
|
||||
tests+=("vm")
|
||||
;;
|
||||
--clean )
|
||||
clean=1
|
||||
;;
|
||||
|
@ -360,6 +386,12 @@ while [ "$1" != "" ]; do
|
|||
--opencl )
|
||||
tests+=("opencl")
|
||||
;;
|
||||
--cache )
|
||||
tests+=("cache")
|
||||
;;
|
||||
--vm )
|
||||
tests+=("vm")
|
||||
;;
|
||||
--config1 )
|
||||
tests+=("config1")
|
||||
;;
|
||||
|
@ -382,6 +414,7 @@ while [ "$1" != "" ]; do
|
|||
tests+=("kernel")
|
||||
tests+=("regression")
|
||||
tests+=("opencl")
|
||||
tests+=("cache")
|
||||
tests+=("config1")
|
||||
tests+=("config2")
|
||||
tests+=("debug")
|
||||
|
@ -405,6 +438,8 @@ then
|
|||
make -s
|
||||
fi
|
||||
|
||||
start=$SECONDS
|
||||
|
||||
for test in "${tests[@]}"; do
|
||||
$test
|
||||
done
|
||||
|
|
|
@ -19,6 +19,8 @@ import csv
|
|||
import re
|
||||
import inspect
|
||||
|
||||
configs = None
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(description='CPU trace log to CSV format converter.')
|
||||
parser.add_argument('-t', '--type', default='simx', help='log type (rtlsim or simx)')
|
||||
|
@ -26,6 +28,24 @@ def parse_args():
|
|||
parser.add_argument('log', help='Input log file')
|
||||
return parser.parse_args()
|
||||
|
||||
def load_config(filename):
|
||||
config_pattern = r"CONFIGS: num_threads=(\d+), num_warps=(\d+), num_cores=(\d+), num_clusters=(\d+), socket_size=(\d+), local_mem_base=0x([0-9a-fA-F]+), num_barriers=(\d+)"
|
||||
with open(filename, 'r') as file:
|
||||
for line in file:
|
||||
config_match = re.search(config_pattern, line)
|
||||
if config_match:
|
||||
config = {
|
||||
'num_threads': int(config_match.group(1)),
|
||||
'num_warps': int(config_match.group(2)),
|
||||
'num_cores': int(config_match.group(3)),
|
||||
'num_clusters': int(config_match.group(4)),
|
||||
'socket_size': int(config_match.group(5)),
|
||||
'local_mem_base': int(config_match.group(6), 16),
|
||||
'num_barriers': int(config_match.group(7)),
|
||||
}
|
||||
return config
|
||||
return None
|
||||
|
||||
def parse_simx(log_lines):
|
||||
pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
|
||||
instr_pattern = r"Instr (0x[0-9a-fA-F]+):"
|
||||
|
@ -46,10 +66,10 @@ def parse_simx(log_lines):
|
|||
instr_data = {}
|
||||
instr_data["lineno"] = lineno
|
||||
instr_data["PC"] = re.search(pc_pattern, line).group(1)
|
||||
instr_data["core_id"] = re.search(core_id_pattern, line).group(1)
|
||||
instr_data["warp_id"] = re.search(warp_id_pattern, line).group(1)
|
||||
instr_data["core_id"] = int(re.search(core_id_pattern, line).group(1))
|
||||
instr_data["warp_id"] = int(re.search(warp_id_pattern, line).group(1))
|
||||
instr_data["tmask"] = re.search(tmask_pattern, line).group(1)
|
||||
instr_data["uuid"] = re.search(uuid_pattern, line).group(1)
|
||||
instr_data["uuid"] = int(re.search(uuid_pattern, line).group(1))
|
||||
elif line.startswith("DEBUG Instr"):
|
||||
instr_data["instr"] = re.search(instr_pattern, line).group(1)
|
||||
instr_data["opcode"] = re.search(opcode_pattern, line).group(1)
|
||||
|
@ -60,6 +80,7 @@ def parse_simx(log_lines):
|
|||
instr_data["destination"] = re.search(destination_pattern, line).group(1)
|
||||
except Exception as e:
|
||||
print("Error at line {}: {}".format(lineno, e))
|
||||
instr_data = None
|
||||
if instr_data:
|
||||
entries.append(instr_data)
|
||||
return entries
|
||||
|
@ -95,7 +116,7 @@ def append_value(text, reg, value, tmask_arr, sep):
|
|||
return text, sep
|
||||
|
||||
def parse_rtlsim(log_lines):
|
||||
config_pattern = r"CONFIGS: num_threads=(\d+), num_warps=(\d+), num_cores=(\d+), num_clusters=(\d+), socket_size=(\d+), local_mem_base=(\d+), num_barriers=(\d+)"
|
||||
global configs
|
||||
line_pattern = r"\d+: cluster(\d+)-socket(\d+)-core(\d+)-(decode|issue|commit)"
|
||||
pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
|
||||
instr_pattern = r"instr=(0x[0-9a-fA-F]+)"
|
||||
|
@ -117,36 +138,20 @@ def parse_rtlsim(log_lines):
|
|||
uuid_pattern = r"#(\d+)"
|
||||
entries = []
|
||||
instr_data = {}
|
||||
num_threads = 0
|
||||
num_warps = 0
|
||||
num_cores = 0
|
||||
num_clusters = 0
|
||||
socket_size = 0
|
||||
local_mem_base = 0
|
||||
num_barriers = 0
|
||||
num_sockets = 0
|
||||
num_cores = configs['num_cores']
|
||||
socket_size = configs['socket_size']
|
||||
num_sockets = (num_cores + socket_size - 1) // socket_size
|
||||
for lineno, line in enumerate(log_lines, start=1):
|
||||
try:
|
||||
config_match = re.search(config_pattern, line)
|
||||
if config_match:
|
||||
num_threads = int(config_match.group(1))
|
||||
num_warps = int(config_match.group(2))
|
||||
num_cores = int(config_match.group(3))
|
||||
num_clusters = int(config_match.group(4))
|
||||
socket_size = int(config_match.group(5))
|
||||
local_mem_base = int(config_match.group(6))
|
||||
num_barriers = int(config_match.group(7))
|
||||
num_sockets = (num_cores + socket_size - 1) // socket_size
|
||||
continue
|
||||
line_match = re.search(line_pattern, line)
|
||||
if line_match:
|
||||
PC = re.search(pc_pattern, line).group(1)
|
||||
warp_id = re.search(warp_id_pattern, line).group(1)
|
||||
warp_id = int(re.search(warp_id_pattern, line).group(1))
|
||||
tmask = re.search(tmask_pattern, line).group(1)
|
||||
uuid = re.search(uuid_pattern, line).group(1)
|
||||
cluster_id = line_match.group(1)
|
||||
socket_id = line_match.group(2)
|
||||
core_id = line_match.group(3)
|
||||
uuid = int(re.search(uuid_pattern, line).group(1))
|
||||
cluster_id = int(line_match.group(1))
|
||||
socket_id = int(line_match.group(2))
|
||||
core_id = int(line_match.group(3))
|
||||
stage = line_match.group(4)
|
||||
if stage == "decode":
|
||||
trace = {}
|
||||
|
@ -273,7 +278,9 @@ def split_log_file(log_filename):
|
|||
return sublogs
|
||||
|
||||
def main():
|
||||
global configs
|
||||
args = parse_args()
|
||||
configs = load_config(args.log)
|
||||
sublogs = split_log_file(args.log)
|
||||
write_csv(sublogs, args.csv, args.type)
|
||||
|
||||
|
|
|
@ -19,7 +19,7 @@ TOOLDIR ?= @TOOLDIR@
|
|||
|
||||
OSVERSION ?= @OSVERSION@
|
||||
|
||||
PREFIX ?= @PREFIX@
|
||||
INSTALLDIR ?= @INSTALLDIR@
|
||||
|
||||
LLVM_VORTEX ?= $(TOOLDIR)/llvm-vortex
|
||||
|
||||
|
|
4
configure
vendored
4
configure
vendored
|
@ -63,7 +63,7 @@ copy_files() {
|
|||
filename_no_ext="${filename%.in}"
|
||||
dest_file="$dest_dir/$filename_no_ext"
|
||||
mkdir -p "$dest_dir"
|
||||
sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@PREFIX@|$PREFIX|g; s|@VM_ENABLE@|$VM_ENABLE|g" "$file" > "$dest_file"
|
||||
sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g; s|@VM_ENABLE@|$VM_ENABLE|g" "$file" > "$dest_file"
|
||||
# apply permissions to bash scripts
|
||||
read -r firstline < "$dest_file"
|
||||
if [[ "$firstline" =~ ^#!.*bash ]]; then
|
||||
|
@ -178,4 +178,4 @@ THIRD_PARTY_DIR=$SCRIPT_DIR/third_party
|
|||
|
||||
copy_files "$SCRIPT_DIR" "$CURRENT_DIR"
|
||||
|
||||
echo "VM Enable: "$VM_ENABLE
|
||||
echo "VM Enable: "$VM_ENABLE
|
||||
|
|
|
@ -53,9 +53,9 @@ A waveform trace `trace.vcd` will be generated in the current directory during t
|
|||
## Analyzing Vortex trace log
|
||||
|
||||
When debugging Vortex RTL or SimX Simulator, reading the trace run.log file can be overwhelming when the trace gets really large.
|
||||
We provide a trace sanitizer tool under ./hw/scripts/trace_csv.py that you can use to convert the large trace into a CSV file containing all the instructions that executed with their source and destination operands. To increase compatibility between traces you will need to initialize RTLSIM's GPRs to zero by defining GPR_RESET.
|
||||
We provide a trace sanitizer tool under ./ci/trace_csv.py that you can use to convert the large trace into a CSV file containing all the instructions that executed with their source and destination operands.
|
||||
|
||||
$ CONFIGS="-DGPR_RESET" ./ci/blackbox.sh --driver=rtlsim --app=demo --debug=3 --log=run_rtlsim.log
|
||||
$ ./ci/blackbox.sh --driver=rtlsim --app=demo --debug=3 --log=run_rtlsim.log
|
||||
$ ./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv
|
||||
|
||||
$ ./ci/blackbox.sh --driver=simx --app=demo --debug=3 --log=run_simx.log
|
||||
|
|
|
@ -96,10 +96,11 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
.CRSQ_SIZE (`L2_CRSQ_SIZE),
|
||||
.MSHR_SIZE (`L2_MSHR_SIZE),
|
||||
.MRSQ_SIZE (`L2_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`L2_MREQ_SIZE),
|
||||
.MREQ_SIZE (`L2_WRITEBACK ? `L2_MSHR_SIZE : `L2_MREQ_SIZE),
|
||||
.TAG_WIDTH (L2_TAG_WIDTH),
|
||||
.WRITE_ENABLE (1),
|
||||
.WRITEBACK (`L2_WRITEBACK),
|
||||
.DIRTY_BYTES (`L2_WRITEBACK),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.CORE_OUT_BUF (2),
|
||||
.MEM_OUT_BUF (2),
|
||||
|
|
|
@ -217,7 +217,7 @@
|
|||
`ifndef IO_COUT_ADDR
|
||||
`define IO_COUT_ADDR `IO_BASE_ADDR
|
||||
`endif
|
||||
`define IO_COUT_SIZE `MEM_BLOCK_SIZE
|
||||
`define IO_COUT_SIZE 64
|
||||
|
||||
`ifndef IO_MPM_ADDR
|
||||
`define IO_MPM_ADDR (`IO_COUT_ADDR + `IO_COUT_SIZE)
|
||||
|
@ -685,7 +685,7 @@
|
|||
|
||||
// Number of Banks
|
||||
`ifndef L3_NUM_BANKS
|
||||
`define L3_NUM_BANKS `MIN(4, `NUM_CLUSTERS)
|
||||
`define L3_NUM_BANKS `MIN(8, `NUM_CLUSTERS)
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
|
@ -718,6 +718,15 @@
|
|||
`define L3_WRITEBACK 0
|
||||
`endif
|
||||
|
||||
`ifndef MEMORY_BANKS
|
||||
`define MEMORY_BANKS 8
|
||||
`endif
|
||||
|
||||
// Number of Memory Ports from LLC
|
||||
`ifndef NUM_MEM_PORTS
|
||||
`define NUM_MEM_PORTS `MIN(`MEMORY_BANKS, `L3_NUM_BANKS)
|
||||
`endif
|
||||
|
||||
// ISA Extensions /////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef EXT_A_ENABLE
|
||||
|
|
|
@ -238,11 +238,11 @@
|
|||
`define RESET_RELAY(dst, src) \
|
||||
`RESET_RELAY_EX (dst, src, 1, 0)
|
||||
|
||||
// size(x): 0 -> 0, 1 -> 1, 2 -> 2, 3 -> 2, 4-> 2
|
||||
`define TO_OUT_BUF_SIZE(out_reg) `MIN(out_reg, 2)
|
||||
// size(x): 0 -> 0, 1 -> 1, 2 -> 2, 3 -> 2, 4-> 2, 5 -> 2
|
||||
`define TO_OUT_BUF_SIZE(s) `MIN(s, 2)
|
||||
|
||||
// reg(x): 0 -> 0, 1 -> 1, 2 -> 0, 3 -> 1, 4 -> 2
|
||||
`define TO_OUT_BUF_REG(out_reg) ((out_reg & 1) + ((out_reg >> 2) << 1))
|
||||
// reg(x): 0 -> 0, 1 -> 1, 2 -> 0, 3 -> 1, 4 -> 2, 5 -> 3
|
||||
`define TO_OUT_BUF_REG(s) ((s < 2) ? s : (s - 2))
|
||||
|
||||
`define REPEAT(n,f,s) `_REPEAT_``n(f,s)
|
||||
`define _REPEAT_0(f,s)
|
||||
|
|
|
@ -145,11 +145,12 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
.CRSQ_SIZE (`DCACHE_CRSQ_SIZE),
|
||||
.MSHR_SIZE (`DCACHE_MSHR_SIZE),
|
||||
.MRSQ_SIZE (`DCACHE_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`DCACHE_MREQ_SIZE),
|
||||
.MREQ_SIZE (`DCACHE_WRITEBACK ? `DCACHE_MSHR_SIZE : `DCACHE_MREQ_SIZE),
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.WRITE_ENABLE (1),
|
||||
.WRITEBACK (`DCACHE_WRITEBACK),
|
||||
.DIRTY_BYTES (`DCACHE_WRITEBACK),
|
||||
.NC_ENABLE (1),
|
||||
.CORE_OUT_BUF (2),
|
||||
.MEM_OUT_BUF (2)
|
||||
|
@ -178,8 +179,6 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
`ASSIGN_VX_MEM_BUS_IF_X (l1_mem_bus_if[0], icache_mem_bus_if, L1_MEM_TAG_WIDTH, ICACHE_MEM_TAG_WIDTH);
|
||||
`ASSIGN_VX_MEM_BUS_IF_X (l1_mem_bus_if[1], dcache_mem_bus_if, L1_MEM_TAG_WIDTH, DCACHE_MEM_TAG_WIDTH);
|
||||
|
||||
`RESET_RELAY (mem_arb_reset, reset);
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATA_SIZE (`L1_LINE_SIZE),
|
||||
|
@ -190,7 +189,7 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
.RSP_OUT_BUF (2)
|
||||
) mem_arb (
|
||||
.clk (clk),
|
||||
.reset (mem_arb_reset),
|
||||
.reset (reset),
|
||||
.bus_in_if (l1_mem_bus_if),
|
||||
.bus_out_if (l1_mem_arb_bus_if)
|
||||
);
|
||||
|
|
|
@ -166,6 +166,10 @@
|
|||
`define VX_CSR_MPM_MEM_WRITES_H 12'hB99
|
||||
`define VX_CSR_MPM_MEM_LT 12'hB1A // memory latency
|
||||
`define VX_CSR_MPM_MEM_LT_H 12'hB9A
|
||||
`define VX_CSR_MPM_MEM_BANK_CNTR 12'hB1E // memory bank requests
|
||||
`define VX_CSR_MPM_MEM_BANK_CNTR_H 12'hB9E
|
||||
`define VX_CSR_MPM_MEM_BANK_TICK 12'hB1F // memory ticks
|
||||
`define VX_CSR_MPM_MEM_BANK_TICK_H 12'hB9F
|
||||
// PERF: lmem
|
||||
`define VX_CSR_MPM_LMEM_READS 12'hB1B // memory reads
|
||||
`define VX_CSR_MPM_LMEM_READS_H 12'hB9B
|
||||
|
|
|
@ -80,10 +80,11 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
.CRSQ_SIZE (`L3_CRSQ_SIZE),
|
||||
.MSHR_SIZE (`L3_MSHR_SIZE),
|
||||
.MRSQ_SIZE (`L3_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`L3_MREQ_SIZE),
|
||||
.MREQ_SIZE (`L3_WRITEBACK ? `L3_MSHR_SIZE : `L3_MREQ_SIZE),
|
||||
.TAG_WIDTH (L2_MEM_TAG_WIDTH),
|
||||
.WRITE_ENABLE (1),
|
||||
.WRITEBACK (`L3_WRITEBACK),
|
||||
.DIRTY_BYTES (`L3_WRITEBACK),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.CORE_OUT_BUF (2),
|
||||
.MEM_OUT_BUF (2),
|
||||
|
@ -192,12 +193,12 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
always @(posedge clk) begin
|
||||
if (mem_req_fire) begin
|
||||
if (mem_req_rw)
|
||||
`TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h data=0x%0h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data));
|
||||
`TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data));
|
||||
else
|
||||
`TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen));
|
||||
`TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen));
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(1, ("%d: MEM Rsp: tag=0x%0h, data=0x%0h\n", $time, mem_rsp_tag, mem_rsp_data));
|
||||
`TRACE(1, ("%d: MEM Rd Rsp: tag=0x%0h, data=0x%h\n", $time, mem_rsp_tag, mem_rsp_data));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -240,13 +240,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
MMIO_CMD_ARG0: begin
|
||||
cmd_args[0] <= 64'(cp2af_sRxPort.c0.data);
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: MMIO_CMD_ARG0: data=0x%0h\n", $time, 64'(cp2af_sRxPort.c0.data)));
|
||||
`TRACE(2, ("%d: MMIO_CMD_ARG0: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data)));
|
||||
`endif
|
||||
end
|
||||
MMIO_CMD_ARG1: begin
|
||||
cmd_args[1] <= 64'(cp2af_sRxPort.c0.data);
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: MMIO_CMD_ARG1: data=0x%0h\n", $time, 64'(cp2af_sRxPort.c0.data)));
|
||||
`TRACE(2, ("%d: MMIO_CMD_ARG1: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data)));
|
||||
`endif
|
||||
end
|
||||
MMIO_CMD_ARG2: begin
|
||||
|
@ -263,13 +263,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
`ifdef SCOPE
|
||||
MMIO_SCOPE_WRITE: begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: MMIO_SCOPE_WRITE: data=0x%0h\n", $time, cmd_scope_wdata));
|
||||
`TRACE(2, ("%d: MMIO_SCOPE_WRITE: data=0x%h\n", $time, cmd_scope_wdata));
|
||||
`endif
|
||||
end
|
||||
`endif
|
||||
default: begin
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: Unknown MMIO Wr: addr=0x%0h, data=0x%0h\n", $time, mmio_hdr.address, 64'(cp2af_sRxPort.c0.data)));
|
||||
`TRACE(2, ("%d: Unknown MMIO Wr: addr=0x%0h, data=0x%h\n", $time, mmio_hdr.address, 64'(cp2af_sRxPort.c0.data)));
|
||||
`endif
|
||||
end
|
||||
endcase
|
||||
|
@ -305,14 +305,14 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
MMIO_SCOPE_READ: begin
|
||||
mmio_tx.data <= cmd_scope_rdata;
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: MMIO_SCOPE_READ: data=0x%0h\n", $time, cmd_scope_rdata));
|
||||
`TRACE(2, ("%d: MMIO_SCOPE_READ: data=0x%h\n", $time, cmd_scope_rdata));
|
||||
`endif
|
||||
end
|
||||
`endif
|
||||
MMIO_DEV_CAPS: begin
|
||||
mmio_tx.data <= dev_caps;
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: MMIO_DEV_CAPS: data=0x%0h\n", $time, dev_caps));
|
||||
`TRACE(2, ("%d: MMIO_DEV_CAPS: data=0x%h\n", $time, dev_caps));
|
||||
`endif
|
||||
end
|
||||
MMIO_ISA_CAPS: begin
|
||||
|
@ -580,8 +580,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.TAG_WIDTH (AVS_REQ_TAGW+1)
|
||||
) mem_bus_if[1]();
|
||||
|
||||
`RESET_RELAY (mem_arb_reset, reset);
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATA_SIZE (LMEM_DATA_SIZE),
|
||||
|
@ -592,7 +590,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.RSP_OUT_BUF (0)
|
||||
) mem_arb (
|
||||
.clk (clk),
|
||||
.reset (mem_arb_reset),
|
||||
.reset (reset),
|
||||
.bus_in_if (cci_vx_mem_bus_if),
|
||||
.bus_out_if (mem_bus_if)
|
||||
);
|
||||
|
@ -760,7 +758,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
cci_mem_wr_req_addr_base <= cci_mem_wr_req_addr_base + CCI_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE);
|
||||
end
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: CCI Rd Rsp: idx=%0d, ctr=%0d, data=0x%0h\n", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data));
|
||||
`TRACE(2, ("%d: CCI Rd Rsp: idx=%0d, ctr=%0d, data=0x%h\n", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data));
|
||||
`endif
|
||||
end
|
||||
|
||||
|
@ -778,14 +776,12 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
end
|
||||
end
|
||||
|
||||
`RESET_RELAY (cci_rdq_reset, reset);
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (CCI_RD_QUEUE_DATAW),
|
||||
.DEPTH (CCI_RD_QUEUE_SIZE)
|
||||
) cci_rd_req_queue (
|
||||
.clk (clk),
|
||||
.reset (cci_rdq_reset),
|
||||
.reset (reset),
|
||||
.push (cci_rdq_push),
|
||||
.pop (cci_rdq_pop),
|
||||
.data_in (cci_rdq_din),
|
||||
|
@ -906,7 +902,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
cci_wr_req_done <= 1;
|
||||
end
|
||||
`ifdef DBG_TRACE_AFU
|
||||
`TRACE(2, ("%d: CCI Wr Req: addr=0x%0h, rem=%0d, pending=%0d, data=0x%0h\n", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data));
|
||||
`TRACE(2, ("%d: CCI Wr Req: addr=0x%0h, rem=%0d, pending=%0d, data=0x%h\n", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data));
|
||||
`endif
|
||||
end
|
||||
|
||||
|
@ -1093,13 +1089,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < NUM_LOCAL_MEM_BANKS; ++i) begin
|
||||
if (avs_write[i] && ~avs_waitrequest[i]) begin
|
||||
`TRACE(2, ("%d: AVS Wr Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h, data=0x%0h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i], avs_writedata[i]));
|
||||
`TRACE(2, ("%d: AVS Wr Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h, data=0x%h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i], avs_writedata[i]));
|
||||
end
|
||||
if (avs_read[i] && ~avs_waitrequest[i]) begin
|
||||
`TRACE(2, ("%d: AVS Rd Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i]));
|
||||
end
|
||||
if (avs_readdatavalid[i]) begin
|
||||
`TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%0h\n", $time, i, avs_readdata[i]));
|
||||
`TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%h\n", $time, i, avs_readdata[i]));
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -377,13 +377,13 @@ module VX_afu_wrap #(
|
|||
`TRACE(2, ("%d: AFU Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i]));
|
||||
end
|
||||
if (m_axi_mem_wvalid_a[i] && m_axi_mem_wready_a[i]) begin
|
||||
`TRACE(2, ("%d: AFU Wr Req [%0d]: data=0x%0h\n", $time, i, m_axi_mem_wdata_a[i]));
|
||||
`TRACE(2, ("%d: AFU Wr Req [%0d]: data=0x%h\n", $time, i, m_axi_mem_wdata_a[i]));
|
||||
end
|
||||
if (m_axi_mem_arvalid_a[i] && m_axi_mem_arready_a[i]) begin
|
||||
`TRACE(2, ("%d: AFU Rd Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_araddr_a[i], m_axi_mem_arid_a[i]));
|
||||
end
|
||||
if (m_axi_mem_rvalid_a[i] && m_axi_mem_rready_a[i]) begin
|
||||
`TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i]));
|
||||
`TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%h, tag=0x%0h\n", $time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i]));
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
94
hw/rtl/cache/VX_bank_flush.sv
vendored
94
hw/rtl/cache/VX_bank_flush.sv
vendored
|
@ -14,6 +14,7 @@
|
|||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_bank_flush #(
|
||||
parameter BANK_ID = 0,
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
|
@ -27,33 +28,36 @@ module VX_bank_flush #(
|
|||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire flush_in_valid,
|
||||
output wire flush_in_ready,
|
||||
output wire flush_out_init,
|
||||
output wire flush_out_valid,
|
||||
output wire [`CS_LINE_SEL_BITS-1:0] flush_out_line,
|
||||
output wire [NUM_WAYS-1:0] flush_out_way,
|
||||
input wire flush_out_ready,
|
||||
input wire mshr_empty
|
||||
input wire flush_begin,
|
||||
output wire flush_end,
|
||||
output wire flush_init,
|
||||
output wire flush_valid,
|
||||
output wire [`CS_LINE_SEL_BITS-1:0] flush_line,
|
||||
output wire [NUM_WAYS-1:0] flush_way,
|
||||
input wire flush_ready,
|
||||
input wire mshr_empty,
|
||||
input wire bank_empty
|
||||
);
|
||||
parameter CTR_WIDTH = `CS_LINE_SEL_BITS + (WRITEBACK ? `CS_WAY_SEL_BITS : 0);
|
||||
// ways iteration is only needed when eviction is enabled
|
||||
localparam CTR_WIDTH = `CS_LINE_SEL_BITS + (WRITEBACK ? `CS_WAY_SEL_BITS : 0);
|
||||
|
||||
parameter STATE_IDLE = 2'd0;
|
||||
parameter STATE_INIT = 2'd1;
|
||||
parameter STATE_FLUSH = 2'd2;
|
||||
localparam STATE_IDLE = 0;
|
||||
localparam STATE_INIT = 1;
|
||||
localparam STATE_WAIT1 = 2;
|
||||
localparam STATE_FLUSH = 3;
|
||||
localparam STATE_WAIT2 = 4;
|
||||
localparam STATE_DONE = 5;
|
||||
|
||||
reg [2:0] state_r, state_n;
|
||||
|
||||
reg [CTR_WIDTH-1:0] counter_r;
|
||||
reg [1:0] state_r, state_n;
|
||||
reg flush_in_ready_r, flush_in_ready_n;
|
||||
|
||||
always @(*) begin
|
||||
state_n = state_r;
|
||||
flush_in_ready_n = 0;
|
||||
case (state_r)
|
||||
// STATE_IDLE
|
||||
default: begin
|
||||
if (flush_in_valid && mshr_empty) begin
|
||||
state_n = STATE_FLUSH;
|
||||
STATE_IDLE: begin
|
||||
if (flush_begin) begin
|
||||
state_n = STATE_WAIT1;
|
||||
end
|
||||
end
|
||||
STATE_INIT: begin
|
||||
|
@ -61,25 +65,41 @@ module VX_bank_flush #(
|
|||
state_n = STATE_IDLE;
|
||||
end
|
||||
end
|
||||
STATE_FLUSH: begin
|
||||
if (counter_r == ((2 ** CTR_WIDTH)-1)) begin
|
||||
state_n = STATE_IDLE;
|
||||
flush_in_ready_n = 1;
|
||||
STATE_WAIT1: begin
|
||||
// wait for pending requests to complete
|
||||
if (mshr_empty) begin
|
||||
state_n = STATE_FLUSH;
|
||||
end
|
||||
end
|
||||
STATE_FLUSH: begin
|
||||
if (counter_r == ((2 ** CTR_WIDTH)-1) && flush_ready) begin
|
||||
state_n = (BANK_ID == 0) ? STATE_DONE : STATE_WAIT2;
|
||||
end
|
||||
end
|
||||
STATE_WAIT2: begin
|
||||
// ensure the bank is empty before notifying the cache flush unit,
|
||||
// because the flush request to lower caches only goes through bank0
|
||||
// and it is important that this request gets sent out last.
|
||||
if (bank_empty) begin
|
||||
state_n = STATE_DONE;
|
||||
end
|
||||
end
|
||||
STATE_DONE: begin
|
||||
// generate a completion pulse
|
||||
state_n = STATE_IDLE;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state_r <= STATE_INIT;
|
||||
state_r <= STATE_INIT;
|
||||
counter_r <= '0;
|
||||
flush_in_ready_r <= '0;
|
||||
end else begin
|
||||
state_r <= state_n;
|
||||
flush_in_ready_r <= flush_in_ready_n;
|
||||
if (state_r != STATE_IDLE) begin
|
||||
if ((state_r == STATE_INIT) || flush_out_ready) begin
|
||||
if ((state_r == STATE_INIT)
|
||||
|| ((state_r == STATE_FLUSH) && flush_ready)) begin
|
||||
counter_r <= counter_r + CTR_WIDTH'(1);
|
||||
end
|
||||
end else begin
|
||||
|
@ -88,22 +108,20 @@ module VX_bank_flush #(
|
|||
end
|
||||
end
|
||||
|
||||
assign flush_in_ready = flush_in_ready_r;
|
||||
|
||||
assign flush_out_init = (state_r == STATE_INIT);
|
||||
|
||||
assign flush_out_valid = (state_r == STATE_FLUSH);
|
||||
assign flush_out_line = counter_r[`CS_LINE_SEL_BITS-1:0];
|
||||
assign flush_end = (state_r == STATE_DONE);
|
||||
assign flush_init = (state_r == STATE_INIT);
|
||||
assign flush_valid = (state_r == STATE_FLUSH);
|
||||
assign flush_line = counter_r[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin
|
||||
reg [NUM_WAYS-1:0] flush_out_way_r;
|
||||
reg [NUM_WAYS-1:0] flush_way_r;
|
||||
always @(*) begin
|
||||
flush_out_way_r = '0;
|
||||
flush_out_way_r[counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]] = 1;
|
||||
flush_way_r = '0;
|
||||
flush_way_r[counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]] = 1;
|
||||
end
|
||||
assign flush_out_way = flush_out_way_r;
|
||||
assign flush_way = flush_way_r;
|
||||
end else begin
|
||||
assign flush_out_way = {NUM_WAYS{1'b1}};
|
||||
assign flush_way = {NUM_WAYS{1'b1}};
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
52
hw/rtl/cache/VX_cache.sv
vendored
52
hw/rtl/cache/VX_cache.sv
vendored
|
@ -45,6 +45,9 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
|
||||
// Enable dirty bytes on writeback
|
||||
parameter DIRTY_BYTES = 0,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
|
@ -69,8 +72,13 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
VX_mem_bus_if.master mem_bus_if
|
||||
);
|
||||
|
||||
`STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter"))
|
||||
`STATIC_ASSERT(WRITE_ENABLE || !WRITEBACK, ("invalid parameter"))
|
||||
`STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter: number of banks must be power of 2"))
|
||||
`STATIC_ASSERT(WRITE_ENABLE || !WRITEBACK, ("invalid parameter: writeback requires write enable"))
|
||||
`STATIC_ASSERT(WRITEBACK || !DIRTY_BYTES, ("invalid parameter: dirty bytes require writeback"))
|
||||
|
||||
// In writeback mode, memory fill response may issue a new memory request to handle evicted blocks.
|
||||
// We need to ensure that the memory request queue never fills up to avoid deadlock.
|
||||
`STATIC_ASSERT(!WRITEBACK || (MREQ_SIZE >= MSHR_SIZE), ("invalid parameter: writeback requires MREQ_SIZE >= MSHR_SIZE"))
|
||||
|
||||
localparam REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS);
|
||||
localparam WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS);
|
||||
|
@ -101,26 +109,23 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH (TAG_WIDTH)
|
||||
) core_bus2_if[NUM_REQS]();
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_flush_valid;
|
||||
wire [NUM_BANKS-1:0] per_bank_flush_ready;
|
||||
wire [NUM_BANKS-1:0] per_bank_flush_begin;
|
||||
wire [NUM_BANKS-1:0] per_bank_flush_end;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_fire;
|
||||
|
||||
// this reset relay is required to sync with bank initialization
|
||||
`RESET_RELAY (flush_reset, reset);
|
||||
|
||||
VX_cache_flush #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.BANK_SEL_LATENCY (`TO_OUT_BUF_REG(REQ_XBAR_BUF)) // bank xbar latency
|
||||
) flush_unit (
|
||||
.clk (clk),
|
||||
.reset (flush_reset),
|
||||
.reset (reset),
|
||||
.core_bus_in_if (core_bus_if),
|
||||
.core_bus_out_if (core_bus2_if),
|
||||
.bank_req_fire (per_bank_core_req_fire),
|
||||
.flush_valid (per_bank_flush_valid),
|
||||
.flush_ready (per_bank_flush_ready)
|
||||
.flush_begin (per_bank_flush_begin),
|
||||
.flush_end (per_bank_flush_end)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
@ -131,9 +136,9 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s;
|
||||
wire [NUM_REQS-1:0] core_rsp_ready_s;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
`RESET_RELAY_EX (core_rsp_reset, reset, NUM_REQS, `MAX_FANOUT);
|
||||
|
||||
`RESET_RELAY (core_rsp_reset, reset);
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`CS_WORD_WIDTH + TAG_WIDTH),
|
||||
|
@ -141,7 +146,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF))
|
||||
) core_rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (core_rsp_reset),
|
||||
.reset (core_rsp_reset[i]),
|
||||
.valid_in (core_rsp_valid_s[i]),
|
||||
.ready_in (core_rsp_ready_s[i]),
|
||||
.data_in ({core_rsp_data_s[i], core_rsp_tag_s[i]}),
|
||||
|
@ -165,15 +170,13 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
|
||||
wire mem_bus_if_flush;
|
||||
|
||||
`RESET_RELAY (mem_req_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1),
|
||||
.SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
) mem_req_buf (
|
||||
.clk (clk),
|
||||
.reset (mem_req_reset),
|
||||
.reset (reset),
|
||||
.valid_in (mem_req_valid_s),
|
||||
.ready_in (mem_req_ready_s),
|
||||
.data_in ({mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_data_s, mem_req_tag_s, mem_req_flush_s}),
|
||||
|
@ -192,15 +195,13 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_s;
|
||||
wire mem_rsp_ready_s;
|
||||
|
||||
`RESET_RELAY (mem_rsp_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (MEM_TAG_WIDTH + `CS_LINE_WIDTH),
|
||||
.SIZE (MRSQ_SIZE),
|
||||
.OUT_REG (MRSQ_SIZE > 2)
|
||||
) mem_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (mem_rsp_reset),
|
||||
.reset (reset),
|
||||
.valid_in (mem_bus_if.rsp_valid),
|
||||
.ready_in (mem_bus_if.rsp_ready),
|
||||
.data_in ({mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data}),
|
||||
|
@ -316,6 +317,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.NUM_OUTPUTS (NUM_BANKS),
|
||||
.DATAW (CORE_REQ_DATAW),
|
||||
.PERF_CTR_BITS (`PERF_CTR_BITS),
|
||||
.ARBITER ("F"),
|
||||
.OUT_BUF (REQ_XBAR_BUF)
|
||||
) req_xbar (
|
||||
.clk (clk),
|
||||
|
@ -373,6 +375,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.MSHR_SIZE (MSHR_SIZE),
|
||||
.MREQ_SIZE (MREQ_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.DIRTY_BYTES (DIRTY_BYTES),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
|
@ -423,8 +426,8 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.mem_rsp_id (`CS_MEM_TAG_TO_REQ_ID(mem_rsp_tag_s)),
|
||||
.mem_rsp_ready (per_bank_mem_rsp_ready[bank_id]),
|
||||
|
||||
.flush_valid (per_bank_flush_valid[bank_id]),
|
||||
.flush_ready (per_bank_flush_ready[bank_id])
|
||||
.flush_begin (per_bank_flush_begin[bank_id]),
|
||||
.flush_end (per_bank_flush_end[bank_id])
|
||||
);
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
|
@ -448,7 +451,8 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
VX_stream_xbar #(
|
||||
.NUM_INPUTS (NUM_BANKS),
|
||||
.NUM_OUTPUTS (NUM_REQS),
|
||||
.DATAW (CORE_RSP_DATAW)
|
||||
.DATAW (CORE_RSP_DATAW),
|
||||
.ARBITER ("F")
|
||||
) rsp_xbar (
|
||||
.clk (clk),
|
||||
.reset (rsp_xbar_reset),
|
||||
|
@ -494,15 +498,13 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
};
|
||||
end
|
||||
|
||||
`RESET_RELAY (mem_arb_reset, reset);
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (NUM_BANKS),
|
||||
.DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + MSHR_ADDR_WIDTH + 1),
|
||||
.ARBITER ("F")
|
||||
) mem_req_arb (
|
||||
.clk (clk),
|
||||
.reset (mem_arb_reset),
|
||||
.reset (reset),
|
||||
.valid_in (per_bank_mem_req_valid),
|
||||
.ready_in (per_bank_mem_req_ready),
|
||||
.data_in (data_in),
|
||||
|
|
238
hw/rtl/cache/VX_cache_bank.sv
vendored
238
hw/rtl/cache/VX_cache_bank.sv
vendored
|
@ -44,6 +44,9 @@ module VX_cache_bank #(
|
|||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
|
||||
// Enable dirty bytes on writeback
|
||||
parameter DIRTY_BYTES = 0,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
|
@ -105,8 +108,8 @@ module VX_cache_bank #(
|
|||
output wire mem_rsp_ready,
|
||||
|
||||
// flush
|
||||
input wire flush_valid,
|
||||
output wire flush_ready
|
||||
input wire flush_begin,
|
||||
output wire flush_end
|
||||
);
|
||||
|
||||
localparam PIPELINE_STAGES = 2;
|
||||
|
@ -117,6 +120,7 @@ module VX_cache_bank #(
|
|||
|
||||
wire crsp_queue_stall;
|
||||
wire mshr_alm_full;
|
||||
wire mreq_queue_empty;
|
||||
wire mreq_queue_alm_full;
|
||||
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] mem_rsp_addr;
|
||||
|
@ -132,11 +136,12 @@ module VX_cache_bank #(
|
|||
wire [MSHR_ADDR_WIDTH-1:0] replay_id;
|
||||
wire replay_ready;
|
||||
|
||||
wire is_init_st0;
|
||||
wire is_init_st0, is_init_st1;
|
||||
wire is_flush_st0, is_flush_st1;
|
||||
wire [NUM_WAYS-1:0] flush_way_st0;
|
||||
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1;
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_sel_st0, line_sel_st1;
|
||||
wire rw_sel, rw_st0, rw_st1;
|
||||
wire [WORD_SEL_WIDTH-1:0] wsel_sel, wsel_st0, wsel_st1;
|
||||
wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1;
|
||||
|
@ -149,7 +154,8 @@ module VX_cache_bank #(
|
|||
wire is_creq_st0, is_creq_st1;
|
||||
wire is_fill_st0, is_fill_st1;
|
||||
wire is_replay_st0, is_replay_st1;
|
||||
wire creq_flush_st0, creq_flush_st1;
|
||||
wire creq_flush_sel, creq_flush_st0, creq_flush_st1;
|
||||
wire evict_dirty_st0, evict_dirty_st1;
|
||||
wire [NUM_WAYS-1:0] way_sel_st0, way_sel_st1;
|
||||
wire [NUM_WAYS-1:0] tag_matches_st0;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0;
|
||||
|
@ -157,73 +163,82 @@ module VX_cache_bank #(
|
|||
wire mshr_pending_st0, mshr_pending_st1;
|
||||
wire mshr_empty;
|
||||
|
||||
wire line_flush_valid;
|
||||
wire line_flush_init;
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_flush_sel;
|
||||
wire [NUM_WAYS-1:0] line_flush_way;
|
||||
wire line_flush_ready;
|
||||
wire flush_valid;
|
||||
wire init_valid;
|
||||
wire [`CS_LINE_SEL_BITS-1:0] flush_sel;
|
||||
wire [NUM_WAYS-1:0] flush_way;
|
||||
wire flush_ready;
|
||||
|
||||
// ensure we have no pending memory request in the bank
|
||||
wire no_pending_req = ~valid_st0 && ~valid_st1 && mreq_queue_empty;
|
||||
|
||||
// flush unit
|
||||
VX_bank_flush #(
|
||||
.BANK_ID (BANK_ID),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_WAYS (NUM_WAYS),
|
||||
.WRITEBACK (WRITEBACK)
|
||||
) flush_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.flush_in_valid (flush_valid),
|
||||
.flush_in_ready (flush_ready),
|
||||
.flush_out_init (line_flush_init),
|
||||
.flush_out_valid (line_flush_valid),
|
||||
.flush_out_line (line_flush_sel),
|
||||
.flush_out_way (line_flush_way),
|
||||
.flush_out_ready (line_flush_ready),
|
||||
.mshr_empty (mshr_empty)
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.flush_begin (flush_begin),
|
||||
.flush_end (flush_end),
|
||||
.flush_init (init_valid),
|
||||
.flush_valid (flush_valid),
|
||||
.flush_line (flush_sel),
|
||||
.flush_way (flush_way),
|
||||
.flush_ready (flush_ready),
|
||||
.mshr_empty (mshr_empty),
|
||||
.bank_empty (no_pending_req)
|
||||
);
|
||||
|
||||
wire rdw_hazard_st0;
|
||||
reg rdw_hazard_st1;
|
||||
wire rdw_hazard1_sel;
|
||||
wire rdw_hazard2_sel;
|
||||
reg rdw_hazard3_st1;
|
||||
|
||||
wire pipe_stall = crsp_queue_stall || rdw_hazard_st1;
|
||||
wire pipe_stall = crsp_queue_stall || rdw_hazard3_st1;
|
||||
|
||||
// inputs arbitration:
|
||||
// mshr replay has highest priority to maximize utilization since there is no miss.
|
||||
// handle memory responses next to prevent deadlock with potential memory request from a miss.
|
||||
// flush has precedence over core requests to ensure that the cache is in a consistent state.
|
||||
wire replay_grant = ~line_flush_init;
|
||||
wire replay_grant = ~init_valid;
|
||||
wire replay_enable = replay_grant && replay_valid;
|
||||
|
||||
wire fill_grant = ~line_flush_init && ~replay_enable;
|
||||
wire fill_grant = ~init_valid && ~replay_enable;
|
||||
wire fill_enable = fill_grant && mem_rsp_valid;
|
||||
|
||||
wire flush_grant = ~line_flush_init && ~replay_enable && ~fill_enable;
|
||||
wire flush_enable = flush_grant && line_flush_valid;
|
||||
wire flush_grant = ~init_valid && ~replay_enable && ~fill_enable;
|
||||
wire flush_enable = flush_grant && flush_valid;
|
||||
|
||||
wire creq_grant = ~line_flush_init && ~replay_enable && ~fill_enable && ~flush_enable;
|
||||
wire creq_grant = ~init_valid && ~replay_enable && ~fill_enable && ~flush_enable;
|
||||
wire creq_enable = creq_grant && core_req_valid;
|
||||
|
||||
assign replay_ready = replay_grant
|
||||
&& ~rdw_hazard_st0
|
||||
&& ~rdw_hazard1_sel
|
||||
&& ~pipe_stall;
|
||||
|
||||
assign mem_rsp_ready = fill_grant
|
||||
&& (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions
|
||||
&& ~rdw_hazard2_sel
|
||||
&& ~pipe_stall;
|
||||
|
||||
assign line_flush_ready = flush_grant
|
||||
&& ~mreq_queue_alm_full
|
||||
&& ~pipe_stall;
|
||||
assign flush_ready = flush_grant
|
||||
&& (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions
|
||||
&& ~rdw_hazard2_sel
|
||||
&& ~pipe_stall;
|
||||
|
||||
assign core_req_ready = creq_grant
|
||||
&& ~mreq_queue_alm_full
|
||||
&& ~mshr_alm_full
|
||||
&& ~pipe_stall;
|
||||
|
||||
wire init_fire = line_flush_init;
|
||||
wire init_fire = init_valid;
|
||||
wire replay_fire = replay_valid && replay_ready;
|
||||
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
|
||||
wire flush_fire = line_flush_valid && line_flush_ready;
|
||||
wire flush_fire = flush_valid && flush_ready;
|
||||
wire core_req_fire = core_req_valid && core_req_ready;
|
||||
|
||||
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || flush_fire || core_req_fire;
|
||||
|
@ -232,8 +247,9 @@ module VX_cache_bank #(
|
|||
assign wsel_sel = replay_valid ? replay_wsel : core_req_wsel;
|
||||
assign req_idx_sel = replay_valid ? replay_idx : core_req_idx;
|
||||
assign tag_sel = replay_valid ? replay_tag : core_req_tag;
|
||||
assign creq_flush_sel = core_req_valid && core_req_flush;
|
||||
|
||||
assign addr_sel = (line_flush_init | line_flush_valid) ? `CS_LINE_ADDR_WIDTH'(line_flush_sel) :
|
||||
assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) :
|
||||
(replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr));
|
||||
|
||||
if (WRITE_ENABLE) begin
|
||||
|
@ -260,8 +276,8 @@ module VX_cache_bank #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~pipe_stall),
|
||||
.data_in ({valid_sel, line_flush_init, replay_enable, fill_enable, flush_enable, creq_enable, core_req_flush, line_flush_way, addr_sel, data_sel, rw_sel, byteen_sel, wsel_sel, req_idx_sel, tag_sel, replay_id}),
|
||||
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, creq_flush_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0})
|
||||
.data_in ({valid_sel, init_valid, replay_enable, fill_enable, flush_enable, creq_enable, creq_flush_sel, flush_way, addr_sel, data_sel, rw_sel, byteen_sel, wsel_sel, req_idx_sel, tag_sel, replay_id}),
|
||||
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, creq_flush_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0})
|
||||
);
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
|
@ -273,18 +289,20 @@ module VX_cache_bank #(
|
|||
wire do_init_st0 = valid_st0 && is_init_st0;
|
||||
wire do_flush_st0 = valid_st0 && is_flush_st0;
|
||||
wire do_creq_rd_st0 = valid_st0 && is_creq_st0 && ~rw_st0;
|
||||
wire do_creq_wr_st0 = valid_st0 && is_creq_st0 && rw_st0;
|
||||
wire do_replay_rd_st0 = valid_st0 && is_replay_st0 && ~rw_st0;
|
||||
wire do_replay_wr_st0 = valid_st0 && is_replay_st0 && rw_st0;
|
||||
wire do_fill_st0 = valid_st0 && is_fill_st0;
|
||||
wire do_lookup_st0 = valid_st0 && ~(is_fill_st0 || is_init_st0);
|
||||
|
||||
wire do_cache_rd_st0 = do_creq_rd_st0 || do_replay_rd_st0;
|
||||
wire do_cache_wr_st0 = do_creq_wr_st0 || do_replay_wr_st0;
|
||||
wire do_lookup_st0 = do_cache_rd_st0 || do_cache_wr_st0;
|
||||
|
||||
wire [`CS_WORD_WIDTH-1:0] write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0];
|
||||
|
||||
wire [NUM_WAYS-1:0] repl_way_st0;
|
||||
wire [`CS_TAG_SEL_BITS-1:0] repl_tag_st0;
|
||||
assign line_sel_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
`RESET_RELAY (tag_reset, reset);
|
||||
wire [NUM_WAYS-1:0] evict_way_st0;
|
||||
wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st0;
|
||||
|
||||
VX_cache_tags #(
|
||||
.INSTANCE_ID($sformatf("%s-tags", INSTANCE_ID)),
|
||||
|
@ -294,42 +312,51 @@ module VX_cache_bank #(
|
|||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_WAYS (NUM_WAYS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.UUID_WIDTH (UUID_WIDTH)
|
||||
) cache_tags (
|
||||
.clk (clk),
|
||||
.reset (tag_reset),
|
||||
.reset (reset),
|
||||
|
||||
.req_uuid (req_uuid_st0),
|
||||
|
||||
.stall (pipe_stall),
|
||||
|
||||
// init/fill/lookup/flush
|
||||
.init (do_init_st0 || do_flush_st0),
|
||||
// init/flush/fill/write/lookup
|
||||
.init (do_init_st0),
|
||||
.flush (do_flush_st0),
|
||||
.fill (do_fill_st0),
|
||||
.write (do_cache_wr_st0),
|
||||
.lookup (do_lookup_st0),
|
||||
.line_addr (addr_st0),
|
||||
.way_sel (flush_way_st0),
|
||||
.tag_matches(tag_matches_st0),
|
||||
|
||||
// replacement
|
||||
.repl_way (repl_way_st0),
|
||||
.repl_tag (repl_tag_st0)
|
||||
.evict_dirty(evict_dirty_st0),
|
||||
.evict_way (evict_way_st0),
|
||||
.evict_tag (evict_tag_st0)
|
||||
);
|
||||
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] addr2_st0;
|
||||
|
||||
wire is_flush2_st0 = WRITEBACK && is_flush_st0;
|
||||
|
||||
assign mshr_id_st0 = is_creq_st0 ? mshr_alloc_id_st0 : replay_id_st0;
|
||||
|
||||
assign way_sel_st0 = is_fill_st0 ? repl_way_st0 : (is_flush_st0 ? flush_way_st0 : tag_matches_st0);
|
||||
assign way_sel_st0 = (is_fill_st0 || is_flush2_st0) ? evict_way_st0 : tag_matches_st0;
|
||||
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] addr_r_st0 = (is_fill_st0 || is_flush_st0) ? {repl_tag_st0, addr_st0[`CS_LINE_SEL_BITS-1:0]} : addr_st0;
|
||||
assign addr2_st0 = (is_fill_st0 || is_flush2_st0) ? {evict_tag_st0, line_sel_st0} : addr_st0;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1),
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~pipe_stall),
|
||||
.data_in ({valid_st0, is_flush_st0, is_replay_st0, is_fill_st0, is_creq_st0, creq_flush_st0, rw_st0, addr_r_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, way_sel_st0, mshr_pending_st0}),
|
||||
.data_out ({valid_st1, is_flush_st1, is_replay_st1, is_fill_st1, is_creq_st1, creq_flush_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, way_sel_st1, mshr_pending_st1})
|
||||
.data_in ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush2_st0, is_creq_st0, creq_flush_st0, rw_st0, addr2_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, way_sel_st0, evict_dirty_st0, mshr_pending_st0}),
|
||||
.data_out ({valid_st1, is_init_st1, is_replay_st1, is_fill_st1, is_flush_st1, is_creq_st1, creq_flush_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, way_sel_st1, evict_dirty_st1, mshr_pending_st1})
|
||||
);
|
||||
|
||||
// we have a tag hit
|
||||
|
@ -343,35 +370,40 @@ module VX_cache_bank #(
|
|||
|
||||
wire is_read_st1 = is_creq_st1 && ~rw_st1;
|
||||
wire is_write_st1 = is_creq_st1 && rw_st1;
|
||||
|
||||
wire do_init_st1 = valid_st1 && is_init_st1;
|
||||
wire do_fill_st1 = valid_st1 && is_fill_st1;
|
||||
wire do_flush_st1 = valid_st1 && is_flush_st1;
|
||||
|
||||
wire do_creq_rd_st1 = valid_st1 && is_read_st1;
|
||||
wire do_creq_wr_st1 = valid_st1 && is_write_st1;
|
||||
wire do_fill_st1 = valid_st1 && is_fill_st1;
|
||||
wire do_replay_rd_st1 = valid_st1 && is_replay_st1 && ~rw_st1;
|
||||
wire do_replay_wr_st1 = valid_st1 && is_replay_st1 && rw_st1;
|
||||
|
||||
wire do_cache_rd_st1 = do_read_hit_st1 || do_replay_rd_st1;
|
||||
wire do_cache_wr_st1 = do_write_hit_st1 || do_replay_wr_st1;
|
||||
|
||||
wire do_read_hit_st1 = do_creq_rd_st1 && is_hit_st1;
|
||||
wire do_read_miss_st1 = do_creq_rd_st1 && ~is_hit_st1;
|
||||
|
||||
wire do_write_hit_st1 = do_creq_wr_st1 && is_hit_st1;
|
||||
wire do_write_miss_st1= do_creq_wr_st1 && ~is_hit_st1;
|
||||
|
||||
wire do_flush_st1 = valid_st1 && is_flush_st1;
|
||||
wire do_cache_rd_st1 = do_read_hit_st1 || do_replay_rd_st1;
|
||||
wire do_cache_wr_st1 = do_write_hit_st1 || do_replay_wr_st1;
|
||||
|
||||
assign line_sel_st1 = addr_st1[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
`UNUSED_VAR (do_write_miss_st1)
|
||||
|
||||
// ensure mshr replay always gets a hit
|
||||
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("runtime error: invalid mshr replay"));
|
||||
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("missed mshr replay"));
|
||||
|
||||
// detect BRAM's read-during-write hazard
|
||||
assign rdw_hazard_st0 = do_fill_st0; // stall cycle after a fill
|
||||
wire rdw_case1 = do_cache_rd_st0 && do_cache_wr_st1 && (addr_st0 == addr_st1); // standard cache access
|
||||
wire rdw_case2 = WRITEBACK && (do_flush_st0 || do_fill_st0) && do_cache_wr_st1; // a writeback can evict preceding write
|
||||
always @(posedge clk) begin // after a write to same address
|
||||
rdw_hazard_st1 <= (rdw_case1 || rdw_case2)
|
||||
&& ~rdw_hazard_st1; // invalidate if pipeline stalled to avoid repeats
|
||||
// both tag and data stores use BRAM with no read-during-write protection.
|
||||
// we need to stall the pipeline to prevent read-after-write hazards.
|
||||
assign rdw_hazard1_sel = do_fill_st0; // stall first replay following a fill
|
||||
assign rdw_hazard2_sel = WRITEBACK && do_cache_wr_st0; // a writeback can evict any preceding write
|
||||
always @(posedge clk) begin
|
||||
// stall reads following writes to same line address
|
||||
rdw_hazard3_st1 <= do_cache_rd_st0 && do_cache_wr_st1 && (line_sel_st0 == line_sel_st1)
|
||||
&& ~rdw_hazard3_st1; // release pipeline stall
|
||||
end
|
||||
|
||||
wire [`CS_LINE_WIDTH-1:0] write_data_st1 = {`CS_WORDS_PER_LINE{data_st1[`CS_WORD_WIDTH-1:0]}};
|
||||
|
@ -380,7 +412,6 @@ module VX_cache_bank #(
|
|||
|
||||
wire [`CS_LINE_WIDTH-1:0] dirty_data_st1;
|
||||
wire [LINE_SIZE-1:0] dirty_byteen_st1;
|
||||
wire dirty_valid_st1;
|
||||
|
||||
if (`CS_WORDS_PER_LINE > 1) begin
|
||||
reg [LINE_SIZE-1:0] write_byteen_r;
|
||||
|
@ -393,8 +424,6 @@ module VX_cache_bank #(
|
|||
assign write_byteen_st1 = byteen_st1;
|
||||
end
|
||||
|
||||
`RESET_RELAY (data_reset, reset);
|
||||
|
||||
VX_cache_data #(
|
||||
.INSTANCE_ID ($sformatf("%s-data", INSTANCE_ID)),
|
||||
.BANK_ID (BANK_ID),
|
||||
|
@ -405,17 +434,19 @@ module VX_cache_bank #(
|
|||
.WORD_SIZE (WORD_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.DIRTY_BYTES (DIRTY_BYTES),
|
||||
.UUID_WIDTH (UUID_WIDTH)
|
||||
) cache_data (
|
||||
.clk (clk),
|
||||
.reset (data_reset),
|
||||
.reset (reset),
|
||||
|
||||
.req_uuid (req_uuid_st1),
|
||||
|
||||
.stall (pipe_stall),
|
||||
|
||||
.init (do_init_st1),
|
||||
.read (do_cache_rd_st1),
|
||||
.fill (do_fill_st1 && ~rdw_hazard_st1),
|
||||
.fill (do_fill_st1),
|
||||
.flush (do_flush_st1),
|
||||
.write (do_cache_wr_st1),
|
||||
.way_sel (way_sel_st1),
|
||||
|
@ -425,7 +456,6 @@ module VX_cache_bank #(
|
|||
.write_data (write_data_st1),
|
||||
.write_byteen(write_byteen_st1),
|
||||
.read_data (read_data_st1),
|
||||
.dirty_valid(dirty_valid_st1),
|
||||
.dirty_data (dirty_data_st1),
|
||||
.dirty_byteen(dirty_byteen_st1)
|
||||
);
|
||||
|
@ -461,8 +491,6 @@ module VX_cache_bank #(
|
|||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
`RESET_RELAY (mshr_reset, reset);
|
||||
|
||||
VX_cache_mshr #(
|
||||
.INSTANCE_ID ($sformatf("%s-mshr", INSTANCE_ID)),
|
||||
.BANK_ID (BANK_ID),
|
||||
|
@ -473,7 +501,7 @@ module VX_cache_bank #(
|
|||
.DATA_WIDTH (WORD_SEL_WIDTH + WORD_SIZE + `CS_WORD_WIDTH + TAG_WIDTH + REQ_SEL_WIDTH)
|
||||
) cache_mshr (
|
||||
.clk (clk),
|
||||
.reset (mshr_reset),
|
||||
.reset (reset),
|
||||
|
||||
.deq_req_uuid (req_uuid_sel),
|
||||
.lkp_req_uuid (req_uuid_st0),
|
||||
|
@ -536,16 +564,14 @@ module VX_cache_bank #(
|
|||
assign crsp_queue_data = read_data_st1;
|
||||
assign crsp_queue_tag = tag_st1;
|
||||
|
||||
`RESET_RELAY (crsp_queue_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (TAG_WIDTH + `CS_WORD_WIDTH + REQ_SEL_WIDTH),
|
||||
.SIZE (CRSQ_SIZE),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF))
|
||||
) core_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (crsp_queue_reset),
|
||||
.valid_in (crsp_queue_valid && ~rdw_hazard_st1),
|
||||
.reset (reset),
|
||||
.valid_in (crsp_queue_valid && ~rdw_hazard3_st1),
|
||||
.ready_in (crsp_queue_ready),
|
||||
.data_in ({crsp_queue_tag, crsp_queue_data, crsp_queue_idx}),
|
||||
.data_out ({core_rsp_tag, core_rsp_data, core_rsp_idx}),
|
||||
|
@ -557,7 +583,7 @@ module VX_cache_bank #(
|
|||
|
||||
// schedule memory request
|
||||
|
||||
wire mreq_queue_push, mreq_queue_pop, mreq_queue_empty;
|
||||
wire mreq_queue_push, mreq_queue_pop;
|
||||
wire [`CS_LINE_WIDTH-1:0] mreq_queue_data;
|
||||
wire [LINE_SIZE-1:0] mreq_queue_byteen;
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] mreq_queue_addr;
|
||||
|
@ -565,30 +591,42 @@ module VX_cache_bank #(
|
|||
wire mreq_queue_rw;
|
||||
wire mreq_queue_flush;
|
||||
|
||||
wire is_evict_st1 = (is_fill_st1 || is_flush_st1) && dirty_valid_st1;
|
||||
wire do_writeback_st1 = valid_st1 && is_evict_st1;
|
||||
`UNUSED_VAR (do_writeback_st1)
|
||||
wire is_fill_or_flush_st1 = is_fill_st1 || is_flush_st1;
|
||||
wire do_fill_or_flush_st1 = valid_st1 && is_fill_or_flush_st1;
|
||||
wire do_writeback_st1 = do_fill_or_flush_st1 && evict_dirty_st1;
|
||||
|
||||
if (WRITEBACK) begin
|
||||
if (DIRTY_BYTES) begin
|
||||
// ensure dirty bytes match the tag info
|
||||
wire has_dirty_bytes = (| dirty_byteen_st1);
|
||||
`RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID)));
|
||||
end
|
||||
assign mreq_queue_push = (((do_read_miss_st1 || do_write_miss_st1) && ~mshr_pending_st1)
|
||||
|| do_writeback_st1)
|
||||
&& ~rdw_hazard_st1;
|
||||
&& ~rdw_hazard3_st1;
|
||||
end else begin
|
||||
`UNUSED_VAR (dirty_valid_st1)
|
||||
`UNUSED_VAR (do_writeback_st1)
|
||||
assign mreq_queue_push = ((do_read_miss_st1 && ~mshr_pending_st1)
|
||||
|| do_creq_wr_st1)
|
||||
&& ~rdw_hazard_st1;
|
||||
&& ~rdw_hazard3_st1;
|
||||
end
|
||||
|
||||
assign mreq_queue_pop = mem_req_valid && mem_req_ready;
|
||||
assign mreq_queue_rw = WRITE_ENABLE && (WRITEBACK ? is_evict_st1 : rw_st1);
|
||||
assign mreq_queue_pop = mem_req_valid && mem_req_ready;
|
||||
assign mreq_queue_addr = addr_st1;
|
||||
assign mreq_queue_id = mshr_id_st1;
|
||||
assign mreq_queue_data = is_write_st1 ? write_data_st1 : dirty_data_st1;
|
||||
assign mreq_queue_byteen = is_write_st1 ? write_byteen_st1 : dirty_byteen_st1;
|
||||
assign mreq_queue_id = mshr_id_st1;
|
||||
assign mreq_queue_flush = creq_flush_st1;
|
||||
|
||||
`RESET_RELAY (mreq_queue_reset, reset);
|
||||
if (WRITE_ENABLE) begin
|
||||
assign mreq_queue_rw = WRITEBACK ? is_fill_or_flush_st1 : rw_st1;
|
||||
assign mreq_queue_data = WRITEBACK ? dirty_data_st1 : write_data_st1;
|
||||
assign mreq_queue_byteen = WRITEBACK ? dirty_byteen_st1 : write_byteen_st1;
|
||||
end else begin
|
||||
assign mreq_queue_rw = 0;
|
||||
assign mreq_queue_data = 0;
|
||||
assign mreq_queue_byteen = 0;
|
||||
`UNUSED_VAR (dirty_data_st1)
|
||||
`UNUSED_VAR (dirty_byteen_st1)
|
||||
end
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (1 + `CS_LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + 1),
|
||||
|
@ -597,7 +635,7 @@ module VX_cache_bank #(
|
|||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
) mem_req_queue (
|
||||
.clk (clk),
|
||||
.reset (mreq_queue_reset),
|
||||
.reset (reset),
|
||||
.push (mreq_queue_push),
|
||||
.pop (mreq_queue_pop),
|
||||
.data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_id, mreq_queue_byteen, mreq_queue_data, mreq_queue_flush}),
|
||||
|
@ -621,32 +659,32 @@ module VX_cache_bank #(
|
|||
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
wire crsp_queue_fire = crsp_queue_valid && crsp_queue_ready;
|
||||
wire pipeline_stall = (replay_valid || mem_rsp_valid || core_req_valid || line_flush_valid)
|
||||
&& ~(replay_fire || mem_rsp_fire || core_req_fire || line_flush_valid);
|
||||
wire input_stall = (replay_valid || mem_rsp_valid || core_req_valid || flush_valid)
|
||||
&& ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire);
|
||||
always @(posedge clk) begin
|
||||
if (pipeline_stall) begin
|
||||
`TRACE(3, ("%d: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw_st0=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard_st0));
|
||||
if (input_stall || pipe_stall) begin
|
||||
`TRACE(3, ("%d: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1));
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(2, ("%d: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data));
|
||||
`TRACE(2, ("%d: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data));
|
||||
end
|
||||
if (replay_fire) begin
|
||||
`TRACE(2, ("%d: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel));
|
||||
end
|
||||
if (core_req_fire) begin
|
||||
if (core_req_rw)
|
||||
`TRACE(2, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel));
|
||||
`TRACE(2, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel));
|
||||
else
|
||||
`TRACE(2, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel));
|
||||
end
|
||||
if (crsp_queue_fire) begin
|
||||
`TRACE(2, ("%d: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1));
|
||||
`TRACE(2, ("%d: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1));
|
||||
end
|
||||
if (mreq_queue_push) begin
|
||||
if (do_creq_wr_st1 && !WRITEBACK)
|
||||
`TRACE(2, ("%d: %s writethrough: addr=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1));
|
||||
`TRACE(2, ("%d: %s writethrough: addr=0x%0h, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1));
|
||||
else if (do_writeback_st1)
|
||||
`TRACE(2, ("%d: %s writeback: addr=0x%0h, byteen=%b, data=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data));
|
||||
`TRACE(2, ("%d: %s writeback: addr=0x%0h, byteen=%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data));
|
||||
else
|
||||
`TRACE(2, ("%d: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1));
|
||||
end
|
||||
|
|
10
hw/rtl/cache/VX_cache_cluster.sv
vendored
10
hw/rtl/cache/VX_cache_cluster.sv
vendored
|
@ -49,6 +49,9 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
|
||||
// Enable dirty bytes on writeback
|
||||
parameter DIRTY_BYTES = 0,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
|
@ -99,6 +102,8 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH (ARB_TAG_WIDTH)
|
||||
) arb_core_bus_if[NUM_CACHES * NUM_REQS]();
|
||||
|
||||
`RESET_RELAY_EX (cache_arb_reset, reset, NUM_REQS, `MAX_FANOUT);
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (WORD_SIZE),
|
||||
|
@ -114,8 +119,6 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
`ASSIGN_VX_MEM_BUS_IF (core_bus_tmp_if[j], core_bus_if[j * NUM_REQS + i]);
|
||||
end
|
||||
|
||||
`RESET_RELAY (arb_reset, reset);
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_INPUTS (NUM_INPUTS),
|
||||
.NUM_OUTPUTS (NUM_CACHES),
|
||||
|
@ -127,7 +130,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
.RSP_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? 2 : 0)
|
||||
) cache_arb (
|
||||
.clk (clk),
|
||||
.reset (arb_reset),
|
||||
.reset (cache_arb_reset[i]),
|
||||
.bus_in_if (core_bus_tmp_if),
|
||||
.bus_out_if (arb_core_bus_tmp_if)
|
||||
);
|
||||
|
@ -155,6 +158,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
.MREQ_SIZE (MREQ_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.DIRTY_BYTES (DIRTY_BYTES),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.TAG_WIDTH (ARB_TAG_WIDTH),
|
||||
.TAG_SEL_IDX (TAG_SEL_IDX),
|
||||
|
|
123
hw/rtl/cache/VX_cache_data.sv
vendored
123
hw/rtl/cache/VX_cache_data.sv
vendored
|
@ -30,6 +30,8 @@ module VX_cache_data #(
|
|||
parameter WRITE_ENABLE = 1,
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
// Enable dirty bytes on writeback
|
||||
parameter DIRTY_BYTES = 0,
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0
|
||||
) (
|
||||
|
@ -42,6 +44,7 @@ module VX_cache_data #(
|
|||
|
||||
input wire stall,
|
||||
|
||||
input wire init,
|
||||
input wire read,
|
||||
input wire fill,
|
||||
input wire flush,
|
||||
|
@ -53,89 +56,88 @@ module VX_cache_data #(
|
|||
input wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen,
|
||||
input wire [NUM_WAYS-1:0] way_sel,
|
||||
output wire [`CS_WORD_WIDTH-1:0] read_data,
|
||||
output wire dirty_valid,
|
||||
output wire [`CS_LINE_WIDTH-1:0] dirty_data,
|
||||
output wire [LINE_SIZE-1:0] dirty_byteen
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_PARAM (BANK_ID)
|
||||
`UNUSED_PARAM (WORD_SIZE)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (stall)
|
||||
`UNUSED_VAR (line_addr)
|
||||
`UNUSED_VAR (init)
|
||||
`UNUSED_VAR (read)
|
||||
`UNUSED_VAR (flush)
|
||||
|
||||
localparam BYTEENW = (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) ? (LINE_SIZE * NUM_WAYS) : 1;
|
||||
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_rdata;
|
||||
wire [`LOG2UP(NUM_WAYS)-1:0] way_idx;
|
||||
|
||||
if (WRITEBACK) begin
|
||||
reg [`CS_LINES_PER_BANK * NUM_WAYS-1:0][LINE_SIZE-1:0] dirty_bytes_r;
|
||||
reg [`CS_LINES_PER_BANK * NUM_WAYS-1:0] dirty_blocks_r;
|
||||
if (DIRTY_BYTES) begin
|
||||
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_rdata;
|
||||
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_wdata;
|
||||
|
||||
wire [`CLOG2(`CS_LINES_PER_BANK * NUM_WAYS)-1:0] way_addr;
|
||||
if (NUM_WAYS > 1) begin
|
||||
assign way_addr = {line_sel, way_idx};
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin
|
||||
wire [LINE_SIZE-1:0] wdata = write ? (bs_rdata[i] | write_byteen) : ((fill || flush) ? '0 : bs_rdata[i]);
|
||||
assign bs_wdata[i] = init ? '0 : (way_sel[i] ? wdata : bs_rdata[i]);
|
||||
end
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (LINE_SIZE * NUM_WAYS),
|
||||
.SIZE (`CS_LINES_PER_BANK)
|
||||
) byteen_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (write || fill || flush),
|
||||
.write (init || write || fill || flush),
|
||||
.wren (1'b1),
|
||||
.addr (line_sel),
|
||||
.wdata (bs_wdata),
|
||||
.rdata (bs_rdata)
|
||||
);
|
||||
|
||||
assign dirty_byteen = bs_rdata[way_idx];
|
||||
end else begin
|
||||
assign way_addr = line_sel;
|
||||
assign dirty_byteen = {LINE_SIZE{1'b1}};
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (fill) begin
|
||||
dirty_bytes_r[way_addr] <= '0;
|
||||
end else if (write) begin
|
||||
dirty_bytes_r[way_addr] <= dirty_bytes_r[way_addr] | write_byteen;
|
||||
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] flipped_rdata;
|
||||
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
|
||||
for (genvar j = 0; j < NUM_WAYS; ++j) begin
|
||||
assign flipped_rdata[j][i] = line_rdata[i][j];
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
for (integer i = 0; i < `CS_LINES_PER_BANK * NUM_WAYS; ++i) begin
|
||||
dirty_blocks_r[i] <= 0;
|
||||
end
|
||||
end else begin
|
||||
if (fill) begin
|
||||
dirty_blocks_r[way_addr] <= 0;
|
||||
end else if (write) begin
|
||||
dirty_blocks_r[way_addr] <= 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign dirty_byteen = dirty_bytes_r[way_addr];
|
||||
assign dirty_valid = dirty_blocks_r[way_addr];
|
||||
assign dirty_data = flipped_rdata[way_idx];
|
||||
end else begin
|
||||
assign dirty_byteen = '0;
|
||||
assign dirty_valid = 0;
|
||||
assign dirty_data = '0;
|
||||
end
|
||||
|
||||
// order the data layout to perform ways multiplexing last.
|
||||
// this allows converting way index to binary in parallel with BRAM read.
|
||||
// this allows converting way index to binary in parallel with BRAM read access and way selection.
|
||||
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] wdata;
|
||||
wire [BYTEENW-1:0] wren;
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
|
||||
wire [BYTEENW-1:0] line_wren;
|
||||
|
||||
if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin
|
||||
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
|
||||
assign wdata[i] = (fill || !WRITE_ENABLE) ? {NUM_WAYS{fill_data[i]}} : {NUM_WAYS{write_data[i]}};
|
||||
end
|
||||
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w;
|
||||
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
|
||||
for (genvar j = 0; j < NUM_WAYS; ++j) begin
|
||||
assign line_wdata[i][j] = (fill || !WRITE_ENABLE) ? fill_data[i] : write_data[i];
|
||||
assign wren_w[i][j] = ((fill || !WRITE_ENABLE) ? {WORD_SIZE{1'b1}} : write_byteen[i])
|
||||
& {WORD_SIZE{(way_sel[j] || (NUM_WAYS == 1))}};
|
||||
end
|
||||
end
|
||||
assign wren = wren_w;
|
||||
assign line_wren = wren_w;
|
||||
end else begin
|
||||
`UNUSED_VAR (write)
|
||||
`UNUSED_VAR (write_byteen)
|
||||
`UNUSED_VAR (write_data)
|
||||
assign wdata = fill_data;
|
||||
assign wren = fill;
|
||||
assign line_wdata = fill_data;
|
||||
assign line_wren = fill;
|
||||
end
|
||||
|
||||
VX_onehot_encoder #(
|
||||
|
@ -146,53 +148,50 @@ module VX_cache_data #(
|
|||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] rdata;
|
||||
wire line_read = (read && ~stall)
|
||||
|| (WRITEBACK && (fill || flush));
|
||||
|
||||
wire line_write = write || fill;
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (`CS_LINE_WIDTH * NUM_WAYS),
|
||||
.SIZE (`CS_LINES_PER_BANK),
|
||||
.WRENW (BYTEENW),
|
||||
.NO_RWCHECK (1)
|
||||
.NO_RWCHECK (1),
|
||||
.RW_ASSERT (1)
|
||||
) data_store (
|
||||
.clk (clk),
|
||||
.read (1'b1),
|
||||
.write (write || fill),
|
||||
.wren (wren),
|
||||
.reset (reset),
|
||||
.read (line_read),
|
||||
.write (line_write),
|
||||
.wren (line_wren),
|
||||
.addr (line_sel),
|
||||
.wdata (wdata),
|
||||
.rdata (rdata)
|
||||
.wdata (line_wdata),
|
||||
.rdata (line_rdata)
|
||||
);
|
||||
|
||||
wire [NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata;
|
||||
if (`CS_WORDS_PER_LINE > 1) begin
|
||||
assign per_way_rdata = rdata[wsel];
|
||||
assign per_way_rdata = line_rdata[wsel];
|
||||
end else begin
|
||||
`UNUSED_VAR (wsel)
|
||||
assign per_way_rdata = rdata;
|
||||
assign per_way_rdata = line_rdata;
|
||||
end
|
||||
assign read_data = per_way_rdata[way_idx];
|
||||
|
||||
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] dirty_data_w;
|
||||
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
|
||||
for (genvar j = 0; j < NUM_WAYS; ++j) begin
|
||||
assign dirty_data_w[j][i] = rdata[i][j];
|
||||
end
|
||||
end
|
||||
assign dirty_data = dirty_data_w[way_idx];
|
||||
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
always @(posedge clk) begin
|
||||
if (fill && ~stall) begin
|
||||
`TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data));
|
||||
`TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data));
|
||||
end
|
||||
if (flush && ~stall) begin
|
||||
`TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b, byteen=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_valid, dirty_byteen));
|
||||
`TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_byteen, dirty_data));
|
||||
end
|
||||
if (read && ~stall) begin
|
||||
`TRACE(3, ("%d: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid));
|
||||
`TRACE(3, ("%d: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid));
|
||||
end
|
||||
if (write && ~stall) begin
|
||||
`TRACE(3, ("%d: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid));
|
||||
`TRACE(3, ("%d: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
33
hw/rtl/cache/VX_cache_flush.sv
vendored
33
hw/rtl/cache/VX_cache_flush.sv
vendored
|
@ -26,13 +26,16 @@ module VX_cache_flush #(
|
|||
VX_mem_bus_if.slave core_bus_in_if [NUM_REQS],
|
||||
VX_mem_bus_if.master core_bus_out_if [NUM_REQS],
|
||||
input wire [NUM_BANKS-1:0] bank_req_fire,
|
||||
output wire [NUM_BANKS-1:0] flush_valid,
|
||||
input wire [NUM_BANKS-1:0] flush_ready
|
||||
output wire [NUM_BANKS-1:0] flush_begin,
|
||||
input wire [NUM_BANKS-1:0] flush_end
|
||||
);
|
||||
localparam STATE_IDLE = 0;
|
||||
localparam STATE_WAIT = 1;
|
||||
localparam STATE_WAIT1 = 1;
|
||||
localparam STATE_FLUSH = 2;
|
||||
localparam STATE_DONE = 3;
|
||||
localparam STATE_WAIT2 = 3;
|
||||
localparam STATE_DONE = 4;
|
||||
|
||||
reg [2:0] state, state_n;
|
||||
|
||||
// track in-flight core requests
|
||||
|
||||
|
@ -76,8 +79,6 @@ module VX_cache_flush #(
|
|||
`UNUSED_VAR (bank_req_fire)
|
||||
end
|
||||
|
||||
|
||||
reg [1:0] state, state_n;
|
||||
reg [NUM_BANKS-1:0] flush_done, flush_done_n;
|
||||
|
||||
wire [NUM_REQS-1:0] flush_req_mask;
|
||||
|
@ -113,22 +114,32 @@ module VX_cache_flush #(
|
|||
case (state)
|
||||
STATE_IDLE: begin
|
||||
if (flush_req_enable) begin
|
||||
state_n = (BANK_SEL_LATENCY != 0) ? STATE_WAIT : STATE_FLUSH;
|
||||
state_n = (BANK_SEL_LATENCY != 0) ? STATE_WAIT1 : STATE_FLUSH;
|
||||
end
|
||||
end
|
||||
STATE_WAIT: begin
|
||||
STATE_WAIT1: begin
|
||||
if (no_inflight_reqs) begin
|
||||
state_n = STATE_FLUSH;
|
||||
end
|
||||
end
|
||||
STATE_FLUSH: begin
|
||||
flush_done_n = flush_done | flush_ready;
|
||||
if (flush_done_n == 0) begin
|
||||
// generate a flush request pulse
|
||||
state_n = STATE_WAIT2;
|
||||
end
|
||||
STATE_WAIT2: begin
|
||||
// wait for all banks to finish flushing
|
||||
flush_done_n = flush_done | flush_end;
|
||||
if (flush_done_n == {NUM_BANKS{1'b1}}) begin
|
||||
state_n = STATE_DONE;
|
||||
flush_done_n = '0;
|
||||
// only release current flush requests
|
||||
// and keep normal requests locked
|
||||
lock_released_n = flush_req_mask;
|
||||
end
|
||||
end
|
||||
STATE_DONE: begin
|
||||
// wait until released flush requests are issued
|
||||
// when returning to IDLE state other requests will unlock
|
||||
lock_released_n = lock_released & ~core_bus_out_ready;
|
||||
if (lock_released_n == 0) begin
|
||||
state_n = STATE_IDLE;
|
||||
|
@ -149,6 +160,6 @@ module VX_cache_flush #(
|
|||
end
|
||||
end
|
||||
|
||||
assign flush_valid = {NUM_BANKS{state == STATE_FLUSH}};
|
||||
assign flush_begin = {NUM_BANKS{state == STATE_FLUSH}};
|
||||
|
||||
endmodule
|
||||
|
|
52
hw/rtl/cache/VX_cache_init.sv
vendored
52
hw/rtl/cache/VX_cache_init.sv
vendored
|
@ -1,52 +0,0 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
// cache init unit: after reset, steps a line counter through every line index once
|
||||
module VX_cache_init #(
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 16,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
output wire [`CS_LINE_SEL_BITS-1:0] addr_out,
|
||||
output wire valid_out
|
||||
);
|
||||
reg enabled;
|
||||
reg [`CS_LINE_SEL_BITS-1:0] line_ctr;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
enabled <= 1;
|
||||
line_ctr <= '0;
|
||||
end else begin
|
||||
if (enabled) begin
|
||||
if (line_ctr == ((2 ** `CS_LINE_SEL_BITS)-1)) begin
|
||||
enabled <= 0;
|
||||
end
|
||||
line_ctr <= line_ctr + `CS_LINE_SEL_BITS'(1);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign addr_out = line_ctr;
|
||||
assign valid_out = enabled;
|
||||
|
||||
endmodule
|
3
hw/rtl/cache/VX_cache_mshr.sv
vendored
3
hw/rtl/cache/VX_cache_mshr.sv
vendored
|
@ -232,9 +232,10 @@ module VX_cache_mshr #(
|
|||
.LUTRAM (1)
|
||||
) entries (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (1'b1),
|
||||
.write (allocate_valid),
|
||||
`UNUSED_PIN (wren),
|
||||
.wren (1'b1),
|
||||
.waddr (allocate_id_r),
|
||||
.wdata (allocate_data),
|
||||
.raddr (dequeue_id_r),
|
||||
|
|
96
hw/rtl/cache/VX_cache_tags.sv
vendored
96
hw/rtl/cache/VX_cache_tags.sv
vendored
|
@ -26,6 +26,8 @@ module VX_cache_tags #(
|
|||
parameter NUM_WAYS = 1,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 1,
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0
|
||||
) (
|
||||
|
@ -40,74 +42,100 @@ module VX_cache_tags #(
|
|||
|
||||
// init/fill/lookup
|
||||
input wire init,
|
||||
input wire flush,
|
||||
input wire fill,
|
||||
input wire write,
|
||||
input wire lookup,
|
||||
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
|
||||
input wire [NUM_WAYS-1:0] way_sel,
|
||||
output wire [NUM_WAYS-1:0] tag_matches,
|
||||
|
||||
// replacement
|
||||
output wire [NUM_WAYS-1:0] repl_way,
|
||||
output wire [`CS_TAG_SEL_BITS-1:0] repl_tag
|
||||
// eviction
|
||||
output wire evict_dirty,
|
||||
output wire [NUM_WAYS-1:0] evict_way,
|
||||
output wire [`CS_TAG_SEL_BITS-1:0] evict_tag
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_PARAM (BANK_ID)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (lookup)
|
||||
|
||||
// valid, tag
|
||||
localparam TAG_WIDTH = 1 + `CS_TAG_SEL_BITS;
|
||||
// valid, dirty, tag
|
||||
localparam TAG_WIDTH = 1 + WRITEBACK + `CS_TAG_SEL_BITS;
|
||||
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
|
||||
wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_ADDR_TAG(line_addr);
|
||||
|
||||
wire [NUM_WAYS-1:0][`CS_TAG_SEL_BITS-1:0] read_tag;
|
||||
wire [NUM_WAYS-1:0] read_valid;
|
||||
wire [NUM_WAYS-1:0] read_dirty;
|
||||
|
||||
if (NUM_WAYS > 1) begin
|
||||
reg [NUM_WAYS-1:0] repl_way_r;
|
||||
reg [NUM_WAYS-1:0] evict_way_r;
|
||||
// cyclic assignment of replacement way
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
repl_way_r <= 1;
|
||||
end else if (~stall) begin // hold the value on stalls to prevent filling different slots twice
|
||||
repl_way_r <= {repl_way_r[NUM_WAYS-2:0], repl_way_r[NUM_WAYS-1]};
|
||||
evict_way_r <= 1;
|
||||
end else if (~stall) begin // holding the value on stalls prevents filling different slots twice
|
||||
evict_way_r <= {evict_way_r[NUM_WAYS-2:0], evict_way_r[NUM_WAYS-1]};
|
||||
end
|
||||
end
|
||||
|
||||
assign repl_way = repl_way_r;
|
||||
assign evict_way = fill ? evict_way_r : way_sel;
|
||||
|
||||
VX_onehot_mux #(
|
||||
.DATAW (`CS_TAG_SEL_BITS),
|
||||
.N (NUM_WAYS)
|
||||
) repl_tag_sel (
|
||||
) evict_tag_sel (
|
||||
.data_in (read_tag),
|
||||
.sel_in (repl_way_r),
|
||||
.data_out (repl_tag)
|
||||
.sel_in (evict_way),
|
||||
.data_out (evict_tag)
|
||||
);
|
||||
end else begin
|
||||
`UNUSED_VAR (stall)
|
||||
assign repl_way = 1'b1;
|
||||
assign repl_tag = read_tag;
|
||||
assign evict_way = 1'b1;
|
||||
assign evict_tag = read_tag;
|
||||
end
|
||||
|
||||
// fill and flush need to also read in writeback mode
|
||||
wire fill_s = fill && (!WRITEBACK || ~stall);
|
||||
wire flush_s = flush && (!WRITEBACK || ~stall);
|
||||
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin
|
||||
|
||||
wire do_fill = fill && repl_way[i];
|
||||
wire do_write = init || do_fill;
|
||||
wire line_valid = ~init;
|
||||
wire do_fill = fill_s && evict_way[i];
|
||||
wire do_flush = flush_s && (!WRITEBACK || way_sel[i]); // flush the whole line in writethrough mode
|
||||
wire do_write = WRITEBACK && write && tag_matches[i];
|
||||
|
||||
wire line_read = (WRITEBACK && (fill_s || flush_s));
|
||||
wire line_write = init || do_fill || do_flush || do_write;
|
||||
wire line_valid = ~(init || flush);
|
||||
|
||||
wire [TAG_WIDTH-1:0] line_wdata;
|
||||
wire [TAG_WIDTH-1:0] line_rdata;
|
||||
|
||||
if (WRITEBACK) begin
|
||||
assign line_wdata = {line_valid, write, line_tag};
|
||||
assign {read_valid[i], read_dirty[i], read_tag[i]} = line_rdata;
|
||||
end else begin
|
||||
assign line_wdata = {line_valid, line_tag};
|
||||
assign {read_valid[i], read_tag[i]} = line_rdata;
|
||||
assign read_dirty[i] = 1'b0;
|
||||
end
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (TAG_WIDTH),
|
||||
.SIZE (`CS_LINES_PER_BANK),
|
||||
.NO_RWCHECK (1)
|
||||
.NO_RWCHECK (1),
|
||||
.RW_ASSERT (1)
|
||||
) tag_store (
|
||||
.clk (clk),
|
||||
.read (1'b1),
|
||||
.write (do_write),
|
||||
`UNUSED_PIN (wren),
|
||||
.reset (reset),
|
||||
.read (line_read),
|
||||
.write (line_write),
|
||||
.wren (1'b1),
|
||||
.addr (line_sel),
|
||||
.wdata ({line_valid, line_tag}),
|
||||
.rdata ({read_valid[i], read_tag[i]})
|
||||
.wdata (line_wdata),
|
||||
.rdata (line_rdata)
|
||||
);
|
||||
end
|
||||
|
||||
|
@ -115,19 +143,31 @@ module VX_cache_tags #(
|
|||
assign tag_matches[i] = read_valid[i] && (line_tag == read_tag[i]);
|
||||
end
|
||||
|
||||
assign evict_dirty = | (read_dirty & evict_way);
|
||||
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_sel};
|
||||
always @(posedge clk) begin
|
||||
if (fill && ~stall) begin
|
||||
`TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), repl_way, line_sel, line_tag));
|
||||
`TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_sel, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID)));
|
||||
end
|
||||
if (init) begin
|
||||
`TRACE(3, ("%d: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel));
|
||||
end
|
||||
if (flush && ~stall) begin
|
||||
`TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_sel, line_sel, evict_dirty));
|
||||
end
|
||||
if (lookup && ~stall) begin
|
||||
if (tag_matches != 0) begin
|
||||
`TRACE(3, ("%d: %s hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid));
|
||||
if (write)
|
||||
`TRACE(3, ("%d: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid));
|
||||
else
|
||||
`TRACE(3, ("%d: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid));
|
||||
end else begin
|
||||
`TRACE(3, ("%d: %s miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid));
|
||||
if (write)
|
||||
`TRACE(3, ("%d: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid));
|
||||
else
|
||||
`TRACE(3, ("%d: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid));
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
46
hw/rtl/cache/VX_cache_top.sv
vendored
46
hw/rtl/cache/VX_cache_top.sv
vendored
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -20,20 +20,20 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
parameter NUM_REQS = 4,
|
||||
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 16384,
|
||||
parameter CACHE_SIZE = 16384,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 64,
|
||||
parameter LINE_SIZE = 64,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 4,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 4,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
parameter WORD_SIZE = 4,
|
||||
|
||||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 2,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MSHR_SIZE = 16,
|
||||
parameter MSHR_SIZE = 16,
|
||||
// Memory Response Queue Size
|
||||
parameter MRSQ_SIZE = 0,
|
||||
// Memory Request Queue Size
|
||||
|
@ -42,6 +42,12 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
|
||||
// Enable dirty bytes on writeback
|
||||
parameter DIRTY_BYTES = 0,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
|
@ -55,7 +61,7 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
parameter MEM_OUT_BUF = 2,
|
||||
|
||||
parameter MEM_TAG_WIDTH = `CLOG2(MSHR_SIZE) + `CLOG2(NUM_BANKS)
|
||||
) (
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
@ -82,17 +88,17 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
|
||||
// Memory request
|
||||
output wire mem_req_valid,
|
||||
output wire mem_req_rw,
|
||||
output wire mem_req_rw,
|
||||
output wire [LINE_SIZE-1:0] mem_req_byteen,
|
||||
output wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire [`CS_LINE_WIDTH-1:0] mem_req_data,
|
||||
output wire [MEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
output wire [`CS_LINE_WIDTH-1:0] mem_req_data,
|
||||
output wire [MEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
input wire mem_req_ready,
|
||||
|
||||
|
||||
// Memory response
|
||||
input wire mem_rsp_valid,
|
||||
input wire mem_rsp_valid,
|
||||
input wire [`CS_LINE_WIDTH-1:0] mem_rsp_data,
|
||||
input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
output wire mem_rsp_ready
|
||||
);
|
||||
VX_mem_bus_if #(
|
||||
|
@ -127,18 +133,18 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
|
||||
// Memory request
|
||||
assign mem_req_valid = mem_bus_if.req_valid;
|
||||
assign mem_req_rw = mem_bus_if.req_data.rw;
|
||||
assign mem_req_rw = mem_bus_if.req_data.rw;
|
||||
assign mem_req_byteen = mem_bus_if.req_data.byteen;
|
||||
assign mem_req_addr = mem_bus_if.req_data.addr;
|
||||
assign mem_req_data = mem_bus_if.req_data.data;
|
||||
assign mem_req_tag = mem_bus_if.req_data.tag;
|
||||
assign mem_req_data = mem_bus_if.req_data.data;
|
||||
assign mem_req_tag = mem_bus_if.req_data.tag;
|
||||
assign mem_bus_if.req_ready = mem_req_ready;
|
||||
`UNUSED_VAR (mem_bus_if.req_data.atype)
|
||||
|
||||
|
||||
// Memory response
|
||||
assign mem_bus_if.rsp_valid = mem_rsp_valid;
|
||||
assign mem_bus_if.rsp_valid = mem_rsp_valid;
|
||||
assign mem_bus_if.rsp_data.data = mem_rsp_data;
|
||||
assign mem_bus_if.rsp_data.tag = mem_rsp_tag;
|
||||
assign mem_bus_if.rsp_data.tag = mem_rsp_tag;
|
||||
assign mem_rsp_ready = mem_bus_if.rsp_ready;
|
||||
|
||||
VX_cache #(
|
||||
|
@ -156,6 +162,8 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.DIRTY_BYTES (DIRTY_BYTES),
|
||||
.CORE_OUT_BUF (CORE_OUT_BUF),
|
||||
.MEM_OUT_BUF (MEM_OUT_BUF)
|
||||
) cache (
|
||||
|
|
12
hw/rtl/cache/VX_cache_wrap.sv
vendored
12
hw/rtl/cache/VX_cache_wrap.sv
vendored
|
@ -48,6 +48,9 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
|
||||
// Enable dirty bytes on writeback
|
||||
parameter DIRTY_BYTES = 0,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
|
@ -187,6 +190,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
.MREQ_SIZE (MREQ_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.DIRTY_BYTES (DIRTY_BYTES),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.CORE_OUT_BUF (NC_OR_BYPASS ? 1 : CORE_OUT_BUF),
|
||||
|
@ -223,12 +227,12 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
always @(posedge clk) begin
|
||||
if (core_req_fire) begin
|
||||
if (core_bus_if[i].req_data.rw)
|
||||
`TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid));
|
||||
`TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid));
|
||||
else
|
||||
`TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid));
|
||||
end
|
||||
if (core_rsp_fire) begin
|
||||
`TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid));
|
||||
`TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid));
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -250,14 +254,14 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
always @(posedge clk) begin
|
||||
if (mem_req_fire) begin
|
||||
if (mem_bus_if.req_data.rw)
|
||||
`TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
|
||||
`TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%h, data=0x%h (#%0d)\n",
|
||||
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid));
|
||||
else
|
||||
`TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid));
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n",
|
||||
`TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n",
|
||||
$time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid));
|
||||
end
|
||||
end
|
||||
|
|
|
@ -83,7 +83,7 @@ module VX_alu_muldiv #(
|
|||
.DEPTH (`LATENCY_IMUL),
|
||||
.RESETW (1)
|
||||
) mul_shift_reg (
|
||||
.clk(clk),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (mul_ready_in),
|
||||
.data_in ({mul_valid_in, execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, mul_result_tmp}),
|
||||
|
@ -324,6 +324,7 @@ module VX_alu_muldiv #(
|
|||
VX_stream_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATAW (TAG_WIDTH + (NUM_LANES * `XLEN)),
|
||||
.ARBITER ("F"),
|
||||
.OUT_BUF (1)
|
||||
) rsp_buf (
|
||||
.clk (clk),
|
||||
|
|
|
@ -57,7 +57,7 @@ module VX_alu_unit #(
|
|||
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin
|
||||
|
||||
`RESET_RELAY (block_reset, reset);
|
||||
`RESET_RELAY_EN (block_reset, reset,(BLOCK_SIZE > 1));
|
||||
|
||||
wire is_muldiv_op = `EXT_M_ENABLED && (per_block_execute_if[block_idx].data.op_args.alu.xtype == `ALU_TYPE_MULDIV);
|
||||
|
||||
|
@ -72,15 +72,13 @@ module VX_alu_unit #(
|
|||
assign int_execute_if.valid = per_block_execute_if[block_idx].valid && ~is_muldiv_op;
|
||||
assign int_execute_if.data = per_block_execute_if[block_idx].data;
|
||||
|
||||
`RESET_RELAY (int_reset, block_reset);
|
||||
|
||||
VX_alu_int #(
|
||||
.INSTANCE_ID ($sformatf("%s-int%0d", INSTANCE_ID, block_idx)),
|
||||
.BLOCK_IDX (block_idx),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) alu_int (
|
||||
.clk (clk),
|
||||
.reset (int_reset),
|
||||
.reset (block_reset),
|
||||
.execute_if (int_execute_if),
|
||||
.branch_ctl_if (branch_ctl_if[block_idx]),
|
||||
.commit_if (int_commit_if)
|
||||
|
@ -99,14 +97,12 @@ module VX_alu_unit #(
|
|||
assign muldiv_execute_if.valid = per_block_execute_if[block_idx].valid && is_muldiv_op;
|
||||
assign muldiv_execute_if.data = per_block_execute_if[block_idx].data;
|
||||
|
||||
`RESET_RELAY (muldiv_reset, block_reset);
|
||||
|
||||
VX_alu_muldiv #(
|
||||
.INSTANCE_ID ($sformatf("%s-muldiv%0d", INSTANCE_ID, block_idx)),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) muldiv_unit (
|
||||
.clk (clk),
|
||||
.reset (muldiv_reset),
|
||||
.reset (block_reset),
|
||||
.execute_if (muldiv_execute_if),
|
||||
.commit_if (muldiv_commit_if)
|
||||
);
|
||||
|
@ -121,15 +117,14 @@ module VX_alu_unit #(
|
|||
|
||||
// send response
|
||||
|
||||
`RESET_RELAY (arb_reset, block_reset);
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (RSP_ARB_SIZE),
|
||||
.DATAW (RSP_ARB_DATAW),
|
||||
.OUT_BUF (PARTIAL_BW ? 1 : 3)
|
||||
.OUT_BUF (PARTIAL_BW ? 1 : 3),
|
||||
.ARBITER ("F")
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (arb_reset),
|
||||
.reset (block_reset),
|
||||
.valid_in ({
|
||||
`ifdef EXT_M_ENABLE
|
||||
muldiv_commit_if.valid,
|
||||
|
|
|
@ -313,6 +313,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH),
|
||||
.TAG_SEL_BITS (DCACHE_TAG_WIDTH - `UUID_WIDTH),
|
||||
.ARBITER ("P"),
|
||||
.REQ_OUT_BUF (0),
|
||||
.RSP_OUT_BUF (0)
|
||||
) lsu_adapter (
|
||||
|
|
|
@ -52,7 +52,7 @@ module VX_dcr_data import VX_gpu_pkg::*, VX_trace_pkg::*; (
|
|||
if (dcr_bus_if.write_valid) begin
|
||||
`TRACE(1, ("%d: base-dcr: state=", $time));
|
||||
trace_base_dcr(1, dcr_bus_if.write_addr);
|
||||
`TRACE(1, (", data=0x%0h\n", dcr_bus_if.write_data));
|
||||
`TRACE(1, (", data=0x%h\n", dcr_bus_if.write_data));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -40,7 +40,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
localparam ISSUE_W = `LOG2UP(`ISSUE_WIDTH);
|
||||
localparam IN_DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `INST_OP_BITS + `INST_ARGS_BITS + 1 + `PC_BITS + `NR_BITS + `NT_WIDTH + (3 * `NUM_THREADS * `XLEN);
|
||||
localparam OUT_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `INST_OP_BITS + `INST_ARGS_BITS + 1 + `PC_BITS + `NR_BITS + `NT_WIDTH + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1;
|
||||
localparam FANOUT_ENABLE= (`NUM_THREADS > MAX_FANOUT);
|
||||
localparam FANOUT_ENABLE= (`NUM_THREADS > (MAX_FANOUT + MAX_FANOUT /2));
|
||||
|
||||
localparam DATA_TMASK_OFF = IN_DATAW - (`UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS);
|
||||
localparam DATA_REGS_OFF = 0;
|
||||
|
@ -85,6 +85,8 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
wire [ISSUE_W-1:0] issue_idx = ISSUE_W'(batch_idx * BLOCK_SIZE) + ISSUE_W'(block_idx);
|
||||
assign issue_indices[block_idx] = issue_idx;
|
||||
|
||||
`RESET_RELAY_EN (block_reset, reset, (BLOCK_SIZE > 1));
|
||||
|
||||
wire valid_p, ready_p;
|
||||
|
||||
if (`NUM_THREADS != NUM_LANES) begin
|
||||
|
@ -100,7 +102,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
wire fire_eop = fire_p && is_last_p;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
if (block_reset) begin
|
||||
sent_mask_p <= '0;
|
||||
is_first_p <= 1;
|
||||
end else begin
|
||||
|
@ -215,8 +217,6 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
assign isw = block_idx;
|
||||
end
|
||||
|
||||
`RESET_RELAY(buf_out_reset, reset);
|
||||
|
||||
wire [`NW_WIDTH-1:0] block_wid = wis_to_wid(dispatch_data[issue_idx][DATA_TMASK_OFF+`NUM_THREADS +: ISSUE_WIS_W], isw);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
|
@ -225,7 +225,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
||||
) buf_out (
|
||||
.clk (clk),
|
||||
.reset (buf_out_reset),
|
||||
.reset (block_reset),
|
||||
.valid_in (valid_p),
|
||||
.ready_in (ready_p),
|
||||
.data_in ({
|
||||
|
|
|
@ -56,9 +56,10 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
.LUTRAM (1)
|
||||
) tag_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (1'b1),
|
||||
.write (icache_req_fire),
|
||||
`UNUSED_PIN (wren),
|
||||
.wren (1'b1),
|
||||
.waddr (req_tag),
|
||||
.wdata ({schedule_if.data.PC, schedule_if.data.tmask}),
|
||||
.raddr (rsp_tag),
|
||||
|
|
|
@ -57,7 +57,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
`UNUSED_VAR (per_block_execute_if[block_idx].data.tid)
|
||||
`UNUSED_VAR (per_block_execute_if[block_idx].data.wb)
|
||||
|
||||
`RESET_RELAY (block_reset, reset);
|
||||
`RESET_RELAY_EN (block_reset, reset, (BLOCK_SIZE > 1));
|
||||
|
||||
// Store request info
|
||||
wire fpu_req_valid, fpu_req_ready;
|
||||
|
@ -84,14 +84,12 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
wire execute_fire = per_block_execute_if[block_idx].valid && per_block_execute_if[block_idx].ready;
|
||||
wire fpu_rsp_fire = fpu_rsp_valid && fpu_rsp_ready;
|
||||
|
||||
`RESET_RELAY (ibuf_reset, block_reset);
|
||||
|
||||
VX_index_buffer #(
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + PID_WIDTH + 1 + 1),
|
||||
.SIZE (`FPUQ_SIZE)
|
||||
) tag_store (
|
||||
.clk (clk),
|
||||
.reset (ibuf_reset),
|
||||
.reset (block_reset),
|
||||
.acquire_en (execute_fire),
|
||||
.write_addr (fpu_req_tag),
|
||||
.write_data ({per_block_execute_if[block_idx].data.uuid, per_block_execute_if[block_idx].data.wid, per_block_execute_if[block_idx].data.tmask, per_block_execute_if[block_idx].data.PC, per_block_execute_if[block_idx].data.rd, per_block_execute_if[block_idx].data.pid, per_block_execute_if[block_idx].data.sop, per_block_execute_if[block_idx].data.eop}),
|
||||
|
@ -113,8 +111,6 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
assign fpu_req_valid = per_block_execute_if[block_idx].valid && ~mdata_full;
|
||||
assign per_block_execute_if[block_idx].ready = fpu_req_ready && ~mdata_full;
|
||||
|
||||
`RESET_RELAY (fpu_reset, block_reset);
|
||||
|
||||
`ifdef FPU_DPI
|
||||
|
||||
VX_fpu_dpi #(
|
||||
|
@ -123,7 +119,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
.OUT_BUF (PARTIAL_BW ? 1 : 3)
|
||||
) fpu_dpi (
|
||||
.clk (clk),
|
||||
.reset (fpu_reset),
|
||||
.reset (block_reset),
|
||||
|
||||
.valid_in (fpu_req_valid),
|
||||
.mask_in (per_block_execute_if[block_idx].data.tmask),
|
||||
|
@ -152,7 +148,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
.OUT_BUF (PARTIAL_BW ? 1 : 3)
|
||||
) fpu_fpnew (
|
||||
.clk (clk),
|
||||
.reset (fpu_reset),
|
||||
.reset (block_reset),
|
||||
|
||||
.valid_in (fpu_req_valid),
|
||||
.mask_in (per_block_execute_if[block_idx].data.tmask),
|
||||
|
@ -181,7 +177,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
.OUT_BUF (PARTIAL_BW ? 1 : 3)
|
||||
) fpu_dsp (
|
||||
.clk (clk),
|
||||
.reset (fpu_reset),
|
||||
.reset (block_reset),
|
||||
|
||||
.valid_in (fpu_req_valid),
|
||||
.mask_in (per_block_execute_if[block_idx].data.tmask),
|
||||
|
@ -228,14 +224,12 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
|
||||
// send response
|
||||
|
||||
`RESET_RELAY (rsp_reset, block_reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + (NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1),
|
||||
.SIZE (0)
|
||||
) rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (rsp_reset),
|
||||
.reset (block_reset),
|
||||
.valid_in (fpu_rsp_valid),
|
||||
.ready_in (fpu_rsp_ready),
|
||||
.data_in ({fpu_rsp_uuid, fpu_rsp_wid, fpu_rsp_tmask, fpu_rsp_PC, fpu_rsp_rd, fpu_rsp_result, fpu_rsp_pid, fpu_rsp_sop, fpu_rsp_eop}),
|
||||
|
|
|
@ -79,15 +79,13 @@ module VX_gather_unit import VX_gpu_pkg::*; #(
|
|||
.NUM_LANES (NUM_LANES)
|
||||
) commit_tmp_if();
|
||||
|
||||
`RESET_RELAY(commit_out_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (commit_out_reset),
|
||||
.reset (reset),
|
||||
.valid_in (commit_out_valid[i]),
|
||||
.ready_in (commit_out_ready[i]),
|
||||
.data_in (commit_out_data[i]),
|
||||
|
|
|
@ -72,9 +72,10 @@ module VX_ipdom_stack #(
|
|||
.LUTRAM (OUT_REG ? 0 : 1)
|
||||
) store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (1'b1),
|
||||
.write (push),
|
||||
`UNUSED_PIN (wren),
|
||||
.wren (1'b1),
|
||||
.waddr (wr_ptr),
|
||||
.wdata ({q1, q0}),
|
||||
.raddr (rd_ptr),
|
||||
|
|
|
@ -39,6 +39,8 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||
) lsu_switch_if[`NUM_LSU_BLOCKS]();
|
||||
|
||||
`RESET_RELAY_EX (block_reset, reset, `NUM_LSU_BLOCKS, 1);
|
||||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||
|
||||
wire [`NUM_LSU_LANES-1:0] is_addr_local_mask;
|
||||
|
@ -52,15 +54,13 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
|||
wire req_global_ready;
|
||||
wire req_local_ready;
|
||||
|
||||
`RESET_RELAY (switch_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (REQ_DATAW),
|
||||
.SIZE (2),
|
||||
.OUT_REG (1)
|
||||
) req_global_buf (
|
||||
.clk (clk),
|
||||
.reset (switch_reset),
|
||||
.reset (block_reset[i]),
|
||||
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_global),
|
||||
.data_in ({
|
||||
lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask,
|
||||
|
@ -91,7 +91,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
|||
.OUT_REG (0)
|
||||
) req_local_buf (
|
||||
.clk (clk),
|
||||
.reset (switch_reset),
|
||||
.reset (block_reset[i]),
|
||||
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_local),
|
||||
.data_in ({
|
||||
lsu_mem_in_if[i].req_data.mask & is_addr_local_mask,
|
||||
|
@ -126,7 +126,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
|||
.OUT_BUF (1)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (switch_reset),
|
||||
.reset (block_reset[i]),
|
||||
.valid_in ({
|
||||
lsu_switch_if[i].rsp_valid,
|
||||
lsu_mem_out_if[i].rsp_valid
|
||||
|
@ -157,18 +157,17 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||
) lmem_bus_tmp_if[`NUM_LSU_LANES]();
|
||||
|
||||
`RESET_RELAY (adapter_reset, reset);
|
||||
|
||||
VX_lsu_adapter #(
|
||||
.NUM_LANES (`NUM_LSU_LANES),
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH),
|
||||
.TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH),
|
||||
.ARBITER ("P"),
|
||||
.REQ_OUT_BUF (3),
|
||||
.RSP_OUT_BUF (0)
|
||||
) lsu_adapter (
|
||||
.clk (clk),
|
||||
.reset (adapter_reset),
|
||||
.reset (block_reset[i]),
|
||||
.lsu_mem_if (lsu_switch_if[i]),
|
||||
.mem_bus_if (lmem_bus_tmp_if)
|
||||
);
|
||||
|
|
|
@ -490,6 +490,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
VX_stream_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATAW (RSP_ARB_DATAW),
|
||||
.ARBITER ("P"), // prioritize commit_rsp_if
|
||||
.OUT_BUF (3)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
|
|
|
@ -13,6 +13,13 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// reset all GPRs in debug mode
|
||||
`ifdef SIMULATION
|
||||
`ifndef NDEBUG
|
||||
`define GPR_RESET
|
||||
`endif
|
||||
`endif
|
||||
|
||||
module VX_operands import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter NUM_BANKS = 4,
|
||||
|
@ -36,8 +43,9 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
|
||||
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
|
||||
localparam PER_BANK_REGS = `NUM_REGS / NUM_BANKS;
|
||||
localparam METADATAW = ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + `NR_BITS;
|
||||
localparam DATAW = `UUID_WIDTH + METADATAW + 3 * `NUM_THREADS * `XLEN;
|
||||
localparam META_DATAW = ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + `NR_BITS + `UUID_WIDTH;
|
||||
localparam REGS_DATAW = `XLEN * `NUM_THREADS;
|
||||
localparam DATAW = META_DATAW + NUM_SRC_REGS * REGS_DATAW;
|
||||
localparam RAM_ADDRW = `LOG2UP(`NUM_REGS * PER_ISSUE_WARPS);
|
||||
localparam PER_BANK_ADDRW = RAM_ADDRW - BANK_SEL_BITS;
|
||||
localparam XLEN_SIZE = `XLEN / 8;
|
||||
|
@ -46,30 +54,28 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
`UNUSED_VAR (writeback_if.data.sop)
|
||||
|
||||
wire [NUM_SRC_REGS-1:0] src_valid;
|
||||
wire [NUM_SRC_REGS-1:0] req_in_valid;
|
||||
wire [NUM_SRC_REGS-1:0] req_in_ready;
|
||||
wire [NUM_SRC_REGS-1:0] req_in_valid, req_in_ready;
|
||||
wire [NUM_SRC_REGS-1:0][PER_BANK_ADDRW-1:0] req_in_data;
|
||||
wire [NUM_SRC_REGS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx;
|
||||
|
||||
wire [NUM_BANKS-1:0] gpr_rd_valid_n, gpr_rd_ready;
|
||||
reg [NUM_BANKS-1:0] gpr_rd_valid;
|
||||
wire [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr_n;
|
||||
reg [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr;
|
||||
wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data;
|
||||
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx_n;
|
||||
reg [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx;
|
||||
wire [NUM_BANKS-1:0] gpr_rd_valid, gpr_rd_ready;
|
||||
wire [NUM_BANKS-1:0] gpr_rd_valid_st1, gpr_rd_valid_st2;
|
||||
wire [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr, gpr_rd_addr_st1;
|
||||
wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st1, gpr_rd_data_st2;
|
||||
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx, gpr_rd_req_idx_st1, gpr_rd_req_idx_st2;
|
||||
|
||||
wire pipe_in_ready;
|
||||
reg pipe_out_valid;
|
||||
wire pipe_out_ready;
|
||||
reg [`UUID_WIDTH-1:0] pipe_out_uuid;
|
||||
reg [METADATAW-1:0] pipe_out_data;
|
||||
wire pipe_valid_st1, pipe_ready_st1;
|
||||
wire pipe_valid_st2, pipe_ready_st2;
|
||||
wire [META_DATAW-1:0] pipe_data, pipe_data_st1, pipe_data_st2;
|
||||
|
||||
reg [NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data, src_data_n;
|
||||
reg [NUM_SRC_REGS-1:0] data_fetched;
|
||||
reg has_collision, has_collision_n;
|
||||
reg [NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_n;
|
||||
wire [NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_st1, src_data_st2;
|
||||
|
||||
wire stg_in_valid, stg_in_ready;
|
||||
reg [NUM_SRC_REGS-1:0] data_fetched_n;
|
||||
wire [NUM_SRC_REGS-1:0] data_fetched_st1;
|
||||
|
||||
reg has_collision_n;
|
||||
wire has_collision_st1;
|
||||
|
||||
wire [NUM_SRC_REGS-1:0][`NR_BITS-1:0] src_regs = {scoreboard_if.data.rs3,
|
||||
scoreboard_if.data.rs2,
|
||||
|
@ -89,7 +95,7 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_SRC_REGS; ++i) begin
|
||||
assign src_valid[i] = (src_regs[i] != 0) && ~data_fetched[i];
|
||||
assign src_valid[i] = (src_regs[i] != 0) && ~data_fetched_st1[i];
|
||||
end
|
||||
|
||||
assign req_in_valid = {NUM_SRC_REGS{scoreboard_if.valid}} & src_valid;
|
||||
|
@ -109,13 +115,20 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
.data_in (req_in_data),
|
||||
.sel_in (req_bank_idx),
|
||||
.ready_in (req_in_ready),
|
||||
.valid_out (gpr_rd_valid_n),
|
||||
.data_out (gpr_rd_addr_n),
|
||||
.sel_out (gpr_rd_req_idx_n),
|
||||
.valid_out (gpr_rd_valid),
|
||||
.data_out (gpr_rd_addr),
|
||||
.sel_out (gpr_rd_req_idx),
|
||||
.ready_out (gpr_rd_ready)
|
||||
);
|
||||
|
||||
assign gpr_rd_ready = {NUM_BANKS{stg_in_ready}};
|
||||
wire pipe_in_ready = pipe_ready_st1 || ~pipe_valid_st1;
|
||||
|
||||
assign gpr_rd_ready = {NUM_BANKS{pipe_in_ready}};
|
||||
|
||||
assign scoreboard_if.ready = pipe_in_ready && ~has_collision_n;
|
||||
|
||||
wire pipe_fire_st1 = pipe_valid_st1 && pipe_ready_st1;
|
||||
wire pipe_fire_st2 = pipe_valid_st2 && pipe_ready_st2;
|
||||
|
||||
always @(*) begin
|
||||
has_collision_n = 0;
|
||||
|
@ -129,83 +142,82 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
end
|
||||
|
||||
always @(*) begin
|
||||
src_data_n = src_data;
|
||||
for (integer b = 0; b < NUM_BANKS; ++b) begin
|
||||
if (gpr_rd_valid[b]) begin
|
||||
src_data_n[gpr_rd_req_idx[b]] = gpr_rd_data[b];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire pipe_stall = pipe_out_valid && ~pipe_out_ready;
|
||||
assign pipe_in_ready = ~pipe_stall;
|
||||
|
||||
assign scoreboard_if.ready = pipe_in_ready && ~has_collision_n;
|
||||
|
||||
wire stg_in_fire = stg_in_valid && stg_in_ready;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
pipe_out_valid <= 0;
|
||||
gpr_rd_valid <= '0;
|
||||
data_fetched <= '0;
|
||||
src_data <= '0;
|
||||
data_fetched_n = data_fetched_st1;
|
||||
if (scoreboard_if.ready) begin
|
||||
data_fetched_n = '0;
|
||||
end else begin
|
||||
if (~pipe_stall) begin
|
||||
pipe_out_valid <= scoreboard_if.valid;
|
||||
gpr_rd_valid <= gpr_rd_valid_n;
|
||||
if (scoreboard_if.ready) begin
|
||||
data_fetched <= '0;
|
||||
end else begin
|
||||
data_fetched <= data_fetched | req_in_ready;
|
||||
end
|
||||
if (stg_in_fire) begin
|
||||
src_data <= '0;
|
||||
end else begin
|
||||
src_data <= src_data_n;
|
||||
end
|
||||
end
|
||||
end
|
||||
if (~pipe_stall) begin
|
||||
pipe_out_uuid <= scoreboard_if.data.uuid;
|
||||
pipe_out_data <= {
|
||||
scoreboard_if.data.wis,
|
||||
scoreboard_if.data.tmask,
|
||||
scoreboard_if.data.PC,
|
||||
scoreboard_if.data.wb,
|
||||
scoreboard_if.data.ex_type,
|
||||
scoreboard_if.data.op_type,
|
||||
scoreboard_if.data.op_args,
|
||||
scoreboard_if.data.rd
|
||||
};
|
||||
has_collision <= has_collision_n;
|
||||
gpr_rd_addr <= gpr_rd_addr_n;
|
||||
gpr_rd_req_idx <= gpr_rd_req_idx_n;
|
||||
data_fetched_n = data_fetched_st1 | req_in_ready;
|
||||
end
|
||||
end
|
||||
|
||||
assign pipe_out_ready = stg_in_ready;
|
||||
assign stg_in_valid = pipe_out_valid && ~has_collision;
|
||||
assign pipe_data = {
|
||||
scoreboard_if.data.wis,
|
||||
scoreboard_if.data.tmask,
|
||||
scoreboard_if.data.PC,
|
||||
scoreboard_if.data.wb,
|
||||
scoreboard_if.data.ex_type,
|
||||
scoreboard_if.data.op_type,
|
||||
scoreboard_if.data.op_args,
|
||||
scoreboard_if.data.rd,
|
||||
scoreboard_if.data.uuid
|
||||
};
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + NUM_SRC_REGS + NUM_BANKS + META_DATAW + 1 + NUM_BANKS * (PER_BANK_ADDRW + REQ_SEL_WIDTH)),
|
||||
.RESETW (1 + NUM_SRC_REGS)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (pipe_in_ready),
|
||||
.data_in ({scoreboard_if.valid, data_fetched_n, gpr_rd_valid, pipe_data, has_collision_n, gpr_rd_addr, gpr_rd_req_idx}),
|
||||
.data_out ({pipe_valid_st1, data_fetched_st1, gpr_rd_valid_st1, pipe_data_st1, has_collision_st1, gpr_rd_addr_st1, gpr_rd_req_idx_st1})
|
||||
);
|
||||
|
||||
assign pipe_ready_st1 = pipe_ready_st2 || ~pipe_valid_st2;
|
||||
|
||||
assign src_data_st1 = pipe_fire_st2 ? '0 : src_data_n;
|
||||
|
||||
wire pipe_valid2_st1 = pipe_valid_st1 && ~has_collision_st1;
|
||||
|
||||
`RESET_RELAY (pipe2_reset, reset); // needed for pipe_reg2's wide RESETW
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + NUM_SRC_REGS * REGS_DATAW + NUM_BANKS + NUM_BANKS * REGS_DATAW + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH),
|
||||
.RESETW (1 + NUM_SRC_REGS * REGS_DATAW)
|
||||
) pipe_reg2 (
|
||||
.clk (clk),
|
||||
.reset (pipe2_reset),
|
||||
.enable (pipe_ready_st1),
|
||||
.data_in ({pipe_valid2_st1, src_data_st1, gpr_rd_valid_st1, gpr_rd_data_st1, pipe_data_st1, gpr_rd_req_idx_st1}),
|
||||
.data_out ({pipe_valid_st2, src_data_st2, gpr_rd_valid_st2, gpr_rd_data_st2, pipe_data_st2, gpr_rd_req_idx_st2})
|
||||
);
|
||||
|
||||
always @(*) begin
|
||||
src_data_n = src_data_st2;
|
||||
for (integer b = 0; b < NUM_BANKS; ++b) begin
|
||||
if (gpr_rd_valid_st2[b]) begin
|
||||
src_data_n[gpr_rd_req_idx_st2[b]] = gpr_rd_data_st2[b];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)),
|
||||
.LUTRAM (1)
|
||||
) out_buffer (
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (stg_in_valid),
|
||||
.ready_in (stg_in_ready),
|
||||
.valid_in (pipe_valid_st2),
|
||||
.ready_in (pipe_ready_st2),
|
||||
.data_in ({
|
||||
pipe_out_uuid,
|
||||
pipe_out_data,
|
||||
pipe_data_st2,
|
||||
src_data_n[0],
|
||||
src_data_n[1],
|
||||
src_data_n[2]
|
||||
}),
|
||||
.data_out ({
|
||||
operands_if.data.uuid,
|
||||
operands_if.data.wis,
|
||||
operands_if.data.tmask,
|
||||
operands_if.data.PC,
|
||||
|
@ -214,6 +226,7 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
operands_if.data.op_type,
|
||||
operands_if.data.op_args,
|
||||
operands_if.data.rd,
|
||||
operands_if.data.uuid,
|
||||
operands_if.data.rs1_data,
|
||||
operands_if.data.rs2_data,
|
||||
operands_if.data.rs3_data
|
||||
|
@ -262,27 +275,24 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
assign wren[i*XLEN_SIZE+:XLEN_SIZE] = {XLEN_SIZE{writeback_if.data.tmask[i]}};
|
||||
end
|
||||
|
||||
`ifdef GPR_RESET
|
||||
VX_dp_ram_rst #(
|
||||
`else
|
||||
VX_dp_ram #(
|
||||
`endif
|
||||
.DATAW (`XLEN * `NUM_THREADS),
|
||||
.DATAW (REGS_DATAW),
|
||||
.SIZE (PER_BANK_REGS * PER_ISSUE_WARPS),
|
||||
.WRENW (BYTEENW),
|
||||
`ifdef GPR_RESET
|
||||
.RESET_RAM (1),
|
||||
`endif
|
||||
.NO_RWCHECK (1)
|
||||
) gpr_ram (
|
||||
.clk (clk),
|
||||
`ifdef GPR_RESET
|
||||
.reset (reset),
|
||||
`endif
|
||||
.read (1'b1),
|
||||
.read (pipe_fire_st1),
|
||||
.wren (wren),
|
||||
.write (gpr_wr_enabled),
|
||||
.waddr (gpr_wr_addr),
|
||||
.wdata (writeback_if.data.data),
|
||||
.raddr (gpr_rd_addr[b]),
|
||||
.rdata (gpr_rd_data[b])
|
||||
.raddr (gpr_rd_addr_st1[b]),
|
||||
.rdata (gpr_rd_data_st1[b])
|
||||
);
|
||||
end
|
||||
|
||||
|
|
|
@ -383,16 +383,16 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
wire [`NUM_WARPS-1:0] pending_warp_empty;
|
||||
wire [`NUM_WARPS-1:0] pending_warp_alm_empty;
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
`RESET_RELAY_EX (pending_instr_reset, reset, `NUM_WARPS, `MAX_FANOUT);
|
||||
|
||||
`RESET_RELAY (pending_instr_reset, reset);
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
|
||||
VX_pending_size #(
|
||||
.SIZE (4096),
|
||||
.ALM_EMPTY (1)
|
||||
) counter (
|
||||
.clk (clk),
|
||||
.reset (pending_instr_reset),
|
||||
.reset (pending_instr_reset[i]),
|
||||
.incr (per_warp_incr[i]),
|
||||
.decr (commit_sched_if.committed_warps[i]),
|
||||
.empty (pending_warp_empty[i]),
|
||||
|
|
|
@ -179,7 +179,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
VX_gather_unit #(
|
||||
.BLOCK_SIZE (BLOCK_SIZE),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.OUT_BUF (1)
|
||||
.OUT_BUF (3)
|
||||
) gather_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -21,7 +21,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #(
|
|||
parameter TAG_WIDTH = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire reset,
|
||||
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
@ -36,7 +36,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #(
|
|||
input wire is_signed,
|
||||
|
||||
input wire [NUM_LANES-1:0][31:0] dataa,
|
||||
output wire [NUM_LANES-1:0][31:0] result,
|
||||
output wire [NUM_LANES-1:0][31:0] result,
|
||||
|
||||
output wire has_fflags,
|
||||
output wire [`FP_FLAGS_BITS-1:0] fflags,
|
||||
|
@ -45,25 +45,26 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #(
|
|||
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
);
|
||||
`UNUSED_VAR (frm)
|
||||
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
|
||||
fflags_t [NUM_LANES-1:0] fflags_out;
|
||||
|
||||
wire pe_enable;
|
||||
wire pe_enable;
|
||||
wire [NUM_PES-1:0][31:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
|
||||
|
||||
|
||||
VX_pe_serializer #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.LATENCY (`LATENCY_FCVT),
|
||||
.DATA_IN_WIDTH(32),
|
||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG (0)
|
||||
.PE_REG (0),
|
||||
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
|
||||
) pe_serializer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -94,7 +95,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #(
|
|||
.enable (pe_enable),
|
||||
.frm (frm),
|
||||
.is_itof (is_itof),
|
||||
.is_signed (is_signed),
|
||||
.is_signed (is_signed),
|
||||
.dataa (pe_data_in[i][0 +: 32]),
|
||||
.result (pe_data_out[i][0 +: 32]),
|
||||
.fflags (pe_data_out[i][32 +: `FP_FLAGS_BITS])
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -21,7 +21,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
|||
parameter TAG_WIDTH = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire reset,
|
||||
|
||||
input wire valid_in,
|
||||
output wire ready_in,
|
||||
|
@ -31,10 +31,10 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
|||
input wire [TAG_WIDTH-1:0] tag_in,
|
||||
|
||||
input wire [`INST_FRM_BITS-1:0] frm,
|
||||
|
||||
|
||||
input wire [NUM_LANES-1:0][31:0] dataa,
|
||||
input wire [NUM_LANES-1:0][31:0] datab,
|
||||
output wire [NUM_LANES-1:0][31:0] result,
|
||||
output wire [NUM_LANES-1:0][31:0] result,
|
||||
|
||||
output wire has_fflags,
|
||||
output wire [`FP_FLAGS_BITS-1:0] fflags,
|
||||
|
@ -47,27 +47,28 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
|||
`UNUSED_VAR (frm)
|
||||
|
||||
wire [NUM_LANES-1:0][2*32-1:0] data_in;
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
|
||||
wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out;
|
||||
|
||||
wire pe_enable;
|
||||
wire pe_enable;
|
||||
wire [NUM_PES-1:0][2*32-1:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
|
||||
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign data_in[i][0 +: 32] = dataa[i];
|
||||
assign data_in[i][32 +: 32] = datab[i];
|
||||
end
|
||||
|
||||
|
||||
VX_pe_serializer #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.LATENCY (`LATENCY_FDIV),
|
||||
.DATA_IN_WIDTH(2*32),
|
||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG (0)
|
||||
.PE_REG (0),
|
||||
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
|
||||
) pe_serializer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -92,7 +93,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
|||
fflags_t [NUM_LANES-1:0] per_lane_fflags;
|
||||
|
||||
`ifdef QUARTUS
|
||||
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
acl_fdiv fdiv (
|
||||
.clk (clk),
|
||||
|
@ -103,8 +104,8 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
|||
.q (pe_data_out[i][0 +: 32])
|
||||
);
|
||||
assign pe_data_out[i][32 +: `FP_FLAGS_BITS] = 'x;
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
assign has_fflags = 0;
|
||||
assign per_lane_fflags = 'x;
|
||||
`UNUSED_VAR (fflags_out)
|
||||
|
@ -131,21 +132,21 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
|||
assign has_fflags = 1;
|
||||
assign per_lane_fflags = fflags_out;
|
||||
|
||||
`else
|
||||
`else
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
reg [63:0] r;
|
||||
`UNUSED_VAR (r)
|
||||
`UNUSED_VAR (r)
|
||||
fflags_t f;
|
||||
|
||||
always @(*) begin
|
||||
always @(*) begin
|
||||
dpi_fdiv (
|
||||
pe_enable,
|
||||
int'(0),
|
||||
{32'hffffffff, pe_data_in[i][0 +: 32]},
|
||||
{32'hffffffff, pe_data_in[i][32 +: 32]},
|
||||
frm,
|
||||
r,
|
||||
pe_enable,
|
||||
int'(0),
|
||||
{32'hffffffff, pe_data_in[i][0 +: 32]},
|
||||
{32'hffffffff, pe_data_in[i][32 +: 32]},
|
||||
frm,
|
||||
r,
|
||||
f
|
||||
);
|
||||
end
|
||||
|
|
|
@ -98,7 +98,8 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
.DATA_IN_WIDTH(3*32),
|
||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0)
|
||||
.PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0), // must be registered for DSPs
|
||||
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
|
||||
) pe_serializer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -35,7 +35,7 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #(
|
|||
|
||||
input wire [NUM_LANES-1:0][31:0] dataa,
|
||||
input wire [NUM_LANES-1:0][31:0] datab,
|
||||
output wire [NUM_LANES-1:0][31:0] result,
|
||||
output wire [NUM_LANES-1:0][31:0] result,
|
||||
|
||||
output wire has_fflags,
|
||||
output wire [`FP_FLAGS_BITS-1:0] fflags,
|
||||
|
@ -44,15 +44,15 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #(
|
|||
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
);
|
||||
`UNUSED_VAR (frm)
|
||||
|
||||
wire [NUM_LANES-1:0][2*32-1:0] data_in;
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
|
||||
fflags_t [NUM_LANES-1:0] fflags_out;
|
||||
|
||||
wire pe_enable;
|
||||
wire pe_enable;
|
||||
wire [NUM_PES-1:0][2*32-1:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
|
||||
|
||||
|
@ -60,15 +60,16 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #(
|
|||
assign data_in[i][0 +: 32] = dataa[i];
|
||||
assign data_in[i][32 +: 32] = datab[i];
|
||||
end
|
||||
|
||||
|
||||
VX_pe_serializer #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.LATENCY (`LATENCY_FNCP),
|
||||
.DATA_IN_WIDTH(2*32),
|
||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG (0)
|
||||
.PE_REG (0),
|
||||
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
|
||||
) pe_serializer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -97,8 +98,8 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (pe_enable),
|
||||
.frm (frm),
|
||||
.op_type (op_type),
|
||||
.frm (frm),
|
||||
.op_type (op_type),
|
||||
.dataa (pe_data_in[i][0 +: 32]),
|
||||
.datab (pe_data_in[i][32 +: 32]),
|
||||
.result (pe_data_out[i][0 +: 32]),
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -18,10 +18,10 @@
|
|||
module VX_fpu_sqrt import VX_fpu_pkg::*; #(
|
||||
parameter NUM_LANES = 1,
|
||||
parameter NUM_PES = `UP(NUM_LANES /`FSQRT_PE_RATIO),
|
||||
parameter TAG_WIDTH = 1
|
||||
parameter TAG_WIDTH = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire reset,
|
||||
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
@ -29,11 +29,11 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #(
|
|||
input wire [NUM_LANES-1:0] mask_in,
|
||||
|
||||
input wire [TAG_WIDTH-1:0] tag_in,
|
||||
|
||||
|
||||
input wire [`INST_FRM_BITS-1:0] frm,
|
||||
|
||||
input wire [NUM_LANES-1:0][31:0] dataa,
|
||||
output wire [NUM_LANES-1:0][31:0] result,
|
||||
output wire [NUM_LANES-1:0][31:0] result,
|
||||
|
||||
output wire has_fflags,
|
||||
output wire [`FP_FLAGS_BITS-1:0] fflags,
|
||||
|
@ -46,22 +46,23 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #(
|
|||
|
||||
`UNUSED_VAR (frm)
|
||||
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
|
||||
wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out;
|
||||
|
||||
wire pe_enable;
|
||||
wire pe_enable;
|
||||
wire [NUM_PES-1:0][31:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
|
||||
|
||||
|
||||
VX_pe_serializer #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.LATENCY (`LATENCY_FSQRT),
|
||||
.DATA_IN_WIDTH(32),
|
||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG (0)
|
||||
.PE_REG (0),
|
||||
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
|
||||
) pe_serializer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -83,10 +84,10 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #(
|
|||
assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS];
|
||||
end
|
||||
|
||||
fflags_t [NUM_LANES-1:0] per_lane_fflags;
|
||||
fflags_t [NUM_LANES-1:0] per_lane_fflags;
|
||||
|
||||
`ifdef QUARTUS
|
||||
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
acl_fsqrt fsqrt (
|
||||
.clk (clk),
|
||||
|
@ -105,7 +106,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #(
|
|||
`elsif VIVADO
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
wire tuser;
|
||||
wire tuser;
|
||||
|
||||
xil_fsqrt fsqrt (
|
||||
.aclk (clk),
|
||||
|
@ -130,17 +131,17 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #(
|
|||
`UNUSED_VAR (r)
|
||||
fflags_t f;
|
||||
|
||||
always @(*) begin
|
||||
always @(*) begin
|
||||
dpi_fsqrt (
|
||||
pe_enable,
|
||||
int'(0),
|
||||
{32'hffffffff, pe_data_in[i]},
|
||||
frm,
|
||||
r,
|
||||
pe_enable,
|
||||
int'(0),
|
||||
{32'hffffffff, pe_data_in[i]},
|
||||
frm,
|
||||
r,
|
||||
f
|
||||
);
|
||||
end
|
||||
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW (32 + $bits(fflags_t)),
|
||||
.DEPTH (`LATENCY_FSQRT)
|
||||
|
|
|
@ -81,12 +81,15 @@ module VX_avs_adapter #(
|
|||
assign req_queue_push[i] = mem_req_valid && ~mem_req_rw && bank_req_ready[i] && (req_bank_sel == i);
|
||||
end
|
||||
|
||||
`RESET_RELAY_EX (bank_reset, reset, NUM_BANKS, 1);
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
|
||||
VX_pending_size #(
|
||||
.SIZE (RD_QUEUE_SIZE)
|
||||
) pending_size (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (bank_reset[i]),
|
||||
.incr (req_queue_push[i]),
|
||||
.decr (req_queue_pop[i]),
|
||||
`UNUSED_PIN (empty),
|
||||
|
@ -102,7 +105,7 @@ module VX_avs_adapter #(
|
|||
.DEPTH (RD_QUEUE_SIZE)
|
||||
) rd_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (bank_reset[i]),
|
||||
.push (req_queue_push[i]),
|
||||
.pop (req_queue_pop[i]),
|
||||
.data_in (mem_req_tag),
|
||||
|
@ -132,7 +135,7 @@ module VX_avs_adapter #(
|
|||
.OUT_REG (`TO_OUT_BUF_REG(REQ_OUT_BUF))
|
||||
) req_out_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (bank_reset[i]),
|
||||
.valid_in (valid_out_w),
|
||||
.ready_in (ready_out_w),
|
||||
.data_in ({mem_req_rw, mem_req_byteen, req_bank_off, mem_req_data}),
|
||||
|
@ -168,12 +171,13 @@ module VX_avs_adapter #(
|
|||
wire [NUM_BANKS-1:0] rsp_queue_empty;
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (DATA_WIDTH),
|
||||
.DEPTH (RD_QUEUE_SIZE)
|
||||
) rd_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (bank_reset[i]),
|
||||
.push (avs_readdatavalid[i]),
|
||||
.pop (req_queue_pop[i]),
|
||||
.data_in (avs_readdata[i]),
|
||||
|
@ -195,7 +199,7 @@ module VX_avs_adapter #(
|
|||
VX_stream_arb #(
|
||||
.NUM_INPUTS (NUM_BANKS),
|
||||
.DATAW (DATA_WIDTH + TAG_WIDTH),
|
||||
.ARBITER ("R"),
|
||||
.ARBITER ("F"),
|
||||
.OUT_BUF (RSP_OUT_BUF)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -15,10 +15,10 @@
|
|||
|
||||
`TRACING_OFF
|
||||
module VX_axi_adapter #(
|
||||
parameter DATA_WIDTH = 512,
|
||||
parameter DATA_WIDTH = 512,
|
||||
parameter ADDR_WIDTH = 32,
|
||||
parameter TAG_WIDTH = 8,
|
||||
parameter NUM_BANKS = 1,
|
||||
parameter NUM_BANKS = 1,
|
||||
parameter AVS_ADDR_WIDTH = (ADDR_WIDTH - `CLOG2(DATA_WIDTH/8)),
|
||||
parameter RSP_OUT_BUF = 0
|
||||
) (
|
||||
|
@ -34,13 +34,13 @@ module VX_axi_adapter #(
|
|||
input wire [TAG_WIDTH-1:0] mem_req_tag,
|
||||
output wire mem_req_ready,
|
||||
|
||||
// Vortex response
|
||||
output wire mem_rsp_valid,
|
||||
// Vortex response
|
||||
output wire mem_rsp_valid,
|
||||
output wire [DATA_WIDTH-1:0] mem_rsp_data,
|
||||
output wire [TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
input wire mem_rsp_ready,
|
||||
|
||||
// AXI write request address channel
|
||||
// AXI write request address channel
|
||||
output wire m_axi_awvalid [NUM_BANKS],
|
||||
input wire m_axi_awready [NUM_BANKS],
|
||||
output wire [ADDR_WIDTH-1:0] m_axi_awaddr [NUM_BANKS],
|
||||
|
@ -54,7 +54,7 @@ module VX_axi_adapter #(
|
|||
output wire [3:0] m_axi_awqos [NUM_BANKS],
|
||||
output wire [3:0] m_axi_awregion [NUM_BANKS],
|
||||
|
||||
// AXI write request data channel
|
||||
// AXI write request data channel
|
||||
output wire m_axi_wvalid [NUM_BANKS],
|
||||
input wire m_axi_wready [NUM_BANKS],
|
||||
output wire [DATA_WIDTH-1:0] m_axi_wdata [NUM_BANKS],
|
||||
|
@ -66,7 +66,7 @@ module VX_axi_adapter #(
|
|||
output wire m_axi_bready [NUM_BANKS],
|
||||
input wire [TAG_WIDTH-1:0] m_axi_bid [NUM_BANKS],
|
||||
input wire [1:0] m_axi_bresp [NUM_BANKS],
|
||||
|
||||
|
||||
// AXI read address channel
|
||||
output wire m_axi_arvalid [NUM_BANKS],
|
||||
input wire m_axi_arready [NUM_BANKS],
|
||||
|
@ -74,13 +74,13 @@ module VX_axi_adapter #(
|
|||
output wire [TAG_WIDTH-1:0] m_axi_arid [NUM_BANKS],
|
||||
output wire [7:0] m_axi_arlen [NUM_BANKS],
|
||||
output wire [2:0] m_axi_arsize [NUM_BANKS],
|
||||
output wire [1:0] m_axi_arburst [NUM_BANKS],
|
||||
output wire [1:0] m_axi_arburst [NUM_BANKS],
|
||||
output wire [1:0] m_axi_arlock [NUM_BANKS],
|
||||
output wire [3:0] m_axi_arcache [NUM_BANKS],
|
||||
output wire [2:0] m_axi_arprot [NUM_BANKS],
|
||||
output wire [3:0] m_axi_arqos [NUM_BANKS],
|
||||
output wire [3:0] m_axi_arregion [NUM_BANKS],
|
||||
|
||||
|
||||
// AXI read response channel
|
||||
input wire m_axi_rvalid [NUM_BANKS],
|
||||
output wire m_axi_rready [NUM_BANKS],
|
||||
|
@ -88,15 +88,15 @@ module VX_axi_adapter #(
|
|||
input wire m_axi_rlast [NUM_BANKS],
|
||||
input wire [TAG_WIDTH-1:0] m_axi_rid [NUM_BANKS],
|
||||
input wire [1:0] m_axi_rresp [NUM_BANKS]
|
||||
);
|
||||
);
|
||||
localparam AXSIZE = `CLOG2(DATA_WIDTH/8);
|
||||
localparam BANK_ADDRW = `LOG2UP(NUM_BANKS);
|
||||
localparam BANK_ADDRW = `LOG2UP(NUM_BANKS);
|
||||
localparam LOG2_NUM_BANKS = `CLOG2(NUM_BANKS);
|
||||
|
||||
wire [BANK_ADDRW-1:0] req_bank_sel;
|
||||
|
||||
if (NUM_BANKS > 1) begin
|
||||
assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0];
|
||||
assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0];
|
||||
end else begin
|
||||
assign req_bank_sel = '0;
|
||||
end
|
||||
|
@ -108,12 +108,12 @@ module VX_axi_adapter #(
|
|||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
wire m_axi_aw_fire = m_axi_awvalid[i] && m_axi_awready[i];
|
||||
wire m_axi_w_fire = m_axi_wvalid[i] && m_axi_wready[i];
|
||||
wire m_axi_w_fire = m_axi_wvalid[i] && m_axi_wready[i];
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
m_axi_aw_ack[i] <= 0;
|
||||
m_axi_w_ack[i] <= 0;
|
||||
end else begin
|
||||
end else begin
|
||||
if (mem_req_fire && (req_bank_sel == i)) begin
|
||||
m_axi_aw_ack[i] <= 0;
|
||||
m_axi_w_ack[i] <= 0;
|
||||
|
@ -127,10 +127,10 @@ module VX_axi_adapter #(
|
|||
end
|
||||
end
|
||||
|
||||
wire axi_write_ready [NUM_BANKS];
|
||||
wire axi_write_ready [NUM_BANKS];
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign axi_write_ready[i] = (m_axi_awready[i] || m_axi_aw_ack[i])
|
||||
assign axi_write_ready[i] = (m_axi_awready[i] || m_axi_aw_ack[i])
|
||||
&& (m_axi_wready[i] || m_axi_w_ack[i]);
|
||||
end
|
||||
|
||||
|
@ -141,17 +141,17 @@ module VX_axi_adapter #(
|
|||
assign mem_req_ready = mem_req_rw ? axi_write_ready[0] : m_axi_arready[0];
|
||||
end
|
||||
|
||||
// AXI write request address channel
|
||||
// AXI write request address channel
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i];
|
||||
assign m_axi_awaddr[i] = (ADDR_WIDTH'(mem_req_addr) >> LOG2_NUM_BANKS) << AXSIZE;
|
||||
assign m_axi_awid[i] = mem_req_tag;
|
||||
assign m_axi_awlen[i] = 8'b00000000;
|
||||
assign m_axi_awlen[i] = 8'b00000000;
|
||||
assign m_axi_awsize[i] = 3'(AXSIZE);
|
||||
assign m_axi_awburst[i] = 2'b00;
|
||||
assign m_axi_awlock[i] = 2'b00;
|
||||
assign m_axi_awburst[i] = 2'b00;
|
||||
assign m_axi_awlock[i] = 2'b00;
|
||||
assign m_axi_awcache[i] = 4'b0000;
|
||||
assign m_axi_awprot[i] = 3'b000;
|
||||
assign m_axi_awprot[i] = 3'b000;
|
||||
assign m_axi_awqos[i] = 4'b0000;
|
||||
assign m_axi_awregion[i]= 4'b0000;
|
||||
end
|
||||
|
@ -170,31 +170,31 @@ module VX_axi_adapter #(
|
|||
`UNUSED_VAR (m_axi_bid[i])
|
||||
`UNUSED_VAR (m_axi_bresp[i])
|
||||
assign m_axi_bready[i] = 1'b1;
|
||||
`RUNTIME_ASSERT(~m_axi_bvalid[i] || m_axi_bresp[i] == 0, ("%t: *** AXI response error", $time));
|
||||
`RUNTIME_ASSERT(~m_axi_bvalid[i] || m_axi_bresp[i] == 0, ("%t: *** AXI response error", $time));
|
||||
end
|
||||
|
||||
// AXI read request channel
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i);
|
||||
assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i);
|
||||
assign m_axi_araddr[i] = (ADDR_WIDTH'(mem_req_addr) >> LOG2_NUM_BANKS) << AXSIZE;
|
||||
assign m_axi_arid[i] = mem_req_tag;
|
||||
assign m_axi_arlen[i] = 8'b00000000;
|
||||
assign m_axi_arsize[i] = 3'(AXSIZE);
|
||||
assign m_axi_arburst[i] = 2'b00;
|
||||
assign m_axi_arlock[i] = 2'b00;
|
||||
assign m_axi_arburst[i] = 2'b00;
|
||||
assign m_axi_arlock[i] = 2'b00;
|
||||
assign m_axi_arcache[i] = 4'b0000;
|
||||
assign m_axi_arprot[i] = 3'b000;
|
||||
assign m_axi_arqos[i] = 4'b0000;
|
||||
assign m_axi_arregion[i]= 4'b0000;
|
||||
end
|
||||
|
||||
// AXI read response channel
|
||||
// AXI read response channel
|
||||
|
||||
wire [NUM_BANKS-1:0] rsp_arb_valid_in;
|
||||
wire [NUM_BANKS-1:0][DATA_WIDTH+TAG_WIDTH-1:0] rsp_arb_data_in;
|
||||
wire [NUM_BANKS-1:0] rsp_arb_ready_in;
|
||||
|
||||
`UNUSED_VAR (m_axi_rlast)
|
||||
`UNUSED_VAR (m_axi_rlast)
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign rsp_arb_valid_in[i] = m_axi_rvalid[i];
|
||||
|
@ -203,11 +203,11 @@ module VX_axi_adapter #(
|
|||
`RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rlast[i] == 1, ("%t: *** AXI response error", $time));
|
||||
`RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rresp[i] == 0, ("%t: *** AXI response error", $time));
|
||||
end
|
||||
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (NUM_BANKS),
|
||||
.DATAW (DATA_WIDTH + TAG_WIDTH),
|
||||
.ARBITER ("R"),
|
||||
.ARBITER ("F"),
|
||||
.OUT_BUF (RSP_OUT_BUF)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
|
|
|
@ -22,12 +22,16 @@ module VX_dp_ram #(
|
|||
parameter OUT_REG = 0,
|
||||
parameter NO_RWCHECK = 0,
|
||||
parameter LUTRAM = 0,
|
||||
parameter RW_ASSERT = 0,
|
||||
parameter RESET_RAM = 0,
|
||||
parameter READ_ENABLE = 0,
|
||||
parameter INIT_ENABLE = 0,
|
||||
parameter INIT_FILE = "",
|
||||
parameter [DATAW-1:0] INIT_VALUE = 0,
|
||||
parameter ADDRW = `LOG2UP(SIZE)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire read,
|
||||
input wire write,
|
||||
input wire [WRENW-1:0] wren,
|
||||
|
@ -50,44 +54,44 @@ module VX_dp_ram #(
|
|||
end \
|
||||
end
|
||||
|
||||
`UNUSED_PARAM (RW_ASSERT)
|
||||
`UNUSED_VAR (read)
|
||||
|
||||
if (WRENW > 1) begin
|
||||
`RUNTIME_ASSERT(~write || (| wren), ("invalid write enable mask"));
|
||||
end
|
||||
|
||||
wire [DATAW-1:0] rdata_w;
|
||||
|
||||
`ifdef SYNTHESIS
|
||||
if (WRENW > 1) begin
|
||||
`ifdef QUARTUS
|
||||
if (LUTRAM != 0) begin
|
||||
if (OUT_REG != 0) begin
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
`USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
if (read) begin
|
||||
rdata_r <= ram[raddr];
|
||||
`USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin
|
||||
`USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end
|
||||
assign rdata_w = ram[raddr];
|
||||
end else begin
|
||||
if (OUT_REG != 0) begin
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
if (NO_RWCHECK != 0) begin
|
||||
`NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata_w = ram[raddr];
|
||||
end else begin
|
||||
reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
|
@ -97,37 +101,8 @@ module VX_dp_ram #(
|
|||
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
if (read) begin
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin
|
||||
if (NO_RWCHECK != 0) begin
|
||||
`NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end else begin
|
||||
reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end
|
||||
assign rdata_w = ram[raddr];
|
||||
end
|
||||
end
|
||||
`else
|
||||
|
@ -135,35 +110,18 @@ module VX_dp_ram #(
|
|||
if (LUTRAM != 0) begin
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
if (OUT_REG != 0) begin
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
if (read) begin
|
||||
rdata_r <= ram[raddr];
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end
|
||||
assign rdata_w = ram[raddr];
|
||||
end else begin
|
||||
if (OUT_REG != 0) begin
|
||||
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
if (NO_RWCHECK != 0) begin
|
||||
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
|
@ -172,37 +130,20 @@ module VX_dp_ram #(
|
|||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
if (read) begin
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
assign rdata_w = ram[raddr];
|
||||
end else begin
|
||||
if (NO_RWCHECK != 0) begin
|
||||
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end else begin
|
||||
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end
|
||||
assign rdata_w = ram[raddr];
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
@ -211,64 +152,36 @@ module VX_dp_ram #(
|
|||
if (LUTRAM != 0) begin
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
if (OUT_REG != 0) begin
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
if (read) begin
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end
|
||||
assign rdata_w = ram[raddr];
|
||||
end else begin
|
||||
if (OUT_REG != 0) begin
|
||||
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
if (NO_RWCHECK != 0) begin
|
||||
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
if (read) begin
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
assign rdata_w = ram[raddr];
|
||||
end else begin
|
||||
if (NO_RWCHECK != 0) begin
|
||||
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end else begin
|
||||
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end
|
||||
assign rdata_w = ram[raddr];
|
||||
end
|
||||
end
|
||||
end
|
||||
`else
|
||||
// RAM emulation
|
||||
// simulation
|
||||
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
|
@ -277,39 +190,57 @@ module VX_dp_ram #(
|
|||
assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW];
|
||||
end
|
||||
|
||||
if (OUT_REG != 0) begin
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
reg [DATAW-1:0] prev_data;
|
||||
reg [ADDRW-1:0] prev_waddr;
|
||||
reg prev_write;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (RESET_RAM && reset) begin
|
||||
for (integer i = 0; i < SIZE; ++i) begin
|
||||
ram[i] <= DATAW'(INIT_VALUE);
|
||||
end
|
||||
end else begin
|
||||
if (write) begin
|
||||
ram[waddr] <= ram_n;
|
||||
end
|
||||
if (read) begin
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin
|
||||
reg [DATAW-1:0] prev_data;
|
||||
reg [ADDRW-1:0] prev_waddr;
|
||||
reg prev_write;
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= ram_n;
|
||||
end
|
||||
prev_write <= (| wren);
|
||||
if (reset) begin
|
||||
prev_write <= 0;
|
||||
prev_data <= '0;
|
||||
prev_waddr <= '0;
|
||||
end else begin
|
||||
prev_write <= write;
|
||||
prev_data <= ram[waddr];
|
||||
prev_waddr <= waddr;
|
||||
end
|
||||
if (LUTRAM || !NO_RWCHECK) begin
|
||||
`UNUSED_VAR (prev_write)
|
||||
`UNUSED_VAR (prev_data)
|
||||
`UNUSED_VAR (prev_waddr)
|
||||
assign rdata = ram[raddr];
|
||||
end else begin
|
||||
assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
|
||||
end
|
||||
|
||||
if (LUTRAM || !NO_RWCHECK) begin
|
||||
`UNUSED_VAR (prev_write)
|
||||
`UNUSED_VAR (prev_data)
|
||||
`UNUSED_VAR (prev_waddr)
|
||||
assign rdata_w = ram[raddr];
|
||||
end else begin
|
||||
assign rdata_w = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
|
||||
if (RW_ASSERT) begin
|
||||
`RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("read after write hazard"));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
if (OUT_REG != 0) begin
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (READ_ENABLE && reset) begin
|
||||
rdata_r <= '0;
|
||||
end else if (!READ_ENABLE || read) begin
|
||||
rdata_r <= rdata_w;
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin
|
||||
assign rdata = rdata_w;
|
||||
end
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
|
|
|
@ -1,115 +0,0 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
`TRACING_OFF
|
||||
// VX_dp_ram_rst: behavioral RAM model with one write port (waddr/wdata/wren,
// gated by `write`) and one read port (raddr/rdata). While `reset` is asserted,
// every clock edge rewrites the storage entries to INIT_VALUE.
//
// Parameters:
//   DATAW       - width in bits of one RAM entry.
//   SIZE        - the storage array is indexed [ADDR_MIN:SIZE-1].
//   ADDR_MIN    - lowest index of the storage array.
//   WRENW       - number of write-enable bits; each one covers a
//                 WSELW = DATAW / WRENW bit slice of the entry.
//   OUT_REG     - non-zero: rdata comes from a register loaded when `read` is high;
//                 zero: rdata is driven combinationally.
//   NO_RWCHECK,
//   LUTRAM      - only consulted when OUT_REG == 0; together they choose between a
//                 plain array read and the prev_* read path (see the branch below).
//   INIT_ENABLE - non-zero: emit an `initial` block that preloads the array.
//   INIT_FILE   - when non-empty (and INIT_ENABLE != 0) the array is preloaded
//                 with $readmemh from this file; otherwise INIT_VALUE is used.
//   INIT_VALUE  - value written to the entries by the initial block and by reset.
//   ADDRW       - address width, `LOG2UP(SIZE) by default.
module VX_dp_ram_rst #(
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 1,
|
||||
parameter ADDR_MIN = 0,
|
||||
parameter WRENW = 1,
|
||||
parameter OUT_REG = 0,
|
||||
parameter NO_RWCHECK = 0,
|
||||
parameter LUTRAM = 0,
|
||||
parameter INIT_ENABLE = 0,
|
||||
parameter INIT_FILE = "",
|
||||
parameter [DATAW-1:0] INIT_VALUE = 0,
|
||||
parameter ADDRW = `LOG2UP(SIZE)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire read,
|
||||
input wire write,
|
||||
input wire [WRENW-1:0] wren,
|
||||
input wire [ADDRW-1:0] waddr,
|
||||
input wire [DATAW-1:0] wdata,
|
||||
input wire [ADDRW-1:0] raddr,
|
||||
output wire [DATAW-1:0] rdata
|
||||
);
|
||||
// Width of the slice controlled by a single wren bit; the static assert below
// rejects configurations where DATAW is not an exact multiple of WRENW.
localparam WSELW = DATAW / WRENW;
|
||||
`STATIC_ASSERT((WRENW * WSELW == DATAW), ("invalid parameter"))
|
||||
|
||||
// Optional power-up initialization of `ram`: from INIT_FILE via $readmemh when a
// file name is given, otherwise by filling entries 0..SIZE-1 with INIT_VALUE.
// NOTE(review): the fill loop starts at index 0 while the array's low bound is
// ADDR_MIN - confirm that instances relying on it use ADDR_MIN == 0.
`define RAM_INITIALIZATION \
|
||||
if (INIT_ENABLE != 0) begin \
|
||||
if (INIT_FILE != "") begin \
|
||||
initial $readmemh(INIT_FILE, ram); \
|
||||
end else begin \
|
||||
initial \
|
||||
for (integer i = 0; i < SIZE; ++i) \
|
||||
ram[i] = INIT_VALUE; \
|
||||
end \
|
||||
end
|
||||
|
||||
// `read` is only referenced in the OUT_REG != 0 branch below; presumably this
// macro silences the unused-signal lint for the other configuration - TODO confirm
// against the macro definition in VX_platform.vh.
`UNUSED_VAR (read)
|
||||
|
||||
// RAM emulation
|
||||
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
// ram_n is the entry value to store on a write: each WSELW-bit slice takes wdata
// when WRENW == 1 or its wren bit is set, and otherwise keeps the slice currently
// held in ram[waddr].
wire [DATAW-1:0] ram_n;
|
||||
for (genvar i = 0; i < WRENW; ++i) begin
|
||||
assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW];
|
||||
end
|
||||
|
||||
// Registered-output configuration: rdata is the rdata_r register.
if (OUT_REG != 0) begin
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
// Reset rewrites entries 0..SIZE-1 to INIT_VALUE and clears the output register.
// NOTE(review): as in the init macro, the loop starts at 0 rather than ADDR_MIN.
if (reset) begin
|
||||
for (integer i = 0; i < SIZE; ++i) begin
|
||||
ram[i] <= DATAW'(INIT_VALUE);
|
||||
end
|
||||
rdata_r <= '0;
|
||||
end else begin
|
||||
if (write) begin
|
||||
ram[waddr] <= ram_n;
|
||||
end
|
||||
// Non-blocking read: rdata_r samples ram[raddr] as it was before this edge's write.
if (read) begin
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
// Combinational-output configuration: rdata is driven directly from the array,
// optionally overridden by the prev_* registers that track the last cycle's write.
end else begin
|
||||
reg [DATAW-1:0] prev_data;
|
||||
reg [ADDRW-1:0] prev_waddr;
|
||||
reg prev_write;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
for (integer i = 0; i < SIZE; ++i) begin
|
||||
ram[i] <= DATAW'(INIT_VALUE);
|
||||
end
|
||||
prev_write <= 0;
|
||||
prev_data <= '0;
|
||||
prev_waddr <= '0;
|
||||
end else begin
|
||||
if (write) begin
|
||||
ram[waddr] <= ram_n;
|
||||
end
|
||||
// Remember this cycle's write address and the entry content from before the
// write takes effect (ram[waddr] is sampled by a non-blocking assignment).
// NOTE(review): prev_write follows (| wren) and is not gated by `write` - confirm
// that callers never assert wren bits without `write`.
prev_write <= (| wren);
|
||||
prev_data <= ram[waddr];
|
||||
prev_waddr <= waddr;
|
||||
end
|
||||
end
|
||||
// LUTRAM set or NO_RWCHECK clear: plain array read, the prev_* registers are unused.
if (LUTRAM || !NO_RWCHECK) begin
|
||||
`UNUSED_VAR (prev_write)
|
||||
`UNUSED_VAR (prev_data)
|
||||
`UNUSED_VAR (prev_waddr)
|
||||
assign rdata = ram[raddr];
|
||||
end else begin
|
||||
// NO_RWCHECK set without LUTRAM: a read of the address recorded in prev_waddr
// returns prev_data (the pre-write content) instead of the updated array entry.
assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
|
@ -18,7 +18,8 @@ module VX_elastic_buffer #(
|
|||
parameter DATAW = 1,
|
||||
parameter SIZE = 1,
|
||||
parameter OUT_REG = 0,
|
||||
parameter LUTRAM = 0
|
||||
parameter LUTRAM = 0,
|
||||
parameter MAX_FANOUT = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -40,6 +41,43 @@ module VX_elastic_buffer #(
|
|||
assign data_out = data_in;
|
||||
assign ready_in = ready_out;
|
||||
|
||||
end else if (MAX_FANOUT != 0 && (DATAW > (MAX_FANOUT + MAX_FANOUT/2))) begin
|
||||
|
||||
localparam NUM_SLICES = `CDIV(DATAW, MAX_FANOUT);
|
||||
localparam N_DATAW = DATAW / NUM_SLICES;
|
||||
|
||||
for (genvar i = 0; i < NUM_SLICES; ++i) begin
|
||||
|
||||
localparam S_DATAW = (i == NUM_SLICES-1) ? (DATAW - i * N_DATAW) : N_DATAW;
|
||||
|
||||
wire valid_out_t, ready_in_t;
|
||||
`UNUSED_VAR (valid_out_t)
|
||||
`UNUSED_VAR (ready_in_t)
|
||||
|
||||
`RESET_RELAY (slice_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (S_DATAW),
|
||||
.SIZE (SIZE),
|
||||
.OUT_REG (OUT_REG),
|
||||
.LUTRAM (LUTRAM)
|
||||
) buffer_slice (
|
||||
.clk (clk),
|
||||
.reset (slice_reset),
|
||||
.valid_in (valid_in),
|
||||
.data_in (data_in[i * N_DATAW +: S_DATAW]),
|
||||
.ready_in (ready_in_t),
|
||||
.valid_out (valid_out_t),
|
||||
.data_out (data_out[i * N_DATAW +: S_DATAW]),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
if (i == 0) begin
|
||||
assign ready_in = ready_in_t;
|
||||
assign valid_out = valid_out_t;
|
||||
end
|
||||
end
|
||||
|
||||
end else if (SIZE == 1) begin
|
||||
|
||||
VX_pipe_buffer #(
|
||||
|
@ -103,9 +141,9 @@ module VX_elastic_buffer #(
|
|||
|
||||
assign ready_in = ~full;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
VX_pipe_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE ((OUT_REG == 2) ? 1 : 0)
|
||||
.DEPTH ((OUT_REG > 0) ? (OUT_REG-1) : 0)
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -38,17 +38,16 @@ module VX_fair_arbiter #(
|
|||
|
||||
end else begin
|
||||
|
||||
reg [NUM_REQS-1:0] grant_mask;
|
||||
reg [NUM_REQS-1:0] requests_r;
|
||||
|
||||
wire [NUM_REQS-1:0] requests_rem = requests & ~grant_mask;
|
||||
wire rem_valid = (| requests_rem);
|
||||
wire [NUM_REQS-1:0] requests_qual = rem_valid ? requests_rem : requests;
|
||||
wire [NUM_REQS-1:0] requests_sel = requests_r & requests;
|
||||
wire [NUM_REQS-1:0] requests_qual = (| requests_sel) ? requests_sel : requests;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
grant_mask <= '0;
|
||||
requests_r <= '0;
|
||||
end else if (grant_ready) begin
|
||||
grant_mask <= rem_valid ? (grant_mask | grant_onehot) : grant_onehot;
|
||||
requests_r <= requests_qual & ~grant_onehot;
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -177,10 +177,11 @@ module VX_fifo_queue #(
|
|||
.SIZE (DEPTH),
|
||||
.LUTRAM (LUTRAM)
|
||||
) dp_ram (
|
||||
.clk(clk),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (1'b1),
|
||||
.write (push),
|
||||
`UNUSED_PIN (wren),
|
||||
.wren (1'b1),
|
||||
.waddr (wr_ptr_r),
|
||||
.wdata (data_in),
|
||||
.raddr (rd_ptr_r),
|
||||
|
@ -226,9 +227,10 @@ module VX_fifo_queue #(
|
|||
.LUTRAM (LUTRAM)
|
||||
) dp_ram (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (1'b1),
|
||||
.write (push),
|
||||
`UNUSED_PIN (wren),
|
||||
.wren (1'b1),
|
||||
.waddr (wr_ptr_r),
|
||||
.wdata (data_in),
|
||||
.raddr (rd_ptr_n_r),
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -17,10 +17,10 @@
|
|||
module VX_find_first #(
|
||||
parameter N = 1,
|
||||
parameter DATAW = 1,
|
||||
parameter REVERSE = 0
|
||||
parameter REVERSE = 0
|
||||
) (
|
||||
input wire [N-1:0][DATAW-1:0] data_in,
|
||||
input wire [N-1:0] valid_in,
|
||||
input wire [N-1:0] valid_in,
|
||||
output wire [DATAW-1:0] data_out,
|
||||
output wire valid_out
|
||||
);
|
||||
|
@ -37,10 +37,12 @@ module VX_find_first #(
|
|||
assign s_n[TL+i] = REVERSE ? valid_in[N-1-i] : valid_in[i];
|
||||
assign d_n[TL+i] = REVERSE ? data_in[N-1-i] : data_in[i];
|
||||
end
|
||||
|
||||
for (genvar i = TL+N; i < TN; ++i) begin
|
||||
assign s_n[i] = 0;
|
||||
assign d_n[i] = '0;
|
||||
|
||||
if (TL < (TN-N)) begin
|
||||
for (genvar i = TL+N; i < TN; ++i) begin
|
||||
assign s_n[i] = 0;
|
||||
assign d_n[i] = '0;
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar j = 0; j < LOGN; ++j) begin
|
||||
|
@ -48,10 +50,10 @@ module VX_find_first #(
|
|||
assign s_n[2**j-1+i] = s_n[2**(j+1)-1+i*2] | s_n[2**(j+1)-1+i*2+1];
|
||||
assign d_n[2**j-1+i] = s_n[2**(j+1)-1+i*2] ? d_n[2**(j+1)-1+i*2] : d_n[2**(j+1)-1+i*2+1];
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
assign valid_out = s_n[0];
|
||||
assign data_out = d_n[0];
|
||||
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -24,17 +24,17 @@ module VX_index_buffer #(
|
|||
input wire reset,
|
||||
|
||||
output wire [ADDRW-1:0] write_addr,
|
||||
input wire [DATAW-1:0] write_data,
|
||||
input wire [DATAW-1:0] write_data,
|
||||
input wire acquire_en,
|
||||
|
||||
input wire [ADDRW-1:0] read_addr,
|
||||
output wire [DATAW-1:0] read_data,
|
||||
input wire release_en,
|
||||
|
||||
|
||||
output wire empty,
|
||||
output wire full
|
||||
output wire full
|
||||
);
|
||||
|
||||
|
||||
VX_allocator #(
|
||||
.SIZE (SIZE)
|
||||
) allocator (
|
||||
|
@ -43,9 +43,9 @@ module VX_index_buffer #(
|
|||
.acquire_en (acquire_en),
|
||||
.acquire_addr (write_addr),
|
||||
.release_en (release_en),
|
||||
.release_addr (read_addr),
|
||||
.release_addr (read_addr),
|
||||
.empty (empty),
|
||||
.full (full)
|
||||
.full (full)
|
||||
);
|
||||
|
||||
VX_dp_ram #(
|
||||
|
@ -54,14 +54,15 @@ module VX_index_buffer #(
|
|||
.LUTRAM (LUTRAM)
|
||||
) data_table (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (1'b1),
|
||||
.write (acquire_en),
|
||||
`UNUSED_PIN (wren),
|
||||
.wren (1'b1),
|
||||
.waddr (write_addr),
|
||||
.wdata (write_data),
|
||||
.raddr (read_addr),
|
||||
.rdata (read_data)
|
||||
);
|
||||
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -15,10 +15,10 @@
|
|||
|
||||
`TRACING_OFF
|
||||
module VX_mem_adapter #(
|
||||
parameter SRC_DATA_WIDTH = 1,
|
||||
parameter SRC_ADDR_WIDTH = 1,
|
||||
parameter DST_DATA_WIDTH = 1,
|
||||
parameter DST_ADDR_WIDTH = 1,
|
||||
parameter SRC_DATA_WIDTH = 1,
|
||||
parameter SRC_ADDR_WIDTH = 1,
|
||||
parameter DST_DATA_WIDTH = 1,
|
||||
parameter DST_ADDR_WIDTH = 1,
|
||||
parameter SRC_TAG_WIDTH = 1,
|
||||
parameter DST_TAG_WIDTH = 1,
|
||||
parameter REQ_OUT_BUF = 0,
|
||||
|
@ -35,9 +35,9 @@ module VX_mem_adapter #(
|
|||
input wire [SRC_TAG_WIDTH-1:0] mem_req_tag_in,
|
||||
output wire mem_req_ready_in,
|
||||
|
||||
output wire mem_rsp_valid_in,
|
||||
output wire [SRC_DATA_WIDTH-1:0] mem_rsp_data_in,
|
||||
output wire [SRC_TAG_WIDTH-1:0] mem_rsp_tag_in,
|
||||
output wire mem_rsp_valid_in,
|
||||
output wire [SRC_DATA_WIDTH-1:0] mem_rsp_data_in,
|
||||
output wire [SRC_TAG_WIDTH-1:0] mem_rsp_tag_in,
|
||||
input wire mem_rsp_ready_in,
|
||||
|
||||
output wire mem_req_valid_out,
|
||||
|
@ -48,12 +48,12 @@ module VX_mem_adapter #(
|
|||
output wire [DST_TAG_WIDTH-1:0] mem_req_tag_out,
|
||||
input wire mem_req_ready_out,
|
||||
|
||||
input wire mem_rsp_valid_out,
|
||||
input wire [DST_DATA_WIDTH-1:0] mem_rsp_data_out,
|
||||
input wire mem_rsp_valid_out,
|
||||
input wire [DST_DATA_WIDTH-1:0] mem_rsp_data_out,
|
||||
input wire [DST_TAG_WIDTH-1:0] mem_rsp_tag_out,
|
||||
output wire mem_rsp_ready_out
|
||||
);
|
||||
`STATIC_ASSERT ((DST_TAG_WIDTH >= SRC_TAG_WIDTH), ("oops!"))
|
||||
);
|
||||
`STATIC_ASSERT ((DST_TAG_WIDTH >= SRC_TAG_WIDTH), ("oops!"))
|
||||
|
||||
localparam DST_DATA_SIZE = (DST_DATA_WIDTH / 8);
|
||||
localparam DST_LDATAW = `CLOG2(DST_DATA_WIDTH);
|
||||
|
@ -69,7 +69,7 @@ module VX_mem_adapter #(
|
|||
wire [DST_TAG_WIDTH-1:0] mem_req_tag_out_w;
|
||||
wire mem_req_ready_out_w;
|
||||
|
||||
wire mem_rsp_valid_in_w;
|
||||
wire mem_rsp_valid_in_w;
|
||||
wire [SRC_DATA_WIDTH-1:0] mem_rsp_data_in_w;
|
||||
wire [SRC_TAG_WIDTH-1:0] mem_rsp_tag_in_w;
|
||||
wire mem_rsp_ready_in_w;
|
||||
|
@ -80,7 +80,7 @@ module VX_mem_adapter #(
|
|||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
|
||||
wire [D-1:0] req_idx = mem_req_addr_in[D-1:0];
|
||||
wire [D-1:0] rsp_idx = mem_rsp_tag_out[D-1:0];
|
||||
|
||||
|
@ -99,31 +99,31 @@ module VX_mem_adapter #(
|
|||
|
||||
assign mem_req_valid_out_w = mem_req_valid_in;
|
||||
assign mem_req_rw_out_w = mem_req_rw_in;
|
||||
assign mem_req_byteen_out_w = DST_DATA_SIZE'(mem_req_byteen_in) << ((DST_LDATAW-3)'(req_idx) << (SRC_LDATAW-3));
|
||||
assign mem_req_byteen_out_w = DST_DATA_SIZE'(mem_req_byteen_in) << ((DST_LDATAW-3)'(req_idx) << (SRC_LDATAW-3));
|
||||
assign mem_req_data_out_w = DST_DATA_WIDTH'(mem_req_data_in) << ((DST_LDATAW'(req_idx)) << SRC_LDATAW);
|
||||
assign mem_req_tag_out_w = DST_TAG_WIDTH'({mem_req_tag_in, req_idx});
|
||||
assign mem_req_ready_in = mem_req_ready_out_w;
|
||||
|
||||
assign mem_rsp_valid_in_w = mem_rsp_valid_out;
|
||||
assign mem_rsp_data_in_w = mem_rsp_data_out_w[rsp_idx];
|
||||
assign mem_rsp_data_in_w = mem_rsp_data_out_w[rsp_idx];
|
||||
assign mem_rsp_tag_in_w = SRC_TAG_WIDTH'(mem_rsp_tag_out[SRC_TAG_WIDTH+D-1:D]);
|
||||
assign mem_rsp_ready_out = mem_rsp_ready_in_w;
|
||||
|
||||
end else if (DST_LDATAW < SRC_LDATAW) begin
|
||||
|
||||
|
||||
reg [D-1:0] req_ctr, rsp_ctr;
|
||||
|
||||
reg [P-1:0][DST_DATA_WIDTH-1:0] mem_rsp_data_out_r, mem_rsp_data_out_n;
|
||||
|
||||
wire mem_req_out_fire = mem_req_valid_out && mem_req_ready_out;
|
||||
wire mem_rsp_in_fire = mem_rsp_valid_out && mem_rsp_ready_out;
|
||||
wire mem_rsp_in_fire = mem_rsp_valid_out && mem_rsp_ready_out;
|
||||
|
||||
wire [P-1:0][DST_DATA_WIDTH-1:0] mem_req_data_in_w = mem_req_data_in;
|
||||
wire [P-1:0][DST_DATA_SIZE-1:0] mem_req_byteen_in_w = mem_req_byteen_in;
|
||||
|
||||
always @(*) begin
|
||||
mem_rsp_data_out_n = mem_rsp_data_out_r;
|
||||
if (mem_rsp_in_fire) begin
|
||||
if (mem_rsp_in_fire) begin
|
||||
mem_rsp_data_out_n[rsp_ctr] = mem_rsp_data_out;
|
||||
end
|
||||
end
|
||||
|
@ -139,24 +139,24 @@ module VX_mem_adapter #(
|
|||
if (mem_rsp_in_fire) begin
|
||||
rsp_ctr <= rsp_ctr + 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
mem_rsp_data_out_r <= mem_rsp_data_out_n;
|
||||
end
|
||||
|
||||
reg [DST_TAG_WIDTH-1:0] mem_rsp_tag_in_r;
|
||||
wire [DST_TAG_WIDTH-1:0] mem_rsp_tag_in_x;
|
||||
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (mem_rsp_in_fire) begin
|
||||
mem_rsp_tag_in_r <= mem_rsp_tag_out;
|
||||
end
|
||||
end
|
||||
end
|
||||
assign mem_rsp_tag_in_x = (rsp_ctr != 0) ? mem_rsp_tag_in_r : mem_rsp_tag_out;
|
||||
`RUNTIME_ASSERT(!mem_rsp_in_fire || (mem_rsp_tag_in_x == mem_rsp_tag_out),
|
||||
`RUNTIME_ASSERT(!mem_rsp_in_fire || (mem_rsp_tag_in_x == mem_rsp_tag_out),
|
||||
("%t: *** out-of-order memory reponse! cur=%d, expected=%d", $time, mem_rsp_tag_in_x, mem_rsp_tag_out))
|
||||
|
||||
wire [SRC_ADDR_WIDTH+D-1:0] mem_req_addr_in_qual = {mem_req_addr_in, req_ctr};
|
||||
|
||||
|
||||
if (DST_ADDR_WIDTH < (SRC_ADDR_WIDTH + D)) begin
|
||||
`UNUSED_VAR (mem_req_addr_in_qual)
|
||||
assign mem_req_addr_out_w = mem_req_addr_in_qual[DST_ADDR_WIDTH-1:0];
|
||||
|
@ -181,8 +181,8 @@ module VX_mem_adapter #(
|
|||
end else begin
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
if (DST_ADDR_WIDTH < SRC_ADDR_WIDTH) begin
|
||||
`UNUSED_VAR (mem_req_addr_in)
|
||||
assign mem_req_addr_out_w = mem_req_addr_in[DST_ADDR_WIDTH-1:0];
|
||||
|
|
|
@ -87,16 +87,16 @@ module VX_mem_coalescer #(
|
|||
localparam STATE_SETUP = 0;
|
||||
localparam STATE_SEND = 1;
|
||||
|
||||
reg state_r, state_n;
|
||||
logic state_r, state_n;
|
||||
|
||||
reg out_req_valid_r, out_req_valid_n;
|
||||
reg out_req_rw_r, out_req_rw_n;
|
||||
reg [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n;
|
||||
reg [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n;
|
||||
reg [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype_r, out_req_atype_n;
|
||||
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] out_req_byteen_r, out_req_byteen_n;
|
||||
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] out_req_data_r, out_req_data_n;
|
||||
reg [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n;
|
||||
logic out_req_valid_r, out_req_valid_n;
|
||||
logic out_req_rw_r, out_req_rw_n;
|
||||
logic [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n;
|
||||
logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n;
|
||||
logic [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype_r, out_req_atype_n;
|
||||
logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] out_req_byteen_r, out_req_byteen_n;
|
||||
logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] out_req_data_r, out_req_data_n;
|
||||
logic [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n;
|
||||
|
||||
reg in_req_ready_n;
|
||||
|
||||
|
@ -135,7 +135,11 @@ module VX_mem_coalescer #(
|
|||
`UNUSED_PIN (onehot),
|
||||
.valid_out (batch_valid_n[i])
|
||||
);
|
||||
assign seed_idx[i] = NUM_REQS_W'(i * DATA_RATIO) + NUM_REQS_W'(batch_idx);
|
||||
if (OUT_REQS > 1) begin
|
||||
assign seed_idx[i] = {(NUM_REQS_W-DATA_RATIO_W)'(i), batch_idx};
|
||||
end else begin
|
||||
assign seed_idx[i] = batch_idx;
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < OUT_REQS; ++i) begin
|
||||
|
@ -149,29 +153,6 @@ module VX_mem_coalescer #(
|
|||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state_r <= STATE_SETUP;
|
||||
processed_mask_r <= '0;
|
||||
out_req_valid_r <= 0;
|
||||
end else begin
|
||||
state_r <= state_n;
|
||||
batch_valid_r <= batch_valid_n;
|
||||
seed_addr_r <= seed_addr_n;
|
||||
seed_atype_r <= seed_atype_n;
|
||||
addr_matches_r <= addr_matches_n;
|
||||
out_req_valid_r <= out_req_valid_n;
|
||||
out_req_mask_r <= out_req_mask_n;
|
||||
out_req_rw_r <= out_req_rw_n;
|
||||
out_req_addr_r <= out_req_addr_n;
|
||||
out_req_atype_r <= out_req_atype_n;
|
||||
out_req_byteen_r <= out_req_byteen_n;
|
||||
out_req_data_r <= out_req_data_n;
|
||||
out_req_tag_r <= out_req_tag_n;
|
||||
processed_mask_r <= processed_mask_n;
|
||||
end
|
||||
end
|
||||
|
||||
wire [NUM_REQS-1:0] current_pmask = in_req_mask & addr_matches_r;
|
||||
|
||||
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] req_byteen_merged;
|
||||
|
@ -248,6 +229,17 @@ module VX_mem_coalescer #(
|
|||
endcase
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + NUM_REQS + 1 + 1 + NUM_REQS + OUT_REQS * (1 + 1 + OUT_ADDR_WIDTH + ATYPE_WIDTH + OUT_ADDR_WIDTH + ATYPE_WIDTH + DATA_OUT_SIZE + DATA_OUT_WIDTH) + OUT_TAG_WIDTH),
|
||||
.RESETW (1 + NUM_REQS + 1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in ({state_n, processed_mask_n, out_req_valid_n, out_req_rw_n, addr_matches_n, batch_valid_n, out_req_mask_n, seed_addr_n, seed_atype_n, out_req_addr_n, out_req_atype_n, out_req_byteen_n, out_req_data_n, out_req_tag_n}),
|
||||
.data_out ({state_r, processed_mask_r, out_req_valid_r, out_req_rw_r, addr_matches_r, batch_valid_r, out_req_mask_r, seed_addr_r, seed_atype_r, out_req_addr_r, out_req_atype_r, out_req_byteen_r, out_req_data_r, out_req_tag_r})
|
||||
);
|
||||
|
||||
wire out_rsp_fire = out_rsp_valid && out_rsp_ready;
|
||||
|
||||
wire out_rsp_eop;
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -23,13 +23,13 @@ module VX_onehot_encoder #(
|
|||
parameter MODEL = 1,
|
||||
parameter LN = `LOG2UP(N)
|
||||
) (
|
||||
input wire [N-1:0] data_in,
|
||||
input wire [N-1:0] data_in,
|
||||
output wire [LN-1:0] data_out,
|
||||
output wire valid_out
|
||||
);
|
||||
);
|
||||
if (N == 1) begin
|
||||
|
||||
assign data_out = data_in;
|
||||
assign data_out = 0;
|
||||
assign valid_out = data_in;
|
||||
|
||||
end else if (N == 2) begin
|
||||
|
@ -37,43 +37,43 @@ module VX_onehot_encoder #(
|
|||
assign data_out = data_in[!REVERSE];
|
||||
assign valid_out = (| data_in);
|
||||
|
||||
end else if (MODEL == 1) begin
|
||||
localparam M = 1 << LN;
|
||||
`IGNORE_UNOPTFLAT_BEGIN
|
||||
end else if (MODEL == 1) begin
|
||||
localparam M = 1 << LN;
|
||||
`IGNORE_UNOPTFLAT_BEGIN
|
||||
wire [LN-1:0][M-1:0] addr;
|
||||
wire [LN:0][M-1:0] v;
|
||||
`IGNORE_UNOPTFLAT_END
|
||||
|
||||
|
||||
// base case, also handle padding for non-power of two inputs
|
||||
assign v[0] = REVERSE ? (M'(data_in) << (M - N)) : M'(data_in);
|
||||
|
||||
|
||||
for (genvar lvl = 1; lvl < (LN+1); ++lvl) begin
|
||||
localparam SN = 1 << (LN - lvl);
|
||||
localparam SI = M / SN;
|
||||
localparam SW = lvl;
|
||||
|
||||
|
||||
for (genvar s = 0; s < SN; ++s) begin
|
||||
`IGNORE_UNOPTFLAT_BEGIN
|
||||
wire [1:0] vs = {v[lvl-1][s*SI+(SI>>1)], v[lvl-1][s*SI]};
|
||||
`IGNORE_UNOPTFLAT_END
|
||||
|
||||
|
||||
assign v[lvl][s*SI] = (| vs);
|
||||
|
||||
if (lvl == 1) begin
|
||||
assign addr[lvl-1][s*SI +: SW] = vs[!REVERSE];
|
||||
assign addr[lvl-1][s*SI +: SW] = vs[!REVERSE];
|
||||
end else begin
|
||||
assign addr[lvl-1][s*SI +: SW] = {
|
||||
assign addr[lvl-1][s*SI +: SW] = {
|
||||
vs[!REVERSE],
|
||||
addr[lvl-2][s*SI +: SW-1] | addr[lvl-2][s*SI+(SI>>1) +: SW-1]
|
||||
};
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign data_out = addr[LN-1][LN-1:0];
|
||||
assign valid_out = v[LN][0];
|
||||
|
||||
end else if (MODEL == 2 && REVERSE == 0) begin
|
||||
end else if (MODEL == 2 && REVERSE == 0) begin
|
||||
|
||||
for (genvar j = 0; j < LN; ++j) begin
|
||||
wire [N-1:0] mask;
|
||||
|
@ -90,19 +90,19 @@ module VX_onehot_encoder #(
|
|||
reg [LN-1:0] index_r;
|
||||
|
||||
if (REVERSE != 0) begin
|
||||
always @(*) begin
|
||||
index_r = 'x;
|
||||
always @(*) begin
|
||||
index_r = 'x;
|
||||
for (integer i = N-1; i >= 0; --i) begin
|
||||
if (data_in[i]) begin
|
||||
if (data_in[i]) begin
|
||||
index_r = LN'(N-1-i);
|
||||
end
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
always @(*) begin
|
||||
index_r = 'x;
|
||||
always @(*) begin
|
||||
index_r = 'x;
|
||||
for (integer i = 0; i < N; ++i) begin
|
||||
if (data_in[i]) begin
|
||||
if (data_in[i]) begin
|
||||
index_r = LN'(i);
|
||||
end
|
||||
end
|
||||
|
|
|
@ -17,7 +17,8 @@
|
|||
module VX_onehot_mux #(
|
||||
parameter DATAW = 1,
|
||||
parameter N = 1,
|
||||
parameter MODEL = 1
|
||||
parameter MODEL = 1,
|
||||
parameter LUT_OPT = 0
|
||||
) (
|
||||
input wire [N-1:0][DATAW-1:0] data_in,
|
||||
input wire [N-1:0] sel_in,
|
||||
|
@ -26,6 +27,90 @@ module VX_onehot_mux #(
|
|||
if (N == 1) begin
|
||||
`UNUSED_VAR (sel_in)
|
||||
assign data_out = data_in;
|
||||
end else if (LUT_OPT && N == 2) begin
|
||||
`UNUSED_VAR (sel_in)
|
||||
assign data_out = sel_in[0] ? data_in[0] : data_in[1];
|
||||
end else if (LUT_OPT && N == 3) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
3'b001: data_out_r = data_in[0];
|
||||
3'b010: data_out_r = data_in[1];
|
||||
3'b100: data_out_r = data_in[2];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (LUT_OPT && N == 4) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
4'b0001: data_out_r = data_in[0];
|
||||
4'b0010: data_out_r = data_in[1];
|
||||
4'b0100: data_out_r = data_in[2];
|
||||
4'b1000: data_out_r = data_in[3];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (LUT_OPT && N == 5) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
5'b00001: data_out_r = data_in[0];
|
||||
5'b00010: data_out_r = data_in[1];
|
||||
5'b00100: data_out_r = data_in[2];
|
||||
5'b01000: data_out_r = data_in[3];
|
||||
5'b10000: data_out_r = data_in[4];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (LUT_OPT && N == 6) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
6'b000001: data_out_r = data_in[0];
|
||||
6'b000010: data_out_r = data_in[1];
|
||||
6'b000100: data_out_r = data_in[2];
|
||||
6'b001000: data_out_r = data_in[3];
|
||||
6'b010000: data_out_r = data_in[4];
|
||||
6'b100000: data_out_r = data_in[5];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (LUT_OPT && N == 7) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
7'b0000001: data_out_r = data_in[0];
|
||||
7'b0000010: data_out_r = data_in[1];
|
||||
7'b0000100: data_out_r = data_in[2];
|
||||
7'b0001000: data_out_r = data_in[3];
|
||||
7'b0010000: data_out_r = data_in[4];
|
||||
7'b0100000: data_out_r = data_in[5];
|
||||
7'b1000000: data_out_r = data_in[6];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (LUT_OPT && N == 8) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
8'b00000001: data_out_r = data_in[0];
|
||||
8'b00000010: data_out_r = data_in[1];
|
||||
8'b00000100: data_out_r = data_in[2];
|
||||
8'b00001000: data_out_r = data_in[3];
|
||||
8'b00010000: data_out_r = data_in[4];
|
||||
8'b00100000: data_out_r = data_in[5];
|
||||
8'b01000000: data_out_r = data_in[6];
|
||||
8'b10000000: data_out_r = data_in[7];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (MODEL == 1) begin
|
||||
wire [N-1:0][DATAW-1:0] mask;
|
||||
for (genvar i = 0; i < N; ++i) begin
|
||||
|
|
|
@ -21,7 +21,8 @@ module VX_pe_serializer #(
|
|||
parameter DATA_IN_WIDTH = 1,
|
||||
parameter DATA_OUT_WIDTH = 1,
|
||||
parameter TAG_WIDTH = 0,
|
||||
parameter PE_REG = 0
|
||||
parameter PE_REG = 0,
|
||||
parameter OUT_BUF = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -43,6 +44,11 @@ module VX_pe_serializer #(
|
|||
output wire [TAG_WIDTH-1:0] tag_out,
|
||||
input wire ready_out
|
||||
);
|
||||
wire valid_out_u;
|
||||
wire [NUM_LANES-1:0][DATA_OUT_WIDTH-1:0] data_out_u;
|
||||
wire [TAG_WIDTH-1:0] tag_out_u;
|
||||
wire ready_out_u;
|
||||
|
||||
wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_in_s;
|
||||
wire valid_out_s;
|
||||
wire [TAG_WIDTH-1:0] tag_out_s;
|
||||
|
@ -105,7 +111,7 @@ module VX_pe_serializer #(
|
|||
reg [TAG_WIDTH-1:0] tag_out_r;
|
||||
|
||||
wire valid_out_b = valid_out_s && batch_out_done;
|
||||
wire ready_out_b = ready_out || ~valid_out;
|
||||
wire ready_out_b = ready_out_u || ~valid_out_u;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
@ -119,29 +125,42 @@ module VX_pe_serializer #(
|
|||
end
|
||||
end
|
||||
|
||||
assign enable = ready_out_b || ~valid_out_b;
|
||||
assign ready_in = enable && batch_in_done;
|
||||
assign enable = ready_out_b || ~valid_out_b;
|
||||
assign ready_in = enable && batch_in_done;
|
||||
assign pe_enable = enable;
|
||||
|
||||
assign pe_enable = enable;
|
||||
|
||||
assign valid_out = valid_out_r;
|
||||
assign data_out = data_out_r;
|
||||
assign tag_out = tag_out_r;
|
||||
assign valid_out_u = valid_out_r;
|
||||
assign data_out_u = data_out_r;
|
||||
assign tag_out_u = tag_out_r;
|
||||
|
||||
end else begin
|
||||
|
||||
assign pe_data_in_s = data_in;
|
||||
|
||||
assign enable = ready_out || ~valid_out;
|
||||
assign ready_in = enable;
|
||||
assign enable = ready_out_u || ~valid_out_u;
|
||||
assign ready_in = enable;
|
||||
assign pe_enable = enable;
|
||||
|
||||
assign pe_enable = enable;
|
||||
|
||||
assign valid_out = valid_out_s;
|
||||
assign data_out = pe_data_out;
|
||||
assign tag_out = tag_out_s;
|
||||
assign valid_out_u = valid_out_s;
|
||||
assign data_out_u = pe_data_out;
|
||||
assign tag_out_u = tag_out_s;
|
||||
|
||||
end
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (NUM_LANES * DATA_OUT_WIDTH + TAG_WIDTH),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_out_u),
|
||||
.ready_in (ready_out_u),
|
||||
.data_in ({data_out_u, tag_out_u}),
|
||||
.data_out ({data_out, tag_out}),
|
||||
.valid_out (valid_out),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
// Copyright 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -24,39 +24,53 @@
|
|||
|
||||
`TRACING_OFF
|
||||
module VX_pipe_buffer #(
|
||||
parameter DATAW = 1,
|
||||
parameter PASSTHRU = 0
|
||||
) (
|
||||
parameter DATAW = 1,
|
||||
parameter DEPTH = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire valid_in,
|
||||
output wire ready_in,
|
||||
output wire ready_in,
|
||||
input wire [DATAW-1:0] data_in,
|
||||
output wire [DATAW-1:0] data_out,
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
if (PASSTHRU != 0) begin
|
||||
);
|
||||
if (DEPTH == 0) begin
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
assign ready_in = ready_out;
|
||||
assign valid_out = valid_in;
|
||||
assign valid_out = valid_in;
|
||||
assign data_out = data_in;
|
||||
end else begin
|
||||
wire stall = valid_out && ~ready_out;
|
||||
wire [DEPTH:0] valid;
|
||||
`IGNORE_UNOPTFLAT_BEGIN
|
||||
wire [DEPTH:0] ready;
|
||||
`IGNORE_UNOPTFLAT_END
|
||||
wire [DEPTH:0][DATAW-1:0] data;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + DATAW),
|
||||
.RESETW (1)
|
||||
) pipe_register (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.data_in ({valid_in, data_in}),
|
||||
.data_out ({valid_out, data_out})
|
||||
);
|
||||
assign valid[0] = valid_in;
|
||||
assign data[0] = data_in;
|
||||
assign ready_in = ready[0];
|
||||
|
||||
for (genvar i = 0; i < DEPTH; ++i) begin
|
||||
assign ready[i] = (ready[i+1] || ~valid[i+1]);
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + DATAW),
|
||||
.RESETW (1)
|
||||
) pipe_register (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (ready[i]),
|
||||
.data_in ({valid[i], data[i]}),
|
||||
.data_out ({valid[i+1], data[i+1]})
|
||||
);
|
||||
end
|
||||
|
||||
assign valid_out = valid[DEPTH];
|
||||
assign data_out = data[DEPTH];
|
||||
assign ready[DEPTH] = ready_out;
|
||||
|
||||
assign ready_in = ~stall;
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,10 +14,11 @@
|
|||
`include "VX_platform.vh"
|
||||
|
||||
`TRACING_OFF
|
||||
module VX_pipe_register #(
|
||||
parameter DATAW = 1,
|
||||
parameter RESETW = 0,
|
||||
parameter DEPTH = 1
|
||||
module VX_pipe_register #(
|
||||
parameter DATAW = 1,
|
||||
parameter RESETW = 0,
|
||||
parameter DEPTH = 1,
|
||||
parameter MAX_FANOUT = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -25,54 +26,76 @@ module VX_pipe_register #(
|
|||
input wire [DATAW-1:0] data_in,
|
||||
output wire [DATAW-1:0] data_out
|
||||
);
|
||||
if (DEPTH == 0) begin
|
||||
if (DEPTH == 0) begin
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (enable)
|
||||
assign data_out = data_in;
|
||||
end else if (DEPTH == 1) begin
|
||||
if (RESETW == 0) begin
|
||||
`UNUSED_VAR (reset)
|
||||
reg [DATAW-1:0] value;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (enable) begin
|
||||
value <= data_in;
|
||||
end
|
||||
assign data_out = data_in;
|
||||
end else if (DEPTH == 1) begin
|
||||
if (MAX_FANOUT != 0 && (DATAW > (MAX_FANOUT + MAX_FANOUT/2))) begin
|
||||
localparam NUM_SLICES = `CDIV(DATAW, MAX_FANOUT);
|
||||
localparam N_DATAW = DATAW / NUM_SLICES;
|
||||
for (genvar i = 0; i < NUM_SLICES; ++i) begin
|
||||
localparam SLICE_START = i * N_DATAW;
|
||||
localparam SLICE_END = SLICE_START + S_DATAW - 1;
|
||||
localparam S_DATAW = (i == NUM_SLICES-1) ? (DATAW - SLICE_START) : N_DATAW;
|
||||
localparam S_RESETW = (SLICE_END >= (DATAW - RESETW)) ?
|
||||
((SLICE_START >= (DATAW - RESETW)) ? S_DATAW : (SLICE_END - (DATAW - RESETW) + 1)) : 0;
|
||||
VX_pipe_register #(
|
||||
.DATAW (S_DATAW),
|
||||
.RESETW (S_RESETW)
|
||||
) pipe_register_slice (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (enable),
|
||||
.data_in (data_in[i * N_DATAW +: S_DATAW]),
|
||||
.data_out (data_out[i * N_DATAW +: S_DATAW])
|
||||
);
|
||||
end
|
||||
assign data_out = value;
|
||||
end else if (RESETW == DATAW) begin
|
||||
reg [DATAW-1:0] value;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
value <= RESETW'(0);
|
||||
end else if (enable) begin
|
||||
value <= data_in;
|
||||
end
|
||||
end
|
||||
assign data_out = value;
|
||||
end else begin
|
||||
reg [DATAW-RESETW-1:0] value_d;
|
||||
reg [RESETW-1:0] value_r;
|
||||
if (RESETW == 0) begin
|
||||
`UNUSED_VAR (reset)
|
||||
reg [DATAW-1:0] value;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
value_r <= RESETW'(0);
|
||||
end else if (enable) begin
|
||||
value_r <= data_in[DATAW-1:DATAW-RESETW];
|
||||
always @(posedge clk) begin
|
||||
if (enable) begin
|
||||
value <= data_in;
|
||||
end
|
||||
end
|
||||
assign data_out = value;
|
||||
end else if (RESETW == DATAW) begin
|
||||
reg [DATAW-1:0] value;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
value <= RESETW'(0);
|
||||
end else if (enable) begin
|
||||
value <= data_in;
|
||||
end
|
||||
end
|
||||
assign data_out = value;
|
||||
end else begin
|
||||
reg [DATAW-RESETW-1:0] value_d;
|
||||
reg [RESETW-1:0] value_r;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
value_r <= RESETW'(0);
|
||||
end else if (enable) begin
|
||||
value_r <= data_in[DATAW-1:DATAW-RESETW];
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (enable) begin
|
||||
value_d <= data_in[DATAW-RESETW-1:0];
|
||||
end
|
||||
end
|
||||
assign data_out = {value_r, value_d};
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (enable) begin
|
||||
value_d <= data_in[DATAW-RESETW-1:0];
|
||||
end
|
||||
end
|
||||
assign data_out = {value_r, value_d};
|
||||
end
|
||||
end else begin
|
||||
wire [DEPTH:0][DATAW-1:0] data_delayed;
|
||||
wire [DEPTH:0][DATAW-1:0] data_delayed;
|
||||
assign data_delayed[0] = data_in;
|
||||
for (genvar i = 1; i <= DEPTH; ++i) begin
|
||||
VX_pipe_register #(
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -21,8 +21,8 @@ module VX_reset_relay #(
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
output wire [N-1:0] reset_o
|
||||
);
|
||||
if (MAX_FANOUT >= 0 && N > MAX_FANOUT) begin
|
||||
);
|
||||
if (MAX_FANOUT >= 0 && N > (MAX_FANOUT + MAX_FANOUT/2)) begin
|
||||
localparam F = `UP(MAX_FANOUT);
|
||||
localparam R = N / F;
|
||||
`PRESERVE_NET reg [R-1:0] reset_r;
|
||||
|
@ -38,6 +38,6 @@ module VX_reset_relay #(
|
|||
`UNUSED_VAR (clk)
|
||||
assign reset_o = {N{reset}};
|
||||
end
|
||||
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
|
|
|
@ -15,9 +15,10 @@
|
|||
|
||||
`TRACING_OFF
|
||||
module VX_rr_arbiter #(
|
||||
parameter NUM_REQS = 1,
|
||||
parameter MODEL = 1,
|
||||
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
|
||||
parameter NUM_REQS = 1,
|
||||
parameter MODEL = 1,
|
||||
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS),
|
||||
parameter LUT_OPT = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -37,7 +38,7 @@ module VX_rr_arbiter #(
|
|||
assign grant_onehot = requests;
|
||||
assign grant_valid = requests[0];
|
||||
|
||||
end else if (NUM_REQS == 2) begin
|
||||
end else if (LUT_OPT && NUM_REQS == 2) begin
|
||||
|
||||
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||
|
@ -63,7 +64,7 @@ module VX_rr_arbiter #(
|
|||
assign grant_onehot = grant_onehot_r;
|
||||
assign grant_valid = (| requests);
|
||||
|
||||
end /*else if (NUM_REQS == 3) begin
|
||||
end else if (LUT_OPT && NUM_REQS == 3) begin
|
||||
|
||||
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||
|
@ -93,7 +94,7 @@ module VX_rr_arbiter #(
|
|||
assign grant_onehot = grant_onehot_r;
|
||||
assign grant_valid = (| requests);
|
||||
|
||||
end */else if (NUM_REQS == 4) begin
|
||||
end else if (LUT_OPT && NUM_REQS == 4) begin
|
||||
|
||||
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||
|
@ -129,7 +130,7 @@ module VX_rr_arbiter #(
|
|||
assign grant_onehot = grant_onehot_r;
|
||||
assign grant_valid = (| requests);
|
||||
|
||||
end /*else if (NUM_REQS == 5) begin
|
||||
end else if (LUT_OPT && NUM_REQS == 5) begin
|
||||
|
||||
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||
|
@ -173,7 +174,7 @@ module VX_rr_arbiter #(
|
|||
assign grant_onehot = grant_onehot_r;
|
||||
assign grant_valid = (| requests);
|
||||
|
||||
end else if (NUM_REQS == 6) begin
|
||||
end else if (LUT_OPT && NUM_REQS == 6) begin
|
||||
|
||||
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||
|
@ -227,7 +228,7 @@ module VX_rr_arbiter #(
|
|||
assign grant_onehot = grant_onehot_r;
|
||||
assign grant_valid = (| requests);
|
||||
|
||||
end else if (NUM_REQS == 7) begin
|
||||
end else if (LUT_OPT && NUM_REQS == 7) begin
|
||||
|
||||
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||
|
@ -293,7 +294,7 @@ module VX_rr_arbiter #(
|
|||
assign grant_onehot = grant_onehot_r;
|
||||
assign grant_valid = (| requests);
|
||||
|
||||
end */else if (NUM_REQS == 8) begin
|
||||
end else if (LUT_OPT && NUM_REQS == 8) begin
|
||||
|
||||
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||
|
|
|
@ -21,13 +21,16 @@ module VX_sp_ram #(
|
|||
parameter WRENW = 1,
|
||||
parameter OUT_REG = 0,
|
||||
parameter NO_RWCHECK = 0,
|
||||
parameter RW_ASSERT = 0,
|
||||
parameter LUTRAM = 0,
|
||||
parameter RESET_RAM = 0,
|
||||
parameter INIT_ENABLE = 0,
|
||||
parameter INIT_FILE = "",
|
||||
parameter [DATAW-1:0] INIT_VALUE = 0,
|
||||
parameter ADDRW = `LOG2UP(SIZE)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire read,
|
||||
input wire write,
|
||||
input wire [WRENW-1:0] wren,
|
||||
|
@ -42,13 +45,16 @@ module VX_sp_ram #(
|
|||
.WRENW (WRENW),
|
||||
.OUT_REG (OUT_REG),
|
||||
.NO_RWCHECK (NO_RWCHECK),
|
||||
.RW_ASSERT (RW_ASSERT),
|
||||
.LUTRAM (LUTRAM),
|
||||
.RESET_RAM (RESET_RAM),
|
||||
.INIT_ENABLE (INIT_ENABLE),
|
||||
.INIT_FILE (INIT_FILE),
|
||||
.INIT_VALUE (INIT_VALUE),
|
||||
.ADDRW (ADDRW)
|
||||
) dp_ram (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (read),
|
||||
.write (write),
|
||||
.wren (wren),
|
||||
|
|
|
@ -18,7 +18,7 @@ module VX_stream_arb #(
|
|||
parameter NUM_INPUTS = 1,
|
||||
parameter NUM_OUTPUTS = 1,
|
||||
parameter DATAW = 1,
|
||||
parameter `STRING ARBITER = "P",
|
||||
parameter `STRING ARBITER = "R",
|
||||
parameter MAX_FANOUT = `MAX_FANOUT,
|
||||
parameter OUT_BUF = 0,
|
||||
parameter LUTRAM = 0,
|
||||
|
@ -46,14 +46,14 @@ module VX_stream_arb #(
|
|||
|
||||
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
|
||||
|
||||
localparam BATCH_BEGIN = i * NUM_REQS;
|
||||
localparam BATCH_END = `MIN(BATCH_BEGIN + NUM_REQS, NUM_INPUTS);
|
||||
localparam BATCH_SIZE = BATCH_END - BATCH_BEGIN;
|
||||
localparam SLICE_BEGIN = i * NUM_REQS;
|
||||
localparam SLICE_END = `MIN(SLICE_BEGIN + NUM_REQS, NUM_INPUTS);
|
||||
localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN;
|
||||
|
||||
`RESET_RELAY (slice_reset, reset);
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (BATCH_SIZE),
|
||||
.NUM_INPUTS (SLICE_SIZE),
|
||||
.NUM_OUTPUTS (1),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
|
@ -63,9 +63,9 @@ module VX_stream_arb #(
|
|||
) arb_slice (
|
||||
.clk (clk),
|
||||
.reset (slice_reset),
|
||||
.valid_in (valid_in[BATCH_END-1: BATCH_BEGIN]),
|
||||
.ready_in (ready_in[BATCH_END-1: BATCH_BEGIN]),
|
||||
.data_in (data_in[BATCH_END-1: BATCH_BEGIN]),
|
||||
.valid_in (valid_in[SLICE_END-1: SLICE_BEGIN]),
|
||||
.ready_in (ready_in[SLICE_END-1: SLICE_BEGIN]),
|
||||
.data_in (data_in[SLICE_END-1: SLICE_BEGIN]),
|
||||
.data_out (data_out[i]),
|
||||
.sel_out (sel_out[i]),
|
||||
.valid_out (valid_out[i]),
|
||||
|
@ -73,32 +73,32 @@ module VX_stream_arb #(
|
|||
);
|
||||
end
|
||||
|
||||
end else if (MAX_FANOUT != 0 && (NUM_INPUTS > MAX_FANOUT)) begin
|
||||
end else if (MAX_FANOUT != 0 && (NUM_INPUTS > (MAX_FANOUT + MAX_FANOUT /2))) begin
|
||||
|
||||
// (#inputs > max_fanout) and (#outputs == 1)
|
||||
|
||||
localparam NUM_BATCHES = `CDIV(NUM_INPUTS, MAX_FANOUT);
|
||||
localparam NUM_SLICES = `CDIV(NUM_INPUTS, MAX_FANOUT);
|
||||
localparam LOG_NUM_REQS2 = `CLOG2(MAX_FANOUT);
|
||||
localparam LOG_NUM_REQS3 = `CLOG2(NUM_BATCHES);
|
||||
localparam LOG_NUM_REQS3 = `CLOG2(NUM_SLICES);
|
||||
|
||||
wire [NUM_BATCHES-1:0] valid_tmp;
|
||||
wire [NUM_BATCHES-1:0][DATAW+LOG_NUM_REQS2-1:0] data_tmp;
|
||||
wire [NUM_BATCHES-1:0] ready_tmp;
|
||||
wire [NUM_SLICES-1:0] valid_tmp;
|
||||
wire [NUM_SLICES-1:0][DATAW+LOG_NUM_REQS2-1:0] data_tmp;
|
||||
wire [NUM_SLICES-1:0] ready_tmp;
|
||||
|
||||
for (genvar i = 0; i < NUM_BATCHES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_SLICES; ++i) begin
|
||||
|
||||
localparam BATCH_BEGIN = i * MAX_FANOUT;
|
||||
localparam BATCH_END = `MIN(BATCH_BEGIN + MAX_FANOUT, NUM_INPUTS);
|
||||
localparam BATCH_SIZE = BATCH_END - BATCH_BEGIN;
|
||||
localparam SLICE_BEGIN = i * MAX_FANOUT;
|
||||
localparam SLICE_END = `MIN(SLICE_BEGIN + MAX_FANOUT, NUM_INPUTS);
|
||||
localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN;
|
||||
|
||||
wire [DATAW-1:0] data_tmp_u;
|
||||
wire [`LOG2UP(BATCH_SIZE)-1:0] sel_tmp_u;
|
||||
wire [`LOG2UP(SLICE_SIZE)-1:0] sel_tmp_u;
|
||||
|
||||
`RESET_RELAY (slice_reset, reset);
|
||||
|
||||
if (MAX_FANOUT != 1) begin
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (BATCH_SIZE),
|
||||
.NUM_INPUTS (SLICE_SIZE),
|
||||
.NUM_OUTPUTS (1),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
|
@ -108,9 +108,9 @@ module VX_stream_arb #(
|
|||
) fanout_slice_arb (
|
||||
.clk (clk),
|
||||
.reset (slice_reset),
|
||||
.valid_in (valid_in[BATCH_END-1: BATCH_BEGIN]),
|
||||
.data_in (data_in[BATCH_END-1: BATCH_BEGIN]),
|
||||
.ready_in (ready_in[BATCH_END-1: BATCH_BEGIN]),
|
||||
.valid_in (valid_in[SLICE_END-1: SLICE_BEGIN]),
|
||||
.data_in (data_in[SLICE_END-1: SLICE_BEGIN]),
|
||||
.ready_in (ready_in[SLICE_END-1: SLICE_BEGIN]),
|
||||
.valid_out (valid_tmp[i]),
|
||||
.data_out (data_tmp_u),
|
||||
.sel_out (sel_tmp_u),
|
||||
|
@ -125,7 +125,7 @@ module VX_stream_arb #(
|
|||
wire [LOG_NUM_REQS3-1:0] sel_out_u;
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (NUM_BATCHES),
|
||||
.NUM_INPUTS (NUM_SLICES),
|
||||
.NUM_OUTPUTS (1),
|
||||
.DATAW (DATAW + LOG_NUM_REQS2),
|
||||
.ARBITER (ARBITER),
|
||||
|
@ -174,17 +174,9 @@ module VX_stream_arb #(
|
|||
);
|
||||
|
||||
assign valid_in_r = arb_valid;
|
||||
assign data_in_r = data_in[arb_index];
|
||||
assign arb_ready = ready_in_r;
|
||||
|
||||
VX_onehot_mux #(
|
||||
.DATAW (DATAW),
|
||||
.N (NUM_REQS)
|
||||
) onehot_mux (
|
||||
.data_in (data_in),
|
||||
.sel_in (arb_onehot),
|
||||
.data_out (data_in_r)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign ready_in[i] = ready_in_r && arb_onehot[i];
|
||||
end
|
||||
|
@ -214,15 +206,15 @@ module VX_stream_arb #(
|
|||
|
||||
for (genvar i = 0; i < NUM_INPUTS; ++i) begin
|
||||
|
||||
localparam BATCH_BEGIN = i * NUM_REQS;
|
||||
localparam BATCH_END = `MIN(BATCH_BEGIN + NUM_REQS, NUM_OUTPUTS);
|
||||
localparam BATCH_SIZE = BATCH_END - BATCH_BEGIN;
|
||||
localparam SLICE_BEGIN = i * NUM_REQS;
|
||||
localparam SLICE_END = `MIN(SLICE_BEGIN + NUM_REQS, NUM_OUTPUTS);
|
||||
localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN;
|
||||
|
||||
`RESET_RELAY (slice_reset, reset);
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (1),
|
||||
.NUM_OUTPUTS (BATCH_SIZE),
|
||||
.NUM_OUTPUTS (SLICE_SIZE),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
|
@ -234,30 +226,30 @@ module VX_stream_arb #(
|
|||
.valid_in (valid_in[i]),
|
||||
.ready_in (ready_in[i]),
|
||||
.data_in (data_in[i]),
|
||||
.data_out (data_out[BATCH_END-1: BATCH_BEGIN]),
|
||||
.valid_out (valid_out[BATCH_END-1: BATCH_BEGIN]),
|
||||
.ready_out (ready_out[BATCH_END-1: BATCH_BEGIN]),
|
||||
.data_out (data_out[SLICE_END-1: SLICE_BEGIN]),
|
||||
.valid_out (valid_out[SLICE_END-1: SLICE_BEGIN]),
|
||||
.ready_out (ready_out[SLICE_END-1: SLICE_BEGIN]),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
for (genvar j = BATCH_BEGIN; j < BATCH_END; ++j) begin
|
||||
for (genvar j = SLICE_BEGIN; j < SLICE_END; ++j) begin
|
||||
assign sel_out[j] = i;
|
||||
end
|
||||
end
|
||||
|
||||
end else if (MAX_FANOUT != 0 && (NUM_OUTPUTS > MAX_FANOUT)) begin
|
||||
end else if (MAX_FANOUT != 0 && (NUM_OUTPUTS > (MAX_FANOUT + MAX_FANOUT /2))) begin
|
||||
|
||||
// (#inputs == 1) and (#outputs > max_fanout)
|
||||
|
||||
localparam NUM_BATCHES = `CDIV(NUM_OUTPUTS, MAX_FANOUT);
|
||||
localparam NUM_SLICES = `CDIV(NUM_OUTPUTS, MAX_FANOUT);
|
||||
|
||||
wire [NUM_BATCHES-1:0] valid_tmp;
|
||||
wire [NUM_BATCHES-1:0][DATAW-1:0] data_tmp;
|
||||
wire [NUM_BATCHES-1:0] ready_tmp;
|
||||
wire [NUM_SLICES-1:0] valid_tmp;
|
||||
wire [NUM_SLICES-1:0][DATAW-1:0] data_tmp;
|
||||
wire [NUM_SLICES-1:0] ready_tmp;
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (1),
|
||||
.NUM_OUTPUTS (NUM_BATCHES),
|
||||
.NUM_OUTPUTS (NUM_SLICES),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
|
@ -275,17 +267,17 @@ module VX_stream_arb #(
|
|||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_BATCHES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_SLICES; ++i) begin
|
||||
|
||||
localparam BATCH_BEGIN = i * MAX_FANOUT;
|
||||
localparam BATCH_END = `MIN(BATCH_BEGIN + MAX_FANOUT, NUM_OUTPUTS);
|
||||
localparam BATCH_SIZE = BATCH_END - BATCH_BEGIN;
|
||||
localparam SLICE_BEGIN = i * MAX_FANOUT;
|
||||
localparam SLICE_END = `MIN(SLICE_BEGIN + MAX_FANOUT, NUM_OUTPUTS);
|
||||
localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN;
|
||||
|
||||
`RESET_RELAY (slice_reset, reset);
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (1),
|
||||
.NUM_OUTPUTS (BATCH_SIZE),
|
||||
.NUM_OUTPUTS (SLICE_SIZE),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
|
@ -297,9 +289,9 @@ module VX_stream_arb #(
|
|||
.valid_in (valid_tmp[i]),
|
||||
.ready_in (ready_tmp[i]),
|
||||
.data_in (data_tmp[i]),
|
||||
.data_out (data_out[BATCH_END-1: BATCH_BEGIN]),
|
||||
.valid_out (valid_out[BATCH_END-1: BATCH_BEGIN]),
|
||||
.ready_out (ready_out[BATCH_END-1: BATCH_BEGIN]),
|
||||
.data_out (data_out[SLICE_END-1: SLICE_BEGIN]),
|
||||
.valid_out (valid_out[SLICE_END-1: SLICE_BEGIN]),
|
||||
.ready_out (ready_out[SLICE_END-1: SLICE_BEGIN]),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
end
|
||||
|
@ -357,9 +349,9 @@ module VX_stream_arb #(
|
|||
|
||||
// #Inputs == #Outputs
|
||||
|
||||
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
|
||||
`RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT);
|
||||
|
||||
`RESET_RELAY_EN (out_buf_reset, reset, (NUM_OUTPUTS > 1));
|
||||
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
|
@ -368,7 +360,7 @@ module VX_stream_arb #(
|
|||
.LUTRAM (LUTRAM)
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (out_buf_reset),
|
||||
.reset (out_buf_reset[i]),
|
||||
.valid_in (valid_in[i]),
|
||||
.ready_in (ready_in[i]),
|
||||
.data_in (data_in[i]),
|
||||
|
|
|
@ -39,8 +39,9 @@ module VX_stream_pack #(
|
|||
input wire ready_out
|
||||
);
|
||||
if (NUM_REQS > 1) begin
|
||||
localparam LOG_NUM_REQS = `CLOG2(NUM_REQS);
|
||||
|
||||
wire [NUM_REQS-1:0] grant_onehot;
|
||||
wire [LOG_NUM_REQS-1:0] grant_index;
|
||||
wire grant_valid;
|
||||
wire grant_ready;
|
||||
|
||||
|
@ -52,21 +53,12 @@ module VX_stream_pack #(
|
|||
.reset (reset),
|
||||
.requests (valid_in),
|
||||
.grant_valid (grant_valid),
|
||||
`UNUSED_PIN (grant_index),
|
||||
.grant_onehot(grant_onehot),
|
||||
.grant_index (grant_index),
|
||||
`UNUSED_PIN (grant_onehot),
|
||||
.grant_ready (grant_ready)
|
||||
);
|
||||
|
||||
wire [TAG_WIDTH-1:0] tag_sel;
|
||||
|
||||
VX_onehot_mux #(
|
||||
.DATAW (TAG_WIDTH),
|
||||
.N (NUM_REQS)
|
||||
) onehot_mux (
|
||||
.data_in (tag_in),
|
||||
.sel_in (grant_onehot),
|
||||
.data_out (tag_sel)
|
||||
);
|
||||
wire [TAG_WIDTH-1:0] tag_sel = tag_in[grant_index];
|
||||
|
||||
wire [NUM_REQS-1:0] tag_matches;
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -33,7 +33,7 @@ module VX_stream_switch #(
|
|||
output wire [NUM_INPUTS-1:0] ready_in,
|
||||
|
||||
output wire [NUM_OUTPUTS-1:0] valid_out,
|
||||
output wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out,
|
||||
output wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out,
|
||||
input wire [NUM_OUTPUTS-1:0] ready_out
|
||||
);
|
||||
if (NUM_INPUTS > NUM_OUTPUTS) begin
|
||||
|
@ -52,7 +52,7 @@ module VX_stream_switch #(
|
|||
assign data_in_r[i][j] = '0;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire [NUM_OUTPUTS-1:0] valid_out_r;
|
||||
wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out_r;
|
||||
|
@ -65,25 +65,24 @@ module VX_stream_switch #(
|
|||
|
||||
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
|
||||
for (genvar j = 0; j < NUM_REQS; ++j) begin
|
||||
localparam ii = i * NUM_REQS + j;
|
||||
if (ii < NUM_INPUTS) begin
|
||||
localparam ii = i * NUM_REQS + j;
|
||||
if (ii < NUM_INPUTS) begin
|
||||
assign ready_in[ii] = ready_out_r[i] & (sel_in[i] == LOG_NUM_REQS'(j));
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
`RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT);
|
||||
|
||||
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
|
||||
|
||||
`RESET_RELAY_EN (out_buf_reset, reset, (NUM_OUTPUTS > 1));
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (out_buf_reset),
|
||||
.valid_in (valid_out_r[i]),
|
||||
.reset (out_buf_reset[i]),
|
||||
.valid_in (valid_out_r[i]),
|
||||
.ready_in (ready_out_r[i]),
|
||||
.data_in (data_out_r[i]),
|
||||
.data_out (data_out[i]),
|
||||
|
@ -93,7 +92,7 @@ module VX_stream_switch #(
|
|||
end
|
||||
|
||||
end else if (NUM_OUTPUTS > NUM_INPUTS) begin
|
||||
|
||||
|
||||
wire [NUM_INPUTS-1:0][NUM_REQS-1:0] valid_out_r;
|
||||
wire [NUM_INPUTS-1:0][NUM_REQS-1:0] ready_out_r;
|
||||
|
||||
|
@ -104,51 +103,50 @@ module VX_stream_switch #(
|
|||
assign ready_in[i] = ready_out_r[i][sel_in[i]];
|
||||
end
|
||||
|
||||
`RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT);
|
||||
|
||||
for (genvar i = 0; i < NUM_INPUTS; ++i) begin
|
||||
for (genvar j = 0; j < NUM_REQS; ++j) begin
|
||||
localparam ii = i * NUM_REQS + j;
|
||||
if (ii < NUM_OUTPUTS) begin
|
||||
|
||||
`RESET_RELAY (out_buf_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (out_buf_reset),
|
||||
.reset (out_buf_reset[ii]),
|
||||
.valid_in (valid_out_r[i][j]),
|
||||
.ready_in (ready_out_r[i][j]),
|
||||
.data_in (data_in[i]),
|
||||
.data_in (data_in[i]),
|
||||
.data_out (data_out[ii]),
|
||||
.valid_out (valid_out[ii]),
|
||||
.ready_out (ready_out[ii])
|
||||
);
|
||||
end else begin
|
||||
`UNUSED_VAR (out_buf_reset[ii])
|
||||
`UNUSED_VAR (valid_out_r[i][j])
|
||||
assign ready_out_r[i][j] = '0;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
end else begin
|
||||
|
||||
// #Inputs == #Outputs
|
||||
|
||||
|
||||
`UNUSED_VAR (sel_in)
|
||||
|
||||
`RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT);
|
||||
|
||||
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
|
||||
|
||||
`RESET_RELAY_EN (out_buf_reset, reset, (NUM_OUTPUTS > 1));
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (out_buf_reset),
|
||||
.reset (out_buf_reset[i]),
|
||||
.valid_in (valid_in[i]),
|
||||
.ready_in (ready_in[i]),
|
||||
.data_in (data_in[i]),
|
||||
|
@ -159,6 +157,6 @@ module VX_stream_switch #(
|
|||
end
|
||||
|
||||
end
|
||||
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
|
|
|
@ -20,7 +20,7 @@ module VX_stream_xbar #(
|
|||
parameter DATAW = 4,
|
||||
parameter IN_WIDTH = `LOG2UP(NUM_INPUTS),
|
||||
parameter OUT_WIDTH = `LOG2UP(NUM_OUTPUTS),
|
||||
parameter ARBITER = "P",
|
||||
parameter ARBITER = "R",
|
||||
parameter OUT_BUF = 0,
|
||||
parameter LUTRAM = 0,
|
||||
parameter MAX_FANOUT = `MAX_FANOUT,
|
||||
|
@ -126,10 +126,9 @@ module VX_stream_xbar #(
|
|||
assign data_out_r = {NUM_OUTPUTS{data_in}};
|
||||
assign ready_in = ready_out_r[sel_in];
|
||||
|
||||
`RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT);
|
||||
|
||||
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
|
||||
|
||||
`RESET_RELAY (out_buf_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||
|
@ -137,7 +136,7 @@ module VX_stream_xbar #(
|
|||
.LUTRAM (LUTRAM)
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (out_buf_reset),
|
||||
.reset (out_buf_reset[i]),
|
||||
.valid_in (valid_out_r[i]),
|
||||
.ready_in (ready_out_r[i]),
|
||||
.data_in (data_out_r[i]),
|
||||
|
|
|
@ -94,7 +94,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_req_idx;
|
||||
wire [NUM_BANKS-1:0] per_bank_req_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0][REQ_DATAW-1:0] per_bank_req_data_all;
|
||||
wire [NUM_BANKS-1:0][REQ_DATAW-1:0] per_bank_req_data_aos;
|
||||
|
||||
wire [NUM_REQS-1:0] req_valid_in;
|
||||
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in;
|
||||
|
@ -111,7 +111,8 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
req_bank_addr[i],
|
||||
mem_bus_if[i].req_data.byteen,
|
||||
mem_bus_if[i].req_data.data,
|
||||
mem_bus_if[i].req_data.tag};
|
||||
mem_bus_if[i].req_data.tag
|
||||
};
|
||||
assign mem_bus_if[i].req_ready = req_ready_in[i];
|
||||
end
|
||||
|
||||
|
@ -120,6 +121,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
.NUM_OUTPUTS (NUM_BANKS),
|
||||
.DATAW (REQ_DATAW),
|
||||
.PERF_CTR_BITS (`PERF_CTR_BITS),
|
||||
.ARBITER ("F"),
|
||||
.OUT_BUF (3) // output should be registered for the data_store addressing
|
||||
) req_xbar (
|
||||
.clk (clk),
|
||||
|
@ -134,7 +136,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
.sel_in (req_bank_idx),
|
||||
.ready_in (req_ready_in),
|
||||
.valid_out (per_bank_req_valid),
|
||||
.data_out (per_bank_req_data_all),
|
||||
.data_out (per_bank_req_data_aos),
|
||||
.sel_out (per_bank_req_idx),
|
||||
.ready_out (per_bank_req_ready)
|
||||
);
|
||||
|
@ -145,7 +147,8 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
per_bank_req_addr[i],
|
||||
per_bank_req_byteen[i],
|
||||
per_bank_req_data[i],
|
||||
per_bank_req_tag[i]} = per_bank_req_data_all[i];
|
||||
per_bank_req_tag[i]
|
||||
} = per_bank_req_data_aos[i];
|
||||
end
|
||||
|
||||
// banks access
|
||||
|
@ -156,38 +159,55 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_rsp_tag;
|
||||
wire [NUM_BANKS-1:0] per_bank_rsp_ready;
|
||||
|
||||
`RESET_RELAY (bank_reset, reset);
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
wire bank_rsp_valid, bank_rsp_ready;
|
||||
wire [WORD_WIDTH-1:0] bank_rsp_data;
|
||||
|
||||
`RESET_RELAY_EN (bram_reset, reset, (NUM_BANKS > 1));
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (WORD_WIDTH),
|
||||
.SIZE (WORDS_PER_BANK),
|
||||
.WRENW (WORD_SIZE)
|
||||
.WRENW (WORD_SIZE),
|
||||
.NO_RWCHECK (1)
|
||||
) data_store (
|
||||
.clk (clk),
|
||||
.read (1'b1),
|
||||
.reset (bram_reset),
|
||||
.read (per_bank_req_valid[i] && per_bank_req_ready[i] && ~per_bank_req_rw[i]),
|
||||
.write (per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i]),
|
||||
.wren (per_bank_req_byteen[i]),
|
||||
.addr (per_bank_req_addr[i]),
|
||||
.wdata (per_bank_req_data[i]),
|
||||
.rdata (per_bank_rsp_data[i])
|
||||
.rdata (bank_rsp_data)
|
||||
);
|
||||
|
||||
// drop write response
|
||||
wire per_bank_req_valid_w, per_bank_req_ready_w;
|
||||
assign per_bank_req_valid_w = per_bank_req_valid[i] && ~per_bank_req_rw[i];
|
||||
assign per_bank_req_ready[i] = per_bank_req_ready_w || per_bank_req_rw[i];
|
||||
// read-during-write hazard detection
|
||||
reg [BANK_ADDR_WIDTH-1:0] last_wr_addr;
|
||||
reg last_wr_valid;
|
||||
always @(posedge clk) begin
|
||||
if (bram_reset) begin
|
||||
last_wr_valid <= 0;
|
||||
end else begin
|
||||
last_wr_valid <= per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i];
|
||||
end
|
||||
last_wr_addr <= per_bank_req_addr[i];
|
||||
end
|
||||
wire is_rdw_hazard = last_wr_valid && ~per_bank_req_rw[i] && (per_bank_req_addr[i] == last_wr_addr);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (REQ_SEL_WIDTH + TAG_WIDTH),
|
||||
.SIZE (0)
|
||||
) bank_buf (
|
||||
// drop write response and stall on read-during-write hazard
|
||||
assign bank_rsp_valid = per_bank_req_valid[i] && ~per_bank_req_rw[i] && ~is_rdw_hazard;
|
||||
assign per_bank_req_ready[i] = (bank_rsp_ready || per_bank_req_rw[i]) && ~is_rdw_hazard;
|
||||
|
||||
// register BRAM output
|
||||
VX_pipe_buffer #(
|
||||
.DATAW (REQ_SEL_WIDTH + WORD_WIDTH + TAG_WIDTH)
|
||||
) bram_buf (
|
||||
.clk (clk),
|
||||
.reset (bank_reset),
|
||||
.valid_in (per_bank_req_valid_w),
|
||||
.ready_in (per_bank_req_ready_w),
|
||||
.data_in ({per_bank_req_idx[i], per_bank_req_tag[i]}),
|
||||
.data_out ({per_bank_rsp_idx[i], per_bank_rsp_tag[i]}),
|
||||
.reset (bram_reset),
|
||||
.valid_in (bank_rsp_valid),
|
||||
.ready_in (bank_rsp_ready),
|
||||
.data_in ({per_bank_req_idx[i], bank_rsp_data, per_bank_req_tag[i]}),
|
||||
.data_out ({per_bank_rsp_idx[i], per_bank_rsp_data[i], per_bank_rsp_tag[i]}),
|
||||
.valid_out (per_bank_rsp_valid[i]),
|
||||
.ready_out (per_bank_rsp_ready[i])
|
||||
);
|
||||
|
@ -195,10 +215,10 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
|
||||
// bank responses gather
|
||||
|
||||
wire [NUM_BANKS-1:0][RSP_DATAW-1:0] per_bank_rsp_data_all;
|
||||
wire [NUM_BANKS-1:0][RSP_DATAW-1:0] per_bank_rsp_data_aos;
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign per_bank_rsp_data_all[i] = {per_bank_rsp_data[i], per_bank_rsp_tag[i]};
|
||||
assign per_bank_rsp_data_aos[i] = {per_bank_rsp_data[i], per_bank_rsp_tag[i]};
|
||||
end
|
||||
|
||||
wire [NUM_REQS-1:0] rsp_valid_out;
|
||||
|
@ -209,6 +229,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
.NUM_INPUTS (NUM_BANKS),
|
||||
.NUM_OUTPUTS (NUM_REQS),
|
||||
.DATAW (RSP_DATAW),
|
||||
.ARBITER ("P"), // this priority arbiter has negligible impact on performance
|
||||
.OUT_BUF (OUT_BUF)
|
||||
) rsp_xbar (
|
||||
.clk (clk),
|
||||
|
@ -216,7 +237,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
`UNUSED_PIN (collisions),
|
||||
.sel_in (per_bank_rsp_idx),
|
||||
.valid_in (per_bank_rsp_valid),
|
||||
.data_in (per_bank_rsp_data_all),
|
||||
.data_in (per_bank_rsp_data_aos),
|
||||
.ready_in (per_bank_rsp_ready),
|
||||
.valid_out (rsp_valid_out),
|
||||
.data_out (rsp_data_out),
|
||||
|
@ -310,7 +331,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
always @(posedge clk) begin
|
||||
if (mem_bus_if[i].req_valid && mem_bus_if[i].req_ready) begin
|
||||
if (mem_bus_if[i].req_data.rw) begin
|
||||
`TRACE(1, ("%d: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
|
||||
`TRACE(1, ("%d: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=%h, data=0x%h (#%0d)\n",
|
||||
$time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, req_uuid[i]));
|
||||
end else begin
|
||||
`TRACE(1, ("%d: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n",
|
||||
|
@ -318,7 +339,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
if (mem_bus_if[i].rsp_valid && mem_bus_if[i].rsp_ready) begin
|
||||
`TRACE(1, ("%d: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%0h (#%0d)\n",
|
||||
`TRACE(1, ("%d: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%h (#%0d)\n",
|
||||
$time, INSTANCE_ID, i, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data[i], rsp_uuid[i]));
|
||||
end
|
||||
end
|
||||
|
@ -328,7 +349,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
always @(posedge clk) begin
|
||||
if (per_bank_req_valid[i] && per_bank_req_ready[i]) begin
|
||||
if (per_bank_req_rw[i]) begin
|
||||
`TRACE(2, ("%d: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
|
||||
`TRACE(2, ("%d: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=%h, data=0x%h (#%0d)\n",
|
||||
$time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_byteen[i], per_bank_req_data[i], per_bank_req_uuid[i]));
|
||||
end else begin
|
||||
`TRACE(2, ("%d: %s-bank%0d rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
|
||||
|
@ -336,7 +357,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
if (per_bank_rsp_valid[i] && per_bank_rsp_ready[i]) begin
|
||||
`TRACE(2, ("%d: %s-bank%0d rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n",
|
||||
`TRACE(2, ("%d: %s-bank%0d rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n",
|
||||
$time, INSTANCE_ID, i, per_bank_rsp_tag[i], per_bank_rsp_data[i], per_bank_rsp_uuid[i]));
|
||||
end
|
||||
end
|
||||
|
|
|
@ -73,12 +73,12 @@ ifneq ($(TARGET), fpga)
|
|||
CFLAGS += -DSIMULATION
|
||||
endif
|
||||
|
||||
# Debugigng
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
ifneq ($(TARGET), fpga)
|
||||
CFLAGS += -DNDEBUG
|
||||
CFLAGS += -DDEBUG_LEVEL=$(DEBUG) $(DBG_TRACE_FLAGS)
|
||||
else
|
||||
CFLAGS += $(DBG_TRACE_FLAGS)
|
||||
CFLAGS += -DNDEBUG
|
||||
endif
|
||||
else
|
||||
CFLAGS += -DNDEBUG
|
||||
|
|
|
@ -1 +1 @@
|
|||
create_clock -name {clk} -period "220 MHz" -waveform { 0.000 1.0 } [get_ports {clk}]
|
||||
create_clock -name {clk} -period "200 MHz" -waveform { 0.000 1.0 } [get_ports {clk}]
|
|
@ -45,6 +45,7 @@ FPGA_BIN_DIR=<bin_dir> XRT_DEVICE_INDEX=1 TARGET=hw ./ci/blackbox.sh --driver=xr
|
|||
|
||||
# build report logs
|
||||
<build_dir>/bin/vortex_afu.xclbin.info
|
||||
<build_dir>/_x/logs/link/vivado.log # search for keyword "Very high fanout"
|
||||
<build_dir>/_x/reports/link/link/imp/impl_1_full_util_routed.rpt
|
||||
<build_dir>/_x/reports/link/imp/impl_1_hw_bb_locked_timing_summary_routed.rpt # search for keyword "VIOLATED"
|
||||
<build_dir>/_x/logs/link/syn/ulp_vortex_afu_1_0_synth_1_runme.log
|
||||
|
|
|
@ -111,14 +111,14 @@ ifeq ($(TARGET), hw_emu)
|
|||
CFLAGS += -DSIMULATION
|
||||
endif
|
||||
|
||||
# Debugigng
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
VPP_FLAGS += -g --debug.protocol all
|
||||
ifneq ($(TARGET), hw)
|
||||
CFLAGS += -DNDEBUG
|
||||
else
|
||||
VPP_FLAGS += --vivado.prop fileset.sim_1.xsim.elaborate.debug_level=all
|
||||
CFLAGS += $(DBG_TRACE_FLAGS)
|
||||
CFLAGS += -DDEBUG_LEVEL=$(DEBUG) $(DBG_TRACE_FLAGS)
|
||||
else
|
||||
CFLAGS += -DNDEBUG
|
||||
endif
|
||||
else
|
||||
VPP_FLAGS += --optimize 3
|
||||
|
|
|
@ -49,7 +49,7 @@ endif
|
|||
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache
|
||||
RTL_INCLUDE += $(FPU_INCLUDE)
|
||||
|
||||
# Debugigng
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
CFLAGS += $(DBG_TRACE_FLAGS)
|
||||
else
|
||||
|
|
|
@ -29,7 +29,7 @@ THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count()
|
|||
VL_FLAGS += -j $(THREADS)
|
||||
#VL_FLAGS += --threads $(THREADS)
|
||||
|
||||
# Debugigng
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
|
||||
CXXFLAGS += -g -O0 $(DBG_FLAGS)
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
|
||||
#include <VX_config.h>
|
||||
#include <VX_types.h>
|
||||
#include <newlib.h>
|
||||
#include "common.h"
|
||||
|
||||
.section .init, "ax"
|
||||
|
@ -51,12 +52,10 @@ _start:
|
|||
# la t0, trap_entry
|
||||
# csrw mtvec, t0
|
||||
|
||||
# register global termination functions
|
||||
la a0, __libc_fini_array
|
||||
call atexit
|
||||
|
||||
#ifdef HAVE_INITFINI_ARRAY
|
||||
# run global initialization functions
|
||||
call __libc_init_array
|
||||
#endif
|
||||
|
||||
# call main program routine
|
||||
call main
|
||||
|
|
|
@ -119,70 +119,13 @@ void __libc_fini_array (void) {
|
|||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
#define MAX_CORES 64
|
||||
volatile int g_cxa_locks[MAX_CORES] = {0};
|
||||
*/
|
||||
|
||||
void __cxa_lock() {
|
||||
/*int core_id = vx_core_id();
|
||||
g_cxa_locks[core_id] = 1;
|
||||
vx_fence();
|
||||
for (int i = 1; i < MAX_CORES; ++i) {
|
||||
int other = (core_id + i) % MAX_CORES;
|
||||
while (g_cxa_locks[other]) {
|
||||
vx_fence(); // cache coherence not supported, so we need to flush the caches
|
||||
}
|
||||
}*/
|
||||
}
|
||||
|
||||
void __cxa_unlock() {
|
||||
/*vx_fence();
|
||||
int core_id = vx_core_id();
|
||||
g_cxa_locks[core_id] = 0;*/
|
||||
}
|
||||
|
||||
#define MAX_FEXITS 64
|
||||
|
||||
typedef struct {
|
||||
void (*f[MAX_FEXITS])(void*);
|
||||
void *a[MAX_FEXITS];
|
||||
} fexit_list_t;
|
||||
|
||||
static fexit_list_t g_fexit_list;
|
||||
static int g_num_fexits = 0;
|
||||
|
||||
void __funcs_on_exit() {
|
||||
void (*func)(void *), *arg;
|
||||
fexit_list_t* fexit_list = &g_fexit_list;
|
||||
for (int i = 0; i < g_num_fexits; ++i) {
|
||||
func = fexit_list->f[i];
|
||||
arg = fexit_list->a[i];
|
||||
func(arg);
|
||||
}
|
||||
}
|
||||
|
||||
void __cxa_finalize(void *dso) {}
|
||||
|
||||
int __cxa_atexit(void (*func)(void *), void *arg, void *dso) {
|
||||
__cxa_lock();
|
||||
int num_fexits = g_num_fexits;
|
||||
if (num_fexits >= MAX_FEXITS)
|
||||
return -1;
|
||||
fexit_list_t* fexit_list = &g_fexit_list;
|
||||
fexit_list->f[num_fexits] = func;
|
||||
fexit_list->a[num_fexits] = arg;
|
||||
g_num_fexits = num_fexits + 1;
|
||||
__cxa_unlock();
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void call(void *p) {
|
||||
((void (*)(void))(uintptr_t)p)();
|
||||
}
|
||||
|
||||
int atexit(void (*func)(void)) {
|
||||
return __cxa_atexit(call, (void*)(uintptr_t)func, 0);
|
||||
// This function will be called by LIBC at program exit.
|
||||
// Since this platform only supports statically linked programs,
|
||||
// it is not required to support LIBC's exit functions registration via atexit().
|
||||
void __funcs_on_exit (void) {
|
||||
#ifdef HAVE_INITFINI_ARRAY
|
||||
__libc_fini_array();
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
|
||||
#include <cstdint>
|
||||
#include <unordered_map>
|
||||
#include <array>
|
||||
|
||||
#define CACHE_BLOCK_SIZE 64
|
||||
|
||||
|
|
|
@ -34,6 +34,7 @@ typedef void* vx_buffer_h;
|
|||
#define VX_CAPS_GLOBAL_MEM_SIZE 0x5
|
||||
#define VX_CAPS_LOCAL_MEM_SIZE 0x6
|
||||
#define VX_CAPS_ISA_FLAGS 0x7
|
||||
#define VX_CAPS_NUM_MEM_BANKS 0x8
|
||||
|
||||
// device isa flags
|
||||
#define VX_ISA_STD_A (1ull << ISA_STD_A)
|
||||
|
|
|
@ -30,7 +30,7 @@ else
|
|||
CXXFLAGS += -I$(SYN_DIR)
|
||||
endif
|
||||
|
||||
# Debugigng
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
CXXFLAGS += -g -O0
|
||||
else
|
||||
|
|
|
@ -232,6 +232,9 @@ public:
|
|||
case VX_CAPS_ISA_FLAGS:
|
||||
_value = isa_caps_;
|
||||
break;
|
||||
case VX_CAPS_NUM_MEM_BANKS:
|
||||
_value = MEMORY_BANKS;
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id);
|
||||
std::abort();
|
||||
|
|
|
@ -19,7 +19,7 @@ LDFLAGS += -L$(DESTDIR) -lrtlsim
|
|||
|
||||
SRCS := $(SRC_DIR)/vortex.cpp
|
||||
|
||||
# Debugigng
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
CXXFLAGS += -g -O0
|
||||
else
|
||||
|
|
|
@ -77,6 +77,9 @@ public:
|
|||
case VX_CAPS_ISA_FLAGS:
|
||||
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
|
||||
break;
|
||||
case VX_CAPS_NUM_MEM_BANKS:
|
||||
_value = MEMORY_BANKS;
|
||||
break;
|
||||
default:
|
||||
std::cout << "invalid caps id: " << caps_id << std::endl;
|
||||
std::abort();
|
||||
|
|
|
@ -19,7 +19,7 @@ LDFLAGS += -L$(DESTDIR) -lsimx
|
|||
|
||||
SRCS := $(SRC_DIR)/vortex.cpp
|
||||
|
||||
# Debugigng
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
CXXFLAGS += -g -O0
|
||||
else
|
||||
|
|
|
@ -105,6 +105,9 @@ public:
|
|||
case VX_CAPS_ISA_FLAGS:
|
||||
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
|
||||
break;
|
||||
case VX_CAPS_NUM_MEM_BANKS:
|
||||
_value = MEMORY_BANKS;
|
||||
break;
|
||||
default:
|
||||
std::cout << "invalid caps id: " << caps_id << std::endl;
|
||||
std::abort();
|
||||
|
|
|
@ -12,7 +12,7 @@ LDFLAGS += -shared -pthread -ldl
|
|||
|
||||
SRCS := $(SRC_DIR)/vortex.cpp $(SRC_DIR)/utils.cpp
|
||||
|
||||
# Debugigng
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
CXXFLAGS += -g -O0
|
||||
else
|
||||
|
|
|
@ -211,6 +211,8 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
|
|||
uint64_t mem_reads = 0;
|
||||
uint64_t mem_writes = 0;
|
||||
uint64_t mem_lat = 0;
|
||||
uint64_t mem_req_counter = 0;
|
||||
uint64_t mem_ticks = 0;
|
||||
|
||||
uint64_t num_cores;
|
||||
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_CORES, &num_cores), {
|
||||
|
@ -221,6 +223,11 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
|
|||
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_ISA_FLAGS, &isa_flags), {
|
||||
return err;
|
||||
});
|
||||
|
||||
uint64_t num_mem_bank_ports;
|
||||
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_MEM_BANKS, &num_mem_bank_ports), {
|
||||
return err;
|
||||
});
|
||||
|
||||
bool icache_enable = isa_flags & VX_ISA_EXT_ICACHE;
|
||||
bool dcache_enable = isa_flags & VX_ISA_EXT_DCACHE;
|
||||
|
@ -314,7 +321,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
|
|||
if (num_cores > 1) {
|
||||
uint64_t scrb_total = scrb_alu_per_core + scrb_fpu_per_core + scrb_lsu_per_core + scrb_csrs_per_core + scrb_wctl_per_core;
|
||||
int scrb_percent_per_core = calcAvgPercent(scrb_stalls_per_core, cycles_per_core);
|
||||
fprintf(stream, "PERF: core%d: scoreboard stalls=%ld (%d%%) (alu=%d%%, fpu=%d%%, lsu=%d%%, scrs=%d%%, wctl=%d%%)\n"
|
||||
fprintf(stream, "PERF: core%d: scoreboard stalls=%ld (%d%%) (alu=%d%%, fpu=%d%%, lsu=%d%%, csrs=%d%%, wctl=%d%%)\n"
|
||||
, core_id
|
||||
, scrb_stalls_per_core
|
||||
, scrb_percent_per_core
|
||||
|
@ -533,6 +540,12 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
|
|||
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_LT, core_id, &mem_lat), {
|
||||
return err;
|
||||
});
|
||||
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_BANK_CNTR, core_id, &mem_req_counter), {
|
||||
return err;
|
||||
});
|
||||
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_BANK_TICK, core_id, &mem_ticks), {
|
||||
return err;
|
||||
});
|
||||
}
|
||||
} break;
|
||||
default:
|
||||
|
@ -559,7 +572,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
|
|||
fprintf(stream, "PERF: scheduler idle=%ld (%d%%)\n", sched_idles, sched_idles_percent);
|
||||
fprintf(stream, "PERF: scheduler stalls=%ld (%d%%)\n", sched_stalls, sched_stalls_percent);
|
||||
fprintf(stream, "PERF: ibuffer stalls=%ld (%d%%)\n", ibuffer_stalls, ibuffer_percent);
|
||||
fprintf(stream, "PERF: scoreboard stalls=%ld (%d%%) (alu=%d%%, fpu=%d%%, lsu=%d%%, scrs=%d%%, wctl=%d%%)\n"
|
||||
fprintf(stream, "PERF: scoreboard stalls=%ld (%d%%) (alu=%d%%, fpu=%d%%, lsu=%d%%, csrs=%d%%, wctl=%d%%)\n"
|
||||
, scrb_stalls
|
||||
, scrb_percent
|
||||
, calcAvgPercent(scrb_alu, scrb_total)
|
||||
|
@ -599,7 +612,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
|
|||
int read_hit_ratio = calcRatio(l3cache_read_misses, l3cache_reads);
|
||||
int write_hit_ratio = calcRatio(l3cache_write_misses, l3cache_writes);
|
||||
int bank_utilization = calcAvgPercent(l3cache_reads + l3cache_writes, l3cache_reads + l3cache_writes + l3cache_bank_stalls);
|
||||
int mshr_utilization = calcAvgPercent(l3cache_read_misses + l3cache_write_misses, l3cache_read_misses + l3cache_write_misses + l3cache_mshr_stalls);
|
||||
int mshr_utilization = calcAvgPercent(l3cache_read_misses + l3cache_write_misses, l3cache_read_misses + l3cache_write_misses + l3cache_mshr_stalls);
|
||||
fprintf(stream, "PERF: l3cache reads=%ld\n", l3cache_reads);
|
||||
fprintf(stream, "PERF: l3cache writes=%ld\n", l3cache_writes);
|
||||
fprintf(stream, "PERF: l3cache read misses=%ld (hit ratio=%d%%)\n", l3cache_read_misses, read_hit_ratio);
|
||||
|
@ -609,8 +622,10 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
|
|||
}
|
||||
|
||||
int mem_avg_lat = caclAverage(mem_lat, mem_reads);
|
||||
int memory_bank_port_utilization = calcAvgPercent(mem_req_counter, (mem_ticks * num_mem_bank_ports));
|
||||
fprintf(stream, "PERF: memory requests=%ld (reads=%ld, writes=%ld)\n", (mem_reads + mem_writes), mem_reads, mem_writes);
|
||||
fprintf(stream, "PERF: memory latency=%d cycles\n", mem_avg_lat);
|
||||
fprintf(stream, "PERF: memory bank port utilization=%d%%\n", memory_bank_port_utilization);
|
||||
} break;
|
||||
default:
|
||||
break;
|
||||
|
|
|
@ -26,7 +26,7 @@ endif
|
|||
|
||||
PROJECT := libvortex-xrt.so
|
||||
|
||||
# Debugigng
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
CXXFLAGS += -g -O0
|
||||
else
|
||||
|
|
|
@ -404,6 +404,9 @@ public:
|
|||
case VX_CAPS_ISA_FLAGS:
|
||||
_value = isa_caps_;
|
||||
break;
|
||||
case VX_CAPS_NUM_MEM_BANKS:
|
||||
_value = MEMORY_BANKS;
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id);
|
||||
std::abort();
|
||||
|
|
|
@ -41,11 +41,11 @@ public:
|
|||
dram_config["MemorySystem"]["DRAM"]["impl"] = "HBM2";
|
||||
dram_config["MemorySystem"]["DRAM"]["org"]["preset"] = "HBM2_8Gb";
|
||||
dram_config["MemorySystem"]["DRAM"]["org"]["density"] = 8192;
|
||||
dram_config["MemorySystem"]["DRAM"]["org"]["channel"] = 8;
|
||||
dram_config["MemorySystem"]["DRAM"]["timing"]["preset"] = "HBM2_2Gbps";
|
||||
dram_config["MemorySystem"]["Controller"]["impl"] = "Generic";
|
||||
dram_config["MemorySystem"]["Controller"]["Scheduler"]["impl"] = "FRFCFS";
|
||||
dram_config["MemorySystem"]["Controller"]["RefreshManager"]["impl"] = "AllBank";
|
||||
dram_config["MemorySystem"]["Controller"]["RefreshManager"]["impl"] = "AllBank";
|
||||
dram_config["MemorySystem"]["Controller"]["RowPolicy"]["impl"] = "OpenRowPolicy";
|
||||
{
|
||||
YAML::Node draw_plugin;
|
||||
|
@ -66,7 +66,7 @@ public:
|
|||
auto original_buf = std::cout.rdbuf();
|
||||
std::cout.rdbuf(nullstream.rdbuf());
|
||||
ramulator_frontend_->finalize();
|
||||
ramulator_memorysystem_->finalize();
|
||||
ramulator_memorysystem_->finalize();
|
||||
std::cout.rdbuf(original_buf);
|
||||
}
|
||||
|
||||
|
|
|
@ -59,7 +59,7 @@ void RamMemDevice::read(void* data, uint64_t addr, uint64_t size) {
|
|||
if ((addr & (wordSize_-1))
|
||||
|| (addr_end & (wordSize_-1))
|
||||
|| (addr_end <= contents_.size())) {
|
||||
std::cout << "lookup of 0x" << std::hex << (addr_end-1) << " failed.\n";
|
||||
std::cout << "lookup of 0x" << std::hex << (addr_end-1) << std::dec << " failed.\n";
|
||||
throw BadAddress();
|
||||
}
|
||||
|
||||
|
@ -74,7 +74,7 @@ void RamMemDevice::write(const void* data, uint64_t addr, uint64_t size) {
|
|||
if ((addr & (wordSize_-1))
|
||||
|| (addr_end & (wordSize_-1))
|
||||
|| (addr_end <= contents_.size())) {
|
||||
std::cout << "lookup of 0x" << std::hex << (addr_end-1) << " failed.\n";
|
||||
std::cout << "lookup of 0x" << std::hex << (addr_end-1) << std::dec << " failed.\n";
|
||||
throw BadAddress();
|
||||
}
|
||||
|
||||
|
@ -115,8 +115,7 @@ void MemoryUnit::ADecoder::map(uint64_t start, uint64_t end, MemDevice &md) {
|
|||
void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) {
|
||||
mem_accessor_t ma;
|
||||
if (!this->lookup(addr, size, &ma)) {
|
||||
assert(0);
|
||||
std::cout << "lookup of 0x" << std::hex << addr << " failed.\n";
|
||||
std::cout << "lookup of 0x" << std::hex << addr << std::dec << " failed.\n";
|
||||
throw BadAddress();
|
||||
}
|
||||
ma.md->read(data, ma.addr, size);
|
||||
|
@ -125,8 +124,7 @@ void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) {
|
|||
void MemoryUnit::ADecoder::write(const void* data, uint64_t addr, uint64_t size) {
|
||||
mem_accessor_t ma;
|
||||
if (!this->lookup(addr, size, &ma)) {
|
||||
assert(0);
|
||||
std::cout << "lookup of 0x" << std::hex << addr << " failed.\n";
|
||||
std::cout << "lookup of 0x" << std::hex << addr << std::dec << " failed.\n";
|
||||
throw BadAddress();
|
||||
}
|
||||
ma.md->write(data, ma.addr, size);
|
||||
|
@ -408,7 +406,7 @@ bool ACLManager::check(uint64_t addr, uint64_t size, int flags) const {
|
|||
while (it != acl_map_.end() && it->first < end) {
|
||||
if (it->second.end > addr) {
|
||||
if ((it->second.flags & flags) != flags) {
|
||||
std::cout << "Memory access violation from 0x" << std::hex << addr << " to 0x" << end << ", curent flags=" << it->second.flags << ", access flags=" << flags << std::endl;
|
||||
std::cout << "Memory access violation from 0x" << std::hex << addr << " to 0x" << end << ", curent flags=" << it->second.flags << ", access flags=" << flags << std::dec << std::endl;
|
||||
return false; // Overlapping entry is missing at least one required flag bit
|
||||
}
|
||||
addr = it->second.end; // Move to the end of the current matching range
|
||||
|
@ -759,4 +757,4 @@ std::pair<uint64_t, uint8_t> MemoryUnit::page_table_walk(uint64_t vAddr_bits, AC
|
|||
return std::make_pair(cur_base_ppn, flags);
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -168,23 +168,23 @@ public:
|
|||
{}
|
||||
|
||||
void* operator new(size_t /*size*/) {
|
||||
return allocator().allocate();
|
||||
return allocator_.allocate();
|
||||
}
|
||||
|
||||
void operator delete(void* ptr) {
|
||||
allocator().deallocate(ptr);
|
||||
allocator_.deallocate(ptr);
|
||||
}
|
||||
|
||||
protected:
|
||||
Func func_;
|
||||
Pkt pkt_;
|
||||
|
||||
static MemoryPool<SimCallEvent<Pkt>>& allocator() {
|
||||
static MemoryPool<SimCallEvent<Pkt>> instance(64);
|
||||
return instance;
|
||||
}
|
||||
static MemoryPool<SimCallEvent<Pkt>> allocator_;
|
||||
};
|
||||
|
||||
template <typename Pkt>
|
||||
MemoryPool<SimCallEvent<Pkt>> SimCallEvent<Pkt>::allocator_(64);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename Pkt>
|
||||
|
@ -201,23 +201,23 @@ public:
|
|||
{}
|
||||
|
||||
void* operator new(size_t /*size*/) {
|
||||
return allocator().allocate();
|
||||
return allocator_.allocate();
|
||||
}
|
||||
|
||||
void operator delete(void* ptr) {
|
||||
allocator().deallocate(ptr);
|
||||
allocator_.deallocate(ptr);
|
||||
}
|
||||
|
||||
protected:
|
||||
const SimPort<Pkt>* port_;
|
||||
Pkt pkt_;
|
||||
|
||||
static MemoryPool<SimPortEvent<Pkt>>& allocator() {
|
||||
static MemoryPool<SimPortEvent<Pkt>> instance(64);
|
||||
return instance;
|
||||
}
|
||||
static MemoryPool<SimPortEvent<Pkt>> allocator_;
|
||||
};
|
||||
|
||||
template <typename Pkt>
|
||||
MemoryPool<SimPortEvent<Pkt>> SimPortEvent<Pkt>::allocator_(64);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class SimContext;
|
||||
|
|
|
@ -70,4 +70,28 @@ const char* fileExtension(const char* filepath);
|
|||
#endif
|
||||
|
||||
void *aligned_malloc(size_t size, size_t alignment);
|
||||
void aligned_free(void *ptr);
|
||||
void aligned_free(void *ptr);
|
||||
|
||||
namespace vortex {
|
||||
|
||||
// Verilator data type casting
|
||||
template <typename R, size_t W, typename Enable = void>
|
||||
class VDataCast;
|
||||
template <typename R, size_t W>
|
||||
class VDataCast<R, W, typename std::enable_if<(W > 8)>::type> {
|
||||
public:
|
||||
template <typename T>
|
||||
static R get(T& obj) {
|
||||
return reinterpret_cast<R>(obj.data());
|
||||
}
|
||||
};
|
||||
template <typename R, size_t W>
|
||||
class VDataCast<R, W, typename std::enable_if<(W <= 8)>::type> {
|
||||
public:
|
||||
template <typename T>
|
||||
static R get(T& obj) {
|
||||
return reinterpret_cast<R>(&obj);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
|
@ -83,13 +83,13 @@ THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count()
|
|||
VL_FLAGS += -j $(THREADS)
|
||||
#VL_FLAGS += --threads $(THREADS)
|
||||
|
||||
# Debugigng
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
|
||||
CXXFLAGS += -g -O0 $(DBG_FLAGS)
|
||||
else
|
||||
VL_FLAGS += -DNDEBUG
|
||||
CXXFLAGS += -O3 -DNDEBUG
|
||||
CXXFLAGS += -O2 -DNDEBUG
|
||||
endif
|
||||
|
||||
# Enable scope analyzer
|
||||
|
@ -123,7 +123,7 @@ $(DESTDIR)/vortex_afu.h : $(AFU_DIR)/vortex_afu.vh
|
|||
$(SCRIPT_DIR)/gen_config.py -i $^ -o $@
|
||||
|
||||
$(DESTDIR)/$(PROJECT): $(SRCS) $(DESTDIR)/vortex_afu.h $(SCOPE_JSON)
|
||||
verilator --build --exe -O3 $(VL_FLAGS) --cc $(TOP) --top-module $(TOP) $(SRCS) -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)' --Mdir $@.obj_dir -o $@
|
||||
verilator --build --exe $(VL_FLAGS) --cc $(TOP) --top-module $(TOP) $(SRCS) -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)' --Mdir $@.obj_dir -o $@
|
||||
|
||||
clean:
|
||||
rm -rf $(DESTDIR)/$(PROJECT).obj_dir
|
||||
|
|
|
@ -35,13 +35,13 @@
|
|||
#include <unordered_map>
|
||||
#include <util.h>
|
||||
|
||||
#ifndef MEMORY_BANKS
|
||||
//#ifndef MEMORY_BANKS
|
||||
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
|
||||
#define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS
|
||||
#else
|
||||
#define MEMORY_BANKS 2
|
||||
#endif
|
||||
#endif
|
||||
//#endif
|
||||
|
||||
#ifndef MEM_CLOCK_RATIO
|
||||
#define MEM_CLOCK_RATIO 1
|
||||
|
@ -380,7 +380,7 @@ private:
|
|||
device_->vcp2af_sRxPort_c0_hdr_resp_type = 0;
|
||||
memcpy(device_->vcp2af_sRxPort_c0_data, cci_rd_it->data.data(), CACHE_BLOCK_SIZE);
|
||||
device_->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata;
|
||||
/*printf("%0ld: [sim] CCI Rd Rsp: addr=%ld, mdata=%d, data=", timestamp, cci_rd_it->addr, cci_rd_it->mdata);
|
||||
/*printf("%0ld: [sim] CCI Rd Rsp: addr=0x%lx, mdata=0x%x, data=0x", timestamp, cci_rd_it->addr, cci_rd_it->mdata);
|
||||
for (int i = 0; i < CACHE_BLOCK_SIZE; ++i)
|
||||
printf("%02x", cci_rd_it->data[CACHE_BLOCK_SIZE-1-i]);
|
||||
printf("\n");*/
|
||||
|
@ -398,7 +398,7 @@ private:
|
|||
cci_req.mdata = device_->af2cp_sTxPort_c0_hdr_mdata;
|
||||
auto host_ptr = (uint64_t*)(device_->af2cp_sTxPort_c0_hdr_address * CACHE_BLOCK_SIZE);
|
||||
memcpy(cci_req.data.data(), host_ptr, CACHE_BLOCK_SIZE);
|
||||
//printf("%0ld: [sim] CCI Rd Req: addr=%ld, mdata=%d\n", timestamp, device_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata);
|
||||
//printf("%0ld: [sim] CCI Rd Req: addr=0x%lx, mdata=0x%x\n", timestamp, device_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata);
|
||||
cci_reads_.emplace_back(cci_req);
|
||||
}
|
||||
|
||||
|
@ -453,7 +453,7 @@ private:
|
|||
}
|
||||
}
|
||||
|
||||
/*printf("%0ld: [sim] MEM Wr Req: bank=%d, addr=%x, data=", timestamp, b, byte_addr);
|
||||
/*printf("%0ld: [sim] MEM Wr Req: bank=%d, 0x%x, data=0x", timestamp, b, byte_addr);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]);
|
||||
}
|
||||
|
|
|
@ -65,7 +65,7 @@ THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count()
|
|||
VL_FLAGS += -j $(THREADS)
|
||||
#VL_FLAGS += --threads $(THREADS)
|
||||
|
||||
# Debugigng
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
|
||||
CXXFLAGS += -g -O0 $(DBG_FLAGS)
|
||||
|
|
|
@ -39,6 +39,7 @@ typedef VVortex Device;
|
|||
#include <unordered_map>
|
||||
|
||||
#include <dram_sim.h>
|
||||
#include <util.h>
|
||||
|
||||
#ifndef MEMORY_BANKS
|
||||
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
|
||||
|
@ -316,11 +317,11 @@ private:
|
|||
auto mem_rsp_it = pending_mem_reqs_.begin();
|
||||
auto mem_rsp = *mem_rsp_it;
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Rd Rsp: addr=%0lx, data=", timestamp, mem_rsp->addr);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
printf("%02x", mem_rsp->block[(MEM_BLOCK_SIZE-1)-i]);
|
||||
}
|
||||
printf("\n");
|
||||
printf("%0ld: [sim] MEM Rd Rsp: addr=0x%0lx, data=0x", timestamp, mem_rsp->addr);
|
||||
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
|
||||
printf("%02x", mem_rsp->block[i]);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
device_->m_axi_rvalid[0] = 1;
|
||||
device_->m_axi_rid[0] = mem_rsp->tag;
|
||||
|
@ -347,7 +348,7 @@ private:
|
|||
auto mem_rsp_it = pending_mem_reqs_.begin();
|
||||
auto mem_rsp = *mem_rsp_it;
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Wr Rsp: addr=%0lx\n", timestamp, mem_rsp->addr);
|
||||
printf("%0ld: [sim] MEM Wr Rsp: addr=0x%0lx\n", timestamp, mem_rsp->addr);
|
||||
*/
|
||||
device_->m_axi_bvalid[0] = 1;
|
||||
device_->m_axi_bid[0] = mem_rsp->tag;
|
||||
|
@ -387,11 +388,15 @@ private:
|
|||
} else {
|
||||
// process writes
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Wr: addr=%0x, byteen=%0lx, data=", timestamp, base_addr, byteen);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]);
|
||||
}
|
||||
printf("\n");
|
||||
printf("%0ld: [sim] MEM Wr: addr=0x%0lx, byteen=0x", timestamp, base_addr);
|
||||
for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) {
|
||||
printf("%x", (int)((byteen >> (4 * i)) & 0xf));
|
||||
}
|
||||
printf(", data=0x");
|
||||
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
|
||||
printf("%02x", data[i]);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
|
@ -459,13 +464,13 @@ private:
|
|||
auto mem_rsp_it = pending_mem_reqs_.begin();
|
||||
auto mem_rsp = *mem_rsp_it;
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Rd: tag=%0lx, addr=%0lx, data=", timestamp, mem_rsp->tag, mem_rsp->addr);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
printf("%02x", mem_rsp->block[(MEM_BLOCK_SIZE-1)-i]);
|
||||
}
|
||||
printf("\n");
|
||||
printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr);
|
||||
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
|
||||
printf("%02x", mem_rsp->block[i]);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
memcpy(device_->mem_rsp_data.data(), mem_rsp->block.data(), MEM_BLOCK_SIZE);
|
||||
memcpy(VDataCast<void*, MEM_BLOCK_SIZE>::get(device_->mem_rsp_data), mem_rsp->block.data(), MEM_BLOCK_SIZE);
|
||||
device_->mem_rsp_tag = mem_rsp->tag;
|
||||
pending_mem_reqs_.erase(mem_rsp_it);
|
||||
mem_rd_rsp_active_ = true;
|
||||
|
@ -480,7 +485,7 @@ private:
|
|||
uint64_t byte_addr = (device_->mem_req_addr * MEM_BLOCK_SIZE);
|
||||
if (device_->mem_req_rw) {
|
||||
auto byteen = device_->mem_req_byteen;
|
||||
auto data = (uint8_t*)(device_->mem_req_data.data());
|
||||
auto data = VDataCast<uint8_t*, MEM_BLOCK_SIZE>::get(device_->mem_req_data);
|
||||
|
||||
if (byte_addr >= uint64_t(IO_COUT_ADDR)
|
||||
&& byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
|
||||
|
@ -499,11 +504,15 @@ private:
|
|||
} else {
|
||||
// process writes
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Wr: tag=%0lx, addr=%0x, byteen=%0lx, data=", timestamp, device_->mem_req_tag, byte_addr, byteen);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]);
|
||||
}
|
||||
printf("\n");
|
||||
printf("%0ld: [sim] MEM Wr Req: tag=0x%0lx, addr=0x%0lx, byteen=0x", timestamp, device_->mem_req_tag, byte_addr);
|
||||
for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) {
|
||||
printf("%x", (int)((byteen >> (4 * i)) & 0xf));
|
||||
}
|
||||
printf(", data=0x");
|
||||
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
|
||||
printf("%d=%02x,", i, data[i]);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
|
@ -530,7 +539,7 @@ private:
|
|||
ram_->read(mem_req->block.data(), byte_addr, MEM_BLOCK_SIZE);
|
||||
pending_mem_reqs_.emplace_back(mem_req);
|
||||
|
||||
//printf("%0ld: [sim] MEM Rd Req: addr=%0x, tag=%0lx\n", timestamp, byte_addr, device_->mem_req_tag);
|
||||
//printf("%0ld: [sim] MEM Rd Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag);
|
||||
|
||||
// send dram request
|
||||
dram_queue_.push(mem_req);
|
||||
|
|
|
@ -24,7 +24,7 @@ LDFLAGS += -Wl,-rpath,$(THIRD_PARTY_DIR)/ramulator -L$(THIRD_PARTY_DIR)/ramulato
|
|||
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
|
||||
SRCS += $(SRC_DIR)/processor.cpp $(SRC_DIR)/cluster.cpp $(SRC_DIR)/socket.cpp $(SRC_DIR)/core.cpp $(SRC_DIR)/emulator.cpp $(SRC_DIR)/decode.cpp $(SRC_DIR)/execute.cpp $(SRC_DIR)/func_unit.cpp $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp
|
||||
|
||||
# Debugigng
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
CXXFLAGS += -g -O0 -DDEBUG_LEVEL=$(DEBUG)
|
||||
#CXXFLAGS += -g -O0 -DDEBUG_LEVEL=$(DEBUG) -fsanitize=address -fno-omit-frame-pointer
|
||||
|
|
|
@ -77,8 +77,8 @@ public:
|
|||
caches_.at(i)->CoreRspPorts.at(j).bind(&mem_arbs.at(j)->RspOut.at(i));
|
||||
}
|
||||
|
||||
caches_.at(i)->MemReqPort.bind(&cache_arb->ReqIn.at(i));
|
||||
cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPort);
|
||||
caches_.at(i)->MemReqPorts.at(0).bind(&cache_arb->ReqIn.at(i));
|
||||
cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPorts.at(0));
|
||||
}
|
||||
|
||||
cache_arb->ReqOut.at(0).bind(&this->MemReqPort);
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue