mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
enabling 64-bit ISA with graphics extension
This commit is contained in:
parent
c0378b1fcc
commit
c7d0f1ee34
32 changed files with 235 additions and 213 deletions
|
@ -58,6 +58,9 @@ jobs:
|
|||
- stage: test
|
||||
name: graphics
|
||||
script: cp -r ../build32 ../build32_graphics && cd ../build32_graphics && ./ci/travis_run.py ./ci/regression.sh -graphics
|
||||
- stage: test
|
||||
name: graphics64
|
||||
script: cp -r ../build64 ../build64_graphics && cd ../build64_graphics && XLEN=64 RISCV_TOOLCHAIN_PATH=$TOOLDIR/riscv64-gnu-toolchain ./ci/travis_run.py ./ci/regression.sh -graphics
|
||||
- stage: test
|
||||
name: tex
|
||||
script: cp -r ../build32 ../build32_tex && cd ../build32_tex && ./ci/travis_run.py ./ci/regression.sh -tex
|
||||
|
|
|
@ -94,18 +94,18 @@ CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=t
|
|||
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-itoad.png -rtoad_ref_f6.png -f6 -g0"
|
||||
|
||||
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g0.png -g0"
|
||||
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -rsoccer_ref_g0.png -g0"
|
||||
CONFIGS="-DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -rsoccer_ref_g0.png -g0"
|
||||
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g0.png -g0"
|
||||
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g1.png -g1" --perf=3
|
||||
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -rsoccer_ref_g1.png -g1" --perf=3
|
||||
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -rsoccer_ref_g2.png -g2"
|
||||
CONFIGS="-DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -rsoccer_ref_g1.png -g1" --perf=3
|
||||
CONFIGS="-DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -rsoccer_ref_g2.png -g2"
|
||||
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g2.png -g2"
|
||||
|
||||
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -rsoccer_ref_g1.png -g1" --perf=3
|
||||
CONFIGS="-DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -rsoccer_ref_g1.png -g1" --perf=3
|
||||
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g1.png -g1" --perf=3
|
||||
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g1.png -g1 -z"
|
||||
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE -DTCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g1.png -g1"
|
||||
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE -DNUM_TEX_UNITS=2 -DL1_DISABLE -DSM_DISABLE -DTCACHE_DISABLE" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -rsoccer_ref_g1.png" --cores=4 --warps=1 --threads=2
|
||||
CONFIGS="-DEXT_TEX_ENABLE -DNUM_TEX_UNITS=2 -DL1_DISABLE -DSM_DISABLE -DTCACHE_DISABLE" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -rsoccer_ref_g1.png" --cores=4 --warps=1 --threads=2
|
||||
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE -DNUM_TEX_UNITS=2 -DL1_DISABLE -DSM_DISABLE -DTCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g1.png" --cores=1 --warps=1 --threads=2
|
||||
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE -DNUM_TEX_UNITS=1 -DL1_DISABLE -DSM_DISABLE -DTCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g1.png" --cores=2 --warps=1 --threads=2
|
||||
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE -DNUM_TEX_UNITS=4 -DL1_DISABLE -DSM_DISABLE -DTCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g1.png" --cores=2 --warps=1 --threads=2
|
||||
|
@ -119,11 +119,11 @@ rop()
|
|||
{
|
||||
echo "begin render output tests..."
|
||||
|
||||
CONFIGS="-DENABLE_DPI -DEXT_ROP_ENABLE" ./ci/blackbox.sh --driver=simx --app=rop --args="-rwhitebox_128.png" --perf=5
|
||||
CONFIGS="-DEXT_ROP_ENABLE" ./ci/blackbox.sh --driver=simx --app=rop --args="-rwhitebox_128.png" --perf=5
|
||||
CONFIGS="-DENABLE_DPI -DEXT_ROP_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=rop --args="-rwhitebox_128.png" --perf=5
|
||||
CONFIGS="-DENABLE_DPI -DEXT_ROP_ENABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=rop --args="-rwhitebox_128.png"
|
||||
CONFIGS="-DENABLE_DPI -DEXT_ROP_ENABLE -DOCACHE_NUM_BANKS=8" ./ci/blackbox.sh --driver=rtlsim --app=rop --args="-rwhitebox_128.png" --perf=5
|
||||
CONFIGS="-DENABLE_DPI -DEXT_ROP_ENABLE -DNUM_ROP_UNITS=2 -DL1_DISABLE -DSM_DISABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=simx --app=rop --args="-rwhitebox_128.png" --cores=4 --warps=1 --threads=2
|
||||
CONFIGS="-DEXT_ROP_ENABLE -DNUM_ROP_UNITS=2 -DL1_DISABLE -DSM_DISABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=simx --app=rop --args="-rwhitebox_128.png" --cores=4 --warps=1 --threads=2
|
||||
CONFIGS="-DENABLE_DPI -DEXT_ROP_ENABLE -DNUM_ROP_UNITS=2 -DL1_DISABLE -DSM_DISABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=rop --args="-rwhitebox_128.png" --cores=1 --warps=1 --threads=2
|
||||
CONFIGS="-DENABLE_DPI -DEXT_ROP_ENABLE -DNUM_ROP_UNITS=1 -DL1_DISABLE -DSM_DISABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=rop --args="-rwhitebox_128.png" --cores=2 --warps=1 --threads=2
|
||||
CONFIGS="-DENABLE_DPI -DEXT_ROP_ENABLE -DNUM_ROP_UNITS=2 -DL1_DISABLE -DSM_DISABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=rop --args="-rwhitebox_128.png" --cores=4 --warps=1 --threads=2
|
||||
|
@ -137,15 +137,15 @@ raster()
|
|||
{
|
||||
echo "begin rasterizer tests..."
|
||||
|
||||
CONFIGS="-DENABLE_DPI -DEXT_RASTER_ENABLE" ./ci/blackbox.sh --driver=simx --app=raster --args="-ttriangle.cgltrace -rtriangle_ref_128.png" --perf=4
|
||||
CONFIGS="-DEXT_RASTER_ENABLE" ./ci/blackbox.sh --driver=simx --app=raster --args="-ttriangle.cgltrace -rtriangle_ref_128.png" --perf=4
|
||||
CONFIGS="-DENABLE_DPI -DEXT_RASTER_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=raster --args="-ttriangle.cgltrace -rtriangle_ref_128.png" --perf=4
|
||||
CONFIGS="-DENABLE_DPI -DEXT_RASTER_ENABLE -DRCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=raster --args="-ttriangle.cgltrace -rtriangle_ref_128.png"
|
||||
CONFIGS="-DENABLE_DPI -DEXT_RASTER_ENABLE -DRCACHE_NUM_BANKS=4" ./ci/blackbox.sh --driver=rtlsim --app=raster --args="-ttriangle.cgltrace -rtriangle_ref_128.png" --perf=4
|
||||
CONFIGS="-DENABLE_DPI -DEXT_RASTER_ENABLE -DRASTER_TILE_LOGSIZE=4" ./ci/blackbox.sh --driver=simx --app=raster --args="-k4 -ttriangle.cgltrace -rtriangle_ref_128.png"
|
||||
CONFIGS="-DENABLE_DPI -DEXT_RASTER_ENABLE -DRASTER_TILE_LOGSIZE=6" ./ci/blackbox.sh --driver=simx --app=raster --args="-k6 -ttriangle.cgltrace -rtriangle_ref_128.png"
|
||||
CONFIGS="-DEXT_RASTER_ENABLE -DRASTER_TILE_LOGSIZE=4" ./ci/blackbox.sh --driver=simx --app=raster --args="-k4 -ttriangle.cgltrace -rtriangle_ref_128.png"
|
||||
CONFIGS="-DEXT_RASTER_ENABLE -DRASTER_TILE_LOGSIZE=6" ./ci/blackbox.sh --driver=simx --app=raster --args="-k6 -ttriangle.cgltrace -rtriangle_ref_128.png"
|
||||
CONFIGS="-DENABLE_DPI -DEXT_RASTER_ENABLE -DRASTER_TILE_LOGSIZE=4" ./ci/blackbox.sh --driver=rtlsim --app=raster --args="-k4 -ttriangle.cgltrace -rtriangle_ref_128.png"
|
||||
CONFIGS="-DENABLE_DPI -DEXT_RASTER_ENABLE -DRASTER_TILE_LOGSIZE=6" ./ci/blackbox.sh --driver=rtlsim --app=raster --args="-k6 -ttriangle.cgltrace -rtriangle_ref_128.png"
|
||||
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE -DNUM_RASTER_UNITS=2 -DL1_DISABLE -DSM_DISABLE -DRCACHE_DISABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-tbox.cgltrace -rbox_ref_128.png" --cores=4 --warps=1 --threads=2
|
||||
CONFIGS="-DEXT_GFX_ENABLE -DNUM_RASTER_UNITS=2 -DL1_DISABLE -DSM_DISABLE -DRCACHE_DISABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-tbox.cgltrace -rbox_ref_128.png" --cores=4 --warps=1 --threads=2
|
||||
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE -DNUM_RASTER_UNITS=2 -DL1_DISABLE -DSM_DISABLE -DRCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=draw3d --args="-tbox.cgltrace -rbox_ref_128.png" --cores=1 --warps=1 --threads=2
|
||||
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE -DNUM_RASTER_UNITS=1 -DL1_DISABLE -DSM_DISABLE -DRCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=draw3d --args="-tbox.cgltrace -rbox_ref_128.png" --cores=2 --warps=1 --threads=2
|
||||
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE -DNUM_RASTER_UNITS=2 -DL1_DISABLE -DSM_DISABLE -DRCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=draw3d --args="-tbox.cgltrace -rbox_ref_128.png" --cores=4 --warps=1 --threads=2
|
||||
|
@ -161,18 +161,18 @@ graphics()
|
|||
{
|
||||
echo "begin graphics tests..."
|
||||
|
||||
CONFIGS="-DENABLE_DPI -DEXT_IMADD_ENABLE" ./ci/blackbox.sh --driver=simx --app=imadd
|
||||
CONFIGS="-DEXT_IMADD_ENABLE" ./ci/blackbox.sh --driver=simx --app=imadd
|
||||
CONFIGS="-DENABLE_DPI -DEXT_IMADD_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=imadd --args="-n32 -z"
|
||||
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE -DNUM_RASTER_UNITS=2" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-tbox.cgltrace -rbox_ref_128.png"
|
||||
CONFIGS="-DEXT_GFX_ENABLE -DNUM_RASTER_UNITS=2" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-tbox.cgltrace -rbox_ref_128.png"
|
||||
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE -DNUM_RASTER_UNITS=2" ./ci/blackbox.sh --driver=rtlsim --app=draw3d --args="-tbox.cgltrace -rbox_ref_128.png"
|
||||
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE -DL1_DISABLE -DSM_DISABLE -DTCACHE_DISABLE -DRCACHE_DISABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-tbox.cgltrace -rbox_ref_128.png" --clusters=2 --cores=2 --warps=1 --threads=2
|
||||
CONFIGS="-DEXT_GFX_ENABLE -DL1_DISABLE -DSM_DISABLE -DTCACHE_DISABLE -DRCACHE_DISABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-tbox.cgltrace -rbox_ref_128.png" --clusters=2 --cores=2 --warps=1 --threads=2
|
||||
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE -DL1_DISABLE -DSM_DISABLE -DTCACHE_DISABLE -DRCACHE_DISABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=draw3d --args="-tbox.cgltrace -rbox_ref_128.png" --clusters=2 --cores=2 --warps=1 --threads=2
|
||||
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-ttriangle.cgltrace -rtriangle_ref_8.png -w8 -h8" --warps=1 --threads=2 --debug=3
|
||||
CONFIGS="-DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-ttriangle.cgltrace -rtriangle_ref_8.png -w8 -h8" --warps=1 --threads=2 --debug=3
|
||||
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=draw3d --args="-ttriangle.cgltrace -rtriangle_ref_8.png -w8 -h8" --warps=1 --threads=2 --debug=3
|
||||
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE -DL1_DISABLE -DSM_DISABLE -DTCACHE_DISABLE -DRCACHE_DISABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-tvase.cgltrace -rvase_ref_32.png -w32 -h32" --threads=1
|
||||
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE -DIPDOM_STACK_SIZE=128" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-x -ttriangle.cgltrace -rtriangle_ref_128.png"
|
||||
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-y -ttriangle.cgltrace -rtriangle_ref_128.png"
|
||||
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-z -ttriangle.cgltrace -rtriangle_ref_128.png"
|
||||
CONFIGS="-DEXT_GFX_ENABLE -DL1_DISABLE -DSM_DISABLE -DTCACHE_DISABLE -DRCACHE_DISABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-tvase.cgltrace -rvase_ref_32.png -w32 -h32" --threads=1
|
||||
CONFIGS="-DEXT_GFX_ENABLE -DIPDOM_STACK_SIZE=128" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-x -ttriangle.cgltrace -rtriangle_ref_128.png"
|
||||
CONFIGS="-DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-y -ttriangle.cgltrace -rtriangle_ref_128.png"
|
||||
CONFIGS="-DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-z -ttriangle.cgltrace -rtriangle_ref_128.png"
|
||||
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE -DL1_DISABLE -DSM_DISABLE -DTCACHE_DISABLE -DRCACHE_DISABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=draw3d --args="-tvase.cgltrace -rvase_ref_32.png -w32 -h32" --threads=2 || true
|
||||
|
||||
echo "graphics tests done!"
|
||||
|
@ -185,21 +185,21 @@ echo "begin clustering tests..."
|
|||
# warp/threads configurations
|
||||
CONFIGS="-DENABLE_DPI" ./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=2 --threads=8 --app=demo
|
||||
CONFIGS="-DENABLE_DPI" ./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=8 --threads=2 --app=demo
|
||||
CONFIGS="-DENABLE_DPI" ./ci/blackbox.sh --driver=simx --cores=1 --warps=8 --threads=16 --app=demo
|
||||
./ci/blackbox.sh --driver=simx --cores=1 --warps=8 --threads=16 --app=demo
|
||||
|
||||
# cores clustering
|
||||
CONFIGS="-DENABLE_DPI" ./ci/blackbox.sh --driver=rtlsim --cores=1 --clusters=1 --app=demo --args="-n1"
|
||||
CONFIGS="-DENABLE_DPI" ./ci/blackbox.sh --driver=rtlsim --cores=4 --clusters=1 --app=demo --args="-n1"
|
||||
CONFIGS="-DENABLE_DPI" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=demo --args="-n1"
|
||||
CONFIGS="-DENABLE_DPI" ./ci/blackbox.sh --driver=simx --cores=4 --clusters=1 --app=demo --args="-n1"
|
||||
CONFIGS="-DENABLE_DPI" ./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=1 --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --app=demo --args="-n1"
|
||||
|
||||
# L2/L3
|
||||
CONFIGS="-DENABLE_DPI" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=demo --args="-n1"
|
||||
CONFIGS="-DENABLE_DPI" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l3cache --app=demo --args="-n1"
|
||||
CONFIGS="-DENABLE_DPI" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr --args="-n1"
|
||||
CONFIGS="-DENABLE_DPI" ./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --l2cache --app=demo --args="-n1"
|
||||
CONFIGS="-DENABLE_DPI" ./ci/blackbox.sh --driver=simx --cores=4 --clusters=4 --l2cache --l3cache --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --l2cache --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=4 --l2cache --l3cache --app=demo --args="-n1"
|
||||
|
||||
echo "clustering tests done!"
|
||||
}
|
||||
|
@ -230,12 +230,12 @@ CONFIGS="-DENABLE_DPI -DEXT_M_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=
|
|||
# disabling F extension
|
||||
CONFIGS="-DENABLE_DPI -DEXT_F_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=no_mf_ext
|
||||
CONFIGS="-DENABLE_DPI -DEXT_F_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=no_mf_ext --perf=1
|
||||
CONFIGS="-DENABLE_DPI -DEXT_F_DISABLE" ./ci/blackbox.sh --driver=simx --cores=1 --app=no_mf_ext --perf=1
|
||||
CONFIGS="-DEXT_F_DISABLE" ./ci/blackbox.sh --driver=simx --cores=1 --app=no_mf_ext --perf=1
|
||||
|
||||
# disable shared memory
|
||||
CONFIGS="-DENABLE_DPI -DSM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=no_smem
|
||||
CONFIGS="-DENABLE_DPI -DSM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=no_smem --perf=1
|
||||
CONFIGS="-DENABLE_DPI -DSM_DISABLE" ./ci/blackbox.sh --driver=simx --cores=1 --app=no_smem --perf=1
|
||||
CONFIGS="-DSM_DISABLE" ./ci/blackbox.sh --driver=simx --cores=1 --app=no_smem --perf=1
|
||||
|
||||
# disable L1 cache
|
||||
CONFIGS="-DENABLE_DPI -DL1_DISABLE -DSM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemm
|
||||
|
@ -256,17 +256,17 @@ CONFIGS="-DENABLE_DPI -DL1_LINE_SIZE=64" ./ci/blackbox.sh --driver=rtlsim --core
|
|||
# test cache banking
|
||||
CONFIGS="-DENABLE_DPI -DSMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemm
|
||||
CONFIGS="-DENABLE_DPI -DSMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemm
|
||||
CONFIGS="-DENABLE_DPI -DSMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemm
|
||||
CONFIGS="-DSMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemm
|
||||
CONFIGS="-DENABLE_DPI -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemm
|
||||
CONFIGS="-DENABLE_DPI -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemm
|
||||
CONFIGS="-DENABLE_DPI -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --cores=1 --app=sgemm
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --cores=1 --app=sgemm
|
||||
|
||||
# test cache multi-porting
|
||||
CONFIGS="-DENABLE_DPI -DDCACHE_NUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemm
|
||||
CONFIGS="-DENABLE_DPI -DDCACHE_NUM_PORTS=2" ./ci/blackbox.sh --driver=simx --cores=1 --app=sgemm
|
||||
CONFIGS="-DDCACHE_NUM_PORTS=2" ./ci/blackbox.sh --driver=simx --cores=1 --app=sgemm
|
||||
CONFIGS="-DENABLE_DPI -DDCACHE_NUM_PORTS=4" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemm
|
||||
CONFIGS="-DENABLE_DPI -DL2_NUM_PORTS=2 -DDCACHE_NUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=sgemm
|
||||
CONFIGS="-DENABLE_DPI -DL2_NUM_PORTS=2 -DDCACHE_NUM_PORTS=2" ./ci/blackbox.sh --driver=simx --cores=2 --l2cache --app=sgemm
|
||||
CONFIGS="-DL2_NUM_PORTS=2 -DDCACHE_NUM_PORTS=2" ./ci/blackbox.sh --driver=simx --cores=2 --l2cache --app=sgemm
|
||||
|
||||
# test 128-bit MEM block
|
||||
CONFIGS="-DENABLE_DPI -DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --cores=1 --app=demo
|
||||
|
|
|
@ -150,7 +150,7 @@ module VX_gpu_unit #(
|
|||
`ifdef EXT_TEX_ENABLE
|
||||
|
||||
VX_tex_exe_if tex_exe_if();
|
||||
VX_commit_if tex_commit_if();
|
||||
VX_commit_if tex_commit_if();
|
||||
|
||||
assign tex_exe_if.valid = gpu_req_valid && (gpu_exe_if.op_type == `INST_GPU_TEX);
|
||||
assign tex_exe_if.uuid = gpu_exe_if.uuid;
|
||||
|
@ -161,8 +161,8 @@ module VX_gpu_unit #(
|
|||
assign tex_exe_if.stage = gpu_exe_if.op_mod[`TEX_STAGE_BITS-1:0];
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign tex_exe_if.coords[0][i] = gpu_exe_if.rs1_data[i];
|
||||
assign tex_exe_if.coords[1][i] = gpu_exe_if.rs2_data[i];
|
||||
assign tex_exe_if.coords[0][i] = gpu_exe_if.rs1_data[i][31:0];
|
||||
assign tex_exe_if.coords[1][i] = gpu_exe_if.rs2_data[i][31:0];
|
||||
assign tex_exe_if.lod[i] = gpu_exe_if.rs3_data[i][0 +: `TEX_LOD_BITS];
|
||||
end
|
||||
|
||||
|
@ -206,7 +206,7 @@ module VX_gpu_unit #(
|
|||
.reset (raster_reset),
|
||||
.raster_csr_if (raster_csr_if),
|
||||
.raster_bus_if (raster_bus_if),
|
||||
.raster_exe_if (raster_exe_if),
|
||||
.raster_exe_if (raster_exe_if),
|
||||
.raster_commit_if (raster_commit_if)
|
||||
);
|
||||
|
||||
|
@ -231,7 +231,7 @@ module VX_gpu_unit #(
|
|||
assign rop_exe_if.face[i] = gpu_exe_if.rs1_data[i][0];
|
||||
assign rop_exe_if.pos_x[i] = gpu_exe_if.rs1_data[i][1 +: `ROP_DIM_BITS];
|
||||
assign rop_exe_if.pos_y[i] = gpu_exe_if.rs1_data[i][16 +: `ROP_DIM_BITS];
|
||||
assign rop_exe_if.color[i] = gpu_exe_if.rs2_data[i];
|
||||
assign rop_exe_if.color[i] = gpu_exe_if.rs2_data[i][31:0];
|
||||
assign rop_exe_if.depth[i] = gpu_exe_if.rs3_data[i][`ROP_DEPTH_BITS-1:0];
|
||||
end
|
||||
|
||||
|
@ -258,6 +258,7 @@ module VX_gpu_unit #(
|
|||
|
||||
wire imadd_valid_in;
|
||||
wire imadd_ready_in;
|
||||
wire [`NUM_THREADS-1:0][31:0] imadd_data_in [3];
|
||||
|
||||
wire imadd_valid_out;
|
||||
wire [UUID_WIDTH-1:0] imadd_uuid_out;
|
||||
|
@ -270,6 +271,12 @@ module VX_gpu_unit #(
|
|||
|
||||
assign imadd_valid_in = gpu_req_valid && (gpu_exe_if.op_type == `INST_GPU_IMADD);
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign imadd_data_in[0][i] = gpu_exe_if.rs1_data[i][31:0];
|
||||
assign imadd_data_in[1][i] = gpu_exe_if.rs2_data[i][31:0];
|
||||
assign imadd_data_in[2][i] = gpu_exe_if.rs3_data[i][31:0];
|
||||
end
|
||||
|
||||
`RESET_RELAY (imadd_reset, reset);
|
||||
|
||||
VX_imadd #(
|
||||
|
@ -285,9 +292,9 @@ module VX_gpu_unit #(
|
|||
// Inputs
|
||||
.valid_in (imadd_valid_in),
|
||||
.shift_in ({gpu_exe_if.op_mod[1:0], 3'b0}),
|
||||
.data1_in (gpu_exe_if.rs1_data),
|
||||
.data2_in (gpu_exe_if.rs2_data),
|
||||
.data3_in (gpu_exe_if.rs3_data),
|
||||
.data1_in (imadd_data_in[0]),
|
||||
.data2_in (imadd_data_in[1]),
|
||||
.data3_in (imadd_data_in[2]),
|
||||
.tag_in ({gpu_exe_if.uuid, gpu_exe_if.wid, gpu_exe_if.tmask, gpu_exe_if.PC, gpu_exe_if.rd}),
|
||||
.ready_in (imadd_ready_in),
|
||||
|
||||
|
@ -298,12 +305,19 @@ module VX_gpu_unit #(
|
|||
.ready_out (imadd_ready_out)
|
||||
);
|
||||
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] imadd_data_out_x;
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign imadd_data_out_x[i] = `XLEN'(imadd_data_out[i]);
|
||||
end
|
||||
|
||||
assign rsp_arb_valid_in[RSP_ARB_IDX_IMADD] = imadd_valid_out;
|
||||
assign rsp_arb_data_in[RSP_ARB_IDX_IMADD] = {imadd_uuid_out, imadd_wid_out, imadd_tmask_out, imadd_PC_out, imadd_rd_out, 1'b1, RSP_DATAW'(imadd_data_out), 1'b1, 1'b0};
|
||||
assign rsp_arb_data_in[RSP_ARB_IDX_IMADD] = {imadd_uuid_out, imadd_wid_out, imadd_tmask_out, imadd_PC_out, imadd_rd_out, 1'b1, RSP_DATAW'(imadd_data_out_x), 1'b1, 1'b0};
|
||||
assign imadd_ready_out = rsp_arb_ready_in[RSP_ARB_IDX_IMADD];
|
||||
|
||||
`endif
|
||||
|
||||
|
||||
|
||||
// can accept new request?
|
||||
|
||||
always @(*) begin
|
||||
|
|
|
@ -18,7 +18,8 @@ module VX_multiplier #(
|
|||
|
||||
wire [A_WIDTH-1:0] dataa_w;
|
||||
wire [B_WIDTH-1:0] datab_w;
|
||||
wire [R_WIDTH-1:0] result_w;
|
||||
wire [A_WIDTH+B_WIDTH-1:0] result_w;
|
||||
`UNUSED_VAR (result_w)
|
||||
|
||||
if (SIGNED != 0) begin
|
||||
assign result_w = $signed(dataa_w) * $signed(datab_w);
|
||||
|
@ -29,7 +30,7 @@ module VX_multiplier #(
|
|||
if (LATENCY == 0) begin
|
||||
assign dataa_w = dataa;
|
||||
assign datab_w = datab;
|
||||
assign result = result_w;
|
||||
assign result = R_WIDTH'(result_w);
|
||||
end else begin
|
||||
if (LATENCY >= 2) begin
|
||||
reg [A_WIDTH-1:0] dataa_p [LATENCY-2:0];
|
||||
|
@ -57,7 +58,7 @@ module VX_multiplier #(
|
|||
reg [R_WIDTH-1:0] result_r;
|
||||
always @(posedge clk) begin
|
||||
if (enable) begin
|
||||
result_r <= result_w;
|
||||
result_r <= R_WIDTH'(result_w);
|
||||
end
|
||||
end
|
||||
assign result = result_r;
|
||||
|
|
|
@ -49,25 +49,29 @@ module VX_raster_agent #(
|
|||
|
||||
assign raster_rsp_valid = raster_exe_if.valid && raster_bus_if.req_valid;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] response_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] response_data, commit_data;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign response_data[i] = {31'(raster_bus_if.req_stamps[i].pid), ~raster_bus_if.req_done};
|
||||
end
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + (`NUM_THREADS * 32))
|
||||
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + `XLEN + `NR_BITS + (`NUM_THREADS * 32))
|
||||
) rsp_sbuf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (raster_rsp_valid),
|
||||
.ready_in (raster_rsp_ready),
|
||||
.data_in ({raster_exe_if.uuid, raster_exe_if.wid, raster_exe_if.tmask, raster_exe_if.PC, raster_exe_if.rd, response_data}),
|
||||
.data_out ({raster_commit_if.uuid, raster_commit_if.wid, raster_commit_if.tmask, raster_commit_if.PC, raster_commit_if.rd, raster_commit_if.data}),
|
||||
.data_out ({raster_commit_if.uuid, raster_commit_if.wid, raster_commit_if.tmask, raster_commit_if.PC, raster_commit_if.rd, commit_data}),
|
||||
.valid_out (raster_commit_if.valid),
|
||||
.ready_out (raster_commit_if.ready)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign raster_commit_if.data[i] = `XLEN'(commit_data[i]);
|
||||
end
|
||||
|
||||
assign raster_commit_if.wb = 1'b1;
|
||||
assign raster_commit_if.eop = 1'b1;
|
||||
|
||||
|
|
|
@ -13,7 +13,6 @@ module VX_raster_dcr #(
|
|||
output raster_dcrs_t raster_dcrs
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
// DCR registers
|
||||
|
@ -24,13 +23,13 @@ module VX_raster_dcr #(
|
|||
if (dcr_bus_if.write_valid) begin
|
||||
case (dcr_bus_if.write_addr)
|
||||
`DCR_RASTER_TBUF_ADDR: begin
|
||||
dcrs.tbuf_addr <= dcr_bus_if.write_data[`RASTER_DCR_DATA_BITS-1:0];
|
||||
dcrs.tbuf_addr <= dcr_bus_if.write_data[`RASTER_ADDR_BITS-1:0];
|
||||
end
|
||||
`DCR_RASTER_TILE_COUNT: begin
|
||||
dcrs.tile_count <= dcr_bus_if.write_data[`RASTER_TILE_BITS-1:0];
|
||||
end
|
||||
`DCR_RASTER_PBUF_ADDR: begin
|
||||
dcrs.pbuf_addr <= dcr_bus_if.write_data[`RASTER_DCR_DATA_BITS-1:0];
|
||||
dcrs.pbuf_addr <= dcr_bus_if.write_data[`RASTER_ADDR_BITS-1:0];
|
||||
end
|
||||
`DCR_RASTER_PBUF_STRIDE: begin
|
||||
dcrs.pbuf_stride <= dcr_bus_if.write_data[`RASTER_STRIDE_BITS-1:0];
|
||||
|
|
|
@ -51,19 +51,19 @@ module VX_raster_mem #(
|
|||
localparam FETCH_FLAG_PID = 2'b01;
|
||||
localparam FETCH_FLAG_PDATA = 2'b10;
|
||||
|
||||
localparam TILE_HEADER_SIZE = 8;
|
||||
localparam TILE_HEADER_SIZEW = 8 / 4;
|
||||
|
||||
// A primitive data contains (xloc, yloc, pid, edges)
|
||||
localparam PRIM_DATA_WIDTH = 2 * `RASTER_DIM_BITS + 9 * `RASTER_DATA_BITS + `RASTER_PID_BITS;
|
||||
|
||||
// Storage to cycle through all primitives and tiles
|
||||
reg [`RASTER_DCR_DATA_BITS-1:0] next_tbuf_addr;
|
||||
reg [`RASTER_DCR_DATA_BITS-1:0] curr_pbuf_addr;
|
||||
reg [`RASTER_PID_BITS-1:0] curr_pid_reqs;
|
||||
reg [`RASTER_PID_BITS-1:0] curr_pid_rsps;
|
||||
reg [`RASTER_TILE_BITS-1:0] curr_num_tiles;
|
||||
reg [`RASTER_DIM_BITS-1:0] curr_xloc;
|
||||
reg [`RASTER_DIM_BITS-1:0] curr_yloc;
|
||||
reg [RCACHE_ADDR_WIDTH-1:0] next_tbuf_addr;
|
||||
reg [RCACHE_ADDR_WIDTH-1:0] curr_pbuf_addr;
|
||||
reg [`RASTER_PID_BITS-1:0] curr_pid_reqs;
|
||||
reg [`RASTER_PID_BITS-1:0] curr_pid_rsps;
|
||||
reg [`RASTER_TILE_BITS-1:0] curr_num_tiles;
|
||||
reg [`RASTER_DIM_BITS-1:0] curr_xloc;
|
||||
reg [`RASTER_DIM_BITS-1:0] curr_yloc;
|
||||
|
||||
// Output buffer
|
||||
wire buf_in_valid;
|
||||
|
@ -72,7 +72,7 @@ module VX_raster_mem #(
|
|||
// Memory request
|
||||
reg mem_req_valid, mem_req_valid_qual;
|
||||
reg [NUM_REQS-1:0] mem_req_mask;
|
||||
reg [8:0][`RASTER_DCR_DATA_BITS-1:0] mem_req_addr;
|
||||
reg [8:0][RCACHE_ADDR_WIDTH-1:0] mem_req_addr;
|
||||
reg [TAG_WIDTH-1:0] mem_req_tag;
|
||||
wire mem_req_ready;
|
||||
|
||||
|
@ -83,12 +83,12 @@ module VX_raster_mem #(
|
|||
wire mem_rsp_ready;
|
||||
|
||||
// Primitive info
|
||||
wire [`RASTER_DCR_DATA_BITS-1:0] pids_addr;
|
||||
wire [RCACHE_ADDR_WIDTH-1:0] pids_addr;
|
||||
wire prim_id_rsp_valid;
|
||||
wire prim_data_rsp_valid;
|
||||
wire prim_addr_rsp_valid;
|
||||
wire prim_addr_rsp_ready;
|
||||
wire [8:0][`RASTER_DATA_BITS-1:0] prim_mem_addr;
|
||||
wire [8:0][RCACHE_ADDR_WIDTH-1:0] prim_mem_addr;
|
||||
wire [`RASTER_PID_BITS-1:0] primitive_id;
|
||||
|
||||
// Memory fetch FSM
|
||||
|
@ -115,10 +115,10 @@ module VX_raster_mem #(
|
|||
|
||||
// calculate tile start info
|
||||
wire [`RASTER_TILE_BITS-1:0] start_tile_count = (dcrs.tile_count + `RASTER_TILE_BITS'(NUM_INSTANCES - 1 - INSTANCE_IDX)) >> LOG2_NUM_INSTANCES;
|
||||
wire [`RASTER_DCR_DATA_BITS-1:0] start_tbuf_addr = dcrs.tbuf_addr + (INSTANCE_IDX * TILE_HEADER_SIZE);
|
||||
wire [RCACHE_ADDR_WIDTH-1:0] start_tbuf_addr = RCACHE_ADDR_WIDTH'({dcrs.tbuf_addr, 4'b0}) + RCACHE_ADDR_WIDTH'(INSTANCE_IDX * TILE_HEADER_SIZEW);
|
||||
|
||||
// calculate address of primitive ids
|
||||
assign pids_addr = (mem_req_addr[1] + 4) + (`RASTER_DCR_DATA_BITS'(th_pids_offset) << 2);
|
||||
assign pids_addr = (mem_req_addr[1] + 1) + RCACHE_ADDR_WIDTH'(th_pids_offset);
|
||||
|
||||
// scheduler FSM
|
||||
always @(posedge clk) begin
|
||||
|
@ -139,11 +139,11 @@ module VX_raster_mem #(
|
|||
end
|
||||
mem_req_valid <= 1;
|
||||
mem_req_addr[0] <= start_tbuf_addr;
|
||||
mem_req_addr[1] <= start_tbuf_addr + 4;
|
||||
mem_req_addr[1] <= start_tbuf_addr + 1;
|
||||
mem_req_mask <= 9'b11;
|
||||
mem_req_tag <= TAG_WIDTH'(FETCH_FLAG_TILE);
|
||||
// update tile counters
|
||||
next_tbuf_addr <= start_tbuf_addr + (NUM_INSTANCES * TILE_HEADER_SIZE);
|
||||
next_tbuf_addr <= start_tbuf_addr + RCACHE_ADDR_WIDTH'(NUM_INSTANCES * TILE_HEADER_SIZEW);
|
||||
curr_num_tiles <= start_tile_count;
|
||||
end
|
||||
STATE_TILE: begin
|
||||
|
@ -168,7 +168,7 @@ module VX_raster_mem #(
|
|||
if (mem_req_fire) begin
|
||||
if (is_prim_id_req) begin
|
||||
// update pid counters
|
||||
curr_pbuf_addr <= curr_pbuf_addr + 4;
|
||||
curr_pbuf_addr <= curr_pbuf_addr + 1;
|
||||
curr_pid_reqs <= curr_pid_reqs - `RASTER_PID_BITS'(1);
|
||||
end
|
||||
|
||||
|
@ -177,7 +177,7 @@ module VX_raster_mem #(
|
|||
// fetch next primitive pid
|
||||
mem_req_valid <= 1;
|
||||
mem_req_mask <= 9'b1;
|
||||
mem_req_addr[0] <= curr_pbuf_addr + (is_prim_id_req ? 4 : 0);
|
||||
mem_req_addr[0] <= curr_pbuf_addr + (is_prim_id_req ? 1 : 0);
|
||||
mem_req_tag <= TAG_WIDTH'(FETCH_FLAG_PID);
|
||||
end
|
||||
end
|
||||
|
@ -202,9 +202,9 @@ module VX_raster_mem #(
|
|||
mem_req_valid <= 1;
|
||||
mem_req_mask <= 9'b11;
|
||||
mem_req_addr[0] <= next_tbuf_addr;
|
||||
mem_req_addr[1] <= next_tbuf_addr + 4;
|
||||
mem_req_addr[1] <= next_tbuf_addr + 1;
|
||||
mem_req_tag <= TAG_WIDTH'(FETCH_FLAG_TILE);
|
||||
next_tbuf_addr <= next_tbuf_addr + (NUM_INSTANCES * TILE_HEADER_SIZE);
|
||||
next_tbuf_addr <= next_tbuf_addr + RCACHE_ADDR_WIDTH'(NUM_INSTANCES * TILE_HEADER_SIZEW);
|
||||
end
|
||||
// update tile counter
|
||||
curr_num_tiles <= curr_num_tiles - `RASTER_TILE_BITS'(1);
|
||||
|
@ -252,10 +252,8 @@ module VX_raster_mem #(
|
|||
assign mem_rsp_ready = (~prim_id_rsp_valid || prim_addr_rsp_ready)
|
||||
&& (~prim_data_rsp_valid || buf_in_ready);
|
||||
|
||||
wire [8:0][RCACHE_ADDR_WIDTH-1:0] mem_req_addr_w;
|
||||
wire [8:0][RCACHE_WORD_SIZE-1:0] mem_req_byteen;
|
||||
for (genvar i = 0; i < 9; ++i) begin
|
||||
assign mem_req_addr_w[i] = mem_req_addr[i][(32 - RCACHE_ADDR_WIDTH) +: RCACHE_ADDR_WIDTH];
|
||||
assign mem_req_byteen[i] = {RCACHE_WORD_SIZE{1'b1}};
|
||||
end
|
||||
|
||||
|
@ -279,7 +277,7 @@ module VX_raster_mem #(
|
|||
.req_rw (1'b0),
|
||||
.req_mask (mem_req_mask),
|
||||
.req_byteen (mem_req_byteen),
|
||||
.req_addr (mem_req_addr_w),
|
||||
.req_addr (mem_req_addr),
|
||||
`UNUSED_PIN (req_data),
|
||||
.req_tag (mem_req_tag),
|
||||
`UNUSED_PIN (req_empty),
|
||||
|
@ -310,12 +308,12 @@ module VX_raster_mem #(
|
|||
.mem_rsp_ready (cache_bus_if.rsp_ready)
|
||||
);
|
||||
|
||||
wire [`RASTER_DATA_BITS-1:0] prim_mem_offset;
|
||||
wire [RCACHE_ADDR_WIDTH-1:0] prim_mem_offset;
|
||||
|
||||
VX_multiplier #(
|
||||
.A_WIDTH (`RASTER_DATA_BITS),
|
||||
.B_WIDTH (`RASTER_STRIDE_BITS),
|
||||
.R_WIDTH (`RASTER_DATA_BITS),
|
||||
.R_WIDTH (RCACHE_ADDR_WIDTH),
|
||||
.LATENCY (`LATENCY_IMUL)
|
||||
) multiplier (
|
||||
.clk (clk),
|
||||
|
@ -326,7 +324,8 @@ module VX_raster_mem #(
|
|||
);
|
||||
|
||||
for (genvar i = 0; i < 9; ++i) begin
|
||||
assign prim_mem_addr[i] = dcrs.pbuf_addr + prim_mem_offset + 4 * i;
|
||||
wire [RCACHE_ADDR_WIDTH-1:0] offset = prim_mem_offset + RCACHE_ADDR_WIDTH'(1 * i);
|
||||
assign prim_mem_addr[i] = RCACHE_ADDR_WIDTH'({dcrs.pbuf_addr, 4'b0}) + offset;
|
||||
end
|
||||
|
||||
VX_shift_register #(
|
||||
|
|
|
@ -3,6 +3,11 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
`ifdef XLEN_64
|
||||
`define RASTER_ADDR_BITS 32
|
||||
`else
|
||||
`define RASTER_ADDR_BITS 25
|
||||
`endif
|
||||
`define RASTER_DCR_DATA_BITS 32
|
||||
`define RASTER_DATA_BITS 32
|
||||
`define RASTER_TILE_BITS 16
|
||||
|
@ -10,14 +15,14 @@
|
|||
package VX_raster_types;
|
||||
|
||||
typedef struct packed {
|
||||
logic [`RASTER_DCR_DATA_BITS-1:0] tbuf_addr; // Tile buffer address
|
||||
logic [`RASTER_TILE_BITS-1:0] tile_count; // Number of tiles in the tile buffer
|
||||
logic [`RASTER_DCR_DATA_BITS-1:0] pbuf_addr; // Primitive (triangle) data buffer start address
|
||||
logic [`RASTER_STRIDE_BITS-1:0] pbuf_stride; // Primitive data stride to fetch vertices
|
||||
logic [`RASTER_DIM_BITS-1:0] dst_xmin; // Destination window xmin
|
||||
logic [`RASTER_DIM_BITS-1:0] dst_xmax; // Destination window xmax
|
||||
logic [`RASTER_DIM_BITS-1:0] dst_ymin; // Destination window ymin
|
||||
logic [`RASTER_DIM_BITS-1:0] dst_ymax; // Destination window ymax
|
||||
logic [`RASTER_ADDR_BITS-1:0] tbuf_addr; // Tile buffer address
|
||||
logic [`RASTER_TILE_BITS-1:0] tile_count; // Number of tiles in the tile buffer
|
||||
logic [`RASTER_ADDR_BITS-1:0] pbuf_addr; // Primitive triangle data buffer start address
|
||||
logic [`RASTER_STRIDE_BITS-1:0] pbuf_stride; // Primitive data stride to fetch vertices
|
||||
logic [`RASTER_DIM_BITS-1:0] dst_xmin; // Destination window xmin
|
||||
logic [`RASTER_DIM_BITS-1:0] dst_xmax; // Destination window xmax
|
||||
logic [`RASTER_DIM_BITS-1:0] dst_ymin; // Destination window ymin
|
||||
logic [`RASTER_DIM_BITS-1:0] dst_ymax; // Destination window ymax
|
||||
} raster_dcrs_t;
|
||||
|
||||
typedef struct packed {
|
||||
|
|
|
@ -63,13 +63,13 @@ module VX_rop_agent #(
|
|||
assign rop_rsp_valid = rop_exe_if.valid && rop_req_ready;
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32)
|
||||
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + `XLEN)
|
||||
) rsp_sbuf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (rop_rsp_valid),
|
||||
.ready_in (rop_rsp_ready),
|
||||
.data_in ({rop_exe_if.uuid, rop_exe_if.wid, rop_exe_if.tmask, rop_exe_if.PC}),
|
||||
.data_in ({rop_exe_if.uuid, rop_exe_if.wid, rop_exe_if.tmask, rop_exe_if.PC}),
|
||||
.data_out ({rop_commit_if.uuid, rop_commit_if.wid, rop_commit_if.tmask, rop_commit_if.PC}),
|
||||
.valid_out (rop_commit_if.valid),
|
||||
.ready_out (rop_commit_if.ready)
|
||||
|
|
|
@ -13,7 +13,6 @@ module VX_rop_dcr #(
|
|||
output rop_dcrs_t rop_dcrs
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
`define DEPTH_TEST_ENABLE(func, writemask) \
|
||||
|
@ -40,7 +39,7 @@ module VX_rop_dcr #(
|
|||
if (dcr_bus_if.write_valid) begin
|
||||
case (dcr_bus_if.write_addr)
|
||||
`DCR_ROP_CBUF_ADDR: begin
|
||||
dcrs.cbuf_addr <= dcr_bus_if.write_data[31:0];
|
||||
dcrs.cbuf_addr <= dcr_bus_if.write_data[`ROP_ADDR_BITS-1:0];
|
||||
end
|
||||
`DCR_ROP_CBUF_PITCH: begin
|
||||
dcrs.cbuf_pitch <= dcr_bus_if.write_data[`ROP_PITCH_BITS-1:0];
|
||||
|
@ -49,7 +48,7 @@ module VX_rop_dcr #(
|
|||
dcrs.cbuf_writemask <= dcr_bus_if.write_data[3:0];
|
||||
end
|
||||
`DCR_ROP_ZBUF_ADDR: begin
|
||||
dcrs.zbuf_addr <= dcr_bus_if.write_data[31:0];
|
||||
dcrs.zbuf_addr <= dcr_bus_if.write_data[`ROP_ADDR_BITS-1:0];
|
||||
end
|
||||
`DCR_ROP_ZBUF_PITCH: begin
|
||||
dcrs.zbuf_pitch <= dcr_bus_if.write_data[`ROP_PITCH_BITS-1:0];
|
||||
|
|
|
@ -42,6 +42,7 @@ module VX_rop_mem #(
|
|||
|
||||
localparam UUID_WIDTH = `UP(`UUID_BITS);
|
||||
localparam NUM_REQS = ROP_MEM_REQS;
|
||||
localparam W_ADDR_BITS = (`ROP_ADDR_BITS + 6) - 2;
|
||||
|
||||
wire mreq_valid, mreq_valid_r;
|
||||
wire mreq_rw, mreq_rw_r;
|
||||
|
@ -72,7 +73,8 @@ module VX_rop_mem #(
|
|||
|
||||
// depth/stencil values submission
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [31:0] m_y_pitch, baddr_s;
|
||||
wire [31:0] m_y_pitch;
|
||||
`UNUSED_VAR (m_y_pitch)
|
||||
|
||||
VX_multiplier #(
|
||||
.A_WIDTH (`ROP_DIM_BITS),
|
||||
|
@ -87,13 +89,15 @@ module VX_rop_mem #(
|
|||
.result (m_y_pitch)
|
||||
);
|
||||
|
||||
wire mask = req_ds_mask[i];
|
||||
wire [31:0] baddr = dcrs.zbuf_addr + (req_pos_x[i] * 4);
|
||||
wire [3:0] byteen = req_rw ? {stencil_byteen[i], depth_byteen} : 4'b1111;
|
||||
wire [31:0] data = {req_stencil[i], req_depth[i]};
|
||||
wire [W_ADDR_BITS-1:0] baddr, baddr_s;
|
||||
assign baddr = {dcrs.zbuf_addr, 4'b0} + W_ADDR_BITS'(req_pos_x[i]);
|
||||
|
||||
wire [3:0] byteen = req_rw ? {stencil_byteen[i], depth_byteen} : 4'b1111;
|
||||
wire [31:0] data = {req_stencil[i], req_depth[i]};
|
||||
wire mask = req_ds_mask[i];
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW (1 + 4 + 32 + 32),
|
||||
.DATAW (1 + 4 + W_ADDR_BITS + 32),
|
||||
.DEPTH (`LATENCY_IMUL)
|
||||
) shift_reg (
|
||||
.clk (clk),
|
||||
|
@ -103,15 +107,14 @@ module VX_rop_mem #(
|
|||
.data_out ({mreq_mask[i], mreq_byteen[i], baddr_s, mreq_data[i]})
|
||||
);
|
||||
|
||||
wire [31:0] addr = baddr_s + m_y_pitch;
|
||||
|
||||
assign mreq_addr[i] = addr[(32-OCACHE_ADDR_WIDTH) +: OCACHE_ADDR_WIDTH];
|
||||
`UNUSED_VAR (addr)
|
||||
wire [W_ADDR_BITS-1:0] addr = baddr_s + W_ADDR_BITS'(m_y_pitch[31:2]);
|
||||
assign mreq_addr[i] = OCACHE_ADDR_WIDTH'(addr);
|
||||
end
|
||||
|
||||
// blend color submission
|
||||
for (genvar i = NUM_LANES; i < NUM_REQS; ++i) begin
|
||||
wire [31:0] m_y_pitch, baddr_s;
|
||||
wire [31:0] m_y_pitch;
|
||||
`UNUSED_VAR (m_y_pitch)
|
||||
|
||||
VX_multiplier #(
|
||||
.A_WIDTH (`ROP_DIM_BITS),
|
||||
|
@ -126,13 +129,15 @@ module VX_rop_mem #(
|
|||
.result (m_y_pitch)
|
||||
);
|
||||
|
||||
wire mask = req_c_mask[i - NUM_LANES];
|
||||
wire [31:0] baddr = dcrs.cbuf_addr + (req_pos_x[i - NUM_LANES] * 4);
|
||||
wire [W_ADDR_BITS-1:0] baddr, baddr_s;
|
||||
assign baddr = {dcrs.cbuf_addr, 4'b0} + W_ADDR_BITS'(req_pos_x[i - NUM_LANES]);
|
||||
|
||||
wire [3:0] byteen = req_rw ? color_byteen : 4'b1111;
|
||||
wire [31:0] data = req_color[i - NUM_LANES];
|
||||
wire [31:0] data = req_color[i - NUM_LANES];
|
||||
wire mask = req_c_mask[i - NUM_LANES];
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW (1 + 4 + 32 + 32),
|
||||
.DATAW (1 + 4 + W_ADDR_BITS + 32),
|
||||
.DEPTH (`LATENCY_IMUL)
|
||||
) shift_reg (
|
||||
.clk (clk),
|
||||
|
@ -142,10 +147,8 @@ module VX_rop_mem #(
|
|||
.data_out ({mreq_mask[i], mreq_byteen[i], baddr_s, mreq_data[i]})
|
||||
);
|
||||
|
||||
wire [31:0] addr = baddr_s + m_y_pitch;
|
||||
|
||||
assign mreq_addr[i] = addr[(32-OCACHE_ADDR_WIDTH) +: OCACHE_ADDR_WIDTH];
|
||||
`UNUSED_VAR (addr)
|
||||
wire [W_ADDR_BITS-1:0] addr = baddr_s + W_ADDR_BITS'(m_y_pitch[31:2]);
|
||||
assign mreq_addr[i] = OCACHE_ADDR_WIDTH'(addr);
|
||||
end
|
||||
|
||||
VX_shift_register #(
|
||||
|
|
|
@ -3,6 +3,12 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
`ifdef XLEN_64
|
||||
`define ROP_ADDR_BITS 32
|
||||
`else
|
||||
`define ROP_ADDR_BITS 25
|
||||
`endif
|
||||
|
||||
package VX_rop_types;
|
||||
|
||||
typedef struct packed {
|
||||
|
@ -13,11 +19,11 @@ typedef struct packed {
|
|||
} rgba_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [31:0] cbuf_addr;
|
||||
logic [`ROP_ADDR_BITS-1:0] cbuf_addr;
|
||||
logic [`ROP_PITCH_BITS-1:0] cbuf_pitch;
|
||||
logic [3:0] cbuf_writemask;
|
||||
|
||||
logic [31:0] zbuf_addr;
|
||||
logic [`ROP_ADDR_BITS-1:0] zbuf_addr;
|
||||
logic [`ROP_PITCH_BITS-1:0] zbuf_pitch;
|
||||
|
||||
logic depth_enable;
|
||||
|
|
|
@ -3,7 +3,8 @@
|
|||
module VX_tex_addr #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter REQ_INFOW = 1,
|
||||
parameter NUM_LANES = 1
|
||||
parameter NUM_LANES = 1,
|
||||
parameter W_ADDR_BITS = `TEX_ADDR_BITS + 6
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -29,7 +30,7 @@ module VX_tex_addr #(
|
|||
output wire [NUM_LANES-1:0] rsp_mask,
|
||||
output wire [`TEX_FILTER_BITS-1:0] rsp_filter,
|
||||
output wire [`TEX_LGSTRIDE_BITS-1:0] rsp_lgstride,
|
||||
output wire [NUM_LANES-1:0][31:0] rsp_baseaddr,
|
||||
output wire [NUM_LANES-1:0][W_ADDR_BITS-1:0] rsp_baseaddr,
|
||||
output wire [NUM_LANES-1:0][3:0][31:0] rsp_addr,
|
||||
output wire [NUM_LANES-1:0][1:0][`TEX_BLEND_FRAC-1:0] rsp_blends,
|
||||
output wire [REQ_INFOW-1:0] rsp_info,
|
||||
|
@ -52,7 +53,7 @@ module VX_tex_addr #(
|
|||
wire [NUM_LANES-1:0][1:0][`TEX_FXD_FRAC-1:0] clamped_hi, clamped_hi_s0;
|
||||
wire [NUM_LANES-1:0][1:0][SHIFT_BITS-1:0] dim_shift, dim_shift_s0;
|
||||
wire [`TEX_LGSTRIDE_BITS-1:0] log_stride, log_stride_s0;
|
||||
wire [NUM_LANES-1:0][31:0] mip_addr, mip_addr_s0;
|
||||
wire [NUM_LANES-1:0][W_ADDR_BITS-1:0] mip_addr, mip_addr_s0;
|
||||
wire [NUM_LANES-1:0][PITCH_BITS-1:0] log_pitch, log_pitch_s0;
|
||||
|
||||
wire stall_out;
|
||||
|
@ -90,11 +91,11 @@ module VX_tex_addr #(
|
|||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign log_pitch[i] = PITCH_BITS'(req_logdims[0] - req_miplevel[i]) + PITCH_BITS'(log_stride);
|
||||
assign mip_addr[i] = req_baseaddr + `TEX_ADDR_BITS'(req_mipoff[i]);
|
||||
assign mip_addr[i] = {req_baseaddr, 6'b0} + W_ADDR_BITS'(req_mipoff[i]);
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + NUM_LANES + `TEX_FILTER_BITS + `TEX_LGSTRIDE_BITS + REQ_INFOW + NUM_LANES * (PITCH_BITS + 2 * SHIFT_BITS + `TEX_ADDR_BITS + 2 * 2 * `TEX_FXD_FRAC)),
|
||||
.DATAW (1 + NUM_LANES + `TEX_FILTER_BITS + `TEX_LGSTRIDE_BITS + REQ_INFOW + NUM_LANES * (PITCH_BITS + 2 * SHIFT_BITS + W_ADDR_BITS + 2 * 2 * `TEX_FXD_FRAC)),
|
||||
.RESETW (1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
|
@ -140,7 +141,7 @@ module VX_tex_addr #(
|
|||
assign stall_out = rsp_valid && ~rsp_ready;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + NUM_LANES + `TEX_FILTER_BITS + `TEX_LGSTRIDE_BITS + (NUM_LANES * 32) + (NUM_LANES * 4 * 32) + (2 * NUM_LANES * `TEX_BLEND_FRAC) + REQ_INFOW),
|
||||
.DATAW (1 + NUM_LANES + `TEX_FILTER_BITS + `TEX_LGSTRIDE_BITS + (NUM_LANES * W_ADDR_BITS) + (NUM_LANES * 4 * 32) + (2 * NUM_LANES * `TEX_BLEND_FRAC) + REQ_INFOW),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
|
|
|
@ -55,7 +55,7 @@ module VX_tex_agent #(
|
|||
wire mdata_pop = tex_bus_if.rsp_valid && tex_bus_if.rsp_ready;
|
||||
|
||||
VX_index_buffer #(
|
||||
.DATAW (NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS),
|
||||
.DATAW (NW_WIDTH + `NUM_THREADS + `XLEN + `NR_BITS),
|
||||
.SIZE (`TEX_REQ_QUEUE_SIZE)
|
||||
) tag_store (
|
||||
.clk (clk),
|
||||
|
@ -98,19 +98,25 @@ module VX_tex_agent #(
|
|||
assign mdata_raddr = tex_bus_if.rsp_tag[0 +: REQ_QUEUE_BITS];
|
||||
assign rsp_uuid = tex_bus_if.rsp_tag[REQ_QUEUE_BITS +: UUID_WIDTH];
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] commit_data;
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + (`NUM_THREADS * 32))
|
||||
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + `XLEN + `NR_BITS + (`NUM_THREADS * 32))
|
||||
) rsp_sbuf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (tex_bus_if.rsp_valid),
|
||||
.ready_in (tex_bus_if.rsp_ready),
|
||||
.data_in ({rsp_uuid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, tex_bus_if.rsp_texels}),
|
||||
.data_out ({tex_commit_if.uuid, tex_commit_if.wid, tex_commit_if.tmask, tex_commit_if.PC, tex_commit_if.rd, tex_commit_if.data}),
|
||||
.data_out ({tex_commit_if.uuid, tex_commit_if.wid, tex_commit_if.tmask, tex_commit_if.PC, tex_commit_if.rd, commit_data}),
|
||||
.valid_out (tex_commit_if.valid),
|
||||
.ready_out (tex_commit_if.ready)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign tex_commit_if.data[i] = `XLEN'(commit_data[i]);
|
||||
end
|
||||
|
||||
assign tex_commit_if.wb = 1'b1;
|
||||
assign tex_commit_if.eop = 1'b1;
|
||||
|
||||
|
|
|
@ -14,8 +14,7 @@ module VX_tex_dcr #(
|
|||
input wire [`TEX_STAGE_BITS-1:0] stage,
|
||||
output tex_dcrs_t tex_dcrs
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
// DCR registers
|
||||
|
|
|
@ -2,8 +2,9 @@
|
|||
|
||||
module VX_tex_mem #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter REQ_INFOW = 1,
|
||||
parameter NUM_LANES = 1
|
||||
parameter REQ_INFOW = 1,
|
||||
parameter NUM_LANES = 1,
|
||||
parameter W_ADDR_BITS = `TEX_ADDR_BITS + 6
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -16,7 +17,7 @@ module VX_tex_mem #(
|
|||
input wire [NUM_LANES-1:0] req_mask,
|
||||
input wire [`TEX_FILTER_BITS-1:0] req_filter,
|
||||
input wire [`TEX_LGSTRIDE_BITS-1:0] req_lgstride,
|
||||
input wire [NUM_LANES-1:0][31:0] req_baseaddr,
|
||||
input wire [NUM_LANES-1:0][W_ADDR_BITS-1:0] req_baseaddr,
|
||||
input wire [NUM_LANES-1:0][3:0][31:0] req_addr,
|
||||
input wire [REQ_INFOW-1:0] req_info,
|
||||
output wire req_ready,
|
||||
|
@ -33,7 +34,7 @@ module VX_tex_mem #(
|
|||
|
||||
wire mem_req_valid;
|
||||
wire [3:0][NUM_LANES-1:0] mem_req_mask;
|
||||
wire [3:0][NUM_LANES-1:0][29:0] mem_req_addr;
|
||||
wire [3:0][NUM_LANES-1:0][TCACHE_ADDR_WIDTH-1:0] mem_req_addr;
|
||||
wire [3:0][NUM_LANES-1:0][3:0] mem_req_byteen;
|
||||
wire [TAG_WIDTH-1:0] mem_req_tag;
|
||||
wire mem_req_ready;
|
||||
|
@ -45,11 +46,11 @@ module VX_tex_mem #(
|
|||
|
||||
// full address calculation
|
||||
|
||||
wire [NUM_LANES-1:0][3:0][31:0] full_addr;
|
||||
wire [NUM_LANES-1:0][3:0][W_ADDR_BITS-1:0] full_addr;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar j = 0; j < 4; ++j) begin
|
||||
assign full_addr[i][j] = req_baseaddr[i] + req_addr[i][j];
|
||||
assign full_addr[i][j] = req_baseaddr[i] + W_ADDR_BITS'(req_addr[i][j]);
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -59,7 +60,7 @@ module VX_tex_mem #(
|
|||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar j = 0; j < 4; ++j) begin
|
||||
assign mem_req_addr[j][i] = full_addr[i][j][31:2];
|
||||
assign mem_req_addr[j][i] = TCACHE_ADDR_WIDTH'(full_addr[i][j][W_ADDR_BITS-1:2]);
|
||||
assign mem_req_align[j][i] = full_addr[i][j][1:0];
|
||||
assign mem_req_byteen[j][i] = 4'b1111;
|
||||
end
|
||||
|
|
|
@ -8,7 +8,11 @@
|
|||
`define TEX_FXD_HALF (`TEX_FXD_ONE >> 1)
|
||||
`define TEX_FXD_MASK (`TEX_FXD_ONE - 1)
|
||||
|
||||
`ifdef XLEN_64
|
||||
`define TEX_ADDR_BITS 32
|
||||
`else
|
||||
`define TEX_ADDR_BITS 25
|
||||
`endif
|
||||
`define TEX_FORMAT_BITS 3
|
||||
`define TEX_WRAP_BITS 2
|
||||
`define TEX_FILTER_BITS 1
|
||||
|
|
|
@ -22,6 +22,7 @@ module VX_tex_unit #(
|
|||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
|
||||
localparam BLEND_FRAC_W = (2 * NUM_LANES * `TEX_BLEND_FRAC);
|
||||
localparam W_ADDR_BITS = `TEX_ADDR_BITS + 6;
|
||||
|
||||
// DCRs
|
||||
|
||||
|
@ -80,7 +81,7 @@ module VX_tex_unit #(
|
|||
wire [`TEX_LGSTRIDE_BITS-1:0] mem_req_lgstride;
|
||||
wire [NUM_LANES-1:0][1:0][`TEX_BLEND_FRAC-1:0] mem_req_blends;
|
||||
wire [NUM_LANES-1:0][3:0][31:0] mem_req_addr;
|
||||
wire [NUM_LANES-1:0][31:0] mem_req_baseaddr;
|
||||
wire [NUM_LANES-1:0][W_ADDR_BITS-1:0] mem_req_baseaddr;
|
||||
wire [(TAG_WIDTH + `TEX_FORMAT_BITS)-1:0] mem_req_info;
|
||||
wire mem_req_ready;
|
||||
|
||||
|
|
|
@ -238,14 +238,14 @@ void TextureSampler::configure(const TexDCRS& dcrs) {
|
|||
}
|
||||
|
||||
uint32_t TextureSampler::read(uint32_t stage, int32_t u, int32_t v, uint32_t lod) const {
|
||||
auto mip_off = dcrs_.read(stage, DCR_TEX_MIPOFF(lod));
|
||||
auto base_addr = dcrs_.read(stage, DCR_TEX_ADDR);
|
||||
auto logdim = dcrs_.read(stage, DCR_TEX_LOGDIM);
|
||||
auto format = dcrs_.read(stage, DCR_TEX_FORMAT);
|
||||
auto filter = dcrs_.read(stage, DCR_TEX_FILTER);
|
||||
auto wrap = dcrs_.read(stage, DCR_TEX_WRAP);
|
||||
auto mip_off = dcrs_.read(stage, DCR_TEX_MIPOFF(lod));
|
||||
auto mip_base = uint64_t(dcrs_.read(stage, DCR_TEX_ADDR)) << 6;
|
||||
auto logdim = dcrs_.read(stage, DCR_TEX_LOGDIM);
|
||||
auto format = dcrs_.read(stage, DCR_TEX_FORMAT);
|
||||
auto filter = dcrs_.read(stage, DCR_TEX_FILTER);
|
||||
auto wrap = dcrs_.read(stage, DCR_TEX_WRAP);
|
||||
|
||||
base_addr += mip_off;
|
||||
auto base_addr = mip_base + mip_off;
|
||||
|
||||
auto log_width = std::max<int32_t>((logdim & 0xffff) - lod, 0);
|
||||
auto log_height = std::max<int32_t>((logdim >> 16) - lod, 0);
|
||||
|
@ -270,7 +270,7 @@ uint32_t TextureSampler::read(uint32_t stage, int32_t u, int32_t v, uint32_t lod
|
|||
|
||||
// memory lookup
|
||||
uint32_t texel[4];
|
||||
uint32_t addr[4] = {
|
||||
uint64_t addr[4] = {
|
||||
base_addr + offset00 * stride,
|
||||
base_addr + offset01 * stride,
|
||||
base_addr + offset10 * stride,
|
||||
|
@ -290,7 +290,7 @@ uint32_t TextureSampler::read(uint32_t stage, int32_t u, int32_t v, uint32_t lod
|
|||
|
||||
// memory lookup
|
||||
uint32_t texel;
|
||||
uint32_t addr = base_addr + offset * stride;
|
||||
uint64_t addr = base_addr + offset * stride;
|
||||
mem_cb_(&texel, &addr, stride, 1, cb_arg_);
|
||||
|
||||
// filtering
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <cocogfx/include/fixed.hpp>
|
||||
#include <cocogfx/include/math.hpp>
|
||||
#include <VX_types.h>
|
||||
|
@ -174,7 +173,7 @@ class TextureSampler {
|
|||
public:
|
||||
typedef void (*MemoryCB)(
|
||||
uint32_t* out,
|
||||
const uint32_t* addr,
|
||||
const uint64_t* addr,
|
||||
uint32_t stride,
|
||||
uint32_t size,
|
||||
void* cb_arg
|
||||
|
|
|
@ -1542,7 +1542,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||
for (uint32_t t = 0; t < num_threads; ++t) {
|
||||
if (!tmask_.test(t))
|
||||
continue;
|
||||
rddata[t].i = (WordI)(((int64_t)rsdata[t][0].i * (int64_t)rsdata[t][1].i) >> shift) + rsdata[t][2].i;
|
||||
rddata[t].i = (int32_t)(((int64_t)rsdata[t][0].i32 * (int64_t)rsdata[t][1].i32) >> shift) + rsdata[t][2].i32;
|
||||
}
|
||||
rd_write = true;
|
||||
} break;
|
||||
|
|
|
@ -11,13 +11,13 @@ using namespace vortex;
|
|||
#define STAMP_POOL_MAX_SIZE 1024
|
||||
|
||||
struct prim_mem_trace_t {
|
||||
uint32_t prim_addr;
|
||||
std::vector<uint32_t> edge_addrs;
|
||||
uint64_t prim_addr;
|
||||
std::vector<uint64_t> edge_addrs;
|
||||
uint32_t stamps;
|
||||
};
|
||||
|
||||
struct tile_mem_trace_t {
|
||||
std::vector<uint32_t> header_addrs;
|
||||
std::vector<uint64_t> header_addrs;
|
||||
std::list<prim_mem_trace_t> primitives;
|
||||
bool end_of_tile;
|
||||
};
|
||||
|
@ -89,8 +89,8 @@ public:
|
|||
// get device configuration
|
||||
graphics::Rasterizer::configure(dcrs);
|
||||
num_tiles_ = dcrs.read(DCR_RASTER_TILE_COUNT);
|
||||
tbuf_baseaddr_ = dcrs.read(DCR_RASTER_TBUF_ADDR);
|
||||
pbuf_baseaddr_ = dcrs.read(DCR_RASTER_PBUF_ADDR);
|
||||
tbuf_baseaddr_ = uint64_t(dcrs.read(DCR_RASTER_TBUF_ADDR)) << 6;
|
||||
pbuf_baseaddr_ = uint64_t(dcrs.read(DCR_RASTER_PBUF_ADDR)) << 6;
|
||||
pbuf_stride_ = dcrs.read(DCR_RASTER_PBUF_STRIDE);
|
||||
|
||||
tbuf_addr_ = tbuf_baseaddr_ + raster_index_ * sizeof(graphics::rast_tile_header_t);
|
||||
|
@ -252,10 +252,10 @@ private:
|
|||
uint32_t raster_count_;
|
||||
RAM* mem_;
|
||||
uint32_t num_tiles_;
|
||||
uint32_t tbuf_baseaddr_;
|
||||
uint32_t pbuf_baseaddr_;
|
||||
uint64_t tbuf_baseaddr_;
|
||||
uint64_t pbuf_baseaddr_;
|
||||
uint32_t pbuf_stride_;
|
||||
uint32_t tbuf_addr_;
|
||||
uint64_t tbuf_addr_;
|
||||
uint32_t tile_x_;
|
||||
uint32_t tile_y_;
|
||||
uint32_t pids_offset_;
|
||||
|
@ -388,7 +388,7 @@ public:
|
|||
|
||||
auto& mem_trace = mem_traces.front();
|
||||
|
||||
std::vector<uint32_t> addresses;
|
||||
std::vector<uint64_t> addresses;
|
||||
|
||||
switch (mem_trace_state_) {
|
||||
case e_mem_trace_state::header: {
|
||||
|
|
|
@ -18,13 +18,13 @@ public:
|
|||
depthStencil_.configure(dcrs);
|
||||
blender_.configure(dcrs);
|
||||
|
||||
zbuf_baseaddr_ = dcrs.read(DCR_ROP_ZBUF_ADDR);
|
||||
zbuf_baseaddr_ = uint64_t(dcrs.read(DCR_ROP_ZBUF_ADDR)) << 6;
|
||||
zbuf_pitch_ = dcrs.read(DCR_ROP_ZBUF_PITCH);
|
||||
depth_writemask_ = dcrs.read(DCR_ROP_DEPTH_WRITEMASK) & 0x1;
|
||||
stencil_front_writemask_ = dcrs.read(DCR_ROP_STENCIL_WRITEMASK) & 0xffff;
|
||||
stencil_back_writemask_ = dcrs.read(DCR_ROP_STENCIL_WRITEMASK) >> 16;
|
||||
|
||||
cbuf_baseaddr_ = dcrs.read(DCR_ROP_CBUF_ADDR);
|
||||
cbuf_baseaddr_ = uint64_t(dcrs.read(DCR_ROP_CBUF_ADDR)) << 6;
|
||||
cbuf_pitch_ = dcrs.read(DCR_ROP_CBUF_PITCH);
|
||||
auto cbuf_writemask = dcrs.read(DCR_ROP_CBUF_WRITEMASK) & 0xf;
|
||||
cbuf_writemask_ = (((cbuf_writemask >> 0) & 0x1) * 0x000000ff)
|
||||
|
@ -78,14 +78,16 @@ private:
|
|||
uint32_t* color,
|
||||
RopUnit::TraceData::Ptr trace_data) {
|
||||
if (depth_enable || stencil_enable) {
|
||||
uint32_t zbuf_addr = zbuf_baseaddr_ + y * zbuf_pitch_ + x * 4;
|
||||
uint64_t zbuf_addr = zbuf_baseaddr_ + y * zbuf_pitch_ + x * 4;
|
||||
mem_->read(depthstencil, zbuf_addr, 4);
|
||||
trace_data->mem_rd_addrs.push_back(zbuf_addr);
|
||||
DT(3, "rop-depthstencil-read: x=" << std::dec << x << ", y=" << y << ", addr=0x" << std::hex << zbuf_addr << ", depthstencil=0x" << *depthstencil);
|
||||
}
|
||||
if (color_write_ && (color_read_ || blend_enable)) {
|
||||
uint32_t cbuf_addr = cbuf_baseaddr_ + y * cbuf_pitch_ + x * 4;
|
||||
uint64_t cbuf_addr = cbuf_baseaddr_ + y * cbuf_pitch_ + x * 4;
|
||||
mem_->read(color, cbuf_addr, 4);
|
||||
trace_data->mem_rd_addrs.push_back(cbuf_addr);
|
||||
DT(3, "rop-color-read: x=" << std::dec << x << ", y=" << y << ", addr=0x" << std::hex << cbuf_addr << ", color=0x" << *color);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -105,18 +107,18 @@ private:
|
|||
| (stencil_enable ? (stencil_writemask << ROP_DEPTH_BITS) : 0);
|
||||
if (ds_writeMask != 0) {
|
||||
uint32_t write_value = (dst_depthstencil & ~ds_writeMask) | (depthstencil & ds_writeMask);
|
||||
uint32_t zbuf_addr = zbuf_baseaddr_ + y * zbuf_pitch_ + x * 4;
|
||||
uint64_t zbuf_addr = zbuf_baseaddr_ + y * zbuf_pitch_ + x * 4;
|
||||
mem_->write(&write_value, zbuf_addr, 4);
|
||||
trace_data->mem_wr_addrs.push_back(zbuf_addr);
|
||||
DT(3, "rop-depthstencil: x=" << std::dec << x << ", y=" << y << ", depthstencil=0x" << std::hex << write_value);
|
||||
DT(3, "rop-depthstencil-write: x=" << std::dec << x << ", y=" << y << ", addr=0x" << std::hex << zbuf_addr << ", depthstencil=0x" << write_value);
|
||||
}
|
||||
|
||||
if (color_write_ && ds_passed) {
|
||||
uint32_t write_value = (dst_color & ~cbuf_writemask_) | (color & cbuf_writemask_);
|
||||
uint32_t cbuf_addr = cbuf_baseaddr_ + y * cbuf_pitch_ + x * 4;
|
||||
uint64_t cbuf_addr = cbuf_baseaddr_ + y * cbuf_pitch_ + x * 4;
|
||||
mem_->write(&write_value, cbuf_addr, 4);
|
||||
trace_data->mem_wr_addrs.push_back(cbuf_addr);
|
||||
DT(3, "rop-color: x=" << std::dec << x << ", y=" << y << ", color=0x" << std::hex << write_value);
|
||||
DT(3, "rop-color-write: x=" << std::dec << x << ", y=" << y << ", addr=0x" << std::hex << cbuf_addr << ", color=0x" << write_value);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -122,7 +122,7 @@ private:
|
|||
|
||||
void texture_read(
|
||||
uint32_t* out,
|
||||
const uint32_t* addr,
|
||||
const uint64_t* addr,
|
||||
uint32_t stride,
|
||||
uint32_t size) {
|
||||
for (uint32_t i = 0; i < size; ++i) {
|
||||
|
@ -133,7 +133,7 @@ private:
|
|||
|
||||
static void memoryCB(
|
||||
uint32_t* out,
|
||||
const uint32_t* addr,
|
||||
const uint64_t* addr,
|
||||
uint32_t stride,
|
||||
uint32_t size,
|
||||
void* cb_arg) {
|
||||
|
|
|
@ -16,7 +16,13 @@
|
|||
class GpuSW;
|
||||
|
||||
typedef struct {
|
||||
uint32_t log_num_tasks;
|
||||
#ifdef SW_ENABLE
|
||||
graphics::RasterDCRS raster_dcrs;
|
||||
graphics::RopDCRS rop_dcrs;
|
||||
graphics::TexDCRS tex_dcrs;
|
||||
#endif
|
||||
uint32_t log_num_tasks;
|
||||
uint64_t prim_addr;
|
||||
|
||||
bool depth_enabled;
|
||||
bool color_enabled;
|
||||
|
@ -26,25 +32,6 @@ typedef struct {
|
|||
bool sw_rast;
|
||||
bool sw_rop;
|
||||
bool sw_interp;
|
||||
|
||||
uint32_t dst_width;
|
||||
uint32_t dst_height;
|
||||
|
||||
uint64_t cbuf_addr;
|
||||
uint8_t cbuf_stride;
|
||||
uint32_t cbuf_pitch;
|
||||
|
||||
uint64_t zbuf_addr;
|
||||
uint8_t zbuf_stride;
|
||||
uint32_t zbuf_pitch;
|
||||
|
||||
uint64_t prim_addr;
|
||||
|
||||
#ifdef SW_ENABLE
|
||||
graphics::RasterDCRS raster_dcrs;
|
||||
graphics::RopDCRS rop_dcrs;
|
||||
graphics::TexDCRS tex_dcrs;
|
||||
#endif
|
||||
} kernel_arg_t;
|
||||
|
||||
#endif
|
|
@ -25,8 +25,8 @@ public:
|
|||
void configure(const graphics::RasterDCRS& dcrs, uint32_t log_num_tasks) {
|
||||
graphics::Rasterizer::configure(dcrs);
|
||||
num_tiles_ = dcrs.read(DCR_RASTER_TILE_COUNT);
|
||||
tbuf_baseaddr_ = dcrs.read(DCR_RASTER_TBUF_ADDR);
|
||||
pbuf_baseaddr_ = dcrs.read(DCR_RASTER_PBUF_ADDR);
|
||||
tbuf_baseaddr_ = uint64_t(dcrs.read(DCR_RASTER_TBUF_ADDR)) << 6;
|
||||
pbuf_baseaddr_ = uint64_t(dcrs.read(DCR_RASTER_PBUF_ADDR)) << 6;
|
||||
pbuf_stride_ = dcrs.read(DCR_RASTER_PBUF_STRIDE);
|
||||
log_num_tasks_ = log_num_tasks;
|
||||
}
|
||||
|
@ -64,8 +64,8 @@ public:
|
|||
private:
|
||||
|
||||
uint32_t num_tiles_;
|
||||
uint32_t tbuf_baseaddr_;
|
||||
uint32_t pbuf_baseaddr_;
|
||||
uint64_t tbuf_baseaddr_;
|
||||
uint64_t pbuf_baseaddr_;
|
||||
uint32_t pbuf_stride_;
|
||||
uint32_t log_num_tasks_;
|
||||
};
|
||||
|
@ -80,13 +80,13 @@ public:
|
|||
blender_.configure(dcrs);
|
||||
|
||||
// get device configuration
|
||||
zbuf_baseaddr_ = dcrs.read(DCR_ROP_ZBUF_ADDR);
|
||||
zbuf_baseaddr_ = uint64_t(dcrs.read(DCR_ROP_ZBUF_ADDR)) << 6;
|
||||
zbuf_pitch_ = dcrs.read(DCR_ROP_ZBUF_PITCH);
|
||||
depth_writemask_ = dcrs.read(DCR_ROP_DEPTH_WRITEMASK) & 0x1;
|
||||
stencil_front_writemask_ = dcrs.read(DCR_ROP_STENCIL_WRITEMASK) & 0xffff;
|
||||
stencil_back_writemask_ = dcrs.read(DCR_ROP_STENCIL_WRITEMASK) >> 16;
|
||||
|
||||
cbuf_baseaddr_ = dcrs.read(DCR_ROP_CBUF_ADDR);
|
||||
cbuf_baseaddr_ = uint64_t(dcrs.read(DCR_ROP_CBUF_ADDR)) << 6;
|
||||
cbuf_pitch_ = dcrs.read(DCR_ROP_CBUF_PITCH);
|
||||
auto cbuf_writemask = dcrs.read(DCR_ROP_CBUF_WRITEMASK) & 0xf;
|
||||
cbuf_writemask_ = (((cbuf_writemask >> 0) & 0x1) * 0x000000ff)
|
||||
|
@ -132,12 +132,12 @@ private:
|
|||
uint32_t* depthstencil,
|
||||
uint32_t* color) const {
|
||||
if (depth_enable || stencil_enable) {
|
||||
uint32_t zbuf_addr = zbuf_baseaddr_ + y * zbuf_pitch_ + x * 4;
|
||||
uint64_t zbuf_addr = zbuf_baseaddr_ + y * zbuf_pitch_ + x * 4;
|
||||
*depthstencil = *reinterpret_cast<const uint32_t*>(zbuf_addr);
|
||||
}
|
||||
|
||||
if (color_write_ && (color_read_ || blend_enable)) {
|
||||
uint32_t cbuf_addr = cbuf_baseaddr_ + y * cbuf_pitch_ + x * 4;
|
||||
uint64_t cbuf_addr = cbuf_baseaddr_ + y * cbuf_pitch_ + x * 4;
|
||||
*color = *reinterpret_cast<const uint32_t*>(cbuf_addr);
|
||||
}
|
||||
}
|
||||
|
@ -157,12 +157,12 @@ private:
|
|||
| (stencil_enable ? (stencil_writemask << ROP_DEPTH_BITS) : 0);
|
||||
if (ds_writeMask != 0) {
|
||||
uint32_t write_value = (dst_depthstencil & ~ds_writeMask) | (depthstencil & ds_writeMask);
|
||||
uint32_t zbuf_addr = zbuf_baseaddr_ + y * zbuf_pitch_ + x * 4;
|
||||
uint64_t zbuf_addr = zbuf_baseaddr_ + y * zbuf_pitch_ + x * 4;
|
||||
*reinterpret_cast<uint32_t*>(zbuf_addr) = write_value;
|
||||
}
|
||||
if (color_write_ && ds_passed) {
|
||||
uint32_t write_value = (dst_color & ~cbuf_writemask_) | (color & cbuf_writemask_);
|
||||
uint32_t cbuf_addr = cbuf_baseaddr_ + y * cbuf_pitch_ + x * 4;
|
||||
uint64_t cbuf_addr = cbuf_baseaddr_ + y * cbuf_pitch_ + x * 4;
|
||||
*reinterpret_cast<uint32_t*>(cbuf_addr) = write_value;
|
||||
}
|
||||
}
|
||||
|
@ -170,13 +170,13 @@ private:
|
|||
graphics::DepthTencil depthStencil_;
|
||||
graphics::Blender blender_;
|
||||
|
||||
uint32_t zbuf_baseaddr_;
|
||||
uint64_t zbuf_baseaddr_;
|
||||
uint32_t zbuf_pitch_;
|
||||
bool depth_writemask_;
|
||||
uint32_t stencil_front_writemask_;
|
||||
uint32_t stencil_back_writemask_;
|
||||
|
||||
uint32_t cbuf_baseaddr_;
|
||||
uint64_t cbuf_baseaddr_;
|
||||
uint32_t cbuf_pitch_;
|
||||
uint32_t cbuf_writemask_;
|
||||
bool color_read_;
|
||||
|
@ -196,7 +196,7 @@ public:
|
|||
|
||||
private:
|
||||
static void memory_cb(uint32_t* out,
|
||||
const uint32_t* addr,
|
||||
const uint64_t* addr,
|
||||
uint32_t stride,
|
||||
uint32_t size,
|
||||
void* /*cb_arg*/) {
|
||||
|
|
|
@ -222,21 +222,21 @@ int render(const CGLTrace& trace) {
|
|||
uint32_t primbuf_stride = sizeof(graphics::rast_prim_t);
|
||||
|
||||
// configure raster units
|
||||
RASTER_DCR_WRITE(DCR_RASTER_TBUF_ADDR, tilebuf_addr);
|
||||
RASTER_DCR_WRITE(DCR_RASTER_TBUF_ADDR, tilebuf_addr / 64); // block address
|
||||
RASTER_DCR_WRITE(DCR_RASTER_TILE_COUNT, num_tiles);
|
||||
RASTER_DCR_WRITE(DCR_RASTER_PBUF_ADDR, primbuf_addr);
|
||||
RASTER_DCR_WRITE(DCR_RASTER_PBUF_ADDR, primbuf_addr / 64); // block address
|
||||
RASTER_DCR_WRITE(DCR_RASTER_PBUF_STRIDE, primbuf_stride);
|
||||
RASTER_DCR_WRITE(DCR_RASTER_SCISSOR_X, (dst_width << 16) | 0);
|
||||
RASTER_DCR_WRITE(DCR_RASTER_SCISSOR_Y, (dst_height << 16) | 0);
|
||||
|
||||
// configure rop color buffer
|
||||
ROP_DCR_WRITE(DCR_ROP_CBUF_ADDR, cbuf_addr);
|
||||
ROP_DCR_WRITE(DCR_ROP_CBUF_ADDR, cbuf_addr / 64); // block address
|
||||
ROP_DCR_WRITE(DCR_ROP_CBUF_PITCH, cbuf_pitch);
|
||||
ROP_DCR_WRITE(DCR_ROP_CBUF_WRITEMASK, states.color_writemask);
|
||||
|
||||
if (states.depth_test || states.stencil_test) {
|
||||
// configure rop depth buffer
|
||||
ROP_DCR_WRITE(DCR_ROP_ZBUF_ADDR, zbuf_addr);
|
||||
ROP_DCR_WRITE(DCR_ROP_ZBUF_ADDR, zbuf_addr / 64); // block address
|
||||
ROP_DCR_WRITE(DCR_ROP_ZBUF_PITCH, zbuf_pitch);
|
||||
}
|
||||
|
||||
|
@ -338,7 +338,7 @@ int render(const CGLTrace& trace) {
|
|||
TEX_DCR_WRITE(DCR_TEX_FORMAT, tex_format);
|
||||
TEX_DCR_WRITE(DCR_TEX_WRAP, (tex_wrapV << 16) | tex_wrapU);
|
||||
TEX_DCR_WRITE(DCR_TEX_FILTER, tex_filter ? TEX_FILTER_BILINEAR : TEX_FILTER_POINT);
|
||||
TEX_DCR_WRITE(DCR_TEX_ADDR, texbuf_addr);
|
||||
TEX_DCR_WRITE(DCR_TEX_ADDR, texbuf_addr / 64); // block address
|
||||
for (uint32_t i = 0; i < mip_offsets.size(); ++i) {
|
||||
assert(i < TEX_LOD_MAX);
|
||||
TEX_DCR_WRITE(DCR_TEX_MIPOFF(i), mip_offsets.at(i));
|
||||
|
@ -526,17 +526,6 @@ int main(int argc, char *argv[]) {
|
|||
kernel_arg.sw_rop = sw_rop;
|
||||
kernel_arg.sw_interp = sw_interp;
|
||||
|
||||
kernel_arg.dst_width = dst_width;
|
||||
kernel_arg.dst_height = dst_height;
|
||||
|
||||
kernel_arg.cbuf_stride = cbuf_stride;
|
||||
kernel_arg.cbuf_pitch = cbuf_pitch;
|
||||
kernel_arg.cbuf_addr = cbuf_addr;
|
||||
|
||||
kernel_arg.zbuf_stride = zbuf_stride;
|
||||
kernel_arg.zbuf_pitch = zbuf_pitch;
|
||||
kernel_arg.zbuf_addr = zbuf_addr;
|
||||
|
||||
// run tests
|
||||
RT_CHECK(render(trace));
|
||||
|
||||
|
|
|
@ -224,7 +224,7 @@ int main(int argc, char *argv[]) {
|
|||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
auto ref = ((a[i] * b[i]) >> (s[i] * 8)) + c[i];
|
||||
if (d[i] != ref) {
|
||||
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << ", c=" << c[i] << ", s=" << s[i] << std::endl;
|
||||
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << d[i] << ", a=" << a[i] << ", b=" << b[i] << ", c=" << c[i] << ", s=" << s[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -173,9 +173,9 @@ int render(const CGLTrace& trace) {
|
|||
uint32_t primbuf_stride = sizeof(graphics::rast_prim_t);
|
||||
|
||||
// configure raster units
|
||||
vx_dcr_write(device, DCR_RASTER_TBUF_ADDR, tilebuf_addr);
|
||||
vx_dcr_write(device, DCR_RASTER_TBUF_ADDR, tilebuf_addr / 64); // block address
|
||||
vx_dcr_write(device, DCR_RASTER_TILE_COUNT, num_tiles);
|
||||
vx_dcr_write(device, DCR_RASTER_PBUF_ADDR, primbuf_addr);
|
||||
vx_dcr_write(device, DCR_RASTER_PBUF_ADDR, primbuf_addr / 64); // block address
|
||||
vx_dcr_write(device, DCR_RASTER_PBUF_STRIDE, primbuf_stride);
|
||||
vx_dcr_write(device, DCR_RASTER_SCISSOR_X, (dst_width << 16) | 0);
|
||||
vx_dcr_write(device, DCR_RASTER_SCISSOR_Y, (dst_height << 16) | 0);
|
||||
|
|
|
@ -147,12 +147,12 @@ int render(uint32_t num_tasks) {
|
|||
staging_buf = nullptr;
|
||||
|
||||
// configure rop color buffer
|
||||
vx_dcr_write(device, DCR_ROP_CBUF_ADDR, cbuf_addr);
|
||||
vx_dcr_write(device, DCR_ROP_CBUF_ADDR, cbuf_addr / 64); // block address
|
||||
vx_dcr_write(device, DCR_ROP_CBUF_PITCH, cbuf_pitch);
|
||||
vx_dcr_write(device, DCR_ROP_CBUF_WRITEMASK, 0xf);
|
||||
|
||||
// configure rop depth buffer to default
|
||||
vx_dcr_write(device, DCR_ROP_ZBUF_ADDR, zbuf_addr);
|
||||
vx_dcr_write(device, DCR_ROP_ZBUF_ADDR, zbuf_addr / 64); // block address
|
||||
vx_dcr_write(device, DCR_ROP_ZBUF_PITCH, zbuf_pitch);
|
||||
if (depth_enable) {
|
||||
vx_dcr_write(device, DCR_ROP_DEPTH_FUNC, ROP_DEPTH_FUNC_LESS);
|
||||
|
|
|
@ -15,7 +15,7 @@ typedef struct {
|
|||
} tile_info_t;
|
||||
|
||||
static void memory_cb(uint32_t* out,
|
||||
const uint32_t* addr,
|
||||
const uint64_t* addr,
|
||||
uint32_t stride,
|
||||
uint32_t size,
|
||||
void* /*cb_arg*/) {
|
||||
|
|
|
@ -268,7 +268,7 @@ int main(int argc, char *argv[]) {
|
|||
TEX_DCR_WRITE(DCR_TEX_FORMAT, format);
|
||||
TEX_DCR_WRITE(DCR_TEX_WRAP, (wrap << 16) | wrap);
|
||||
TEX_DCR_WRITE(DCR_TEX_FILTER, (filter ? TEX_FILTER_BILINEAR : TEX_FILTER_POINT));
|
||||
TEX_DCR_WRITE(DCR_TEX_ADDR, src_addr);
|
||||
TEX_DCR_WRITE(DCR_TEX_ADDR, src_addr / 64); // block address
|
||||
for (uint32_t i = 0; i < mip_offsets.size(); ++i) {
|
||||
assert(i < TEX_LOD_MAX);
|
||||
TEX_DCR_WRITE(DCR_TEX_MIPOFF(i), mip_offsets.at(i));
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue