enabling 64-bit ISA with graphics extension

This commit is contained in:
Blaise Tine 2023-06-23 18:52:54 -04:00
parent c0378b1fcc
commit c7d0f1ee34
32 changed files with 235 additions and 213 deletions

View file

@ -58,6 +58,9 @@ jobs:
- stage: test
name: graphics
script: cp -r ../build32 ../build32_graphics && cd ../build32_graphics && ./ci/travis_run.py ./ci/regression.sh -graphics
# 64-bit graphics regression job: works on a copy of ../build64, and runs the
# same "-graphics" regression as the 32-bit job with XLEN=64 and the riscv64
# toolchain path set in the environment.
- stage: test
name: graphics64
script: cp -r ../build64 ../build64_graphics && cd ../build64_graphics && XLEN=64 RISCV_TOOLCHAIN_PATH=$TOOLDIR/riscv64-gnu-toolchain ./ci/travis_run.py ./ci/regression.sh -graphics
- stage: test
name: tex
script: cp -r ../build32 ../build32_tex && cd ../build32_tex && ./ci/travis_run.py ./ci/regression.sh -tex

View file

@ -94,18 +94,18 @@ CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=t
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-itoad.png -rtoad_ref_f6.png -f6 -g0"
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g0.png -g0"
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -rsoccer_ref_g0.png -g0"
CONFIGS="-DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -rsoccer_ref_g0.png -g0"
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g0.png -g0"
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g1.png -g1" --perf=3
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -rsoccer_ref_g1.png -g1" --perf=3
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -rsoccer_ref_g2.png -g2"
CONFIGS="-DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -rsoccer_ref_g1.png -g1" --perf=3
CONFIGS="-DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -rsoccer_ref_g2.png -g2"
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g2.png -g2"
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -rsoccer_ref_g1.png -g1" --perf=3
CONFIGS="-DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -rsoccer_ref_g1.png -g1" --perf=3
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g1.png -g1" --perf=3
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g1.png -g1 -z"
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE -DTCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g1.png -g1"
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE -DNUM_TEX_UNITS=2 -DL1_DISABLE -DSM_DISABLE -DTCACHE_DISABLE" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -rsoccer_ref_g1.png" --cores=4 --warps=1 --threads=2
CONFIGS="-DEXT_TEX_ENABLE -DNUM_TEX_UNITS=2 -DL1_DISABLE -DSM_DISABLE -DTCACHE_DISABLE" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -rsoccer_ref_g1.png" --cores=4 --warps=1 --threads=2
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE -DNUM_TEX_UNITS=2 -DL1_DISABLE -DSM_DISABLE -DTCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g1.png" --cores=1 --warps=1 --threads=2
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE -DNUM_TEX_UNITS=1 -DL1_DISABLE -DSM_DISABLE -DTCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g1.png" --cores=2 --warps=1 --threads=2
CONFIGS="-DENABLE_DPI -DEXT_TEX_ENABLE -DNUM_TEX_UNITS=4 -DL1_DISABLE -DSM_DISABLE -DTCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g1.png" --cores=2 --warps=1 --threads=2
@ -119,11 +119,11 @@ rop()
{
echo "begin render output tests..."
CONFIGS="-DENABLE_DPI -DEXT_ROP_ENABLE" ./ci/blackbox.sh --driver=simx --app=rop --args="-rwhitebox_128.png" --perf=5
CONFIGS="-DEXT_ROP_ENABLE" ./ci/blackbox.sh --driver=simx --app=rop --args="-rwhitebox_128.png" --perf=5
CONFIGS="-DENABLE_DPI -DEXT_ROP_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=rop --args="-rwhitebox_128.png" --perf=5
CONFIGS="-DENABLE_DPI -DEXT_ROP_ENABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=rop --args="-rwhitebox_128.png"
CONFIGS="-DENABLE_DPI -DEXT_ROP_ENABLE -DOCACHE_NUM_BANKS=8" ./ci/blackbox.sh --driver=rtlsim --app=rop --args="-rwhitebox_128.png" --perf=5
CONFIGS="-DENABLE_DPI -DEXT_ROP_ENABLE -DNUM_ROP_UNITS=2 -DL1_DISABLE -DSM_DISABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=simx --app=rop --args="-rwhitebox_128.png" --cores=4 --warps=1 --threads=2
CONFIGS="-DEXT_ROP_ENABLE -DNUM_ROP_UNITS=2 -DL1_DISABLE -DSM_DISABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=simx --app=rop --args="-rwhitebox_128.png" --cores=4 --warps=1 --threads=2
CONFIGS="-DENABLE_DPI -DEXT_ROP_ENABLE -DNUM_ROP_UNITS=2 -DL1_DISABLE -DSM_DISABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=rop --args="-rwhitebox_128.png" --cores=1 --warps=1 --threads=2
CONFIGS="-DENABLE_DPI -DEXT_ROP_ENABLE -DNUM_ROP_UNITS=1 -DL1_DISABLE -DSM_DISABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=rop --args="-rwhitebox_128.png" --cores=2 --warps=1 --threads=2
CONFIGS="-DENABLE_DPI -DEXT_ROP_ENABLE -DNUM_ROP_UNITS=2 -DL1_DISABLE -DSM_DISABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=rop --args="-rwhitebox_128.png" --cores=4 --warps=1 --threads=2
@ -137,15 +137,15 @@ raster()
{
echo "begin rasterizer tests..."
CONFIGS="-DENABLE_DPI -DEXT_RASTER_ENABLE" ./ci/blackbox.sh --driver=simx --app=raster --args="-ttriangle.cgltrace -rtriangle_ref_128.png" --perf=4
CONFIGS="-DEXT_RASTER_ENABLE" ./ci/blackbox.sh --driver=simx --app=raster --args="-ttriangle.cgltrace -rtriangle_ref_128.png" --perf=4
CONFIGS="-DENABLE_DPI -DEXT_RASTER_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=raster --args="-ttriangle.cgltrace -rtriangle_ref_128.png" --perf=4
CONFIGS="-DENABLE_DPI -DEXT_RASTER_ENABLE -DRCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=raster --args="-ttriangle.cgltrace -rtriangle_ref_128.png"
CONFIGS="-DENABLE_DPI -DEXT_RASTER_ENABLE -DRCACHE_NUM_BANKS=4" ./ci/blackbox.sh --driver=rtlsim --app=raster --args="-ttriangle.cgltrace -rtriangle_ref_128.png" --perf=4
CONFIGS="-DENABLE_DPI -DEXT_RASTER_ENABLE -DRASTER_TILE_LOGSIZE=4" ./ci/blackbox.sh --driver=simx --app=raster --args="-k4 -ttriangle.cgltrace -rtriangle_ref_128.png"
CONFIGS="-DENABLE_DPI -DEXT_RASTER_ENABLE -DRASTER_TILE_LOGSIZE=6" ./ci/blackbox.sh --driver=simx --app=raster --args="-k6 -ttriangle.cgltrace -rtriangle_ref_128.png"
CONFIGS="-DEXT_RASTER_ENABLE -DRASTER_TILE_LOGSIZE=4" ./ci/blackbox.sh --driver=simx --app=raster --args="-k4 -ttriangle.cgltrace -rtriangle_ref_128.png"
CONFIGS="-DEXT_RASTER_ENABLE -DRASTER_TILE_LOGSIZE=6" ./ci/blackbox.sh --driver=simx --app=raster --args="-k6 -ttriangle.cgltrace -rtriangle_ref_128.png"
CONFIGS="-DENABLE_DPI -DEXT_RASTER_ENABLE -DRASTER_TILE_LOGSIZE=4" ./ci/blackbox.sh --driver=rtlsim --app=raster --args="-k4 -ttriangle.cgltrace -rtriangle_ref_128.png"
CONFIGS="-DENABLE_DPI -DEXT_RASTER_ENABLE -DRASTER_TILE_LOGSIZE=6" ./ci/blackbox.sh --driver=rtlsim --app=raster --args="-k6 -ttriangle.cgltrace -rtriangle_ref_128.png"
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE -DNUM_RASTER_UNITS=2 -DL1_DISABLE -DSM_DISABLE -DRCACHE_DISABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-tbox.cgltrace -rbox_ref_128.png" --cores=4 --warps=1 --threads=2
CONFIGS="-DEXT_GFX_ENABLE -DNUM_RASTER_UNITS=2 -DL1_DISABLE -DSM_DISABLE -DRCACHE_DISABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-tbox.cgltrace -rbox_ref_128.png" --cores=4 --warps=1 --threads=2
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE -DNUM_RASTER_UNITS=2 -DL1_DISABLE -DSM_DISABLE -DRCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=draw3d --args="-tbox.cgltrace -rbox_ref_128.png" --cores=1 --warps=1 --threads=2
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE -DNUM_RASTER_UNITS=1 -DL1_DISABLE -DSM_DISABLE -DRCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=draw3d --args="-tbox.cgltrace -rbox_ref_128.png" --cores=2 --warps=1 --threads=2
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE -DNUM_RASTER_UNITS=2 -DL1_DISABLE -DSM_DISABLE -DRCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=draw3d --args="-tbox.cgltrace -rbox_ref_128.png" --cores=4 --warps=1 --threads=2
@ -161,18 +161,18 @@ graphics()
{
echo "begin graphics tests..."
CONFIGS="-DENABLE_DPI -DEXT_IMADD_ENABLE" ./ci/blackbox.sh --driver=simx --app=imadd
CONFIGS="-DEXT_IMADD_ENABLE" ./ci/blackbox.sh --driver=simx --app=imadd
CONFIGS="-DENABLE_DPI -DEXT_IMADD_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=imadd --args="-n32 -z"
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE -DNUM_RASTER_UNITS=2" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-tbox.cgltrace -rbox_ref_128.png"
CONFIGS="-DEXT_GFX_ENABLE -DNUM_RASTER_UNITS=2" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-tbox.cgltrace -rbox_ref_128.png"
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE -DNUM_RASTER_UNITS=2" ./ci/blackbox.sh --driver=rtlsim --app=draw3d --args="-tbox.cgltrace -rbox_ref_128.png"
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE -DL1_DISABLE -DSM_DISABLE -DTCACHE_DISABLE -DRCACHE_DISABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-tbox.cgltrace -rbox_ref_128.png" --clusters=2 --cores=2 --warps=1 --threads=2
CONFIGS="-DEXT_GFX_ENABLE -DL1_DISABLE -DSM_DISABLE -DTCACHE_DISABLE -DRCACHE_DISABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-tbox.cgltrace -rbox_ref_128.png" --clusters=2 --cores=2 --warps=1 --threads=2
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE -DL1_DISABLE -DSM_DISABLE -DTCACHE_DISABLE -DRCACHE_DISABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=draw3d --args="-tbox.cgltrace -rbox_ref_128.png" --clusters=2 --cores=2 --warps=1 --threads=2
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-ttriangle.cgltrace -rtriangle_ref_8.png -w8 -h8" --warps=1 --threads=2 --debug=3
CONFIGS="-DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-ttriangle.cgltrace -rtriangle_ref_8.png -w8 -h8" --warps=1 --threads=2 --debug=3
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=draw3d --args="-ttriangle.cgltrace -rtriangle_ref_8.png -w8 -h8" --warps=1 --threads=2 --debug=3
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE -DL1_DISABLE -DSM_DISABLE -DTCACHE_DISABLE -DRCACHE_DISABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-tvase.cgltrace -rvase_ref_32.png -w32 -h32" --threads=1
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE -DIPDOM_STACK_SIZE=128" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-x -ttriangle.cgltrace -rtriangle_ref_128.png"
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-y -ttriangle.cgltrace -rtriangle_ref_128.png"
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-z -ttriangle.cgltrace -rtriangle_ref_128.png"
CONFIGS="-DEXT_GFX_ENABLE -DL1_DISABLE -DSM_DISABLE -DTCACHE_DISABLE -DRCACHE_DISABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-tvase.cgltrace -rvase_ref_32.png -w32 -h32" --threads=1
CONFIGS="-DEXT_GFX_ENABLE -DIPDOM_STACK_SIZE=128" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-x -ttriangle.cgltrace -rtriangle_ref_128.png"
CONFIGS="-DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-y -ttriangle.cgltrace -rtriangle_ref_128.png"
CONFIGS="-DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-z -ttriangle.cgltrace -rtriangle_ref_128.png"
CONFIGS="-DENABLE_DPI -DEXT_GFX_ENABLE -DL1_DISABLE -DSM_DISABLE -DTCACHE_DISABLE -DRCACHE_DISABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=draw3d --args="-tvase.cgltrace -rvase_ref_32.png -w32 -h32" --threads=2 || true
echo "graphics tests done!"
@ -185,21 +185,21 @@ echo "begin clustering tests..."
# warp/threads configurations
CONFIGS="-DENABLE_DPI" ./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=2 --threads=8 --app=demo
CONFIGS="-DENABLE_DPI" ./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=8 --threads=2 --app=demo
CONFIGS="-DENABLE_DPI" ./ci/blackbox.sh --driver=simx --cores=1 --warps=8 --threads=16 --app=demo
./ci/blackbox.sh --driver=simx --cores=1 --warps=8 --threads=16 --app=demo
# cores clustering
CONFIGS="-DENABLE_DPI" ./ci/blackbox.sh --driver=rtlsim --cores=1 --clusters=1 --app=demo --args="-n1"
CONFIGS="-DENABLE_DPI" ./ci/blackbox.sh --driver=rtlsim --cores=4 --clusters=1 --app=demo --args="-n1"
CONFIGS="-DENABLE_DPI" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=demo --args="-n1"
CONFIGS="-DENABLE_DPI" ./ci/blackbox.sh --driver=simx --cores=4 --clusters=1 --app=demo --args="-n1"
CONFIGS="-DENABLE_DPI" ./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --app=demo --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --clusters=1 --app=demo --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --app=demo --args="-n1"
# L2/L3
CONFIGS="-DENABLE_DPI" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=demo --args="-n1"
CONFIGS="-DENABLE_DPI" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l3cache --app=demo --args="-n1"
CONFIGS="-DENABLE_DPI" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr --args="-n1"
CONFIGS="-DENABLE_DPI" ./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --l2cache --app=demo --args="-n1"
CONFIGS="-DENABLE_DPI" ./ci/blackbox.sh --driver=simx --cores=4 --clusters=4 --l2cache --l3cache --app=demo --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --l2cache --app=demo --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --clusters=4 --l2cache --l3cache --app=demo --args="-n1"
echo "clustering tests done!"
}
@ -230,12 +230,12 @@ CONFIGS="-DENABLE_DPI -DEXT_M_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=
# disabling F extension
CONFIGS="-DENABLE_DPI -DEXT_F_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=no_mf_ext
CONFIGS="-DENABLE_DPI -DEXT_F_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=no_mf_ext --perf=1
CONFIGS="-DENABLE_DPI -DEXT_F_DISABLE" ./ci/blackbox.sh --driver=simx --cores=1 --app=no_mf_ext --perf=1
CONFIGS="-DEXT_F_DISABLE" ./ci/blackbox.sh --driver=simx --cores=1 --app=no_mf_ext --perf=1
# disable shared memory
CONFIGS="-DENABLE_DPI -DSM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=no_smem
CONFIGS="-DENABLE_DPI -DSM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=no_smem --perf=1
CONFIGS="-DENABLE_DPI -DSM_DISABLE" ./ci/blackbox.sh --driver=simx --cores=1 --app=no_smem --perf=1
CONFIGS="-DSM_DISABLE" ./ci/blackbox.sh --driver=simx --cores=1 --app=no_smem --perf=1
# disable L1 cache
CONFIGS="-DENABLE_DPI -DL1_DISABLE -DSM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemm
@ -256,17 +256,17 @@ CONFIGS="-DENABLE_DPI -DL1_LINE_SIZE=64" ./ci/blackbox.sh --driver=rtlsim --core
# test cache banking
CONFIGS="-DENABLE_DPI -DSMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemm
CONFIGS="-DENABLE_DPI -DSMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemm
CONFIGS="-DENABLE_DPI -DSMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemm
CONFIGS="-DSMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemm
CONFIGS="-DENABLE_DPI -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemm
CONFIGS="-DENABLE_DPI -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemm
CONFIGS="-DENABLE_DPI -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --cores=1 --app=sgemm
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --cores=1 --app=sgemm
# test cache multi-porting
CONFIGS="-DENABLE_DPI -DDCACHE_NUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemm
CONFIGS="-DENABLE_DPI -DDCACHE_NUM_PORTS=2" ./ci/blackbox.sh --driver=simx --cores=1 --app=sgemm
CONFIGS="-DDCACHE_NUM_PORTS=2" ./ci/blackbox.sh --driver=simx --cores=1 --app=sgemm
CONFIGS="-DENABLE_DPI -DDCACHE_NUM_PORTS=4" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemm
CONFIGS="-DENABLE_DPI -DL2_NUM_PORTS=2 -DDCACHE_NUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=sgemm
CONFIGS="-DENABLE_DPI -DL2_NUM_PORTS=2 -DDCACHE_NUM_PORTS=2" ./ci/blackbox.sh --driver=simx --cores=2 --l2cache --app=sgemm
CONFIGS="-DL2_NUM_PORTS=2 -DDCACHE_NUM_PORTS=2" ./ci/blackbox.sh --driver=simx --cores=2 --l2cache --app=sgemm
# test 128-bit MEM block
CONFIGS="-DENABLE_DPI -DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --cores=1 --app=demo

View file

@ -150,7 +150,7 @@ module VX_gpu_unit #(
`ifdef EXT_TEX_ENABLE
VX_tex_exe_if tex_exe_if();
VX_commit_if tex_commit_if();
VX_commit_if tex_commit_if();
assign tex_exe_if.valid = gpu_req_valid && (gpu_exe_if.op_type == `INST_GPU_TEX);
assign tex_exe_if.uuid = gpu_exe_if.uuid;
@ -161,8 +161,8 @@ module VX_gpu_unit #(
assign tex_exe_if.stage = gpu_exe_if.op_mod[`TEX_STAGE_BITS-1:0];
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
assign tex_exe_if.coords[0][i] = gpu_exe_if.rs1_data[i];
assign tex_exe_if.coords[1][i] = gpu_exe_if.rs2_data[i];
assign tex_exe_if.coords[0][i] = gpu_exe_if.rs1_data[i][31:0];
assign tex_exe_if.coords[1][i] = gpu_exe_if.rs2_data[i][31:0];
assign tex_exe_if.lod[i] = gpu_exe_if.rs3_data[i][0 +: `TEX_LOD_BITS];
end
@ -206,7 +206,7 @@ module VX_gpu_unit #(
.reset (raster_reset),
.raster_csr_if (raster_csr_if),
.raster_bus_if (raster_bus_if),
.raster_exe_if (raster_exe_if),
.raster_exe_if (raster_exe_if),
.raster_commit_if (raster_commit_if)
);
@ -231,7 +231,7 @@ module VX_gpu_unit #(
assign rop_exe_if.face[i] = gpu_exe_if.rs1_data[i][0];
assign rop_exe_if.pos_x[i] = gpu_exe_if.rs1_data[i][1 +: `ROP_DIM_BITS];
assign rop_exe_if.pos_y[i] = gpu_exe_if.rs1_data[i][16 +: `ROP_DIM_BITS];
assign rop_exe_if.color[i] = gpu_exe_if.rs2_data[i];
assign rop_exe_if.color[i] = gpu_exe_if.rs2_data[i][31:0];
assign rop_exe_if.depth[i] = gpu_exe_if.rs3_data[i][`ROP_DEPTH_BITS-1:0];
end
@ -258,6 +258,7 @@ module VX_gpu_unit #(
wire imadd_valid_in;
wire imadd_ready_in;
wire [`NUM_THREADS-1:0][31:0] imadd_data_in [3];
wire imadd_valid_out;
wire [UUID_WIDTH-1:0] imadd_uuid_out;
@ -270,6 +271,12 @@ module VX_gpu_unit #(
assign imadd_valid_in = gpu_req_valid && (gpu_exe_if.op_type == `INST_GPU_IMADD);
// Per-thread operand narrowing for IMADD: keep only bits [31:0] of each
// thread's rs1/rs2/rs3 register value, so the three imadd_data_in vectors stay
// 32 bits per thread regardless of the register width.
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
assign imadd_data_in[0][i] = gpu_exe_if.rs1_data[i][31:0];
assign imadd_data_in[1][i] = gpu_exe_if.rs2_data[i][31:0];
assign imadd_data_in[2][i] = gpu_exe_if.rs3_data[i][31:0];
end
`RESET_RELAY (imadd_reset, reset);
VX_imadd #(
@ -285,9 +292,9 @@ module VX_gpu_unit #(
// Inputs
.valid_in (imadd_valid_in),
.shift_in ({gpu_exe_if.op_mod[1:0], 3'b0}),
.data1_in (gpu_exe_if.rs1_data),
.data2_in (gpu_exe_if.rs2_data),
.data3_in (gpu_exe_if.rs3_data),
.data1_in (imadd_data_in[0]),
.data2_in (imadd_data_in[1]),
.data3_in (imadd_data_in[2]),
.tag_in ({gpu_exe_if.uuid, gpu_exe_if.wid, gpu_exe_if.tmask, gpu_exe_if.PC, gpu_exe_if.rd}),
.ready_in (imadd_ready_in),
@ -298,12 +305,19 @@ module VX_gpu_unit #(
.ready_out (imadd_ready_out)
);
// XLEN-wide copy of the IMADD result, one entry per thread. Each entry is a
// size cast of the corresponding imadd_data_out element.
// NOTE(review): whether the cast zero- or sign-extends depends on how
// imadd_data_out is declared, which is not visible here - confirm.
wire [`NUM_THREADS-1:0][`XLEN-1:0] imadd_data_out_x;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
assign imadd_data_out_x[i] = `XLEN'(imadd_data_out[i]);
end
assign rsp_arb_valid_in[RSP_ARB_IDX_IMADD] = imadd_valid_out;
assign rsp_arb_data_in[RSP_ARB_IDX_IMADD] = {imadd_uuid_out, imadd_wid_out, imadd_tmask_out, imadd_PC_out, imadd_rd_out, 1'b1, RSP_DATAW'(imadd_data_out), 1'b1, 1'b0};
assign rsp_arb_data_in[RSP_ARB_IDX_IMADD] = {imadd_uuid_out, imadd_wid_out, imadd_tmask_out, imadd_PC_out, imadd_rd_out, 1'b1, RSP_DATAW'(imadd_data_out_x), 1'b1, 1'b0};
assign imadd_ready_out = rsp_arb_ready_in[RSP_ARB_IDX_IMADD];
`endif
// can accept new request?
always @(*) begin

View file

@ -18,7 +18,8 @@ module VX_multiplier #(
wire [A_WIDTH-1:0] dataa_w;
wire [B_WIDTH-1:0] datab_w;
wire [R_WIDTH-1:0] result_w;
wire [A_WIDTH+B_WIDTH-1:0] result_w;
`UNUSED_VAR (result_w)
if (SIGNED != 0) begin
assign result_w = $signed(dataa_w) * $signed(datab_w);
@ -29,7 +30,7 @@ module VX_multiplier #(
if (LATENCY == 0) begin
assign dataa_w = dataa;
assign datab_w = datab;
assign result = result_w;
assign result = R_WIDTH'(result_w);
end else begin
if (LATENCY >= 2) begin
reg [A_WIDTH-1:0] dataa_p [LATENCY-2:0];
@ -57,7 +58,7 @@ module VX_multiplier #(
reg [R_WIDTH-1:0] result_r;
always @(posedge clk) begin
if (enable) begin
result_r <= result_w;
result_r <= R_WIDTH'(result_w);
end
end
assign result = result_r;

View file

@ -49,25 +49,29 @@ module VX_raster_agent #(
assign raster_rsp_valid = raster_exe_if.valid && raster_bus_if.req_valid;
wire [`NUM_THREADS-1:0][31:0] response_data;
wire [`NUM_THREADS-1:0][31:0] response_data, commit_data;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
assign response_data[i] = {31'(raster_bus_if.req_stamps[i].pid), ~raster_bus_if.req_done};
end
VX_skid_buffer #(
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + (`NUM_THREADS * 32))
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + `XLEN + `NR_BITS + (`NUM_THREADS * 32))
) rsp_sbuf (
.clk (clk),
.reset (reset),
.valid_in (raster_rsp_valid),
.ready_in (raster_rsp_ready),
.data_in ({raster_exe_if.uuid, raster_exe_if.wid, raster_exe_if.tmask, raster_exe_if.PC, raster_exe_if.rd, response_data}),
.data_out ({raster_commit_if.uuid, raster_commit_if.wid, raster_commit_if.tmask, raster_commit_if.PC, raster_commit_if.rd, raster_commit_if.data}),
.data_out ({raster_commit_if.uuid, raster_commit_if.wid, raster_commit_if.tmask, raster_commit_if.PC, raster_commit_if.rd, commit_data}),
.valid_out (raster_commit_if.valid),
.ready_out (raster_commit_if.ready)
);
// The skid buffer carries 32-bit per-thread data (commit_data); size-cast each
// thread's word to XLEN bits before driving the commit interface.
// NOTE(review): commit_data is declared as a plain wire vector, so the cast
// presumably zero-extends when XLEN > 32 - confirm this is the intended padding.
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
assign raster_commit_if.data[i] = `XLEN'(commit_data[i]);
end
assign raster_commit_if.wb = 1'b1;
assign raster_commit_if.eop = 1'b1;

View file

@ -13,7 +13,6 @@ module VX_raster_dcr #(
output raster_dcrs_t raster_dcrs
);
`UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_VAR (reset)
// DCR registers
@ -24,13 +23,13 @@ module VX_raster_dcr #(
if (dcr_bus_if.write_valid) begin
case (dcr_bus_if.write_addr)
`DCR_RASTER_TBUF_ADDR: begin
dcrs.tbuf_addr <= dcr_bus_if.write_data[`RASTER_DCR_DATA_BITS-1:0];
dcrs.tbuf_addr <= dcr_bus_if.write_data[`RASTER_ADDR_BITS-1:0];
end
`DCR_RASTER_TILE_COUNT: begin
dcrs.tile_count <= dcr_bus_if.write_data[`RASTER_TILE_BITS-1:0];
end
`DCR_RASTER_PBUF_ADDR: begin
dcrs.pbuf_addr <= dcr_bus_if.write_data[`RASTER_DCR_DATA_BITS-1:0];
dcrs.pbuf_addr <= dcr_bus_if.write_data[`RASTER_ADDR_BITS-1:0];
end
`DCR_RASTER_PBUF_STRIDE: begin
dcrs.pbuf_stride <= dcr_bus_if.write_data[`RASTER_STRIDE_BITS-1:0];

View file

@ -51,19 +51,19 @@ module VX_raster_mem #(
localparam FETCH_FLAG_PID = 2'b01;
localparam FETCH_FLAG_PDATA = 2'b10;
localparam TILE_HEADER_SIZE = 8;
localparam TILE_HEADER_SIZEW = 8 / 4;
// A primitive data contains (xloc, yloc, pid, edges)
localparam PRIM_DATA_WIDTH = 2 * `RASTER_DIM_BITS + 9 * `RASTER_DATA_BITS + `RASTER_PID_BITS;
// Storage to cycle through all primitives and tiles
reg [`RASTER_DCR_DATA_BITS-1:0] next_tbuf_addr;
reg [`RASTER_DCR_DATA_BITS-1:0] curr_pbuf_addr;
reg [`RASTER_PID_BITS-1:0] curr_pid_reqs;
reg [`RASTER_PID_BITS-1:0] curr_pid_rsps;
reg [`RASTER_TILE_BITS-1:0] curr_num_tiles;
reg [`RASTER_DIM_BITS-1:0] curr_xloc;
reg [`RASTER_DIM_BITS-1:0] curr_yloc;
reg [RCACHE_ADDR_WIDTH-1:0] next_tbuf_addr;
reg [RCACHE_ADDR_WIDTH-1:0] curr_pbuf_addr;
reg [`RASTER_PID_BITS-1:0] curr_pid_reqs;
reg [`RASTER_PID_BITS-1:0] curr_pid_rsps;
reg [`RASTER_TILE_BITS-1:0] curr_num_tiles;
reg [`RASTER_DIM_BITS-1:0] curr_xloc;
reg [`RASTER_DIM_BITS-1:0] curr_yloc;
// Output buffer
wire buf_in_valid;
@ -72,7 +72,7 @@ module VX_raster_mem #(
// Memory request
reg mem_req_valid, mem_req_valid_qual;
reg [NUM_REQS-1:0] mem_req_mask;
reg [8:0][`RASTER_DCR_DATA_BITS-1:0] mem_req_addr;
reg [8:0][RCACHE_ADDR_WIDTH-1:0] mem_req_addr;
reg [TAG_WIDTH-1:0] mem_req_tag;
wire mem_req_ready;
@ -83,12 +83,12 @@ module VX_raster_mem #(
wire mem_rsp_ready;
// Primitive info
wire [`RASTER_DCR_DATA_BITS-1:0] pids_addr;
wire [RCACHE_ADDR_WIDTH-1:0] pids_addr;
wire prim_id_rsp_valid;
wire prim_data_rsp_valid;
wire prim_addr_rsp_valid;
wire prim_addr_rsp_ready;
wire [8:0][`RASTER_DATA_BITS-1:0] prim_mem_addr;
wire [8:0][RCACHE_ADDR_WIDTH-1:0] prim_mem_addr;
wire [`RASTER_PID_BITS-1:0] primitive_id;
// Memory fetch FSM
@ -115,10 +115,10 @@ module VX_raster_mem #(
// calculate tile start info
wire [`RASTER_TILE_BITS-1:0] start_tile_count = (dcrs.tile_count + `RASTER_TILE_BITS'(NUM_INSTANCES - 1 - INSTANCE_IDX)) >> LOG2_NUM_INSTANCES;
wire [`RASTER_DCR_DATA_BITS-1:0] start_tbuf_addr = dcrs.tbuf_addr + (INSTANCE_IDX * TILE_HEADER_SIZE);
wire [RCACHE_ADDR_WIDTH-1:0] start_tbuf_addr = RCACHE_ADDR_WIDTH'({dcrs.tbuf_addr, 4'b0}) + RCACHE_ADDR_WIDTH'(INSTANCE_IDX * TILE_HEADER_SIZEW);
// calculate address of primitive ids
assign pids_addr = (mem_req_addr[1] + 4) + (`RASTER_DCR_DATA_BITS'(th_pids_offset) << 2);
assign pids_addr = (mem_req_addr[1] + 1) + RCACHE_ADDR_WIDTH'(th_pids_offset);
// scheduler FSM
always @(posedge clk) begin
@ -139,11 +139,11 @@ module VX_raster_mem #(
end
mem_req_valid <= 1;
mem_req_addr[0] <= start_tbuf_addr;
mem_req_addr[1] <= start_tbuf_addr + 4;
mem_req_addr[1] <= start_tbuf_addr + 1;
mem_req_mask <= 9'b11;
mem_req_tag <= TAG_WIDTH'(FETCH_FLAG_TILE);
// update tile counters
next_tbuf_addr <= start_tbuf_addr + (NUM_INSTANCES * TILE_HEADER_SIZE);
next_tbuf_addr <= start_tbuf_addr + RCACHE_ADDR_WIDTH'(NUM_INSTANCES * TILE_HEADER_SIZEW);
curr_num_tiles <= start_tile_count;
end
STATE_TILE: begin
@ -168,7 +168,7 @@ module VX_raster_mem #(
if (mem_req_fire) begin
if (is_prim_id_req) begin
// update pid counters
curr_pbuf_addr <= curr_pbuf_addr + 4;
curr_pbuf_addr <= curr_pbuf_addr + 1;
curr_pid_reqs <= curr_pid_reqs - `RASTER_PID_BITS'(1);
end
@ -177,7 +177,7 @@ module VX_raster_mem #(
// fetch next primitive pid
mem_req_valid <= 1;
mem_req_mask <= 9'b1;
mem_req_addr[0] <= curr_pbuf_addr + (is_prim_id_req ? 4 : 0);
mem_req_addr[0] <= curr_pbuf_addr + (is_prim_id_req ? 1 : 0);
mem_req_tag <= TAG_WIDTH'(FETCH_FLAG_PID);
end
end
@ -202,9 +202,9 @@ module VX_raster_mem #(
mem_req_valid <= 1;
mem_req_mask <= 9'b11;
mem_req_addr[0] <= next_tbuf_addr;
mem_req_addr[1] <= next_tbuf_addr + 4;
mem_req_addr[1] <= next_tbuf_addr + 1;
mem_req_tag <= TAG_WIDTH'(FETCH_FLAG_TILE);
next_tbuf_addr <= next_tbuf_addr + (NUM_INSTANCES * TILE_HEADER_SIZE);
next_tbuf_addr <= next_tbuf_addr + RCACHE_ADDR_WIDTH'(NUM_INSTANCES * TILE_HEADER_SIZEW);
end
// update tile counter
curr_num_tiles <= curr_num_tiles - `RASTER_TILE_BITS'(1);
@ -252,10 +252,8 @@ module VX_raster_mem #(
assign mem_rsp_ready = (~prim_id_rsp_valid || prim_addr_rsp_ready)
&& (~prim_data_rsp_valid || buf_in_ready);
wire [8:0][RCACHE_ADDR_WIDTH-1:0] mem_req_addr_w;
wire [8:0][RCACHE_WORD_SIZE-1:0] mem_req_byteen;
for (genvar i = 0; i < 9; ++i) begin
assign mem_req_addr_w[i] = mem_req_addr[i][(32 - RCACHE_ADDR_WIDTH) +: RCACHE_ADDR_WIDTH];
assign mem_req_byteen[i] = {RCACHE_WORD_SIZE{1'b1}};
end
@ -279,7 +277,7 @@ module VX_raster_mem #(
.req_rw (1'b0),
.req_mask (mem_req_mask),
.req_byteen (mem_req_byteen),
.req_addr (mem_req_addr_w),
.req_addr (mem_req_addr),
`UNUSED_PIN (req_data),
.req_tag (mem_req_tag),
`UNUSED_PIN (req_empty),
@ -310,12 +308,12 @@ module VX_raster_mem #(
.mem_rsp_ready (cache_bus_if.rsp_ready)
);
wire [`RASTER_DATA_BITS-1:0] prim_mem_offset;
wire [RCACHE_ADDR_WIDTH-1:0] prim_mem_offset;
VX_multiplier #(
.A_WIDTH (`RASTER_DATA_BITS),
.B_WIDTH (`RASTER_STRIDE_BITS),
.R_WIDTH (`RASTER_DATA_BITS),
.R_WIDTH (RCACHE_ADDR_WIDTH),
.LATENCY (`LATENCY_IMUL)
) multiplier (
.clk (clk),
@ -326,7 +324,8 @@ module VX_raster_mem #(
);
for (genvar i = 0; i < 9; ++i) begin
assign prim_mem_addr[i] = dcrs.pbuf_addr + prim_mem_offset + 4 * i;
wire [RCACHE_ADDR_WIDTH-1:0] offset = prim_mem_offset + RCACHE_ADDR_WIDTH'(1 * i);
assign prim_mem_addr[i] = RCACHE_ADDR_WIDTH'({dcrs.pbuf_addr, 4'b0}) + offset;
end
VX_shift_register #(

View file

@ -3,6 +3,11 @@
`include "VX_define.vh"
// Width of the raster buffer address fields: 32 bits when XLEN_64 is defined,
// 25 bits otherwise.
// NOTE(review): VX_raster_mem concatenates 4 zero bits below these addresses
// before using them, which suggests they are not byte addresses - confirm the
// unit expected from software.
`ifdef XLEN_64
`define RASTER_ADDR_BITS 32
`else
`define RASTER_ADDR_BITS 25
`endif
`define RASTER_DCR_DATA_BITS 32
`define RASTER_DATA_BITS 32
`define RASTER_TILE_BITS 16
@ -10,14 +15,14 @@
package VX_raster_types;
typedef struct packed {
logic [`RASTER_DCR_DATA_BITS-1:0] tbuf_addr; // Tile buffer address
logic [`RASTER_TILE_BITS-1:0] tile_count; // Number of tiles in the tile buffer
logic [`RASTER_DCR_DATA_BITS-1:0] pbuf_addr; // Primitive (triangle) data buffer start address
logic [`RASTER_STRIDE_BITS-1:0] pbuf_stride; // Primitive data stride to fetch vertices
logic [`RASTER_DIM_BITS-1:0] dst_xmin; // Destination window xmin
logic [`RASTER_DIM_BITS-1:0] dst_xmax; // Destination window xmax
logic [`RASTER_DIM_BITS-1:0] dst_ymin; // Destination window ymin
logic [`RASTER_DIM_BITS-1:0] dst_ymax; // Destination window ymax
logic [`RASTER_ADDR_BITS-1:0] tbuf_addr; // Tile buffer address
logic [`RASTER_TILE_BITS-1:0] tile_count; // Number of tiles in the tile buffer
logic [`RASTER_ADDR_BITS-1:0] pbuf_addr; // Primitive triangle data buffer start address
logic [`RASTER_STRIDE_BITS-1:0] pbuf_stride; // Primitive data stride to fetch vertices
logic [`RASTER_DIM_BITS-1:0] dst_xmin; // Destination window xmin
logic [`RASTER_DIM_BITS-1:0] dst_xmax; // Destination window xmax
logic [`RASTER_DIM_BITS-1:0] dst_ymin; // Destination window ymin
logic [`RASTER_DIM_BITS-1:0] dst_ymax; // Destination window ymax
} raster_dcrs_t;
typedef struct packed {

View file

@ -63,13 +63,13 @@ module VX_rop_agent #(
assign rop_rsp_valid = rop_exe_if.valid && rop_req_ready;
VX_skid_buffer #(
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32)
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + `XLEN)
) rsp_sbuf (
.clk (clk),
.reset (reset),
.valid_in (rop_rsp_valid),
.ready_in (rop_rsp_ready),
.data_in ({rop_exe_if.uuid, rop_exe_if.wid, rop_exe_if.tmask, rop_exe_if.PC}),
.data_in ({rop_exe_if.uuid, rop_exe_if.wid, rop_exe_if.tmask, rop_exe_if.PC}),
.data_out ({rop_commit_if.uuid, rop_commit_if.wid, rop_commit_if.tmask, rop_commit_if.PC}),
.valid_out (rop_commit_if.valid),
.ready_out (rop_commit_if.ready)

View file

@ -13,7 +13,6 @@ module VX_rop_dcr #(
output rop_dcrs_t rop_dcrs
);
`UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_VAR (reset)
`define DEPTH_TEST_ENABLE(func, writemask) \
@ -40,7 +39,7 @@ module VX_rop_dcr #(
if (dcr_bus_if.write_valid) begin
case (dcr_bus_if.write_addr)
`DCR_ROP_CBUF_ADDR: begin
dcrs.cbuf_addr <= dcr_bus_if.write_data[31:0];
dcrs.cbuf_addr <= dcr_bus_if.write_data[`ROP_ADDR_BITS-1:0];
end
`DCR_ROP_CBUF_PITCH: begin
dcrs.cbuf_pitch <= dcr_bus_if.write_data[`ROP_PITCH_BITS-1:0];
@ -49,7 +48,7 @@ module VX_rop_dcr #(
dcrs.cbuf_writemask <= dcr_bus_if.write_data[3:0];
end
`DCR_ROP_ZBUF_ADDR: begin
dcrs.zbuf_addr <= dcr_bus_if.write_data[31:0];
dcrs.zbuf_addr <= dcr_bus_if.write_data[`ROP_ADDR_BITS-1:0];
end
`DCR_ROP_ZBUF_PITCH: begin
dcrs.zbuf_pitch <= dcr_bus_if.write_data[`ROP_PITCH_BITS-1:0];

View file

@ -42,6 +42,7 @@ module VX_rop_mem #(
localparam UUID_WIDTH = `UP(`UUID_BITS);
localparam NUM_REQS = ROP_MEM_REQS;
localparam W_ADDR_BITS = (`ROP_ADDR_BITS + 6) - 2;
wire mreq_valid, mreq_valid_r;
wire mreq_rw, mreq_rw_r;
@ -72,7 +73,8 @@ module VX_rop_mem #(
// depth/stencil values submission
for (genvar i = 0; i < NUM_LANES; ++i) begin
wire [31:0] m_y_pitch, baddr_s;
wire [31:0] m_y_pitch;
`UNUSED_VAR (m_y_pitch)
VX_multiplier #(
.A_WIDTH (`ROP_DIM_BITS),
@ -87,13 +89,15 @@ module VX_rop_mem #(
.result (m_y_pitch)
);
wire mask = req_ds_mask[i];
wire [31:0] baddr = dcrs.zbuf_addr + (req_pos_x[i] * 4);
wire [3:0] byteen = req_rw ? {stencil_byteen[i], depth_byteen} : 4'b1111;
wire [31:0] data = {req_stencil[i], req_depth[i]};
wire [W_ADDR_BITS-1:0] baddr, baddr_s;
assign baddr = {dcrs.zbuf_addr, 4'b0} + W_ADDR_BITS'(req_pos_x[i]);
wire [3:0] byteen = req_rw ? {stencil_byteen[i], depth_byteen} : 4'b1111;
wire [31:0] data = {req_stencil[i], req_depth[i]};
wire mask = req_ds_mask[i];
VX_shift_register #(
.DATAW (1 + 4 + 32 + 32),
.DATAW (1 + 4 + W_ADDR_BITS + 32),
.DEPTH (`LATENCY_IMUL)
) shift_reg (
.clk (clk),
@ -103,15 +107,14 @@ module VX_rop_mem #(
.data_out ({mreq_mask[i], mreq_byteen[i], baddr_s, mreq_data[i]})
);
wire [31:0] addr = baddr_s + m_y_pitch;
assign mreq_addr[i] = addr[(32-OCACHE_ADDR_WIDTH) +: OCACHE_ADDR_WIDTH];
`UNUSED_VAR (addr)
wire [W_ADDR_BITS-1:0] addr = baddr_s + W_ADDR_BITS'(m_y_pitch[31:2]);
assign mreq_addr[i] = OCACHE_ADDR_WIDTH'(addr);
end
// blend color submission
for (genvar i = NUM_LANES; i < NUM_REQS; ++i) begin
wire [31:0] m_y_pitch, baddr_s;
wire [31:0] m_y_pitch;
`UNUSED_VAR (m_y_pitch)
VX_multiplier #(
.A_WIDTH (`ROP_DIM_BITS),
@ -126,13 +129,15 @@ module VX_rop_mem #(
.result (m_y_pitch)
);
wire mask = req_c_mask[i - NUM_LANES];
wire [31:0] baddr = dcrs.cbuf_addr + (req_pos_x[i - NUM_LANES] * 4);
wire [W_ADDR_BITS-1:0] baddr, baddr_s;
assign baddr = {dcrs.cbuf_addr, 4'b0} + W_ADDR_BITS'(req_pos_x[i - NUM_LANES]);
wire [3:0] byteen = req_rw ? color_byteen : 4'b1111;
wire [31:0] data = req_color[i - NUM_LANES];
wire [31:0] data = req_color[i - NUM_LANES];
wire mask = req_c_mask[i - NUM_LANES];
VX_shift_register #(
.DATAW (1 + 4 + 32 + 32),
.DATAW (1 + 4 + W_ADDR_BITS + 32),
.DEPTH (`LATENCY_IMUL)
) shift_reg (
.clk (clk),
@ -142,10 +147,8 @@ module VX_rop_mem #(
.data_out ({mreq_mask[i], mreq_byteen[i], baddr_s, mreq_data[i]})
);
wire [31:0] addr = baddr_s + m_y_pitch;
assign mreq_addr[i] = addr[(32-OCACHE_ADDR_WIDTH) +: OCACHE_ADDR_WIDTH];
`UNUSED_VAR (addr)
wire [W_ADDR_BITS-1:0] addr = baddr_s + W_ADDR_BITS'(m_y_pitch[31:2]);
assign mreq_addr[i] = OCACHE_ADDR_WIDTH'(addr);
end
VX_shift_register #(

View file

@ -3,6 +3,12 @@
`include "VX_define.vh"
// Bit width of the ROP color/depth buffer base-address fields
// (cbuf_addr / zbuf_addr in the ROP DCR struct). The stored value is a
// 64-byte block address (the host writes byte_address / 64), not a byte
// address: 32 bits when XLEN_64 is defined, 25 bits otherwise.
`ifdef XLEN_64
`define ROP_ADDR_BITS 32
`else
`define ROP_ADDR_BITS 25
`endif
package VX_rop_types;
typedef struct packed {
@ -13,11 +19,11 @@ typedef struct packed {
} rgba_t;
typedef struct packed {
logic [31:0] cbuf_addr;
logic [`ROP_ADDR_BITS-1:0] cbuf_addr;
logic [`ROP_PITCH_BITS-1:0] cbuf_pitch;
logic [3:0] cbuf_writemask;
logic [31:0] zbuf_addr;
logic [`ROP_ADDR_BITS-1:0] zbuf_addr;
logic [`ROP_PITCH_BITS-1:0] zbuf_pitch;
logic depth_enable;

View file

@ -3,7 +3,8 @@
module VX_tex_addr #(
parameter `STRING INSTANCE_ID = "",
parameter REQ_INFOW = 1,
parameter NUM_LANES = 1
parameter NUM_LANES = 1,
parameter W_ADDR_BITS = `TEX_ADDR_BITS + 6
) (
input wire clk,
input wire reset,
@ -29,7 +30,7 @@ module VX_tex_addr #(
output wire [NUM_LANES-1:0] rsp_mask,
output wire [`TEX_FILTER_BITS-1:0] rsp_filter,
output wire [`TEX_LGSTRIDE_BITS-1:0] rsp_lgstride,
output wire [NUM_LANES-1:0][31:0] rsp_baseaddr,
output wire [NUM_LANES-1:0][W_ADDR_BITS-1:0] rsp_baseaddr,
output wire [NUM_LANES-1:0][3:0][31:0] rsp_addr,
output wire [NUM_LANES-1:0][1:0][`TEX_BLEND_FRAC-1:0] rsp_blends,
output wire [REQ_INFOW-1:0] rsp_info,
@ -52,7 +53,7 @@ module VX_tex_addr #(
wire [NUM_LANES-1:0][1:0][`TEX_FXD_FRAC-1:0] clamped_hi, clamped_hi_s0;
wire [NUM_LANES-1:0][1:0][SHIFT_BITS-1:0] dim_shift, dim_shift_s0;
wire [`TEX_LGSTRIDE_BITS-1:0] log_stride, log_stride_s0;
wire [NUM_LANES-1:0][31:0] mip_addr, mip_addr_s0;
wire [NUM_LANES-1:0][W_ADDR_BITS-1:0] mip_addr, mip_addr_s0;
wire [NUM_LANES-1:0][PITCH_BITS-1:0] log_pitch, log_pitch_s0;
wire stall_out;
@ -90,11 +91,11 @@ module VX_tex_addr #(
for (genvar i = 0; i < NUM_LANES; ++i) begin
assign log_pitch[i] = PITCH_BITS'(req_logdims[0] - req_miplevel[i]) + PITCH_BITS'(log_stride);
assign mip_addr[i] = req_baseaddr + `TEX_ADDR_BITS'(req_mipoff[i]);
assign mip_addr[i] = {req_baseaddr, 6'b0} + W_ADDR_BITS'(req_mipoff[i]);
end
VX_pipe_register #(
.DATAW (1 + NUM_LANES + `TEX_FILTER_BITS + `TEX_LGSTRIDE_BITS + REQ_INFOW + NUM_LANES * (PITCH_BITS + 2 * SHIFT_BITS + `TEX_ADDR_BITS + 2 * 2 * `TEX_FXD_FRAC)),
.DATAW (1 + NUM_LANES + `TEX_FILTER_BITS + `TEX_LGSTRIDE_BITS + REQ_INFOW + NUM_LANES * (PITCH_BITS + 2 * SHIFT_BITS + W_ADDR_BITS + 2 * 2 * `TEX_FXD_FRAC)),
.RESETW (1)
) pipe_reg0 (
.clk (clk),
@ -140,7 +141,7 @@ module VX_tex_addr #(
assign stall_out = rsp_valid && ~rsp_ready;
VX_pipe_register #(
.DATAW (1 + NUM_LANES + `TEX_FILTER_BITS + `TEX_LGSTRIDE_BITS + (NUM_LANES * 32) + (NUM_LANES * 4 * 32) + (2 * NUM_LANES * `TEX_BLEND_FRAC) + REQ_INFOW),
.DATAW (1 + NUM_LANES + `TEX_FILTER_BITS + `TEX_LGSTRIDE_BITS + (NUM_LANES * W_ADDR_BITS) + (NUM_LANES * 4 * 32) + (2 * NUM_LANES * `TEX_BLEND_FRAC) + REQ_INFOW),
.RESETW (1)
) pipe_reg1 (
.clk (clk),

View file

@ -55,7 +55,7 @@ module VX_tex_agent #(
wire mdata_pop = tex_bus_if.rsp_valid && tex_bus_if.rsp_ready;
VX_index_buffer #(
.DATAW (NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS),
.DATAW (NW_WIDTH + `NUM_THREADS + `XLEN + `NR_BITS),
.SIZE (`TEX_REQ_QUEUE_SIZE)
) tag_store (
.clk (clk),
@ -98,19 +98,25 @@ module VX_tex_agent #(
assign mdata_raddr = tex_bus_if.rsp_tag[0 +: REQ_QUEUE_BITS];
assign rsp_uuid = tex_bus_if.rsp_tag[REQ_QUEUE_BITS +: UUID_WIDTH];
wire [`NUM_THREADS-1:0][31:0] commit_data;
VX_skid_buffer #(
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + (`NUM_THREADS * 32))
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + `XLEN + `NR_BITS + (`NUM_THREADS * 32))
) rsp_sbuf (
.clk (clk),
.reset (reset),
.valid_in (tex_bus_if.rsp_valid),
.ready_in (tex_bus_if.rsp_ready),
.data_in ({rsp_uuid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, tex_bus_if.rsp_texels}),
.data_out ({tex_commit_if.uuid, tex_commit_if.wid, tex_commit_if.tmask, tex_commit_if.PC, tex_commit_if.rd, tex_commit_if.data}),
.data_out ({tex_commit_if.uuid, tex_commit_if.wid, tex_commit_if.tmask, tex_commit_if.PC, tex_commit_if.rd, commit_data}),
.valid_out (tex_commit_if.valid),
.ready_out (tex_commit_if.ready)
);
// The response skid buffer delivers one 32-bit texel per thread in
// commit_data; size-cast each lane to XLEN bits before driving the
// commit interface's data field.
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
assign tex_commit_if.data[i] = `XLEN'(commit_data[i]);
end
assign tex_commit_if.wb = 1'b1;
assign tex_commit_if.eop = 1'b1;

View file

@ -14,8 +14,7 @@ module VX_tex_dcr #(
input wire [`TEX_STAGE_BITS-1:0] stage,
output tex_dcrs_t tex_dcrs
);
`UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_VAR (reset)
// DCR registers

View file

@ -2,8 +2,9 @@
module VX_tex_mem #(
parameter `STRING INSTANCE_ID = "",
parameter REQ_INFOW = 1,
parameter NUM_LANES = 1
parameter REQ_INFOW = 1,
parameter NUM_LANES = 1,
parameter W_ADDR_BITS = `TEX_ADDR_BITS + 6
) (
input wire clk,
input wire reset,
@ -16,7 +17,7 @@ module VX_tex_mem #(
input wire [NUM_LANES-1:0] req_mask,
input wire [`TEX_FILTER_BITS-1:0] req_filter,
input wire [`TEX_LGSTRIDE_BITS-1:0] req_lgstride,
input wire [NUM_LANES-1:0][31:0] req_baseaddr,
input wire [NUM_LANES-1:0][W_ADDR_BITS-1:0] req_baseaddr,
input wire [NUM_LANES-1:0][3:0][31:0] req_addr,
input wire [REQ_INFOW-1:0] req_info,
output wire req_ready,
@ -33,7 +34,7 @@ module VX_tex_mem #(
wire mem_req_valid;
wire [3:0][NUM_LANES-1:0] mem_req_mask;
wire [3:0][NUM_LANES-1:0][29:0] mem_req_addr;
wire [3:0][NUM_LANES-1:0][TCACHE_ADDR_WIDTH-1:0] mem_req_addr;
wire [3:0][NUM_LANES-1:0][3:0] mem_req_byteen;
wire [TAG_WIDTH-1:0] mem_req_tag;
wire mem_req_ready;
@ -45,11 +46,11 @@ module VX_tex_mem #(
// full address calculation
wire [NUM_LANES-1:0][3:0][31:0] full_addr;
wire [NUM_LANES-1:0][3:0][W_ADDR_BITS-1:0] full_addr;
for (genvar i = 0; i < NUM_LANES; ++i) begin
for (genvar j = 0; j < 4; ++j) begin
assign full_addr[i][j] = req_baseaddr[i] + req_addr[i][j];
assign full_addr[i][j] = req_baseaddr[i] + W_ADDR_BITS'(req_addr[i][j]);
end
end
@ -59,7 +60,7 @@ module VX_tex_mem #(
for (genvar i = 0; i < NUM_LANES; ++i) begin
for (genvar j = 0; j < 4; ++j) begin
assign mem_req_addr[j][i] = full_addr[i][j][31:2];
assign mem_req_addr[j][i] = TCACHE_ADDR_WIDTH'(full_addr[i][j][W_ADDR_BITS-1:2]);
assign mem_req_align[j][i] = full_addr[i][j][1:0];
assign mem_req_byteen[j][i] = 4'b1111;
end

View file

@ -8,7 +8,11 @@
`define TEX_FXD_HALF (`TEX_FXD_ONE >> 1)
`define TEX_FXD_MASK (`TEX_FXD_ONE - 1)
// Bit width of the texture base address, which is held as a 64-byte block
// address (the host writes byte_address / 64). Consumers append six zero
// bits to rebuild the byte address, hence W_ADDR_BITS = TEX_ADDR_BITS + 6.
// 32 bits when XLEN_64 is defined, 25 bits otherwise.
`ifdef XLEN_64
`define TEX_ADDR_BITS 32
`else
`define TEX_ADDR_BITS 25
`endif
`define TEX_FORMAT_BITS 3
`define TEX_WRAP_BITS 2
`define TEX_FILTER_BITS 1

View file

@ -22,6 +22,7 @@ module VX_tex_unit #(
`UNUSED_SPARAM (INSTANCE_ID)
localparam BLEND_FRAC_W = (2 * NUM_LANES * `TEX_BLEND_FRAC);
localparam W_ADDR_BITS = `TEX_ADDR_BITS + 6;
// DCRs
@ -80,7 +81,7 @@ module VX_tex_unit #(
wire [`TEX_LGSTRIDE_BITS-1:0] mem_req_lgstride;
wire [NUM_LANES-1:0][1:0][`TEX_BLEND_FRAC-1:0] mem_req_blends;
wire [NUM_LANES-1:0][3:0][31:0] mem_req_addr;
wire [NUM_LANES-1:0][31:0] mem_req_baseaddr;
wire [NUM_LANES-1:0][W_ADDR_BITS-1:0] mem_req_baseaddr;
wire [(TAG_WIDTH + `TEX_FORMAT_BITS)-1:0] mem_req_info;
wire mem_req_ready;

View file

@ -238,14 +238,14 @@ void TextureSampler::configure(const TexDCRS& dcrs) {
}
uint32_t TextureSampler::read(uint32_t stage, int32_t u, int32_t v, uint32_t lod) const {
auto mip_off = dcrs_.read(stage, DCR_TEX_MIPOFF(lod));
auto base_addr = dcrs_.read(stage, DCR_TEX_ADDR);
auto logdim = dcrs_.read(stage, DCR_TEX_LOGDIM);
auto format = dcrs_.read(stage, DCR_TEX_FORMAT);
auto filter = dcrs_.read(stage, DCR_TEX_FILTER);
auto wrap = dcrs_.read(stage, DCR_TEX_WRAP);
auto mip_off = dcrs_.read(stage, DCR_TEX_MIPOFF(lod));
auto mip_base = uint64_t(dcrs_.read(stage, DCR_TEX_ADDR)) << 6;
auto logdim = dcrs_.read(stage, DCR_TEX_LOGDIM);
auto format = dcrs_.read(stage, DCR_TEX_FORMAT);
auto filter = dcrs_.read(stage, DCR_TEX_FILTER);
auto wrap = dcrs_.read(stage, DCR_TEX_WRAP);
base_addr += mip_off;
auto base_addr = mip_base + mip_off;
auto log_width = std::max<int32_t>((logdim & 0xffff) - lod, 0);
auto log_height = std::max<int32_t>((logdim >> 16) - lod, 0);
@ -270,7 +270,7 @@ uint32_t TextureSampler::read(uint32_t stage, int32_t u, int32_t v, uint32_t lod
// memory lookup
uint32_t texel[4];
uint32_t addr[4] = {
uint64_t addr[4] = {
base_addr + offset00 * stride,
base_addr + offset01 * stride,
base_addr + offset10 * stride,
@ -290,7 +290,7 @@ uint32_t TextureSampler::read(uint32_t stage, int32_t u, int32_t v, uint32_t lod
// memory lookup
uint32_t texel;
uint32_t addr = base_addr + offset * stride;
uint64_t addr = base_addr + offset * stride;
mem_cb_(&texel, &addr, stride, 1, cb_arg_);
// filtering

View file

@ -1,6 +1,5 @@
#pragma once
#include <cstdint>
#include <cocogfx/include/fixed.hpp>
#include <cocogfx/include/math.hpp>
#include <VX_types.h>
@ -174,7 +173,7 @@ class TextureSampler {
public:
typedef void (*MemoryCB)(
uint32_t* out,
const uint32_t* addr,
const uint64_t* addr,
uint32_t stride,
uint32_t size,
void* cb_arg

View file

@ -1542,7 +1542,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
for (uint32_t t = 0; t < num_threads; ++t) {
if (!tmask_.test(t))
continue;
rddata[t].i = (WordI)(((int64_t)rsdata[t][0].i * (int64_t)rsdata[t][1].i) >> shift) + rsdata[t][2].i;
rddata[t].i = (int32_t)(((int64_t)rsdata[t][0].i32 * (int64_t)rsdata[t][1].i32) >> shift) + rsdata[t][2].i32;
}
rd_write = true;
} break;

View file

@ -11,13 +11,13 @@ using namespace vortex;
#define STAMP_POOL_MAX_SIZE 1024
struct prim_mem_trace_t {
uint32_t prim_addr;
std::vector<uint32_t> edge_addrs;
uint64_t prim_addr;
std::vector<uint64_t> edge_addrs;
uint32_t stamps;
};
struct tile_mem_trace_t {
std::vector<uint32_t> header_addrs;
std::vector<uint64_t> header_addrs;
std::list<prim_mem_trace_t> primitives;
bool end_of_tile;
};
@ -89,8 +89,8 @@ public:
// get device configuration
graphics::Rasterizer::configure(dcrs);
num_tiles_ = dcrs.read(DCR_RASTER_TILE_COUNT);
tbuf_baseaddr_ = dcrs.read(DCR_RASTER_TBUF_ADDR);
pbuf_baseaddr_ = dcrs.read(DCR_RASTER_PBUF_ADDR);
tbuf_baseaddr_ = uint64_t(dcrs.read(DCR_RASTER_TBUF_ADDR)) << 6;
pbuf_baseaddr_ = uint64_t(dcrs.read(DCR_RASTER_PBUF_ADDR)) << 6;
pbuf_stride_ = dcrs.read(DCR_RASTER_PBUF_STRIDE);
tbuf_addr_ = tbuf_baseaddr_ + raster_index_ * sizeof(graphics::rast_tile_header_t);
@ -252,10 +252,10 @@ private:
uint32_t raster_count_;
RAM* mem_;
uint32_t num_tiles_;
uint32_t tbuf_baseaddr_;
uint32_t pbuf_baseaddr_;
uint64_t tbuf_baseaddr_;
uint64_t pbuf_baseaddr_;
uint32_t pbuf_stride_;
uint32_t tbuf_addr_;
uint64_t tbuf_addr_;
uint32_t tile_x_;
uint32_t tile_y_;
uint32_t pids_offset_;
@ -388,7 +388,7 @@ public:
auto& mem_trace = mem_traces.front();
std::vector<uint32_t> addresses;
std::vector<uint64_t> addresses;
switch (mem_trace_state_) {
case e_mem_trace_state::header: {

View file

@ -18,13 +18,13 @@ public:
depthStencil_.configure(dcrs);
blender_.configure(dcrs);
zbuf_baseaddr_ = dcrs.read(DCR_ROP_ZBUF_ADDR);
zbuf_baseaddr_ = uint64_t(dcrs.read(DCR_ROP_ZBUF_ADDR)) << 6;
zbuf_pitch_ = dcrs.read(DCR_ROP_ZBUF_PITCH);
depth_writemask_ = dcrs.read(DCR_ROP_DEPTH_WRITEMASK) & 0x1;
stencil_front_writemask_ = dcrs.read(DCR_ROP_STENCIL_WRITEMASK) & 0xffff;
stencil_back_writemask_ = dcrs.read(DCR_ROP_STENCIL_WRITEMASK) >> 16;
cbuf_baseaddr_ = dcrs.read(DCR_ROP_CBUF_ADDR);
cbuf_baseaddr_ = uint64_t(dcrs.read(DCR_ROP_CBUF_ADDR)) << 6;
cbuf_pitch_ = dcrs.read(DCR_ROP_CBUF_PITCH);
auto cbuf_writemask = dcrs.read(DCR_ROP_CBUF_WRITEMASK) & 0xf;
cbuf_writemask_ = (((cbuf_writemask >> 0) & 0x1) * 0x000000ff)
@ -78,14 +78,16 @@ private:
uint32_t* color,
RopUnit::TraceData::Ptr trace_data) {
if (depth_enable || stencil_enable) {
uint32_t zbuf_addr = zbuf_baseaddr_ + y * zbuf_pitch_ + x * 4;
uint64_t zbuf_addr = zbuf_baseaddr_ + y * zbuf_pitch_ + x * 4;
mem_->read(depthstencil, zbuf_addr, 4);
trace_data->mem_rd_addrs.push_back(zbuf_addr);
DT(3, "rop-depthstencil-read: x=" << std::dec << x << ", y=" << y << ", addr=0x" << std::hex << zbuf_addr << ", depthstencil=0x" << *depthstencil);
}
if (color_write_ && (color_read_ || blend_enable)) {
uint32_t cbuf_addr = cbuf_baseaddr_ + y * cbuf_pitch_ + x * 4;
uint64_t cbuf_addr = cbuf_baseaddr_ + y * cbuf_pitch_ + x * 4;
mem_->read(color, cbuf_addr, 4);
trace_data->mem_rd_addrs.push_back(cbuf_addr);
DT(3, "rop-color-read: x=" << std::dec << x << ", y=" << y << ", addr=0x" << std::hex << cbuf_addr << ", color=0x" << *color);
}
}
@ -105,18 +107,18 @@ private:
| (stencil_enable ? (stencil_writemask << ROP_DEPTH_BITS) : 0);
if (ds_writeMask != 0) {
uint32_t write_value = (dst_depthstencil & ~ds_writeMask) | (depthstencil & ds_writeMask);
uint32_t zbuf_addr = zbuf_baseaddr_ + y * zbuf_pitch_ + x * 4;
uint64_t zbuf_addr = zbuf_baseaddr_ + y * zbuf_pitch_ + x * 4;
mem_->write(&write_value, zbuf_addr, 4);
trace_data->mem_wr_addrs.push_back(zbuf_addr);
DT(3, "rop-depthstencil: x=" << std::dec << x << ", y=" << y << ", depthstencil=0x" << std::hex << write_value);
DT(3, "rop-depthstencil-write: x=" << std::dec << x << ", y=" << y << ", addr=0x" << std::hex << zbuf_addr << ", depthstencil=0x" << write_value);
}
if (color_write_ && ds_passed) {
uint32_t write_value = (dst_color & ~cbuf_writemask_) | (color & cbuf_writemask_);
uint32_t cbuf_addr = cbuf_baseaddr_ + y * cbuf_pitch_ + x * 4;
uint64_t cbuf_addr = cbuf_baseaddr_ + y * cbuf_pitch_ + x * 4;
mem_->write(&write_value, cbuf_addr, 4);
trace_data->mem_wr_addrs.push_back(cbuf_addr);
DT(3, "rop-color: x=" << std::dec << x << ", y=" << y << ", color=0x" << std::hex << write_value);
DT(3, "rop-color-write: x=" << std::dec << x << ", y=" << y << ", addr=0x" << std::hex << cbuf_addr << ", color=0x" << write_value);
}
}

View file

@ -122,7 +122,7 @@ private:
void texture_read(
uint32_t* out,
const uint32_t* addr,
const uint64_t* addr,
uint32_t stride,
uint32_t size) {
for (uint32_t i = 0; i < size; ++i) {
@ -133,7 +133,7 @@ private:
static void memoryCB(
uint32_t* out,
const uint32_t* addr,
const uint64_t* addr,
uint32_t stride,
uint32_t size,
void* cb_arg) {

View file

@ -16,7 +16,13 @@
class GpuSW;
typedef struct {
uint32_t log_num_tasks;
#ifdef SW_ENABLE
graphics::RasterDCRS raster_dcrs;
graphics::RopDCRS rop_dcrs;
graphics::TexDCRS tex_dcrs;
#endif
uint32_t log_num_tasks;
uint64_t prim_addr;
bool depth_enabled;
bool color_enabled;
@ -26,25 +32,6 @@ typedef struct {
bool sw_rast;
bool sw_rop;
bool sw_interp;
uint32_t dst_width;
uint32_t dst_height;
uint64_t cbuf_addr;
uint8_t cbuf_stride;
uint32_t cbuf_pitch;
uint64_t zbuf_addr;
uint8_t zbuf_stride;
uint32_t zbuf_pitch;
uint64_t prim_addr;
#ifdef SW_ENABLE
graphics::RasterDCRS raster_dcrs;
graphics::RopDCRS rop_dcrs;
graphics::TexDCRS tex_dcrs;
#endif
} kernel_arg_t;
#endif

View file

@ -25,8 +25,8 @@ public:
void configure(const graphics::RasterDCRS& dcrs, uint32_t log_num_tasks) {
graphics::Rasterizer::configure(dcrs);
num_tiles_ = dcrs.read(DCR_RASTER_TILE_COUNT);
tbuf_baseaddr_ = dcrs.read(DCR_RASTER_TBUF_ADDR);
pbuf_baseaddr_ = dcrs.read(DCR_RASTER_PBUF_ADDR);
tbuf_baseaddr_ = uint64_t(dcrs.read(DCR_RASTER_TBUF_ADDR)) << 6;
pbuf_baseaddr_ = uint64_t(dcrs.read(DCR_RASTER_PBUF_ADDR)) << 6;
pbuf_stride_ = dcrs.read(DCR_RASTER_PBUF_STRIDE);
log_num_tasks_ = log_num_tasks;
}
@ -64,8 +64,8 @@ public:
private:
uint32_t num_tiles_;
uint32_t tbuf_baseaddr_;
uint32_t pbuf_baseaddr_;
uint64_t tbuf_baseaddr_;
uint64_t pbuf_baseaddr_;
uint32_t pbuf_stride_;
uint32_t log_num_tasks_;
};
@ -80,13 +80,13 @@ public:
blender_.configure(dcrs);
// get device configuration
zbuf_baseaddr_ = dcrs.read(DCR_ROP_ZBUF_ADDR);
zbuf_baseaddr_ = uint64_t(dcrs.read(DCR_ROP_ZBUF_ADDR)) << 6;
zbuf_pitch_ = dcrs.read(DCR_ROP_ZBUF_PITCH);
depth_writemask_ = dcrs.read(DCR_ROP_DEPTH_WRITEMASK) & 0x1;
stencil_front_writemask_ = dcrs.read(DCR_ROP_STENCIL_WRITEMASK) & 0xffff;
stencil_back_writemask_ = dcrs.read(DCR_ROP_STENCIL_WRITEMASK) >> 16;
cbuf_baseaddr_ = dcrs.read(DCR_ROP_CBUF_ADDR);
cbuf_baseaddr_ = uint64_t(dcrs.read(DCR_ROP_CBUF_ADDR)) << 6;
cbuf_pitch_ = dcrs.read(DCR_ROP_CBUF_PITCH);
auto cbuf_writemask = dcrs.read(DCR_ROP_CBUF_WRITEMASK) & 0xf;
cbuf_writemask_ = (((cbuf_writemask >> 0) & 0x1) * 0x000000ff)
@ -132,12 +132,12 @@ private:
uint32_t* depthstencil,
uint32_t* color) const {
if (depth_enable || stencil_enable) {
uint32_t zbuf_addr = zbuf_baseaddr_ + y * zbuf_pitch_ + x * 4;
uint64_t zbuf_addr = zbuf_baseaddr_ + y * zbuf_pitch_ + x * 4;
*depthstencil = *reinterpret_cast<const uint32_t*>(zbuf_addr);
}
if (color_write_ && (color_read_ || blend_enable)) {
uint32_t cbuf_addr = cbuf_baseaddr_ + y * cbuf_pitch_ + x * 4;
uint64_t cbuf_addr = cbuf_baseaddr_ + y * cbuf_pitch_ + x * 4;
*color = *reinterpret_cast<const uint32_t*>(cbuf_addr);
}
}
@ -157,12 +157,12 @@ private:
| (stencil_enable ? (stencil_writemask << ROP_DEPTH_BITS) : 0);
if (ds_writeMask != 0) {
uint32_t write_value = (dst_depthstencil & ~ds_writeMask) | (depthstencil & ds_writeMask);
uint32_t zbuf_addr = zbuf_baseaddr_ + y * zbuf_pitch_ + x * 4;
uint64_t zbuf_addr = zbuf_baseaddr_ + y * zbuf_pitch_ + x * 4;
*reinterpret_cast<uint32_t*>(zbuf_addr) = write_value;
}
if (color_write_ && ds_passed) {
uint32_t write_value = (dst_color & ~cbuf_writemask_) | (color & cbuf_writemask_);
uint32_t cbuf_addr = cbuf_baseaddr_ + y * cbuf_pitch_ + x * 4;
uint64_t cbuf_addr = cbuf_baseaddr_ + y * cbuf_pitch_ + x * 4;
*reinterpret_cast<uint32_t*>(cbuf_addr) = write_value;
}
}
@ -170,13 +170,13 @@ private:
graphics::DepthTencil depthStencil_;
graphics::Blender blender_;
uint32_t zbuf_baseaddr_;
uint64_t zbuf_baseaddr_;
uint32_t zbuf_pitch_;
bool depth_writemask_;
uint32_t stencil_front_writemask_;
uint32_t stencil_back_writemask_;
uint32_t cbuf_baseaddr_;
uint64_t cbuf_baseaddr_;
uint32_t cbuf_pitch_;
uint32_t cbuf_writemask_;
bool color_read_;
@ -196,7 +196,7 @@ public:
private:
static void memory_cb(uint32_t* out,
const uint32_t* addr,
const uint64_t* addr,
uint32_t stride,
uint32_t size,
void* /*cb_arg*/) {

View file

@ -222,21 +222,21 @@ int render(const CGLTrace& trace) {
uint32_t primbuf_stride = sizeof(graphics::rast_prim_t);
// configure raster units
RASTER_DCR_WRITE(DCR_RASTER_TBUF_ADDR, tilebuf_addr);
RASTER_DCR_WRITE(DCR_RASTER_TBUF_ADDR, tilebuf_addr / 64); // block address
RASTER_DCR_WRITE(DCR_RASTER_TILE_COUNT, num_tiles);
RASTER_DCR_WRITE(DCR_RASTER_PBUF_ADDR, primbuf_addr);
RASTER_DCR_WRITE(DCR_RASTER_PBUF_ADDR, primbuf_addr / 64); // block address
RASTER_DCR_WRITE(DCR_RASTER_PBUF_STRIDE, primbuf_stride);
RASTER_DCR_WRITE(DCR_RASTER_SCISSOR_X, (dst_width << 16) | 0);
RASTER_DCR_WRITE(DCR_RASTER_SCISSOR_Y, (dst_height << 16) | 0);
// configure rop color buffer
ROP_DCR_WRITE(DCR_ROP_CBUF_ADDR, cbuf_addr);
ROP_DCR_WRITE(DCR_ROP_CBUF_ADDR, cbuf_addr / 64); // block address
ROP_DCR_WRITE(DCR_ROP_CBUF_PITCH, cbuf_pitch);
ROP_DCR_WRITE(DCR_ROP_CBUF_WRITEMASK, states.color_writemask);
if (states.depth_test || states.stencil_test) {
// configure rop depth buffer
ROP_DCR_WRITE(DCR_ROP_ZBUF_ADDR, zbuf_addr);
ROP_DCR_WRITE(DCR_ROP_ZBUF_ADDR, zbuf_addr / 64); // block address
ROP_DCR_WRITE(DCR_ROP_ZBUF_PITCH, zbuf_pitch);
}
@ -338,7 +338,7 @@ int render(const CGLTrace& trace) {
TEX_DCR_WRITE(DCR_TEX_FORMAT, tex_format);
TEX_DCR_WRITE(DCR_TEX_WRAP, (tex_wrapV << 16) | tex_wrapU);
TEX_DCR_WRITE(DCR_TEX_FILTER, tex_filter ? TEX_FILTER_BILINEAR : TEX_FILTER_POINT);
TEX_DCR_WRITE(DCR_TEX_ADDR, texbuf_addr);
TEX_DCR_WRITE(DCR_TEX_ADDR, texbuf_addr / 64); // block address
for (uint32_t i = 0; i < mip_offsets.size(); ++i) {
assert(i < TEX_LOD_MAX);
TEX_DCR_WRITE(DCR_TEX_MIPOFF(i), mip_offsets.at(i));
@ -526,17 +526,6 @@ int main(int argc, char *argv[]) {
kernel_arg.sw_rop = sw_rop;
kernel_arg.sw_interp = sw_interp;
kernel_arg.dst_width = dst_width;
kernel_arg.dst_height = dst_height;
kernel_arg.cbuf_stride = cbuf_stride;
kernel_arg.cbuf_pitch = cbuf_pitch;
kernel_arg.cbuf_addr = cbuf_addr;
kernel_arg.zbuf_stride = zbuf_stride;
kernel_arg.zbuf_pitch = zbuf_pitch;
kernel_arg.zbuf_addr = zbuf_addr;
// run tests
RT_CHECK(render(trace));

View file

@ -224,7 +224,7 @@ int main(int argc, char *argv[]) {
for (uint32_t i = 0; i < num_points; ++i) {
auto ref = ((a[i] * b[i]) >> (s[i] * 8)) + c[i];
if (d[i] != ref) {
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << ", c=" << c[i] << ", s=" << s[i] << std::endl;
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << d[i] << ", a=" << a[i] << ", b=" << b[i] << ", c=" << c[i] << ", s=" << s[i] << std::endl;
++errors;
}
}

View file

@ -173,9 +173,9 @@ int render(const CGLTrace& trace) {
uint32_t primbuf_stride = sizeof(graphics::rast_prim_t);
// configure raster units
vx_dcr_write(device, DCR_RASTER_TBUF_ADDR, tilebuf_addr);
vx_dcr_write(device, DCR_RASTER_TBUF_ADDR, tilebuf_addr / 64); // block address
vx_dcr_write(device, DCR_RASTER_TILE_COUNT, num_tiles);
vx_dcr_write(device, DCR_RASTER_PBUF_ADDR, primbuf_addr);
vx_dcr_write(device, DCR_RASTER_PBUF_ADDR, primbuf_addr / 64); // block address
vx_dcr_write(device, DCR_RASTER_PBUF_STRIDE, primbuf_stride);
vx_dcr_write(device, DCR_RASTER_SCISSOR_X, (dst_width << 16) | 0);
vx_dcr_write(device, DCR_RASTER_SCISSOR_Y, (dst_height << 16) | 0);

View file

@ -147,12 +147,12 @@ int render(uint32_t num_tasks) {
staging_buf = nullptr;
// configure rop color buffer
vx_dcr_write(device, DCR_ROP_CBUF_ADDR, cbuf_addr);
vx_dcr_write(device, DCR_ROP_CBUF_ADDR, cbuf_addr / 64); // block address
vx_dcr_write(device, DCR_ROP_CBUF_PITCH, cbuf_pitch);
vx_dcr_write(device, DCR_ROP_CBUF_WRITEMASK, 0xf);
// configure rop depth buffer to default
vx_dcr_write(device, DCR_ROP_ZBUF_ADDR, zbuf_addr);
vx_dcr_write(device, DCR_ROP_ZBUF_ADDR, zbuf_addr / 64); // block address
vx_dcr_write(device, DCR_ROP_ZBUF_PITCH, zbuf_pitch);
if (depth_enable) {
vx_dcr_write(device, DCR_ROP_DEPTH_FUNC, ROP_DEPTH_FUNC_LESS);

View file

@ -15,7 +15,7 @@ typedef struct {
} tile_info_t;
static void memory_cb(uint32_t* out,
const uint32_t* addr,
const uint64_t* addr,
uint32_t stride,
uint32_t size,
void* /*cb_arg*/) {

View file

@ -268,7 +268,7 @@ int main(int argc, char *argv[]) {
TEX_DCR_WRITE(DCR_TEX_FORMAT, format);
TEX_DCR_WRITE(DCR_TEX_WRAP, (wrap << 16) | wrap);
TEX_DCR_WRITE(DCR_TEX_FILTER, (filter ? TEX_FILTER_BILINEAR : TEX_FILTER_POINT));
TEX_DCR_WRITE(DCR_TEX_ADDR, src_addr);
TEX_DCR_WRITE(DCR_TEX_ADDR, src_addr / 64); // block address
for (uint32_t i = 0; i < mip_offsets.size(); ++i) {
assert(i < TEX_LOD_MAX);
TEX_DCR_WRITE(DCR_TEX_MIPOFF(i), mip_offsets.at(i));