mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
Merge branch 'develop'
This commit is contained in:
commit
e663db9b5a
63 changed files with 1016 additions and 939 deletions
2
.github/workflows/ci.yml
vendored
2
.github/workflows/ci.yml
vendored
|
@ -117,7 +117,7 @@ jobs:
|
|||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
name: [regression, opencl, config1, config2, debug, stress]
|
||||
name: [regression, opencl, cache, config1, config2, debug, stress]
|
||||
xlen: [32, 64]
|
||||
|
||||
steps:
|
||||
|
|
|
@ -122,32 +122,54 @@ opencl()
|
|||
echo "opencl tests done!"
|
||||
}
|
||||
|
||||
test_csv_trace()
|
||||
cache()
|
||||
{
|
||||
# test CSV trace generation
|
||||
make -C sim/simx clean && DEBUG=3 make -C sim/simx > /dev/null
|
||||
make -C sim/rtlsim clean && DEBUG=3 CONFIGS="-DGPR_RESET" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-simx-32im > run_simx.log
|
||||
make -C tests/riscv/isa run-rtlsim-32im > run_rtlsim.log
|
||||
./ci/trace_csv.py -tsimx run_simx.log -otrace_simx.csv
|
||||
./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv
|
||||
diff trace_rtlsim.csv trace_simx.csv
|
||||
# clean build
|
||||
make -C sim/simx clean
|
||||
make -C sim/rtlsim clean
|
||||
}
|
||||
echo "begin cache tests..."
|
||||
|
||||
debug()
|
||||
{
|
||||
echo "begin debugging tests..."
|
||||
# disable local memory
|
||||
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo --perf=1
|
||||
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=demo --perf=1
|
||||
|
||||
test_csv_trace
|
||||
# disable L1 cache
|
||||
CONFIGS="-DL1_DISABLE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DL1_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DDCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DICACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
|
||||
./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=opae --cores=1 --scope --app=demo --args="-n1"
|
||||
# reduce l1 line size
|
||||
CONFIGS="-DL1_LINE_SIZE=$XLEN/8" ./ci/blackbox.sh --driver=rtlsim --app=io_addr
|
||||
CONFIGS="-DL1_LINE_SIZE=$XLEN/8" ./ci/blackbox.sh --driver=simx --app=io_addr
|
||||
CONFIGS="-DL1_LINE_SIZE=$XLEN/8 -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DL1_LINE_SIZE=$XLEN/8 -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
|
||||
echo "debugging tests done!"
|
||||
# test cache ways
|
||||
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
|
||||
# test cache banking
|
||||
CONFIGS="-DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
|
||||
# test writeback
|
||||
CONFIGS="-DDCACHE_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --app=mstress
|
||||
CONFIGS="-DDCACHE_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --app=mstress
|
||||
CONFIGS="-DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
|
||||
CONFIGS="-DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
|
||||
|
||||
# cache clustering
|
||||
CONFIGS="-DSOCKET_SIZE=4 -DNUM_DCACHES=4 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --cores=4 --warps=1 --threads=2
|
||||
|
||||
# L2/L3
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=rtlsim --cores=4 --l2cache --app=diverge --args="-n1"
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=4 --l2cache --app=diverge --args="-n1"
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=diverge --args="-n1"
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=diverge --args="-n1"
|
||||
|
||||
echo "begin cache tests..."
|
||||
}
|
||||
|
||||
config1()
|
||||
|
@ -163,10 +185,12 @@ config1()
|
|||
./ci/blackbox.sh --driver=simx --warps=8 --threads=16 --app=diverge
|
||||
|
||||
# cores clustering
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=4 --clusters=1 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=4 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=1 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --app=diverge --args="-n1"
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=diverge --args="-n1"
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --app=diverge --args="-n1"
|
||||
|
||||
# issue width
|
||||
CONFIGS="-DISSUE_WIDTH=2" ./ci/blackbox.sh --driver=rtlsim --app=diverge
|
||||
|
@ -186,22 +210,19 @@ config1()
|
|||
CONFIGS="-DISSUE_WIDTH=2 -DNUM_FPU_BLOCK=1 -DNUM_FPU_LANES=2" ./ci/blackbox.sh --driver=simx --app=vecaddx
|
||||
CONFIGS="-DISSUE_WIDTH=4 -DNUM_FPU_BLOCK=4 -DNUM_FPU_LANES=4" ./ci/blackbox.sh --driver=simx --app=vecaddx
|
||||
|
||||
# FPU's PE scaling
|
||||
CONFIGS="-DFMA_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfmadd"
|
||||
CONFIGS="-DFCVT_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tftoi"
|
||||
CONFIGS="-DFDIV_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfdiv"
|
||||
CONFIGS="-DFSQRT_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfsqrt"
|
||||
CONFIGS="-DFNCP_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfclamp"
|
||||
|
||||
# LSU scaling
|
||||
CONFIGS="-DISSUE_WIDTH=2 -DNUM_LSU_BLOCK=1 -DNUM_LSU_LANES=2" ./ci/blackbox.sh --driver=rtlsim --app=vecaddx
|
||||
CONFIGS="-DISSUE_WIDTH=4 -DNUM_LSU_BLOCK=4 -DNUM_LSU_LANES=4" ./ci/blackbox.sh --driver=rtlsim --app=vecaddx
|
||||
CONFIGS="-DISSUE_WIDTH=2 -DNUM_LSU_BLOCK=1 -DNUM_LSU_LANES=2" ./ci/blackbox.sh --driver=simx --app=vecaddx
|
||||
CONFIGS="-DISSUE_WIDTH=4 -DNUM_LSU_BLOCK=4 -DNUM_LSU_LANES=4" ./ci/blackbox.sh --driver=simx --app=vecaddx
|
||||
|
||||
# L2/L3
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l3cache --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --l2cache --app=diverge --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=4 --l2cache --l3cache --app=diverge --args="-n1"
|
||||
|
||||
# multiple L1 caches per socket
|
||||
CONFIGS="-DSOCKET_SIZE=4 -DNUM_DCACHES=2 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --cores=8 --warps=1 --threads=2
|
||||
|
||||
echo "configuration-1 tests done!"
|
||||
}
|
||||
|
||||
|
@ -232,37 +253,9 @@ config2()
|
|||
# disabling ZICOND extension
|
||||
CONFIGS="-DEXT_ZICOND_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo
|
||||
|
||||
# disable local memory
|
||||
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo --perf=1
|
||||
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=demo --perf=1
|
||||
|
||||
# test AXI bus
|
||||
AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --app=demo
|
||||
|
||||
# disable L1 cache
|
||||
CONFIGS="-DL1_DISABLE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DL1_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DDCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DICACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
|
||||
# reduce l1 line size
|
||||
CONFIGS="-DL1_LINE_SIZE=$XLEN/8" ./ci/blackbox.sh --driver=rtlsim --app=io_addr
|
||||
CONFIGS="-DL1_LINE_SIZE=$XLEN/8" ./ci/blackbox.sh --driver=simx --app=io_addr
|
||||
CONFIGS="-DL1_LINE_SIZE=$XLEN/8 -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DL1_LINE_SIZE=$XLEN/8 -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
|
||||
# test cache ways
|
||||
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
|
||||
# test cache banking
|
||||
CONFIGS="-DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
|
||||
# test 128-bit MEM block
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --app=demo
|
||||
|
||||
|
@ -275,12 +268,40 @@ config2()
|
|||
echo "configuration-2 tests done!"
|
||||
}
|
||||
|
||||
test_csv_trace()
|
||||
{
|
||||
# test CSV trace generation
|
||||
make -C sim/simx clean && DEBUG=3 make -C sim/simx > /dev/null
|
||||
make -C sim/rtlsim clean && DEBUG=3 CONFIGS="-DGPR_RESET" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-simx-32im > run_simx.log
|
||||
make -C tests/riscv/isa run-rtlsim-32im > run_rtlsim.log
|
||||
./ci/trace_csv.py -tsimx run_simx.log -otrace_simx.csv
|
||||
./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv
|
||||
diff trace_rtlsim.csv trace_simx.csv
|
||||
# clean build
|
||||
make -C sim/simx clean
|
||||
make -C sim/rtlsim clean
|
||||
}
|
||||
|
||||
debug()
|
||||
{
|
||||
echo "begin debugging tests..."
|
||||
|
||||
test_csv_trace
|
||||
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
|
||||
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=opae --cores=1 --scope --app=demo --args="-n1"
|
||||
|
||||
echo "debugging tests done!"
|
||||
}
|
||||
|
||||
stress()
|
||||
{
|
||||
echo "begin stress tests..."
|
||||
|
||||
# test verilator reset values
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1 -DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --args="-n128" --l2cache
|
||||
|
||||
echo "stress tests done!"
|
||||
|
@ -299,11 +320,9 @@ synthesis()
|
|||
show_usage()
|
||||
{
|
||||
echo "Vortex Regression Test"
|
||||
echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--config1] [--config2] [--debug] [--stress] [--synthesis] [--all] [--h|--help]"
|
||||
echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--stress] [--synthesis] [--all] [--h|--help]"
|
||||
}
|
||||
|
||||
start=$SECONDS
|
||||
|
||||
declare -a tests=()
|
||||
clean=0
|
||||
|
||||
|
@ -327,6 +346,9 @@ while [ "$1" != "" ]; do
|
|||
--opencl )
|
||||
tests+=("opencl")
|
||||
;;
|
||||
--cache )
|
||||
tests+=("cache")
|
||||
;;
|
||||
--config1 )
|
||||
tests+=("config1")
|
||||
;;
|
||||
|
@ -349,6 +371,7 @@ while [ "$1" != "" ]; do
|
|||
tests+=("kernel")
|
||||
tests+=("regression")
|
||||
tests+=("opencl")
|
||||
tests+=("cache")
|
||||
tests+=("config1")
|
||||
tests+=("config2")
|
||||
tests+=("debug")
|
||||
|
@ -372,6 +395,8 @@ then
|
|||
make -s
|
||||
fi
|
||||
|
||||
start=$SECONDS
|
||||
|
||||
for test in "${tests[@]}"; do
|
||||
$test
|
||||
done
|
||||
|
|
|
@ -19,6 +19,8 @@ import csv
|
|||
import re
|
||||
import inspect
|
||||
|
||||
configs = None
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(description='CPU trace log to CSV format converter.')
|
||||
parser.add_argument('-t', '--type', default='simx', help='log type (rtlsim or simx)')
|
||||
|
@ -26,6 +28,24 @@ def parse_args():
|
|||
parser.add_argument('log', help='Input log file')
|
||||
return parser.parse_args()
|
||||
|
||||
def load_config(filename):
|
||||
config_pattern = r"CONFIGS: num_threads=(\d+), num_warps=(\d+), num_cores=(\d+), num_clusters=(\d+), socket_size=(\d+), local_mem_base=0x([0-9a-fA-F]+), num_barriers=(\d+)"
|
||||
with open(filename, 'r') as file:
|
||||
for line in file:
|
||||
config_match = re.search(config_pattern, line)
|
||||
if config_match:
|
||||
config = {
|
||||
'num_threads': int(config_match.group(1)),
|
||||
'num_warps': int(config_match.group(2)),
|
||||
'num_cores': int(config_match.group(3)),
|
||||
'num_clusters': int(config_match.group(4)),
|
||||
'socket_size': int(config_match.group(5)),
|
||||
'local_mem_base': int(config_match.group(6), 16),
|
||||
'num_barriers': int(config_match.group(7)),
|
||||
}
|
||||
return config
|
||||
return None
|
||||
|
||||
def parse_simx(log_lines):
|
||||
pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
|
||||
instr_pattern = r"Instr (0x[0-9a-fA-F]+):"
|
||||
|
@ -46,10 +66,10 @@ def parse_simx(log_lines):
|
|||
instr_data = {}
|
||||
instr_data["lineno"] = lineno
|
||||
instr_data["PC"] = re.search(pc_pattern, line).group(1)
|
||||
instr_data["core_id"] = re.search(core_id_pattern, line).group(1)
|
||||
instr_data["warp_id"] = re.search(warp_id_pattern, line).group(1)
|
||||
instr_data["core_id"] = int(re.search(core_id_pattern, line).group(1))
|
||||
instr_data["warp_id"] = int(re.search(warp_id_pattern, line).group(1))
|
||||
instr_data["tmask"] = re.search(tmask_pattern, line).group(1)
|
||||
instr_data["uuid"] = re.search(uuid_pattern, line).group(1)
|
||||
instr_data["uuid"] = int(re.search(uuid_pattern, line).group(1))
|
||||
elif line.startswith("DEBUG Instr"):
|
||||
instr_data["instr"] = re.search(instr_pattern, line).group(1)
|
||||
instr_data["opcode"] = re.search(opcode_pattern, line).group(1)
|
||||
|
@ -96,7 +116,7 @@ def append_value(text, reg, value, tmask_arr, sep):
|
|||
return text, sep
|
||||
|
||||
def parse_rtlsim(log_lines):
|
||||
config_pattern = r"CONFIGS: num_threads=(\d+), num_warps=(\d+), num_cores=(\d+), num_clusters=(\d+), socket_size=(\d+), local_mem_base=(\d+), num_barriers=(\d+)"
|
||||
global configs
|
||||
line_pattern = r"\d+: cluster(\d+)-socket(\d+)-core(\d+)-(decode|issue|commit)"
|
||||
pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
|
||||
instr_pattern = r"instr=(0x[0-9a-fA-F]+)"
|
||||
|
@ -118,36 +138,20 @@ def parse_rtlsim(log_lines):
|
|||
uuid_pattern = r"#(\d+)"
|
||||
entries = []
|
||||
instr_data = {}
|
||||
num_threads = 0
|
||||
num_warps = 0
|
||||
num_cores = 0
|
||||
num_clusters = 0
|
||||
socket_size = 0
|
||||
local_mem_base = 0
|
||||
num_barriers = 0
|
||||
num_sockets = 0
|
||||
num_cores = configs['num_cores']
|
||||
socket_size = configs['socket_size']
|
||||
num_sockets = (num_cores + socket_size - 1) // socket_size
|
||||
for lineno, line in enumerate(log_lines, start=1):
|
||||
try:
|
||||
config_match = re.search(config_pattern, line)
|
||||
if config_match:
|
||||
num_threads = int(config_match.group(1))
|
||||
num_warps = int(config_match.group(2))
|
||||
num_cores = int(config_match.group(3))
|
||||
num_clusters = int(config_match.group(4))
|
||||
socket_size = int(config_match.group(5))
|
||||
local_mem_base = int(config_match.group(6))
|
||||
num_barriers = int(config_match.group(7))
|
||||
num_sockets = (num_cores + socket_size - 1) // socket_size
|
||||
continue
|
||||
line_match = re.search(line_pattern, line)
|
||||
if line_match:
|
||||
PC = re.search(pc_pattern, line).group(1)
|
||||
warp_id = re.search(warp_id_pattern, line).group(1)
|
||||
warp_id = int(re.search(warp_id_pattern, line).group(1))
|
||||
tmask = re.search(tmask_pattern, line).group(1)
|
||||
uuid = re.search(uuid_pattern, line).group(1)
|
||||
cluster_id = line_match.group(1)
|
||||
socket_id = line_match.group(2)
|
||||
core_id = line_match.group(3)
|
||||
uuid = int(re.search(uuid_pattern, line).group(1))
|
||||
cluster_id = int(line_match.group(1))
|
||||
socket_id = int(line_match.group(2))
|
||||
core_id = int(line_match.group(3))
|
||||
stage = line_match.group(4)
|
||||
if stage == "decode":
|
||||
trace = {}
|
||||
|
@ -274,7 +278,9 @@ def split_log_file(log_filename):
|
|||
return sublogs
|
||||
|
||||
def main():
|
||||
global configs
|
||||
args = parse_args()
|
||||
configs = load_config(args.log)
|
||||
sublogs = split_log_file(args.log)
|
||||
write_csv(sublogs, args.csv, args.type)
|
||||
|
||||
|
|
|
@ -238,11 +238,11 @@
|
|||
`define RESET_RELAY(dst, src) \
|
||||
`RESET_RELAY_EX (dst, src, 1, 0)
|
||||
|
||||
// size(x): 0 -> 0, 1 -> 1, 2 -> 2, 3 -> 2, 4-> 2
|
||||
`define TO_OUT_BUF_SIZE(out_reg) `MIN(out_reg, 2)
|
||||
// size(x): 0 -> 0, 1 -> 1, 2 -> 2, 3 -> 2, 4-> 2, 5 -> 2
|
||||
`define TO_OUT_BUF_SIZE(s) `MIN(s, 2)
|
||||
|
||||
// reg(x): 0 -> 0, 1 -> 1, 2 -> 0, 3 -> 1, 4 -> 2
|
||||
`define TO_OUT_BUF_REG(out_reg) ((out_reg & 1) + ((out_reg >> 2) << 1))
|
||||
// reg(x): 0 -> 0, 1 -> 1, 2 -> 0, 3 -> 1, 4 -> 2, 5 > 3
|
||||
`define TO_OUT_BUF_REG(s) ((s < 2) ? s : (s - 2))
|
||||
|
||||
`define REPEAT(n,f,s) `_REPEAT_``n(f,s)
|
||||
`define _REPEAT_0(f,s)
|
||||
|
|
89
hw/rtl/cache/VX_bank_flush.sv
vendored
89
hw/rtl/cache/VX_bank_flush.sv
vendored
|
@ -14,6 +14,7 @@
|
|||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_bank_flush #(
|
||||
parameter BANK_ID = 0,
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
|
@ -27,34 +28,36 @@ module VX_bank_flush #(
|
|||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire flush_in_valid,
|
||||
output wire flush_in_ready,
|
||||
output wire flush_out_init,
|
||||
output wire flush_out_valid,
|
||||
output wire [`CS_LINE_SEL_BITS-1:0] flush_out_line,
|
||||
output wire [NUM_WAYS-1:0] flush_out_way,
|
||||
input wire flush_out_ready,
|
||||
input wire mshr_empty
|
||||
input wire flush_begin,
|
||||
output wire flush_end,
|
||||
output wire flush_init,
|
||||
output wire flush_valid,
|
||||
output wire [`CS_LINE_SEL_BITS-1:0] flush_line,
|
||||
output wire [NUM_WAYS-1:0] flush_way,
|
||||
input wire flush_ready,
|
||||
input wire mshr_empty,
|
||||
input wire bank_empty
|
||||
);
|
||||
// ways interation is only needed when eviction is enabled
|
||||
localparam CTR_WIDTH = `CS_LINE_SEL_BITS + (WRITEBACK ? `CS_WAY_SEL_BITS : 0);
|
||||
|
||||
localparam STATE_IDLE = 2'd0;
|
||||
localparam STATE_INIT = 2'd1;
|
||||
localparam STATE_FLUSH = 2'd2;
|
||||
localparam STATE_IDLE = 0;
|
||||
localparam STATE_INIT = 1;
|
||||
localparam STATE_WAIT1 = 2;
|
||||
localparam STATE_FLUSH = 3;
|
||||
localparam STATE_WAIT2 = 4;
|
||||
localparam STATE_DONE = 5;
|
||||
|
||||
reg [2:0] state_r, state_n;
|
||||
|
||||
reg [CTR_WIDTH-1:0] counter_r;
|
||||
reg [1:0] state_r, state_n;
|
||||
reg flush_in_ready_r, flush_in_ready_n;
|
||||
|
||||
always @(*) begin
|
||||
state_n = state_r;
|
||||
flush_in_ready_n = 0;
|
||||
case (state_r)
|
||||
// STATE_IDLE
|
||||
default: begin
|
||||
if (flush_in_valid && mshr_empty) begin
|
||||
state_n = STATE_FLUSH;
|
||||
STATE_IDLE: begin
|
||||
if (flush_begin) begin
|
||||
state_n = STATE_WAIT1;
|
||||
end
|
||||
end
|
||||
STATE_INIT: begin
|
||||
|
@ -62,25 +65,41 @@ module VX_bank_flush #(
|
|||
state_n = STATE_IDLE;
|
||||
end
|
||||
end
|
||||
STATE_FLUSH: begin
|
||||
if (counter_r == ((2 ** CTR_WIDTH)-1)) begin
|
||||
state_n = STATE_IDLE;
|
||||
flush_in_ready_n = 1;
|
||||
STATE_WAIT1: begin
|
||||
// wait for pending requests to complete
|
||||
if (mshr_empty) begin
|
||||
state_n = STATE_FLUSH;
|
||||
end
|
||||
end
|
||||
STATE_FLUSH: begin
|
||||
if (counter_r == ((2 ** CTR_WIDTH)-1) && flush_ready) begin
|
||||
state_n = (BANK_ID == 0) ? STATE_DONE : STATE_WAIT2;
|
||||
end
|
||||
end
|
||||
STATE_WAIT2: begin
|
||||
// ensure the bank is empty before notifying the cache flush unit,
|
||||
// because the flush request to lower caches only goes through bank0
|
||||
// and it is important that request gets send out last.
|
||||
if (bank_empty) begin
|
||||
state_n = STATE_DONE;
|
||||
end
|
||||
end
|
||||
STATE_DONE: begin
|
||||
// generate a completion pulse
|
||||
state_n = STATE_IDLE;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state_r <= STATE_INIT;
|
||||
state_r <= STATE_INIT;
|
||||
counter_r <= '0;
|
||||
flush_in_ready_r <= '0;
|
||||
end else begin
|
||||
state_r <= state_n;
|
||||
flush_in_ready_r <= flush_in_ready_n;
|
||||
if (state_r != STATE_IDLE) begin
|
||||
if ((state_r == STATE_INIT) || flush_out_ready) begin
|
||||
if ((state_r == STATE_INIT)
|
||||
|| ((state_r == STATE_FLUSH) && flush_ready)) begin
|
||||
counter_r <= counter_r + CTR_WIDTH'(1);
|
||||
end
|
||||
end else begin
|
||||
|
@ -89,20 +108,20 @@ module VX_bank_flush #(
|
|||
end
|
||||
end
|
||||
|
||||
assign flush_in_ready = flush_in_ready_r;
|
||||
assign flush_out_init = (state_r == STATE_INIT);
|
||||
assign flush_out_valid = (state_r == STATE_FLUSH);
|
||||
assign flush_out_line = counter_r[`CS_LINE_SEL_BITS-1:0];
|
||||
assign flush_end = (state_r == STATE_DONE);
|
||||
assign flush_init = (state_r == STATE_INIT);
|
||||
assign flush_valid = (state_r == STATE_FLUSH);
|
||||
assign flush_line = counter_r[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin
|
||||
reg [NUM_WAYS-1:0] flush_out_way_r;
|
||||
reg [NUM_WAYS-1:0] flush_way_r;
|
||||
always @(*) begin
|
||||
flush_out_way_r = '0;
|
||||
flush_out_way_r[counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]] = 1;
|
||||
flush_way_r = '0;
|
||||
flush_way_r[counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]] = 1;
|
||||
end
|
||||
assign flush_out_way = flush_out_way_r;
|
||||
assign flush_way = flush_way_r;
|
||||
end else begin
|
||||
assign flush_out_way = {NUM_WAYS{1'b1}};
|
||||
assign flush_way = {NUM_WAYS{1'b1}};
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
21
hw/rtl/cache/VX_cache.sv
vendored
21
hw/rtl/cache/VX_cache.sv
vendored
|
@ -109,26 +109,23 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH (TAG_WIDTH)
|
||||
) core_bus2_if[NUM_REQS]();
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_flush_valid;
|
||||
wire [NUM_BANKS-1:0] per_bank_flush_ready;
|
||||
wire [NUM_BANKS-1:0] per_bank_flush_begin;
|
||||
wire [NUM_BANKS-1:0] per_bank_flush_end;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_fire;
|
||||
|
||||
// this reset relay is required to sync with bank initialization
|
||||
`RESET_RELAY (flush_reset, reset);
|
||||
|
||||
VX_cache_flush #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.BANK_SEL_LATENCY (`TO_OUT_BUF_REG(REQ_XBAR_BUF)) // bank xbar latency
|
||||
) flush_unit (
|
||||
.clk (clk),
|
||||
.reset (flush_reset),
|
||||
.reset (reset),
|
||||
.core_bus_in_if (core_bus_if),
|
||||
.core_bus_out_if (core_bus2_if),
|
||||
.bank_req_fire (per_bank_core_req_fire),
|
||||
.flush_valid (per_bank_flush_valid),
|
||||
.flush_ready (per_bank_flush_ready)
|
||||
.flush_begin (per_bank_flush_begin),
|
||||
.flush_end (per_bank_flush_end)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
@ -324,6 +321,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.NUM_OUTPUTS (NUM_BANKS),
|
||||
.DATAW (CORE_REQ_DATAW),
|
||||
.PERF_CTR_BITS (`PERF_CTR_BITS),
|
||||
.ARBITER ("F"),
|
||||
.OUT_BUF (REQ_XBAR_BUF)
|
||||
) req_xbar (
|
||||
.clk (clk),
|
||||
|
@ -432,8 +430,8 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.mem_rsp_id (`CS_MEM_TAG_TO_REQ_ID(mem_rsp_tag_s)),
|
||||
.mem_rsp_ready (per_bank_mem_rsp_ready[bank_id]),
|
||||
|
||||
.flush_valid (per_bank_flush_valid[bank_id]),
|
||||
.flush_ready (per_bank_flush_ready[bank_id])
|
||||
.flush_begin (per_bank_flush_begin[bank_id]),
|
||||
.flush_end (per_bank_flush_end[bank_id])
|
||||
);
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
|
@ -457,7 +455,8 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
VX_stream_xbar #(
|
||||
.NUM_INPUTS (NUM_BANKS),
|
||||
.NUM_OUTPUTS (NUM_REQS),
|
||||
.DATAW (CORE_RSP_DATAW)
|
||||
.DATAW (CORE_RSP_DATAW),
|
||||
.ARBITER ("F")
|
||||
) rsp_xbar (
|
||||
.clk (clk),
|
||||
.reset (rsp_xbar_reset),
|
||||
|
|
91
hw/rtl/cache/VX_cache_bank.sv
vendored
91
hw/rtl/cache/VX_cache_bank.sv
vendored
|
@ -108,8 +108,8 @@ module VX_cache_bank #(
|
|||
output wire mem_rsp_ready,
|
||||
|
||||
// flush
|
||||
input wire flush_valid,
|
||||
output wire flush_ready
|
||||
input wire flush_begin,
|
||||
output wire flush_end
|
||||
);
|
||||
|
||||
localparam PIPELINE_STAGES = 2;
|
||||
|
@ -120,6 +120,7 @@ module VX_cache_bank #(
|
|||
|
||||
wire crsp_queue_stall;
|
||||
wire mshr_alm_full;
|
||||
wire mreq_queue_empty;
|
||||
wire mreq_queue_alm_full;
|
||||
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] mem_rsp_addr;
|
||||
|
@ -162,30 +163,38 @@ module VX_cache_bank #(
|
|||
wire mshr_pending_st0, mshr_pending_st1;
|
||||
wire mshr_empty;
|
||||
|
||||
wire line_flush_valid;
|
||||
wire line_flush_init;
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_flush_sel;
|
||||
wire [NUM_WAYS-1:0] line_flush_way;
|
||||
wire line_flush_ready;
|
||||
wire flush_valid;
|
||||
wire init_valid;
|
||||
wire [`CS_LINE_SEL_BITS-1:0] flush_sel;
|
||||
wire [NUM_WAYS-1:0] flush_way;
|
||||
wire flush_ready;
|
||||
|
||||
// ensure we have no pending memory request in the bank
|
||||
wire no_pending_req = ~valid_st0 && ~valid_st1 && mreq_queue_empty;
|
||||
|
||||
// this reset relay should match pipeline during tags initialization
|
||||
`RESET_RELAY (flush_reset, reset);
|
||||
|
||||
// flush unit
|
||||
VX_bank_flush #(
|
||||
.BANK_ID (BANK_ID),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_WAYS (NUM_WAYS),
|
||||
.WRITEBACK (WRITEBACK)
|
||||
) flush_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.flush_in_valid (flush_valid),
|
||||
.flush_in_ready (flush_ready),
|
||||
.flush_out_init (line_flush_init),
|
||||
.flush_out_valid (line_flush_valid),
|
||||
.flush_out_line (line_flush_sel),
|
||||
.flush_out_way (line_flush_way),
|
||||
.flush_out_ready (line_flush_ready),
|
||||
.mshr_empty (mshr_empty)
|
||||
.clk (clk),
|
||||
.reset (flush_reset),
|
||||
.flush_begin (flush_begin),
|
||||
.flush_end (flush_end),
|
||||
.flush_init (init_valid),
|
||||
.flush_valid (flush_valid),
|
||||
.flush_line (flush_sel),
|
||||
.flush_way (flush_way),
|
||||
.flush_ready (flush_ready),
|
||||
.mshr_empty (mshr_empty),
|
||||
.bank_empty (no_pending_req)
|
||||
);
|
||||
|
||||
wire rdw_hazard1_sel;
|
||||
|
@ -198,16 +207,16 @@ module VX_cache_bank #(
|
|||
// mshr replay has highest priority to maximize utilization since there is no miss.
|
||||
// handle memory responses next to prevent deadlock with potential memory request from a miss.
|
||||
// flush has precedence over core requests to ensure that the cache is in a consistent state.
|
||||
wire replay_grant = ~line_flush_init;
|
||||
wire replay_grant = ~init_valid;
|
||||
wire replay_enable = replay_grant && replay_valid;
|
||||
|
||||
wire fill_grant = ~line_flush_init && ~replay_enable;
|
||||
wire fill_grant = ~init_valid && ~replay_enable;
|
||||
wire fill_enable = fill_grant && mem_rsp_valid;
|
||||
|
||||
wire flush_grant = ~line_flush_init && ~replay_enable && ~fill_enable;
|
||||
wire flush_enable = flush_grant && line_flush_valid;
|
||||
wire flush_grant = ~init_valid && ~replay_enable && ~fill_enable;
|
||||
wire flush_enable = flush_grant && flush_valid;
|
||||
|
||||
wire creq_grant = ~line_flush_init && ~replay_enable && ~fill_enable && ~flush_enable;
|
||||
wire creq_grant = ~init_valid && ~replay_enable && ~fill_enable && ~flush_enable;
|
||||
wire creq_enable = creq_grant && core_req_valid;
|
||||
|
||||
assign replay_ready = replay_grant
|
||||
|
@ -219,23 +228,23 @@ module VX_cache_bank #(
|
|||
&& ~rdw_hazard2_sel
|
||||
&& ~pipe_stall;
|
||||
|
||||
assign line_flush_ready = flush_grant
|
||||
&& (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions
|
||||
&& ~rdw_hazard2_sel
|
||||
&& ~pipe_stall;
|
||||
assign flush_ready = flush_grant
|
||||
&& (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions
|
||||
&& ~rdw_hazard2_sel
|
||||
&& ~pipe_stall;
|
||||
|
||||
assign core_req_ready = creq_grant
|
||||
&& ~mreq_queue_alm_full
|
||||
&& ~mshr_alm_full
|
||||
&& ~pipe_stall;
|
||||
|
||||
wire init_fire = line_flush_init;
|
||||
wire init_fire = init_valid;
|
||||
wire replay_fire = replay_valid && replay_ready;
|
||||
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
|
||||
wire line_flush_fire = line_flush_valid && line_flush_ready;
|
||||
wire flush_fire = flush_valid && flush_ready;
|
||||
wire core_req_fire = core_req_valid && core_req_ready;
|
||||
|
||||
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || line_flush_fire || core_req_fire;
|
||||
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || flush_fire || core_req_fire;
|
||||
assign rw_sel = replay_valid ? replay_rw : core_req_rw;
|
||||
assign byteen_sel = replay_valid ? replay_byteen : core_req_byteen;
|
||||
assign wsel_sel = replay_valid ? replay_wsel : core_req_wsel;
|
||||
|
@ -243,7 +252,7 @@ module VX_cache_bank #(
|
|||
assign tag_sel = replay_valid ? replay_tag : core_req_tag;
|
||||
assign creq_flush_sel = core_req_valid && core_req_flush;
|
||||
|
||||
assign addr_sel = (line_flush_init | line_flush_valid) ? `CS_LINE_ADDR_WIDTH'(line_flush_sel) :
|
||||
assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) :
|
||||
(replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr));
|
||||
|
||||
if (WRITE_ENABLE) begin
|
||||
|
@ -263,15 +272,17 @@ module VX_cache_bank #(
|
|||
assign req_uuid_sel = 0;
|
||||
end
|
||||
|
||||
`RESET_RELAY (pipe0_reset, reset);
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + NUM_WAYS + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH),
|
||||
.RESETW (1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (pipe0_reset),
|
||||
.enable (~pipe_stall),
|
||||
.data_in ({valid_sel, line_flush_init, replay_enable, fill_enable, flush_enable, creq_enable, creq_flush_sel, line_flush_way, addr_sel, data_sel, rw_sel, byteen_sel, wsel_sel, req_idx_sel, tag_sel, replay_id}),
|
||||
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, creq_flush_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0})
|
||||
.data_in ({valid_sel, init_valid, replay_enable, fill_enable, flush_enable, creq_enable, creq_flush_sel, flush_way, addr_sel, data_sel, rw_sel, byteen_sel, wsel_sel, req_idx_sel, tag_sel, replay_id}),
|
||||
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, creq_flush_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0})
|
||||
);
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
|
@ -298,7 +309,7 @@ module VX_cache_bank #(
|
|||
wire [NUM_WAYS-1:0] evict_way_st0;
|
||||
wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st0;
|
||||
|
||||
`RESET_RELAY (tag_reset, reset);
|
||||
`RESET_RELAY (tags_reset, reset);
|
||||
|
||||
VX_cache_tags #(
|
||||
.INSTANCE_ID($sformatf("%s-tags", INSTANCE_ID)),
|
||||
|
@ -312,7 +323,7 @@ module VX_cache_bank #(
|
|||
.UUID_WIDTH (UUID_WIDTH)
|
||||
) cache_tags (
|
||||
.clk (clk),
|
||||
.reset (tag_reset),
|
||||
.reset (tags_reset),
|
||||
|
||||
.req_uuid (req_uuid_st0),
|
||||
|
||||
|
@ -344,12 +355,14 @@ module VX_cache_bank #(
|
|||
|
||||
assign addr2_st0 = (is_fill_st0 || is_flush2_st0) ? {evict_tag_st0, line_sel_st0} : addr_st0;
|
||||
|
||||
`RESET_RELAY (pipe1_reset, reset);
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (pipe1_reset),
|
||||
.enable (~pipe_stall),
|
||||
.data_in ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush2_st0, is_creq_st0, creq_flush_st0, rw_st0, addr2_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, way_sel_st0, evict_dirty_st0, mshr_pending_st0}),
|
||||
.data_out ({valid_st1, is_init_st1, is_replay_st1, is_fill_st1, is_flush_st1, is_creq_st1, creq_flush_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, way_sel_st1, evict_dirty_st1, mshr_pending_st1})
|
||||
|
@ -585,7 +598,7 @@ module VX_cache_bank #(
|
|||
|
||||
// schedule memory request
|
||||
|
||||
wire mreq_queue_push, mreq_queue_pop, mreq_queue_empty;
|
||||
wire mreq_queue_push, mreq_queue_pop;
|
||||
wire [`CS_LINE_WIDTH-1:0] mreq_queue_data;
|
||||
wire [LINE_SIZE-1:0] mreq_queue_byteen;
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] mreq_queue_addr;
|
||||
|
@ -663,8 +676,8 @@ module VX_cache_bank #(
|
|||
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
wire crsp_queue_fire = crsp_queue_valid && crsp_queue_ready;
|
||||
wire input_stall = (replay_valid || mem_rsp_valid || core_req_valid || line_flush_valid)
|
||||
&& ~(replay_fire || mem_rsp_fire || core_req_fire || line_flush_fire);
|
||||
wire input_stall = (replay_valid || mem_rsp_valid || core_req_valid || flush_valid)
|
||||
&& ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire);
|
||||
always @(posedge clk) begin
|
||||
if (input_stall || pipe_stall) begin
|
||||
`TRACE(3, ("%d: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1));
|
||||
|
|
9
hw/rtl/cache/VX_cache_bypass.sv
vendored
9
hw/rtl/cache/VX_cache_bypass.sv
vendored
|
@ -217,13 +217,15 @@ module VX_cache_bypass #(
|
|||
|
||||
assign mem_bus_in_if.req_ready = mem_req_out_ready;
|
||||
|
||||
`RESET_RELAY (mem_req_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `ADDR_TYPE_WIDTH + `CS_LINE_WIDTH + MEM_TAG_OUT_WIDTH),
|
||||
.SIZE ((!DIRECT_PASSTHRU) ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
) mem_req_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (mem_req_reset),
|
||||
.valid_in (mem_req_out_valid),
|
||||
.ready_in (mem_req_out_ready),
|
||||
.data_in ({mem_req_out_rw, mem_req_out_byteen, mem_req_out_addr, mem_req_out_atype, mem_req_out_data, mem_req_out_tag}),
|
||||
|
@ -309,13 +311,16 @@ module VX_cache_bypass #(
|
|||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
|
||||
`RESET_RELAY (core_rsp_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`CS_WORD_WIDTH + CORE_TAG_WIDTH),
|
||||
.SIZE ((!DIRECT_PASSTHRU) ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF))
|
||||
) core_rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (core_rsp_reset),
|
||||
.valid_in (core_rsp_in_valid[i]),
|
||||
.ready_in (core_rsp_in_ready[i]),
|
||||
.data_in ({core_rsp_in_data[i], core_rsp_in_tag[i]}),
|
||||
|
|
8
hw/rtl/cache/VX_cache_cluster.sv
vendored
8
hw/rtl/cache/VX_cache_cluster.sv
vendored
|
@ -117,7 +117,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
`ASSIGN_VX_MEM_BUS_IF (core_bus_tmp_if[j], core_bus_if[j * NUM_REQS + i]);
|
||||
end
|
||||
|
||||
`RESET_RELAY (arb_reset, reset);
|
||||
`RESET_RELAY (cache_arb_reset, reset);
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_INPUTS (NUM_INPUTS),
|
||||
|
@ -130,7 +130,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
.RSP_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? 2 : 0)
|
||||
) cache_arb (
|
||||
.clk (clk),
|
||||
.reset (arb_reset),
|
||||
.reset (cache_arb_reset),
|
||||
.bus_in_if (core_bus_tmp_if),
|
||||
.bus_out_if (arb_core_bus_tmp_if)
|
||||
);
|
||||
|
@ -182,6 +182,8 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH (MEM_TAG_WIDTH + `ARB_SEL_BITS(NUM_CACHES, 1))
|
||||
) mem_bus_tmp_if[1]();
|
||||
|
||||
`RESET_RELAY (mem_arb_reset, reset);
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_INPUTS (NUM_CACHES),
|
||||
.DATA_SIZE (LINE_SIZE),
|
||||
|
@ -192,7 +194,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
.RSP_OUT_BUF ((NUM_CACHES > 1) ? 2 : 0)
|
||||
) mem_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (mem_arb_reset),
|
||||
.bus_in_if (cache_mem_bus_if),
|
||||
.bus_out_if (mem_bus_tmp_if)
|
||||
);
|
||||
|
|
7
hw/rtl/cache/VX_cache_data.sv
vendored
7
hw/rtl/cache/VX_cache_data.sv
vendored
|
@ -62,7 +62,6 @@ module VX_cache_data #(
|
|||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_PARAM (BANK_ID)
|
||||
`UNUSED_PARAM (WORD_SIZE)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (stall)
|
||||
`UNUSED_VAR (line_addr)
|
||||
`UNUSED_VAR (init)
|
||||
|
@ -91,9 +90,10 @@ module VX_cache_data #(
|
|||
.SIZE (`CS_LINES_PER_BANK)
|
||||
) byteen_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (write || fill || flush),
|
||||
.write (init || write || fill || flush),
|
||||
`UNUSED_PIN (wren),
|
||||
.wren (1'b1),
|
||||
.addr (line_sel),
|
||||
.wdata (bs_wdata),
|
||||
.rdata (bs_rdata)
|
||||
|
@ -117,7 +117,7 @@ module VX_cache_data #(
|
|||
end
|
||||
|
||||
// order the data layout to perform ways multiplexing last.
|
||||
// this allows converting way index to binary in parallel with BRAM read.
|
||||
// this allows converting way index to binary in parallel with BRAM read access and way selection.
|
||||
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
|
||||
wire [BYTEENW-1:0] line_wren;
|
||||
|
@ -161,6 +161,7 @@ module VX_cache_data #(
|
|||
.RW_ASSERT (1)
|
||||
) data_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (line_read),
|
||||
.write (line_write),
|
||||
.wren (line_wren),
|
||||
|
|
31
hw/rtl/cache/VX_cache_flush.sv
vendored
31
hw/rtl/cache/VX_cache_flush.sv
vendored
|
@ -26,13 +26,16 @@ module VX_cache_flush #(
|
|||
VX_mem_bus_if.slave core_bus_in_if [NUM_REQS],
|
||||
VX_mem_bus_if.master core_bus_out_if [NUM_REQS],
|
||||
input wire [NUM_BANKS-1:0] bank_req_fire,
|
||||
output wire [NUM_BANKS-1:0] flush_valid,
|
||||
input wire [NUM_BANKS-1:0] flush_ready
|
||||
output wire [NUM_BANKS-1:0] flush_begin,
|
||||
input wire [NUM_BANKS-1:0] flush_end
|
||||
);
|
||||
localparam STATE_IDLE = 0;
|
||||
localparam STATE_WAIT = 1;
|
||||
localparam STATE_WAIT1 = 1;
|
||||
localparam STATE_FLUSH = 2;
|
||||
localparam STATE_DONE = 3;
|
||||
localparam STATE_WAIT2 = 3;
|
||||
localparam STATE_DONE = 4;
|
||||
|
||||
reg [2:0] state, state_n;
|
||||
|
||||
// track in-flight core requests
|
||||
|
||||
|
@ -76,7 +79,6 @@ module VX_cache_flush #(
|
|||
`UNUSED_VAR (bank_req_fire)
|
||||
end
|
||||
|
||||
reg [1:0] state, state_n;
|
||||
reg [NUM_BANKS-1:0] flush_done, flush_done_n;
|
||||
|
||||
wire [NUM_REQS-1:0] flush_req_mask;
|
||||
|
@ -112,23 +114,32 @@ module VX_cache_flush #(
|
|||
case (state)
|
||||
STATE_IDLE: begin
|
||||
if (flush_req_enable) begin
|
||||
state_n = (BANK_SEL_LATENCY != 0) ? STATE_WAIT : STATE_FLUSH;
|
||||
state_n = (BANK_SEL_LATENCY != 0) ? STATE_WAIT1 : STATE_FLUSH;
|
||||
end
|
||||
end
|
||||
STATE_WAIT: begin
|
||||
STATE_WAIT1: begin
|
||||
if (no_inflight_reqs) begin
|
||||
state_n = STATE_FLUSH;
|
||||
flush_done_n = '0;
|
||||
end
|
||||
end
|
||||
STATE_FLUSH: begin
|
||||
flush_done_n = flush_done | flush_ready;
|
||||
// generate a flush request pulse
|
||||
state_n = STATE_WAIT2;
|
||||
end
|
||||
STATE_WAIT2: begin
|
||||
// wait for all banks to finish flushing
|
||||
flush_done_n = flush_done | flush_end;
|
||||
if (flush_done_n == {NUM_BANKS{1'b1}}) begin
|
||||
state_n = STATE_DONE;
|
||||
flush_done_n = '0;
|
||||
// only release current flush requests
|
||||
// and keep normal requests locked
|
||||
lock_released_n = flush_req_mask;
|
||||
end
|
||||
end
|
||||
STATE_DONE: begin
|
||||
// wait until released flush requests are issued
|
||||
// when returning to IDLE state other requests will unlock
|
||||
lock_released_n = lock_released & ~core_bus_out_ready;
|
||||
if (lock_released_n == 0) begin
|
||||
state_n = STATE_IDLE;
|
||||
|
@ -149,6 +160,6 @@ module VX_cache_flush #(
|
|||
end
|
||||
end
|
||||
|
||||
assign flush_valid = {NUM_BANKS{state == STATE_FLUSH}};
|
||||
assign flush_begin = {NUM_BANKS{state == STATE_FLUSH}};
|
||||
|
||||
endmodule
|
||||
|
|
3
hw/rtl/cache/VX_cache_mshr.sv
vendored
3
hw/rtl/cache/VX_cache_mshr.sv
vendored
|
@ -232,9 +232,10 @@ module VX_cache_mshr #(
|
|||
.LUTRAM (1)
|
||||
) entries (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (1'b1),
|
||||
.write (allocate_valid),
|
||||
`UNUSED_PIN (wren),
|
||||
.wren (1'b1),
|
||||
.waddr (allocate_id_r),
|
||||
.wdata (allocate_data),
|
||||
.raddr (dequeue_id_r),
|
||||
|
|
4
hw/rtl/cache/VX_cache_tags.sv
vendored
4
hw/rtl/cache/VX_cache_tags.sv
vendored
|
@ -57,7 +57,6 @@ module VX_cache_tags #(
|
|||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_PARAM (BANK_ID)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (lookup)
|
||||
|
||||
// valid, dirty, tag
|
||||
|
@ -130,9 +129,10 @@ module VX_cache_tags #(
|
|||
.RW_ASSERT (1)
|
||||
) tag_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (line_read),
|
||||
.write (line_write),
|
||||
`UNUSED_PIN (wren),
|
||||
.wren (1'b1),
|
||||
.addr (line_sel),
|
||||
.wdata (line_wdata),
|
||||
.rdata (line_rdata)
|
||||
|
|
|
@ -83,7 +83,7 @@ module VX_alu_muldiv #(
|
|||
.DEPTH (`LATENCY_IMUL),
|
||||
.RESETW (1)
|
||||
) mul_shift_reg (
|
||||
.clk(clk),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (mul_ready_in),
|
||||
.data_in ({mul_valid_in, execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, mul_result_tmp}),
|
||||
|
@ -324,6 +324,7 @@ module VX_alu_muldiv #(
|
|||
VX_stream_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATAW (TAG_WIDTH + (NUM_LANES * `XLEN)),
|
||||
.ARBITER ("F"),
|
||||
.OUT_BUF (1)
|
||||
) rsp_buf (
|
||||
.clk (clk),
|
||||
|
|
|
@ -126,7 +126,8 @@ module VX_alu_unit #(
|
|||
VX_stream_arb #(
|
||||
.NUM_INPUTS (RSP_ARB_SIZE),
|
||||
.DATAW (RSP_ARB_DATAW),
|
||||
.OUT_BUF (PARTIAL_BW ? 1 : 3)
|
||||
.OUT_BUF (PARTIAL_BW ? 1 : 3),
|
||||
.ARBITER ("F")
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (arb_reset),
|
||||
|
|
|
@ -56,9 +56,10 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
.LUTRAM (1)
|
||||
) tag_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (1'b1),
|
||||
.write (icache_req_fire),
|
||||
`UNUSED_PIN (wren),
|
||||
.wren (1'b1),
|
||||
.waddr (req_tag),
|
||||
.wdata ({schedule_if.data.PC, schedule_if.data.tmask}),
|
||||
.raddr (rsp_tag),
|
||||
|
|
|
@ -72,9 +72,10 @@ module VX_ipdom_stack #(
|
|||
.LUTRAM (OUT_REG ? 0 : 1)
|
||||
) store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (1'b1),
|
||||
.write (push),
|
||||
`UNUSED_PIN (wren),
|
||||
.wren (1'b1),
|
||||
.waddr (wr_ptr),
|
||||
.wdata ({q1, q0}),
|
||||
.raddr (rd_ptr),
|
||||
|
|
|
@ -490,6 +490,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
VX_stream_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATAW (RSP_ARB_DATAW),
|
||||
.ARBITER ("P"), // prioritize commit_rsp_if
|
||||
.OUT_BUF (3)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
|
|
|
@ -43,8 +43,9 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
|
||||
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
|
||||
localparam PER_BANK_REGS = `NUM_REGS / NUM_BANKS;
|
||||
localparam METADATAW = ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + `NR_BITS;
|
||||
localparam DATAW = `UUID_WIDTH + METADATAW + 3 * `NUM_THREADS * `XLEN;
|
||||
localparam META_DATAW = ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + `NR_BITS + `UUID_WIDTH;
|
||||
localparam REGS_DATAW = NUM_SRC_REGS * `NUM_THREADS * `XLEN;
|
||||
localparam DATAW = META_DATAW + REGS_DATAW;
|
||||
localparam RAM_ADDRW = `LOG2UP(`NUM_REGS * PER_ISSUE_WARPS);
|
||||
localparam PER_BANK_ADDRW = RAM_ADDRW - BANK_SEL_BITS;
|
||||
localparam XLEN_SIZE = `XLEN / 8;
|
||||
|
@ -53,30 +54,28 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
`UNUSED_VAR (writeback_if.data.sop)
|
||||
|
||||
wire [NUM_SRC_REGS-1:0] src_valid;
|
||||
wire [NUM_SRC_REGS-1:0] req_in_valid;
|
||||
wire [NUM_SRC_REGS-1:0] req_in_ready;
|
||||
wire [NUM_SRC_REGS-1:0] req_in_valid, req_in_ready;
|
||||
wire [NUM_SRC_REGS-1:0][PER_BANK_ADDRW-1:0] req_in_data;
|
||||
wire [NUM_SRC_REGS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx;
|
||||
|
||||
wire [NUM_BANKS-1:0] gpr_rd_valid_n, gpr_rd_ready;
|
||||
reg [NUM_BANKS-1:0] gpr_rd_valid;
|
||||
wire [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr_n;
|
||||
reg [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr;
|
||||
wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data;
|
||||
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx_n;
|
||||
reg [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx;
|
||||
wire [NUM_BANKS-1:0] gpr_rd_valid, gpr_rd_ready;
|
||||
wire [NUM_BANKS-1:0] gpr_rd_valid_st1, gpr_rd_valid_st2;
|
||||
wire [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr, gpr_rd_addr_st1;
|
||||
wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st1, gpr_rd_data_st2;
|
||||
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx, gpr_rd_req_idx_st1, gpr_rd_req_idx_st2;
|
||||
|
||||
wire pipe_in_ready;
|
||||
reg pipe_out_valid;
|
||||
wire pipe_out_ready;
|
||||
reg [`UUID_WIDTH-1:0] pipe_out_uuid;
|
||||
reg [METADATAW-1:0] pipe_out_data;
|
||||
wire pipe_valid_st1, pipe_ready_st1;
|
||||
wire pipe_valid_st2, pipe_ready_st2;
|
||||
wire [META_DATAW-1:0] pipe_data, pipe_data_st1, pipe_data_st2;
|
||||
|
||||
reg [NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data, src_data_n;
|
||||
reg [NUM_SRC_REGS-1:0] data_fetched;
|
||||
reg has_collision, has_collision_n;
|
||||
reg [NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_n;
|
||||
wire [NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_st1, src_data_st2;
|
||||
|
||||
wire stg_in_valid, stg_in_ready;
|
||||
reg [NUM_SRC_REGS-1:0] data_fetched_n;
|
||||
wire [NUM_SRC_REGS-1:0] data_fetched_st1;
|
||||
|
||||
reg has_collision_n;
|
||||
wire has_collision_st1;
|
||||
|
||||
wire [NUM_SRC_REGS-1:0][`NR_BITS-1:0] src_regs = {scoreboard_if.data.rs3,
|
||||
scoreboard_if.data.rs2,
|
||||
|
@ -96,11 +95,13 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_SRC_REGS; ++i) begin
|
||||
assign src_valid[i] = (src_regs[i] != 0) && ~data_fetched[i];
|
||||
assign src_valid[i] = (src_regs[i] != 0) && ~data_fetched_st1[i];
|
||||
end
|
||||
|
||||
assign req_in_valid = {NUM_SRC_REGS{scoreboard_if.valid}} & src_valid;
|
||||
|
||||
`RESET_RELAY (req_xbar_reset, reset);
|
||||
|
||||
VX_stream_xbar #(
|
||||
.NUM_INPUTS (NUM_SRC_REGS),
|
||||
.NUM_OUTPUTS (NUM_BANKS),
|
||||
|
@ -110,19 +111,26 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
.OUT_BUF (0) // no output buffering
|
||||
) req_xbar (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (req_xbar_reset),
|
||||
`UNUSED_PIN(collisions),
|
||||
.valid_in (req_in_valid),
|
||||
.data_in (req_in_data),
|
||||
.sel_in (req_bank_idx),
|
||||
.ready_in (req_in_ready),
|
||||
.valid_out (gpr_rd_valid_n),
|
||||
.data_out (gpr_rd_addr_n),
|
||||
.sel_out (gpr_rd_req_idx_n),
|
||||
.valid_out (gpr_rd_valid),
|
||||
.data_out (gpr_rd_addr),
|
||||
.sel_out (gpr_rd_req_idx),
|
||||
.ready_out (gpr_rd_ready)
|
||||
);
|
||||
|
||||
assign gpr_rd_ready = {NUM_BANKS{stg_in_ready}};
|
||||
wire pipe_in_ready = pipe_ready_st1 || ~pipe_valid_st1;
|
||||
|
||||
assign gpr_rd_ready = {NUM_BANKS{pipe_in_ready}};
|
||||
|
||||
assign scoreboard_if.ready = pipe_in_ready && ~has_collision_n;
|
||||
|
||||
wire pipe_fire_st1 = pipe_valid_st1 && pipe_ready_st1;
|
||||
wire pipe_fire_st2 = pipe_valid_st2 && pipe_ready_st2;
|
||||
|
||||
always @(*) begin
|
||||
has_collision_n = 0;
|
||||
|
@ -136,83 +144,86 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
end
|
||||
|
||||
always @(*) begin
|
||||
src_data_n = src_data;
|
||||
for (integer b = 0; b < NUM_BANKS; ++b) begin
|
||||
if (gpr_rd_valid[b]) begin
|
||||
src_data_n[gpr_rd_req_idx[b]] = gpr_rd_data[b];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire pipe_stall = pipe_out_valid && ~pipe_out_ready;
|
||||
assign pipe_in_ready = ~pipe_stall;
|
||||
|
||||
assign scoreboard_if.ready = pipe_in_ready && ~has_collision_n;
|
||||
|
||||
wire stg_in_fire = stg_in_valid && stg_in_ready;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
pipe_out_valid <= 0;
|
||||
gpr_rd_valid <= '0;
|
||||
data_fetched <= '0;
|
||||
src_data <= '0;
|
||||
data_fetched_n = data_fetched_st1;
|
||||
if (scoreboard_if.ready) begin
|
||||
data_fetched_n = '0;
|
||||
end else begin
|
||||
if (~pipe_stall) begin
|
||||
pipe_out_valid <= scoreboard_if.valid;
|
||||
gpr_rd_valid <= gpr_rd_valid_n;
|
||||
if (scoreboard_if.ready) begin
|
||||
data_fetched <= '0;
|
||||
end else begin
|
||||
data_fetched <= data_fetched | req_in_ready;
|
||||
end
|
||||
if (stg_in_fire) begin
|
||||
src_data <= '0;
|
||||
end else begin
|
||||
src_data <= src_data_n;
|
||||
end
|
||||
end
|
||||
end
|
||||
if (~pipe_stall) begin
|
||||
pipe_out_uuid <= scoreboard_if.data.uuid;
|
||||
pipe_out_data <= {
|
||||
scoreboard_if.data.wis,
|
||||
scoreboard_if.data.tmask,
|
||||
scoreboard_if.data.PC,
|
||||
scoreboard_if.data.wb,
|
||||
scoreboard_if.data.ex_type,
|
||||
scoreboard_if.data.op_type,
|
||||
scoreboard_if.data.op_args,
|
||||
scoreboard_if.data.rd
|
||||
};
|
||||
has_collision <= has_collision_n;
|
||||
gpr_rd_addr <= gpr_rd_addr_n;
|
||||
gpr_rd_req_idx <= gpr_rd_req_idx_n;
|
||||
data_fetched_n = data_fetched_st1 | req_in_ready;
|
||||
end
|
||||
end
|
||||
|
||||
assign pipe_out_ready = stg_in_ready;
|
||||
assign stg_in_valid = pipe_out_valid && ~has_collision;
|
||||
assign pipe_data = {
|
||||
scoreboard_if.data.wis,
|
||||
scoreboard_if.data.tmask,
|
||||
scoreboard_if.data.PC,
|
||||
scoreboard_if.data.wb,
|
||||
scoreboard_if.data.ex_type,
|
||||
scoreboard_if.data.op_type,
|
||||
scoreboard_if.data.op_args,
|
||||
scoreboard_if.data.rd,
|
||||
scoreboard_if.data.uuid
|
||||
};
|
||||
|
||||
`RESET_RELAY (pipe1_reset, reset);
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + NUM_SRC_REGS + NUM_BANKS + META_DATAW + 1 + NUM_BANKS * (PER_BANK_ADDRW + REQ_SEL_WIDTH)),
|
||||
.RESETW (1 + NUM_SRC_REGS)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (pipe1_reset),
|
||||
.enable (pipe_in_ready),
|
||||
.data_in ({scoreboard_if.valid, data_fetched_n, gpr_rd_valid, pipe_data, has_collision_n, gpr_rd_addr, gpr_rd_req_idx}),
|
||||
.data_out ({pipe_valid_st1, data_fetched_st1, gpr_rd_valid_st1, pipe_data_st1, has_collision_st1, gpr_rd_addr_st1, gpr_rd_req_idx_st1})
|
||||
);
|
||||
|
||||
assign pipe_ready_st1 = pipe_ready_st2 || ~pipe_valid_st2;
|
||||
|
||||
assign src_data_st1 = pipe_fire_st2 ? '0 : src_data_n;
|
||||
|
||||
wire pipe_valid2_st1 = pipe_valid_st1 && ~has_collision_st1;
|
||||
|
||||
`RESET_RELAY (pipe2_reset, reset);
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + REGS_DATAW + NUM_BANKS + (NUM_BANKS * `XLEN * `NUM_THREADS) + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH),
|
||||
.RESETW (1 + REGS_DATAW)
|
||||
) pipe_reg2 (
|
||||
.clk (clk),
|
||||
.reset (pipe2_reset),
|
||||
.enable (pipe_ready_st1),
|
||||
.data_in ({pipe_valid2_st1, src_data_st1, gpr_rd_valid_st1, gpr_rd_data_st1, pipe_data_st1, gpr_rd_req_idx_st1}),
|
||||
.data_out ({pipe_valid_st2, src_data_st2, gpr_rd_valid_st2, gpr_rd_data_st2, pipe_data_st2, gpr_rd_req_idx_st2})
|
||||
);
|
||||
|
||||
always @(*) begin
|
||||
src_data_n = src_data_st2;
|
||||
for (integer b = 0; b < NUM_BANKS; ++b) begin
|
||||
if (gpr_rd_valid_st2[b]) begin
|
||||
src_data_n[gpr_rd_req_idx_st2[b]] = gpr_rd_data_st2[b];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
`RESET_RELAY (out_buf_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)),
|
||||
.LUTRAM (1)
|
||||
) out_buffer (
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (stg_in_valid),
|
||||
.ready_in (stg_in_ready),
|
||||
.reset (out_buf_reset),
|
||||
.valid_in (pipe_valid_st2),
|
||||
.ready_in (pipe_ready_st2),
|
||||
.data_in ({
|
||||
pipe_out_uuid,
|
||||
pipe_out_data,
|
||||
pipe_data_st2,
|
||||
src_data_n[0],
|
||||
src_data_n[1],
|
||||
src_data_n[2]
|
||||
}),
|
||||
.data_out ({
|
||||
operands_if.data.uuid,
|
||||
operands_if.data.wis,
|
||||
operands_if.data.tmask,
|
||||
operands_if.data.PC,
|
||||
|
@ -221,6 +232,7 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
operands_if.data.op_type,
|
||||
operands_if.data.op_args,
|
||||
operands_if.data.rd,
|
||||
operands_if.data.uuid,
|
||||
operands_if.data.rs1_data,
|
||||
operands_if.data.rs2_data,
|
||||
operands_if.data.rs3_data
|
||||
|
@ -269,27 +281,26 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
assign wren[i*XLEN_SIZE+:XLEN_SIZE] = {XLEN_SIZE{writeback_if.data.tmask[i]}};
|
||||
end
|
||||
|
||||
`ifdef GPR_RESET
|
||||
VX_dp_ram_rst #(
|
||||
`else
|
||||
`RESET_RELAY (bram_reset, reset);
|
||||
|
||||
VX_dp_ram #(
|
||||
`endif
|
||||
.DATAW (`XLEN * `NUM_THREADS),
|
||||
.SIZE (PER_BANK_REGS * PER_ISSUE_WARPS),
|
||||
.WRENW (BYTEENW),
|
||||
`ifdef GPR_RESET
|
||||
.RESET_RAM (1),
|
||||
`endif
|
||||
.NO_RWCHECK (1)
|
||||
) gpr_ram (
|
||||
.clk (clk),
|
||||
`ifdef GPR_RESET
|
||||
.reset (reset),
|
||||
`endif
|
||||
.read (1'b1),
|
||||
.reset (bram_reset),
|
||||
.read (pipe_fire_st1),
|
||||
.wren (wren),
|
||||
.write (gpr_wr_enabled),
|
||||
.waddr (gpr_wr_addr),
|
||||
.wdata (writeback_if.data.data),
|
||||
.raddr (gpr_rd_addr[b]),
|
||||
.rdata (gpr_rd_data[b])
|
||||
.raddr (gpr_rd_addr_st1[b]),
|
||||
.rdata (gpr_rd_data_st1[b])
|
||||
);
|
||||
end
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -21,7 +21,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #(
|
|||
parameter TAG_WIDTH = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire reset,
|
||||
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
@ -36,7 +36,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #(
|
|||
input wire is_signed,
|
||||
|
||||
input wire [NUM_LANES-1:0][31:0] dataa,
|
||||
output wire [NUM_LANES-1:0][31:0] result,
|
||||
output wire [NUM_LANES-1:0][31:0] result,
|
||||
|
||||
output wire has_fflags,
|
||||
output wire [`FP_FLAGS_BITS-1:0] fflags,
|
||||
|
@ -45,25 +45,26 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #(
|
|||
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
);
|
||||
`UNUSED_VAR (frm)
|
||||
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
|
||||
fflags_t [NUM_LANES-1:0] fflags_out;
|
||||
|
||||
wire pe_enable;
|
||||
wire pe_enable;
|
||||
wire [NUM_PES-1:0][31:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
|
||||
|
||||
|
||||
VX_pe_serializer #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.LATENCY (`LATENCY_FCVT),
|
||||
.DATA_IN_WIDTH(32),
|
||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG (0)
|
||||
.PE_REG (0),
|
||||
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
|
||||
) pe_serializer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -94,7 +95,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #(
|
|||
.enable (pe_enable),
|
||||
.frm (frm),
|
||||
.is_itof (is_itof),
|
||||
.is_signed (is_signed),
|
||||
.is_signed (is_signed),
|
||||
.dataa (pe_data_in[i][0 +: 32]),
|
||||
.result (pe_data_out[i][0 +: 32]),
|
||||
.fflags (pe_data_out[i][32 +: `FP_FLAGS_BITS])
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -21,7 +21,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
|||
parameter TAG_WIDTH = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire reset,
|
||||
|
||||
input wire valid_in,
|
||||
output wire ready_in,
|
||||
|
@ -31,10 +31,10 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
|||
input wire [TAG_WIDTH-1:0] tag_in,
|
||||
|
||||
input wire [`INST_FRM_BITS-1:0] frm,
|
||||
|
||||
|
||||
input wire [NUM_LANES-1:0][31:0] dataa,
|
||||
input wire [NUM_LANES-1:0][31:0] datab,
|
||||
output wire [NUM_LANES-1:0][31:0] result,
|
||||
output wire [NUM_LANES-1:0][31:0] result,
|
||||
|
||||
output wire has_fflags,
|
||||
output wire [`FP_FLAGS_BITS-1:0] fflags,
|
||||
|
@ -47,27 +47,28 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
|||
`UNUSED_VAR (frm)
|
||||
|
||||
wire [NUM_LANES-1:0][2*32-1:0] data_in;
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
|
||||
wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out;
|
||||
|
||||
wire pe_enable;
|
||||
wire pe_enable;
|
||||
wire [NUM_PES-1:0][2*32-1:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
|
||||
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign data_in[i][0 +: 32] = dataa[i];
|
||||
assign data_in[i][32 +: 32] = datab[i];
|
||||
end
|
||||
|
||||
|
||||
VX_pe_serializer #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.LATENCY (`LATENCY_FDIV),
|
||||
.DATA_IN_WIDTH(2*32),
|
||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG (0)
|
||||
.PE_REG (0),
|
||||
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
|
||||
) pe_serializer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -92,7 +93,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
|||
fflags_t [NUM_LANES-1:0] per_lane_fflags;
|
||||
|
||||
`ifdef QUARTUS
|
||||
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
acl_fdiv fdiv (
|
||||
.clk (clk),
|
||||
|
@ -103,8 +104,8 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
|||
.q (pe_data_out[i][0 +: 32])
|
||||
);
|
||||
assign pe_data_out[i][32 +: `FP_FLAGS_BITS] = 'x;
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
assign has_fflags = 0;
|
||||
assign per_lane_fflags = 'x;
|
||||
`UNUSED_VAR (fflags_out)
|
||||
|
@ -131,21 +132,21 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
|||
assign has_fflags = 1;
|
||||
assign per_lane_fflags = fflags_out;
|
||||
|
||||
`else
|
||||
`else
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
reg [63:0] r;
|
||||
`UNUSED_VAR (r)
|
||||
`UNUSED_VAR (r)
|
||||
fflags_t f;
|
||||
|
||||
always @(*) begin
|
||||
always @(*) begin
|
||||
dpi_fdiv (
|
||||
pe_enable,
|
||||
int'(0),
|
||||
{32'hffffffff, pe_data_in[i][0 +: 32]},
|
||||
{32'hffffffff, pe_data_in[i][32 +: 32]},
|
||||
frm,
|
||||
r,
|
||||
pe_enable,
|
||||
int'(0),
|
||||
{32'hffffffff, pe_data_in[i][0 +: 32]},
|
||||
{32'hffffffff, pe_data_in[i][32 +: 32]},
|
||||
frm,
|
||||
r,
|
||||
f
|
||||
);
|
||||
end
|
||||
|
|
|
@ -98,7 +98,8 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
.DATA_IN_WIDTH(3*32),
|
||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0)
|
||||
.PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0), // must be registered for DSPs
|
||||
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
|
||||
) pe_serializer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -35,7 +35,7 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #(
|
|||
|
||||
input wire [NUM_LANES-1:0][31:0] dataa,
|
||||
input wire [NUM_LANES-1:0][31:0] datab,
|
||||
output wire [NUM_LANES-1:0][31:0] result,
|
||||
output wire [NUM_LANES-1:0][31:0] result,
|
||||
|
||||
output wire has_fflags,
|
||||
output wire [`FP_FLAGS_BITS-1:0] fflags,
|
||||
|
@ -44,15 +44,15 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #(
|
|||
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
);
|
||||
`UNUSED_VAR (frm)
|
||||
|
||||
wire [NUM_LANES-1:0][2*32-1:0] data_in;
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
|
||||
fflags_t [NUM_LANES-1:0] fflags_out;
|
||||
|
||||
wire pe_enable;
|
||||
wire pe_enable;
|
||||
wire [NUM_PES-1:0][2*32-1:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
|
||||
|
||||
|
@ -60,15 +60,16 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #(
|
|||
assign data_in[i][0 +: 32] = dataa[i];
|
||||
assign data_in[i][32 +: 32] = datab[i];
|
||||
end
|
||||
|
||||
|
||||
VX_pe_serializer #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.LATENCY (`LATENCY_FNCP),
|
||||
.DATA_IN_WIDTH(2*32),
|
||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG (0)
|
||||
.PE_REG (0),
|
||||
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
|
||||
) pe_serializer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -97,8 +98,8 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (pe_enable),
|
||||
.frm (frm),
|
||||
.op_type (op_type),
|
||||
.frm (frm),
|
||||
.op_type (op_type),
|
||||
.dataa (pe_data_in[i][0 +: 32]),
|
||||
.datab (pe_data_in[i][32 +: 32]),
|
||||
.result (pe_data_out[i][0 +: 32]),
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -18,10 +18,10 @@
|
|||
module VX_fpu_sqrt import VX_fpu_pkg::*; #(
|
||||
parameter NUM_LANES = 1,
|
||||
parameter NUM_PES = `UP(NUM_LANES /`FSQRT_PE_RATIO),
|
||||
parameter TAG_WIDTH = 1
|
||||
parameter TAG_WIDTH = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire reset,
|
||||
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
@ -29,11 +29,11 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #(
|
|||
input wire [NUM_LANES-1:0] mask_in,
|
||||
|
||||
input wire [TAG_WIDTH-1:0] tag_in,
|
||||
|
||||
|
||||
input wire [`INST_FRM_BITS-1:0] frm,
|
||||
|
||||
input wire [NUM_LANES-1:0][31:0] dataa,
|
||||
output wire [NUM_LANES-1:0][31:0] result,
|
||||
output wire [NUM_LANES-1:0][31:0] result,
|
||||
|
||||
output wire has_fflags,
|
||||
output wire [`FP_FLAGS_BITS-1:0] fflags,
|
||||
|
@ -46,22 +46,23 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #(
|
|||
|
||||
`UNUSED_VAR (frm)
|
||||
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
|
||||
wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out;
|
||||
|
||||
wire pe_enable;
|
||||
wire pe_enable;
|
||||
wire [NUM_PES-1:0][31:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
|
||||
|
||||
|
||||
VX_pe_serializer #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.LATENCY (`LATENCY_FSQRT),
|
||||
.DATA_IN_WIDTH(32),
|
||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG (0)
|
||||
.PE_REG (0),
|
||||
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
|
||||
) pe_serializer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -83,10 +84,10 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #(
|
|||
assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS];
|
||||
end
|
||||
|
||||
fflags_t [NUM_LANES-1:0] per_lane_fflags;
|
||||
fflags_t [NUM_LANES-1:0] per_lane_fflags;
|
||||
|
||||
`ifdef QUARTUS
|
||||
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
acl_fsqrt fsqrt (
|
||||
.clk (clk),
|
||||
|
@ -105,7 +106,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #(
|
|||
`elsif VIVADO
|
||||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
wire tuser;
|
||||
wire tuser;
|
||||
|
||||
xil_fsqrt fsqrt (
|
||||
.aclk (clk),
|
||||
|
@ -130,17 +131,17 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #(
|
|||
`UNUSED_VAR (r)
|
||||
fflags_t f;
|
||||
|
||||
always @(*) begin
|
||||
always @(*) begin
|
||||
dpi_fsqrt (
|
||||
pe_enable,
|
||||
int'(0),
|
||||
{32'hffffffff, pe_data_in[i]},
|
||||
frm,
|
||||
r,
|
||||
pe_enable,
|
||||
int'(0),
|
||||
{32'hffffffff, pe_data_in[i]},
|
||||
frm,
|
||||
r,
|
||||
f
|
||||
);
|
||||
end
|
||||
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW (32 + $bits(fflags_t)),
|
||||
.DEPTH (`LATENCY_FSQRT)
|
||||
|
|
|
@ -82,11 +82,14 @@ module VX_avs_adapter #(
|
|||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
|
||||
`RESET_RELAY (rd_req_reset, reset);
|
||||
|
||||
VX_pending_size #(
|
||||
.SIZE (RD_QUEUE_SIZE)
|
||||
) pending_size (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (rd_req_reset),
|
||||
.incr (req_queue_push[i]),
|
||||
.decr (req_queue_pop[i]),
|
||||
`UNUSED_PIN (empty),
|
||||
|
@ -102,7 +105,7 @@ module VX_avs_adapter #(
|
|||
.DEPTH (RD_QUEUE_SIZE)
|
||||
) rd_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (rd_req_reset),
|
||||
.push (req_queue_push[i]),
|
||||
.pop (req_queue_pop[i]),
|
||||
.data_in (mem_req_tag),
|
||||
|
@ -126,13 +129,15 @@ module VX_avs_adapter #(
|
|||
wire valid_out_w = mem_req_valid && ~req_queue_going_full[i] && (req_bank_sel == i);
|
||||
wire ready_out_w;
|
||||
|
||||
`RESET_RELAY (req_out_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + DATA_SIZE + BANK_OFFSETW + DATA_WIDTH),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(REQ_OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(REQ_OUT_BUF))
|
||||
) req_out_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (req_out_reset),
|
||||
.valid_in (valid_out_w),
|
||||
.ready_in (ready_out_w),
|
||||
.data_in ({mem_req_rw, mem_req_byteen, req_bank_off, mem_req_data}),
|
||||
|
@ -168,12 +173,15 @@ module VX_avs_adapter #(
|
|||
wire [NUM_BANKS-1:0] rsp_queue_empty;
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
|
||||
`RESET_RELAY (rd_rsp_reset, reset);
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (DATA_WIDTH),
|
||||
.DEPTH (RD_QUEUE_SIZE)
|
||||
) rd_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (rd_rsp_reset),
|
||||
.push (avs_readdatavalid[i]),
|
||||
.pop (req_queue_pop[i]),
|
||||
.data_in (avs_readdata[i]),
|
||||
|
@ -192,14 +200,16 @@ module VX_avs_adapter #(
|
|||
assign req_queue_pop[i] = rsp_arb_valid_in[i] && rsp_arb_ready_in[i];
|
||||
end
|
||||
|
||||
`RESET_RELAY (rsp_arb_reset, reset);
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (NUM_BANKS),
|
||||
.DATAW (DATA_WIDTH + TAG_WIDTH),
|
||||
.ARBITER ("R"),
|
||||
.ARBITER ("F"),
|
||||
.OUT_BUF (RSP_OUT_BUF)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (rsp_arb_reset),
|
||||
.valid_in (rsp_arb_valid_in),
|
||||
.data_in (rsp_arb_data_in),
|
||||
.ready_in (rsp_arb_ready_in),
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -15,10 +15,10 @@
|
|||
|
||||
`TRACING_OFF
|
||||
module VX_axi_adapter #(
|
||||
parameter DATA_WIDTH = 512,
|
||||
parameter DATA_WIDTH = 512,
|
||||
parameter ADDR_WIDTH = 32,
|
||||
parameter TAG_WIDTH = 8,
|
||||
parameter NUM_BANKS = 1,
|
||||
parameter NUM_BANKS = 1,
|
||||
parameter AVS_ADDR_WIDTH = (ADDR_WIDTH - `CLOG2(DATA_WIDTH/8)),
|
||||
parameter RSP_OUT_BUF = 0
|
||||
) (
|
||||
|
@ -34,13 +34,13 @@ module VX_axi_adapter #(
|
|||
input wire [TAG_WIDTH-1:0] mem_req_tag,
|
||||
output wire mem_req_ready,
|
||||
|
||||
// Vortex response
|
||||
output wire mem_rsp_valid,
|
||||
// Vortex response
|
||||
output wire mem_rsp_valid,
|
||||
output wire [DATA_WIDTH-1:0] mem_rsp_data,
|
||||
output wire [TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
input wire mem_rsp_ready,
|
||||
|
||||
// AXI write request address channel
|
||||
// AXI write request address channel
|
||||
output wire m_axi_awvalid [NUM_BANKS],
|
||||
input wire m_axi_awready [NUM_BANKS],
|
||||
output wire [ADDR_WIDTH-1:0] m_axi_awaddr [NUM_BANKS],
|
||||
|
@ -54,7 +54,7 @@ module VX_axi_adapter #(
|
|||
output wire [3:0] m_axi_awqos [NUM_BANKS],
|
||||
output wire [3:0] m_axi_awregion [NUM_BANKS],
|
||||
|
||||
// AXI write request data channel
|
||||
// AXI write request data channel
|
||||
output wire m_axi_wvalid [NUM_BANKS],
|
||||
input wire m_axi_wready [NUM_BANKS],
|
||||
output wire [DATA_WIDTH-1:0] m_axi_wdata [NUM_BANKS],
|
||||
|
@ -66,7 +66,7 @@ module VX_axi_adapter #(
|
|||
output wire m_axi_bready [NUM_BANKS],
|
||||
input wire [TAG_WIDTH-1:0] m_axi_bid [NUM_BANKS],
|
||||
input wire [1:0] m_axi_bresp [NUM_BANKS],
|
||||
|
||||
|
||||
// AXI read address channel
|
||||
output wire m_axi_arvalid [NUM_BANKS],
|
||||
input wire m_axi_arready [NUM_BANKS],
|
||||
|
@ -74,13 +74,13 @@ module VX_axi_adapter #(
|
|||
output wire [TAG_WIDTH-1:0] m_axi_arid [NUM_BANKS],
|
||||
output wire [7:0] m_axi_arlen [NUM_BANKS],
|
||||
output wire [2:0] m_axi_arsize [NUM_BANKS],
|
||||
output wire [1:0] m_axi_arburst [NUM_BANKS],
|
||||
output wire [1:0] m_axi_arburst [NUM_BANKS],
|
||||
output wire [1:0] m_axi_arlock [NUM_BANKS],
|
||||
output wire [3:0] m_axi_arcache [NUM_BANKS],
|
||||
output wire [2:0] m_axi_arprot [NUM_BANKS],
|
||||
output wire [3:0] m_axi_arqos [NUM_BANKS],
|
||||
output wire [3:0] m_axi_arregion [NUM_BANKS],
|
||||
|
||||
|
||||
// AXI read response channel
|
||||
input wire m_axi_rvalid [NUM_BANKS],
|
||||
output wire m_axi_rready [NUM_BANKS],
|
||||
|
@ -88,15 +88,15 @@ module VX_axi_adapter #(
|
|||
input wire m_axi_rlast [NUM_BANKS],
|
||||
input wire [TAG_WIDTH-1:0] m_axi_rid [NUM_BANKS],
|
||||
input wire [1:0] m_axi_rresp [NUM_BANKS]
|
||||
);
|
||||
);
|
||||
localparam AXSIZE = `CLOG2(DATA_WIDTH/8);
|
||||
localparam BANK_ADDRW = `LOG2UP(NUM_BANKS);
|
||||
localparam BANK_ADDRW = `LOG2UP(NUM_BANKS);
|
||||
localparam LOG2_NUM_BANKS = `CLOG2(NUM_BANKS);
|
||||
|
||||
wire [BANK_ADDRW-1:0] req_bank_sel;
|
||||
|
||||
if (NUM_BANKS > 1) begin
|
||||
assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0];
|
||||
assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0];
|
||||
end else begin
|
||||
assign req_bank_sel = '0;
|
||||
end
|
||||
|
@ -108,12 +108,12 @@ module VX_axi_adapter #(
|
|||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
wire m_axi_aw_fire = m_axi_awvalid[i] && m_axi_awready[i];
|
||||
wire m_axi_w_fire = m_axi_wvalid[i] && m_axi_wready[i];
|
||||
wire m_axi_w_fire = m_axi_wvalid[i] && m_axi_wready[i];
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
m_axi_aw_ack[i] <= 0;
|
||||
m_axi_w_ack[i] <= 0;
|
||||
end else begin
|
||||
end else begin
|
||||
if (mem_req_fire && (req_bank_sel == i)) begin
|
||||
m_axi_aw_ack[i] <= 0;
|
||||
m_axi_w_ack[i] <= 0;
|
||||
|
@ -127,10 +127,10 @@ module VX_axi_adapter #(
|
|||
end
|
||||
end
|
||||
|
||||
wire axi_write_ready [NUM_BANKS];
|
||||
wire axi_write_ready [NUM_BANKS];
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign axi_write_ready[i] = (m_axi_awready[i] || m_axi_aw_ack[i])
|
||||
assign axi_write_ready[i] = (m_axi_awready[i] || m_axi_aw_ack[i])
|
||||
&& (m_axi_wready[i] || m_axi_w_ack[i]);
|
||||
end
|
||||
|
||||
|
@ -141,17 +141,17 @@ module VX_axi_adapter #(
|
|||
assign mem_req_ready = mem_req_rw ? axi_write_ready[0] : m_axi_arready[0];
|
||||
end
|
||||
|
||||
// AXI write request address channel
|
||||
// AXI write request address channel
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i];
|
||||
assign m_axi_awaddr[i] = (ADDR_WIDTH'(mem_req_addr) >> LOG2_NUM_BANKS) << AXSIZE;
|
||||
assign m_axi_awid[i] = mem_req_tag;
|
||||
assign m_axi_awlen[i] = 8'b00000000;
|
||||
assign m_axi_awlen[i] = 8'b00000000;
|
||||
assign m_axi_awsize[i] = 3'(AXSIZE);
|
||||
assign m_axi_awburst[i] = 2'b00;
|
||||
assign m_axi_awlock[i] = 2'b00;
|
||||
assign m_axi_awburst[i] = 2'b00;
|
||||
assign m_axi_awlock[i] = 2'b00;
|
||||
assign m_axi_awcache[i] = 4'b0000;
|
||||
assign m_axi_awprot[i] = 3'b000;
|
||||
assign m_axi_awprot[i] = 3'b000;
|
||||
assign m_axi_awqos[i] = 4'b0000;
|
||||
assign m_axi_awregion[i]= 4'b0000;
|
||||
end
|
||||
|
@ -170,31 +170,31 @@ module VX_axi_adapter #(
|
|||
`UNUSED_VAR (m_axi_bid[i])
|
||||
`UNUSED_VAR (m_axi_bresp[i])
|
||||
assign m_axi_bready[i] = 1'b1;
|
||||
`RUNTIME_ASSERT(~m_axi_bvalid[i] || m_axi_bresp[i] == 0, ("%t: *** AXI response error", $time));
|
||||
`RUNTIME_ASSERT(~m_axi_bvalid[i] || m_axi_bresp[i] == 0, ("%t: *** AXI response error", $time));
|
||||
end
|
||||
|
||||
// AXI read request channel
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i);
|
||||
assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i);
|
||||
assign m_axi_araddr[i] = (ADDR_WIDTH'(mem_req_addr) >> LOG2_NUM_BANKS) << AXSIZE;
|
||||
assign m_axi_arid[i] = mem_req_tag;
|
||||
assign m_axi_arlen[i] = 8'b00000000;
|
||||
assign m_axi_arsize[i] = 3'(AXSIZE);
|
||||
assign m_axi_arburst[i] = 2'b00;
|
||||
assign m_axi_arlock[i] = 2'b00;
|
||||
assign m_axi_arburst[i] = 2'b00;
|
||||
assign m_axi_arlock[i] = 2'b00;
|
||||
assign m_axi_arcache[i] = 4'b0000;
|
||||
assign m_axi_arprot[i] = 3'b000;
|
||||
assign m_axi_arqos[i] = 4'b0000;
|
||||
assign m_axi_arregion[i]= 4'b0000;
|
||||
end
|
||||
|
||||
// AXI read response channel
|
||||
// AXI read response channel
|
||||
|
||||
wire [NUM_BANKS-1:0] rsp_arb_valid_in;
|
||||
wire [NUM_BANKS-1:0][DATA_WIDTH+TAG_WIDTH-1:0] rsp_arb_data_in;
|
||||
wire [NUM_BANKS-1:0] rsp_arb_ready_in;
|
||||
|
||||
`UNUSED_VAR (m_axi_rlast)
|
||||
`UNUSED_VAR (m_axi_rlast)
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign rsp_arb_valid_in[i] = m_axi_rvalid[i];
|
||||
|
@ -204,14 +204,16 @@ module VX_axi_adapter #(
|
|||
`RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rresp[i] == 0, ("%t: *** AXI response error", $time));
|
||||
end
|
||||
|
||||
`RESET_RELAY (rsp_arb_reset, reset);
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (NUM_BANKS),
|
||||
.DATAW (DATA_WIDTH + TAG_WIDTH),
|
||||
.ARBITER ("R"),
|
||||
.ARBITER ("F"),
|
||||
.OUT_BUF (RSP_OUT_BUF)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (rsp_arb_reset),
|
||||
.valid_in (rsp_arb_valid_in),
|
||||
.data_in (rsp_arb_data_in),
|
||||
.ready_in (rsp_arb_ready_in),
|
||||
|
|
|
@ -23,12 +23,15 @@ module VX_dp_ram #(
|
|||
parameter NO_RWCHECK = 0,
|
||||
parameter LUTRAM = 0,
|
||||
parameter RW_ASSERT = 0,
|
||||
parameter RESET_RAM = 0,
|
||||
parameter READ_ENABLE = 0,
|
||||
parameter INIT_ENABLE = 0,
|
||||
parameter INIT_FILE = "",
|
||||
parameter [DATAW-1:0] INIT_VALUE = 0,
|
||||
parameter ADDRW = `LOG2UP(SIZE)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire read,
|
||||
input wire write,
|
||||
input wire [WRENW-1:0] wren,
|
||||
|
@ -58,42 +61,37 @@ module VX_dp_ram #(
|
|||
`RUNTIME_ASSERT(~write || (| wren), ("invalid write enable mask"));
|
||||
end
|
||||
|
||||
wire [DATAW-1:0] rdata_w;
|
||||
|
||||
`ifdef SYNTHESIS
|
||||
if (WRENW > 1) begin
|
||||
`ifdef QUARTUS
|
||||
if (LUTRAM != 0) begin
|
||||
if (OUT_REG != 0) begin
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
`USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
if (read) begin
|
||||
rdata_r <= ram[raddr];
|
||||
`USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin
|
||||
`USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end
|
||||
assign rdata_w = ram[raddr];
|
||||
end else begin
|
||||
if (OUT_REG != 0) begin
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
if (NO_RWCHECK != 0) begin
|
||||
`NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata_w = ram[raddr];
|
||||
end else begin
|
||||
reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
|
@ -103,37 +101,8 @@ module VX_dp_ram #(
|
|||
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
if (read) begin
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin
|
||||
if (NO_RWCHECK != 0) begin
|
||||
`NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end else begin
|
||||
reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end
|
||||
assign rdata_w = ram[raddr];
|
||||
end
|
||||
end
|
||||
`else
|
||||
|
@ -141,35 +110,18 @@ module VX_dp_ram #(
|
|||
if (LUTRAM != 0) begin
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
if (OUT_REG != 0) begin
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
if (read) begin
|
||||
rdata_r <= ram[raddr];
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end
|
||||
assign rdata_w = ram[raddr];
|
||||
end else begin
|
||||
if (OUT_REG != 0) begin
|
||||
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
if (NO_RWCHECK != 0) begin
|
||||
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
|
@ -178,37 +130,20 @@ module VX_dp_ram #(
|
|||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
if (read) begin
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
assign rdata_w = ram[raddr];
|
||||
end else begin
|
||||
if (NO_RWCHECK != 0) begin
|
||||
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end else begin
|
||||
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end
|
||||
assign rdata_w = ram[raddr];
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
@ -217,64 +152,36 @@ module VX_dp_ram #(
|
|||
if (LUTRAM != 0) begin
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
if (OUT_REG != 0) begin
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
if (read) begin
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end
|
||||
assign rdata_w = ram[raddr];
|
||||
end else begin
|
||||
if (OUT_REG != 0) begin
|
||||
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
if (NO_RWCHECK != 0) begin
|
||||
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
if (read) begin
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
assign rdata_w = ram[raddr];
|
||||
end else begin
|
||||
if (NO_RWCHECK != 0) begin
|
||||
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end else begin
|
||||
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end
|
||||
assign rdata_w = ram[raddr];
|
||||
end
|
||||
end
|
||||
end
|
||||
`else
|
||||
// RAM emulation
|
||||
// simulation
|
||||
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
|
@ -283,42 +190,57 @@ module VX_dp_ram #(
|
|||
assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW];
|
||||
end
|
||||
|
||||
if (OUT_REG != 0) begin
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
reg [DATAW-1:0] prev_data;
|
||||
reg [ADDRW-1:0] prev_waddr;
|
||||
reg prev_write;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (RESET_RAM && reset) begin
|
||||
for (integer i = 0; i < SIZE; ++i) begin
|
||||
ram[i] <= DATAW'(INIT_VALUE);
|
||||
end
|
||||
end else begin
|
||||
if (write) begin
|
||||
ram[waddr] <= ram_n;
|
||||
end
|
||||
if (read) begin
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin
|
||||
reg [DATAW-1:0] prev_data;
|
||||
reg [ADDRW-1:0] prev_waddr;
|
||||
reg prev_write;
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= ram_n;
|
||||
end
|
||||
if (reset) begin
|
||||
prev_write <= 0;
|
||||
prev_data <= '0;
|
||||
prev_waddr <= '0;
|
||||
end else begin
|
||||
prev_write <= write;
|
||||
prev_data <= ram[waddr];
|
||||
prev_waddr <= waddr;
|
||||
end
|
||||
if (LUTRAM || !NO_RWCHECK) begin
|
||||
`UNUSED_VAR (prev_write)
|
||||
`UNUSED_VAR (prev_data)
|
||||
`UNUSED_VAR (prev_waddr)
|
||||
assign rdata = ram[raddr];
|
||||
end else begin
|
||||
assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
|
||||
if (RW_ASSERT) begin
|
||||
`RUNTIME_ASSERT(~read || (rdata == ram[raddr]), ("read after write hazard"));
|
||||
end
|
||||
end
|
||||
|
||||
if (LUTRAM || !NO_RWCHECK) begin
|
||||
`UNUSED_VAR (prev_write)
|
||||
`UNUSED_VAR (prev_data)
|
||||
`UNUSED_VAR (prev_waddr)
|
||||
assign rdata_w = ram[raddr];
|
||||
end else begin
|
||||
assign rdata_w = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
|
||||
if (RW_ASSERT) begin
|
||||
`RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("read after write hazard"));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
if (OUT_REG != 0) begin
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (READ_ENABLE && reset) begin
|
||||
rdata_r <= '0;
|
||||
end else if (!READ_ENABLE || read) begin
|
||||
rdata_r <= rdata_w;
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin
|
||||
assign rdata = rdata_w;
|
||||
end
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
|
|
|
@ -1,115 +0,0 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
`TRACING_OFF
|
||||
module VX_dp_ram_rst #(
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 1,
|
||||
parameter ADDR_MIN = 0,
|
||||
parameter WRENW = 1,
|
||||
parameter OUT_REG = 0,
|
||||
parameter NO_RWCHECK = 0,
|
||||
parameter LUTRAM = 0,
|
||||
parameter INIT_ENABLE = 0,
|
||||
parameter INIT_FILE = "",
|
||||
parameter [DATAW-1:0] INIT_VALUE = 0,
|
||||
parameter ADDRW = `LOG2UP(SIZE)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire read,
|
||||
input wire write,
|
||||
input wire [WRENW-1:0] wren,
|
||||
input wire [ADDRW-1:0] waddr,
|
||||
input wire [DATAW-1:0] wdata,
|
||||
input wire [ADDRW-1:0] raddr,
|
||||
output wire [DATAW-1:0] rdata
|
||||
);
|
||||
localparam WSELW = DATAW / WRENW;
|
||||
`STATIC_ASSERT((WRENW * WSELW == DATAW), ("invalid parameter"))
|
||||
|
||||
`define RAM_INITIALIZATION \
|
||||
if (INIT_ENABLE != 0) begin \
|
||||
if (INIT_FILE != "") begin \
|
||||
initial $readmemh(INIT_FILE, ram); \
|
||||
end else begin \
|
||||
initial \
|
||||
for (integer i = 0; i < SIZE; ++i) \
|
||||
ram[i] = INIT_VALUE; \
|
||||
end \
|
||||
end
|
||||
|
||||
`UNUSED_VAR (read)
|
||||
|
||||
// RAM emulation
|
||||
reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
wire [DATAW-1:0] ram_n;
|
||||
for (genvar i = 0; i < WRENW; ++i) begin
|
||||
assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW];
|
||||
end
|
||||
|
||||
if (OUT_REG != 0) begin
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
for (integer i = 0; i < SIZE; ++i) begin
|
||||
ram[i] <= DATAW'(INIT_VALUE);
|
||||
end
|
||||
rdata_r <= '0;
|
||||
end else begin
|
||||
if (write) begin
|
||||
ram[waddr] <= ram_n;
|
||||
end
|
||||
if (read) begin
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin
|
||||
reg [DATAW-1:0] prev_data;
|
||||
reg [ADDRW-1:0] prev_waddr;
|
||||
reg prev_write;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
for (integer i = 0; i < SIZE; ++i) begin
|
||||
ram[i] <= DATAW'(INIT_VALUE);
|
||||
end
|
||||
prev_write <= 0;
|
||||
prev_data <= '0;
|
||||
prev_waddr <= '0;
|
||||
end else begin
|
||||
if (write) begin
|
||||
ram[waddr] <= ram_n;
|
||||
end
|
||||
prev_write <= (| wren);
|
||||
prev_data <= ram[waddr];
|
||||
prev_waddr <= waddr;
|
||||
end
|
||||
end
|
||||
if (LUTRAM || !NO_RWCHECK) begin
|
||||
`UNUSED_VAR (prev_write)
|
||||
`UNUSED_VAR (prev_data)
|
||||
`UNUSED_VAR (prev_waddr)
|
||||
assign rdata = ram[raddr];
|
||||
end else begin
|
||||
assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
|
@ -103,9 +103,9 @@ module VX_elastic_buffer #(
|
|||
|
||||
assign ready_in = ~full;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
VX_pipe_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE ((OUT_REG == 2) ? 1 : 0)
|
||||
.DEPTH ((OUT_REG > 0) ? (OUT_REG-1) : 0)
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -177,10 +177,11 @@ module VX_fifo_queue #(
|
|||
.SIZE (DEPTH),
|
||||
.LUTRAM (LUTRAM)
|
||||
) dp_ram (
|
||||
.clk(clk),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (1'b1),
|
||||
.write (push),
|
||||
`UNUSED_PIN (wren),
|
||||
.wren (1'b1),
|
||||
.waddr (wr_ptr_r),
|
||||
.wdata (data_in),
|
||||
.raddr (rd_ptr_r),
|
||||
|
@ -226,9 +227,10 @@ module VX_fifo_queue #(
|
|||
.LUTRAM (LUTRAM)
|
||||
) dp_ram (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (1'b1),
|
||||
.write (push),
|
||||
`UNUSED_PIN (wren),
|
||||
.wren (1'b1),
|
||||
.waddr (wr_ptr_r),
|
||||
.wdata (data_in),
|
||||
.raddr (rd_ptr_n_r),
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -24,17 +24,17 @@ module VX_index_buffer #(
|
|||
input wire reset,
|
||||
|
||||
output wire [ADDRW-1:0] write_addr,
|
||||
input wire [DATAW-1:0] write_data,
|
||||
input wire [DATAW-1:0] write_data,
|
||||
input wire acquire_en,
|
||||
|
||||
input wire [ADDRW-1:0] read_addr,
|
||||
output wire [DATAW-1:0] read_data,
|
||||
input wire release_en,
|
||||
|
||||
|
||||
output wire empty,
|
||||
output wire full
|
||||
output wire full
|
||||
);
|
||||
|
||||
|
||||
VX_allocator #(
|
||||
.SIZE (SIZE)
|
||||
) allocator (
|
||||
|
@ -43,9 +43,9 @@ module VX_index_buffer #(
|
|||
.acquire_en (acquire_en),
|
||||
.acquire_addr (write_addr),
|
||||
.release_en (release_en),
|
||||
.release_addr (read_addr),
|
||||
.release_addr (read_addr),
|
||||
.empty (empty),
|
||||
.full (full)
|
||||
.full (full)
|
||||
);
|
||||
|
||||
VX_dp_ram #(
|
||||
|
@ -54,14 +54,15 @@ module VX_index_buffer #(
|
|||
.LUTRAM (LUTRAM)
|
||||
) data_table (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (1'b1),
|
||||
.write (acquire_en),
|
||||
`UNUSED_PIN (wren),
|
||||
.wren (1'b1),
|
||||
.waddr (write_addr),
|
||||
.wdata (write_data),
|
||||
.raddr (read_addr),
|
||||
.rdata (read_data)
|
||||
);
|
||||
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -15,10 +15,10 @@
|
|||
|
||||
`TRACING_OFF
|
||||
module VX_mem_adapter #(
|
||||
parameter SRC_DATA_WIDTH = 1,
|
||||
parameter SRC_ADDR_WIDTH = 1,
|
||||
parameter DST_DATA_WIDTH = 1,
|
||||
parameter DST_ADDR_WIDTH = 1,
|
||||
parameter SRC_DATA_WIDTH = 1,
|
||||
parameter SRC_ADDR_WIDTH = 1,
|
||||
parameter DST_DATA_WIDTH = 1,
|
||||
parameter DST_ADDR_WIDTH = 1,
|
||||
parameter SRC_TAG_WIDTH = 1,
|
||||
parameter DST_TAG_WIDTH = 1,
|
||||
parameter REQ_OUT_BUF = 0,
|
||||
|
@ -35,9 +35,9 @@ module VX_mem_adapter #(
|
|||
input wire [SRC_TAG_WIDTH-1:0] mem_req_tag_in,
|
||||
output wire mem_req_ready_in,
|
||||
|
||||
output wire mem_rsp_valid_in,
|
||||
output wire [SRC_DATA_WIDTH-1:0] mem_rsp_data_in,
|
||||
output wire [SRC_TAG_WIDTH-1:0] mem_rsp_tag_in,
|
||||
output wire mem_rsp_valid_in,
|
||||
output wire [SRC_DATA_WIDTH-1:0] mem_rsp_data_in,
|
||||
output wire [SRC_TAG_WIDTH-1:0] mem_rsp_tag_in,
|
||||
input wire mem_rsp_ready_in,
|
||||
|
||||
output wire mem_req_valid_out,
|
||||
|
@ -48,12 +48,12 @@ module VX_mem_adapter #(
|
|||
output wire [DST_TAG_WIDTH-1:0] mem_req_tag_out,
|
||||
input wire mem_req_ready_out,
|
||||
|
||||
input wire mem_rsp_valid_out,
|
||||
input wire [DST_DATA_WIDTH-1:0] mem_rsp_data_out,
|
||||
input wire mem_rsp_valid_out,
|
||||
input wire [DST_DATA_WIDTH-1:0] mem_rsp_data_out,
|
||||
input wire [DST_TAG_WIDTH-1:0] mem_rsp_tag_out,
|
||||
output wire mem_rsp_ready_out
|
||||
);
|
||||
`STATIC_ASSERT ((DST_TAG_WIDTH >= SRC_TAG_WIDTH), ("oops!"))
|
||||
);
|
||||
`STATIC_ASSERT ((DST_TAG_WIDTH >= SRC_TAG_WIDTH), ("oops!"))
|
||||
|
||||
localparam DST_DATA_SIZE = (DST_DATA_WIDTH / 8);
|
||||
localparam DST_LDATAW = `CLOG2(DST_DATA_WIDTH);
|
||||
|
@ -69,7 +69,7 @@ module VX_mem_adapter #(
|
|||
wire [DST_TAG_WIDTH-1:0] mem_req_tag_out_w;
|
||||
wire mem_req_ready_out_w;
|
||||
|
||||
wire mem_rsp_valid_in_w;
|
||||
wire mem_rsp_valid_in_w;
|
||||
wire [SRC_DATA_WIDTH-1:0] mem_rsp_data_in_w;
|
||||
wire [SRC_TAG_WIDTH-1:0] mem_rsp_tag_in_w;
|
||||
wire mem_rsp_ready_in_w;
|
||||
|
@ -80,7 +80,7 @@ module VX_mem_adapter #(
|
|||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
|
||||
wire [D-1:0] req_idx = mem_req_addr_in[D-1:0];
|
||||
wire [D-1:0] rsp_idx = mem_rsp_tag_out[D-1:0];
|
||||
|
||||
|
@ -99,31 +99,31 @@ module VX_mem_adapter #(
|
|||
|
||||
assign mem_req_valid_out_w = mem_req_valid_in;
|
||||
assign mem_req_rw_out_w = mem_req_rw_in;
|
||||
assign mem_req_byteen_out_w = DST_DATA_SIZE'(mem_req_byteen_in) << ((DST_LDATAW-3)'(req_idx) << (SRC_LDATAW-3));
|
||||
assign mem_req_byteen_out_w = DST_DATA_SIZE'(mem_req_byteen_in) << ((DST_LDATAW-3)'(req_idx) << (SRC_LDATAW-3));
|
||||
assign mem_req_data_out_w = DST_DATA_WIDTH'(mem_req_data_in) << ((DST_LDATAW'(req_idx)) << SRC_LDATAW);
|
||||
assign mem_req_tag_out_w = DST_TAG_WIDTH'({mem_req_tag_in, req_idx});
|
||||
assign mem_req_ready_in = mem_req_ready_out_w;
|
||||
|
||||
assign mem_rsp_valid_in_w = mem_rsp_valid_out;
|
||||
assign mem_rsp_data_in_w = mem_rsp_data_out_w[rsp_idx];
|
||||
assign mem_rsp_data_in_w = mem_rsp_data_out_w[rsp_idx];
|
||||
assign mem_rsp_tag_in_w = SRC_TAG_WIDTH'(mem_rsp_tag_out[SRC_TAG_WIDTH+D-1:D]);
|
||||
assign mem_rsp_ready_out = mem_rsp_ready_in_w;
|
||||
|
||||
end else if (DST_LDATAW < SRC_LDATAW) begin
|
||||
|
||||
|
||||
reg [D-1:0] req_ctr, rsp_ctr;
|
||||
|
||||
reg [P-1:0][DST_DATA_WIDTH-1:0] mem_rsp_data_out_r, mem_rsp_data_out_n;
|
||||
|
||||
wire mem_req_out_fire = mem_req_valid_out && mem_req_ready_out;
|
||||
wire mem_rsp_in_fire = mem_rsp_valid_out && mem_rsp_ready_out;
|
||||
wire mem_rsp_in_fire = mem_rsp_valid_out && mem_rsp_ready_out;
|
||||
|
||||
wire [P-1:0][DST_DATA_WIDTH-1:0] mem_req_data_in_w = mem_req_data_in;
|
||||
wire [P-1:0][DST_DATA_SIZE-1:0] mem_req_byteen_in_w = mem_req_byteen_in;
|
||||
|
||||
always @(*) begin
|
||||
mem_rsp_data_out_n = mem_rsp_data_out_r;
|
||||
if (mem_rsp_in_fire) begin
|
||||
if (mem_rsp_in_fire) begin
|
||||
mem_rsp_data_out_n[rsp_ctr] = mem_rsp_data_out;
|
||||
end
|
||||
end
|
||||
|
@ -139,24 +139,24 @@ module VX_mem_adapter #(
|
|||
if (mem_rsp_in_fire) begin
|
||||
rsp_ctr <= rsp_ctr + 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
mem_rsp_data_out_r <= mem_rsp_data_out_n;
|
||||
end
|
||||
|
||||
reg [DST_TAG_WIDTH-1:0] mem_rsp_tag_in_r;
|
||||
wire [DST_TAG_WIDTH-1:0] mem_rsp_tag_in_x;
|
||||
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (mem_rsp_in_fire) begin
|
||||
mem_rsp_tag_in_r <= mem_rsp_tag_out;
|
||||
end
|
||||
end
|
||||
end
|
||||
assign mem_rsp_tag_in_x = (rsp_ctr != 0) ? mem_rsp_tag_in_r : mem_rsp_tag_out;
|
||||
`RUNTIME_ASSERT(!mem_rsp_in_fire || (mem_rsp_tag_in_x == mem_rsp_tag_out),
|
||||
`RUNTIME_ASSERT(!mem_rsp_in_fire || (mem_rsp_tag_in_x == mem_rsp_tag_out),
|
||||
("%t: *** out-of-order memory reponse! cur=%d, expected=%d", $time, mem_rsp_tag_in_x, mem_rsp_tag_out))
|
||||
|
||||
wire [SRC_ADDR_WIDTH+D-1:0] mem_req_addr_in_qual = {mem_req_addr_in, req_ctr};
|
||||
|
||||
|
||||
if (DST_ADDR_WIDTH < (SRC_ADDR_WIDTH + D)) begin
|
||||
`UNUSED_VAR (mem_req_addr_in_qual)
|
||||
assign mem_req_addr_out_w = mem_req_addr_in_qual[DST_ADDR_WIDTH-1:0];
|
||||
|
@ -181,8 +181,8 @@ module VX_mem_adapter #(
|
|||
end else begin
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
if (DST_ADDR_WIDTH < SRC_ADDR_WIDTH) begin
|
||||
`UNUSED_VAR (mem_req_addr_in)
|
||||
assign mem_req_addr_out_w = mem_req_addr_in[DST_ADDR_WIDTH-1:0];
|
||||
|
@ -206,13 +206,15 @@ module VX_mem_adapter #(
|
|||
|
||||
end
|
||||
|
||||
`RESET_RELAY (req_out_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + DST_DATA_SIZE + DST_ADDR_WIDTH + DST_DATA_WIDTH + DST_TAG_WIDTH),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(REQ_OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(REQ_OUT_BUF))
|
||||
) req_out_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (req_out_reset),
|
||||
.valid_in (mem_req_valid_out_w),
|
||||
.ready_in (mem_req_ready_out_w),
|
||||
.data_in ({mem_req_rw_out_w, mem_req_byteen_out_w, mem_req_addr_out_w, mem_req_data_out_w, mem_req_tag_out_w}),
|
||||
|
@ -221,13 +223,15 @@ module VX_mem_adapter #(
|
|||
.ready_out (mem_req_ready_out)
|
||||
);
|
||||
|
||||
`RESET_RELAY (rsp_in_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (SRC_DATA_WIDTH + SRC_TAG_WIDTH),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(RSP_OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(RSP_OUT_BUF))
|
||||
) rsp_in_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (rsp_in_reset),
|
||||
.valid_in (mem_rsp_valid_in_w),
|
||||
.ready_in (mem_rsp_ready_in_w),
|
||||
.data_in ({mem_rsp_data_in_w, mem_rsp_tag_in_w}),
|
||||
|
|
|
@ -87,16 +87,16 @@ module VX_mem_coalescer #(
|
|||
localparam STATE_SETUP = 0;
|
||||
localparam STATE_SEND = 1;
|
||||
|
||||
reg state_r, state_n;
|
||||
logic state_r, state_n;
|
||||
|
||||
reg out_req_valid_r, out_req_valid_n;
|
||||
reg out_req_rw_r, out_req_rw_n;
|
||||
reg [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n;
|
||||
reg [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n;
|
||||
reg [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype_r, out_req_atype_n;
|
||||
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] out_req_byteen_r, out_req_byteen_n;
|
||||
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] out_req_data_r, out_req_data_n;
|
||||
reg [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n;
|
||||
logic out_req_valid_r, out_req_valid_n;
|
||||
logic out_req_rw_r, out_req_rw_n;
|
||||
logic [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n;
|
||||
logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n;
|
||||
logic [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype_r, out_req_atype_n;
|
||||
logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] out_req_byteen_r, out_req_byteen_n;
|
||||
logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] out_req_data_r, out_req_data_n;
|
||||
logic [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n;
|
||||
|
||||
reg in_req_ready_n;
|
||||
|
||||
|
@ -149,29 +149,6 @@ module VX_mem_coalescer #(
|
|||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state_r <= STATE_SETUP;
|
||||
processed_mask_r <= '0;
|
||||
out_req_valid_r <= 0;
|
||||
end else begin
|
||||
state_r <= state_n;
|
||||
batch_valid_r <= batch_valid_n;
|
||||
seed_addr_r <= seed_addr_n;
|
||||
seed_atype_r <= seed_atype_n;
|
||||
addr_matches_r <= addr_matches_n;
|
||||
out_req_valid_r <= out_req_valid_n;
|
||||
out_req_mask_r <= out_req_mask_n;
|
||||
out_req_rw_r <= out_req_rw_n;
|
||||
out_req_addr_r <= out_req_addr_n;
|
||||
out_req_atype_r <= out_req_atype_n;
|
||||
out_req_byteen_r <= out_req_byteen_n;
|
||||
out_req_data_r <= out_req_data_n;
|
||||
out_req_tag_r <= out_req_tag_n;
|
||||
processed_mask_r <= processed_mask_n;
|
||||
end
|
||||
end
|
||||
|
||||
wire [NUM_REQS-1:0] current_pmask = in_req_mask & addr_matches_r;
|
||||
|
||||
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] req_byteen_merged;
|
||||
|
@ -248,6 +225,19 @@ module VX_mem_coalescer #(
|
|||
endcase
|
||||
end
|
||||
|
||||
`RESET_RELAY (pipe_reset, reset);
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + NUM_REQS + 1 + 1 + NUM_REQS + OUT_REQS * (1 + 1 + OUT_ADDR_WIDTH + ATYPE_WIDTH + OUT_ADDR_WIDTH + ATYPE_WIDTH + DATA_OUT_SIZE + DATA_OUT_WIDTH) + OUT_TAG_WIDTH),
|
||||
.RESETW (1 + NUM_REQS + 1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (pipe_reset),
|
||||
.enable (1'b1),
|
||||
.data_in ({state_n, processed_mask_n, out_req_valid_n, out_req_rw_n, addr_matches_n, batch_valid_n, out_req_mask_n, seed_addr_n, seed_atype_n, out_req_addr_n, out_req_atype_n, out_req_byteen_n, out_req_data_n, out_req_tag_n}),
|
||||
.data_out ({state_r, processed_mask_r, out_req_valid_r, out_req_rw_r, addr_matches_r, batch_valid_r, out_req_mask_r, seed_addr_r, seed_atype_r, out_req_addr_r, out_req_atype_r, out_req_byteen_r, out_req_data_r, out_req_tag_r})
|
||||
);
|
||||
|
||||
wire out_rsp_fire = out_rsp_valid && out_rsp_ready;
|
||||
|
||||
wire out_rsp_eop;
|
||||
|
|
|
@ -167,13 +167,15 @@ module VX_mem_scheduler #(
|
|||
assign reqq_tag_u = ibuf_waddr;
|
||||
end
|
||||
|
||||
`RESET_RELAY (reqq_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + ATYPE_WIDTH + WORD_WIDTH) + REQQ_TAG_WIDTH),
|
||||
.SIZE (CORE_QUEUE_SIZE),
|
||||
.OUT_REG (1)
|
||||
) req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (reqq_reset),
|
||||
.valid_in (reqq_valid_in),
|
||||
.ready_in (reqq_ready_in),
|
||||
.data_in ({core_req_rw, core_req_mask, core_req_byteen, core_req_addr, core_req_atype, core_req_data, reqq_tag_u}),
|
||||
|
@ -389,13 +391,15 @@ module VX_mem_scheduler #(
|
|||
|
||||
assign reqq_ready_s = req_sent_all;
|
||||
|
||||
`RESET_RELAY (mem_req_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (MEM_CHANNELS + 1 + MEM_CHANNELS * (LINE_SIZE + MEM_ADDR_WIDTH + ATYPE_WIDTH + LINE_WIDTH) + MEM_TAG_WIDTH),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(MEM_OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
) mem_req_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (mem_req_reset),
|
||||
.valid_in (mem_req_valid_s),
|
||||
.ready_in (mem_req_ready_s),
|
||||
.data_in ({mem_req_mask_s, mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_atype_s, mem_req_data_s, mem_req_tag_s}),
|
||||
|
@ -509,13 +513,15 @@ module VX_mem_scheduler #(
|
|||
|
||||
// Send response to caller
|
||||
|
||||
`RESET_RELAY (crsp_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (CORE_REQS + 1 + 1 + (CORE_REQS * WORD_WIDTH) + TAG_WIDTH),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(CORE_OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF))
|
||||
) rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (crsp_reset),
|
||||
.valid_in (crsp_valid),
|
||||
.ready_in (crsp_ready),
|
||||
.data_in ({crsp_mask, crsp_sop, crsp_eop, crsp_data, crsp_tag}),
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -23,13 +23,13 @@ module VX_onehot_encoder #(
|
|||
parameter MODEL = 1,
|
||||
parameter LN = `LOG2UP(N)
|
||||
) (
|
||||
input wire [N-1:0] data_in,
|
||||
input wire [N-1:0] data_in,
|
||||
output wire [LN-1:0] data_out,
|
||||
output wire valid_out
|
||||
);
|
||||
);
|
||||
if (N == 1) begin
|
||||
|
||||
assign data_out = data_in;
|
||||
assign data_out = 0;
|
||||
assign valid_out = data_in;
|
||||
|
||||
end else if (N == 2) begin
|
||||
|
@ -37,43 +37,43 @@ module VX_onehot_encoder #(
|
|||
assign data_out = data_in[!REVERSE];
|
||||
assign valid_out = (| data_in);
|
||||
|
||||
end else if (MODEL == 1) begin
|
||||
localparam M = 1 << LN;
|
||||
`IGNORE_UNOPTFLAT_BEGIN
|
||||
end else if (MODEL == 1) begin
|
||||
localparam M = 1 << LN;
|
||||
`IGNORE_UNOPTFLAT_BEGIN
|
||||
wire [LN-1:0][M-1:0] addr;
|
||||
wire [LN:0][M-1:0] v;
|
||||
`IGNORE_UNOPTFLAT_END
|
||||
|
||||
|
||||
// base case, also handle padding for non-power of two inputs
|
||||
assign v[0] = REVERSE ? (M'(data_in) << (M - N)) : M'(data_in);
|
||||
|
||||
|
||||
for (genvar lvl = 1; lvl < (LN+1); ++lvl) begin
|
||||
localparam SN = 1 << (LN - lvl);
|
||||
localparam SI = M / SN;
|
||||
localparam SW = lvl;
|
||||
|
||||
|
||||
for (genvar s = 0; s < SN; ++s) begin
|
||||
`IGNORE_UNOPTFLAT_BEGIN
|
||||
wire [1:0] vs = {v[lvl-1][s*SI+(SI>>1)], v[lvl-1][s*SI]};
|
||||
`IGNORE_UNOPTFLAT_END
|
||||
|
||||
|
||||
assign v[lvl][s*SI] = (| vs);
|
||||
|
||||
if (lvl == 1) begin
|
||||
assign addr[lvl-1][s*SI +: SW] = vs[!REVERSE];
|
||||
assign addr[lvl-1][s*SI +: SW] = vs[!REVERSE];
|
||||
end else begin
|
||||
assign addr[lvl-1][s*SI +: SW] = {
|
||||
assign addr[lvl-1][s*SI +: SW] = {
|
||||
vs[!REVERSE],
|
||||
addr[lvl-2][s*SI +: SW-1] | addr[lvl-2][s*SI+(SI>>1) +: SW-1]
|
||||
};
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign data_out = addr[LN-1][LN-1:0];
|
||||
assign valid_out = v[LN][0];
|
||||
|
||||
end else if (MODEL == 2 && REVERSE == 0) begin
|
||||
end else if (MODEL == 2 && REVERSE == 0) begin
|
||||
|
||||
for (genvar j = 0; j < LN; ++j) begin
|
||||
wire [N-1:0] mask;
|
||||
|
@ -90,19 +90,19 @@ module VX_onehot_encoder #(
|
|||
reg [LN-1:0] index_r;
|
||||
|
||||
if (REVERSE != 0) begin
|
||||
always @(*) begin
|
||||
index_r = 'x;
|
||||
always @(*) begin
|
||||
index_r = 'x;
|
||||
for (integer i = N-1; i >= 0; --i) begin
|
||||
if (data_in[i]) begin
|
||||
if (data_in[i]) begin
|
||||
index_r = LN'(N-1-i);
|
||||
end
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
always @(*) begin
|
||||
index_r = 'x;
|
||||
always @(*) begin
|
||||
index_r = 'x;
|
||||
for (integer i = 0; i < N; ++i) begin
|
||||
if (data_in[i]) begin
|
||||
if (data_in[i]) begin
|
||||
index_r = LN'(i);
|
||||
end
|
||||
end
|
||||
|
|
|
@ -17,7 +17,8 @@
|
|||
module VX_onehot_mux #(
|
||||
parameter DATAW = 1,
|
||||
parameter N = 1,
|
||||
parameter MODEL = 1
|
||||
parameter MODEL = 1,
|
||||
parameter LUT_OPT = 1
|
||||
) (
|
||||
input wire [N-1:0][DATAW-1:0] data_in,
|
||||
input wire [N-1:0] sel_in,
|
||||
|
@ -26,6 +27,90 @@ module VX_onehot_mux #(
|
|||
if (N == 1) begin
|
||||
`UNUSED_VAR (sel_in)
|
||||
assign data_out = data_in;
|
||||
end else if (LUT_OPT && N == 2) begin
|
||||
`UNUSED_VAR (sel_in)
|
||||
assign data_out = sel_in[0] ? data_in[0] : data_in[1];
|
||||
end else if (LUT_OPT && N == 3) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
3'b001: data_out_r = data_in[0];
|
||||
3'b010: data_out_r = data_in[1];
|
||||
3'b100: data_out_r = data_in[2];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (LUT_OPT && N == 4) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
4'b0001: data_out_r = data_in[0];
|
||||
4'b0010: data_out_r = data_in[1];
|
||||
4'b0100: data_out_r = data_in[2];
|
||||
4'b1000: data_out_r = data_in[3];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (LUT_OPT && N == 5) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
5'b00001: data_out_r = data_in[0];
|
||||
5'b00010: data_out_r = data_in[1];
|
||||
5'b00100: data_out_r = data_in[2];
|
||||
5'b01000: data_out_r = data_in[3];
|
||||
5'b10000: data_out_r = data_in[4];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (LUT_OPT && N == 6) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
6'b000001: data_out_r = data_in[0];
|
||||
6'b000010: data_out_r = data_in[1];
|
||||
6'b000100: data_out_r = data_in[2];
|
||||
6'b001000: data_out_r = data_in[3];
|
||||
6'b010000: data_out_r = data_in[4];
|
||||
6'b100000: data_out_r = data_in[5];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (LUT_OPT && N == 7) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
7'b0000001: data_out_r = data_in[0];
|
||||
7'b0000010: data_out_r = data_in[1];
|
||||
7'b0000100: data_out_r = data_in[2];
|
||||
7'b0001000: data_out_r = data_in[3];
|
||||
7'b0010000: data_out_r = data_in[4];
|
||||
7'b0100000: data_out_r = data_in[5];
|
||||
7'b1000000: data_out_r = data_in[6];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (LUT_OPT && N == 8) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
8'b00000001: data_out_r = data_in[0];
|
||||
8'b00000010: data_out_r = data_in[1];
|
||||
8'b00000100: data_out_r = data_in[2];
|
||||
8'b00001000: data_out_r = data_in[3];
|
||||
8'b00010000: data_out_r = data_in[4];
|
||||
8'b00100000: data_out_r = data_in[5];
|
||||
8'b01000000: data_out_r = data_in[6];
|
||||
8'b10000000: data_out_r = data_in[7];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (MODEL == 1) begin
|
||||
wire [N-1:0][DATAW-1:0] mask;
|
||||
for (genvar i = 0; i < N; ++i) begin
|
||||
|
|
|
@ -21,7 +21,8 @@ module VX_pe_serializer #(
|
|||
parameter DATA_IN_WIDTH = 1,
|
||||
parameter DATA_OUT_WIDTH = 1,
|
||||
parameter TAG_WIDTH = 0,
|
||||
parameter PE_REG = 0
|
||||
parameter PE_REG = 0,
|
||||
parameter OUT_BUF = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -43,6 +44,11 @@ module VX_pe_serializer #(
|
|||
output wire [TAG_WIDTH-1:0] tag_out,
|
||||
input wire ready_out
|
||||
);
|
||||
wire valid_out_u;
|
||||
wire [NUM_LANES-1:0][DATA_OUT_WIDTH-1:0] data_out_u;
|
||||
wire [TAG_WIDTH-1:0] tag_out_u;
|
||||
wire ready_out_u;
|
||||
|
||||
wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_in_s;
|
||||
wire valid_out_s;
|
||||
wire [TAG_WIDTH-1:0] tag_out_s;
|
||||
|
@ -105,7 +111,7 @@ module VX_pe_serializer #(
|
|||
reg [TAG_WIDTH-1:0] tag_out_r;
|
||||
|
||||
wire valid_out_b = valid_out_s && batch_out_done;
|
||||
wire ready_out_b = ready_out || ~valid_out;
|
||||
wire ready_out_b = ready_out_u || ~valid_out_u;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
@ -119,29 +125,44 @@ module VX_pe_serializer #(
|
|||
end
|
||||
end
|
||||
|
||||
assign enable = ready_out_b || ~valid_out_b;
|
||||
assign ready_in = enable && batch_in_done;
|
||||
assign enable = ready_out_b || ~valid_out_b;
|
||||
assign ready_in = enable && batch_in_done;
|
||||
assign pe_enable = enable;
|
||||
|
||||
assign pe_enable = enable;
|
||||
|
||||
assign valid_out = valid_out_r;
|
||||
assign data_out = data_out_r;
|
||||
assign tag_out = tag_out_r;
|
||||
assign valid_out_u = valid_out_r;
|
||||
assign data_out_u = data_out_r;
|
||||
assign tag_out_u = tag_out_r;
|
||||
|
||||
end else begin
|
||||
|
||||
assign pe_data_in_s = data_in;
|
||||
|
||||
assign enable = ready_out || ~valid_out;
|
||||
assign ready_in = enable;
|
||||
assign enable = ready_out_u || ~valid_out_u;
|
||||
assign ready_in = enable;
|
||||
assign pe_enable = enable;
|
||||
|
||||
assign pe_enable = enable;
|
||||
|
||||
assign valid_out = valid_out_s;
|
||||
assign data_out = pe_data_out;
|
||||
assign tag_out = tag_out_s;
|
||||
assign valid_out_u = valid_out_s;
|
||||
assign data_out_u = pe_data_out;
|
||||
assign tag_out_u = tag_out_s;
|
||||
|
||||
end
|
||||
|
||||
`RESET_RELAY (out_buf_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (NUM_LANES * DATA_OUT_WIDTH + TAG_WIDTH),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (out_buf_reset),
|
||||
.valid_in (valid_out_u),
|
||||
.ready_in (ready_out_u),
|
||||
.data_in ({data_out_u, tag_out_u}),
|
||||
.data_out ({data_out, tag_out}),
|
||||
.valid_out (valid_out),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
// Copyright 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -24,39 +24,53 @@
|
|||
|
||||
`TRACING_OFF
|
||||
module VX_pipe_buffer #(
|
||||
parameter DATAW = 1,
|
||||
parameter PASSTHRU = 0
|
||||
) (
|
||||
parameter DATAW = 1,
|
||||
parameter DEPTH = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire valid_in,
|
||||
output wire ready_in,
|
||||
output wire ready_in,
|
||||
input wire [DATAW-1:0] data_in,
|
||||
output wire [DATAW-1:0] data_out,
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
if (PASSTHRU != 0) begin
|
||||
);
|
||||
if (DEPTH == 0) begin
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
assign ready_in = ready_out;
|
||||
assign valid_out = valid_in;
|
||||
assign valid_out = valid_in;
|
||||
assign data_out = data_in;
|
||||
end else begin
|
||||
wire stall = valid_out && ~ready_out;
|
||||
wire [DEPTH:0] valid;
|
||||
`IGNORE_UNOPTFLAT_BEGIN
|
||||
wire [DEPTH:0] ready;
|
||||
`IGNORE_UNOPTFLAT_END
|
||||
wire [DEPTH:0][DATAW-1:0] data;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + DATAW),
|
||||
.RESETW (1)
|
||||
) pipe_register (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.data_in ({valid_in, data_in}),
|
||||
.data_out ({valid_out, data_out})
|
||||
);
|
||||
assign valid[0] = valid_in;
|
||||
assign data[0] = data_in;
|
||||
assign ready_in = ready[0];
|
||||
|
||||
for (genvar i = 0; i < DEPTH; ++i) begin
|
||||
assign ready[i] = (ready[i+1] || ~valid[i+1]);
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + DATAW),
|
||||
.RESETW (1)
|
||||
) pipe_register (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (ready[i]),
|
||||
.data_in ({valid[i], data[i]}),
|
||||
.data_out ({valid[i+1], data[i+1]})
|
||||
);
|
||||
end
|
||||
|
||||
assign valid_out = valid[DEPTH];
|
||||
assign data_out = data[DEPTH];
|
||||
assign ready[DEPTH] = ready_out;
|
||||
|
||||
assign ready_in = ~stall;
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -23,12 +23,14 @@ module VX_sp_ram #(
|
|||
parameter NO_RWCHECK = 0,
|
||||
parameter RW_ASSERT = 0,
|
||||
parameter LUTRAM = 0,
|
||||
parameter RESET_RAM = 0,
|
||||
parameter INIT_ENABLE = 0,
|
||||
parameter INIT_FILE = "",
|
||||
parameter [DATAW-1:0] INIT_VALUE = 0,
|
||||
parameter ADDRW = `LOG2UP(SIZE)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire read,
|
||||
input wire write,
|
||||
input wire [WRENW-1:0] wren,
|
||||
|
@ -45,12 +47,14 @@ module VX_sp_ram #(
|
|||
.NO_RWCHECK (NO_RWCHECK),
|
||||
.RW_ASSERT (RW_ASSERT),
|
||||
.LUTRAM (LUTRAM),
|
||||
.RESET_RAM (RESET_RAM),
|
||||
.INIT_ENABLE (INIT_ENABLE),
|
||||
.INIT_FILE (INIT_FILE),
|
||||
.INIT_VALUE (INIT_VALUE),
|
||||
.ADDRW (ADDRW)
|
||||
) dp_ram (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (read),
|
||||
.write (write),
|
||||
.wren (wren),
|
||||
|
|
|
@ -18,7 +18,7 @@ module VX_stream_arb #(
|
|||
parameter NUM_INPUTS = 1,
|
||||
parameter NUM_OUTPUTS = 1,
|
||||
parameter DATAW = 1,
|
||||
parameter `STRING ARBITER = "P",
|
||||
parameter `STRING ARBITER = "R",
|
||||
parameter MAX_FANOUT = `MAX_FANOUT,
|
||||
parameter OUT_BUF = 0,
|
||||
parameter LUTRAM = 0,
|
||||
|
@ -46,14 +46,14 @@ module VX_stream_arb #(
|
|||
|
||||
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
|
||||
|
||||
localparam BATCH_BEGIN = i * NUM_REQS;
|
||||
localparam BATCH_END = `MIN(BATCH_BEGIN + NUM_REQS, NUM_INPUTS);
|
||||
localparam BATCH_SIZE = BATCH_END - BATCH_BEGIN;
|
||||
localparam SLICE_BEGIN = i * NUM_REQS;
|
||||
localparam SLICE_END = `MIN(SLICE_BEGIN + NUM_REQS, NUM_INPUTS);
|
||||
localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN;
|
||||
|
||||
`RESET_RELAY (slice_reset, reset);
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (BATCH_SIZE),
|
||||
.NUM_INPUTS (SLICE_SIZE),
|
||||
.NUM_OUTPUTS (1),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
|
@ -63,9 +63,9 @@ module VX_stream_arb #(
|
|||
) arb_slice (
|
||||
.clk (clk),
|
||||
.reset (slice_reset),
|
||||
.valid_in (valid_in[BATCH_END-1: BATCH_BEGIN]),
|
||||
.ready_in (ready_in[BATCH_END-1: BATCH_BEGIN]),
|
||||
.data_in (data_in[BATCH_END-1: BATCH_BEGIN]),
|
||||
.valid_in (valid_in[SLICE_END-1: SLICE_BEGIN]),
|
||||
.ready_in (ready_in[SLICE_END-1: SLICE_BEGIN]),
|
||||
.data_in (data_in[SLICE_END-1: SLICE_BEGIN]),
|
||||
.data_out (data_out[i]),
|
||||
.sel_out (sel_out[i]),
|
||||
.valid_out (valid_out[i]),
|
||||
|
@ -77,28 +77,28 @@ module VX_stream_arb #(
|
|||
|
||||
// (#inputs > max_fanout) and (#outputs == 1)
|
||||
|
||||
localparam NUM_BATCHES = `CDIV(NUM_INPUTS, MAX_FANOUT);
|
||||
localparam NUM_SLICES = `CDIV(NUM_INPUTS, MAX_FANOUT);
|
||||
localparam LOG_NUM_REQS2 = `CLOG2(MAX_FANOUT);
|
||||
localparam LOG_NUM_REQS3 = `CLOG2(NUM_BATCHES);
|
||||
localparam LOG_NUM_REQS3 = `CLOG2(NUM_SLICES);
|
||||
|
||||
wire [NUM_BATCHES-1:0] valid_tmp;
|
||||
wire [NUM_BATCHES-1:0][DATAW+LOG_NUM_REQS2-1:0] data_tmp;
|
||||
wire [NUM_BATCHES-1:0] ready_tmp;
|
||||
wire [NUM_SLICES-1:0] valid_tmp;
|
||||
wire [NUM_SLICES-1:0][DATAW+LOG_NUM_REQS2-1:0] data_tmp;
|
||||
wire [NUM_SLICES-1:0] ready_tmp;
|
||||
|
||||
for (genvar i = 0; i < NUM_BATCHES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_SLICES; ++i) begin
|
||||
|
||||
localparam BATCH_BEGIN = i * MAX_FANOUT;
|
||||
localparam BATCH_END = `MIN(BATCH_BEGIN + MAX_FANOUT, NUM_INPUTS);
|
||||
localparam BATCH_SIZE = BATCH_END - BATCH_BEGIN;
|
||||
localparam SLICE_BEGIN = i * MAX_FANOUT;
|
||||
localparam SLICE_END = `MIN(SLICE_BEGIN + MAX_FANOUT, NUM_INPUTS);
|
||||
localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN;
|
||||
|
||||
wire [DATAW-1:0] data_tmp_u;
|
||||
wire [`LOG2UP(BATCH_SIZE)-1:0] sel_tmp_u;
|
||||
wire [`LOG2UP(SLICE_SIZE)-1:0] sel_tmp_u;
|
||||
|
||||
`RESET_RELAY (slice_reset, reset);
|
||||
|
||||
if (MAX_FANOUT != 1) begin
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (BATCH_SIZE),
|
||||
.NUM_INPUTS (SLICE_SIZE),
|
||||
.NUM_OUTPUTS (1),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
|
@ -108,9 +108,9 @@ module VX_stream_arb #(
|
|||
) fanout_slice_arb (
|
||||
.clk (clk),
|
||||
.reset (slice_reset),
|
||||
.valid_in (valid_in[BATCH_END-1: BATCH_BEGIN]),
|
||||
.data_in (data_in[BATCH_END-1: BATCH_BEGIN]),
|
||||
.ready_in (ready_in[BATCH_END-1: BATCH_BEGIN]),
|
||||
.valid_in (valid_in[SLICE_END-1: SLICE_BEGIN]),
|
||||
.data_in (data_in[SLICE_END-1: SLICE_BEGIN]),
|
||||
.ready_in (ready_in[SLICE_END-1: SLICE_BEGIN]),
|
||||
.valid_out (valid_tmp[i]),
|
||||
.data_out (data_tmp_u),
|
||||
.sel_out (sel_tmp_u),
|
||||
|
@ -125,7 +125,7 @@ module VX_stream_arb #(
|
|||
wire [LOG_NUM_REQS3-1:0] sel_out_u;
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (NUM_BATCHES),
|
||||
.NUM_INPUTS (NUM_SLICES),
|
||||
.NUM_OUTPUTS (1),
|
||||
.DATAW (DATAW + LOG_NUM_REQS2),
|
||||
.ARBITER (ARBITER),
|
||||
|
@ -214,15 +214,15 @@ module VX_stream_arb #(
|
|||
|
||||
for (genvar i = 0; i < NUM_INPUTS; ++i) begin
|
||||
|
||||
localparam BATCH_BEGIN = i * NUM_REQS;
|
||||
localparam BATCH_END = `MIN(BATCH_BEGIN + NUM_REQS, NUM_OUTPUTS);
|
||||
localparam BATCH_SIZE = BATCH_END - BATCH_BEGIN;
|
||||
localparam SLICE_BEGIN = i * NUM_REQS;
|
||||
localparam SLICE_END = `MIN(SLICE_BEGIN + NUM_REQS, NUM_OUTPUTS);
|
||||
localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN;
|
||||
|
||||
`RESET_RELAY (slice_reset, reset);
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (1),
|
||||
.NUM_OUTPUTS (BATCH_SIZE),
|
||||
.NUM_OUTPUTS (SLICE_SIZE),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
|
@ -234,13 +234,13 @@ module VX_stream_arb #(
|
|||
.valid_in (valid_in[i]),
|
||||
.ready_in (ready_in[i]),
|
||||
.data_in (data_in[i]),
|
||||
.data_out (data_out[BATCH_END-1: BATCH_BEGIN]),
|
||||
.valid_out (valid_out[BATCH_END-1: BATCH_BEGIN]),
|
||||
.ready_out (ready_out[BATCH_END-1: BATCH_BEGIN]),
|
||||
.data_out (data_out[SLICE_END-1: SLICE_BEGIN]),
|
||||
.valid_out (valid_out[SLICE_END-1: SLICE_BEGIN]),
|
||||
.ready_out (ready_out[SLICE_END-1: SLICE_BEGIN]),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
for (genvar j = BATCH_BEGIN; j < BATCH_END; ++j) begin
|
||||
for (genvar j = SLICE_BEGIN; j < SLICE_END; ++j) begin
|
||||
assign sel_out[j] = i;
|
||||
end
|
||||
end
|
||||
|
@ -249,15 +249,15 @@ module VX_stream_arb #(
|
|||
|
||||
// (#inputs == 1) and (#outputs > max_fanout)
|
||||
|
||||
localparam NUM_BATCHES = `CDIV(NUM_OUTPUTS, MAX_FANOUT);
|
||||
localparam NUM_SLICES = `CDIV(NUM_OUTPUTS, MAX_FANOUT);
|
||||
|
||||
wire [NUM_BATCHES-1:0] valid_tmp;
|
||||
wire [NUM_BATCHES-1:0][DATAW-1:0] data_tmp;
|
||||
wire [NUM_BATCHES-1:0] ready_tmp;
|
||||
wire [NUM_SLICES-1:0] valid_tmp;
|
||||
wire [NUM_SLICES-1:0][DATAW-1:0] data_tmp;
|
||||
wire [NUM_SLICES-1:0] ready_tmp;
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (1),
|
||||
.NUM_OUTPUTS (NUM_BATCHES),
|
||||
.NUM_OUTPUTS (NUM_SLICES),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
|
@ -275,17 +275,17 @@ module VX_stream_arb #(
|
|||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_BATCHES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_SLICES; ++i) begin
|
||||
|
||||
localparam BATCH_BEGIN = i * MAX_FANOUT;
|
||||
localparam BATCH_END = `MIN(BATCH_BEGIN + MAX_FANOUT, NUM_OUTPUTS);
|
||||
localparam BATCH_SIZE = BATCH_END - BATCH_BEGIN;
|
||||
localparam SLICE_BEGIN = i * MAX_FANOUT;
|
||||
localparam SLICE_END = `MIN(SLICE_BEGIN + MAX_FANOUT, NUM_OUTPUTS);
|
||||
localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN;
|
||||
|
||||
`RESET_RELAY (slice_reset, reset);
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (1),
|
||||
.NUM_OUTPUTS (BATCH_SIZE),
|
||||
.NUM_OUTPUTS (SLICE_SIZE),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
|
@ -297,9 +297,9 @@ module VX_stream_arb #(
|
|||
.valid_in (valid_tmp[i]),
|
||||
.ready_in (ready_tmp[i]),
|
||||
.data_in (data_tmp[i]),
|
||||
.data_out (data_out[BATCH_END-1: BATCH_BEGIN]),
|
||||
.valid_out (valid_out[BATCH_END-1: BATCH_BEGIN]),
|
||||
.ready_out (ready_out[BATCH_END-1: BATCH_BEGIN]),
|
||||
.data_out (data_out[SLICE_END-1: SLICE_BEGIN]),
|
||||
.valid_out (valid_out[SLICE_END-1: SLICE_BEGIN]),
|
||||
.ready_out (ready_out[SLICE_END-1: SLICE_BEGIN]),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
end
|
||||
|
|
|
@ -20,7 +20,7 @@ module VX_stream_xbar #(
|
|||
parameter DATAW = 4,
|
||||
parameter IN_WIDTH = `LOG2UP(NUM_INPUTS),
|
||||
parameter OUT_WIDTH = `LOG2UP(NUM_OUTPUTS),
|
||||
parameter ARBITER = "P",
|
||||
parameter ARBITER = "R",
|
||||
parameter OUT_BUF = 0,
|
||||
parameter LUTRAM = 0,
|
||||
parameter MAX_FANOUT = `MAX_FANOUT,
|
||||
|
|
|
@ -94,7 +94,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_req_idx;
|
||||
wire [NUM_BANKS-1:0] per_bank_req_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0][REQ_DATAW-1:0] per_bank_req_data_all;
|
||||
wire [NUM_BANKS-1:0][REQ_DATAW-1:0] per_bank_req_data_aos;
|
||||
|
||||
wire [NUM_REQS-1:0] req_valid_in;
|
||||
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in;
|
||||
|
@ -111,7 +111,8 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
req_bank_addr[i],
|
||||
mem_bus_if[i].req_data.byteen,
|
||||
mem_bus_if[i].req_data.data,
|
||||
mem_bus_if[i].req_data.tag};
|
||||
mem_bus_if[i].req_data.tag
|
||||
};
|
||||
assign mem_bus_if[i].req_ready = req_ready_in[i];
|
||||
end
|
||||
|
||||
|
@ -120,6 +121,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
.NUM_OUTPUTS (NUM_BANKS),
|
||||
.DATAW (REQ_DATAW),
|
||||
.PERF_CTR_BITS (`PERF_CTR_BITS),
|
||||
.ARBITER ("F"),
|
||||
.OUT_BUF (3) // output should be registered for the data_store addressing
|
||||
) req_xbar (
|
||||
.clk (clk),
|
||||
|
@ -134,7 +136,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
.sel_in (req_bank_idx),
|
||||
.ready_in (req_ready_in),
|
||||
.valid_out (per_bank_req_valid),
|
||||
.data_out (per_bank_req_data_all),
|
||||
.data_out (per_bank_req_data_aos),
|
||||
.sel_out (per_bank_req_idx),
|
||||
.ready_out (per_bank_req_ready)
|
||||
);
|
||||
|
@ -145,7 +147,8 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
per_bank_req_addr[i],
|
||||
per_bank_req_byteen[i],
|
||||
per_bank_req_data[i],
|
||||
per_bank_req_tag[i]} = per_bank_req_data_all[i];
|
||||
per_bank_req_tag[i]
|
||||
} = per_bank_req_data_aos[i];
|
||||
end
|
||||
|
||||
// banks access
|
||||
|
@ -156,38 +159,55 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_rsp_tag;
|
||||
wire [NUM_BANKS-1:0] per_bank_rsp_ready;
|
||||
|
||||
`RESET_RELAY (bank_reset, reset);
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
wire bank_rsp_valid, bank_rsp_ready;
|
||||
wire [WORD_WIDTH-1:0] bank_rsp_data;
|
||||
|
||||
`RESET_RELAY (bram_reset, reset);
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (WORD_WIDTH),
|
||||
.SIZE (WORDS_PER_BANK),
|
||||
.WRENW (WORD_SIZE)
|
||||
.WRENW (WORD_SIZE),
|
||||
.NO_RWCHECK (1)
|
||||
) data_store (
|
||||
.clk (clk),
|
||||
.read (1'b1),
|
||||
.reset (bram_reset),
|
||||
.read (per_bank_req_valid[i] && per_bank_req_ready[i] && ~per_bank_req_rw[i]),
|
||||
.write (per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i]),
|
||||
.wren (per_bank_req_byteen[i]),
|
||||
.addr (per_bank_req_addr[i]),
|
||||
.wdata (per_bank_req_data[i]),
|
||||
.rdata (per_bank_rsp_data[i])
|
||||
.rdata (bank_rsp_data)
|
||||
);
|
||||
|
||||
// drop write response
|
||||
wire per_bank_req_valid_w, per_bank_req_ready_w;
|
||||
assign per_bank_req_valid_w = per_bank_req_valid[i] && ~per_bank_req_rw[i];
|
||||
assign per_bank_req_ready[i] = per_bank_req_ready_w || per_bank_req_rw[i];
|
||||
// read-during-write hazard detection
|
||||
reg [BANK_ADDR_WIDTH-1:0] last_wr_addr;
|
||||
reg last_wr_valid;
|
||||
always @(posedge clk) begin
|
||||
if (bram_reset) begin
|
||||
last_wr_valid <= 0;
|
||||
end else begin
|
||||
last_wr_valid <= per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i];
|
||||
end
|
||||
last_wr_addr <= per_bank_req_addr[i];
|
||||
end
|
||||
wire is_rdw_hazard = last_wr_valid && ~per_bank_req_rw[i] && (per_bank_req_addr[i] == last_wr_addr);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (REQ_SEL_WIDTH + TAG_WIDTH),
|
||||
.SIZE (0)
|
||||
) bank_buf (
|
||||
// drop write response and stall on read-during-write hazard
|
||||
assign bank_rsp_valid = per_bank_req_valid[i] && ~per_bank_req_rw[i] && ~is_rdw_hazard;
|
||||
assign per_bank_req_ready[i] = (bank_rsp_ready || per_bank_req_rw[i]) && ~is_rdw_hazard;
|
||||
|
||||
// register BRAM output
|
||||
VX_pipe_buffer #(
|
||||
.DATAW (REQ_SEL_WIDTH + WORD_WIDTH + TAG_WIDTH)
|
||||
) bram_buf (
|
||||
.clk (clk),
|
||||
.reset (bank_reset),
|
||||
.valid_in (per_bank_req_valid_w),
|
||||
.ready_in (per_bank_req_ready_w),
|
||||
.data_in ({per_bank_req_idx[i], per_bank_req_tag[i]}),
|
||||
.data_out ({per_bank_rsp_idx[i], per_bank_rsp_tag[i]}),
|
||||
.reset (bram_reset),
|
||||
.valid_in (bank_rsp_valid),
|
||||
.ready_in (bank_rsp_ready),
|
||||
.data_in ({per_bank_req_idx[i], bank_rsp_data, per_bank_req_tag[i]}),
|
||||
.data_out ({per_bank_rsp_idx[i], per_bank_rsp_data[i], per_bank_rsp_tag[i]}),
|
||||
.valid_out (per_bank_rsp_valid[i]),
|
||||
.ready_out (per_bank_rsp_ready[i])
|
||||
);
|
||||
|
@ -195,10 +215,10 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
|
||||
// bank responses gather
|
||||
|
||||
wire [NUM_BANKS-1:0][RSP_DATAW-1:0] per_bank_rsp_data_all;
|
||||
wire [NUM_BANKS-1:0][RSP_DATAW-1:0] per_bank_rsp_data_aos;
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign per_bank_rsp_data_all[i] = {per_bank_rsp_data[i], per_bank_rsp_tag[i]};
|
||||
assign per_bank_rsp_data_aos[i] = {per_bank_rsp_data[i], per_bank_rsp_tag[i]};
|
||||
end
|
||||
|
||||
wire [NUM_REQS-1:0] rsp_valid_out;
|
||||
|
@ -209,6 +229,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
.NUM_INPUTS (NUM_BANKS),
|
||||
.NUM_OUTPUTS (NUM_REQS),
|
||||
.DATAW (RSP_DATAW),
|
||||
.ARBITER ("P"), // this priority arbiter has negligeable impact om performance
|
||||
.OUT_BUF (OUT_BUF)
|
||||
) rsp_xbar (
|
||||
.clk (clk),
|
||||
|
@ -216,7 +237,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
`UNUSED_PIN (collisions),
|
||||
.sel_in (per_bank_rsp_idx),
|
||||
.valid_in (per_bank_rsp_valid),
|
||||
.data_in (per_bank_rsp_data_all),
|
||||
.data_in (per_bank_rsp_data_aos),
|
||||
.ready_in (per_bank_rsp_ready),
|
||||
.valid_out (rsp_valid_out),
|
||||
.data_out (rsp_data_out),
|
||||
|
|
|
@ -73,10 +73,10 @@ ifneq ($(TARGET), fpga)
|
|||
CFLAGS += -DSIMULATION
|
||||
endif
|
||||
|
||||
# Debugigng
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
ifneq ($(TARGET), fpga)
|
||||
CFLAGS += $(DBG_TRACE_FLAGS)
|
||||
CFLAGS += -DDEBUG_LEVEL=$(DEBUG) $(DBG_TRACE_FLAGS)
|
||||
else
|
||||
CFLAGS += -DNDEBUG
|
||||
endif
|
||||
|
|
|
@ -45,6 +45,7 @@ FPGA_BIN_DIR=<bin_dir> XRT_DEVICE_INDEX=1 TARGET=hw ./ci/blackbox.sh --driver=xr
|
|||
|
||||
# build report logs
|
||||
<build_dir>/bin/vortex_afu.xclbin.info
|
||||
<build_dir>/_x/logs/link/vivado.log # search for keyword "Very high fanout"
|
||||
<build_dir>/_x/reports/link/link/imp/impl_1_full_util_routed.rpt
|
||||
<build_dir>/_x/reports/link/imp/impl_1_hw_bb_locked_timing_summary_routed.rpt # search for keyword "VIOLATED"
|
||||
<build_dir>/_x/logs/link/syn/ulp_vortex_afu_1_0_synth_1_runme.log
|
||||
|
|
|
@ -111,12 +111,12 @@ ifeq ($(TARGET), hw_emu)
|
|||
CFLAGS += -DSIMULATION
|
||||
endif
|
||||
|
||||
# Debugigng
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
VPP_FLAGS += -g --debug.protocol all
|
||||
ifneq ($(TARGET), hw)
|
||||
VPP_FLAGS += --vivado.prop fileset.sim_1.xsim.elaborate.debug_level=all
|
||||
CFLAGS += $(DBG_TRACE_FLAGS)
|
||||
CFLAGS += -DDEBUG_LEVEL=$(DEBUG) $(DBG_TRACE_FLAGS)
|
||||
else
|
||||
CFLAGS += -DNDEBUG
|
||||
endif
|
||||
|
|
|
@ -49,7 +49,7 @@ endif
|
|||
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache
|
||||
RTL_INCLUDE += $(FPU_INCLUDE)
|
||||
|
||||
# Debugigng
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
CFLAGS += $(DBG_TRACE_FLAGS)
|
||||
else
|
||||
|
|
|
@ -29,7 +29,7 @@ THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count()
|
|||
VL_FLAGS += -j $(THREADS)
|
||||
#VL_FLAGS += --threads $(THREADS)
|
||||
|
||||
# Debugigng
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
|
||||
CXXFLAGS += -g -O0 $(DBG_FLAGS)
|
||||
|
|
|
@ -51,8 +51,10 @@ _start:
|
|||
# la t0, trap_entry
|
||||
# csrw mtvec, t0
|
||||
|
||||
#ifdef HAVE_INITFINI_ARRAY
|
||||
# run global initialization functions
|
||||
call __libc_init_array
|
||||
#endif
|
||||
|
||||
# call main program routine
|
||||
call main
|
||||
|
|
|
@ -122,8 +122,10 @@ void __libc_fini_array (void) {
|
|||
// This function will be called by LIBC at program exit.
|
||||
// Since this platform only supports statically linked programs,
|
||||
// it is not required to support LIBC's exit functions registration via atexit().
|
||||
void __funcs_on_exit() {
|
||||
void __funcs_on_exit (void) {
|
||||
#ifdef HAVE_INITFINI_ARRAY
|
||||
__libc_fini_array();
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -30,7 +30,7 @@ else
|
|||
CXXFLAGS += -I$(SYN_DIR)
|
||||
endif
|
||||
|
||||
# Debugigng
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
CXXFLAGS += -g -O0
|
||||
else
|
||||
|
|
|
@ -19,7 +19,7 @@ LDFLAGS += -L$(DESTDIR) -lrtlsim
|
|||
|
||||
SRCS := $(SRC_DIR)/vortex.cpp
|
||||
|
||||
# Debugigng
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
CXXFLAGS += -g -O0
|
||||
else
|
||||
|
|
|
@ -15,7 +15,7 @@ LDFLAGS += -L$(DESTDIR) -lsimx
|
|||
|
||||
SRCS := $(SRC_DIR)/vortex.cpp
|
||||
|
||||
# Debugigng
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
CXXFLAGS += -g -O0
|
||||
else
|
||||
|
|
|
@ -12,7 +12,7 @@ LDFLAGS += -shared -pthread -ldl
|
|||
|
||||
SRCS := $(SRC_DIR)/vortex.cpp $(SRC_DIR)/utils.cpp
|
||||
|
||||
# Debugigng
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
CXXFLAGS += -g -O0
|
||||
else
|
||||
|
|
|
@ -314,7 +314,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
|
|||
if (num_cores > 1) {
|
||||
uint64_t scrb_total = scrb_alu_per_core + scrb_fpu_per_core + scrb_lsu_per_core + scrb_csrs_per_core + scrb_wctl_per_core;
|
||||
int scrb_percent_per_core = calcAvgPercent(scrb_stalls_per_core, cycles_per_core);
|
||||
fprintf(stream, "PERF: core%d: scoreboard stalls=%ld (%d%%) (alu=%d%%, fpu=%d%%, lsu=%d%%, scrs=%d%%, wctl=%d%%)\n"
|
||||
fprintf(stream, "PERF: core%d: scoreboard stalls=%ld (%d%%) (alu=%d%%, fpu=%d%%, lsu=%d%%, csrs=%d%%, wctl=%d%%)\n"
|
||||
, core_id
|
||||
, scrb_stalls_per_core
|
||||
, scrb_percent_per_core
|
||||
|
@ -559,7 +559,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
|
|||
fprintf(stream, "PERF: scheduler idle=%ld (%d%%)\n", sched_idles, sched_idles_percent);
|
||||
fprintf(stream, "PERF: scheduler stalls=%ld (%d%%)\n", sched_stalls, sched_stalls_percent);
|
||||
fprintf(stream, "PERF: ibuffer stalls=%ld (%d%%)\n", ibuffer_stalls, ibuffer_percent);
|
||||
fprintf(stream, "PERF: scoreboard stalls=%ld (%d%%) (alu=%d%%, fpu=%d%%, lsu=%d%%, scrs=%d%%, wctl=%d%%)\n"
|
||||
fprintf(stream, "PERF: scoreboard stalls=%ld (%d%%) (alu=%d%%, fpu=%d%%, lsu=%d%%, csrs=%d%%, wctl=%d%%)\n"
|
||||
, scrb_stalls
|
||||
, scrb_percent
|
||||
, calcAvgPercent(scrb_alu, scrb_total)
|
||||
|
|
|
@ -26,7 +26,7 @@ endif
|
|||
|
||||
PROJECT := libvortex-xrt.so
|
||||
|
||||
# Debugigng
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
CXXFLAGS += -g -O0
|
||||
else
|
||||
|
|
|
@ -83,7 +83,7 @@ THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count()
|
|||
VL_FLAGS += -j $(THREADS)
|
||||
#VL_FLAGS += --threads $(THREADS)
|
||||
|
||||
# Debugigng
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
|
||||
CXXFLAGS += -g -O0 $(DBG_FLAGS)
|
||||
|
|
|
@ -65,7 +65,7 @@ THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count()
|
|||
VL_FLAGS += -j $(THREADS)
|
||||
#VL_FLAGS += --threads $(THREADS)
|
||||
|
||||
# Debugigng
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
|
||||
CXXFLAGS += -g -O0 $(DBG_FLAGS)
|
||||
|
|
|
@ -20,7 +20,7 @@ LDFLAGS += -Wl,-rpath,$(THIRD_PARTY_DIR)/ramulator -L$(THIRD_PARTY_DIR)/ramulato
|
|||
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
|
||||
SRCS += $(SRC_DIR)/processor.cpp $(SRC_DIR)/cluster.cpp $(SRC_DIR)/socket.cpp $(SRC_DIR)/core.cpp $(SRC_DIR)/emulator.cpp $(SRC_DIR)/decode.cpp $(SRC_DIR)/execute.cpp $(SRC_DIR)/func_unit.cpp $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp
|
||||
|
||||
# Debugigng
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
CXXFLAGS += -g -O0 -DDEBUG_LEVEL=$(DEBUG)
|
||||
#CXXFLAGS += -g -O0 -DDEBUG_LEVEL=$(DEBUG) -fsanitize=address -fno-omit-frame-pointer
|
||||
|
|
|
@ -82,7 +82,7 @@ THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count()
|
|||
VL_FLAGS += -j $(THREADS)
|
||||
#VL_FLAGS += --threads $(THREADS)
|
||||
|
||||
# Debugigng
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
|
||||
CXXFLAGS += -g -O0 $(DBG_FLAGS)
|
||||
|
|
|
@ -46,13 +46,15 @@ int test_global_memory() {
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
int* lmem_addr = (int*)LMEM_BASE_ADDR;
|
||||
volatile int* lmem_addr = (int*)LMEM_BASE_ADDR;
|
||||
|
||||
int lmem_buffer[8];
|
||||
|
||||
void __attribute__((noinline)) do_lmem_wr() {
|
||||
unsigned tid = vx_thread_id();
|
||||
lmem_addr[tid] = 65 + tid;
|
||||
int x = lmem_addr[tid];
|
||||
lmem_addr[tid] = x;
|
||||
}
|
||||
|
||||
void __attribute__((noinline)) do_lmem_rd() {
|
||||
|
|
|
@ -44,7 +44,7 @@ CXXFLAGS += -I$(POCL_PATH)/include
|
|||
|
||||
POCL_CC_FLAGS += LLVM_PREFIX=$(LLVM_VORTEX) POCL_VORTEX_BINTOOL="$(VX_BINTOOL)" POCL_VORTEX_CFLAGS="$(VX_CFLAGS)" POCL_VORTEX_LDFLAGS="$(VX_LDFLAGS)"
|
||||
|
||||
# Debugigng
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
CXXFLAGS += -g -O0
|
||||
POCL_CC_FLAGS += POCL_DEBUG=all
|
||||
|
|
|
@ -52,7 +52,7 @@ CXXFLAGS += -I$(VORTEX_RT_PATH)/include -I$(ROOT_DIR)/hw
|
|||
|
||||
LDFLAGS += -L$(ROOT_DIR)/runtime -lvortex
|
||||
|
||||
# Debugigng
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
CXXFLAGS += -g -O0
|
||||
else
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors
|
||||
CXXFLAGS += -I$(VORTEX_RT_PATH)/common
|
||||
|
||||
# Debugigng
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
CXXFLAGS += -g -O0
|
||||
else
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue