mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 13:27:29 -04:00
L2 and L1 using different block size support, RTLsim fixes, dram_rsp_ready optimization
This commit is contained in:
parent
a7da36c007
commit
1795980a52
50 changed files with 972 additions and 952 deletions
28
.travis.yml
28
.travis.yml
|
@ -20,19 +20,21 @@ install:
|
|||
- export PATH=$VERILATOR_ROOT/bin:$PATH
|
||||
|
||||
script:
|
||||
- make -j
|
||||
- ci/test_runtime.sh
|
||||
- ci/test_driver.sh
|
||||
- ci/test_riscv_isa.sh
|
||||
- ci/test_opencl.sh
|
||||
- ci/blackbox.sh -run_debug
|
||||
- ci/blackbox.sh -run_scope
|
||||
- ci/blackbox.sh -run_1c
|
||||
- ci/blackbox.sh -run_2c
|
||||
- ci/blackbox.sh -run_4c
|
||||
- ci/blackbox.sh -run_4c_l2
|
||||
- travis_wait 30 ci/blackbox.sh -run_4c_2l2_l3
|
||||
- travis_wait 30 ci/blackbox.sh -run_8c_4l2_l3
|
||||
- travis_wait 45 make
|
||||
- travis_wait 45 ci/test_runtime.sh
|
||||
- travis_wait 45 ci/test_driver.sh
|
||||
- travis_wait 45 ci/test_riscv_isa.sh
|
||||
- travis_wait 45 ci/test_opencl.sh
|
||||
- travis_wait 45 ci/blackbox.sh --driver=rtlsim
|
||||
- travis_wait 45 ci/blackbox.sh --driver=vlsim
|
||||
- travis_wait 45 ci/blackbox.sh --driver=vlsim --scope
|
||||
- travis_wait 45 ci/blackbox.sh --driver=vlsim --debug
|
||||
- travis_wait 45 ci/blackbox.sh --driver=vlsim --cores=1
|
||||
- travis_wait 45 ci/blackbox.sh --driver=vlsim --cores=2
|
||||
- travis_wait 45 ci/blackbox.sh --driver=vlsim --cores=4
|
||||
- travis_wait 45 ci/blackbox.sh --driver=vlsim --cores=4 --l2cache
|
||||
- travis_wait 45 ci/blackbox.sh --driver=vlsim --cores=2 --l2cache --clusters=2
|
||||
- travis_wait 45 ci/blackbox.sh --driver=vlsim --cores=2 --l2cache --clusters=4
|
||||
|
||||
after_success:
|
||||
# Gather code coverage
|
||||
|
|
209
ci/blackbox.sh
209
ci/blackbox.sh
|
@ -3,107 +3,130 @@
|
|||
# exit when any command fails
|
||||
set -e
|
||||
|
||||
run_1c()
|
||||
show_usage()
|
||||
{
|
||||
# test single core
|
||||
make -C driver/opae/vlsim clean
|
||||
CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=1" make -C driver/opae/vlsim > /dev/null 2>&1
|
||||
make -C benchmarks/opencl/sgemm run-vlsim
|
||||
echo "Vortex BlackBox Test Driver v1.0"
|
||||
echo "Usage: [[--clusters=#n] [--cores=#n] [--warps=#n] [--threads=#n] [--l2cache] [[--driver=rtlsim|vlsim] [--debug] [--scope] [--app=vecadd|sgemm|basic|demo|dogfood][--help]]"
|
||||
}
|
||||
|
||||
run_2c()
|
||||
{
|
||||
# test 2 cores
|
||||
make -C driver/opae/vlsim clean
|
||||
CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0" make -C driver/opae/vlsim > /dev/null 2>&1
|
||||
make -C benchmarks/opencl/sgemm run-vlsim
|
||||
}
|
||||
DRIVER=vlsim
|
||||
APP=sgemm
|
||||
CLUSTERS=1
|
||||
CORES=2
|
||||
WARPS=4
|
||||
THREADS=4
|
||||
L2=0
|
||||
DEBUG=0
|
||||
SCOPE=0
|
||||
|
||||
run_4c()
|
||||
{
|
||||
# test 4 cores
|
||||
make -C driver/opae/vlsim clean
|
||||
CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=0" make -C driver/opae/vlsim > /dev/null 2>&1
|
||||
make -C benchmarks/opencl/sgemm run-vlsim
|
||||
}
|
||||
for i in "$@"
|
||||
do
|
||||
case $i in
|
||||
--driver=*)
|
||||
DRIVER=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--app=*)
|
||||
APP=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--clusters=*)
|
||||
CLUSTERS=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--cores=*)
|
||||
CORES=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--warps=*)
|
||||
WARPS=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--threads=*)
|
||||
THREADS=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--l2cache)
|
||||
L2=1
|
||||
shift
|
||||
;;
|
||||
--debug)
|
||||
DEBUG=1
|
||||
shift
|
||||
;;
|
||||
--scope)
|
||||
SCOPE=1
|
||||
shift
|
||||
;;
|
||||
--help)
|
||||
show_usage
|
||||
exit
|
||||
;;
|
||||
*)
|
||||
show_usage
|
||||
exit
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
run_4c_l2()
|
||||
{
|
||||
# test 4 cores with L2
|
||||
make -C driver/opae/vlsim clean
|
||||
CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1" make -C driver/opae/vlsim > /dev/null 2>&1
|
||||
make -C benchmarks/opencl/sgemm run-vlsim
|
||||
}
|
||||
case $DRIVER in
|
||||
rtlsim)
|
||||
DRIVER_PATH=driver/rtlsim
|
||||
DRIVER_EXTRA=
|
||||
;;
|
||||
vlsim)
|
||||
DRIVER_PATH=driver/opae
|
||||
DRIVER_EXTRA=vlsim
|
||||
;;
|
||||
*)
|
||||
echo "invalid driver: $DRIVER"
|
||||
exit
|
||||
;;
|
||||
esac
|
||||
|
||||
run_4c_2l2_l3()
|
||||
{
|
||||
# test 4 cores with L2 and L3
|
||||
make -C driver/opae/vlsim clean
|
||||
CONFIGS="-DNUM_CLUSTERS=2 -DNUM_CORES=2 -DL2_ENABLE=1" make -C driver/opae/vlsim > /dev/null 2>&1
|
||||
make -C benchmarks/opencl/sgemm run-vlsim
|
||||
}
|
||||
case $APP in
|
||||
sgemm)
|
||||
APP_PATH=benchmarks/opencl/sgemm
|
||||
;;
|
||||
vecadd)
|
||||
APP_PATH=benchmarks/opencl/vacadd
|
||||
;;
|
||||
basic)
|
||||
APP_PATH=driver/tests/basic
|
||||
;;
|
||||
demo)
|
||||
APP_PATH=driver/tests/demo
|
||||
;;
|
||||
dogfood)
|
||||
APP_PATH=driver/tests/dogfood
|
||||
;;
|
||||
*)
|
||||
echo "invalid app: $APP"
|
||||
exit
|
||||
;;
|
||||
esac
|
||||
|
||||
run_8c_4l2_l3()
|
||||
{
|
||||
# test 8 cores with L2 and L3
|
||||
make -C driver/opae/vlsim clean
|
||||
CONFIGS="-DNUM_CLUSTERS=4 -DNUM_CORES=2 -DL2_ENABLE=1" make -C driver/opae/vlsim > /dev/null 2>&1
|
||||
make -C benchmarks/opencl/sgemm run-vlsim
|
||||
}
|
||||
CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DL2_ENABLE=$L2"
|
||||
|
||||
run_debug()
|
||||
{
|
||||
# test debug build
|
||||
make -C driver/opae/vlsim clean
|
||||
DEBUG=1 CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=1" make -C driver/opae/vlsim > /dev/null 2>&1
|
||||
make -C benchmarks/opencl/sgemm run-vlsim > /dev/null 2>&1
|
||||
}
|
||||
echo "CONFIGS=$CONFIGS"
|
||||
|
||||
run_scope()
|
||||
{
|
||||
# test build with scope analyzer
|
||||
make -C driver/opae clean
|
||||
SCOPE=1 CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=1" make -C driver/opae > /dev/null 2>&1
|
||||
make -C benchmarks/opencl/sgemm run-vlsim
|
||||
}
|
||||
make -C $DRIVER_PATH clean
|
||||
|
||||
usage()
|
||||
{
|
||||
echo "usage: blackbox [[-run_1c] [-run_2c] [-run_4c] [-run_4c_l2] [-run_4c_2l2_l3] [-run_8c_4l2_l3] [-run_debug] [-run_scope] [-all] [-h|--help]]"
|
||||
}
|
||||
if [[ $DEBUG -eq 1 ]]
|
||||
then
|
||||
if [[ $SCOPE -eq 1 ]]
|
||||
then
|
||||
DEBUG=1 SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH $DRIVER_EXTRA > build.log 2>&1
|
||||
else
|
||||
DEBUG=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH $DRIVER_EXTRA > build.log 2>&1
|
||||
fi
|
||||
else
|
||||
if [[ $SCOPE -eq 1 ]]
|
||||
then
|
||||
SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH $DRIVER_EXTRA > build.log 2>&1
|
||||
else
|
||||
CONFIGS="$CONFIGS" make -C $DRIVER_PATH $DRIVER_EXTRA > build.log 2>&1
|
||||
fi
|
||||
fi
|
||||
|
||||
while [ "$1" != "" ]; do
|
||||
case $1 in
|
||||
-run_1c ) run_1c
|
||||
;;
|
||||
-run_2c ) run_2c
|
||||
;;
|
||||
-run_4c ) run_4c
|
||||
;;
|
||||
-run_4c_l2 ) run_4c_l2
|
||||
;;
|
||||
-run_4c_2l2_l3 ) run_4c_2l2_l3
|
||||
;;
|
||||
-run_8c_4l2_l3 ) run_8c_4l2_l3
|
||||
;;
|
||||
-run_debug ) run_debug
|
||||
;;
|
||||
-run_scope ) run_scope
|
||||
;;
|
||||
-all ) run_1c
|
||||
run_2c
|
||||
run_4c
|
||||
run_4c_l2
|
||||
run_4c_2l2_l3
|
||||
run_8c_4l2_l3
|
||||
run_debug
|
||||
run_scope
|
||||
;;
|
||||
-h | --help ) usage
|
||||
exit
|
||||
;;
|
||||
* ) usage
|
||||
exit 1
|
||||
esac
|
||||
shift
|
||||
done
|
||||
make -C $APP_PATH run-$DRIVER > run.log 2>&1
|
|
@ -1,7 +1,7 @@
|
|||
OPAE_HOME ?= /tools/opae/1.4.0
|
||||
|
||||
CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors
|
||||
#CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
|
||||
#CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors
|
||||
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -I../include -I$(OPAE_HOME)/include -I../../hw
|
||||
|
||||
|
@ -60,7 +60,7 @@ json: ../../hw/opae/vortex_afu.json
|
|||
afu_json_mgr json-info --afu-json=$^ --c-hdr=$@
|
||||
|
||||
fpga: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) $(FPGA_LIBS) -o $(PROJECT)
|
||||
$(CXX) $(CXXFLAGS) -DUSE_FPGA $^ $(LDFLAGS) $(FPGA_LIBS) -o $(PROJECT)
|
||||
|
||||
asesim: $(SRCS) $(ASE_DIR)
|
||||
$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $(PROJECT_ASE)
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
CFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -Wfatal-errors
|
||||
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
|
||||
#CFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -Wfatal-errors
|
||||
CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
|
||||
|
||||
CFLAGS += -I../../../../hw
|
||||
|
||||
|
@ -30,7 +30,7 @@ CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
|||
|
||||
CFLAGS += -fPIC
|
||||
|
||||
CFLAGS += -DUSE_RTLSIM $(CONFIGS)
|
||||
CFLAGS += -DUSE_VLSIM $(CONFIGS)
|
||||
|
||||
CFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
|
@ -79,7 +79,7 @@ VL_FLAGS += -DNOPAE
|
|||
CFLAGS += -DNOPAE
|
||||
|
||||
# use DPI FPU
|
||||
#VL_FLAGS += -DFPU_FAST
|
||||
VL_FLAGS += -DFPU_FAST
|
||||
|
||||
RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip
|
||||
|
||||
|
|
|
@ -206,11 +206,10 @@ void opae_sim::sRxPort_bus() {
|
|||
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 1;
|
||||
memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, cci_rd_it->block.data(), CACHE_BLOCK_SIZE);
|
||||
vortex_afu_->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata;
|
||||
/*printf("*** [vlsim] read-rsp: addr=%ld, mdata=%d, data=", cci_rd_it->addr, cci_rd_it->mdata);
|
||||
/*printf("%0ld: [sim] CCI Rd Rsp: addr=%ld, mdata=%d, data=", timestamp, cci_rd_it->addr, cci_rd_it->mdata);
|
||||
for (int i = 0; i < CACHE_BLOCK_SIZE; ++i)
|
||||
printf("%02x", cci_rd_it->block[CACHE_BLOCK_SIZE-1-i]);
|
||||
printf("\n");*/
|
||||
fflush(stdout);
|
||||
cci_reads_.erase(cci_rd_it);
|
||||
}
|
||||
}
|
||||
|
@ -225,8 +224,7 @@ void opae_sim::sTxPort_bus() {
|
|||
cci_req.mdata = vortex_afu_->af2cp_sTxPort_c0_hdr_mdata;
|
||||
auto host_ptr = (uint64_t*)(vortex_afu_->af2cp_sTxPort_c0_hdr_address * CACHE_BLOCK_SIZE);
|
||||
memcpy(cci_req.block.data(), host_ptr, CACHE_BLOCK_SIZE);
|
||||
//printf("*** [vlsim] read-req: addr=%ld, mdata=%d\n", vortex_afu_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata);
|
||||
fflush(stdout);
|
||||
//printf("%0ld: [sim] CCI Rd Req: addr=%ld, mdata=%d\n", timestamp, vortex_afu_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata);
|
||||
cci_reads_.emplace_back(cci_req);
|
||||
}
|
||||
|
||||
|
@ -265,12 +263,12 @@ void opae_sim::avs_bus() {
|
|||
memcpy(vortex_afu_->avs_readdata, dram_rd_it->block.data(), CACHE_BLOCK_SIZE);
|
||||
uint32_t tag = dram_rd_it->tag;
|
||||
dram_reads_.erase(dram_rd_it);
|
||||
/*printf("%0ld: VLSIM: DRAM rsp: addr=%x, pending={", timestamp, tag);
|
||||
/*printf("%0ld: [sim] DRAM Rd Rsp: addr=%x, pending={", timestamp, tag);
|
||||
for (auto& req : dram_reads_) {
|
||||
if (req.cycles_left != 0)
|
||||
printf(" !%0x", req.tag);
|
||||
printf(" !%0x", req.tag);
|
||||
else
|
||||
printf(" %0x", req.tag);
|
||||
printf(" %0x", req.tag);
|
||||
}
|
||||
printf("}\n");*/
|
||||
}
|
||||
|
@ -288,7 +286,8 @@ void opae_sim::avs_bus() {
|
|||
|
||||
// process DRAM requests
|
||||
if (!dram_stalled) {
|
||||
if (vortex_afu_->avs_write) {
|
||||
assert(!vortex_afu_->avs_read || !vortex_afu_->avs_write);
|
||||
if (vortex_afu_->avs_write) {
|
||||
assert(0 == vortex_afu_->mem_bank_select);
|
||||
uint64_t byteen = vortex_afu_->avs_byteenable;
|
||||
unsigned base_addr = (vortex_afu_->avs_address * CACHE_BLOCK_SIZE);
|
||||
|
@ -307,12 +306,12 @@ void opae_sim::avs_bus() {
|
|||
ram_.read(base_addr, CACHE_BLOCK_SIZE, dram_req.block.data());
|
||||
dram_req.tag = base_addr;
|
||||
dram_reads_.emplace_back(dram_req);
|
||||
/*printf("%0ld: VLSIM: DRAM req: addr=%x, pending={", timestamp, base_addr);
|
||||
/*printf("%0ld: [sim] DRAM Rd Req: addr=%x, pending={", timestamp, base_addr);
|
||||
for (auto& req : dram_reads_) {
|
||||
if (req.cycles_left != 0)
|
||||
printf(" !%0x", req.tag);
|
||||
printf(" !%0x", req.tag);
|
||||
else
|
||||
printf(" %0x", req.tag);
|
||||
printf(" %0x", req.tag);
|
||||
}
|
||||
printf("}\n");*/
|
||||
}
|
||||
|
|
|
@ -7,11 +7,11 @@
|
|||
#include <assert.h>
|
||||
#include <cmath>
|
||||
|
||||
#ifdef USE_VLSIM
|
||||
#include "vlsim/fpga.h"
|
||||
#else
|
||||
#if defined(USE_FPGA) || defined(USE_ASE)
|
||||
#include <opae/fpga.h>
|
||||
#include <uuid/uuid.h>
|
||||
#elif defined(USE_VLSIM)
|
||||
#include "vlsim/fpga.h"
|
||||
#endif
|
||||
|
||||
#include <vortex.h>
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
CFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -Wfatal-errors
|
||||
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
|
||||
#CFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -Wfatal-errors
|
||||
CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
|
||||
|
||||
CFLAGS += -I../../include -I../../../hw/simulate -I../../../hw
|
||||
|
||||
|
@ -65,7 +65,7 @@ else
|
|||
endif
|
||||
|
||||
# use DPI FPU
|
||||
#VL_FLAGS += -DFPU_FAST
|
||||
VL_FLAGS += -DFPU_FAST
|
||||
|
||||
PROJECT = libvortex.so
|
||||
# PROJECT = libvortex.dylib
|
||||
|
|
|
@ -92,7 +92,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
|||
}
|
||||
}
|
||||
if (errors != 0) {
|
||||
std::cout << "Found " << errors << " errors!" << std::endl;
|
||||
std::cout << "Found " << std::dec << errors << " errors!" << std::endl;
|
||||
std::cout << "FAILED!" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
|
|
@ -260,7 +260,7 @@ int main(int argc, char *argv[]) {
|
|||
(void*)vx_host_ptr(src1_buf),
|
||||
(void*)vx_host_ptr(src2_buf));
|
||||
if (errors != 0) {
|
||||
std::cout << "found " << errors << " errors!" << std::endl;
|
||||
std::cout << "found " << std::dec << errors << " errors!" << std::endl;
|
||||
std::cout << "Test" << t << "-" << name << " FAILED!" << std::endl << std::flush;
|
||||
if (stop_on_error) {
|
||||
cleanup();
|
||||
|
|
|
@ -14,29 +14,36 @@ union Float_t {
|
|||
} parts;
|
||||
};
|
||||
|
||||
inline float fround(float x, int32_t precision = 4) {
|
||||
inline float fround(float x, int32_t precision = 8) {
|
||||
auto power_of_10 = std::pow(10, precision);
|
||||
return std::round(x * power_of_10) / power_of_10;
|
||||
}
|
||||
|
||||
inline bool almost_equal_eps(float a, float b, float eps = std::numeric_limits<float>::epsilon()) {
|
||||
auto tolerance = std::min(fabs(a), fabs(b)) * eps;
|
||||
return fabs(a - b) <= tolerance;
|
||||
inline bool almost_equal_eps(float a, float b, int ulp = 128) {
|
||||
auto eps = std::numeric_limits<float>::epsilon() * (std::max(fabs(a), fabs(b)) * ulp);
|
||||
auto d = fabs(a - b);
|
||||
if (d > eps) {
|
||||
std::cout << "*** almost_equal_eps: d=" << d << ", eps=" << eps << std::endl;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool almost_equal_ulp(float a, float b, int32_t ulp = 4) {
|
||||
inline bool almost_equal_ulp(float a, float b, int32_t ulp = 6) {
|
||||
Float_t fa{a}, fb{b};
|
||||
auto d = std::abs(fa.i - fb.i);
|
||||
if (d > ulp) {
|
||||
std::cout << "*** float compare: a=" << a << ", b=" << b << ", ulp=" << d << ", ia=" << std::hex << fa.i << ", ib=" << fb.i << std::endl;
|
||||
std::cout << "*** almost_equal_ulp: a=" << a << ", b=" << b << ", ulp=" << d << ", ia=" << std::hex << fa.i << ", ib=" << fb.i << std::endl;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool almost_equal(float a, float b) {
|
||||
if (almost_equal_eps(a, b))
|
||||
if (a == b)
|
||||
return true;
|
||||
/*if (almost_equal_eps(a, b))
|
||||
return true;*/
|
||||
return almost_equal_ulp(a, b);
|
||||
}
|
||||
|
||||
|
|
|
@ -45,19 +45,19 @@ module VX_avs_wrapper #(
|
|||
reg [AVS_BANKS_BITS-1:0] avs_bankselect_r;
|
||||
reg [AVS_BURSTW-1:0] avs_burstcount_r;
|
||||
|
||||
wire avs_rtq_push = !dram_req_rw && dram_req_valid && dram_req_ready;
|
||||
wire avs_rtq_pop = dram_rsp_valid && dram_rsp_ready;
|
||||
wire avs_reqq_push = dram_req_valid && dram_req_ready && !dram_req_rw;
|
||||
wire avs_reqq_pop = dram_rsp_valid && dram_rsp_ready;
|
||||
|
||||
wire avs_rdq_push = avs_readdatavalid;
|
||||
wire avs_rdq_pop = avs_rtq_pop;
|
||||
wire avs_rdq_empty;
|
||||
wire avs_rspq_push = avs_readdatavalid;
|
||||
wire avs_rspq_pop = avs_reqq_pop;
|
||||
wire avs_rspq_empty;
|
||||
|
||||
reg [RD_QUEUE_ADDRW-1:0] avs_pending_reads;
|
||||
wire [RD_QUEUE_ADDRW-1:0] avs_pending_reads_n;
|
||||
|
||||
assign avs_pending_reads_n = avs_pending_reads
|
||||
+ RD_QUEUE_ADDRW'((avs_rtq_push && !avs_rdq_pop) ? 1 :
|
||||
(avs_rdq_pop && !avs_rtq_push) ? -1 : 0);
|
||||
+ RD_QUEUE_ADDRW'((avs_reqq_push && !avs_rspq_pop) ? 1 :
|
||||
(avs_rspq_pop && !avs_reqq_push) ? -1 : 0);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
@ -75,9 +75,9 @@ module VX_avs_wrapper #(
|
|||
) rd_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (avs_rtq_push),
|
||||
.push (avs_reqq_push),
|
||||
.pop (avs_reqq_pop),
|
||||
.data_in (dram_req_tag),
|
||||
.pop (avs_rtq_pop),
|
||||
.data_out (dram_rsp_tag),
|
||||
`UNUSED_PIN (empty),
|
||||
`UNUSED_PIN (full),
|
||||
|
@ -90,37 +90,38 @@ module VX_avs_wrapper #(
|
|||
) rd_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (avs_rdq_push),
|
||||
.data_in (avs_readdata),
|
||||
.pop (avs_rdq_pop),
|
||||
.push (avs_rspq_push),
|
||||
.pop (avs_rspq_pop),
|
||||
.data_in (avs_readdata),
|
||||
.data_out (dram_rsp_data),
|
||||
.empty (avs_rdq_empty),
|
||||
.empty (avs_rspq_empty),
|
||||
`UNUSED_PIN (full),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
assign avs_read = dram_req_valid && !dram_req_rw;
|
||||
assign avs_write = dram_req_valid && dram_req_rw;
|
||||
wire rsp_queue_ready = (avs_pending_reads != RD_QUEUE_SIZE);
|
||||
|
||||
assign avs_read = dram_req_valid && !dram_req_rw && rsp_queue_ready;
|
||||
assign avs_write = dram_req_valid && dram_req_rw && rsp_queue_ready;
|
||||
assign avs_address = dram_req_addr;
|
||||
assign avs_byteenable = dram_req_byteen;
|
||||
assign avs_writedata = dram_req_data;
|
||||
assign dram_req_ready = !avs_waitrequest
|
||||
&& (avs_pending_reads < RD_QUEUE_SIZE);
|
||||
assign dram_req_ready = !avs_waitrequest && rsp_queue_ready;
|
||||
assign avs_burstcount = avs_burstcount_r;
|
||||
assign avs_bankselect = avs_bankselect_r;
|
||||
|
||||
assign dram_rsp_valid = !avs_rdq_empty;
|
||||
assign dram_rsp_valid = !avs_rspq_empty;
|
||||
|
||||
`ifdef DBG_PRINT_AVS
|
||||
always @(posedge clk) begin
|
||||
if (dram_req_valid && dram_req_ready) begin
|
||||
if (dram_req_rw)
|
||||
$display("%t: AVS Wr Req: addr=%0h, byteen=%0h, tag=%0h, data=%0h", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_byteenable, dram_req_tag, avs_writedata);
|
||||
$display("%t: AVS Wr Req: addr=%0h, byteen=%0h, tag=%0h, data=%0h", $time, `DRAM_TO_BYTE_ADDR(dram_req_addr), dram_req_byteen, dram_req_tag, dram_req_data);
|
||||
else
|
||||
$display("%t: AVS Rd Req: addr=%0h, byteen=%0h, tag=%0h, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_byteenable, dram_req_tag, avs_pending_reads_n);
|
||||
$display("%t: AVS Rd Req: addr=%0h, byteen=%0h, tag=%0h, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(dram_req_addr), dram_req_byteen, dram_req_tag, avs_pending_reads_n);
|
||||
end
|
||||
if (dram_rsp_valid && dram_rsp_ready) begin
|
||||
$display("%t: AVS Rd Rsp: data=%0h, pending=%0d", $time, avs_readdata, avs_pending_reads_n);
|
||||
$display("%t: AVS Rd Rsp: tag=%0h, data=%0h, pending=%0d", $time, dram_rsp_tag, dram_rsp_data, avs_pending_reads_n);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -501,7 +501,6 @@ wire [AVS_REQ_TAGW-1:0] vx_dram_rsp_tag_unqual;
|
|||
wire cci_dram_rd_req_valid, cci_dram_wr_req_valid;
|
||||
wire [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr;
|
||||
wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_dout;
|
||||
wire [VX_DRAM_LINE_IDX-1:0] vx_dram_req_idx, vx_dram_rsp_idx;
|
||||
|
||||
//--
|
||||
|
||||
|
@ -526,20 +525,19 @@ assign vx_dram_req_valid_qual = vx_dram_req_valid && vx_enabled;
|
|||
assign vx_dram_req_addr_qual = vx_dram_req_addr[`VX_DRAM_ADDR_WIDTH-1:`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH];
|
||||
|
||||
if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin
|
||||
assign vx_dram_req_idx = vx_dram_req_addr[VX_DRAM_LINE_IDX-1:0];
|
||||
wire [VX_DRAM_LINE_IDX-1:0] vx_dram_req_idx = vx_dram_req_addr[VX_DRAM_LINE_IDX-1:0];
|
||||
wire [VX_DRAM_LINE_IDX-1:0] vx_dram_rsp_idx = vx_dram_rsp_tag_unqual[VX_DRAM_LINE_IDX-1:0];
|
||||
assign vx_dram_req_byteen_qual = 64'(vx_dram_req_byteen) << (6'(vx_dram_req_addr[VX_DRAM_LINE_IDX-1:0]) << (VX_DRAM_LINE_LW-3));
|
||||
assign vx_dram_req_data_qual = DRAM_LINE_WIDTH'(vx_dram_req_data) << ((DRAM_LINE_LW'(vx_dram_req_idx)) << VX_DRAM_LINE_LW);
|
||||
assign vx_dram_req_tag_qual = {vx_dram_req_tag, vx_dram_req_idx};
|
||||
assign vx_dram_rsp_data = vx_dram_rsp_data_unqual[vx_dram_rsp_idx];
|
||||
assign vx_dram_rsp_data = vx_dram_rsp_data_unqual[vx_dram_rsp_idx];
|
||||
end else begin
|
||||
assign vx_dram_req_idx = VX_DRAM_LINE_IDX'(0);
|
||||
assign vx_dram_req_byteen_qual = vx_dram_req_byteen;
|
||||
assign vx_dram_req_tag_qual = vx_dram_req_tag;
|
||||
assign vx_dram_req_data_qual = vx_dram_req_data;
|
||||
assign vx_dram_rsp_data = vx_dram_rsp_data_unqual;
|
||||
end
|
||||
|
||||
assign vx_dram_rsp_idx = vx_dram_rsp_tag_unqual[VX_DRAM_LINE_IDX-1:0];
|
||||
assign vx_dram_rsp_tag = vx_dram_rsp_tag_unqual[`VX_DRAM_TAG_WIDTH+VX_DRAM_LINE_IDX-1:VX_DRAM_LINE_IDX];
|
||||
|
||||
//--
|
||||
|
@ -723,15 +721,15 @@ always @(posedge clk) begin
|
|||
cci_rd_req_wait <= 0; // restart new request batch
|
||||
end
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: CCI Rd Rsp: idx=%0d, ctr=%0d", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr);
|
||||
$display("%t: CCI Rd Rsp: idx=%0d, ctr=%0d, data=%0h", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data);
|
||||
`endif
|
||||
end
|
||||
|
||||
if (cci_rdq_pop) begin
|
||||
/*if (cci_rdq_pop) begin
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: CCI Rd Queue Pop: pending=%0d", $time, cci_pending_reads_next);
|
||||
`endif
|
||||
end
|
||||
end*/
|
||||
|
||||
if (cci_dram_wr_req_fire) begin
|
||||
cci_dram_wr_req_addr_unqual <= cci_dram_wr_req_addr_unqual + ((CCI_RD_RQ_TAGW'(cci_dram_wr_req_ctr) == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) ? DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0));
|
||||
|
@ -836,15 +834,15 @@ begin
|
|||
cci_wr_req_addr <= cci_wr_req_addr + t_ccip_clAddr'(1);
|
||||
cci_wr_req_ctr <= cci_wr_req_ctr - DRAM_ADDR_WIDTH'(1);
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next);
|
||||
$display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d, data=%0h", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next, af2cp_sTxPort.c1.data);
|
||||
`endif
|
||||
end
|
||||
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
/*`ifdef DBG_PRINT_OPAE
|
||||
if (cci_wr_rsp_fire) begin
|
||||
$display("%t: CCI Wr Rsp: pending=%0d", $time, cci_pending_writes_next);
|
||||
end
|
||||
`endif
|
||||
`endif*/
|
||||
|
||||
if (cci_dram_rd_req_fire) begin
|
||||
cci_dram_rd_req_addr_unqual <= cci_dram_rd_req_addr_unqual + DRAM_ADDR_WIDTH'(1);
|
||||
|
|
|
@ -304,30 +304,60 @@ module VX_cluster #(
|
|||
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] core_snp_fwdin_tag;
|
||||
wire[`NUM_CORES-1:0] core_snp_fwdin_ready;
|
||||
|
||||
wire snp_fwd_rsp_valid;
|
||||
wire [`L2DRAM_ADDR_WIDTH-1:0] snp_fwd_rsp_addr;
|
||||
wire snp_fwd_rsp_invalidate;
|
||||
wire [`L2SNP_TAG_WIDTH-1:0] snp_fwd_rsp_tag;
|
||||
wire snp_fwd_rsp_ready;
|
||||
|
||||
reg [`L2NUM_REQUESTS-1:0] core_dram_rsp_ready_other;
|
||||
reg core_dram_rsp_ready_all;
|
||||
|
||||
always @(*) begin
|
||||
core_dram_rsp_ready_other = {`L2NUM_REQUESTS{1'b1}};
|
||||
core_dram_rsp_ready_all = 1'b1;
|
||||
|
||||
for (integer i = 0; i < `L2NUM_REQUESTS; i++) begin
|
||||
for (integer j = 0; j < `L2NUM_REQUESTS; j++) begin
|
||||
if (i != j) begin
|
||||
if (0 == (j & 1))
|
||||
core_dram_rsp_ready_other[i] &= (per_core_D_dram_rsp_ready [(j/2)] | !core_dram_rsp_valid [j]);
|
||||
else
|
||||
core_dram_rsp_ready_other[i] &= (per_core_I_dram_rsp_ready [(j/2)] | !core_dram_rsp_valid [j]);
|
||||
end
|
||||
end
|
||||
|
||||
if (0 == (i & 1))
|
||||
core_dram_rsp_ready_all &= (per_core_D_dram_rsp_ready [(i/2)] | !core_dram_rsp_valid [i]);
|
||||
else
|
||||
core_dram_rsp_ready_all &= (per_core_I_dram_rsp_ready [(i/2)] | !core_dram_rsp_valid [i]);
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
|
||||
assign core_dram_req_valid [i] = per_core_D_dram_req_valid[(i/2)];
|
||||
assign core_dram_req_valid [i+1] = per_core_I_dram_req_valid[(i/2)];
|
||||
assign core_dram_req_valid [i] = per_core_D_dram_req_valid [(i/2)];
|
||||
assign core_dram_req_valid [i+1] = per_core_I_dram_req_valid [(i/2)];
|
||||
|
||||
assign core_dram_req_rw [i] = per_core_D_dram_req_rw[(i/2)];
|
||||
assign core_dram_req_rw [i+1] = per_core_I_dram_req_rw[(i/2)];
|
||||
assign core_dram_req_rw [i] = per_core_D_dram_req_rw [(i/2)];
|
||||
assign core_dram_req_rw [i+1] = per_core_I_dram_req_rw [(i/2)];
|
||||
|
||||
assign core_dram_req_byteen [i] = per_core_D_dram_req_byteen[(i/2)];
|
||||
assign core_dram_req_byteen [i+1] = per_core_I_dram_req_byteen[(i/2)];
|
||||
assign core_dram_req_byteen [i] = per_core_D_dram_req_byteen [(i/2)];
|
||||
assign core_dram_req_byteen [i+1] = per_core_I_dram_req_byteen [(i/2)];
|
||||
|
||||
assign core_dram_req_addr [i] = per_core_D_dram_req_addr[(i/2)];
|
||||
assign core_dram_req_addr [i+1] = per_core_I_dram_req_addr[(i/2)];
|
||||
assign core_dram_req_addr [i] = per_core_D_dram_req_addr [(i/2)];
|
||||
assign core_dram_req_addr [i+1] = per_core_I_dram_req_addr [(i/2)];
|
||||
|
||||
assign core_dram_req_data [i] = per_core_D_dram_req_data[(i/2)];
|
||||
assign core_dram_req_data [i+1] = per_core_I_dram_req_data[(i/2)];
|
||||
assign core_dram_req_data [i] = per_core_D_dram_req_data [(i/2)];
|
||||
assign core_dram_req_data [i+1] = per_core_I_dram_req_data [(i/2)];
|
||||
|
||||
assign core_dram_req_tag [i] = per_core_D_dram_req_tag[(i/2)];
|
||||
assign core_dram_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)];
|
||||
assign core_dram_req_tag [i] = per_core_D_dram_req_tag [(i/2)];
|
||||
assign core_dram_req_tag [i+1] = per_core_I_dram_req_tag [(i/2)];
|
||||
|
||||
assign per_core_D_dram_req_ready [(i/2)] = core_dram_req_ready;
|
||||
assign per_core_I_dram_req_ready [(i/2)] = core_dram_req_ready;
|
||||
|
||||
assign per_core_D_dram_rsp_valid [(i/2)] = core_dram_rsp_valid[i] && core_dram_rsp_ready;
|
||||
assign per_core_I_dram_rsp_valid [(i/2)] = core_dram_rsp_valid[i+1] && core_dram_rsp_ready;
|
||||
assign per_core_D_dram_rsp_valid [(i/2)] = core_dram_rsp_valid[i] & core_dram_rsp_ready_other [i];
|
||||
assign per_core_I_dram_rsp_valid [(i/2)] = core_dram_rsp_valid[i+1] & core_dram_rsp_ready_other [i+1];
|
||||
|
||||
assign per_core_D_dram_rsp_data [(i/2)] = core_dram_rsp_data[i];
|
||||
assign per_core_I_dram_rsp_data [(i/2)] = core_dram_rsp_data[i+1];
|
||||
|
@ -346,32 +376,63 @@ module VX_cluster #(
|
|||
assign per_core_snp_rsp_ready [(i/2)] = core_snp_fwdin_ready [(i/2)];
|
||||
end
|
||||
|
||||
assign core_dram_rsp_ready = (& per_core_D_dram_rsp_ready) && (& per_core_I_dram_rsp_ready);
|
||||
assign core_dram_rsp_ready = core_dram_rsp_ready_all;
|
||||
|
||||
VX_snp_forwarder #(
|
||||
.CACHE_ID (`L2CACHE_ID),
|
||||
.NUM_REQUESTS (`NUM_CORES),
|
||||
.SRC_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH),
|
||||
.DST_ADDR_WIDTH (`DDRAM_ADDR_WIDTH),
|
||||
.SNP_TAG_WIDTH (`L2SNP_TAG_WIDTH),
|
||||
.SNRQ_SIZE (`L2SNRQ_SIZE)
|
||||
) snp_forwarder (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.snp_req_valid (snp_req_valid),
|
||||
.snp_req_addr (snp_req_addr),
|
||||
.snp_req_invalidate (snp_req_invalidate),
|
||||
.snp_req_tag (snp_req_tag),
|
||||
.snp_req_ready (snp_req_ready),
|
||||
|
||||
.snp_rsp_valid (snp_fwd_rsp_valid),
|
||||
.snp_rsp_addr (snp_fwd_rsp_addr),
|
||||
.snp_rsp_invalidate (snp_fwd_rsp_invalidate),
|
||||
.snp_rsp_tag (snp_fwd_rsp_tag),
|
||||
.snp_rsp_ready (snp_fwd_rsp_ready),
|
||||
|
||||
.snp_fwdout_valid (core_snp_fwdout_valid),
|
||||
.snp_fwdout_addr (core_snp_fwdout_addr),
|
||||
.snp_fwdout_invalidate(core_snp_fwdout_invalidate),
|
||||
.snp_fwdout_tag (core_snp_fwdout_tag),
|
||||
.snp_fwdout_ready (core_snp_fwdout_ready),
|
||||
|
||||
.snp_fwdin_valid (core_snp_fwdin_valid),
|
||||
.snp_fwdin_tag (core_snp_fwdin_tag),
|
||||
.snp_fwdin_ready (core_snp_fwdin_ready)
|
||||
);
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_ID (`L2CACHE_ID),
|
||||
.CACHE_SIZE (`L2CACHE_SIZE),
|
||||
.BANK_LINE_SIZE (`L2BANK_LINE_SIZE),
|
||||
.NUM_BANKS (`L2NUM_BANKS),
|
||||
.WORD_SIZE (`L2WORD_SIZE),
|
||||
.NUM_REQUESTS (`L2NUM_REQUESTS),
|
||||
.CREQ_SIZE (`L2CREQ_SIZE),
|
||||
.MRVQ_SIZE (`L2MRVQ_SIZE),
|
||||
.DRFQ_SIZE (`L2DRFQ_SIZE),
|
||||
.SNRQ_SIZE (`L2SNRQ_SIZE),
|
||||
.CWBQ_SIZE (`L2CWBQ_SIZE),
|
||||
.DREQ_SIZE (`L2DREQ_SIZE),
|
||||
.SNPQ_SIZE (`L2SNPQ_SIZE),
|
||||
.DRAM_ENABLE (1),
|
||||
.FLUSH_ENABLE (1),
|
||||
.WRITE_ENABLE (1),
|
||||
.SNOOP_FORWARDING (1),
|
||||
.CORE_TAG_WIDTH (`DDRAM_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (0),
|
||||
.DRAM_TAG_WIDTH (`L2DRAM_TAG_WIDTH),
|
||||
.NUM_SNP_REQUESTS (`NUM_CORES),
|
||||
.SNP_REQ_TAG_WIDTH (`L2SNP_TAG_WIDTH),
|
||||
.SNP_FWD_TAG_WIDTH (`DSNP_TAG_WIDTH)
|
||||
.CACHE_ID (`L2CACHE_ID),
|
||||
.CACHE_SIZE (`L2CACHE_SIZE),
|
||||
.BANK_LINE_SIZE (`L2BANK_LINE_SIZE),
|
||||
.NUM_BANKS (`L2NUM_BANKS),
|
||||
.WORD_SIZE (`L2WORD_SIZE),
|
||||
.NUM_REQUESTS (`L2NUM_REQUESTS),
|
||||
.CREQ_SIZE (`L2CREQ_SIZE),
|
||||
.MRVQ_SIZE (`L2MRVQ_SIZE),
|
||||
.DRFQ_SIZE (`L2DRFQ_SIZE),
|
||||
.SNRQ_SIZE (`L2SNRQ_SIZE),
|
||||
.CWBQ_SIZE (`L2CWBQ_SIZE),
|
||||
.DREQ_SIZE (`L2DREQ_SIZE),
|
||||
.SNPQ_SIZE (`L2SNPQ_SIZE),
|
||||
.DRAM_ENABLE (1),
|
||||
.FLUSH_ENABLE (1),
|
||||
.WRITE_ENABLE (1),
|
||||
.CORE_TAG_WIDTH (`DDRAM_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (0),
|
||||
.DRAM_TAG_WIDTH (`L2DRAM_TAG_WIDTH),
|
||||
.SNP_TAG_WIDTH (`L2SNP_TAG_WIDTH)
|
||||
) l2cache (
|
||||
`SCOPE_BIND_VX_cluster_l2cache
|
||||
|
||||
|
@ -409,29 +470,17 @@ module VX_cluster #(
|
|||
.dram_rsp_ready (dram_rsp_ready),
|
||||
|
||||
// Snoop request
|
||||
.snp_req_valid (snp_req_valid),
|
||||
.snp_req_addr (snp_req_addr),
|
||||
.snp_req_invalidate (snp_req_invalidate),
|
||||
.snp_req_tag (snp_req_tag),
|
||||
.snp_req_ready (snp_req_ready),
|
||||
.snp_req_valid (snp_fwd_rsp_valid),
|
||||
.snp_req_addr (snp_fwd_rsp_addr),
|
||||
.snp_req_invalidate (snp_fwd_rsp_invalidate),
|
||||
.snp_req_tag (snp_fwd_rsp_tag),
|
||||
.snp_req_ready (snp_fwd_rsp_ready),
|
||||
|
||||
// Snoop response
|
||||
.snp_rsp_valid (snp_rsp_valid),
|
||||
.snp_rsp_tag (snp_rsp_tag),
|
||||
.snp_rsp_ready (snp_rsp_ready),
|
||||
|
||||
// Snoop forwarding out
|
||||
.snp_fwdout_valid (core_snp_fwdout_valid),
|
||||
.snp_fwdout_addr (core_snp_fwdout_addr),
|
||||
.snp_fwdout_invalidate(core_snp_fwdout_invalidate),
|
||||
.snp_fwdout_tag (core_snp_fwdout_tag),
|
||||
.snp_fwdout_ready (core_snp_fwdout_ready),
|
||||
|
||||
// Snoop forwarding in
|
||||
.snp_fwdin_valid (core_snp_fwdin_valid),
|
||||
.snp_fwdin_tag (core_snp_fwdin_tag),
|
||||
.snp_fwdin_ready (core_snp_fwdin_ready),
|
||||
|
||||
// Miss status
|
||||
`UNUSED_PIN (miss_vec)
|
||||
);
|
||||
|
@ -508,11 +557,12 @@ module VX_cluster #(
|
|||
|
||||
if (`NUM_CORES > 1) begin
|
||||
VX_snp_forwarder #(
|
||||
.CACHE_ID (`L2CACHE_ID),
|
||||
.BANK_LINE_SIZE (`L2BANK_LINE_SIZE),
|
||||
.NUM_REQUESTS (`NUM_CORES),
|
||||
.SNRQ_SIZE (`L2SNRQ_SIZE),
|
||||
.SNP_REQ_TAG_WIDTH (`L2SNP_TAG_WIDTH)
|
||||
.CACHE_ID (`L2CACHE_ID),
|
||||
.NUM_REQUESTS (`NUM_CORES),
|
||||
.SRC_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH),
|
||||
.DST_ADDR_WIDTH (`DDRAM_ADDR_WIDTH),
|
||||
.SNP_TAG_WIDTH (`L2SNP_TAG_WIDTH),
|
||||
.SNRQ_SIZE (`L2SNRQ_SIZE)
|
||||
) snp_forwarder (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
`endif
|
||||
|
||||
`ifndef NUM_CORES
|
||||
`define NUM_CORES 2
|
||||
`define NUM_CORES 4
|
||||
`endif
|
||||
|
||||
`ifndef NUM_WARPS
|
||||
|
@ -23,8 +23,20 @@
|
|||
`define NUM_BARRIERS 4
|
||||
`endif
|
||||
|
||||
`ifndef L2_ENABLE
|
||||
`define L2_ENABLE (`NUM_CORES > 2)
|
||||
`endif
|
||||
|
||||
`ifndef L3_ENABLE
|
||||
`define L3_ENABLE (`NUM_CLUSTERS > 1)
|
||||
`endif
|
||||
|
||||
`ifndef GLOBAL_BLOCK_SIZE
|
||||
`define GLOBAL_BLOCK_SIZE 16
|
||||
`define GLOBAL_BLOCK_SIZE 64
|
||||
`endif
|
||||
|
||||
`ifndef L1_BLOCK_SIZE
|
||||
`define L1_BLOCK_SIZE 16
|
||||
`endif
|
||||
|
||||
`ifndef STARTUP_ADDR
|
||||
|
@ -57,14 +69,6 @@
|
|||
|
||||
`define FRAME_BUFFER_SIZE (FRAME_BUFFER_WIDTH * FRAME_BUFFER_HEIGHT)
|
||||
|
||||
`ifndef L2_ENABLE
|
||||
`define L2_ENABLE 0
|
||||
`endif
|
||||
|
||||
`ifndef L3_ENABLE
|
||||
`define L3_ENABLE (`NUM_CLUSTERS > 1)
|
||||
`endif
|
||||
|
||||
`ifndef EXT_M_DISABLE
|
||||
`define EXT_M_ENABLE
|
||||
`endif
|
||||
|
@ -159,7 +163,7 @@
|
|||
`define CSR_MIMPID 12'hF13
|
||||
`define CSR_MHARTID 12'hF14
|
||||
|
||||
// Pipeline Queues ============================================================
|
||||
// Pipeline Queues ////////////////////////////////////////////////////////////
|
||||
|
||||
// Size of instruction queue
|
||||
`ifndef IBUF_SIZE
|
||||
|
@ -181,28 +185,18 @@
|
|||
`define FPUQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Dcache Configurable Knobs ==================================================
|
||||
// Dcache Configurable Knobs //////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
`ifndef DCACHE_SIZE
|
||||
`define DCACHE_SIZE 4096
|
||||
`define DCACHE_SIZE 8192
|
||||
`endif
|
||||
|
||||
// Size of line inside a bank in bytes
|
||||
`ifndef DBANK_LINE_SIZE
|
||||
`define DBANK_LINE_SIZE `GLOBAL_BLOCK_SIZE
|
||||
`endif
|
||||
|
||||
// Number of banks {1, 2, 4, 8,...}
|
||||
// Number of banks
|
||||
`ifndef DNUM_BANKS
|
||||
`define DNUM_BANKS 4
|
||||
`endif
|
||||
|
||||
// Size of a word in bytes
|
||||
`ifndef DWORD_SIZE
|
||||
`define DWORD_SIZE 4
|
||||
`endif
|
||||
|
||||
// Core Request Queue Size
|
||||
`ifndef DCREQ_SIZE
|
||||
`define DCREQ_SIZE `NUM_WARPS
|
||||
|
@ -238,21 +232,11 @@
|
|||
`define DSNRQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Icache Configurable Knobs ==================================================
|
||||
// Icache Configurable Knobs //////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
`ifndef ICACHE_SIZE
|
||||
`define ICACHE_SIZE 2048
|
||||
`endif
|
||||
|
||||
// Size of line inside a bank in bytes
|
||||
`ifndef IBANK_LINE_SIZE
|
||||
`define IBANK_LINE_SIZE `GLOBAL_BLOCK_SIZE
|
||||
`endif
|
||||
|
||||
// Size of a word in bytes
|
||||
`ifndef IWORD_SIZE
|
||||
`define IWORD_SIZE 4
|
||||
`define ICACHE_SIZE 8192
|
||||
`endif
|
||||
|
||||
// Core Request Queue Size
|
||||
|
@ -280,28 +264,18 @@
|
|||
`define IDRFQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// SM Configurable Knobs ======================================================
|
||||
// SM Configurable Knobs //////////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
`ifndef SCACHE_SIZE
|
||||
`define SCACHE_SIZE 1024
|
||||
`define SCACHE_SIZE 4096
|
||||
`endif
|
||||
|
||||
// Size of line inside a bank in bytes
|
||||
`ifndef SBANK_LINE_SIZE
|
||||
`define SBANK_LINE_SIZE `GLOBAL_BLOCK_SIZE
|
||||
`endif
|
||||
|
||||
// Number of banks {1, 2, 4, 8,...}
|
||||
// Number of banks
|
||||
`ifndef SNUM_BANKS
|
||||
`define SNUM_BANKS 4
|
||||
`endif
|
||||
|
||||
// Size of a word in bytes
|
||||
`ifndef SWORD_SIZE
|
||||
`define SWORD_SIZE 4
|
||||
`endif
|
||||
|
||||
// Core Request Queue Size
|
||||
`ifndef SCREQ_SIZE
|
||||
`define SCREQ_SIZE `NUM_WARPS
|
||||
|
@ -312,28 +286,18 @@
|
|||
`define SCWBQ_SIZE `SCREQ_SIZE
|
||||
`endif
|
||||
|
||||
// L2cache Configurable Knobs =================================================
|
||||
// L2cache Configurable Knobs /////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
`ifndef L2CACHE_SIZE
|
||||
`define L2CACHE_SIZE 4096
|
||||
`define L2CACHE_SIZE 131072
|
||||
`endif
|
||||
|
||||
// Size of line inside a bank in bytes
|
||||
`ifndef L2BANK_LINE_SIZE
|
||||
`define L2BANK_LINE_SIZE `GLOBAL_BLOCK_SIZE
|
||||
`endif
|
||||
|
||||
// Number of banks {1, 2, 4, 8,...}
|
||||
// Number of banks
|
||||
`ifndef L2NUM_BANKS
|
||||
`define L2NUM_BANKS 4
|
||||
`endif
|
||||
|
||||
// Size of a word in bytes
|
||||
`ifndef L2WORD_SIZE
|
||||
`define L2WORD_SIZE `L2BANK_LINE_SIZE
|
||||
`endif
|
||||
|
||||
// Core Request Queue Size
|
||||
`ifndef L2CREQ_SIZE
|
||||
`define L2CREQ_SIZE 8
|
||||
|
@ -369,28 +333,18 @@
|
|||
`define L2SNPQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// L3cache Configurable Knobs =================================================
|
||||
// L3cache Configurable Knobs /////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
`ifndef L3CACHE_SIZE
|
||||
`define L3CACHE_SIZE 8192
|
||||
`define L3CACHE_SIZE 262144
|
||||
`endif
|
||||
|
||||
// Size of line inside a bank in bytes
|
||||
`ifndef L3BANK_LINE_SIZE
|
||||
`define L3BANK_LINE_SIZE `GLOBAL_BLOCK_SIZE
|
||||
`endif
|
||||
|
||||
// Number of banks {1, 2, 4, 8,...}
|
||||
// Number of banks
|
||||
`ifndef L3NUM_BANKS
|
||||
`define L3NUM_BANKS 4
|
||||
`endif
|
||||
|
||||
// Size of a word in bytes
|
||||
`ifndef L3WORD_SIZE
|
||||
`define L3WORD_SIZE `L3BANK_LINE_SIZE
|
||||
`endif
|
||||
|
||||
// Core Request Queue Size
|
||||
`ifndef L3CREQ_SIZE
|
||||
`define L3CREQ_SIZE 8
|
||||
|
|
|
@ -6,203 +6,203 @@
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define NW_BITS `LOG2UP(`NUM_WARPS)
|
||||
`define NW_BITS `LOG2UP(`NUM_WARPS)
|
||||
|
||||
`define NT_BITS `LOG2UP(`NUM_THREADS)
|
||||
`define NT_BITS `LOG2UP(`NUM_THREADS)
|
||||
|
||||
`define NC_BITS `LOG2UP(`NUM_CORES)
|
||||
`define NC_BITS `LOG2UP(`NUM_CORES)
|
||||
|
||||
`define NB_BITS `LOG2UP(`NUM_BARRIERS)
|
||||
`define NB_BITS `LOG2UP(`NUM_BARRIERS)
|
||||
|
||||
`define REQS_BITS `LOG2UP(NUM_REQUESTS)
|
||||
`define REQS_BITS `LOG2UP(NUM_REQUESTS)
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`define NUM_REGS 64
|
||||
`define NUM_REGS 64
|
||||
`else
|
||||
`define NUM_REGS 32
|
||||
`define NUM_REGS 32
|
||||
`endif
|
||||
|
||||
`define NR_BITS `LOG2UP(`NUM_REGS)
|
||||
`define NR_BITS `LOG2UP(`NUM_REGS)
|
||||
|
||||
`define CSR_ADDR_BITS 12
|
||||
`define CSR_ADDR_BITS 12
|
||||
|
||||
`define CSR_WIDTH 12
|
||||
`define CSR_WIDTH 12
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define INST_LUI 7'b0110111
|
||||
`define INST_AUIPC 7'b0010111
|
||||
`define INST_JAL 7'b1101111
|
||||
`define INST_JALR 7'b1100111
|
||||
`define INST_B 7'b1100011 // branch instructions
|
||||
`define INST_L 7'b0000011 // load instructions
|
||||
`define INST_S 7'b0100011 // store instructions
|
||||
`define INST_I 7'b0010011 // immediate instructions
|
||||
`define INST_R 7'b0110011 // register instructions
|
||||
`define INST_F 7'b0001111 // Fence instructions
|
||||
`define INST_SYS 7'b1110011 // system instructions
|
||||
`define INST_LUI 7'b0110111
|
||||
`define INST_AUIPC 7'b0010111
|
||||
`define INST_JAL 7'b1101111
|
||||
`define INST_JALR 7'b1100111
|
||||
`define INST_B 7'b1100011 // branch instructions
|
||||
`define INST_L 7'b0000011 // load instructions
|
||||
`define INST_S 7'b0100011 // store instructions
|
||||
`define INST_I 7'b0010011 // immediate instructions
|
||||
`define INST_R 7'b0110011 // register instructions
|
||||
`define INST_F 7'b0001111 // Fence instructions
|
||||
`define INST_SYS 7'b1110011 // system instructions
|
||||
|
||||
`define INST_FL 7'b0000111 // float load instruction
|
||||
`define INST_FS 7'b0100111 // float store instruction
|
||||
`define INST_FMADD 7'b1000011
|
||||
`define INST_FMSUB 7'b1000111
|
||||
`define INST_FNMSUB 7'b1001011
|
||||
`define INST_FNMADD 7'b1001111
|
||||
`define INST_FCI 7'b1010011 // float common instructions
|
||||
`define INST_FL 7'b0000111 // float load instruction
|
||||
`define INST_FS 7'b0100111 // float store instruction
|
||||
`define INST_FMADD 7'b1000011
|
||||
`define INST_FMSUB 7'b1000111
|
||||
`define INST_FNMSUB 7'b1001011
|
||||
`define INST_FNMADD 7'b1001111
|
||||
`define INST_FCI 7'b1010011 // float common instructions
|
||||
|
||||
`define INST_GPU 7'b1101011
|
||||
`define INST_GPU 7'b1101011
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define BYTEEN_SB 3'h0
|
||||
`define BYTEEN_SH 3'h1
|
||||
`define BYTEEN_SW 3'h2
|
||||
`define BYTEEN_UB 3'h4
|
||||
`define BYTEEN_UH 3'h5
|
||||
`define BYTEEN_BITS 3
|
||||
`define BYTEEN_TYPE(x) x[1:0]
|
||||
`define BYTEEN_SB 3'h0
|
||||
`define BYTEEN_SH 3'h1
|
||||
`define BYTEEN_SW 3'h2
|
||||
`define BYTEEN_UB 3'h4
|
||||
`define BYTEEN_UH 3'h5
|
||||
`define BYTEEN_BITS 3
|
||||
`define BYTEEN_TYPE(x) x[1:0]
|
||||
|
||||
`define FRM_RNE 3'b000 // round to nearest even
|
||||
`define FRM_RTZ 3'b001 // round to zero
|
||||
`define FRM_RDN 3'b010 // round to -inf
|
||||
`define FRM_RUP 3'b011 // round to +inf
|
||||
`define FRM_RMM 3'b100 // round to nearest max magnitude
|
||||
`define FRM_DYN 3'b111 // dynamic mode
|
||||
`define FRM_BITS 3
|
||||
`define FRM_RNE 3'b000 // round to nearest even
|
||||
`define FRM_RTZ 3'b001 // round to zero
|
||||
`define FRM_RDN 3'b010 // round to -inf
|
||||
`define FRM_RUP 3'b011 // round to +inf
|
||||
`define FRM_RMM 3'b100 // round to nearest max magnitude
|
||||
`define FRM_DYN 3'b111 // dynamic mode
|
||||
`define FRM_BITS 3
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define EX_NOP 3'h0
|
||||
`define EX_ALU 3'h1
|
||||
`define EX_LSU 3'h2
|
||||
`define EX_CSR 3'h3
|
||||
`define EX_MUL 3'h4
|
||||
`define EX_FPU 3'h5
|
||||
`define EX_GPU 3'h6
|
||||
`define EX_BITS 3
|
||||
`define EX_NOP 3'h0
|
||||
`define EX_ALU 3'h1
|
||||
`define EX_LSU 3'h2
|
||||
`define EX_CSR 3'h3
|
||||
`define EX_MUL 3'h4
|
||||
`define EX_FPU 3'h5
|
||||
`define EX_GPU 3'h6
|
||||
`define EX_BITS 3
|
||||
|
||||
`define NUM_EXS 6
|
||||
`define NE_BITS `LOG2UP(`NUM_EXS)
|
||||
`define NUM_EXS 6
|
||||
`define NE_BITS `LOG2UP(`NUM_EXS)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define OP_BITS 4
|
||||
`define MOD_BITS 3
|
||||
`define OP_BITS 4
|
||||
`define MOD_BITS 3
|
||||
|
||||
`define ALU_ADD 4'b0000
|
||||
`define ALU_LUI 4'b0010
|
||||
`define ALU_AUIPC 4'b0011
|
||||
`define ALU_SLTU 4'b0100
|
||||
`define ALU_SLT 4'b0101
|
||||
`define ALU_SRL 4'b1000
|
||||
`define ALU_SRA 4'b1001
|
||||
`define ALU_SUB 4'b1011
|
||||
`define ALU_AND 4'b1100
|
||||
`define ALU_OR 4'b1101
|
||||
`define ALU_XOR 4'b1110
|
||||
`define ALU_SLL 4'b1111
|
||||
`define ALU_OTHER 4'b0111
|
||||
`define ALU_BITS 4
|
||||
`define ALU_OP(x) x[`ALU_BITS-1:0]
|
||||
`define ALU_ADD 4'b0000
|
||||
`define ALU_LUI 4'b0010
|
||||
`define ALU_AUIPC 4'b0011
|
||||
`define ALU_SLTU 4'b0100
|
||||
`define ALU_SLT 4'b0101
|
||||
`define ALU_SRL 4'b1000
|
||||
`define ALU_SRA 4'b1001
|
||||
`define ALU_SUB 4'b1011
|
||||
`define ALU_AND 4'b1100
|
||||
`define ALU_OR 4'b1101
|
||||
`define ALU_XOR 4'b1110
|
||||
`define ALU_SLL 4'b1111
|
||||
`define ALU_OTHER 4'b0111
|
||||
`define ALU_BITS 4
|
||||
`define ALU_OP(x) x[`ALU_BITS-1:0]
|
||||
`define ALU_OP_CLASS(x) x[3:2]
|
||||
`define ALU_SIGNED(x) x[0]
|
||||
`define ALU_SIGNED(x) x[0]
|
||||
|
||||
`define BR_EQ 4'b0000
|
||||
`define BR_NE 4'b0010
|
||||
`define BR_LTU 4'b0100
|
||||
`define BR_GEU 4'b0110
|
||||
`define BR_LT 4'b0101
|
||||
`define BR_GE 4'b0111
|
||||
`define BR_JAL 4'b1000
|
||||
`define BR_JALR 4'b1001
|
||||
`define BR_ECALL 4'b1010
|
||||
`define BR_EBREAK 4'b1011
|
||||
`define BR_MRET 4'b1100
|
||||
`define BR_SRET 4'b1101
|
||||
`define BR_DRET 4'b1110
|
||||
`define BR_OTHER 4'b1111
|
||||
`define BR_BITS 4
|
||||
`define BR_OP(x) x[`BR_BITS-1:0]
|
||||
`define BR_NEG(x) x[1]
|
||||
`define BR_LESS(x) x[2]
|
||||
`define BR_STATIC(x) x[3]
|
||||
`define ALU_BR_BITS 4
|
||||
`define ALU_BR_OP(x) x[`ALU_BR_BITS-1:0]
|
||||
`define IS_BR_MOD(x) x[0]
|
||||
`define BR_EQ 4'b0000
|
||||
`define BR_NE 4'b0010
|
||||
`define BR_LTU 4'b0100
|
||||
`define BR_GEU 4'b0110
|
||||
`define BR_LT 4'b0101
|
||||
`define BR_GE 4'b0111
|
||||
`define BR_JAL 4'b1000
|
||||
`define BR_JALR 4'b1001
|
||||
`define BR_ECALL 4'b1010
|
||||
`define BR_EBREAK 4'b1011
|
||||
`define BR_MRET 4'b1100
|
||||
`define BR_SRET 4'b1101
|
||||
`define BR_DRET 4'b1110
|
||||
`define BR_OTHER 4'b1111
|
||||
`define BR_BITS 4
|
||||
`define BR_OP(x) x[`BR_BITS-1:0]
|
||||
`define BR_NEG(x) x[1]
|
||||
`define BR_LESS(x) x[2]
|
||||
`define BR_STATIC(x) x[3]
|
||||
`define ALU_BR_BITS 4
|
||||
`define ALU_BR_OP(x) x[`ALU_BR_BITS-1:0]
|
||||
`define IS_BR_MOD(x) x[0]
|
||||
|
||||
`define LSU_LB {1'b0, `BYTEEN_SB}
|
||||
`define LSU_LH {1'b0, `BYTEEN_SH}
|
||||
`define LSU_LW {1'b0, `BYTEEN_SW}
|
||||
`define LSU_LBU {1'b0, `BYTEEN_UB}
|
||||
`define LSU_LHU {1'b0, `BYTEEN_UH}
|
||||
`define LSU_SB {1'b1, `BYTEEN_SB}
|
||||
`define LSU_SH {1'b1, `BYTEEN_SH}
|
||||
`define LSU_SW {1'b1, `BYTEEN_SW}
|
||||
`define LSU_SBU {1'b1, `BYTEEN_UB}
|
||||
`define LSU_SHU {1'b1, `BYTEEN_UH}
|
||||
`define LSU_BITS 4
|
||||
`define LSU_RW(x) x[3]
|
||||
`define LSU_BE(x) x[2:0]
|
||||
`define LSU_LB {1'b0, `BYTEEN_SB}
|
||||
`define LSU_LH {1'b0, `BYTEEN_SH}
|
||||
`define LSU_LW {1'b0, `BYTEEN_SW}
|
||||
`define LSU_LBU {1'b0, `BYTEEN_UB}
|
||||
`define LSU_LHU {1'b0, `BYTEEN_UH}
|
||||
`define LSU_SB {1'b1, `BYTEEN_SB}
|
||||
`define LSU_SH {1'b1, `BYTEEN_SH}
|
||||
`define LSU_SW {1'b1, `BYTEEN_SW}
|
||||
`define LSU_SBU {1'b1, `BYTEEN_UB}
|
||||
`define LSU_SHU {1'b1, `BYTEEN_UH}
|
||||
`define LSU_BITS 4
|
||||
`define LSU_RW(x) x[3]
|
||||
`define LSU_BE(x) x[2:0]
|
||||
|
||||
`define CSR_RW 2'h0
|
||||
`define CSR_RS 2'h1
|
||||
`define CSR_RC 2'h2
|
||||
`define CSR_OTHER 2'h3
|
||||
`define CSR_BITS 2
|
||||
`define CSR_OP(x) x[`CSR_BITS-1:0]
|
||||
`define CSR_RW 2'h0
|
||||
`define CSR_RS 2'h1
|
||||
`define CSR_RC 2'h2
|
||||
`define CSR_OTHER 2'h3
|
||||
`define CSR_BITS 2
|
||||
`define CSR_OP(x) x[`CSR_BITS-1:0]
|
||||
|
||||
`define MUL_MUL 3'h0
|
||||
`define MUL_MULH 3'h1
|
||||
`define MUL_MULHSU 3'h2
|
||||
`define MUL_MULHU 3'h3
|
||||
`define MUL_DIV 3'h4
|
||||
`define MUL_DIVU 3'h5
|
||||
`define MUL_REM 3'h6
|
||||
`define MUL_REMU 3'h7
|
||||
`define MUL_BITS 3
|
||||
`define MUL_OP(x) x[`MUL_BITS-1:0]
|
||||
`define IS_DIV_OP(x) x[2]
|
||||
`define MUL_MUL 3'h0
|
||||
`define MUL_MULH 3'h1
|
||||
`define MUL_MULHSU 3'h2
|
||||
`define MUL_MULHU 3'h3
|
||||
`define MUL_DIV 3'h4
|
||||
`define MUL_DIVU 3'h5
|
||||
`define MUL_REM 3'h6
|
||||
`define MUL_REMU 3'h7
|
||||
`define MUL_BITS 3
|
||||
`define MUL_OP(x) x[`MUL_BITS-1:0]
|
||||
`define IS_DIV_OP(x) x[2]
|
||||
|
||||
`define FPU_ADD 4'h0
|
||||
`define FPU_SUB 4'h1
|
||||
`define FPU_MUL 4'h2
|
||||
`define FPU_DIV 4'h3
|
||||
`define FPU_SQRT 4'h4
|
||||
`define FPU_MADD 4'h5
|
||||
`define FPU_MSUB 4'h6
|
||||
`define FPU_NMSUB 4'h7
|
||||
`define FPU_NMADD 4'h8
|
||||
`define FPU_CVTWS 4'h9 // FCVT.W.S
|
||||
`define FPU_CVTWUS 4'hA // FCVT.WU.S
|
||||
`define FPU_CVTSW 4'hB // FCVT.S.W
|
||||
`define FPU_CVTSWU 4'hC // FCVT.S.WU
|
||||
`define FPU_CLASS 4'hD
|
||||
`define FPU_CMP 4'hE
|
||||
`define FPU_MISC 4'hF // SGNJ, SGNJN, SGNJX, FMIN, FMAX, MVXW, MVWX
|
||||
`define FPU_BITS 4
|
||||
`define FPU_OP(x) x[`FPU_BITS-1:0]
|
||||
`define FPU_ADD 4'h0
|
||||
`define FPU_SUB 4'h1
|
||||
`define FPU_MUL 4'h2
|
||||
`define FPU_DIV 4'h3
|
||||
`define FPU_SQRT 4'h4
|
||||
`define FPU_MADD 4'h5
|
||||
`define FPU_MSUB 4'h6
|
||||
`define FPU_NMSUB 4'h7
|
||||
`define FPU_NMADD 4'h8
|
||||
`define FPU_CVTWS 4'h9 // FCVT.W.S
|
||||
`define FPU_CVTWUS 4'hA // FCVT.WU.S
|
||||
`define FPU_CVTSW 4'hB // FCVT.S.W
|
||||
`define FPU_CVTSWU 4'hC // FCVT.S.WU
|
||||
`define FPU_CLASS 4'hD
|
||||
`define FPU_CMP 4'hE
|
||||
`define FPU_MISC 4'hF // SGNJ, SGNJN, SGNJX, FMIN, FMAX, MVXW, MVWX
|
||||
`define FPU_BITS 4
|
||||
`define FPU_OP(x) x[`FPU_BITS-1:0]
|
||||
|
||||
`define GPU_TMC 3'h0
|
||||
`define GPU_WSPAWN 3'h1
|
||||
`define GPU_SPLIT 3'h2
|
||||
`define GPU_JOIN 3'h3
|
||||
`define GPU_BAR 3'h4
|
||||
`define GPU_OTHER 3'h7
|
||||
`define GPU_BITS 3
|
||||
`define GPU_OP(x) x[`GPU_BITS-1:0]
|
||||
`define GPU_TMC 3'h0
|
||||
`define GPU_WSPAWN 3'h1
|
||||
`define GPU_SPLIT 3'h2
|
||||
`define GPU_JOIN 3'h3
|
||||
`define GPU_BAR 3'h4
|
||||
`define GPU_OTHER 3'h7
|
||||
`define GPU_BITS 3
|
||||
`define GPU_OP(x) x[`GPU_BITS-1:0]
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef EXT_M_ENABLE
|
||||
`define ISA_EXT_M (1 << 12)
|
||||
`define ISA_EXT_M (1 << 12)
|
||||
`else
|
||||
`define ISA_EXT_M 0
|
||||
`define ISA_EXT_M 0
|
||||
`endif
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`define ISA_EXT_F (1 << 5)
|
||||
`define ISA_EXT_F (1 << 5)
|
||||
`else
|
||||
`define ISA_EXT_F 0
|
||||
`define ISA_EXT_F 0
|
||||
`endif
|
||||
|
||||
`define ISA_CODE (0 << 0) // A - Atomic Instructions extension \
|
||||
|
@ -234,144 +234,174 @@
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO // pc, rd, wid
|
||||
`define DBG_CACHE_REQ_MDATAW (32 + `NR_BITS + `NW_BITS)
|
||||
`ifdef DBG_CACHE_REQ_INFO // pc, rd, wid
|
||||
`define DBG_CACHE_REQ_MDATAW (32 + `NR_BITS + `NW_BITS)
|
||||
`else
|
||||
`define DBG_CACHE_REQ_MDATAW 0
|
||||
`define DBG_CACHE_REQ_MDATAW 0
|
||||
`endif
|
||||
|
||||
////////////////////////// Dcache Configurable Knobs //////////////////////////
|
||||
|
||||
// Cache ID
|
||||
`define DCACHE_ID 32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 0
|
||||
`define DCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 0)
|
||||
|
||||
// Block size in bytes
|
||||
`define DBANK_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `GLOBAL_BLOCK_SIZE)
|
||||
|
||||
// Word size in bytes
|
||||
`define DWORD_SIZE 4
|
||||
|
||||
// Core request tag ID bits (used for tag sharing)
|
||||
`define DCORE_TAG_ID_BITS `LOG2UP(`LSUQ_SIZE)
|
||||
`define DCORE_TAG_ID_BITS `LOG2UP(`LSUQ_SIZE)
|
||||
|
||||
// Core request tag bits
|
||||
`define DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS)
|
||||
`define DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS)
|
||||
|
||||
// DRAM request data bits
|
||||
`define DDRAM_LINE_WIDTH (`DBANK_LINE_SIZE * 8)
|
||||
`define DDRAM_LINE_WIDTH (`DBANK_LINE_SIZE * 8)
|
||||
|
||||
// DRAM request address bits
|
||||
`define DDRAM_ADDR_WIDTH (32 - `CLOG2(`DBANK_LINE_SIZE))
|
||||
`define DDRAM_ADDR_WIDTH (32 - `CLOG2(`DBANK_LINE_SIZE))
|
||||
|
||||
// DRAM byte enable bits
|
||||
`define DDRAM_BYTEEN_WIDTH `DBANK_LINE_SIZE
|
||||
`define DDRAM_BYTEEN_WIDTH `DBANK_LINE_SIZE
|
||||
|
||||
// DRAM request tag bits
|
||||
`define DDRAM_TAG_WIDTH `DDRAM_ADDR_WIDTH
|
||||
`define DDRAM_TAG_WIDTH `DDRAM_ADDR_WIDTH
|
||||
|
||||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||
`define DNUM_REQUESTS `NUM_THREADS
|
||||
// Core request size
|
||||
`define DNUM_REQUESTS `NUM_THREADS
|
||||
|
||||
// Snoop request tag bits
|
||||
`define DSNP_TAG_WIDTH ((`NUM_CORES > 1) ? `LOG2UP(`L2SNRQ_SIZE) : `L2SNP_TAG_WIDTH)
|
||||
`define DSNP_TAG_WIDTH ((`NUM_CORES > 1) ? `LOG2UP(`L2SNRQ_SIZE) : `L2SNP_TAG_WIDTH)
|
||||
|
||||
////////////////////////// Icache Configurable Knobs //////////////////////////
|
||||
|
||||
// Cache ID
|
||||
`define ICACHE_ID 32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1
|
||||
`define ICACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1)
|
||||
|
||||
// Block size in bytes
|
||||
`define IBANK_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `GLOBAL_BLOCK_SIZE)
|
||||
|
||||
// Word size in bytes
|
||||
`define IWORD_SIZE 4
|
||||
|
||||
// Number of banks
|
||||
`define INUM_BANKS 1
|
||||
`define INUM_BANKS 1
|
||||
|
||||
// Core request address bits
|
||||
`define ICORE_ADDR_WIDTH (32-`CLOG2(`IWORD_SIZE))
|
||||
`define ICORE_ADDR_WIDTH (32-`CLOG2(`IWORD_SIZE))
|
||||
|
||||
// Core request byte enable bits
|
||||
`define ICORE_BYTEEN_WIDTH `DWORD_SIZE
|
||||
// Icache byte-enable width follows the icache word size (one enable bit per byte of IWORD_SIZE),
// so it stays correct if the icache and dcache word sizes ever diverge.
`define ICORE_BYTEEN_WIDTH `IWORD_SIZE
|
||||
|
||||
// Core request tag ID bits (used for tag sharing)
|
||||
`define ICORE_TAG_ID_BITS `NW_BITS
|
||||
`define ICORE_TAG_ID_BITS `NW_BITS
|
||||
|
||||
// Core request tag bits
|
||||
`define ICORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `ICORE_TAG_ID_BITS)
|
||||
`define ICORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `ICORE_TAG_ID_BITS)
|
||||
|
||||
// DRAM request data bits
|
||||
`define IDRAM_LINE_WIDTH (`IBANK_LINE_SIZE * 8)
|
||||
`define IDRAM_LINE_WIDTH (`IBANK_LINE_SIZE * 8)
|
||||
|
||||
// DRAM request address bits
|
||||
`define IDRAM_ADDR_WIDTH (32 - `CLOG2(`IBANK_LINE_SIZE))
|
||||
`define IDRAM_ADDR_WIDTH (32 - `CLOG2(`IBANK_LINE_SIZE))
|
||||
|
||||
// DRAM byte enable bits
|
||||
`define IDRAM_BYTEEN_WIDTH `IBANK_LINE_SIZE
|
||||
`define IDRAM_BYTEEN_WIDTH `IBANK_LINE_SIZE
|
||||
|
||||
// DRAM request tag bits
|
||||
`define IDRAM_TAG_WIDTH `IDRAM_ADDR_WIDTH
|
||||
`define IDRAM_TAG_WIDTH `IDRAM_ADDR_WIDTH
|
||||
|
||||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||
`define INUM_REQUESTS 1
|
||||
// Core request size
|
||||
`define INUM_REQUESTS 1
|
||||
|
||||
////////////////////////// SM Configurable Knobs //////////////////////////////
|
||||
|
||||
// Cache ID
|
||||
`define SCACHE_ID 32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 2
|
||||
`define SCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 2)
|
||||
|
||||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||
`define SNUM_REQUESTS `NUM_THREADS
|
||||
// Block size in bytes
|
||||
`define SBANK_LINE_SIZE 4
|
||||
|
||||
// Word size in bytes
|
||||
`define SWORD_SIZE 4
|
||||
|
||||
// Core request size
|
||||
`define SNUM_REQUESTS `NUM_THREADS
|
||||
|
||||
// DRAM request address bits
|
||||
`define SDRAM_ADDR_WIDTH (32 - `CLOG2(`SBANK_LINE_SIZE))
|
||||
`define SDRAM_ADDR_WIDTH (32 - `CLOG2(`SBANK_LINE_SIZE))
|
||||
|
||||
// DRAM request tag bits
|
||||
`define SDRAM_TAG_WIDTH `SDRAM_ADDR_WIDTH
|
||||
// DRAM request tag bits
|
||||
`define SDRAM_TAG_WIDTH `SDRAM_ADDR_WIDTH
|
||||
|
||||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||
`define SNUM_REQUESTS `NUM_THREADS
|
||||
// Core request size
|
||||
`define SNUM_REQUESTS `NUM_THREADS
|
||||
|
||||
////////////////////////// L2cache Configurable Knobs /////////////////////////
|
||||
|
||||
// Cache ID
|
||||
`define L2CACHE_ID 32'(`L3_ENABLE) + CLUSTER_ID
|
||||
`define L2CACHE_ID (32'(`L3_ENABLE) + CLUSTER_ID)
|
||||
|
||||
// Block size in bytes
|
||||
`define L2BANK_LINE_SIZE `GLOBAL_BLOCK_SIZE
|
||||
|
||||
// Word size in bytes
|
||||
`define L2WORD_SIZE `DBANK_LINE_SIZE
|
||||
|
||||
// Core request tag bits
|
||||
`define L2CORE_TAG_WIDTH (`DCORE_TAG_WIDTH + `CLOG2(`NUM_CORES))
|
||||
`define L2CORE_TAG_WIDTH (`DCORE_TAG_WIDTH + `CLOG2(`NUM_CORES))
|
||||
|
||||
// DRAM request data bits
|
||||
`define L2DRAM_LINE_WIDTH (`L2_ENABLE ? (`L2BANK_LINE_SIZE * 8) : `DDRAM_LINE_WIDTH)
|
||||
`define L2DRAM_LINE_WIDTH (`L2BANK_LINE_SIZE * 8)
|
||||
|
||||
// DRAM request address bits
|
||||
`define L2DRAM_ADDR_WIDTH (`L2_ENABLE ? (32 - `CLOG2(`L2BANK_LINE_SIZE)) : `DDRAM_ADDR_WIDTH)
|
||||
`define L2DRAM_ADDR_WIDTH (32 - `CLOG2(`L2BANK_LINE_SIZE))
|
||||
|
||||
// DRAM byte enable bits
|
||||
`define L2DRAM_BYTEEN_WIDTH (`L2_ENABLE ? `L2BANK_LINE_SIZE : `DDRAM_BYTEEN_WIDTH)
|
||||
`define L2DRAM_BYTEEN_WIDTH `L2BANK_LINE_SIZE
|
||||
|
||||
// DRAM request tag bits
|
||||
`define L2DRAM_TAG_WIDTH (`L2_ENABLE ? `L2DRAM_ADDR_WIDTH : (`L2DRAM_ADDR_WIDTH+`CLOG2(`NUM_CORES*2)))
|
||||
`define L2DRAM_TAG_WIDTH (`L2_ENABLE ? `L2DRAM_ADDR_WIDTH : (`L2DRAM_ADDR_WIDTH+`CLOG2(`NUM_CORES*2)))
|
||||
|
||||
// Snoop request tag bits
|
||||
`define L2SNP_TAG_WIDTH (`L3_ENABLE ? `LOG2UP(`L3SNRQ_SIZE) : `L3SNP_TAG_WIDTH)
|
||||
`define L2SNP_TAG_WIDTH (`L3_ENABLE ? `LOG2UP(`L3SNRQ_SIZE) : `L3SNP_TAG_WIDTH)
|
||||
|
||||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||
`define L2NUM_REQUESTS (2 * `NUM_CORES)
|
||||
// Core request size
|
||||
`define L2NUM_REQUESTS (2 * `NUM_CORES)
|
||||
|
||||
////////////////////////// L3cache Configurable Knobs /////////////////////////
|
||||
|
||||
// Cache ID
|
||||
`define L3CACHE_ID 0
|
||||
`define L3CACHE_ID 0
|
||||
|
||||
// Block size in bytes
|
||||
`define L3BANK_LINE_SIZE `GLOBAL_BLOCK_SIZE
|
||||
|
||||
// Word size in bytes
|
||||
`define L3WORD_SIZE `L2BANK_LINE_SIZE
|
||||
|
||||
// Core request tag bits
|
||||
`define L3CORE_TAG_WIDTH (`L2CORE_TAG_WIDTH + `CLOG2(`NUM_CLUSTERS))
|
||||
`define L3CORE_TAG_WIDTH (`L2CORE_TAG_WIDTH + `CLOG2(`NUM_CLUSTERS))
|
||||
|
||||
// DRAM request data bits
|
||||
`define L3DRAM_LINE_WIDTH (`L3_ENABLE ? (`L3BANK_LINE_SIZE * 8) : `L2DRAM_LINE_WIDTH)
|
||||
`define L3DRAM_LINE_WIDTH (`L3BANK_LINE_SIZE * 8)
|
||||
|
||||
// DRAM request address bits
|
||||
`define L3DRAM_ADDR_WIDTH (`L3_ENABLE ? (32 - `CLOG2(`L3BANK_LINE_SIZE)) : `L2DRAM_ADDR_WIDTH)
|
||||
`define L3DRAM_ADDR_WIDTH (32 - `CLOG2(`L3BANK_LINE_SIZE))
|
||||
|
||||
// DRAM byte enable bits
|
||||
`define L3DRAM_BYTEEN_WIDTH (`L3_ENABLE ? `L3BANK_LINE_SIZE : `L2DRAM_BYTEEN_WIDTH)
|
||||
`define L3DRAM_BYTEEN_WIDTH `L3BANK_LINE_SIZE
|
||||
|
||||
// DRAM request tag bits
|
||||
`define L3DRAM_TAG_WIDTH (`L3_ENABLE ? `L3DRAM_ADDR_WIDTH : `L2DRAM_TAG_WIDTH)
|
||||
`define L3DRAM_TAG_WIDTH (`L3_ENABLE ? `L3DRAM_ADDR_WIDTH : `L2DRAM_TAG_WIDTH)
|
||||
|
||||
// Snoop request tag bits
|
||||
`define L3SNP_TAG_WIDTH 16
|
||||
`define L3SNP_TAG_WIDTH 16
|
||||
|
||||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||
`define L3NUM_REQUESTS `NUM_CLUSTERS
|
||||
// Core request size
|
||||
`define L3NUM_REQUESTS `NUM_CLUSTERS
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
|
|
@ -168,9 +168,9 @@ module VX_ibuffer #(
|
|||
for (integer i = 0; i < `NUM_WARPS; i++) begin
|
||||
nw += 32'(q_size[i] != 0);
|
||||
end
|
||||
assert(nw == 32'(num_warps)) else $display("%t: error: invalid num_warps: nw=%0d, ref=%0d", $time, num_warps, nw);
|
||||
assert(~deq_valid || (q_size[deq_wid] != 0)) else $display("%t: error: invalid schedule: wid=%0d", $time, deq_wid);
|
||||
assert(~deq_fire || (q_size[deq_wid] != 0)) else $display("%t: error: invalid dequeu: wid=%0d", $time, deq_wid);
|
||||
assert(nw == 32'(num_warps)) else $error("%t: error: invalid num_warps: nw=%0d, ref=%0d", $time, num_warps, nw);
|
||||
assert(~deq_valid || (q_size[deq_wid] != 0)) else $error("%t: error: invalid schedule: wid=%0d", $time, deq_wid);
|
||||
assert(~deq_fire || (q_size[deq_wid] != 0)) else $error("%t: error: invalid dequeu: wid=%0d", $time, deq_wid);
|
||||
end
|
||||
`endif
|
||||
end
|
||||
|
|
|
@ -75,7 +75,6 @@ module VX_mem_unit # (
|
|||
.DRAM_ENABLE (0),
|
||||
.FLUSH_ENABLE (0),
|
||||
.WRITE_ENABLE (1),
|
||||
.SNOOP_FORWARDING (0),
|
||||
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
|
||||
.DRAM_TAG_WIDTH (`SDRAM_TAG_WIDTH)
|
||||
|
@ -127,44 +126,31 @@ module VX_mem_unit # (
|
|||
`UNUSED_PIN (snp_rsp_tag),
|
||||
.snp_rsp_ready (1'b0),
|
||||
|
||||
// Snoop forward out
|
||||
`UNUSED_PIN (snp_fwdout_valid),
|
||||
`UNUSED_PIN (snp_fwdout_addr),
|
||||
`UNUSED_PIN (snp_fwdout_invalidate),
|
||||
`UNUSED_PIN (snp_fwdout_tag),
|
||||
.snp_fwdout_ready (1'b0),
|
||||
|
||||
// Snoop forward in
|
||||
.snp_fwdin_valid (1'b0),
|
||||
.snp_fwdin_tag (0),
|
||||
`UNUSED_PIN (snp_fwdin_ready),
|
||||
|
||||
// Miss status
|
||||
`UNUSED_PIN (miss_vec)
|
||||
);
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_ID (`DCACHE_ID),
|
||||
.CACHE_SIZE (`DCACHE_SIZE),
|
||||
.BANK_LINE_SIZE (`DBANK_LINE_SIZE),
|
||||
.NUM_BANKS (`DNUM_BANKS),
|
||||
.WORD_SIZE (`DWORD_SIZE),
|
||||
.NUM_REQUESTS (`DNUM_REQUESTS),
|
||||
.CREQ_SIZE (`DCREQ_SIZE),
|
||||
.MRVQ_SIZE (`DMRVQ_SIZE),
|
||||
.DRFQ_SIZE (`DDRFQ_SIZE),
|
||||
.SNRQ_SIZE (`DSNRQ_SIZE),
|
||||
.CWBQ_SIZE (`DCWBQ_SIZE),
|
||||
.DREQ_SIZE (`DDREQ_SIZE),
|
||||
.SNPQ_SIZE (`DSNPQ_SIZE),
|
||||
.DRAM_ENABLE (1),
|
||||
.FLUSH_ENABLE (1),
|
||||
.WRITE_ENABLE (1),
|
||||
.SNOOP_FORWARDING (0),
|
||||
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
|
||||
.DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH),
|
||||
.SNP_REQ_TAG_WIDTH (`DSNP_TAG_WIDTH)
|
||||
.CACHE_ID (`DCACHE_ID),
|
||||
.CACHE_SIZE (`DCACHE_SIZE),
|
||||
.BANK_LINE_SIZE (`DBANK_LINE_SIZE),
|
||||
.NUM_BANKS (`DNUM_BANKS),
|
||||
.WORD_SIZE (`DWORD_SIZE),
|
||||
.NUM_REQUESTS (`DNUM_REQUESTS),
|
||||
.CREQ_SIZE (`DCREQ_SIZE),
|
||||
.MRVQ_SIZE (`DMRVQ_SIZE),
|
||||
.DRFQ_SIZE (`DDRFQ_SIZE),
|
||||
.SNRQ_SIZE (`DSNRQ_SIZE),
|
||||
.CWBQ_SIZE (`DCWBQ_SIZE),
|
||||
.DREQ_SIZE (`DDREQ_SIZE),
|
||||
.SNPQ_SIZE (`DSNPQ_SIZE),
|
||||
.DRAM_ENABLE (1),
|
||||
.FLUSH_ENABLE (1),
|
||||
.WRITE_ENABLE (1),
|
||||
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
|
||||
.DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH),
|
||||
.SNP_TAG_WIDTH (`DSNP_TAG_WIDTH)
|
||||
) dcache (
|
||||
`SCOPE_BIND_VX_mem_unit_dcache
|
||||
|
||||
|
@ -212,18 +198,6 @@ module VX_mem_unit # (
|
|||
.snp_rsp_valid (dcache_snp_rsp_if.valid),
|
||||
.snp_rsp_tag (dcache_snp_rsp_if.tag),
|
||||
.snp_rsp_ready (dcache_snp_rsp_if.ready),
|
||||
|
||||
// Snoop forward out
|
||||
`UNUSED_PIN (snp_fwdout_valid),
|
||||
`UNUSED_PIN (snp_fwdout_addr),
|
||||
`UNUSED_PIN (snp_fwdout_invalidate),
|
||||
`UNUSED_PIN (snp_fwdout_tag),
|
||||
.snp_fwdout_ready (1'b0),
|
||||
|
||||
// Snoop forward in
|
||||
.snp_fwdin_valid (1'b0),
|
||||
.snp_fwdin_tag (0),
|
||||
`UNUSED_PIN (snp_fwdin_ready),
|
||||
|
||||
// Miss status
|
||||
`UNUSED_PIN (miss_vec)
|
||||
|
@ -246,7 +220,6 @@ module VX_mem_unit # (
|
|||
.DRAM_ENABLE (1),
|
||||
.FLUSH_ENABLE (0),
|
||||
.WRITE_ENABLE (0),
|
||||
.SNOOP_FORWARDING (0),
|
||||
.CORE_TAG_WIDTH (`ICORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS),
|
||||
.DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH)
|
||||
|
@ -298,18 +271,6 @@ module VX_mem_unit # (
|
|||
`UNUSED_PIN (snp_rsp_tag),
|
||||
.snp_rsp_ready (1'b0),
|
||||
|
||||
// Snoop forward out
|
||||
`UNUSED_PIN (snp_fwdout_valid),
|
||||
`UNUSED_PIN (snp_fwdout_addr),
|
||||
`UNUSED_PIN (snp_fwdout_invalidate),
|
||||
`UNUSED_PIN (snp_fwdout_tag),
|
||||
.snp_fwdout_ready (1'b0),
|
||||
|
||||
// Snoop forward in
|
||||
.snp_fwdin_valid (1'b0),
|
||||
.snp_fwdin_tag (0),
|
||||
`UNUSED_PIN (snp_fwdin_ready),
|
||||
|
||||
// Miss status
|
||||
`UNUSED_PIN (miss_vec)
|
||||
);
|
||||
|
|
142
hw/rtl/Vortex.v
142
hw/rtl/Vortex.v
|
@ -320,56 +320,70 @@ module Vortex (
|
|||
|
||||
// L3 Cache ///////////////////////////////////////////////////////////
|
||||
|
||||
wire [`L3NUM_REQUESTS-1:0] cluster_dram_req_valid;
|
||||
wire [`L3NUM_REQUESTS-1:0] cluster_dram_req_rw;
|
||||
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] cluster_dram_req_byteen;
|
||||
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_ADDR_WIDTH-1:0] cluster_dram_req_addr;
|
||||
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] cluster_dram_req_data;
|
||||
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] cluster_dram_req_tag;
|
||||
|
||||
wire [`L3NUM_REQUESTS-1:0] cluster_dram_rsp_valid;
|
||||
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] cluster_dram_rsp_data;
|
||||
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] cluster_dram_rsp_tag;
|
||||
wire cluster_dram_rsp_ready;
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] cluster_snp_fwdout_valid;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] cluster_snp_fwdout_addr;
|
||||
wire [`NUM_CLUSTERS-1:0] cluster_snp_fwdout_invalidate;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] cluster_snp_fwdout_tag;
|
||||
wire [`NUM_CLUSTERS-1:0] cluster_snp_fwdout_ready;
|
||||
wire snp_fwd_rsp_valid;
|
||||
wire [`L3DRAM_ADDR_WIDTH-1:0] snp_fwd_rsp_addr;
|
||||
wire snp_fwd_rsp_invalidate;
|
||||
wire [`L3SNP_TAG_WIDTH-1:0] snp_fwd_rsp_tag;
|
||||
wire snp_fwd_rsp_ready;
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] cluster_snp_fwdin_valid;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] cluster_snp_fwdin_tag;
|
||||
wire [`NUM_CLUSTERS-1:0] cluster_snp_fwdin_ready;
|
||||
reg [`L3NUM_REQUESTS-1:0] cluster_dram_rsp_ready_other;
|
||||
|
||||
for (genvar i = 0; i < `L3NUM_REQUESTS; i++) begin
|
||||
// Core Request
|
||||
assign cluster_dram_req_valid [i] = per_cluster_dram_req_valid [i];
|
||||
assign cluster_dram_req_rw [i] = per_cluster_dram_req_rw [i];
|
||||
assign cluster_dram_req_byteen [i] = per_cluster_dram_req_byteen[i];
|
||||
assign cluster_dram_req_addr [i] = per_cluster_dram_req_addr [i];
|
||||
assign cluster_dram_req_tag [i] = per_cluster_dram_req_tag [i];
|
||||
assign cluster_dram_req_data [i] = per_cluster_dram_req_data [i];
|
||||
|
||||
// Core Response
|
||||
assign per_cluster_dram_rsp_valid [i] = cluster_dram_rsp_valid [i] && cluster_dram_rsp_ready;
|
||||
assign per_cluster_dram_rsp_data [i] = cluster_dram_rsp_data [i];
|
||||
assign per_cluster_dram_rsp_tag [i] = cluster_dram_rsp_tag [i];
|
||||
|
||||
// Snoop Forwarding out
|
||||
assign per_cluster_snp_req_valid [i] = cluster_snp_fwdout_valid[i];
|
||||
assign per_cluster_snp_req_addr [i] = cluster_snp_fwdout_addr[i];
|
||||
assign per_cluster_snp_req_invalidate [i] = cluster_snp_fwdout_invalidate[i];
|
||||
assign per_cluster_snp_req_tag [i] = cluster_snp_fwdout_tag[i];
|
||||
assign cluster_snp_fwdout_ready [i] = per_cluster_snp_req_ready[i];
|
||||
|
||||
// Snoop Forwarding in
|
||||
assign cluster_snp_fwdin_valid [i] = per_cluster_snp_rsp_valid [i];
|
||||
assign cluster_snp_fwdin_tag [i] = per_cluster_snp_rsp_tag [i];
|
||||
assign per_cluster_snp_rsp_ready [i] = cluster_snp_fwdin_ready [i];
|
||||
always @(*) begin
|
||||
cluster_dram_rsp_ready_other = {`L3NUM_REQUESTS{1'b1}};
|
||||
for (integer i = 0; i < `L3NUM_REQUESTS; i++) begin
|
||||
for (integer j = 0; j < `L3NUM_REQUESTS; j++) begin
|
||||
if (i != j)
|
||||
cluster_dram_rsp_ready_other[i] &= (per_cluster_dram_rsp_ready [j] | !cluster_dram_rsp_valid [j]);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign cluster_dram_rsp_ready = (& per_cluster_dram_rsp_ready);
|
||||
for (genvar i = 0; i < `L3NUM_REQUESTS; i++) begin
|
||||
// Core Response
|
||||
assign per_cluster_dram_rsp_valid [i] = cluster_dram_rsp_valid [i] & cluster_dram_rsp_ready_other [i];
|
||||
assign per_cluster_dram_rsp_data [i] = cluster_dram_rsp_data [i];
|
||||
assign per_cluster_dram_rsp_tag [i] = cluster_dram_rsp_tag [i];
|
||||
end
|
||||
assign cluster_dram_rsp_ready = & (per_cluster_dram_rsp_ready | ~cluster_dram_rsp_valid);
|
||||
|
||||
VX_snp_forwarder #(
|
||||
.CACHE_ID (`L3CACHE_ID),
|
||||
.NUM_REQUESTS (`NUM_CLUSTERS),
|
||||
.SRC_ADDR_WIDTH (`L3DRAM_ADDR_WIDTH),
|
||||
.DST_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH),
|
||||
.SNP_TAG_WIDTH (`L3SNP_TAG_WIDTH),
|
||||
.SNRQ_SIZE (`L3SNRQ_SIZE)
|
||||
) snp_forwarder (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.snp_req_valid (snp_req_valid),
|
||||
.snp_req_addr (snp_req_addr),
|
||||
.snp_req_invalidate (snp_req_invalidate),
|
||||
.snp_req_tag (snp_req_tag),
|
||||
.snp_req_ready (snp_req_ready),
|
||||
|
||||
.snp_rsp_valid (snp_fwd_rsp_valid),
|
||||
.snp_rsp_addr (snp_fwd_rsp_addr),
|
||||
.snp_rsp_invalidate (snp_fwd_rsp_invalidate),
|
||||
.snp_rsp_tag (snp_fwd_rsp_tag),
|
||||
.snp_rsp_ready (snp_fwd_rsp_ready),
|
||||
|
||||
.snp_fwdout_valid (per_cluster_snp_req_valid),
|
||||
.snp_fwdout_addr (per_cluster_snp_req_addr),
|
||||
.snp_fwdout_invalidate(per_cluster_snp_req_invalidate),
|
||||
.snp_fwdout_tag (per_cluster_snp_req_tag),
|
||||
.snp_fwdout_ready (per_cluster_snp_req_ready),
|
||||
|
||||
.snp_fwdin_valid (per_cluster_snp_rsp_valid),
|
||||
.snp_fwdin_tag (per_cluster_snp_rsp_tag),
|
||||
.snp_fwdin_ready (per_cluster_snp_rsp_ready)
|
||||
);
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_ID (`L3CACHE_ID),
|
||||
|
@ -388,13 +402,10 @@ module Vortex (
|
|||
.DRAM_ENABLE (1),
|
||||
.FLUSH_ENABLE (1),
|
||||
.WRITE_ENABLE (1),
|
||||
.SNOOP_FORWARDING (1),
|
||||
.CORE_TAG_WIDTH (`L2DRAM_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (0),
|
||||
.DRAM_TAG_WIDTH (`L3DRAM_TAG_WIDTH),
|
||||
.NUM_SNP_REQUESTS (`NUM_CLUSTERS),
|
||||
.SNP_REQ_TAG_WIDTH (`L3SNP_TAG_WIDTH),
|
||||
.SNP_FWD_TAG_WIDTH (`L2SNP_TAG_WIDTH)
|
||||
.SNP_TAG_WIDTH (`L3SNP_TAG_WIDTH)
|
||||
) l3cache (
|
||||
`SCOPE_BIND_Vortex_l3cache
|
||||
|
||||
|
@ -402,12 +413,12 @@ module Vortex (
|
|||
.reset (reset),
|
||||
|
||||
// Core request
|
||||
.core_req_valid (cluster_dram_req_valid),
|
||||
.core_req_rw (cluster_dram_req_rw),
|
||||
.core_req_byteen (cluster_dram_req_byteen),
|
||||
.core_req_addr (cluster_dram_req_addr),
|
||||
.core_req_data (cluster_dram_req_data),
|
||||
.core_req_tag (cluster_dram_req_tag),
|
||||
.core_req_valid (per_cluster_dram_req_valid),
|
||||
.core_req_rw (per_cluster_dram_req_rw),
|
||||
.core_req_byteen (per_cluster_dram_req_byteen),
|
||||
.core_req_addr (per_cluster_dram_req_addr),
|
||||
.core_req_data (per_cluster_dram_req_data),
|
||||
.core_req_tag (per_cluster_dram_req_tag),
|
||||
.core_req_ready (cluster_dram_req_ready),
|
||||
|
||||
// Core response
|
||||
|
@ -432,29 +443,17 @@ module Vortex (
|
|||
.dram_rsp_ready (dram_rsp_ready),
|
||||
|
||||
// Snoop request
|
||||
.snp_req_valid (snp_req_valid),
|
||||
.snp_req_addr (snp_req_addr),
|
||||
.snp_req_invalidate (snp_req_invalidate),
|
||||
.snp_req_tag (snp_req_tag),
|
||||
.snp_req_ready (snp_req_ready),
|
||||
.snp_req_valid (snp_fwd_rsp_valid),
|
||||
.snp_req_addr (snp_fwd_rsp_addr),
|
||||
.snp_req_invalidate (snp_fwd_rsp_invalidate),
|
||||
.snp_req_tag (snp_fwd_rsp_tag),
|
||||
.snp_req_ready (snp_fwd_rsp_ready),
|
||||
|
||||
// Snoop response
|
||||
.snp_rsp_valid (snp_rsp_valid),
|
||||
.snp_rsp_tag (snp_rsp_tag),
|
||||
.snp_rsp_ready (snp_rsp_ready),
|
||||
|
||||
// Snoop forwarding out
|
||||
.snp_fwdout_valid (cluster_snp_fwdout_valid),
|
||||
.snp_fwdout_addr (cluster_snp_fwdout_addr),
|
||||
.snp_fwdout_invalidate(cluster_snp_fwdout_invalidate),
|
||||
.snp_fwdout_tag (cluster_snp_fwdout_tag),
|
||||
.snp_fwdout_ready (cluster_snp_fwdout_ready),
|
||||
|
||||
// Snoop forwarding in
|
||||
.snp_fwdin_valid (cluster_snp_fwdin_valid),
|
||||
.snp_fwdin_tag (cluster_snp_fwdin_tag),
|
||||
.snp_fwdin_ready (cluster_snp_fwdin_ready),
|
||||
|
||||
// Miss status
|
||||
`UNUSED_PIN (miss_vec)
|
||||
);
|
||||
|
@ -497,4 +496,11 @@ module Vortex (
|
|||
end
|
||||
`endif
|
||||
|
||||
|
||||
`ifndef NDEBUG
|
||||
always @(posedge clk) begin
|
||||
$fflush(); // flush stdout buffer
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
26
hw/rtl/cache/VX_bank.v
vendored
26
hw/rtl/cache/VX_bank.v
vendored
|
@ -47,7 +47,7 @@ module VX_bank #(
|
|||
parameter CORE_TAG_ID_BITS = 0,
|
||||
|
||||
// Snooping request tag width
|
||||
parameter SNP_REQ_TAG_WIDTH = 1
|
||||
parameter SNP_TAG_WIDTH = 1
|
||||
) (
|
||||
`SCOPE_IO_VX_bank
|
||||
|
||||
|
@ -88,12 +88,12 @@ module VX_bank #(
|
|||
input wire snp_req_valid,
|
||||
input wire [`LINE_ADDR_WIDTH-1:0] snp_req_addr,
|
||||
input wire snp_req_invalidate,
|
||||
input wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag,
|
||||
input wire [SNP_TAG_WIDTH-1:0] snp_req_tag,
|
||||
output wire snp_req_ready,
|
||||
|
||||
// Snoop Response
|
||||
output wire snp_rsp_valid,
|
||||
output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag,
|
||||
output wire [SNP_TAG_WIDTH-1:0] snp_rsp_tag,
|
||||
input wire snp_rsp_ready,
|
||||
|
||||
// Misses
|
||||
|
@ -142,13 +142,13 @@ module VX_bank #(
|
|||
|
||||
wire [`LINE_ADDR_WIDTH-1:0] snrq_addr_st0;
|
||||
wire snrq_invalidate_st0;
|
||||
wire [SNP_REQ_TAG_WIDTH-1:0] snrq_tag_st0;
|
||||
wire [SNP_TAG_WIDTH-1:0] snrq_tag_st0;
|
||||
|
||||
wire snp_req_fire = snp_req_valid && snp_req_ready;
|
||||
assign snp_req_ready = !snrq_full;
|
||||
|
||||
VX_generic_queue #(
|
||||
.DATAW(`LINE_ADDR_WIDTH + 1 + SNP_REQ_TAG_WIDTH),
|
||||
.DATAW(`LINE_ADDR_WIDTH + 1 + SNP_TAG_WIDTH),
|
||||
.SIZE(SNRQ_SIZE)
|
||||
) snp_req_queue (
|
||||
.clk (clk),
|
||||
|
@ -352,7 +352,7 @@ module VX_bank #(
|
|||
|| ((miss_st3 || force_miss_st3) && (addr_st3 == addr_st0));
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
|
||||
assign {debug_pc_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = inst_meta_st0;
|
||||
end else begin
|
||||
assign {debug_pc_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = 0;
|
||||
|
@ -371,7 +371,7 @@ module VX_bank #(
|
|||
);
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
|
||||
assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = inst_meta_st1;
|
||||
end else begin
|
||||
assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = 0;
|
||||
|
@ -474,7 +474,7 @@ module VX_bank #(
|
|||
);
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
|
||||
assign {debug_pc_st2, debug_rd_st2, debug_wid_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2;
|
||||
end else begin
|
||||
assign {debug_pc_st2, debug_rd_st2, debug_wid_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = 0;
|
||||
|
@ -574,7 +574,7 @@ module VX_bank #(
|
|||
);
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
|
||||
assign {debug_pc_st3, debug_rd_st3, debug_wid_st3, debug_tagid_st3, debug_rw_st3, debug_byteen_st3, debug_tid_st3} = inst_meta_st3;
|
||||
end else begin
|
||||
assign {debug_pc_st3, debug_rd_st3, debug_wid_st3, debug_tagid_st3, debug_rw_st3, debug_byteen_st3, debug_tid_st3} = 0;
|
||||
|
@ -621,7 +621,7 @@ module VX_bank #(
|
|||
.NUM_REQUESTS (NUM_REQUESTS),
|
||||
.MRVQ_SIZE (MRVQ_SIZE),
|
||||
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
|
||||
.SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH)
|
||||
.SNP_TAG_WIDTH (SNP_TAG_WIDTH)
|
||||
) cache_miss_resrv (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -803,12 +803,12 @@ module VX_bank #(
|
|||
|
||||
wire snpq_pop = snp_rsp_valid && snp_rsp_ready;
|
||||
|
||||
wire [SNP_REQ_TAG_WIDTH-1:0] snpq_tag_st3 = SNP_REQ_TAG_WIDTH'(req_tag_st3);
|
||||
wire [SNP_TAG_WIDTH-1:0] snpq_tag_st3 = SNP_TAG_WIDTH'(req_tag_st3);
|
||||
|
||||
if (FLUSH_ENABLE) begin
|
||||
VX_generic_queue #(
|
||||
.DATAW(SNP_REQ_TAG_WIDTH),
|
||||
.SIZE(SNPQ_SIZE)
|
||||
.DATAW (SNP_TAG_WIDTH),
|
||||
.SIZE (SNPQ_SIZE)
|
||||
) snp_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
129
hw/rtl/cache/VX_cache.v
vendored
129
hw/rtl/cache/VX_cache.v
vendored
|
@ -39,9 +39,6 @@ module VX_cache #(
|
|||
// Enable cache flush
|
||||
parameter FLUSH_ENABLE = 1,
|
||||
|
||||
// Enable snoop forwarding
|
||||
parameter SNOOP_FORWARDING = 1,
|
||||
|
||||
// core request tag size
|
||||
parameter CORE_TAG_WIDTH = 4,
|
||||
|
||||
|
@ -51,14 +48,8 @@ module VX_cache #(
|
|||
// dram request tag size
|
||||
parameter DRAM_TAG_WIDTH = 28,
|
||||
|
||||
// Number of snoop forwarding requests
|
||||
parameter NUM_SNP_REQUESTS = (SNOOP_FORWARDING ? 4 : 1),
|
||||
|
||||
// Snooping request tag width
|
||||
parameter SNP_REQ_TAG_WIDTH = (SNOOP_FORWARDING ? 4 : 1),
|
||||
|
||||
// Snooping forward tag width
|
||||
parameter SNP_FWD_TAG_WIDTH = (SNOOP_FORWARDING ? 4 : 1)
|
||||
parameter SNP_TAG_WIDTH = 1
|
||||
) (
|
||||
`SCOPE_IO_VX_cache
|
||||
|
||||
|
@ -99,28 +90,14 @@ module VX_cache #(
|
|||
input wire snp_req_valid,
|
||||
input wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr,
|
||||
input wire snp_req_invalidate,
|
||||
input wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag,
|
||||
input wire [SNP_TAG_WIDTH-1:0] snp_req_tag,
|
||||
output wire snp_req_ready,
|
||||
|
||||
// Snoop response
|
||||
output wire snp_rsp_valid,
|
||||
output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag,
|
||||
output wire [SNP_TAG_WIDTH-1:0] snp_rsp_tag,
|
||||
input wire snp_rsp_ready,
|
||||
|
||||
// Snoop Forwarding out
|
||||
output wire [NUM_SNP_REQUESTS-1:0] snp_fwdout_valid,
|
||||
output wire [NUM_SNP_REQUESTS-1:0][`DRAM_ADDR_WIDTH-1:0] snp_fwdout_addr,
|
||||
output wire [NUM_SNP_REQUESTS-1:0] snp_fwdout_invalidate,
|
||||
output wire [NUM_SNP_REQUESTS-1:0][SNP_FWD_TAG_WIDTH-1:0] snp_fwdout_tag,
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
input wire [NUM_SNP_REQUESTS-1:0] snp_fwdout_ready,
|
||||
|
||||
// Snoop forwarding in
|
||||
input wire [NUM_SNP_REQUESTS-1:0] snp_fwdin_valid,
|
||||
input wire [NUM_SNP_REQUESTS-1:0][SNP_FWD_TAG_WIDTH-1:0] snp_fwdin_tag,
|
||||
`IGNORE_WARNINGS_END
|
||||
output wire [NUM_SNP_REQUESTS-1:0] snp_fwdin_ready,
|
||||
|
||||
output wire [NUM_BANKS-1:0] miss_vec
|
||||
);
|
||||
|
||||
|
@ -146,72 +123,16 @@ module VX_cache #(
|
|||
wire [NUM_BANKS-1:0] per_bank_snp_req_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_snp_rsp_valid;
|
||||
wire [NUM_BANKS-1:0][SNP_REQ_TAG_WIDTH-1:0] per_bank_snp_rsp_tag;
|
||||
wire [NUM_BANKS-1:0][SNP_TAG_WIDTH-1:0] per_bank_snp_rsp_tag;
|
||||
wire [NUM_BANKS-1:0] per_bank_snp_rsp_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_miss;
|
||||
assign miss_vec = per_bank_miss;
|
||||
|
||||
|
||||
|
||||
wire snp_req_valid_qual;
|
||||
wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr_qual;
|
||||
wire snp_req_invalidate_qual;
|
||||
wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag_qual;
|
||||
wire snp_req_ready_qual;
|
||||
|
||||
if (SNOOP_FORWARDING) begin
|
||||
VX_snp_forwarder #(
|
||||
.CACHE_ID (CACHE_ID),
|
||||
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
||||
.NUM_REQUESTS (NUM_SNP_REQUESTS),
|
||||
.SNRQ_SIZE (SNRQ_SIZE),
|
||||
.SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH)
|
||||
) snp_forwarder (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.snp_req_valid (snp_req_valid),
|
||||
.snp_req_addr (snp_req_addr),
|
||||
.snp_req_invalidate (snp_req_invalidate),
|
||||
.snp_req_tag (snp_req_tag),
|
||||
.snp_req_ready (snp_req_ready),
|
||||
|
||||
.snp_rsp_valid (snp_req_valid_qual),
|
||||
.snp_rsp_addr (snp_req_addr_qual),
|
||||
.snp_rsp_invalidate (snp_req_invalidate_qual),
|
||||
.snp_rsp_tag (snp_req_tag_qual),
|
||||
.snp_rsp_ready (snp_req_ready_qual),
|
||||
|
||||
.snp_fwdout_valid (snp_fwdout_valid),
|
||||
.snp_fwdout_addr (snp_fwdout_addr),
|
||||
.snp_fwdout_invalidate(snp_fwdout_invalidate),
|
||||
.snp_fwdout_tag (snp_fwdout_tag),
|
||||
.snp_fwdout_ready (snp_fwdout_ready),
|
||||
|
||||
.snp_fwdin_valid (snp_fwdin_valid),
|
||||
.snp_fwdin_tag (snp_fwdin_tag),
|
||||
.snp_fwdin_ready (snp_fwdin_ready)
|
||||
);
|
||||
end else begin
|
||||
assign snp_fwdout_valid = 0;
|
||||
assign snp_fwdout_addr = 0;
|
||||
assign snp_fwdout_invalidate = 0;
|
||||
assign snp_fwdout_tag = 0;
|
||||
|
||||
assign snp_fwdin_ready = 0;
|
||||
|
||||
assign snp_req_valid_qual = snp_req_valid;
|
||||
assign snp_req_addr_qual = snp_req_addr;
|
||||
assign snp_req_invalidate_qual = snp_req_invalidate;
|
||||
assign snp_req_tag_qual = snp_req_tag;
|
||||
assign snp_req_ready = snp_req_ready_qual;
|
||||
end
|
||||
|
||||
assign miss_vec = per_bank_miss;
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign snp_req_ready_qual = per_bank_snp_req_ready;
|
||||
assign snp_req_ready = per_bank_snp_req_ready;
|
||||
end else begin
|
||||
assign snp_req_ready_qual = per_bank_snp_req_ready[`DRAM_ADDR_BANK(snp_req_addr_qual)];
|
||||
assign snp_req_ready = per_bank_snp_req_ready[`DRAM_ADDR_BANK(snp_req_addr)];
|
||||
end
|
||||
|
||||
VX_cache_core_req_bank_sel #(
|
||||
|
@ -221,14 +142,18 @@ module VX_cache #(
|
|||
.NUM_REQUESTS (NUM_REQUESTS)
|
||||
) cache_core_req_bank_sel (
|
||||
.core_req_valid (core_req_valid),
|
||||
.per_bank_ready (per_bank_core_req_ready),
|
||||
.core_req_addr (core_req_addr),
|
||||
.core_req_ready (core_req_ready),
|
||||
.per_bank_valid (per_bank_valid),
|
||||
.core_req_ready (core_req_ready)
|
||||
.per_bank_ready (per_bank_core_req_ready)
|
||||
);
|
||||
|
||||
assign dram_req_tag = dram_req_addr;
|
||||
assign dram_rsp_ready = (& per_bank_dram_rsp_ready);
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign dram_rsp_ready = per_bank_dram_rsp_ready;
|
||||
end else begin
|
||||
assign dram_rsp_ready = per_bank_dram_rsp_ready[`DRAM_ADDR_BANK(dram_rsp_tag)];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; i++) begin
|
||||
wire [NUM_REQUESTS-1:0] curr_bank_core_req_valid;
|
||||
|
@ -260,11 +185,11 @@ module VX_cache #(
|
|||
wire curr_bank_snp_req_valid;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_snp_req_addr;
|
||||
wire curr_bank_snp_req_invalidate;
|
||||
wire [SNP_REQ_TAG_WIDTH-1:0] curr_bank_snp_req_tag;
|
||||
wire [SNP_TAG_WIDTH-1:0] curr_bank_snp_req_tag;
|
||||
wire curr_bank_snp_req_ready;
|
||||
|
||||
wire curr_bank_snp_rsp_valid;
|
||||
wire [SNP_REQ_TAG_WIDTH-1:0] curr_bank_snp_rsp_tag;
|
||||
wire [SNP_TAG_WIDTH-1:0] curr_bank_snp_rsp_tag;
|
||||
wire curr_bank_snp_rsp_ready;
|
||||
|
||||
wire curr_bank_miss;
|
||||
|
@ -310,14 +235,14 @@ module VX_cache #(
|
|||
|
||||
// Snoop request
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign curr_bank_snp_req_valid = snp_req_valid_qual;
|
||||
assign curr_bank_snp_req_addr = snp_req_addr_qual;
|
||||
assign curr_bank_snp_req_valid = snp_req_valid;
|
||||
assign curr_bank_snp_req_addr = snp_req_addr;
|
||||
end else begin
|
||||
assign curr_bank_snp_req_valid = snp_req_valid_qual && (`DRAM_ADDR_BANK(snp_req_addr_qual) == i);
|
||||
assign curr_bank_snp_req_addr = `DRAM_TO_LINE_ADDR(snp_req_addr_qual);
|
||||
assign curr_bank_snp_req_valid = snp_req_valid && (`DRAM_ADDR_BANK(snp_req_addr) == i);
|
||||
assign curr_bank_snp_req_addr = `DRAM_TO_LINE_ADDR(snp_req_addr);
|
||||
end
|
||||
assign curr_bank_snp_req_invalidate = snp_req_invalidate_qual;
|
||||
assign curr_bank_snp_req_tag = snp_req_tag_qual;
|
||||
assign curr_bank_snp_req_invalidate = snp_req_invalidate;
|
||||
assign curr_bank_snp_req_tag = snp_req_tag;
|
||||
assign per_bank_snp_req_ready[i] = curr_bank_snp_req_ready;
|
||||
|
||||
// Snoop response
|
||||
|
@ -348,7 +273,7 @@ module VX_cache #(
|
|||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS),
|
||||
.SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH)
|
||||
.SNP_TAG_WIDTH (SNP_TAG_WIDTH)
|
||||
) bank (
|
||||
`SCOPE_BIND_VX_cache_bank(i)
|
||||
|
||||
|
@ -459,9 +384,9 @@ module VX_cache #(
|
|||
|
||||
if (FLUSH_ENABLE) begin
|
||||
VX_snp_rsp_arb #(
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
||||
.SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH)
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
||||
.SNP_TAG_WIDTH (SNP_TAG_WIDTH)
|
||||
) snp_rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
2
hw/rtl/cache/VX_cache_config.vh
vendored
2
hw/rtl/cache/VX_cache_config.vh
vendored
|
@ -7,7 +7,7 @@
|
|||
`include "VX_define.vh"
|
||||
`endif
|
||||
|
||||
`define REQ_TAG_WIDTH `MAX(CORE_TAG_WIDTH, SNP_REQ_TAG_WIDTH)
|
||||
`define REQ_TAG_WIDTH `MAX(CORE_TAG_WIDTH, SNP_TAG_WIDTH)
|
||||
|
||||
`define REQS_BITS `LOG2UP(NUM_REQUESTS)
|
||||
|
||||
|
|
40
hw/rtl/cache/VX_cache_core_req_bank_sel.v
vendored
40
hw/rtl/cache/VX_cache_core_req_bank_sel.v
vendored
|
@ -11,27 +11,43 @@ module VX_cache_core_req_bank_sel #(
|
|||
parameter NUM_REQUESTS = 1
|
||||
) (
|
||||
input wire [NUM_REQUESTS-1:0] core_req_valid,
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
input wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
|
||||
`IGNORE_WARNINGS_END
|
||||
input wire [NUM_BANKS-1:0] per_bank_ready,
|
||||
input wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
|
||||
output wire core_req_ready,
|
||||
|
||||
output wire [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid,
|
||||
output wire core_req_ready
|
||||
input wire [NUM_BANKS-1:0] per_bank_ready
|
||||
);
|
||||
if (NUM_BANKS > 1) begin
|
||||
reg [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid_r;
|
||||
reg [NUM_BANKS-1:0] per_bank_ready_sel;
|
||||
reg [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid_r;
|
||||
reg [NUM_BANKS-1:0] per_bank_ready_ignore;
|
||||
reg [NUM_BANKS-1:0] per_bank_ready_other;
|
||||
|
||||
always @(*) begin
|
||||
per_bank_valid_r = 0;
|
||||
per_bank_ready_sel = {NUM_BANKS{1'b1}};
|
||||
per_bank_valid_r = 0;
|
||||
per_bank_ready_other = {NUM_BANKS{1'b1}};
|
||||
per_bank_ready_ignore = {NUM_BANKS{1'b1}};
|
||||
|
||||
for (integer i = 0; i < NUM_BANKS; i++) begin
|
||||
for (integer j = 0; j < NUM_BANKS; j++) begin
|
||||
if (i != j)
|
||||
per_bank_ready_other[i] &= (per_bank_ready[j] | per_bank_ready_ignore[j]);
|
||||
end
|
||||
end
|
||||
|
||||
for (integer i = 0; i < NUM_REQUESTS; i++) begin
|
||||
per_bank_valid_r[core_req_addr[i][`BANK_SELECT_ADDR_RNG]][i] = core_req_valid[i];
|
||||
per_bank_ready_sel[core_req_addr[i][`BANK_SELECT_ADDR_RNG]] = 0;
|
||||
per_bank_ready_ignore[core_req_addr[i][`BANK_SELECT_ADDR_RNG]] = 1'b0;
|
||||
end
|
||||
end
|
||||
assign per_bank_valid = per_bank_valid_r;
|
||||
assign core_req_ready = & (per_bank_ready | per_bank_ready_sel);
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; i++) begin
|
||||
for (genvar j = 0; j < NUM_REQUESTS; j++) begin
|
||||
assign per_bank_valid[i][j] = per_bank_valid_r[i][j] & per_bank_ready_other[i];
|
||||
end
|
||||
end
|
||||
assign core_req_ready = & (per_bank_ready | per_bank_ready_ignore);
|
||||
end else begin
|
||||
`UNUSED_VAR (core_req_addr)
|
||||
assign per_bank_valid = core_req_valid;
|
||||
assign core_req_ready = per_bank_ready;
|
||||
end
|
||||
|
|
2
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
2
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
|
@ -17,7 +17,7 @@ module VX_cache_miss_resrv #(
|
|||
// core request tag size
|
||||
parameter CORE_TAG_WIDTH = 1,
|
||||
// Snooping request tag width
|
||||
parameter SNP_REQ_TAG_WIDTH = 1,
|
||||
parameter SNP_TAG_WIDTH = 1,
|
||||
// size of tag id in core request tag
|
||||
parameter CORE_TAG_ID_BITS = 0
|
||||
) (
|
||||
|
|
110
hw/rtl/cache/VX_snp_forwarder.v
vendored
110
hw/rtl/cache/VX_snp_forwarder.v
vendored
|
@ -1,33 +1,33 @@
|
|||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_snp_forwarder #(
|
||||
parameter CACHE_ID = 0,
|
||||
parameter BANK_LINE_SIZE = 1,
|
||||
parameter NUM_REQUESTS = 1,
|
||||
parameter SNRQ_SIZE = 1,
|
||||
parameter SNP_REQ_TAG_WIDTH = 1,
|
||||
parameter SNP_FWD_TAG_WIDTH = 1
|
||||
parameter CACHE_ID = 0,
|
||||
parameter SRC_ADDR_WIDTH = 1,
|
||||
parameter DST_ADDR_WIDTH = 1,
|
||||
parameter NUM_REQUESTS = 1,
|
||||
parameter SNP_TAG_WIDTH = 1,
|
||||
parameter SNRQ_SIZE = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Snoop request
|
||||
input wire snp_req_valid,
|
||||
input wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr,
|
||||
input wire [SRC_ADDR_WIDTH-1:0] snp_req_addr,
|
||||
input wire snp_req_invalidate,
|
||||
input wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag,
|
||||
input wire [SNP_TAG_WIDTH-1:0] snp_req_tag,
|
||||
output wire snp_req_ready,
|
||||
|
||||
// Snoop response
|
||||
output wire snp_rsp_valid,
|
||||
output wire [`DRAM_ADDR_WIDTH-1:0] snp_rsp_addr,
|
||||
output wire [SRC_ADDR_WIDTH-1:0] snp_rsp_addr,
|
||||
output wire snp_rsp_invalidate,
|
||||
output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag,
|
||||
output wire [SNP_TAG_WIDTH-1:0] snp_rsp_tag,
|
||||
input wire snp_rsp_ready,
|
||||
|
||||
// Snoop Forwarding out
|
||||
output wire [NUM_REQUESTS-1:0] snp_fwdout_valid,
|
||||
output wire [NUM_REQUESTS-1:0][`DRAM_ADDR_WIDTH-1:0] snp_fwdout_addr,
|
||||
output wire [NUM_REQUESTS-1:0][DST_ADDR_WIDTH-1:0] snp_fwdout_addr,
|
||||
output wire [NUM_REQUESTS-1:0] snp_fwdout_invalidate,
|
||||
output wire [NUM_REQUESTS-1:0][`LOG2UP(SNRQ_SIZE)-1:0] snp_fwdout_tag,
|
||||
input wire [NUM_REQUESTS-1:0] snp_fwdout_ready,
|
||||
|
@ -37,30 +37,37 @@ module VX_snp_forwarder #(
|
|||
input wire [NUM_REQUESTS-1:0][`LOG2UP(SNRQ_SIZE)-1:0] snp_fwdin_tag,
|
||||
output wire [NUM_REQUESTS-1:0] snp_fwdin_ready
|
||||
);
|
||||
localparam ADDR_DIFF = DST_ADDR_WIDTH - SRC_ADDR_WIDTH;
|
||||
localparam NUM_REQUESTS_QUAL = NUM_REQUESTS * (1 << ADDR_DIFF);
|
||||
localparam REQ_QUAL_BITS = `LOG2UP(NUM_REQUESTS_QUAL);
|
||||
|
||||
`STATIC_ASSERT(NUM_REQUESTS > 1, ("invalid value"))
|
||||
|
||||
reg [`REQS_BITS:0] pending_cntrs [SNRQ_SIZE-1:0];
|
||||
reg [REQ_QUAL_BITS:0] pending_cntrs [SNRQ_SIZE-1:0];
|
||||
|
||||
wire [`LOG2UP(SNRQ_SIZE)-1:0] sfq_write_addr, sfq_read_addr;
|
||||
wire sfq_acquire, sfq_release, sfq_full;
|
||||
|
||||
wire [`LOG2UP(SNRQ_SIZE)-1:0] fwdout_tag;
|
||||
reg [NUM_REQUESTS-1:0] snp_fwdout_ready_other;
|
||||
wire fwdout_ready;
|
||||
|
||||
wire fwdin_valid;
|
||||
wire [`LOG2UP(SNRQ_SIZE)-1:0] fwdin_tag;
|
||||
wire fwdin_valid;
|
||||
|
||||
wire fwdin_ready = snp_rsp_ready || (1 != pending_cntrs[sfq_read_addr]);
|
||||
wire fwdin_fire = fwdin_valid && fwdin_ready;
|
||||
|
||||
wire fwdout_ready = (& snp_fwdout_ready);
|
||||
|
||||
assign snp_rsp_valid = fwdin_valid && (1 == pending_cntrs[sfq_read_addr]); // send response
|
||||
assign snp_rsp_valid = fwdin_valid && (1 == pending_cntrs[sfq_read_addr]);
|
||||
|
||||
assign sfq_read_addr = fwdin_tag;
|
||||
|
||||
assign sfq_acquire = snp_req_valid && !sfq_full && fwdout_ready;
|
||||
assign sfq_release = snp_rsp_valid && snp_rsp_ready;
|
||||
|
||||
wire snp_req_ready_unqual = !sfq_full && fwdout_ready;
|
||||
|
||||
VX_cam_buffer #(
|
||||
.DATAW (`DRAM_ADDR_WIDTH + 1 + SNP_REQ_TAG_WIDTH),
|
||||
.DATAW (SRC_ADDR_WIDTH + 1 + SNP_TAG_WIDTH),
|
||||
.SIZE (SNRQ_SIZE)
|
||||
) snp_fwd_cam (
|
||||
.clk (clk),
|
||||
|
@ -75,9 +82,54 @@ module VX_snp_forwarder #(
|
|||
.full (sfq_full)
|
||||
);
|
||||
|
||||
wire [DST_ADDR_WIDTH-1:0] snp_req_addr_qual;
|
||||
wire dispatch_ready;
|
||||
|
||||
if (ADDR_DIFF != 0) begin
|
||||
reg [`LOG2UP(SNRQ_SIZE)-1:0] fwdout_tag_r;
|
||||
reg [DST_ADDR_WIDTH-1:0] snp_req_addr_r;
|
||||
reg dispatch_ready_r;
|
||||
reg use_cter_r;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
dispatch_ready_r <= 0;
|
||||
use_cter_r <= 0;
|
||||
end else begin
|
||||
if (snp_req_valid && snp_req_ready_unqual) begin
|
||||
if (snp_req_addr_r[ADDR_DIFF-1:0] == ((1 << ADDR_DIFF)-2)) begin
|
||||
dispatch_ready_r <= 1;
|
||||
end
|
||||
if (snp_req_addr_r[ADDR_DIFF-1:0] == ((1 << ADDR_DIFF)-1)) begin
|
||||
dispatch_ready_r <= 0;
|
||||
use_cter_r <= 0;
|
||||
end else begin
|
||||
use_cter_r <= 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
if (snp_req_valid && snp_req_ready_unqual) begin
|
||||
snp_req_addr_r <= snp_req_addr_qual + DST_ADDR_WIDTH'(1'b1);
|
||||
end
|
||||
if (!use_cter_r) begin
|
||||
fwdout_tag_r <= sfq_write_addr;
|
||||
end
|
||||
end
|
||||
assign sfq_acquire = snp_req_valid && snp_req_ready_unqual && !use_cter_r;
|
||||
assign fwdout_tag = use_cter_r ? fwdout_tag_r : sfq_write_addr;
|
||||
assign snp_req_addr_qual = use_cter_r ? snp_req_addr_r : {snp_req_addr, ADDR_DIFF'(0)};
|
||||
assign dispatch_ready = dispatch_ready_r;
|
||||
end else begin
|
||||
assign sfq_acquire = snp_req_valid && snp_req_ready;
|
||||
assign fwdout_tag = sfq_write_addr;
|
||||
assign snp_req_addr_qual = snp_req_addr;
|
||||
assign dispatch_ready = 1'b1;
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (sfq_acquire) begin
|
||||
pending_cntrs[sfq_write_addr] <= NUM_REQUESTS;
|
||||
pending_cntrs[sfq_write_addr] <= NUM_REQUESTS_QUAL;
|
||||
end
|
||||
if (fwdin_fire) begin
|
||||
pending_cntrs[sfq_read_addr] <= pending_cntrs[sfq_read_addr] - 1;
|
||||
|
@ -85,13 +137,25 @@ module VX_snp_forwarder #(
|
|||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
|
||||
assign snp_fwdout_valid[i] = snp_req_valid && snp_req_ready;
|
||||
assign snp_fwdout_addr[i] = snp_req_addr;
|
||||
assign snp_fwdout_valid[i] = snp_req_valid && snp_fwdout_ready_other[i] && !sfq_full;
|
||||
assign snp_fwdout_addr[i] = snp_req_addr_qual;
|
||||
assign snp_fwdout_invalidate[i] = snp_req_invalidate;
|
||||
assign snp_fwdout_tag[i] = sfq_write_addr;
|
||||
assign snp_fwdout_tag[i] = fwdout_tag;
|
||||
end
|
||||
|
||||
assign snp_req_ready = !sfq_full && fwdout_ready;
|
||||
always @(*) begin
|
||||
snp_fwdout_ready_other = {NUM_REQUESTS{1'b1}};
|
||||
for (integer i = 0; i < NUM_REQUESTS; i++) begin
|
||||
for (integer j = 0; j < NUM_REQUESTS; j++) begin
|
||||
if (i != j)
|
||||
snp_fwdout_ready_other[i] &= snp_fwdout_ready[j];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign fwdout_ready = (& snp_fwdout_ready);
|
||||
|
||||
assign snp_req_ready = snp_req_ready_unqual && dispatch_ready;
|
||||
|
||||
if (NUM_REQUESTS > 1) begin
|
||||
wire sel_valid;
|
||||
|
|
8
hw/rtl/cache/VX_snp_rsp_arb.v
vendored
8
hw/rtl/cache/VX_snp_rsp_arb.v
vendored
|
@ -3,17 +3,17 @@
|
|||
module VX_snp_rsp_arb #(
|
||||
parameter NUM_BANKS = 1,
|
||||
parameter BANK_LINE_SIZE = 1,
|
||||
parameter SNP_REQ_TAG_WIDTH = 1
|
||||
parameter SNP_TAG_WIDTH = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
input wire [NUM_BANKS-1:0] per_bank_snp_rsp_valid,
|
||||
input wire [NUM_BANKS-1:0][SNP_REQ_TAG_WIDTH-1:0] per_bank_snp_rsp_tag,
|
||||
input wire [NUM_BANKS-1:0][SNP_TAG_WIDTH-1:0] per_bank_snp_rsp_tag,
|
||||
output wire [NUM_BANKS-1:0] per_bank_snp_rsp_ready,
|
||||
|
||||
output wire snp_rsp_valid,
|
||||
output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag,
|
||||
output wire [SNP_TAG_WIDTH-1:0] snp_rsp_tag,
|
||||
input wire snp_rsp_ready
|
||||
);
|
||||
if (NUM_BANKS > 1) begin
|
||||
|
@ -35,7 +35,7 @@ module VX_snp_rsp_arb #(
|
|||
wire stall = ~snp_rsp_ready && snp_rsp_valid;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + SNP_REQ_TAG_WIDTH),
|
||||
.N(1 + SNP_TAG_WIDTH),
|
||||
.PASSTHRU(NUM_BANKS <= 2)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
|
|
|
@ -11,8 +11,10 @@ interface VX_cache_core_rsp_if #(
|
|||
) ();
|
||||
|
||||
wire [NUM_REQUESTS-1:0] valid;
|
||||
|
||||
wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] data;
|
||||
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] tag;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -10,11 +10,13 @@ interface VX_cache_dram_req_if #(
|
|||
) ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire rw;
|
||||
wire [(DRAM_LINE_WIDTH/8)-1:0] byteen;
|
||||
wire [DRAM_ADDR_WIDTH-1:0] addr;
|
||||
wire [DRAM_LINE_WIDTH-1:0] data;
|
||||
wire [DRAM_TAG_WIDTH-1:0] tag;
|
||||
wire [DRAM_TAG_WIDTH-1:0] tag;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -9,8 +9,10 @@ interface VX_cache_dram_rsp_if #(
|
|||
) ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire [DRAM_LINE_WIDTH-1:0] data;
|
||||
wire [DRAM_TAG_WIDTH-1:0] tag;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -9,9 +9,11 @@ interface VX_cache_snp_req_if #(
|
|||
) ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire [DRAM_ADDR_WIDTH-1:0] addr;
|
||||
wire invalidate;
|
||||
wire [SNP_TAG_WIDTH-1:0] tag;
|
||||
wire [SNP_TAG_WIDTH-1:0] tag;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -8,7 +8,9 @@ interface VX_cache_snp_rsp_if #(
|
|||
) ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire [SNP_TAG_WIDTH-1:0] tag;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -5,14 +5,12 @@
|
|||
|
||||
interface VX_cmt_to_csr_if ();
|
||||
|
||||
wire valid;
|
||||
wire valid;
|
||||
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
|
||||
wire [$clog2(`NUM_THREADS+1)-1:0] commit_size;
|
||||
|
||||
wire has_fflags;
|
||||
fflags_t fflags;
|
||||
wire has_fflags;
|
||||
fflags_t fflags;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -6,9 +6,11 @@
|
|||
interface VX_csr_io_req_if ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire [`CSR_ADDR_BITS-1:0] addr;
|
||||
wire rw;
|
||||
wire [31:0] data;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -6,7 +6,9 @@
|
|||
interface VX_csr_io_rsp_if ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire [31:0] data;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -10,18 +10,15 @@ interface VX_decode_if ();
|
|||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
|
||||
wire [`EX_BITS-1:0] ex_type;
|
||||
wire [`OP_BITS-1:0] op_type;
|
||||
wire [`MOD_BITS-1:0] op_mod;
|
||||
wire wb;
|
||||
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`NR_BITS-1:0] rs1;
|
||||
wire [`NR_BITS-1:0] rs2;
|
||||
wire [`NR_BITS-1:0] rs3;
|
||||
wire [31:0] imm;
|
||||
|
||||
wire [31:0] imm;
|
||||
wire rs1_is_PC;
|
||||
wire rs2_is_imm;
|
||||
wire use_rs3;
|
||||
|
|
|
@ -5,13 +5,15 @@
|
|||
|
||||
interface VX_exu_to_cmt_if ();
|
||||
|
||||
wire valid;
|
||||
wire valid;
|
||||
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -5,7 +5,8 @@
|
|||
|
||||
interface VX_fpu_to_cmt_if ();
|
||||
|
||||
wire valid;
|
||||
wire valid;
|
||||
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
|
@ -14,6 +15,7 @@ interface VX_fpu_to_cmt_if ();
|
|||
wire wb;
|
||||
wire has_fflags;
|
||||
fflags_t [`NUM_THREADS-1:0] fflags;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -9,15 +9,13 @@
|
|||
|
||||
interface VX_fpu_to_csr_if ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire valid;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
|
||||
wire fflags_NV;
|
||||
wire fflags_DZ;
|
||||
wire fflags_OF;
|
||||
wire fflags_UF;
|
||||
wire fflags_NX;
|
||||
wire fflags_NV;
|
||||
wire fflags_DZ;
|
||||
wire fflags_OF;
|
||||
wire fflags_UF;
|
||||
wire fflags_NX;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -9,7 +9,6 @@ interface VX_gpr_rsp_if ();
|
|||
wire [`NW_BITS-1:0] wid;
|
||||
wire [31:0] PC;
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs3_data;
|
||||
|
|
|
@ -6,9 +6,11 @@
|
|||
interface VX_ifetch_req_if ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [31:0] PC;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -5,11 +5,13 @@
|
|||
|
||||
interface VX_ifetch_rsp_if ();
|
||||
|
||||
wire valid;
|
||||
wire valid;
|
||||
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [31:0] PC;
|
||||
wire [31:0] instr;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -10,14 +10,11 @@ interface VX_lsu_req_if ();
|
|||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
|
||||
wire rw;
|
||||
wire [`BYTEEN_BITS-1:0] byteen;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] store_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] base_addr;
|
||||
wire [31:0] offset;
|
||||
|
||||
wire [31:0] offset;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
|
||||
|
|
|
@ -5,13 +5,12 @@
|
|||
|
||||
interface VX_warp_ctl_if ();
|
||||
|
||||
wire valid;
|
||||
wire valid;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
|
||||
gpu_tmc_t tmc;
|
||||
gpu_wspawn_t wspawn;
|
||||
gpu_barrier_t barrier;
|
||||
gpu_split_t split;
|
||||
gpu_tmc_t tmc;
|
||||
gpu_wspawn_t wspawn;
|
||||
gpu_barrier_t barrier;
|
||||
gpu_split_t split;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -6,13 +6,12 @@
|
|||
interface VX_writeback_if ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire [31:0] PC;
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
interface VX_wstall_if();
|
||||
|
||||
wire valid;
|
||||
wire valid;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -54,7 +54,7 @@ module VX_cam_buffer #(
|
|||
end else begin
|
||||
for (integer i = 0; i < CPORTS; i++) begin
|
||||
if (release_slot[i]) begin
|
||||
assert(0 == free_slots[release_addr[i]]) else $display("%t: freed slot at port %d", $time, release_addr[i]);
|
||||
assert(0 == free_slots[release_addr[i]]) else $error("%t: releasing invalid slot at port %d", $time, release_addr[i]);
|
||||
end
|
||||
end
|
||||
free_slots <= free_slots_n;
|
||||
|
@ -63,7 +63,7 @@ module VX_cam_buffer #(
|
|||
end
|
||||
|
||||
if (acquire_slot) begin
|
||||
assert(1 == free_slots[write_addr]) else $display("%t: inused slot at port %d", $time, write_addr);
|
||||
assert(1 == free_slots[write_addr]) else $error("%t: acquiring used slot at port %d", $time, write_addr);
|
||||
entries[write_addr] <= write_data;
|
||||
end
|
||||
end
|
||||
|
|
|
@ -57,34 +57,24 @@ if args.outc != 'none':
|
|||
print('\n#endif', file=f)
|
||||
|
||||
translation_rules = [
|
||||
(re.compile(r'^$'), r''),
|
||||
(re.compile(r'^(\s*)`ifndef\s+([^ ]+)'), r'\1#ifndef \2'),
|
||||
(re.compile(r'^(\s*)`define\s+([^ ]+)'), r'\1#define \2'),
|
||||
(re.compile(r'^(\s*)`include "VX_user_config\.vh"'), r''),
|
||||
(re.compile(r'^(\s*)`define\s+([^ ]+) (.+)'), r'\1#define \2 \3'),
|
||||
(re.compile(r'^(\s*)`endif\s+'), r'\1#endif'),
|
||||
(re.compile(r'^(\s*)//(.*)'), r'\1// \2'),
|
||||
]
|
||||
# preprocessor directives
|
||||
(re.compile(r'^\s*`include .*$'), r''),
|
||||
(re.compile(r'`ifdef'), r'#ifdef'),
|
||||
(re.compile(r'`ifndef'), r'#ifndef'),
|
||||
(re.compile(r'`elif'), r'#elif'),
|
||||
(re.compile(r'`else'), r'#else'),
|
||||
(re.compile(r'`define'), r'#define'),
|
||||
(re.compile(r'`endif'), r'#endif'),
|
||||
|
||||
post_rules = [
|
||||
(re.compile(r"\d+'d(\d+)"), r'\1'),
|
||||
|
||||
# non-standard C but supported by GCC and Clang
|
||||
(re.compile(r"\d+'b([01]+)"), r'0b\1'),
|
||||
(re.compile(r"\d+'h([\da-fA-F]+)"), r'0x\1'),
|
||||
|
||||
# fix macro references (does not support escaped identifiers §5.6.1)
|
||||
# macro expansion
|
||||
(re.compile(r"`([A-Za-z_][$_0-9A-Za-z]*)"), r'\1'),
|
||||
|
||||
# literals
|
||||
(re.compile(r"\d+'d(\d+)"), r'\1'),
|
||||
(re.compile(r"\d+'b([01]+)"), r'0b\1'),
|
||||
(re.compile(r"\d+'h([\da-fA-F]+)"), r'0x\1')
|
||||
]
|
||||
|
||||
def post_process_line(line):
|
||||
for pat, repl in post_rules:
|
||||
line = pat.sub(repl, line)
|
||||
return line
|
||||
|
||||
|
||||
in_expansion = False
|
||||
|
||||
if args.outc != 'none':
|
||||
with open(args.outc, 'a') as f:
|
||||
print('''
|
||||
|
@ -96,36 +86,14 @@ if args.outc != 'none':
|
|||
with open(path.join(script_dir, '../rtl/VX_config.vh'), 'r') as r:
|
||||
lineno = 0
|
||||
for line in r:
|
||||
if in_expansion:
|
||||
f.write(post_process_line(line))
|
||||
if not line.strip().endswith('\\'):
|
||||
in_expansion = False
|
||||
else:
|
||||
for pat, repl in translation_rules:
|
||||
if pat.match(line):
|
||||
if line.strip().endswith('\\'):
|
||||
in_expansion = True
|
||||
f.write(post_process_line(pat.sub(repl, line)))
|
||||
break
|
||||
else:
|
||||
raise ValueError('failed to find rule for: "' + line + '" (' + str(lineno) + ')')
|
||||
for pat, repl in translation_rules:
|
||||
match = pat.search(line)
|
||||
if match:
|
||||
line = re.sub(pat, repl, line)
|
||||
#print("*** match @" + str(lineno) + ": " + match.group() + " => " + line)
|
||||
f.write(line)
|
||||
lineno = lineno + 1
|
||||
|
||||
print('''
|
||||
// Misc
|
||||
|
||||
#define THREADS_PER_WARP NUM_THREADS
|
||||
#define WARPS_PER_CORE NUM_WARPS
|
||||
#define NUM_WI (NUM_WARPS * NUM_THREADS * NUM_CORES_PER_CLUSTER * NUM_CLUSTERS)
|
||||
|
||||
// legacy
|
||||
#define TOTAL_THREADS NUM_WI
|
||||
#define TOTAL_WARPS (NUM_WARPS * NUM_CORES_PER_CLUSTER * NUM_CLUSTERS)
|
||||
|
||||
// COLORS
|
||||
#define GREEN "\\033[32m"
|
||||
#define RED "\\033[31m"
|
||||
#define DEFAULT "\\033[39m"
|
||||
'''[1:], file=f)
|
||||
|
||||
|
||||
|
|
|
@ -1,19 +1,19 @@
|
|||
SINGLECORE += -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
||||
SINGLECORE += -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DL2_ENABLE=0
|
||||
|
||||
#MULTICORE ?= -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
#MULTICORE ?= -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
MULTICORE ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
||||
|
||||
# control RTL debug print states
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
|
||||
|
|
|
@ -57,11 +57,7 @@ void Simulator::attach_ram(RAM* ram) {
|
|||
dram_rsp_vec_.clear();
|
||||
}
|
||||
|
||||
void Simulator::reset() {
|
||||
#ifndef NDEBUG
|
||||
std::cout << timestamp << ": [sim] reset()" << std::endl;
|
||||
#endif
|
||||
|
||||
void Simulator::reset() {
|
||||
print_bufs_.clear();
|
||||
dram_rsp_vec_.clear();
|
||||
|
||||
|
@ -96,15 +92,25 @@ void Simulator::reset() {
|
|||
}
|
||||
|
||||
void Simulator::step() {
|
||||
|
||||
vortex_->clk = 0;
|
||||
this->eval();
|
||||
|
||||
dram_rsp_ready_ = vortex_->dram_rsp_ready;
|
||||
snp_req_ready_ = vortex_->snp_req_ready;
|
||||
csr_io_req_ready_ = vortex_->csr_io_req_ready;
|
||||
|
||||
vortex_->clk = 1;
|
||||
this->eval();
|
||||
|
||||
|
||||
this->eval_dram_bus();
|
||||
this->eval_io_bus();
|
||||
this->eval_csr_bus();
|
||||
this->eval_snp_bus();
|
||||
|
||||
#ifndef NDEBUG
|
||||
fflush(stdout);
|
||||
#endif
|
||||
}
|
||||
|
||||
void Simulator::eval() {
|
||||
|
@ -134,8 +140,7 @@ void Simulator::eval_dram_bus() {
|
|||
|
||||
// send DRAM response
|
||||
if (dram_rsp_active_
|
||||
&& vortex_->dram_rsp_valid
|
||||
&& vortex_->dram_rsp_ready) {
|
||||
&& vortex_->dram_rsp_valid && dram_rsp_ready_) {
|
||||
dram_rsp_active_ = false;
|
||||
}
|
||||
if (!dram_rsp_active_) {
|
||||
|
@ -183,7 +188,7 @@ void Simulator::eval_dram_bus() {
|
|||
}
|
||||
}
|
||||
|
||||
vortex_->dram_req_ready = ~dram_stalled;
|
||||
vortex_->dram_req_ready = !dram_stalled;
|
||||
}
|
||||
|
||||
void Simulator::eval_io_bus() {
|
||||
|
@ -207,31 +212,32 @@ void Simulator::eval_io_bus() {
|
|||
}
|
||||
|
||||
void Simulator::eval_snp_bus() {
|
||||
if (snp_req_active_) {
|
||||
if (vortex_->snp_rsp_valid) {
|
||||
assert(pending_snp_reqs_ > 0);
|
||||
--pending_snp_reqs_;
|
||||
if (snp_req_active_) {
|
||||
if (vortex_->snp_req_valid && snp_req_ready_) {
|
||||
assert(snp_req_size_);
|
||||
#ifdef DBG_PRINT_CACHE_SNP
|
||||
std::cout << timestamp << ": [sim] snp rsp: tag=" << vortex_->snp_rsp_tag << " pending=" << pending_snp_reqs_ << std::endl;
|
||||
std::cout << std::dec << timestamp << ": [sim] SNP Req: addr=" << std::hex << vortex_->snp_req_addr << " tag=" << vortex_->snp_req_tag << " remain=" << (snp_req_size_-1) << std::endl;
|
||||
#endif
|
||||
}
|
||||
if (vortex_->snp_req_valid && vortex_->snp_req_ready) {
|
||||
if (snp_req_size_ != 0) {
|
||||
vortex_->snp_req_addr += 1;
|
||||
vortex_->snp_req_tag += 1;
|
||||
--snp_req_size_;
|
||||
++pending_snp_reqs_;
|
||||
#ifdef DBG_PRINT_CACHE_SNP
|
||||
std::cout << timestamp << ": [sim] snp req: addr=" << std::hex << vortex_->snp_req_addr << " tag=" << vortex_->snp_req_tag << " remain=" << snp_req_size_ << std::endl;
|
||||
#endif
|
||||
} else {
|
||||
vortex_->snp_req_valid = 0;
|
||||
++vortex_->snp_req_addr;
|
||||
++vortex_->snp_req_tag;
|
||||
++pending_snp_reqs_;
|
||||
--snp_req_size_;
|
||||
if (0 == snp_req_size_) {
|
||||
vortex_->snp_req_valid = false;
|
||||
}
|
||||
}
|
||||
if (!vortex_->snp_req_valid
|
||||
&& 0 == pending_snp_reqs_) {
|
||||
snp_req_active_ = false;
|
||||
}
|
||||
|
||||
if (vortex_->snp_rsp_valid && vortex_->snp_rsp_ready) {
|
||||
assert(pending_snp_reqs_ > 0);
|
||||
--pending_snp_reqs_;
|
||||
if (!vortex_->snp_req_valid && 0 == pending_snp_reqs_) {
|
||||
vortex_->snp_rsp_ready = false;
|
||||
snp_req_active_ = false;
|
||||
}
|
||||
#ifdef DBG_PRINT_CACHE_SNP
|
||||
std::cout << std::dec << timestamp << ": [sim] SNP Rsp: tag=" << std::hex << vortex_->snp_rsp_tag << " pending=" << pending_snp_reqs_ << std::endl;
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
vortex_->snp_req_valid = 0;
|
||||
vortex_->snp_rsp_ready = 0;
|
||||
|
@ -240,18 +246,24 @@ void Simulator::eval_snp_bus() {
|
|||
|
||||
void Simulator::eval_csr_bus() {
|
||||
if (csr_req_active_) {
|
||||
if (vortex_->csr_io_req_rw) {
|
||||
if (vortex_->csr_io_req_ready) {
|
||||
vortex_->snp_req_valid = 0;
|
||||
csr_req_active_ = false;
|
||||
}
|
||||
} else {
|
||||
if (vortex_->csr_io_rsp_valid) {
|
||||
*csr_rsp_value_ = vortex_->csr_io_rsp_data;
|
||||
vortex_->snp_req_valid = 0;
|
||||
vortex_->csr_io_rsp_ready = 0;
|
||||
csr_req_active_ = false;
|
||||
}
|
||||
if (vortex_->csr_io_req_valid && csr_io_req_ready_) {
|
||||
#ifndef NDEBUG
|
||||
if (vortex_->csr_io_req_rw)
|
||||
std::cout << std::dec << timestamp << ": [sim] CSR Wr Req: core=" << (int)vortex_->csr_io_req_coreid << ", addr=" << std::hex << vortex_->csr_io_req_addr << ", value=" << vortex_->csr_io_req_data << std::endl;
|
||||
else
|
||||
std::cout << std::dec << timestamp << ": [sim] CSR Rd Req: core=" << (int)vortex_->csr_io_req_coreid << ", addr=" << std::hex << vortex_->csr_io_req_addr << std::endl;
|
||||
#endif
|
||||
vortex_->csr_io_req_valid = 0;
|
||||
if (vortex_->csr_io_req_rw)
|
||||
csr_req_active_ = false;
|
||||
}
|
||||
if (vortex_->csr_io_rsp_valid && vortex_->csr_io_rsp_ready) {
|
||||
*csr_rsp_value_ = vortex_->csr_io_rsp_data;
|
||||
vortex_->csr_io_rsp_ready = 0;
|
||||
csr_req_active_ = false;
|
||||
#ifndef NDEBUG
|
||||
std::cout << std::dec << timestamp << ": [sim] CSR Rsp: value=" << vortex_->csr_io_rsp_data << std::endl;
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
vortex_->csr_io_req_valid = 0;
|
||||
|
@ -278,33 +290,23 @@ bool Simulator::csr_req_active() const {
|
|||
}
|
||||
|
||||
void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
|
||||
#ifndef NDEBUG
|
||||
std::cout << timestamp << ": [sim] flush_caches()" << std::endl;
|
||||
#endif
|
||||
if (0 == size)
|
||||
return;
|
||||
|
||||
assert(!vortex_->snp_rsp_valid);
|
||||
|
||||
vortex_->snp_req_addr = mem_addr / GLOBAL_BLOCK_SIZE;
|
||||
vortex_->snp_req_tag = 0;
|
||||
vortex_->snp_req_valid = 1;
|
||||
vortex_->snp_rsp_ready = 1;
|
||||
|
||||
snp_req_size_ = (size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE;
|
||||
--snp_req_size_;
|
||||
pending_snp_reqs_ = 1;
|
||||
snp_req_size_ = (size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE;
|
||||
pending_snp_reqs_ = 0;
|
||||
|
||||
snp_req_active_ = true;
|
||||
|
||||
#ifdef DBG_PRINT_CACHE_SNP
|
||||
std::cout << timestamp << ": [sim] snp req: addr=" << std::hex << vortex_->snp_req_addr << " tag=" << vortex_->snp_req_tag << " remain=" << snp_req_size_ << std::endl;
|
||||
#endif
|
||||
}
|
||||
|
||||
void Simulator::set_csr(int core_id, int addr, unsigned value) {
|
||||
#ifndef NDEBUG
|
||||
std::cout << timestamp << ": [sim] set_csr()" << std::endl;
|
||||
#endif
|
||||
|
||||
vortex_->csr_io_req_valid = 1;
|
||||
vortex_->csr_io_req_coreid = core_id;
|
||||
vortex_->csr_io_req_addr = addr;
|
||||
|
@ -316,10 +318,6 @@ void Simulator::set_csr(int core_id, int addr, unsigned value) {
|
|||
}
|
||||
|
||||
void Simulator::get_csr(int core_id, int addr, unsigned *value) {
|
||||
#ifndef NDEBUG
|
||||
std::cout << timestamp << ": [sim] get_csr()" << std::endl;
|
||||
#endif
|
||||
|
||||
vortex_->csr_io_req_valid = 1;
|
||||
vortex_->csr_io_req_coreid = core_id;
|
||||
vortex_->csr_io_req_addr = addr;
|
||||
|
@ -327,12 +325,13 @@ void Simulator::get_csr(int core_id, int addr, unsigned *value) {
|
|||
vortex_->csr_io_rsp_ready = 1;
|
||||
|
||||
csr_rsp_value_ = value;
|
||||
|
||||
csr_req_active_ = true;
|
||||
}
|
||||
|
||||
void Simulator::run() {
|
||||
#ifndef NDEBUG
|
||||
std::cout << timestamp << ": [sim] run()" << std::endl;
|
||||
std::cout << std::dec << timestamp << ": [sim] run()" << std::endl;
|
||||
#endif
|
||||
|
||||
// execute program
|
||||
|
|
|
@ -65,7 +65,11 @@ private:
|
|||
|
||||
std::list<dram_req_t> dram_rsp_vec_;
|
||||
bool dram_rsp_active_;
|
||||
|
||||
|
||||
bool dram_rsp_ready_;
|
||||
bool snp_req_ready_;
|
||||
bool csr_io_req_ready_;
|
||||
|
||||
bool snp_req_active_;
|
||||
bool csr_req_active_;
|
||||
|
||||
|
|
|
@ -3,6 +3,10 @@
|
|||
#include <fstream>
|
||||
#include <iomanip>
|
||||
|
||||
#define GREEN "\\033[32m"
|
||||
#define RED "\\033[31m"
|
||||
#define DEFAULT "\\033[39m"
|
||||
|
||||
#define ALL_TESTS
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue