Support for different L1 and L2 block sizes, RTLsim fixes, dram_rsp_ready optimization

This commit is contained in:
Blaise Tine 2020-11-21 09:47:56 -08:00
parent a7da36c007
commit 1795980a52
50 changed files with 972 additions and 952 deletions

View file

@ -20,19 +20,21 @@ install:
- export PATH=$VERILATOR_ROOT/bin:$PATH - export PATH=$VERILATOR_ROOT/bin:$PATH
script: script:
- make -j - travis_wait 45 make
- ci/test_runtime.sh - travis_wait 45 ci/test_runtime.sh
- ci/test_driver.sh - travis_wait 45 ci/test_driver.sh
- ci/test_riscv_isa.sh - travis_wait 45 ci/test_riscv_isa.sh
- ci/test_opencl.sh - travis_wait 45 ci/test_opencl.sh
- ci/blackbox.sh -run_debug - travis_wait 45 ci/blackbox.sh --driver=rtlsim
- ci/blackbox.sh -run_scope - travis_wait 45 ci/blackbox.sh --driver=vlsim
- ci/blackbox.sh -run_1c - travis_wait 45 ci/blackbox.sh --driver=vlsim --scope
- ci/blackbox.sh -run_2c - travis_wait 45 ci/blackbox.sh --driver=vlsim --debug
- ci/blackbox.sh -run_4c - travis_wait 45 ci/blackbox.sh --driver=vlsim --cores=1
- ci/blackbox.sh -run_4c_l2 - travis_wait 45 ci/blackbox.sh --driver=vlsim --cores=2
- travis_wait 30 ci/blackbox.sh -run_4c_2l2_l3 - travis_wait 45 ci/blackbox.sh --driver=vlsim --cores=4
- travis_wait 30 ci/blackbox.sh -run_8c_4l2_l3 - travis_wait 45 ci/blackbox.sh --driver=vlsim --cores=4 --l2cache
- travis_wait 45 ci/blackbox.sh --driver=vlsim --cores=2 --l2cache --clusters=2
- travis_wait 45 ci/blackbox.sh --driver=vlsim --cores=2 --l2cache --clusters=4
after_success: after_success:
# Gather code coverage # Gather code coverage

View file

@ -3,107 +3,130 @@
# exit when any command fails # exit when any command fails
set -e set -e
run_1c() show_usage()
{ {
# test single core echo "Vortex BlackBox Test Driver v1.0"
make -C driver/opae/vlsim clean echo "Usage: [[--clusters=#n] [--cores=#n] [--warps=#n] [--threads=#n] [--l2cache] [[--driver=rtlsim|vlsim] [--debug] [--scope] [--app=vecadd|sgemm|basic|demo|dogfood][--help]]"
CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=1" make -C driver/opae/vlsim > /dev/null 2>&1
make -C benchmarks/opencl/sgemm run-vlsim
} }
run_2c() DRIVER=vlsim
{ APP=sgemm
# test 2 cores CLUSTERS=1
make -C driver/opae/vlsim clean CORES=2
CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0" make -C driver/opae/vlsim > /dev/null 2>&1 WARPS=4
make -C benchmarks/opencl/sgemm run-vlsim THREADS=4
} L2=0
DEBUG=0
SCOPE=0
run_4c() for i in "$@"
{ do
# test 4 cores case $i in
make -C driver/opae/vlsim clean --driver=*)
CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=0" make -C driver/opae/vlsim > /dev/null 2>&1 DRIVER=${i#*=}
make -C benchmarks/opencl/sgemm run-vlsim shift
} ;;
--app=*)
APP=${i#*=}
shift
;;
--clusters=*)
CLUSTERS=${i#*=}
shift
;;
--cores=*)
CORES=${i#*=}
shift
;;
--warps=*)
WARPS=${i#*=}
shift
;;
--threads=*)
THREADS=${i#*=}
shift
;;
--l2cache)
L2=1
shift
;;
--debug)
DEBUG=1
shift
;;
--scope)
SCOPE=1
shift
;;
--help)
show_usage
exit
;;
*)
show_usage
exit
;;
esac
done
run_4c_l2() case $DRIVER in
{ rtlsim)
# test 4 cores with L2 DRIVER_PATH=driver/rtlsim
make -C driver/opae/vlsim clean DRIVER_EXTRA=
CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1" make -C driver/opae/vlsim > /dev/null 2>&1 ;;
make -C benchmarks/opencl/sgemm run-vlsim vlsim)
} DRIVER_PATH=driver/opae
DRIVER_EXTRA=vlsim
;;
*)
echo "invalid driver: $DRIVER"
exit
;;
esac
run_4c_2l2_l3() case $APP in
{ sgemm)
# test 4 cores with L2 and L3 APP_PATH=benchmarks/opencl/sgemm
make -C driver/opae/vlsim clean ;;
CONFIGS="-DNUM_CLUSTERS=2 -DNUM_CORES=2 -DL2_ENABLE=1" make -C driver/opae/vlsim > /dev/null 2>&1 vecadd)
make -C benchmarks/opencl/sgemm run-vlsim APP_PATH=benchmarks/opencl/vacadd
} ;;
basic)
APP_PATH=driver/tests/basic
;;
demo)
APP_PATH=driver/tests/demo
;;
dogfood)
APP_PATH=driver/tests/dogfood
;;
*)
echo "invalid app: $APP"
exit
;;
esac
run_8c_4l2_l3() CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DL2_ENABLE=$L2"
{
# test 8 cores with L2 and L3
make -C driver/opae/vlsim clean
CONFIGS="-DNUM_CLUSTERS=4 -DNUM_CORES=2 -DL2_ENABLE=1" make -C driver/opae/vlsim > /dev/null 2>&1
make -C benchmarks/opencl/sgemm run-vlsim
}
run_debug() echo "CONFIGS=$CONFIGS"
{
# test debug build
make -C driver/opae/vlsim clean
DEBUG=1 CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=1" make -C driver/opae/vlsim > /dev/null 2>&1
make -C benchmarks/opencl/sgemm run-vlsim > /dev/null 2>&1
}
run_scope() make -C $DRIVER_PATH clean
{
# test build with scope analyzer
make -C driver/opae clean
SCOPE=1 CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=1" make -C driver/opae > /dev/null 2>&1
make -C benchmarks/opencl/sgemm run-vlsim
}
usage() if [[ $DEBUG -eq 1 ]]
{ then
echo "usage: blackbox [[-run_1c] [-run_2c] [-run_4c] [-run_4c_l2] [-run_4c_2l2_l3] [-run_8c_4l2_l3] [-run_debug] [-run_scope] [-all] [-h|--help]]" if [[ $SCOPE -eq 1 ]]
} then
DEBUG=1 SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH $DRIVER_EXTRA > build.log 2>&1
else
DEBUG=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH $DRIVER_EXTRA > build.log 2>&1
fi
else
if [[ $SCOPE -eq 1 ]]
then
SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH $DRIVER_EXTRA > build.log 2>&1
else
CONFIGS="$CONFIGS" make -C $DRIVER_PATH $DRIVER_EXTRA > build.log 2>&1
fi
fi
while [ "$1" != "" ]; do make -C $APP_PATH run-$DRIVER > run.log 2>&1
case $1 in
-run_1c ) run_1c
;;
-run_2c ) run_2c
;;
-run_4c ) run_4c
;;
-run_4c_l2 ) run_4c_l2
;;
-run_4c_2l2_l3 ) run_4c_2l2_l3
;;
-run_8c_4l2_l3 ) run_8c_4l2_l3
;;
-run_debug ) run_debug
;;
-run_scope ) run_scope
;;
-all ) run_1c
run_2c
run_4c
run_4c_l2
run_4c_2l2_l3
run_8c_4l2_l3
run_debug
run_scope
;;
-h | --help ) usage
exit
;;
* ) usage
exit 1
esac
shift
done

View file

@ -1,7 +1,7 @@
OPAE_HOME ?= /tools/opae/1.4.0 OPAE_HOME ?= /tools/opae/1.4.0
CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors #CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors
#CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I../include -I$(OPAE_HOME)/include -I../../hw CXXFLAGS += -I../include -I$(OPAE_HOME)/include -I../../hw
@ -60,7 +60,7 @@ json: ../../hw/opae/vortex_afu.json
afu_json_mgr json-info --afu-json=$^ --c-hdr=$@ afu_json_mgr json-info --afu-json=$^ --c-hdr=$@
fpga: $(SRCS) fpga: $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) $(FPGA_LIBS) -o $(PROJECT) $(CXX) $(CXXFLAGS) -DUSE_FPGA $^ $(LDFLAGS) $(FPGA_LIBS) -o $(PROJECT)
asesim: $(SRCS) $(ASE_DIR) asesim: $(SRCS) $(ASE_DIR)
$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $(PROJECT_ASE) $(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $(PROJECT_ASE)

View file

@ -1,5 +1,5 @@
CFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -Wfatal-errors #CFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -Wfatal-errors
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
CFLAGS += -I../../../../hw CFLAGS += -I../../../../hw
@ -30,7 +30,7 @@ CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
CFLAGS += -fPIC CFLAGS += -fPIC
CFLAGS += -DUSE_RTLSIM $(CONFIGS) CFLAGS += -DUSE_VLSIM $(CONFIGS)
CFLAGS += -DDUMP_PERF_STATS CFLAGS += -DDUMP_PERF_STATS
@ -79,7 +79,7 @@ VL_FLAGS += -DNOPAE
CFLAGS += -DNOPAE CFLAGS += -DNOPAE
# use DPI FPU # use DPI FPU
#VL_FLAGS += -DFPU_FAST VL_FLAGS += -DFPU_FAST
RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip

View file

@ -206,11 +206,10 @@ void opae_sim::sRxPort_bus() {
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 1; vortex_afu_->vcp2af_sRxPort_c0_rspValid = 1;
memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, cci_rd_it->block.data(), CACHE_BLOCK_SIZE); memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, cci_rd_it->block.data(), CACHE_BLOCK_SIZE);
vortex_afu_->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata; vortex_afu_->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata;
/*printf("*** [vlsim] read-rsp: addr=%ld, mdata=%d, data=", cci_rd_it->addr, cci_rd_it->mdata); /*printf("%0ld: [sim] CCI Rd Rsp: addr=%ld, mdata=%d, data=", timestamp, cci_rd_it->addr, cci_rd_it->mdata);
for (int i = 0; i < CACHE_BLOCK_SIZE; ++i) for (int i = 0; i < CACHE_BLOCK_SIZE; ++i)
printf("%02x", cci_rd_it->block[CACHE_BLOCK_SIZE-1-i]); printf("%02x", cci_rd_it->block[CACHE_BLOCK_SIZE-1-i]);
printf("\n");*/ printf("\n");*/
fflush(stdout);
cci_reads_.erase(cci_rd_it); cci_reads_.erase(cci_rd_it);
} }
} }
@ -225,8 +224,7 @@ void opae_sim::sTxPort_bus() {
cci_req.mdata = vortex_afu_->af2cp_sTxPort_c0_hdr_mdata; cci_req.mdata = vortex_afu_->af2cp_sTxPort_c0_hdr_mdata;
auto host_ptr = (uint64_t*)(vortex_afu_->af2cp_sTxPort_c0_hdr_address * CACHE_BLOCK_SIZE); auto host_ptr = (uint64_t*)(vortex_afu_->af2cp_sTxPort_c0_hdr_address * CACHE_BLOCK_SIZE);
memcpy(cci_req.block.data(), host_ptr, CACHE_BLOCK_SIZE); memcpy(cci_req.block.data(), host_ptr, CACHE_BLOCK_SIZE);
//printf("*** [vlsim] read-req: addr=%ld, mdata=%d\n", vortex_afu_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata); //printf("%0ld: [sim] CCI Rd Req: addr=%ld, mdata=%d\n", timestamp, vortex_afu_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata);
fflush(stdout);
cci_reads_.emplace_back(cci_req); cci_reads_.emplace_back(cci_req);
} }
@ -265,12 +263,12 @@ void opae_sim::avs_bus() {
memcpy(vortex_afu_->avs_readdata, dram_rd_it->block.data(), CACHE_BLOCK_SIZE); memcpy(vortex_afu_->avs_readdata, dram_rd_it->block.data(), CACHE_BLOCK_SIZE);
uint32_t tag = dram_rd_it->tag; uint32_t tag = dram_rd_it->tag;
dram_reads_.erase(dram_rd_it); dram_reads_.erase(dram_rd_it);
/*printf("%0ld: VLSIM: DRAM rsp: addr=%x, pending={", timestamp, tag); /*printf("%0ld: [sim] DRAM Rd Rsp: addr=%x, pending={", timestamp, tag);
for (auto& req : dram_reads_) { for (auto& req : dram_reads_) {
if (req.cycles_left != 0) if (req.cycles_left != 0)
printf(" !%0x", req.tag); printf(" !%0x", req.tag);
else else
printf(" %0x", req.tag); printf(" %0x", req.tag);
} }
printf("}\n");*/ printf("}\n");*/
} }
@ -288,7 +286,8 @@ void opae_sim::avs_bus() {
// process DRAM requests // process DRAM requests
if (!dram_stalled) { if (!dram_stalled) {
if (vortex_afu_->avs_write) { assert(!vortex_afu_->avs_read || !vortex_afu_->avs_write);
if (vortex_afu_->avs_write) {
assert(0 == vortex_afu_->mem_bank_select); assert(0 == vortex_afu_->mem_bank_select);
uint64_t byteen = vortex_afu_->avs_byteenable; uint64_t byteen = vortex_afu_->avs_byteenable;
unsigned base_addr = (vortex_afu_->avs_address * CACHE_BLOCK_SIZE); unsigned base_addr = (vortex_afu_->avs_address * CACHE_BLOCK_SIZE);
@ -307,12 +306,12 @@ void opae_sim::avs_bus() {
ram_.read(base_addr, CACHE_BLOCK_SIZE, dram_req.block.data()); ram_.read(base_addr, CACHE_BLOCK_SIZE, dram_req.block.data());
dram_req.tag = base_addr; dram_req.tag = base_addr;
dram_reads_.emplace_back(dram_req); dram_reads_.emplace_back(dram_req);
/*printf("%0ld: VLSIM: DRAM req: addr=%x, pending={", timestamp, base_addr); /*printf("%0ld: [sim] DRAM Rd Req: addr=%x, pending={", timestamp, base_addr);
for (auto& req : dram_reads_) { for (auto& req : dram_reads_) {
if (req.cycles_left != 0) if (req.cycles_left != 0)
printf(" !%0x", req.tag); printf(" !%0x", req.tag);
else else
printf(" %0x", req.tag); printf(" %0x", req.tag);
} }
printf("}\n");*/ printf("}\n");*/
} }

View file

@ -7,11 +7,11 @@
#include <assert.h> #include <assert.h>
#include <cmath> #include <cmath>
#ifdef USE_VLSIM #if defined(USE_FPGA) || defined(USE_ASE)
#include "vlsim/fpga.h"
#else
#include <opae/fpga.h> #include <opae/fpga.h>
#include <uuid/uuid.h> #include <uuid/uuid.h>
#elif defined(USE_VLSIM)
#include "vlsim/fpga.h"
#endif #endif
#include <vortex.h> #include <vortex.h>

View file

@ -1,5 +1,5 @@
CFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -Wfatal-errors #CFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -Wfatal-errors
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
CFLAGS += -I../../include -I../../../hw/simulate -I../../../hw CFLAGS += -I../../include -I../../../hw/simulate -I../../../hw
@ -65,7 +65,7 @@ else
endif endif
# use DPI FPU # use DPI FPU
#VL_FLAGS += -DFPU_FAST VL_FLAGS += -DFPU_FAST
PROJECT = libvortex.so PROJECT = libvortex.so
# PROJECT = libvortex.dylib # PROJECT = libvortex.dylib

View file

@ -92,7 +92,7 @@ int run_test(const kernel_arg_t& kernel_arg,
} }
} }
if (errors != 0) { if (errors != 0) {
std::cout << "Found " << errors << " errors!" << std::endl; std::cout << "Found " << std::dec << errors << " errors!" << std::endl;
std::cout << "FAILED!" << std::endl; std::cout << "FAILED!" << std::endl;
return 1; return 1;
} }

View file

@ -260,7 +260,7 @@ int main(int argc, char *argv[]) {
(void*)vx_host_ptr(src1_buf), (void*)vx_host_ptr(src1_buf),
(void*)vx_host_ptr(src2_buf)); (void*)vx_host_ptr(src2_buf));
if (errors != 0) { if (errors != 0) {
std::cout << "found " << errors << " errors!" << std::endl; std::cout << "found " << std::dec << errors << " errors!" << std::endl;
std::cout << "Test" << t << "-" << name << " FAILED!" << std::endl << std::flush; std::cout << "Test" << t << "-" << name << " FAILED!" << std::endl << std::flush;
if (stop_on_error) { if (stop_on_error) {
cleanup(); cleanup();

View file

@ -14,29 +14,36 @@ union Float_t {
} parts; } parts;
}; };
inline float fround(float x, int32_t precision = 4) { inline float fround(float x, int32_t precision = 8) {
auto power_of_10 = std::pow(10, precision); auto power_of_10 = std::pow(10, precision);
return std::round(x * power_of_10) / power_of_10; return std::round(x * power_of_10) / power_of_10;
} }
inline bool almost_equal_eps(float a, float b, float eps = std::numeric_limits<float>::epsilon()) { inline bool almost_equal_eps(float a, float b, int ulp = 128) {
auto tolerance = std::min(fabs(a), fabs(b)) * eps; auto eps = std::numeric_limits<float>::epsilon() * (std::max(fabs(a), fabs(b)) * ulp);
return fabs(a - b) <= tolerance; auto d = fabs(a - b);
if (d > eps) {
std::cout << "*** almost_equal_eps: d=" << d << ", eps=" << eps << std::endl;
return false;
}
return true;
} }
inline bool almost_equal_ulp(float a, float b, int32_t ulp = 4) { inline bool almost_equal_ulp(float a, float b, int32_t ulp = 6) {
Float_t fa{a}, fb{b}; Float_t fa{a}, fb{b};
auto d = std::abs(fa.i - fb.i); auto d = std::abs(fa.i - fb.i);
if (d > ulp) { if (d > ulp) {
std::cout << "*** float compare: a=" << a << ", b=" << b << ", ulp=" << d << ", ia=" << std::hex << fa.i << ", ib=" << fb.i << std::endl; std::cout << "*** almost_equal_ulp: a=" << a << ", b=" << b << ", ulp=" << d << ", ia=" << std::hex << fa.i << ", ib=" << fb.i << std::endl;
return false; return false;
} }
return true; return true;
} }
inline bool almost_equal(float a, float b) { inline bool almost_equal(float a, float b) {
if (almost_equal_eps(a, b)) if (a == b)
return true; return true;
/*if (almost_equal_eps(a, b))
return true;*/
return almost_equal_ulp(a, b); return almost_equal_ulp(a, b);
} }

View file

@ -45,19 +45,19 @@ module VX_avs_wrapper #(
reg [AVS_BANKS_BITS-1:0] avs_bankselect_r; reg [AVS_BANKS_BITS-1:0] avs_bankselect_r;
reg [AVS_BURSTW-1:0] avs_burstcount_r; reg [AVS_BURSTW-1:0] avs_burstcount_r;
wire avs_rtq_push = !dram_req_rw && dram_req_valid && dram_req_ready; wire avs_reqq_push = dram_req_valid && dram_req_ready && !dram_req_rw;
wire avs_rtq_pop = dram_rsp_valid && dram_rsp_ready; wire avs_reqq_pop = dram_rsp_valid && dram_rsp_ready;
wire avs_rdq_push = avs_readdatavalid; wire avs_rspq_push = avs_readdatavalid;
wire avs_rdq_pop = avs_rtq_pop; wire avs_rspq_pop = avs_reqq_pop;
wire avs_rdq_empty; wire avs_rspq_empty;
reg [RD_QUEUE_ADDRW-1:0] avs_pending_reads; reg [RD_QUEUE_ADDRW-1:0] avs_pending_reads;
wire [RD_QUEUE_ADDRW-1:0] avs_pending_reads_n; wire [RD_QUEUE_ADDRW-1:0] avs_pending_reads_n;
assign avs_pending_reads_n = avs_pending_reads assign avs_pending_reads_n = avs_pending_reads
+ RD_QUEUE_ADDRW'((avs_rtq_push && !avs_rdq_pop) ? 1 : + RD_QUEUE_ADDRW'((avs_reqq_push && !avs_rspq_pop) ? 1 :
(avs_rdq_pop && !avs_rtq_push) ? -1 : 0); (avs_rspq_pop && !avs_reqq_push) ? -1 : 0);
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
@ -75,9 +75,9 @@ module VX_avs_wrapper #(
) rd_req_queue ( ) rd_req_queue (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.push (avs_rtq_push), .push (avs_reqq_push),
.pop (avs_reqq_pop),
.data_in (dram_req_tag), .data_in (dram_req_tag),
.pop (avs_rtq_pop),
.data_out (dram_rsp_tag), .data_out (dram_rsp_tag),
`UNUSED_PIN (empty), `UNUSED_PIN (empty),
`UNUSED_PIN (full), `UNUSED_PIN (full),
@ -90,37 +90,38 @@ module VX_avs_wrapper #(
) rd_rsp_queue ( ) rd_rsp_queue (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.push (avs_rdq_push), .push (avs_rspq_push),
.data_in (avs_readdata), .pop (avs_rspq_pop),
.pop (avs_rdq_pop), .data_in (avs_readdata),
.data_out (dram_rsp_data), .data_out (dram_rsp_data),
.empty (avs_rdq_empty), .empty (avs_rspq_empty),
`UNUSED_PIN (full), `UNUSED_PIN (full),
`UNUSED_PIN (size) `UNUSED_PIN (size)
); );
assign avs_read = dram_req_valid && !dram_req_rw; wire rsp_queue_ready = (avs_pending_reads != RD_QUEUE_SIZE);
assign avs_write = dram_req_valid && dram_req_rw;
assign avs_read = dram_req_valid && !dram_req_rw && rsp_queue_ready;
assign avs_write = dram_req_valid && dram_req_rw && rsp_queue_ready;
assign avs_address = dram_req_addr; assign avs_address = dram_req_addr;
assign avs_byteenable = dram_req_byteen; assign avs_byteenable = dram_req_byteen;
assign avs_writedata = dram_req_data; assign avs_writedata = dram_req_data;
assign dram_req_ready = !avs_waitrequest assign dram_req_ready = !avs_waitrequest && rsp_queue_ready;
&& (avs_pending_reads < RD_QUEUE_SIZE);
assign avs_burstcount = avs_burstcount_r; assign avs_burstcount = avs_burstcount_r;
assign avs_bankselect = avs_bankselect_r; assign avs_bankselect = avs_bankselect_r;
assign dram_rsp_valid = !avs_rdq_empty; assign dram_rsp_valid = !avs_rspq_empty;
`ifdef DBG_PRINT_AVS `ifdef DBG_PRINT_AVS
always @(posedge clk) begin always @(posedge clk) begin
if (dram_req_valid && dram_req_ready) begin if (dram_req_valid && dram_req_ready) begin
if (dram_req_rw) if (dram_req_rw)
$display("%t: AVS Wr Req: addr=%0h, byteen=%0h, tag=%0h, data=%0h", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_byteenable, dram_req_tag, avs_writedata); $display("%t: AVS Wr Req: addr=%0h, byteen=%0h, tag=%0h, data=%0h", $time, `DRAM_TO_BYTE_ADDR(dram_req_addr), dram_req_byteen, dram_req_tag, dram_req_data);
else else
$display("%t: AVS Rd Req: addr=%0h, byteen=%0h, tag=%0h, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_byteenable, dram_req_tag, avs_pending_reads_n); $display("%t: AVS Rd Req: addr=%0h, byteen=%0h, tag=%0h, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(dram_req_addr), dram_req_byteen, dram_req_tag, avs_pending_reads_n);
end end
if (dram_rsp_valid && dram_rsp_ready) begin if (dram_rsp_valid && dram_rsp_ready) begin
$display("%t: AVS Rd Rsp: data=%0h, pending=%0d", $time, avs_readdata, avs_pending_reads_n); $display("%t: AVS Rd Rsp: tag=%0h, data=%0h, pending=%0d", $time, dram_rsp_tag, dram_rsp_data, avs_pending_reads_n);
end end
end end
`endif `endif

View file

@ -501,7 +501,6 @@ wire [AVS_REQ_TAGW-1:0] vx_dram_rsp_tag_unqual;
wire cci_dram_rd_req_valid, cci_dram_wr_req_valid; wire cci_dram_rd_req_valid, cci_dram_wr_req_valid;
wire [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr; wire [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr;
wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_dout; wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_dout;
wire [VX_DRAM_LINE_IDX-1:0] vx_dram_req_idx, vx_dram_rsp_idx;
//-- //--
@ -526,20 +525,19 @@ assign vx_dram_req_valid_qual = vx_dram_req_valid && vx_enabled;
assign vx_dram_req_addr_qual = vx_dram_req_addr[`VX_DRAM_ADDR_WIDTH-1:`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH]; assign vx_dram_req_addr_qual = vx_dram_req_addr[`VX_DRAM_ADDR_WIDTH-1:`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH];
if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin
assign vx_dram_req_idx = vx_dram_req_addr[VX_DRAM_LINE_IDX-1:0]; wire [VX_DRAM_LINE_IDX-1:0] vx_dram_req_idx = vx_dram_req_addr[VX_DRAM_LINE_IDX-1:0];
wire [VX_DRAM_LINE_IDX-1:0] vx_dram_rsp_idx = vx_dram_rsp_tag_unqual[VX_DRAM_LINE_IDX-1:0];
assign vx_dram_req_byteen_qual = 64'(vx_dram_req_byteen) << (6'(vx_dram_req_addr[VX_DRAM_LINE_IDX-1:0]) << (VX_DRAM_LINE_LW-3)); assign vx_dram_req_byteen_qual = 64'(vx_dram_req_byteen) << (6'(vx_dram_req_addr[VX_DRAM_LINE_IDX-1:0]) << (VX_DRAM_LINE_LW-3));
assign vx_dram_req_data_qual = DRAM_LINE_WIDTH'(vx_dram_req_data) << ((DRAM_LINE_LW'(vx_dram_req_idx)) << VX_DRAM_LINE_LW); assign vx_dram_req_data_qual = DRAM_LINE_WIDTH'(vx_dram_req_data) << ((DRAM_LINE_LW'(vx_dram_req_idx)) << VX_DRAM_LINE_LW);
assign vx_dram_req_tag_qual = {vx_dram_req_tag, vx_dram_req_idx}; assign vx_dram_req_tag_qual = {vx_dram_req_tag, vx_dram_req_idx};
assign vx_dram_rsp_data = vx_dram_rsp_data_unqual[vx_dram_rsp_idx]; assign vx_dram_rsp_data = vx_dram_rsp_data_unqual[vx_dram_rsp_idx];
end else begin end else begin
assign vx_dram_req_idx = VX_DRAM_LINE_IDX'(0);
assign vx_dram_req_byteen_qual = vx_dram_req_byteen; assign vx_dram_req_byteen_qual = vx_dram_req_byteen;
assign vx_dram_req_tag_qual = vx_dram_req_tag; assign vx_dram_req_tag_qual = vx_dram_req_tag;
assign vx_dram_req_data_qual = vx_dram_req_data; assign vx_dram_req_data_qual = vx_dram_req_data;
assign vx_dram_rsp_data = vx_dram_rsp_data_unqual; assign vx_dram_rsp_data = vx_dram_rsp_data_unqual;
end end
assign vx_dram_rsp_idx = vx_dram_rsp_tag_unqual[VX_DRAM_LINE_IDX-1:0];
assign vx_dram_rsp_tag = vx_dram_rsp_tag_unqual[`VX_DRAM_TAG_WIDTH+VX_DRAM_LINE_IDX-1:VX_DRAM_LINE_IDX]; assign vx_dram_rsp_tag = vx_dram_rsp_tag_unqual[`VX_DRAM_TAG_WIDTH+VX_DRAM_LINE_IDX-1:VX_DRAM_LINE_IDX];
//-- //--
@ -723,15 +721,15 @@ always @(posedge clk) begin
cci_rd_req_wait <= 0; // restart new request batch cci_rd_req_wait <= 0; // restart new request batch
end end
`ifdef DBG_PRINT_OPAE `ifdef DBG_PRINT_OPAE
$display("%t: CCI Rd Rsp: idx=%0d, ctr=%0d", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr); $display("%t: CCI Rd Rsp: idx=%0d, ctr=%0d, data=%0h", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data);
`endif `endif
end end
if (cci_rdq_pop) begin /*if (cci_rdq_pop) begin
`ifdef DBG_PRINT_OPAE `ifdef DBG_PRINT_OPAE
$display("%t: CCI Rd Queue Pop: pending=%0d", $time, cci_pending_reads_next); $display("%t: CCI Rd Queue Pop: pending=%0d", $time, cci_pending_reads_next);
`endif `endif
end end*/
if (cci_dram_wr_req_fire) begin if (cci_dram_wr_req_fire) begin
cci_dram_wr_req_addr_unqual <= cci_dram_wr_req_addr_unqual + ((CCI_RD_RQ_TAGW'(cci_dram_wr_req_ctr) == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) ? DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0)); cci_dram_wr_req_addr_unqual <= cci_dram_wr_req_addr_unqual + ((CCI_RD_RQ_TAGW'(cci_dram_wr_req_ctr) == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) ? DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0));
@ -836,15 +834,15 @@ begin
cci_wr_req_addr <= cci_wr_req_addr + t_ccip_clAddr'(1); cci_wr_req_addr <= cci_wr_req_addr + t_ccip_clAddr'(1);
cci_wr_req_ctr <= cci_wr_req_ctr - DRAM_ADDR_WIDTH'(1); cci_wr_req_ctr <= cci_wr_req_ctr - DRAM_ADDR_WIDTH'(1);
`ifdef DBG_PRINT_OPAE `ifdef DBG_PRINT_OPAE
$display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next); $display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d, data=%0h", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next, af2cp_sTxPort.c1.data);
`endif `endif
end end
`ifdef DBG_PRINT_OPAE /*`ifdef DBG_PRINT_OPAE
if (cci_wr_rsp_fire) begin if (cci_wr_rsp_fire) begin
$display("%t: CCI Wr Rsp: pending=%0d", $time, cci_pending_writes_next); $display("%t: CCI Wr Rsp: pending=%0d", $time, cci_pending_writes_next);
end end
`endif `endif*/
if (cci_dram_rd_req_fire) begin if (cci_dram_rd_req_fire) begin
cci_dram_rd_req_addr_unqual <= cci_dram_rd_req_addr_unqual + DRAM_ADDR_WIDTH'(1); cci_dram_rd_req_addr_unqual <= cci_dram_rd_req_addr_unqual + DRAM_ADDR_WIDTH'(1);

View file

@ -304,30 +304,60 @@ module VX_cluster #(
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] core_snp_fwdin_tag; wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] core_snp_fwdin_tag;
wire[`NUM_CORES-1:0] core_snp_fwdin_ready; wire[`NUM_CORES-1:0] core_snp_fwdin_ready;
wire snp_fwd_rsp_valid;
wire [`L2DRAM_ADDR_WIDTH-1:0] snp_fwd_rsp_addr;
wire snp_fwd_rsp_invalidate;
wire [`L2SNP_TAG_WIDTH-1:0] snp_fwd_rsp_tag;
wire snp_fwd_rsp_ready;
reg [`L2NUM_REQUESTS-1:0] core_dram_rsp_ready_other;
reg core_dram_rsp_ready_all;
always @(*) begin
core_dram_rsp_ready_other = {`L2NUM_REQUESTS{1'b1}};
core_dram_rsp_ready_all = 1'b1;
for (integer i = 0; i < `L2NUM_REQUESTS; i++) begin
for (integer j = 0; j < `L2NUM_REQUESTS; j++) begin
if (i != j) begin
if (0 == (j & 1))
core_dram_rsp_ready_other[i] &= (per_core_D_dram_rsp_ready [(j/2)] | !core_dram_rsp_valid [j]);
else
core_dram_rsp_ready_other[i] &= (per_core_I_dram_rsp_ready [(j/2)] | !core_dram_rsp_valid [j]);
end
end
if (0 == (i & 1))
core_dram_rsp_ready_all &= (per_core_D_dram_rsp_ready [(i/2)] | !core_dram_rsp_valid [i]);
else
core_dram_rsp_ready_all &= (per_core_I_dram_rsp_ready [(i/2)] | !core_dram_rsp_valid [i]);
end
end
for (genvar i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin for (genvar i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
assign core_dram_req_valid [i] = per_core_D_dram_req_valid[(i/2)]; assign core_dram_req_valid [i] = per_core_D_dram_req_valid [(i/2)];
assign core_dram_req_valid [i+1] = per_core_I_dram_req_valid[(i/2)]; assign core_dram_req_valid [i+1] = per_core_I_dram_req_valid [(i/2)];
assign core_dram_req_rw [i] = per_core_D_dram_req_rw[(i/2)]; assign core_dram_req_rw [i] = per_core_D_dram_req_rw [(i/2)];
assign core_dram_req_rw [i+1] = per_core_I_dram_req_rw[(i/2)]; assign core_dram_req_rw [i+1] = per_core_I_dram_req_rw [(i/2)];
assign core_dram_req_byteen [i] = per_core_D_dram_req_byteen[(i/2)]; assign core_dram_req_byteen [i] = per_core_D_dram_req_byteen [(i/2)];
assign core_dram_req_byteen [i+1] = per_core_I_dram_req_byteen[(i/2)]; assign core_dram_req_byteen [i+1] = per_core_I_dram_req_byteen [(i/2)];
assign core_dram_req_addr [i] = per_core_D_dram_req_addr[(i/2)]; assign core_dram_req_addr [i] = per_core_D_dram_req_addr [(i/2)];
assign core_dram_req_addr [i+1] = per_core_I_dram_req_addr[(i/2)]; assign core_dram_req_addr [i+1] = per_core_I_dram_req_addr [(i/2)];
assign core_dram_req_data [i] = per_core_D_dram_req_data[(i/2)]; assign core_dram_req_data [i] = per_core_D_dram_req_data [(i/2)];
assign core_dram_req_data [i+1] = per_core_I_dram_req_data[(i/2)]; assign core_dram_req_data [i+1] = per_core_I_dram_req_data [(i/2)];
assign core_dram_req_tag [i] = per_core_D_dram_req_tag[(i/2)]; assign core_dram_req_tag [i] = per_core_D_dram_req_tag [(i/2)];
assign core_dram_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)]; assign core_dram_req_tag [i+1] = per_core_I_dram_req_tag [(i/2)];
assign per_core_D_dram_req_ready [(i/2)] = core_dram_req_ready; assign per_core_D_dram_req_ready [(i/2)] = core_dram_req_ready;
assign per_core_I_dram_req_ready [(i/2)] = core_dram_req_ready; assign per_core_I_dram_req_ready [(i/2)] = core_dram_req_ready;
assign per_core_D_dram_rsp_valid [(i/2)] = core_dram_rsp_valid[i] && core_dram_rsp_ready; assign per_core_D_dram_rsp_valid [(i/2)] = core_dram_rsp_valid[i] & core_dram_rsp_ready_other [i];
assign per_core_I_dram_rsp_valid [(i/2)] = core_dram_rsp_valid[i+1] && core_dram_rsp_ready; assign per_core_I_dram_rsp_valid [(i/2)] = core_dram_rsp_valid[i+1] & core_dram_rsp_ready_other [i+1];
assign per_core_D_dram_rsp_data [(i/2)] = core_dram_rsp_data[i]; assign per_core_D_dram_rsp_data [(i/2)] = core_dram_rsp_data[i];
assign per_core_I_dram_rsp_data [(i/2)] = core_dram_rsp_data[i+1]; assign per_core_I_dram_rsp_data [(i/2)] = core_dram_rsp_data[i+1];
@ -346,32 +376,63 @@ module VX_cluster #(
assign per_core_snp_rsp_ready [(i/2)] = core_snp_fwdin_ready [(i/2)]; assign per_core_snp_rsp_ready [(i/2)] = core_snp_fwdin_ready [(i/2)];
end end
assign core_dram_rsp_ready = (& per_core_D_dram_rsp_ready) && (& per_core_I_dram_rsp_ready); assign core_dram_rsp_ready = core_dram_rsp_ready_all;
VX_snp_forwarder #(
.CACHE_ID (`L2CACHE_ID),
.NUM_REQUESTS (`NUM_CORES),
.SRC_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH),
.DST_ADDR_WIDTH (`DDRAM_ADDR_WIDTH),
.SNP_TAG_WIDTH (`L2SNP_TAG_WIDTH),
.SNRQ_SIZE (`L2SNRQ_SIZE)
) snp_forwarder (
.clk (clk),
.reset (reset),
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_invalidate (snp_req_invalidate),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
.snp_rsp_valid (snp_fwd_rsp_valid),
.snp_rsp_addr (snp_fwd_rsp_addr),
.snp_rsp_invalidate (snp_fwd_rsp_invalidate),
.snp_rsp_tag (snp_fwd_rsp_tag),
.snp_rsp_ready (snp_fwd_rsp_ready),
.snp_fwdout_valid (core_snp_fwdout_valid),
.snp_fwdout_addr (core_snp_fwdout_addr),
.snp_fwdout_invalidate(core_snp_fwdout_invalidate),
.snp_fwdout_tag (core_snp_fwdout_tag),
.snp_fwdout_ready (core_snp_fwdout_ready),
.snp_fwdin_valid (core_snp_fwdin_valid),
.snp_fwdin_tag (core_snp_fwdin_tag),
.snp_fwdin_ready (core_snp_fwdin_ready)
);
VX_cache #( VX_cache #(
.CACHE_ID (`L2CACHE_ID), .CACHE_ID (`L2CACHE_ID),
.CACHE_SIZE (`L2CACHE_SIZE), .CACHE_SIZE (`L2CACHE_SIZE),
.BANK_LINE_SIZE (`L2BANK_LINE_SIZE), .BANK_LINE_SIZE (`L2BANK_LINE_SIZE),
.NUM_BANKS (`L2NUM_BANKS), .NUM_BANKS (`L2NUM_BANKS),
.WORD_SIZE (`L2WORD_SIZE), .WORD_SIZE (`L2WORD_SIZE),
.NUM_REQUESTS (`L2NUM_REQUESTS), .NUM_REQUESTS (`L2NUM_REQUESTS),
.CREQ_SIZE (`L2CREQ_SIZE), .CREQ_SIZE (`L2CREQ_SIZE),
.MRVQ_SIZE (`L2MRVQ_SIZE), .MRVQ_SIZE (`L2MRVQ_SIZE),
.DRFQ_SIZE (`L2DRFQ_SIZE), .DRFQ_SIZE (`L2DRFQ_SIZE),
.SNRQ_SIZE (`L2SNRQ_SIZE), .SNRQ_SIZE (`L2SNRQ_SIZE),
.CWBQ_SIZE (`L2CWBQ_SIZE), .CWBQ_SIZE (`L2CWBQ_SIZE),
.DREQ_SIZE (`L2DREQ_SIZE), .DREQ_SIZE (`L2DREQ_SIZE),
.SNPQ_SIZE (`L2SNPQ_SIZE), .SNPQ_SIZE (`L2SNPQ_SIZE),
.DRAM_ENABLE (1), .DRAM_ENABLE (1),
.FLUSH_ENABLE (1), .FLUSH_ENABLE (1),
.WRITE_ENABLE (1), .WRITE_ENABLE (1),
.SNOOP_FORWARDING (1), .CORE_TAG_WIDTH (`DDRAM_TAG_WIDTH),
.CORE_TAG_WIDTH (`DDRAM_TAG_WIDTH), .CORE_TAG_ID_BITS (0),
.CORE_TAG_ID_BITS (0), .DRAM_TAG_WIDTH (`L2DRAM_TAG_WIDTH),
.DRAM_TAG_WIDTH (`L2DRAM_TAG_WIDTH), .SNP_TAG_WIDTH (`L2SNP_TAG_WIDTH)
.NUM_SNP_REQUESTS (`NUM_CORES),
.SNP_REQ_TAG_WIDTH (`L2SNP_TAG_WIDTH),
.SNP_FWD_TAG_WIDTH (`DSNP_TAG_WIDTH)
) l2cache ( ) l2cache (
`SCOPE_BIND_VX_cluster_l2cache `SCOPE_BIND_VX_cluster_l2cache
@ -409,29 +470,17 @@ module VX_cluster #(
.dram_rsp_ready (dram_rsp_ready), .dram_rsp_ready (dram_rsp_ready),
// Snoop request // Snoop request
.snp_req_valid (snp_req_valid), .snp_req_valid (snp_fwd_rsp_valid),
.snp_req_addr (snp_req_addr), .snp_req_addr (snp_fwd_rsp_addr),
.snp_req_invalidate (snp_req_invalidate), .snp_req_invalidate (snp_fwd_rsp_invalidate),
.snp_req_tag (snp_req_tag), .snp_req_tag (snp_fwd_rsp_tag),
.snp_req_ready (snp_req_ready), .snp_req_ready (snp_fwd_rsp_ready),
// Snoop response // Snoop response
.snp_rsp_valid (snp_rsp_valid), .snp_rsp_valid (snp_rsp_valid),
.snp_rsp_tag (snp_rsp_tag), .snp_rsp_tag (snp_rsp_tag),
.snp_rsp_ready (snp_rsp_ready), .snp_rsp_ready (snp_rsp_ready),
// Snoop forwarding out
.snp_fwdout_valid (core_snp_fwdout_valid),
.snp_fwdout_addr (core_snp_fwdout_addr),
.snp_fwdout_invalidate(core_snp_fwdout_invalidate),
.snp_fwdout_tag (core_snp_fwdout_tag),
.snp_fwdout_ready (core_snp_fwdout_ready),
// Snoop forwarding in
.snp_fwdin_valid (core_snp_fwdin_valid),
.snp_fwdin_tag (core_snp_fwdin_tag),
.snp_fwdin_ready (core_snp_fwdin_ready),
// Miss status // Miss status
`UNUSED_PIN (miss_vec) `UNUSED_PIN (miss_vec)
); );
@ -508,11 +557,12 @@ module VX_cluster #(
if (`NUM_CORES > 1) begin if (`NUM_CORES > 1) begin
VX_snp_forwarder #( VX_snp_forwarder #(
.CACHE_ID (`L2CACHE_ID), .CACHE_ID (`L2CACHE_ID),
.BANK_LINE_SIZE (`L2BANK_LINE_SIZE), .NUM_REQUESTS (`NUM_CORES),
.NUM_REQUESTS (`NUM_CORES), .SRC_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH),
.SNRQ_SIZE (`L2SNRQ_SIZE), .DST_ADDR_WIDTH (`DDRAM_ADDR_WIDTH),
.SNP_REQ_TAG_WIDTH (`L2SNP_TAG_WIDTH) .SNP_TAG_WIDTH (`L2SNP_TAG_WIDTH),
.SNRQ_SIZE (`L2SNRQ_SIZE)
) snp_forwarder ( ) snp_forwarder (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),

View file

@ -8,7 +8,7 @@
`endif `endif
`ifndef NUM_CORES `ifndef NUM_CORES
`define NUM_CORES 2 `define NUM_CORES 4
`endif `endif
`ifndef NUM_WARPS `ifndef NUM_WARPS
@ -23,8 +23,20 @@
`define NUM_BARRIERS 4 `define NUM_BARRIERS 4
`endif `endif
`ifndef L2_ENABLE
`define L2_ENABLE (`NUM_CORES > 2)
`endif
`ifndef L3_ENABLE
`define L3_ENABLE (`NUM_CLUSTERS > 1)
`endif
`ifndef GLOBAL_BLOCK_SIZE `ifndef GLOBAL_BLOCK_SIZE
`define GLOBAL_BLOCK_SIZE 16 `define GLOBAL_BLOCK_SIZE 64
`endif
`ifndef L1_BLOCK_SIZE
`define L1_BLOCK_SIZE 16
`endif `endif
`ifndef STARTUP_ADDR `ifndef STARTUP_ADDR
@ -57,14 +69,6 @@
`define FRAME_BUFFER_SIZE (FRAME_BUFFER_WIDTH * FRAME_BUFFER_HEIGHT) `define FRAME_BUFFER_SIZE (FRAME_BUFFER_WIDTH * FRAME_BUFFER_HEIGHT)
`ifndef L2_ENABLE
`define L2_ENABLE 0
`endif
`ifndef L3_ENABLE
`define L3_ENABLE (`NUM_CLUSTERS > 1)
`endif
`ifndef EXT_M_DISABLE `ifndef EXT_M_DISABLE
`define EXT_M_ENABLE `define EXT_M_ENABLE
`endif `endif
@ -159,7 +163,7 @@
`define CSR_MIMPID 12'hF13 `define CSR_MIMPID 12'hF13
`define CSR_MHARTID 12'hF14 `define CSR_MHARTID 12'hF14
// Pipeline Queues ============================================================ // Pipeline Queues ////////////////////////////////////////////////////////////
// Size of instruction queue // Size of instruction queue
`ifndef IBUF_SIZE `ifndef IBUF_SIZE
@ -181,28 +185,18 @@
`define FPUQ_SIZE 8 `define FPUQ_SIZE 8
`endif `endif
// Dcache Configurable Knobs ================================================== // Dcache Configurable Knobs //////////////////////////////////////////////////
// Size of cache in bytes // Size of cache in bytes
`ifndef DCACHE_SIZE `ifndef DCACHE_SIZE
`define DCACHE_SIZE 4096 `define DCACHE_SIZE 8192
`endif `endif
// Size of line inside a bank in bytes // Number of banks
`ifndef DBANK_LINE_SIZE
`define DBANK_LINE_SIZE `GLOBAL_BLOCK_SIZE
`endif
// Number of banks {1, 2, 4, 8,...}
`ifndef DNUM_BANKS `ifndef DNUM_BANKS
`define DNUM_BANKS 4 `define DNUM_BANKS 4
`endif `endif
// Size of a word in bytes
`ifndef DWORD_SIZE
`define DWORD_SIZE 4
`endif
// Core Request Queue Size // Core Request Queue Size
`ifndef DCREQ_SIZE `ifndef DCREQ_SIZE
`define DCREQ_SIZE `NUM_WARPS `define DCREQ_SIZE `NUM_WARPS
@ -238,21 +232,11 @@
`define DSNRQ_SIZE 8 `define DSNRQ_SIZE 8
`endif `endif
// Icache Configurable Knobs ================================================== // Icache Configurable Knobs //////////////////////////////////////////////////
// Size of cache in bytes // Size of cache in bytes
`ifndef ICACHE_SIZE `ifndef ICACHE_SIZE
`define ICACHE_SIZE 2048 `define ICACHE_SIZE 8192
`endif
// Size of line inside a bank in bytes
`ifndef IBANK_LINE_SIZE
`define IBANK_LINE_SIZE `GLOBAL_BLOCK_SIZE
`endif
// Size of a word in bytes
`ifndef IWORD_SIZE
`define IWORD_SIZE 4
`endif `endif
// Core Request Queue Size // Core Request Queue Size
@ -280,28 +264,18 @@
`define IDRFQ_SIZE 8 `define IDRFQ_SIZE 8
`endif `endif
// SM Configurable Knobs ====================================================== // SM Configurable Knobs //////////////////////////////////////////////////////
// Size of cache in bytes // Size of cache in bytes
`ifndef SCACHE_SIZE `ifndef SCACHE_SIZE
`define SCACHE_SIZE 1024 `define SCACHE_SIZE 4096
`endif `endif
// Size of line inside a bank in bytes // Number of banks
`ifndef SBANK_LINE_SIZE
`define SBANK_LINE_SIZE `GLOBAL_BLOCK_SIZE
`endif
// Number of banks {1, 2, 4, 8,...}
`ifndef SNUM_BANKS `ifndef SNUM_BANKS
`define SNUM_BANKS 4 `define SNUM_BANKS 4
`endif `endif
// Size of a word in bytes
`ifndef SWORD_SIZE
`define SWORD_SIZE 4
`endif
// Core Request Queue Size // Core Request Queue Size
`ifndef SCREQ_SIZE `ifndef SCREQ_SIZE
`define SCREQ_SIZE `NUM_WARPS `define SCREQ_SIZE `NUM_WARPS
@ -312,28 +286,18 @@
`define SCWBQ_SIZE `SCREQ_SIZE `define SCWBQ_SIZE `SCREQ_SIZE
`endif `endif
// L2cache Configurable Knobs ================================================= // L2cache Configurable Knobs /////////////////////////////////////////////////
// Size of cache in bytes // Size of cache in bytes
`ifndef L2CACHE_SIZE `ifndef L2CACHE_SIZE
`define L2CACHE_SIZE 4096 `define L2CACHE_SIZE 131072
`endif `endif
// Size of line inside a bank in bytes // Number of banks
`ifndef L2BANK_LINE_SIZE
`define L2BANK_LINE_SIZE `GLOBAL_BLOCK_SIZE
`endif
// Number of banks {1, 2, 4, 8,...}
`ifndef L2NUM_BANKS `ifndef L2NUM_BANKS
`define L2NUM_BANKS 4 `define L2NUM_BANKS 4
`endif `endif
// Size of a word in bytes
`ifndef L2WORD_SIZE
`define L2WORD_SIZE `L2BANK_LINE_SIZE
`endif
// Core Request Queue Size // Core Request Queue Size
`ifndef L2CREQ_SIZE `ifndef L2CREQ_SIZE
`define L2CREQ_SIZE 8 `define L2CREQ_SIZE 8
@ -369,28 +333,18 @@
`define L2SNPQ_SIZE 8 `define L2SNPQ_SIZE 8
`endif `endif
// L3cache Configurable Knobs ================================================= // L3cache Configurable Knobs /////////////////////////////////////////////////
// Size of cache in bytes // Size of cache in bytes
`ifndef L3CACHE_SIZE `ifndef L3CACHE_SIZE
`define L3CACHE_SIZE 8192 `define L3CACHE_SIZE 262144
`endif `endif
// Size of line inside a bank in bytes // Number of banks
`ifndef L3BANK_LINE_SIZE
`define L3BANK_LINE_SIZE `GLOBAL_BLOCK_SIZE
`endif
// Number of banks {1, 2, 4, 8,...}
`ifndef L3NUM_BANKS `ifndef L3NUM_BANKS
`define L3NUM_BANKS 4 `define L3NUM_BANKS 4
`endif `endif
// Size of a word in bytes
`ifndef L3WORD_SIZE
`define L3WORD_SIZE `L3BANK_LINE_SIZE
`endif
// Core Request Queue Size // Core Request Queue Size
`ifndef L3CREQ_SIZE `ifndef L3CREQ_SIZE
`define L3CREQ_SIZE 8 `define L3CREQ_SIZE 8

View file

@ -6,203 +6,203 @@
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
`define NW_BITS `LOG2UP(`NUM_WARPS) `define NW_BITS `LOG2UP(`NUM_WARPS)
`define NT_BITS `LOG2UP(`NUM_THREADS) `define NT_BITS `LOG2UP(`NUM_THREADS)
`define NC_BITS `LOG2UP(`NUM_CORES) `define NC_BITS `LOG2UP(`NUM_CORES)
`define NB_BITS `LOG2UP(`NUM_BARRIERS) `define NB_BITS `LOG2UP(`NUM_BARRIERS)
`define REQS_BITS `LOG2UP(NUM_REQUESTS) `define REQS_BITS `LOG2UP(NUM_REQUESTS)
`ifdef EXT_F_ENABLE `ifdef EXT_F_ENABLE
`define NUM_REGS 64 `define NUM_REGS 64
`else `else
`define NUM_REGS 32 `define NUM_REGS 32
`endif `endif
`define NR_BITS `LOG2UP(`NUM_REGS) `define NR_BITS `LOG2UP(`NUM_REGS)
`define CSR_ADDR_BITS 12 `define CSR_ADDR_BITS 12
`define CSR_WIDTH 12 `define CSR_WIDTH 12
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
`define INST_LUI 7'b0110111 `define INST_LUI 7'b0110111
`define INST_AUIPC 7'b0010111 `define INST_AUIPC 7'b0010111
`define INST_JAL 7'b1101111 `define INST_JAL 7'b1101111
`define INST_JALR 7'b1100111 `define INST_JALR 7'b1100111
`define INST_B 7'b1100011 // branch instructions `define INST_B 7'b1100011 // branch instructions
`define INST_L 7'b0000011 // load instructions `define INST_L 7'b0000011 // load instructions
`define INST_S 7'b0100011 // store instructions `define INST_S 7'b0100011 // store instructions
`define INST_I 7'b0010011 // immediate instructions `define INST_I 7'b0010011 // immediate instructions
`define INST_R 7'b0110011 // register instructions `define INST_R 7'b0110011 // register instructions
`define INST_F 7'b0001111 // Fence instructions `define INST_F 7'b0001111 // Fence instructions
`define INST_SYS 7'b1110011 // system instructions `define INST_SYS 7'b1110011 // system instructions
`define INST_FL 7'b0000111 // float load instruction `define INST_FL 7'b0000111 // float load instruction
`define INST_FS 7'b0100111 // float store instruction `define INST_FS 7'b0100111 // float store instruction
`define INST_FMADD 7'b1000011 `define INST_FMADD 7'b1000011
`define INST_FMSUB 7'b1000111 `define INST_FMSUB 7'b1000111
`define INST_FNMSUB 7'b1001011 `define INST_FNMSUB 7'b1001011
`define INST_FNMADD 7'b1001111 `define INST_FNMADD 7'b1001111
`define INST_FCI 7'b1010011 // float common instructions `define INST_FCI 7'b1010011 // float common instructions
`define INST_GPU 7'b1101011 `define INST_GPU 7'b1101011
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
`define BYTEEN_SB 3'h0 `define BYTEEN_SB 3'h0
`define BYTEEN_SH 3'h1 `define BYTEEN_SH 3'h1
`define BYTEEN_SW 3'h2 `define BYTEEN_SW 3'h2
`define BYTEEN_UB 3'h4 `define BYTEEN_UB 3'h4
`define BYTEEN_UH 3'h5 `define BYTEEN_UH 3'h5
`define BYTEEN_BITS 3 `define BYTEEN_BITS 3
`define BYTEEN_TYPE(x) x[1:0] `define BYTEEN_TYPE(x) x[1:0]
`define FRM_RNE 3'b000 // round to nearest even `define FRM_RNE 3'b000 // round to nearest even
`define FRM_RTZ 3'b001 // round to zero `define FRM_RTZ 3'b001 // round to zero
`define FRM_RDN 3'b010 // round to -inf `define FRM_RDN 3'b010 // round to -inf
`define FRM_RUP 3'b011 // round to +inf `define FRM_RUP 3'b011 // round to +inf
`define FRM_RMM 3'b100 // round to nearest max magnitude `define FRM_RMM 3'b100 // round to nearest max magnitude
`define FRM_DYN 3'b111 // dynamic mode `define FRM_DYN 3'b111 // dynamic mode
`define FRM_BITS 3 `define FRM_BITS 3
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
`define EX_NOP 3'h0 `define EX_NOP 3'h0
`define EX_ALU 3'h1 `define EX_ALU 3'h1
`define EX_LSU 3'h2 `define EX_LSU 3'h2
`define EX_CSR 3'h3 `define EX_CSR 3'h3
`define EX_MUL 3'h4 `define EX_MUL 3'h4
`define EX_FPU 3'h5 `define EX_FPU 3'h5
`define EX_GPU 3'h6 `define EX_GPU 3'h6
`define EX_BITS 3 `define EX_BITS 3
`define NUM_EXS 6 `define NUM_EXS 6
`define NE_BITS `LOG2UP(`NUM_EXS) `define NE_BITS `LOG2UP(`NUM_EXS)
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
`define OP_BITS 4 `define OP_BITS 4
`define MOD_BITS 3 `define MOD_BITS 3
`define ALU_ADD 4'b0000 `define ALU_ADD 4'b0000
`define ALU_LUI 4'b0010 `define ALU_LUI 4'b0010
`define ALU_AUIPC 4'b0011 `define ALU_AUIPC 4'b0011
`define ALU_SLTU 4'b0100 `define ALU_SLTU 4'b0100
`define ALU_SLT 4'b0101 `define ALU_SLT 4'b0101
`define ALU_SRL 4'b1000 `define ALU_SRL 4'b1000
`define ALU_SRA 4'b1001 `define ALU_SRA 4'b1001
`define ALU_SUB 4'b1011 `define ALU_SUB 4'b1011
`define ALU_AND 4'b1100 `define ALU_AND 4'b1100
`define ALU_OR 4'b1101 `define ALU_OR 4'b1101
`define ALU_XOR 4'b1110 `define ALU_XOR 4'b1110
`define ALU_SLL 4'b1111 `define ALU_SLL 4'b1111
`define ALU_OTHER 4'b0111 `define ALU_OTHER 4'b0111
`define ALU_BITS 4 `define ALU_BITS 4
`define ALU_OP(x) x[`ALU_BITS-1:0] `define ALU_OP(x) x[`ALU_BITS-1:0]
`define ALU_OP_CLASS(x) x[3:2] `define ALU_OP_CLASS(x) x[3:2]
`define ALU_SIGNED(x) x[0] `define ALU_SIGNED(x) x[0]
`define BR_EQ 4'b0000 `define BR_EQ 4'b0000
`define BR_NE 4'b0010 `define BR_NE 4'b0010
`define BR_LTU 4'b0100 `define BR_LTU 4'b0100
`define BR_GEU 4'b0110 `define BR_GEU 4'b0110
`define BR_LT 4'b0101 `define BR_LT 4'b0101
`define BR_GE 4'b0111 `define BR_GE 4'b0111
`define BR_JAL 4'b1000 `define BR_JAL 4'b1000
`define BR_JALR 4'b1001 `define BR_JALR 4'b1001
`define BR_ECALL 4'b1010 `define BR_ECALL 4'b1010
`define BR_EBREAK 4'b1011 `define BR_EBREAK 4'b1011
`define BR_MRET 4'b1100 `define BR_MRET 4'b1100
`define BR_SRET 4'b1101 `define BR_SRET 4'b1101
`define BR_DRET 4'b1110 `define BR_DRET 4'b1110
`define BR_OTHER 4'b1111 `define BR_OTHER 4'b1111
`define BR_BITS 4 `define BR_BITS 4
`define BR_OP(x) x[`BR_BITS-1:0] `define BR_OP(x) x[`BR_BITS-1:0]
`define BR_NEG(x) x[1] `define BR_NEG(x) x[1]
`define BR_LESS(x) x[2] `define BR_LESS(x) x[2]
`define BR_STATIC(x) x[3] `define BR_STATIC(x) x[3]
`define ALU_BR_BITS 4 `define ALU_BR_BITS 4
`define ALU_BR_OP(x) x[`ALU_BR_BITS-1:0] `define ALU_BR_OP(x) x[`ALU_BR_BITS-1:0]
`define IS_BR_MOD(x) x[0] `define IS_BR_MOD(x) x[0]
`define LSU_LB {1'b0, `BYTEEN_SB} `define LSU_LB {1'b0, `BYTEEN_SB}
`define LSU_LH {1'b0, `BYTEEN_SH} `define LSU_LH {1'b0, `BYTEEN_SH}
`define LSU_LW {1'b0, `BYTEEN_SW} `define LSU_LW {1'b0, `BYTEEN_SW}
`define LSU_LBU {1'b0, `BYTEEN_UB} `define LSU_LBU {1'b0, `BYTEEN_UB}
`define LSU_LHU {1'b0, `BYTEEN_UH} `define LSU_LHU {1'b0, `BYTEEN_UH}
`define LSU_SB {1'b1, `BYTEEN_SB} `define LSU_SB {1'b1, `BYTEEN_SB}
`define LSU_SH {1'b1, `BYTEEN_SH} `define LSU_SH {1'b1, `BYTEEN_SH}
`define LSU_SW {1'b1, `BYTEEN_SW} `define LSU_SW {1'b1, `BYTEEN_SW}
`define LSU_SBU {1'b1, `BYTEEN_UB} `define LSU_SBU {1'b1, `BYTEEN_UB}
`define LSU_SHU {1'b1, `BYTEEN_UH} `define LSU_SHU {1'b1, `BYTEEN_UH}
`define LSU_BITS 4 `define LSU_BITS 4
`define LSU_RW(x) x[3] `define LSU_RW(x) x[3]
`define LSU_BE(x) x[2:0] `define LSU_BE(x) x[2:0]
`define CSR_RW 2'h0 `define CSR_RW 2'h0
`define CSR_RS 2'h1 `define CSR_RS 2'h1
`define CSR_RC 2'h2 `define CSR_RC 2'h2
`define CSR_OTHER 2'h3 `define CSR_OTHER 2'h3
`define CSR_BITS 2 `define CSR_BITS 2
`define CSR_OP(x) x[`CSR_BITS-1:0] `define CSR_OP(x) x[`CSR_BITS-1:0]
`define MUL_MUL 3'h0 `define MUL_MUL 3'h0
`define MUL_MULH 3'h1 `define MUL_MULH 3'h1
`define MUL_MULHSU 3'h2 `define MUL_MULHSU 3'h2
`define MUL_MULHU 3'h3 `define MUL_MULHU 3'h3
`define MUL_DIV 3'h4 `define MUL_DIV 3'h4
`define MUL_DIVU 3'h5 `define MUL_DIVU 3'h5
`define MUL_REM 3'h6 `define MUL_REM 3'h6
`define MUL_REMU 3'h7 `define MUL_REMU 3'h7
`define MUL_BITS 3 `define MUL_BITS 3
`define MUL_OP(x) x[`MUL_BITS-1:0] `define MUL_OP(x) x[`MUL_BITS-1:0]
`define IS_DIV_OP(x) x[2] `define IS_DIV_OP(x) x[2]
`define FPU_ADD 4'h0 `define FPU_ADD 4'h0
`define FPU_SUB 4'h1 `define FPU_SUB 4'h1
`define FPU_MUL 4'h2 `define FPU_MUL 4'h2
`define FPU_DIV 4'h3 `define FPU_DIV 4'h3
`define FPU_SQRT 4'h4 `define FPU_SQRT 4'h4
`define FPU_MADD 4'h5 `define FPU_MADD 4'h5
`define FPU_MSUB 4'h6 `define FPU_MSUB 4'h6
`define FPU_NMSUB 4'h7 `define FPU_NMSUB 4'h7
`define FPU_NMADD 4'h8 `define FPU_NMADD 4'h8
`define FPU_CVTWS 4'h9 // FCVT.W.S `define FPU_CVTWS 4'h9 // FCVT.W.S
`define FPU_CVTWUS 4'hA // FCVT.WU.S `define FPU_CVTWUS 4'hA // FCVT.WU.S
`define FPU_CVTSW 4'hB // FCVT.S.W `define FPU_CVTSW 4'hB // FCVT.S.W
`define FPU_CVTSWU 4'hC // FCVT.S.WU `define FPU_CVTSWU 4'hC // FCVT.S.WU
`define FPU_CLASS 4'hD `define FPU_CLASS 4'hD
`define FPU_CMP 4'hE `define FPU_CMP 4'hE
`define FPU_MISC 4'hF // SGNJ, SGNJN, SGNJX, FMIN, FMAX, MVXW, MVWX `define FPU_MISC 4'hF // SGNJ, SGNJN, SGNJX, FMIN, FMAX, MVXW, MVWX
`define FPU_BITS 4 `define FPU_BITS 4
`define FPU_OP(x) x[`FPU_BITS-1:0] `define FPU_OP(x) x[`FPU_BITS-1:0]
`define GPU_TMC 3'h0 `define GPU_TMC 3'h0
`define GPU_WSPAWN 3'h1 `define GPU_WSPAWN 3'h1
`define GPU_SPLIT 3'h2 `define GPU_SPLIT 3'h2
`define GPU_JOIN 3'h3 `define GPU_JOIN 3'h3
`define GPU_BAR 3'h4 `define GPU_BAR 3'h4
`define GPU_OTHER 3'h7 `define GPU_OTHER 3'h7
`define GPU_BITS 3 `define GPU_BITS 3
`define GPU_OP(x) x[`GPU_BITS-1:0] `define GPU_OP(x) x[`GPU_BITS-1:0]
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
`ifdef EXT_M_ENABLE `ifdef EXT_M_ENABLE
`define ISA_EXT_M (1 << 12) `define ISA_EXT_M (1 << 12)
`else `else
`define ISA_EXT_M 0 `define ISA_EXT_M 0
`endif `endif
`ifdef EXT_F_ENABLE `ifdef EXT_F_ENABLE
`define ISA_EXT_F (1 << 5) `define ISA_EXT_F (1 << 5)
`else `else
`define ISA_EXT_F 0 `define ISA_EXT_F 0
`endif `endif
`define ISA_CODE (0 << 0) // A - Atomic Instructions extension \ `define ISA_CODE (0 << 0) // A - Atomic Instructions extension \
@ -234,144 +234,174 @@
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
`ifdef DBG_CACHE_REQ_INFO // pc, rd, wid `ifdef DBG_CACHE_REQ_INFO // pc, rd, wid
`define DBG_CACHE_REQ_MDATAW (32 + `NR_BITS + `NW_BITS) `define DBG_CACHE_REQ_MDATAW (32 + `NR_BITS + `NW_BITS)
`else `else
`define DBG_CACHE_REQ_MDATAW 0 `define DBG_CACHE_REQ_MDATAW 0
`endif `endif
////////////////////////// Dcache Configurable Knobs ////////////////////////// ////////////////////////// Dcache Configurable Knobs //////////////////////////
// Cache ID // Cache ID
`define DCACHE_ID 32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 0 `define DCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 0)
// Block size in bytes
`define DBANK_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `GLOBAL_BLOCK_SIZE)
// Word size in bytes
`define DWORD_SIZE 4
// TAG sharing enable // TAG sharing enable
`define DCORE_TAG_ID_BITS `LOG2UP(`LSUQ_SIZE) `define DCORE_TAG_ID_BITS `LOG2UP(`LSUQ_SIZE)
// Core request tag bits // Core request tag bits
`define DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS) `define DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS)
// DRAM request data bits // DRAM request data bits
`define DDRAM_LINE_WIDTH (`DBANK_LINE_SIZE * 8) `define DDRAM_LINE_WIDTH (`DBANK_LINE_SIZE * 8)
// DRAM request address bits // DRAM request address bits
`define DDRAM_ADDR_WIDTH (32 - `CLOG2(`DBANK_LINE_SIZE)) `define DDRAM_ADDR_WIDTH (32 - `CLOG2(`DBANK_LINE_SIZE))
// DRAM byte enable bits // DRAM byte enable bits
`define DDRAM_BYTEEN_WIDTH `DBANK_LINE_SIZE `define DDRAM_BYTEEN_WIDTH `DBANK_LINE_SIZE
// DRAM request tag bits // DRAM request tag bits
`define DDRAM_TAG_WIDTH `DDRAM_ADDR_WIDTH `define DDRAM_TAG_WIDTH `DDRAM_ADDR_WIDTH
// Number of Word requests per cycle {1, 2, 4, 8, ...} // Core request size
`define DNUM_REQUESTS `NUM_THREADS `define DNUM_REQUESTS `NUM_THREADS
// Snoop request tag bits // Snoop request tag bits
`define DSNP_TAG_WIDTH ((`NUM_CORES > 1) ? `LOG2UP(`L2SNRQ_SIZE) : `L2SNP_TAG_WIDTH) `define DSNP_TAG_WIDTH ((`NUM_CORES > 1) ? `LOG2UP(`L2SNRQ_SIZE) : `L2SNP_TAG_WIDTH)
////////////////////////// Icache Configurable Knobs ////////////////////////// ////////////////////////// Icache Configurable Knobs //////////////////////////
// Cache ID // Cache ID
`define ICACHE_ID 32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1 `define ICACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1)
// Block size in bytes
`define IBANK_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `GLOBAL_BLOCK_SIZE)
// Word size in bytes
`define IWORD_SIZE 4
// Number of banks // Number of banks
`define INUM_BANKS 1 `define INUM_BANKS 1
// Core request address bits // Core request address bits
`define ICORE_ADDR_WIDTH (32-`CLOG2(`IWORD_SIZE)) `define ICORE_ADDR_WIDTH (32-`CLOG2(`IWORD_SIZE))
// Core request byte enable bits // Core request byte enable bits
`define ICORE_BYTEEN_WIDTH `DWORD_SIZE `define ICORE_BYTEEN_WIDTH `DWORD_SIZE
// TAG sharing enable // TAG sharing enable
`define ICORE_TAG_ID_BITS `NW_BITS `define ICORE_TAG_ID_BITS `NW_BITS
// Core request tag bits // Core request tag bits
`define ICORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `ICORE_TAG_ID_BITS) `define ICORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `ICORE_TAG_ID_BITS)
// DRAM request data bits // DRAM request data bits
`define IDRAM_LINE_WIDTH (`IBANK_LINE_SIZE * 8) `define IDRAM_LINE_WIDTH (`IBANK_LINE_SIZE * 8)
// DRAM request address bits // DRAM request address bits
`define IDRAM_ADDR_WIDTH (32 - `CLOG2(`IBANK_LINE_SIZE)) `define IDRAM_ADDR_WIDTH (32 - `CLOG2(`IBANK_LINE_SIZE))
// DRAM byte enable bits // DRAM byte enable bits
`define IDRAM_BYTEEN_WIDTH `IBANK_LINE_SIZE `define IDRAM_BYTEEN_WIDTH `IBANK_LINE_SIZE
// DRAM request tag bits // DRAM request tag bits
`define IDRAM_TAG_WIDTH `IDRAM_ADDR_WIDTH `define IDRAM_TAG_WIDTH `IDRAM_ADDR_WIDTH
// Number of Word requests per cycle {1, 2, 4, 8, ...} // Core request size
`define INUM_REQUESTS 1 `define INUM_REQUESTS 1
////////////////////////// SM Configurable Knobs ////////////////////////////// ////////////////////////// SM Configurable Knobs //////////////////////////////
// Cache ID // Cache ID
`define SCACHE_ID 32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 2 `define SCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 2)
// Number of Word requests per cycle {1, 2, 4, 8, ...} // Block size in bytes
`define SNUM_REQUESTS `NUM_THREADS `define SBANK_LINE_SIZE 4
// Word size in bytes
`define SWORD_SIZE 4
// Core request size
`define SNUM_REQUESTS `NUM_THREADS
// DRAM request address bits // DRAM request address bits
`define SDRAM_ADDR_WIDTH (32 - `CLOG2(`SBANK_LINE_SIZE)) `define SDRAM_ADDR_WIDTH (32 - `CLOG2(`SBANK_LINE_SIZE))
// DRAM request tag bits // DRAM request tag bits
`define SDRAM_TAG_WIDTH `SDRAM_ADDR_WIDTH `define SDRAM_TAG_WIDTH `SDRAM_ADDR_WIDTH
// Number of Word requests per cycle {1, 2, 4, 8, ...} // Core request size
`define SNUM_REQUESTS `NUM_THREADS `define SNUM_REQUESTS `NUM_THREADS
////////////////////////// L2cache Configurable Knobs ///////////////////////// ////////////////////////// L2cache Configurable Knobs /////////////////////////
// Cache ID // Cache ID
`define L2CACHE_ID 32'(`L3_ENABLE) + CLUSTER_ID `define L2CACHE_ID (32'(`L3_ENABLE) + CLUSTER_ID)
// Block size in bytes
`define L2BANK_LINE_SIZE `GLOBAL_BLOCK_SIZE
// Word size in bytes
`define L2WORD_SIZE `DBANK_LINE_SIZE
// Core request tag bits // Core request tag bits
`define L2CORE_TAG_WIDTH (`DCORE_TAG_WIDTH + `CLOG2(`NUM_CORES)) `define L2CORE_TAG_WIDTH (`DCORE_TAG_WIDTH + `CLOG2(`NUM_CORES))
// DRAM request data bits // DRAM request data bits
`define L2DRAM_LINE_WIDTH (`L2_ENABLE ? (`L2BANK_LINE_SIZE * 8) : `DDRAM_LINE_WIDTH) `define L2DRAM_LINE_WIDTH (`L2BANK_LINE_SIZE * 8)
// DRAM request address bits // DRAM request address bits
`define L2DRAM_ADDR_WIDTH (`L2_ENABLE ? (32 - `CLOG2(`L2BANK_LINE_SIZE)) : `DDRAM_ADDR_WIDTH) `define L2DRAM_ADDR_WIDTH (32 - `CLOG2(`L2BANK_LINE_SIZE))
// DRAM byte enable bits // DRAM byte enable bits
`define L2DRAM_BYTEEN_WIDTH (`L2_ENABLE ? `L2BANK_LINE_SIZE : `DDRAM_BYTEEN_WIDTH) `define L2DRAM_BYTEEN_WIDTH `L2BANK_LINE_SIZE
// DRAM request tag bits // DRAM request tag bits
`define L2DRAM_TAG_WIDTH (`L2_ENABLE ? `L2DRAM_ADDR_WIDTH : (`L2DRAM_ADDR_WIDTH+`CLOG2(`NUM_CORES*2))) `define L2DRAM_TAG_WIDTH (`L2_ENABLE ? `L2DRAM_ADDR_WIDTH : (`L2DRAM_ADDR_WIDTH+`CLOG2(`NUM_CORES*2)))
// Snoop request tag bits // Snoop request tag bits
`define L2SNP_TAG_WIDTH (`L3_ENABLE ? `LOG2UP(`L3SNRQ_SIZE) : `L3SNP_TAG_WIDTH) `define L2SNP_TAG_WIDTH (`L3_ENABLE ? `LOG2UP(`L3SNRQ_SIZE) : `L3SNP_TAG_WIDTH)
// Number of Word requests per cycle {1, 2, 4, 8, ...} // Core request size
`define L2NUM_REQUESTS (2 * `NUM_CORES) `define L2NUM_REQUESTS (2 * `NUM_CORES)
////////////////////////// L3cache Configurable Knobs ///////////////////////// ////////////////////////// L3cache Configurable Knobs /////////////////////////
// Cache ID // Cache ID
`define L3CACHE_ID 0 `define L3CACHE_ID 0
// Block size in bytes
`define L3BANK_LINE_SIZE `GLOBAL_BLOCK_SIZE
// Word size in bytes
`define L3WORD_SIZE `L2BANK_LINE_SIZE
// Core request tag bits // Core request tag bits
`define L3CORE_TAG_WIDTH (`L2CORE_TAG_WIDTH + `CLOG2(`NUM_CLUSTERS)) `define L3CORE_TAG_WIDTH (`L2CORE_TAG_WIDTH + `CLOG2(`NUM_CLUSTERS))
// DRAM request data bits // DRAM request data bits
`define L3DRAM_LINE_WIDTH (`L3_ENABLE ? (`L3BANK_LINE_SIZE * 8) : `L2DRAM_LINE_WIDTH) `define L3DRAM_LINE_WIDTH (`L3BANK_LINE_SIZE * 8)
// DRAM request address bits // DRAM request address bits
`define L3DRAM_ADDR_WIDTH (`L3_ENABLE ? (32 - `CLOG2(`L3BANK_LINE_SIZE)) : `L2DRAM_ADDR_WIDTH) `define L3DRAM_ADDR_WIDTH (32 - `CLOG2(`L3BANK_LINE_SIZE))
// DRAM byte enable bits // DRAM byte enable bits
`define L3DRAM_BYTEEN_WIDTH (`L3_ENABLE ? `L3BANK_LINE_SIZE : `L2DRAM_BYTEEN_WIDTH) `define L3DRAM_BYTEEN_WIDTH `L3BANK_LINE_SIZE
// DRAM request tag bits // DRAM request tag bits
`define L3DRAM_TAG_WIDTH (`L3_ENABLE ? `L3DRAM_ADDR_WIDTH : `L2DRAM_TAG_WIDTH) `define L3DRAM_TAG_WIDTH (`L3_ENABLE ? `L3DRAM_ADDR_WIDTH : `L2DRAM_TAG_WIDTH)
// Snoop request tag bits // Snoop request tag bits
`define L3SNP_TAG_WIDTH 16 `define L3SNP_TAG_WIDTH 16
// Number of Word requests per cycle {1, 2, 4, 8, ...} // Core request size
`define L3NUM_REQUESTS `NUM_CLUSTERS `define L3NUM_REQUESTS `NUM_CLUSTERS
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////

View file

@ -168,9 +168,9 @@ module VX_ibuffer #(
for (integer i = 0; i < `NUM_WARPS; i++) begin for (integer i = 0; i < `NUM_WARPS; i++) begin
nw += 32'(q_size[i] != 0); nw += 32'(q_size[i] != 0);
end end
assert(nw == 32'(num_warps)) else $display("%t: error: invalid num_warps: nw=%0d, ref=%0d", $time, num_warps, nw); assert(nw == 32'(num_warps)) else $error("%t: error: invalid num_warps: nw=%0d, ref=%0d", $time, num_warps, nw);
assert(~deq_valid || (q_size[deq_wid] != 0)) else $display("%t: error: invalid schedule: wid=%0d", $time, deq_wid); assert(~deq_valid || (q_size[deq_wid] != 0)) else $error("%t: error: invalid schedule: wid=%0d", $time, deq_wid);
assert(~deq_fire || (q_size[deq_wid] != 0)) else $display("%t: error: invalid dequeu: wid=%0d", $time, deq_wid); assert(~deq_fire || (q_size[deq_wid] != 0)) else $error("%t: error: invalid dequeu: wid=%0d", $time, deq_wid);
end end
`endif `endif
end end

View file

@ -75,7 +75,6 @@ module VX_mem_unit # (
.DRAM_ENABLE (0), .DRAM_ENABLE (0),
.FLUSH_ENABLE (0), .FLUSH_ENABLE (0),
.WRITE_ENABLE (1), .WRITE_ENABLE (1),
.SNOOP_FORWARDING (0),
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH), .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS), .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`SDRAM_TAG_WIDTH) .DRAM_TAG_WIDTH (`SDRAM_TAG_WIDTH)
@ -127,44 +126,31 @@ module VX_mem_unit # (
`UNUSED_PIN (snp_rsp_tag), `UNUSED_PIN (snp_rsp_tag),
.snp_rsp_ready (1'b0), .snp_rsp_ready (1'b0),
// Snoop forward out
`UNUSED_PIN (snp_fwdout_valid),
`UNUSED_PIN (snp_fwdout_addr),
`UNUSED_PIN (snp_fwdout_invalidate),
`UNUSED_PIN (snp_fwdout_tag),
.snp_fwdout_ready (1'b0),
// Snoop forward in
.snp_fwdin_valid (1'b0),
.snp_fwdin_tag (0),
`UNUSED_PIN (snp_fwdin_ready),
// Miss status // Miss status
`UNUSED_PIN (miss_vec) `UNUSED_PIN (miss_vec)
); );
VX_cache #( VX_cache #(
.CACHE_ID (`DCACHE_ID), .CACHE_ID (`DCACHE_ID),
.CACHE_SIZE (`DCACHE_SIZE), .CACHE_SIZE (`DCACHE_SIZE),
.BANK_LINE_SIZE (`DBANK_LINE_SIZE), .BANK_LINE_SIZE (`DBANK_LINE_SIZE),
.NUM_BANKS (`DNUM_BANKS), .NUM_BANKS (`DNUM_BANKS),
.WORD_SIZE (`DWORD_SIZE), .WORD_SIZE (`DWORD_SIZE),
.NUM_REQUESTS (`DNUM_REQUESTS), .NUM_REQUESTS (`DNUM_REQUESTS),
.CREQ_SIZE (`DCREQ_SIZE), .CREQ_SIZE (`DCREQ_SIZE),
.MRVQ_SIZE (`DMRVQ_SIZE), .MRVQ_SIZE (`DMRVQ_SIZE),
.DRFQ_SIZE (`DDRFQ_SIZE), .DRFQ_SIZE (`DDRFQ_SIZE),
.SNRQ_SIZE (`DSNRQ_SIZE), .SNRQ_SIZE (`DSNRQ_SIZE),
.CWBQ_SIZE (`DCWBQ_SIZE), .CWBQ_SIZE (`DCWBQ_SIZE),
.DREQ_SIZE (`DDREQ_SIZE), .DREQ_SIZE (`DDREQ_SIZE),
.SNPQ_SIZE (`DSNPQ_SIZE), .SNPQ_SIZE (`DSNPQ_SIZE),
.DRAM_ENABLE (1), .DRAM_ENABLE (1),
.FLUSH_ENABLE (1), .FLUSH_ENABLE (1),
.WRITE_ENABLE (1), .WRITE_ENABLE (1),
.SNOOP_FORWARDING (0), .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH), .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS), .DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH),
.DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH), .SNP_TAG_WIDTH (`DSNP_TAG_WIDTH)
.SNP_REQ_TAG_WIDTH (`DSNP_TAG_WIDTH)
) dcache ( ) dcache (
`SCOPE_BIND_VX_mem_unit_dcache `SCOPE_BIND_VX_mem_unit_dcache
@ -212,18 +198,6 @@ module VX_mem_unit # (
.snp_rsp_valid (dcache_snp_rsp_if.valid), .snp_rsp_valid (dcache_snp_rsp_if.valid),
.snp_rsp_tag (dcache_snp_rsp_if.tag), .snp_rsp_tag (dcache_snp_rsp_if.tag),
.snp_rsp_ready (dcache_snp_rsp_if.ready), .snp_rsp_ready (dcache_snp_rsp_if.ready),
// Snoop forward out
`UNUSED_PIN (snp_fwdout_valid),
`UNUSED_PIN (snp_fwdout_addr),
`UNUSED_PIN (snp_fwdout_invalidate),
`UNUSED_PIN (snp_fwdout_tag),
.snp_fwdout_ready (1'b0),
// Snoop forward in
.snp_fwdin_valid (1'b0),
.snp_fwdin_tag (0),
`UNUSED_PIN (snp_fwdin_ready),
// Miss status // Miss status
`UNUSED_PIN (miss_vec) `UNUSED_PIN (miss_vec)
@ -246,7 +220,6 @@ module VX_mem_unit # (
.DRAM_ENABLE (1), .DRAM_ENABLE (1),
.FLUSH_ENABLE (0), .FLUSH_ENABLE (0),
.WRITE_ENABLE (0), .WRITE_ENABLE (0),
.SNOOP_FORWARDING (0),
.CORE_TAG_WIDTH (`ICORE_TAG_WIDTH), .CORE_TAG_WIDTH (`ICORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS), .CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH) .DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH)
@ -298,18 +271,6 @@ module VX_mem_unit # (
`UNUSED_PIN (snp_rsp_tag), `UNUSED_PIN (snp_rsp_tag),
.snp_rsp_ready (1'b0), .snp_rsp_ready (1'b0),
// Snoop forward out
`UNUSED_PIN (snp_fwdout_valid),
`UNUSED_PIN (snp_fwdout_addr),
`UNUSED_PIN (snp_fwdout_invalidate),
`UNUSED_PIN (snp_fwdout_tag),
.snp_fwdout_ready (1'b0),
// Snoop forward in
.snp_fwdin_valid (1'b0),
.snp_fwdin_tag (0),
`UNUSED_PIN (snp_fwdin_ready),
// Miss status // Miss status
`UNUSED_PIN (miss_vec) `UNUSED_PIN (miss_vec)
); );

View file

@ -320,56 +320,70 @@ module Vortex (
// L3 Cache /////////////////////////////////////////////////////////// // L3 Cache ///////////////////////////////////////////////////////////
wire [`L3NUM_REQUESTS-1:0] cluster_dram_req_valid;
wire [`L3NUM_REQUESTS-1:0] cluster_dram_req_rw;
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] cluster_dram_req_byteen;
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_ADDR_WIDTH-1:0] cluster_dram_req_addr;
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] cluster_dram_req_data;
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] cluster_dram_req_tag;
wire [`L3NUM_REQUESTS-1:0] cluster_dram_rsp_valid; wire [`L3NUM_REQUESTS-1:0] cluster_dram_rsp_valid;
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] cluster_dram_rsp_data; wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] cluster_dram_rsp_data;
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] cluster_dram_rsp_tag; wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] cluster_dram_rsp_tag;
wire cluster_dram_rsp_ready; wire cluster_dram_rsp_ready;
wire [`NUM_CLUSTERS-1:0] cluster_snp_fwdout_valid; wire snp_fwd_rsp_valid;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] cluster_snp_fwdout_addr; wire [`L3DRAM_ADDR_WIDTH-1:0] snp_fwd_rsp_addr;
wire [`NUM_CLUSTERS-1:0] cluster_snp_fwdout_invalidate; wire snp_fwd_rsp_invalidate;
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] cluster_snp_fwdout_tag; wire [`L3SNP_TAG_WIDTH-1:0] snp_fwd_rsp_tag;
wire [`NUM_CLUSTERS-1:0] cluster_snp_fwdout_ready; wire snp_fwd_rsp_ready;
wire [`NUM_CLUSTERS-1:0] cluster_snp_fwdin_valid; reg [`L3NUM_REQUESTS-1:0] cluster_dram_rsp_ready_other;
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] cluster_snp_fwdin_tag;
wire [`NUM_CLUSTERS-1:0] cluster_snp_fwdin_ready;
for (genvar i = 0; i < `L3NUM_REQUESTS; i++) begin always @(*) begin
// Core Request cluster_dram_rsp_ready_other = {`L3NUM_REQUESTS{1'b1}};
assign cluster_dram_req_valid [i] = per_cluster_dram_req_valid [i]; for (integer i = 0; i < `L3NUM_REQUESTS; i++) begin
assign cluster_dram_req_rw [i] = per_cluster_dram_req_rw [i]; for (integer j = 0; j < `L3NUM_REQUESTS; j++) begin
assign cluster_dram_req_byteen [i] = per_cluster_dram_req_byteen[i]; if (i != j)
assign cluster_dram_req_addr [i] = per_cluster_dram_req_addr [i]; cluster_dram_rsp_ready_other[i] &= (per_cluster_dram_rsp_ready [j] | !cluster_dram_rsp_valid [j]);
assign cluster_dram_req_tag [i] = per_cluster_dram_req_tag [i]; end
assign cluster_dram_req_data [i] = per_cluster_dram_req_data [i]; end
// Core Response
assign per_cluster_dram_rsp_valid [i] = cluster_dram_rsp_valid [i] && cluster_dram_rsp_ready;
assign per_cluster_dram_rsp_data [i] = cluster_dram_rsp_data [i];
assign per_cluster_dram_rsp_tag [i] = cluster_dram_rsp_tag [i];
// Snoop Forwarding out
assign per_cluster_snp_req_valid [i] = cluster_snp_fwdout_valid[i];
assign per_cluster_snp_req_addr [i] = cluster_snp_fwdout_addr[i];
assign per_cluster_snp_req_invalidate [i] = cluster_snp_fwdout_invalidate[i];
assign per_cluster_snp_req_tag [i] = cluster_snp_fwdout_tag[i];
assign cluster_snp_fwdout_ready [i] = per_cluster_snp_req_ready[i];
// Snoop Forwarding in
assign cluster_snp_fwdin_valid [i] = per_cluster_snp_rsp_valid [i];
assign cluster_snp_fwdin_tag [i] = per_cluster_snp_rsp_tag [i];
assign per_cluster_snp_rsp_ready [i] = cluster_snp_fwdin_ready [i];
end end
assign cluster_dram_rsp_ready = (& per_cluster_dram_rsp_ready); for (genvar i = 0; i < `L3NUM_REQUESTS; i++) begin
// Core Response
assign per_cluster_dram_rsp_valid [i] = cluster_dram_rsp_valid [i] & cluster_dram_rsp_ready_other [i];
assign per_cluster_dram_rsp_data [i] = cluster_dram_rsp_data [i];
assign per_cluster_dram_rsp_tag [i] = cluster_dram_rsp_tag [i];
end
assign cluster_dram_rsp_ready = & (per_cluster_dram_rsp_ready | ~cluster_dram_rsp_valid);
VX_snp_forwarder #(
.CACHE_ID (`L3CACHE_ID),
.NUM_REQUESTS (`NUM_CLUSTERS),
.SRC_ADDR_WIDTH (`L3DRAM_ADDR_WIDTH),
.DST_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH),
.SNP_TAG_WIDTH (`L3SNP_TAG_WIDTH),
.SNRQ_SIZE (`L3SNRQ_SIZE)
) snp_forwarder (
.clk (clk),
.reset (reset),
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_invalidate (snp_req_invalidate),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
.snp_rsp_valid (snp_fwd_rsp_valid),
.snp_rsp_addr (snp_fwd_rsp_addr),
.snp_rsp_invalidate (snp_fwd_rsp_invalidate),
.snp_rsp_tag (snp_fwd_rsp_tag),
.snp_rsp_ready (snp_fwd_rsp_ready),
.snp_fwdout_valid (per_cluster_snp_req_valid),
.snp_fwdout_addr (per_cluster_snp_req_addr),
.snp_fwdout_invalidate(per_cluster_snp_req_invalidate),
.snp_fwdout_tag (per_cluster_snp_req_tag),
.snp_fwdout_ready (per_cluster_snp_req_ready),
.snp_fwdin_valid (per_cluster_snp_rsp_valid),
.snp_fwdin_tag (per_cluster_snp_rsp_tag),
.snp_fwdin_ready (per_cluster_snp_rsp_ready)
);
VX_cache #( VX_cache #(
.CACHE_ID (`L3CACHE_ID), .CACHE_ID (`L3CACHE_ID),
@ -388,13 +402,10 @@ module Vortex (
.DRAM_ENABLE (1), .DRAM_ENABLE (1),
.FLUSH_ENABLE (1), .FLUSH_ENABLE (1),
.WRITE_ENABLE (1), .WRITE_ENABLE (1),
.SNOOP_FORWARDING (1),
.CORE_TAG_WIDTH (`L2DRAM_TAG_WIDTH), .CORE_TAG_WIDTH (`L2DRAM_TAG_WIDTH),
.CORE_TAG_ID_BITS (0), .CORE_TAG_ID_BITS (0),
.DRAM_TAG_WIDTH (`L3DRAM_TAG_WIDTH), .DRAM_TAG_WIDTH (`L3DRAM_TAG_WIDTH),
.NUM_SNP_REQUESTS (`NUM_CLUSTERS), .SNP_TAG_WIDTH (`L3SNP_TAG_WIDTH)
.SNP_REQ_TAG_WIDTH (`L3SNP_TAG_WIDTH),
.SNP_FWD_TAG_WIDTH (`L2SNP_TAG_WIDTH)
) l3cache ( ) l3cache (
`SCOPE_BIND_Vortex_l3cache `SCOPE_BIND_Vortex_l3cache
@ -402,12 +413,12 @@ module Vortex (
.reset (reset), .reset (reset),
// Core request // Core request
.core_req_valid (cluster_dram_req_valid), .core_req_valid (per_cluster_dram_req_valid),
.core_req_rw (cluster_dram_req_rw), .core_req_rw (per_cluster_dram_req_rw),
.core_req_byteen (cluster_dram_req_byteen), .core_req_byteen (per_cluster_dram_req_byteen),
.core_req_addr (cluster_dram_req_addr), .core_req_addr (per_cluster_dram_req_addr),
.core_req_data (cluster_dram_req_data), .core_req_data (per_cluster_dram_req_data),
.core_req_tag (cluster_dram_req_tag), .core_req_tag (per_cluster_dram_req_tag),
.core_req_ready (cluster_dram_req_ready), .core_req_ready (cluster_dram_req_ready),
// Core response // Core response
@ -432,29 +443,17 @@ module Vortex (
.dram_rsp_ready (dram_rsp_ready), .dram_rsp_ready (dram_rsp_ready),
// Snoop request // Snoop request
.snp_req_valid (snp_req_valid), .snp_req_valid (snp_fwd_rsp_valid),
.snp_req_addr (snp_req_addr), .snp_req_addr (snp_fwd_rsp_addr),
.snp_req_invalidate (snp_req_invalidate), .snp_req_invalidate (snp_fwd_rsp_invalidate),
.snp_req_tag (snp_req_tag), .snp_req_tag (snp_fwd_rsp_tag),
.snp_req_ready (snp_req_ready), .snp_req_ready (snp_fwd_rsp_ready),
// Snoop response // Snoop response
.snp_rsp_valid (snp_rsp_valid), .snp_rsp_valid (snp_rsp_valid),
.snp_rsp_tag (snp_rsp_tag), .snp_rsp_tag (snp_rsp_tag),
.snp_rsp_ready (snp_rsp_ready), .snp_rsp_ready (snp_rsp_ready),
// Snoop forwarding out
.snp_fwdout_valid (cluster_snp_fwdout_valid),
.snp_fwdout_addr (cluster_snp_fwdout_addr),
.snp_fwdout_invalidate(cluster_snp_fwdout_invalidate),
.snp_fwdout_tag (cluster_snp_fwdout_tag),
.snp_fwdout_ready (cluster_snp_fwdout_ready),
// Snoop forwarding in
.snp_fwdin_valid (cluster_snp_fwdin_valid),
.snp_fwdin_tag (cluster_snp_fwdin_tag),
.snp_fwdin_ready (cluster_snp_fwdin_ready),
// Miss status // Miss status
`UNUSED_PIN (miss_vec) `UNUSED_PIN (miss_vec)
); );
@ -497,4 +496,11 @@ module Vortex (
end end
`endif `endif
`ifndef NDEBUG
always @(posedge clk) begin
$fflush(); // flush stdout buffer
end
`endif
endmodule endmodule

View file

@ -47,7 +47,7 @@ module VX_bank #(
parameter CORE_TAG_ID_BITS = 0, parameter CORE_TAG_ID_BITS = 0,
// Snooping request tag width // Snooping request tag width
parameter SNP_REQ_TAG_WIDTH = 1 parameter SNP_TAG_WIDTH = 1
) ( ) (
`SCOPE_IO_VX_bank `SCOPE_IO_VX_bank
@ -88,12 +88,12 @@ module VX_bank #(
input wire snp_req_valid, input wire snp_req_valid,
input wire [`LINE_ADDR_WIDTH-1:0] snp_req_addr, input wire [`LINE_ADDR_WIDTH-1:0] snp_req_addr,
input wire snp_req_invalidate, input wire snp_req_invalidate,
input wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag, input wire [SNP_TAG_WIDTH-1:0] snp_req_tag,
output wire snp_req_ready, output wire snp_req_ready,
// Snoop Response // Snoop Response
output wire snp_rsp_valid, output wire snp_rsp_valid,
output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag, output wire [SNP_TAG_WIDTH-1:0] snp_rsp_tag,
input wire snp_rsp_ready, input wire snp_rsp_ready,
// Misses // Misses
@ -142,13 +142,13 @@ module VX_bank #(
wire [`LINE_ADDR_WIDTH-1:0] snrq_addr_st0; wire [`LINE_ADDR_WIDTH-1:0] snrq_addr_st0;
wire snrq_invalidate_st0; wire snrq_invalidate_st0;
wire [SNP_REQ_TAG_WIDTH-1:0] snrq_tag_st0; wire [SNP_TAG_WIDTH-1:0] snrq_tag_st0;
wire snp_req_fire = snp_req_valid && snp_req_ready; wire snp_req_fire = snp_req_valid && snp_req_ready;
assign snp_req_ready = !snrq_full; assign snp_req_ready = !snrq_full;
VX_generic_queue #( VX_generic_queue #(
.DATAW(`LINE_ADDR_WIDTH + 1 + SNP_REQ_TAG_WIDTH), .DATAW(`LINE_ADDR_WIDTH + 1 + SNP_TAG_WIDTH),
.SIZE(SNRQ_SIZE) .SIZE(SNRQ_SIZE)
) snp_req_queue ( ) snp_req_queue (
.clk (clk), .clk (clk),
@ -352,7 +352,7 @@ module VX_bank #(
|| ((miss_st3 || force_miss_st3) && (addr_st3 == addr_st0)); || ((miss_st3 || force_miss_st3) && (addr_st3 == addr_st0));
`ifdef DBG_CACHE_REQ_INFO `ifdef DBG_CACHE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
assign {debug_pc_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = inst_meta_st0; assign {debug_pc_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = inst_meta_st0;
end else begin end else begin
assign {debug_pc_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = 0; assign {debug_pc_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = 0;
@ -371,7 +371,7 @@ module VX_bank #(
); );
`ifdef DBG_CACHE_REQ_INFO `ifdef DBG_CACHE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = inst_meta_st1; assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = inst_meta_st1;
end else begin end else begin
assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = 0; assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = 0;
@ -474,7 +474,7 @@ module VX_bank #(
); );
`ifdef DBG_CACHE_REQ_INFO `ifdef DBG_CACHE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
assign {debug_pc_st2, debug_rd_st2, debug_wid_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2; assign {debug_pc_st2, debug_rd_st2, debug_wid_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2;
end else begin end else begin
assign {debug_pc_st2, debug_rd_st2, debug_wid_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = 0; assign {debug_pc_st2, debug_rd_st2, debug_wid_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = 0;
@ -574,7 +574,7 @@ module VX_bank #(
); );
`ifdef DBG_CACHE_REQ_INFO `ifdef DBG_CACHE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
assign {debug_pc_st3, debug_rd_st3, debug_wid_st3, debug_tagid_st3, debug_rw_st3, debug_byteen_st3, debug_tid_st3} = inst_meta_st3; assign {debug_pc_st3, debug_rd_st3, debug_wid_st3, debug_tagid_st3, debug_rw_st3, debug_byteen_st3, debug_tid_st3} = inst_meta_st3;
end else begin end else begin
assign {debug_pc_st3, debug_rd_st3, debug_wid_st3, debug_tagid_st3, debug_rw_st3, debug_byteen_st3, debug_tid_st3} = 0; assign {debug_pc_st3, debug_rd_st3, debug_wid_st3, debug_tagid_st3, debug_rw_st3, debug_byteen_st3, debug_tid_st3} = 0;
@ -621,7 +621,7 @@ module VX_bank #(
.NUM_REQUESTS (NUM_REQUESTS), .NUM_REQUESTS (NUM_REQUESTS),
.MRVQ_SIZE (MRVQ_SIZE), .MRVQ_SIZE (MRVQ_SIZE),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH), .CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH) .SNP_TAG_WIDTH (SNP_TAG_WIDTH)
) cache_miss_resrv ( ) cache_miss_resrv (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@ -803,12 +803,12 @@ module VX_bank #(
wire snpq_pop = snp_rsp_valid && snp_rsp_ready; wire snpq_pop = snp_rsp_valid && snp_rsp_ready;
wire [SNP_REQ_TAG_WIDTH-1:0] snpq_tag_st3 = SNP_REQ_TAG_WIDTH'(req_tag_st3); wire [SNP_TAG_WIDTH-1:0] snpq_tag_st3 = SNP_TAG_WIDTH'(req_tag_st3);
if (FLUSH_ENABLE) begin if (FLUSH_ENABLE) begin
VX_generic_queue #( VX_generic_queue #(
.DATAW(SNP_REQ_TAG_WIDTH), .DATAW (SNP_TAG_WIDTH),
.SIZE(SNPQ_SIZE) .SIZE (SNPQ_SIZE)
) snp_rsp_queue ( ) snp_rsp_queue (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),

View file

@ -39,9 +39,6 @@ module VX_cache #(
// Enable cache flush // Enable cache flush
parameter FLUSH_ENABLE = 1, parameter FLUSH_ENABLE = 1,
// Enable snoop forwarding
parameter SNOOP_FORWARDING = 1,
// core request tag size // core request tag size
parameter CORE_TAG_WIDTH = 4, parameter CORE_TAG_WIDTH = 4,
@ -51,14 +48,8 @@ module VX_cache #(
// dram request tag size // dram request tag size
parameter DRAM_TAG_WIDTH = 28, parameter DRAM_TAG_WIDTH = 28,
// Number of snoop forwarding requests
parameter NUM_SNP_REQUESTS = (SNOOP_FORWARDING ? 4 : 1),
// Snooping request tag width // Snooping request tag width
parameter SNP_REQ_TAG_WIDTH = (SNOOP_FORWARDING ? 4 : 1), parameter SNP_TAG_WIDTH = 1
// Snooping forward tag width
parameter SNP_FWD_TAG_WIDTH = (SNOOP_FORWARDING ? 4 : 1)
) ( ) (
`SCOPE_IO_VX_cache `SCOPE_IO_VX_cache
@ -99,28 +90,14 @@ module VX_cache #(
input wire snp_req_valid, input wire snp_req_valid,
input wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr, input wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr,
input wire snp_req_invalidate, input wire snp_req_invalidate,
input wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag, input wire [SNP_TAG_WIDTH-1:0] snp_req_tag,
output wire snp_req_ready, output wire snp_req_ready,
// Snoop response // Snoop response
output wire snp_rsp_valid, output wire snp_rsp_valid,
output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag, output wire [SNP_TAG_WIDTH-1:0] snp_rsp_tag,
input wire snp_rsp_ready, input wire snp_rsp_ready,
// Snoop Forwarding out
output wire [NUM_SNP_REQUESTS-1:0] snp_fwdout_valid,
output wire [NUM_SNP_REQUESTS-1:0][`DRAM_ADDR_WIDTH-1:0] snp_fwdout_addr,
output wire [NUM_SNP_REQUESTS-1:0] snp_fwdout_invalidate,
output wire [NUM_SNP_REQUESTS-1:0][SNP_FWD_TAG_WIDTH-1:0] snp_fwdout_tag,
`IGNORE_WARNINGS_BEGIN
input wire [NUM_SNP_REQUESTS-1:0] snp_fwdout_ready,
// Snoop forwarding in
input wire [NUM_SNP_REQUESTS-1:0] snp_fwdin_valid,
input wire [NUM_SNP_REQUESTS-1:0][SNP_FWD_TAG_WIDTH-1:0] snp_fwdin_tag,
`IGNORE_WARNINGS_END
output wire [NUM_SNP_REQUESTS-1:0] snp_fwdin_ready,
output wire [NUM_BANKS-1:0] miss_vec output wire [NUM_BANKS-1:0] miss_vec
); );
@ -146,72 +123,16 @@ module VX_cache #(
wire [NUM_BANKS-1:0] per_bank_snp_req_ready; wire [NUM_BANKS-1:0] per_bank_snp_req_ready;
wire [NUM_BANKS-1:0] per_bank_snp_rsp_valid; wire [NUM_BANKS-1:0] per_bank_snp_rsp_valid;
wire [NUM_BANKS-1:0][SNP_REQ_TAG_WIDTH-1:0] per_bank_snp_rsp_tag; wire [NUM_BANKS-1:0][SNP_TAG_WIDTH-1:0] per_bank_snp_rsp_tag;
wire [NUM_BANKS-1:0] per_bank_snp_rsp_ready; wire [NUM_BANKS-1:0] per_bank_snp_rsp_ready;
wire [NUM_BANKS-1:0] per_bank_miss; wire [NUM_BANKS-1:0] per_bank_miss;
assign miss_vec = per_bank_miss; assign miss_vec = per_bank_miss;
wire snp_req_valid_qual;
wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr_qual;
wire snp_req_invalidate_qual;
wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag_qual;
wire snp_req_ready_qual;
if (SNOOP_FORWARDING) begin
VX_snp_forwarder #(
.CACHE_ID (CACHE_ID),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_REQUESTS (NUM_SNP_REQUESTS),
.SNRQ_SIZE (SNRQ_SIZE),
.SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH)
) snp_forwarder (
.clk (clk),
.reset (reset),
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_invalidate (snp_req_invalidate),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
.snp_rsp_valid (snp_req_valid_qual),
.snp_rsp_addr (snp_req_addr_qual),
.snp_rsp_invalidate (snp_req_invalidate_qual),
.snp_rsp_tag (snp_req_tag_qual),
.snp_rsp_ready (snp_req_ready_qual),
.snp_fwdout_valid (snp_fwdout_valid),
.snp_fwdout_addr (snp_fwdout_addr),
.snp_fwdout_invalidate(snp_fwdout_invalidate),
.snp_fwdout_tag (snp_fwdout_tag),
.snp_fwdout_ready (snp_fwdout_ready),
.snp_fwdin_valid (snp_fwdin_valid),
.snp_fwdin_tag (snp_fwdin_tag),
.snp_fwdin_ready (snp_fwdin_ready)
);
end else begin
assign snp_fwdout_valid = 0;
assign snp_fwdout_addr = 0;
assign snp_fwdout_invalidate = 0;
assign snp_fwdout_tag = 0;
assign snp_fwdin_ready = 0;
assign snp_req_valid_qual = snp_req_valid;
assign snp_req_addr_qual = snp_req_addr;
assign snp_req_invalidate_qual = snp_req_invalidate;
assign snp_req_tag_qual = snp_req_tag;
assign snp_req_ready = snp_req_ready_qual;
end
if (NUM_BANKS == 1) begin if (NUM_BANKS == 1) begin
assign snp_req_ready_qual = per_bank_snp_req_ready; assign snp_req_ready = per_bank_snp_req_ready;
end else begin end else begin
assign snp_req_ready_qual = per_bank_snp_req_ready[`DRAM_ADDR_BANK(snp_req_addr_qual)]; assign snp_req_ready = per_bank_snp_req_ready[`DRAM_ADDR_BANK(snp_req_addr)];
end end
VX_cache_core_req_bank_sel #( VX_cache_core_req_bank_sel #(
@ -221,14 +142,18 @@ module VX_cache #(
.NUM_REQUESTS (NUM_REQUESTS) .NUM_REQUESTS (NUM_REQUESTS)
) cache_core_req_bank_sel ( ) cache_core_req_bank_sel (
.core_req_valid (core_req_valid), .core_req_valid (core_req_valid),
.per_bank_ready (per_bank_core_req_ready),
.core_req_addr (core_req_addr), .core_req_addr (core_req_addr),
.core_req_ready (core_req_ready),
.per_bank_valid (per_bank_valid), .per_bank_valid (per_bank_valid),
.core_req_ready (core_req_ready) .per_bank_ready (per_bank_core_req_ready)
); );
assign dram_req_tag = dram_req_addr; assign dram_req_tag = dram_req_addr;
assign dram_rsp_ready = (& per_bank_dram_rsp_ready); if (NUM_BANKS == 1) begin
assign dram_rsp_ready = per_bank_dram_rsp_ready;
end else begin
assign dram_rsp_ready = per_bank_dram_rsp_ready[`DRAM_ADDR_BANK(dram_rsp_tag)];
end
for (genvar i = 0; i < NUM_BANKS; i++) begin for (genvar i = 0; i < NUM_BANKS; i++) begin
wire [NUM_REQUESTS-1:0] curr_bank_core_req_valid; wire [NUM_REQUESTS-1:0] curr_bank_core_req_valid;
@ -260,11 +185,11 @@ module VX_cache #(
wire curr_bank_snp_req_valid; wire curr_bank_snp_req_valid;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_snp_req_addr; wire [`LINE_ADDR_WIDTH-1:0] curr_bank_snp_req_addr;
wire curr_bank_snp_req_invalidate; wire curr_bank_snp_req_invalidate;
wire [SNP_REQ_TAG_WIDTH-1:0] curr_bank_snp_req_tag; wire [SNP_TAG_WIDTH-1:0] curr_bank_snp_req_tag;
wire curr_bank_snp_req_ready; wire curr_bank_snp_req_ready;
wire curr_bank_snp_rsp_valid; wire curr_bank_snp_rsp_valid;
wire [SNP_REQ_TAG_WIDTH-1:0] curr_bank_snp_rsp_tag; wire [SNP_TAG_WIDTH-1:0] curr_bank_snp_rsp_tag;
wire curr_bank_snp_rsp_ready; wire curr_bank_snp_rsp_ready;
wire curr_bank_miss; wire curr_bank_miss;
@ -310,14 +235,14 @@ module VX_cache #(
// Snoop request // Snoop request
if (NUM_BANKS == 1) begin if (NUM_BANKS == 1) begin
assign curr_bank_snp_req_valid = snp_req_valid_qual; assign curr_bank_snp_req_valid = snp_req_valid;
assign curr_bank_snp_req_addr = snp_req_addr_qual; assign curr_bank_snp_req_addr = snp_req_addr;
end else begin end else begin
assign curr_bank_snp_req_valid = snp_req_valid_qual && (`DRAM_ADDR_BANK(snp_req_addr_qual) == i); assign curr_bank_snp_req_valid = snp_req_valid && (`DRAM_ADDR_BANK(snp_req_addr) == i);
assign curr_bank_snp_req_addr = `DRAM_TO_LINE_ADDR(snp_req_addr_qual); assign curr_bank_snp_req_addr = `DRAM_TO_LINE_ADDR(snp_req_addr);
end end
assign curr_bank_snp_req_invalidate = snp_req_invalidate_qual; assign curr_bank_snp_req_invalidate = snp_req_invalidate;
assign curr_bank_snp_req_tag = snp_req_tag_qual; assign curr_bank_snp_req_tag = snp_req_tag;
assign per_bank_snp_req_ready[i] = curr_bank_snp_req_ready; assign per_bank_snp_req_ready[i] = curr_bank_snp_req_ready;
// Snoop response // Snoop response
@ -348,7 +273,7 @@ module VX_cache #(
.WRITE_ENABLE (WRITE_ENABLE), .WRITE_ENABLE (WRITE_ENABLE),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH), .CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS), .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS),
.SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH) .SNP_TAG_WIDTH (SNP_TAG_WIDTH)
) bank ( ) bank (
`SCOPE_BIND_VX_cache_bank(i) `SCOPE_BIND_VX_cache_bank(i)
@ -459,9 +384,9 @@ module VX_cache #(
if (FLUSH_ENABLE) begin if (FLUSH_ENABLE) begin
VX_snp_rsp_arb #( VX_snp_rsp_arb #(
.NUM_BANKS (NUM_BANKS), .NUM_BANKS (NUM_BANKS),
.BANK_LINE_SIZE (BANK_LINE_SIZE), .BANK_LINE_SIZE (BANK_LINE_SIZE),
.SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH) .SNP_TAG_WIDTH (SNP_TAG_WIDTH)
) snp_rsp_arb ( ) snp_rsp_arb (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),

View file

@ -7,7 +7,7 @@
`include "VX_define.vh" `include "VX_define.vh"
`endif `endif
`define REQ_TAG_WIDTH `MAX(CORE_TAG_WIDTH, SNP_REQ_TAG_WIDTH) `define REQ_TAG_WIDTH `MAX(CORE_TAG_WIDTH, SNP_TAG_WIDTH)
`define REQS_BITS `LOG2UP(NUM_REQUESTS) `define REQS_BITS `LOG2UP(NUM_REQUESTS)

View file

@ -11,27 +11,43 @@ module VX_cache_core_req_bank_sel #(
parameter NUM_REQUESTS = 1 parameter NUM_REQUESTS = 1
) ( ) (
input wire [NUM_REQUESTS-1:0] core_req_valid, input wire [NUM_REQUESTS-1:0] core_req_valid,
`IGNORE_WARNINGS_BEGIN input wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
input wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr, output wire core_req_ready,
`IGNORE_WARNINGS_END
input wire [NUM_BANKS-1:0] per_bank_ready,
output wire [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid, output wire [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid,
output wire core_req_ready input wire [NUM_BANKS-1:0] per_bank_ready
); );
if (NUM_BANKS > 1) begin if (NUM_BANKS > 1) begin
reg [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid_r; reg [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid_r;
reg [NUM_BANKS-1:0] per_bank_ready_sel; reg [NUM_BANKS-1:0] per_bank_ready_ignore;
reg [NUM_BANKS-1:0] per_bank_ready_other;
always @(*) begin always @(*) begin
per_bank_valid_r = 0; per_bank_valid_r = 0;
per_bank_ready_sel = {NUM_BANKS{1'b1}}; per_bank_ready_other = {NUM_BANKS{1'b1}};
per_bank_ready_ignore = {NUM_BANKS{1'b1}};
for (integer i = 0; i < NUM_BANKS; i++) begin
for (integer j = 0; j < NUM_BANKS; j++) begin
if (i != j)
per_bank_ready_other[i] &= (per_bank_ready[j] | per_bank_ready_ignore[j]);
end
end
for (integer i = 0; i < NUM_REQUESTS; i++) begin for (integer i = 0; i < NUM_REQUESTS; i++) begin
per_bank_valid_r[core_req_addr[i][`BANK_SELECT_ADDR_RNG]][i] = core_req_valid[i]; per_bank_valid_r[core_req_addr[i][`BANK_SELECT_ADDR_RNG]][i] = core_req_valid[i];
per_bank_ready_sel[core_req_addr[i][`BANK_SELECT_ADDR_RNG]] = 0; per_bank_ready_ignore[core_req_addr[i][`BANK_SELECT_ADDR_RNG]] = 1'b0;
end end
end end
assign per_bank_valid = per_bank_valid_r;
assign core_req_ready = & (per_bank_ready | per_bank_ready_sel); for (genvar i = 0; i < NUM_BANKS; i++) begin
for (genvar j = 0; j < NUM_REQUESTS; j++) begin
assign per_bank_valid[i][j] = per_bank_valid_r[i][j] & per_bank_ready_other[i];
end
end
assign core_req_ready = & (per_bank_ready | per_bank_ready_ignore);
end else begin end else begin
`UNUSED_VAR (core_req_addr)
assign per_bank_valid = core_req_valid; assign per_bank_valid = core_req_valid;
assign core_req_ready = per_bank_ready; assign core_req_ready = per_bank_ready;
end end

View file

@ -17,7 +17,7 @@ module VX_cache_miss_resrv #(
// core request tag size // core request tag size
parameter CORE_TAG_WIDTH = 1, parameter CORE_TAG_WIDTH = 1,
// Snooping request tag width // Snooping request tag width
parameter SNP_REQ_TAG_WIDTH = 1, parameter SNP_TAG_WIDTH = 1,
// size of tag id in core request tag // size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0 parameter CORE_TAG_ID_BITS = 0
) ( ) (

View file

@ -1,33 +1,33 @@
`include "VX_cache_config.vh" `include "VX_cache_config.vh"
module VX_snp_forwarder #( module VX_snp_forwarder #(
parameter CACHE_ID = 0, parameter CACHE_ID = 0,
parameter BANK_LINE_SIZE = 1, parameter SRC_ADDR_WIDTH = 1,
parameter NUM_REQUESTS = 1, parameter DST_ADDR_WIDTH = 1,
parameter SNRQ_SIZE = 1, parameter NUM_REQUESTS = 1,
parameter SNP_REQ_TAG_WIDTH = 1, parameter SNP_TAG_WIDTH = 1,
parameter SNP_FWD_TAG_WIDTH = 1 parameter SNRQ_SIZE = 1
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
// Snoop request // Snoop request
input wire snp_req_valid, input wire snp_req_valid,
input wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr, input wire [SRC_ADDR_WIDTH-1:0] snp_req_addr,
input wire snp_req_invalidate, input wire snp_req_invalidate,
input wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag, input wire [SNP_TAG_WIDTH-1:0] snp_req_tag,
output wire snp_req_ready, output wire snp_req_ready,
// Snoop response // Snoop response
output wire snp_rsp_valid, output wire snp_rsp_valid,
output wire [`DRAM_ADDR_WIDTH-1:0] snp_rsp_addr, output wire [SRC_ADDR_WIDTH-1:0] snp_rsp_addr,
output wire snp_rsp_invalidate, output wire snp_rsp_invalidate,
output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag, output wire [SNP_TAG_WIDTH-1:0] snp_rsp_tag,
input wire snp_rsp_ready, input wire snp_rsp_ready,
// Snoop Forwarding out // Snoop Forwarding out
output wire [NUM_REQUESTS-1:0] snp_fwdout_valid, output wire [NUM_REQUESTS-1:0] snp_fwdout_valid,
output wire [NUM_REQUESTS-1:0][`DRAM_ADDR_WIDTH-1:0] snp_fwdout_addr, output wire [NUM_REQUESTS-1:0][DST_ADDR_WIDTH-1:0] snp_fwdout_addr,
output wire [NUM_REQUESTS-1:0] snp_fwdout_invalidate, output wire [NUM_REQUESTS-1:0] snp_fwdout_invalidate,
output wire [NUM_REQUESTS-1:0][`LOG2UP(SNRQ_SIZE)-1:0] snp_fwdout_tag, output wire [NUM_REQUESTS-1:0][`LOG2UP(SNRQ_SIZE)-1:0] snp_fwdout_tag,
input wire [NUM_REQUESTS-1:0] snp_fwdout_ready, input wire [NUM_REQUESTS-1:0] snp_fwdout_ready,
@ -37,30 +37,37 @@ module VX_snp_forwarder #(
input wire [NUM_REQUESTS-1:0][`LOG2UP(SNRQ_SIZE)-1:0] snp_fwdin_tag, input wire [NUM_REQUESTS-1:0][`LOG2UP(SNRQ_SIZE)-1:0] snp_fwdin_tag,
output wire [NUM_REQUESTS-1:0] snp_fwdin_ready output wire [NUM_REQUESTS-1:0] snp_fwdin_ready
); );
localparam ADDR_DIFF = DST_ADDR_WIDTH - SRC_ADDR_WIDTH;
localparam NUM_REQUESTS_QUAL = NUM_REQUESTS * (1 << ADDR_DIFF);
localparam REQ_QUAL_BITS = `LOG2UP(NUM_REQUESTS_QUAL);
`STATIC_ASSERT(NUM_REQUESTS > 1, ("invalid value")) `STATIC_ASSERT(NUM_REQUESTS > 1, ("invalid value"))
reg [`REQS_BITS:0] pending_cntrs [SNRQ_SIZE-1:0]; reg [REQ_QUAL_BITS:0] pending_cntrs [SNRQ_SIZE-1:0];
wire [`LOG2UP(SNRQ_SIZE)-1:0] sfq_write_addr, sfq_read_addr; wire [`LOG2UP(SNRQ_SIZE)-1:0] sfq_write_addr, sfq_read_addr;
wire sfq_acquire, sfq_release, sfq_full; wire sfq_acquire, sfq_release, sfq_full;
wire [`LOG2UP(SNRQ_SIZE)-1:0] fwdout_tag;
reg [NUM_REQUESTS-1:0] snp_fwdout_ready_other;
wire fwdout_ready;
wire fwdin_valid;
wire [`LOG2UP(SNRQ_SIZE)-1:0] fwdin_tag; wire [`LOG2UP(SNRQ_SIZE)-1:0] fwdin_tag;
wire fwdin_valid;
wire fwdin_ready = snp_rsp_ready || (1 != pending_cntrs[sfq_read_addr]); wire fwdin_ready = snp_rsp_ready || (1 != pending_cntrs[sfq_read_addr]);
wire fwdin_fire = fwdin_valid && fwdin_ready; wire fwdin_fire = fwdin_valid && fwdin_ready;
wire fwdout_ready = (& snp_fwdout_ready); assign snp_rsp_valid = fwdin_valid && (1 == pending_cntrs[sfq_read_addr]);
assign snp_rsp_valid = fwdin_valid && (1 == pending_cntrs[sfq_read_addr]); // send response
assign sfq_read_addr = fwdin_tag; assign sfq_read_addr = fwdin_tag;
assign sfq_acquire = snp_req_valid && !sfq_full && fwdout_ready;
assign sfq_release = snp_rsp_valid && snp_rsp_ready; assign sfq_release = snp_rsp_valid && snp_rsp_ready;
wire snp_req_ready_unqual = !sfq_full && fwdout_ready;
VX_cam_buffer #( VX_cam_buffer #(
.DATAW (`DRAM_ADDR_WIDTH + 1 + SNP_REQ_TAG_WIDTH), .DATAW (SRC_ADDR_WIDTH + 1 + SNP_TAG_WIDTH),
.SIZE (SNRQ_SIZE) .SIZE (SNRQ_SIZE)
) snp_fwd_cam ( ) snp_fwd_cam (
.clk (clk), .clk (clk),
@ -75,9 +82,54 @@ module VX_snp_forwarder #(
.full (sfq_full) .full (sfq_full)
); );
wire [DST_ADDR_WIDTH-1:0] snp_req_addr_qual;
wire dispatch_ready;
if (ADDR_DIFF != 0) begin
reg [`LOG2UP(SNRQ_SIZE)-1:0] fwdout_tag_r;
reg [DST_ADDR_WIDTH-1:0] snp_req_addr_r;
reg dispatch_ready_r;
reg use_cter_r;
always @(posedge clk) begin
if (reset) begin
dispatch_ready_r <= 0;
use_cter_r <= 0;
end else begin
if (snp_req_valid && snp_req_ready_unqual) begin
if (snp_req_addr_r[ADDR_DIFF-1:0] == ((1 << ADDR_DIFF)-2)) begin
dispatch_ready_r <= 1;
end
if (snp_req_addr_r[ADDR_DIFF-1:0] == ((1 << ADDR_DIFF)-1)) begin
dispatch_ready_r <= 0;
use_cter_r <= 0;
end else begin
use_cter_r <= 1;
end
end
end
if (snp_req_valid && snp_req_ready_unqual) begin
snp_req_addr_r <= snp_req_addr_qual + DST_ADDR_WIDTH'(1'b1);
end
if (!use_cter_r) begin
fwdout_tag_r <= sfq_write_addr;
end
end
assign sfq_acquire = snp_req_valid && snp_req_ready_unqual && !use_cter_r;
assign fwdout_tag = use_cter_r ? fwdout_tag_r : sfq_write_addr;
assign snp_req_addr_qual = use_cter_r ? snp_req_addr_r : {snp_req_addr, ADDR_DIFF'(0)};
assign dispatch_ready = dispatch_ready_r;
end else begin
assign sfq_acquire = snp_req_valid && snp_req_ready;
assign fwdout_tag = sfq_write_addr;
assign snp_req_addr_qual = snp_req_addr;
assign dispatch_ready = 1'b1;
end
always @(posedge clk) begin always @(posedge clk) begin
if (sfq_acquire) begin if (sfq_acquire) begin
pending_cntrs[sfq_write_addr] <= NUM_REQUESTS; pending_cntrs[sfq_write_addr] <= NUM_REQUESTS_QUAL;
end end
if (fwdin_fire) begin if (fwdin_fire) begin
pending_cntrs[sfq_read_addr] <= pending_cntrs[sfq_read_addr] - 1; pending_cntrs[sfq_read_addr] <= pending_cntrs[sfq_read_addr] - 1;
@ -85,13 +137,25 @@ module VX_snp_forwarder #(
end end
for (genvar i = 0; i < NUM_REQUESTS; i++) begin for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign snp_fwdout_valid[i] = snp_req_valid && snp_req_ready; assign snp_fwdout_valid[i] = snp_req_valid && snp_fwdout_ready_other[i] && !sfq_full;
assign snp_fwdout_addr[i] = snp_req_addr; assign snp_fwdout_addr[i] = snp_req_addr_qual;
assign snp_fwdout_invalidate[i] = snp_req_invalidate; assign snp_fwdout_invalidate[i] = snp_req_invalidate;
assign snp_fwdout_tag[i] = sfq_write_addr; assign snp_fwdout_tag[i] = fwdout_tag;
end end
assign snp_req_ready = !sfq_full && fwdout_ready; always @(*) begin
snp_fwdout_ready_other = {NUM_REQUESTS{1'b1}};
for (integer i = 0; i < NUM_REQUESTS; i++) begin
for (integer j = 0; j < NUM_REQUESTS; j++) begin
if (i != j)
snp_fwdout_ready_other[i] &= snp_fwdout_ready[j];
end
end
end
assign fwdout_ready = (& snp_fwdout_ready);
assign snp_req_ready = snp_req_ready_unqual && dispatch_ready;
if (NUM_REQUESTS > 1) begin if (NUM_REQUESTS > 1) begin
wire sel_valid; wire sel_valid;

View file

@ -3,17 +3,17 @@
module VX_snp_rsp_arb #( module VX_snp_rsp_arb #(
parameter NUM_BANKS = 1, parameter NUM_BANKS = 1,
parameter BANK_LINE_SIZE = 1, parameter BANK_LINE_SIZE = 1,
parameter SNP_REQ_TAG_WIDTH = 1 parameter SNP_TAG_WIDTH = 1
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire [NUM_BANKS-1:0] per_bank_snp_rsp_valid, input wire [NUM_BANKS-1:0] per_bank_snp_rsp_valid,
input wire [NUM_BANKS-1:0][SNP_REQ_TAG_WIDTH-1:0] per_bank_snp_rsp_tag, input wire [NUM_BANKS-1:0][SNP_TAG_WIDTH-1:0] per_bank_snp_rsp_tag,
output wire [NUM_BANKS-1:0] per_bank_snp_rsp_ready, output wire [NUM_BANKS-1:0] per_bank_snp_rsp_ready,
output wire snp_rsp_valid, output wire snp_rsp_valid,
output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag, output wire [SNP_TAG_WIDTH-1:0] snp_rsp_tag,
input wire snp_rsp_ready input wire snp_rsp_ready
); );
if (NUM_BANKS > 1) begin if (NUM_BANKS > 1) begin
@ -35,7 +35,7 @@ module VX_snp_rsp_arb #(
wire stall = ~snp_rsp_ready && snp_rsp_valid; wire stall = ~snp_rsp_ready && snp_rsp_valid;
VX_generic_register #( VX_generic_register #(
.N(1 + SNP_REQ_TAG_WIDTH), .N(1 + SNP_TAG_WIDTH),
.PASSTHRU(NUM_BANKS <= 2) .PASSTHRU(NUM_BANKS <= 2)
) pipe_reg ( ) pipe_reg (
.clk (clk), .clk (clk),

View file

@ -11,8 +11,10 @@ interface VX_cache_core_rsp_if #(
) (); ) ();
wire [NUM_REQUESTS-1:0] valid; wire [NUM_REQUESTS-1:0] valid;
wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] data; wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] data;
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] tag; wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] tag;
wire ready; wire ready;
endinterface endinterface

View file

@ -10,11 +10,13 @@ interface VX_cache_dram_req_if #(
) (); ) ();
wire valid; wire valid;
wire rw; wire rw;
wire [(DRAM_LINE_WIDTH/8)-1:0] byteen; wire [(DRAM_LINE_WIDTH/8)-1:0] byteen;
wire [DRAM_ADDR_WIDTH-1:0] addr; wire [DRAM_ADDR_WIDTH-1:0] addr;
wire [DRAM_LINE_WIDTH-1:0] data; wire [DRAM_LINE_WIDTH-1:0] data;
wire [DRAM_TAG_WIDTH-1:0] tag; wire [DRAM_TAG_WIDTH-1:0] tag;
wire ready; wire ready;
endinterface endinterface

View file

@ -9,8 +9,10 @@ interface VX_cache_dram_rsp_if #(
) (); ) ();
wire valid; wire valid;
wire [DRAM_LINE_WIDTH-1:0] data; wire [DRAM_LINE_WIDTH-1:0] data;
wire [DRAM_TAG_WIDTH-1:0] tag; wire [DRAM_TAG_WIDTH-1:0] tag;
wire ready; wire ready;
endinterface endinterface

View file

@ -9,9 +9,11 @@ interface VX_cache_snp_req_if #(
) (); ) ();
wire valid; wire valid;
wire [DRAM_ADDR_WIDTH-1:0] addr; wire [DRAM_ADDR_WIDTH-1:0] addr;
wire invalidate; wire invalidate;
wire [SNP_TAG_WIDTH-1:0] tag; wire [SNP_TAG_WIDTH-1:0] tag;
wire ready; wire ready;
endinterface endinterface

View file

@ -8,7 +8,9 @@ interface VX_cache_snp_rsp_if #(
) (); ) ();
wire valid; wire valid;
wire [SNP_TAG_WIDTH-1:0] tag; wire [SNP_TAG_WIDTH-1:0] tag;
wire ready; wire ready;
endinterface endinterface

View file

@ -5,14 +5,12 @@
interface VX_cmt_to_csr_if (); interface VX_cmt_to_csr_if ();
wire valid; wire valid;
wire [`NW_BITS-1:0] wid; wire [`NW_BITS-1:0] wid;
wire [$clog2(`NUM_THREADS+1)-1:0] commit_size; wire [$clog2(`NUM_THREADS+1)-1:0] commit_size;
wire has_fflags;
wire has_fflags; fflags_t fflags;
fflags_t fflags;
endinterface endinterface

View file

@ -6,9 +6,11 @@
interface VX_csr_io_req_if (); interface VX_csr_io_req_if ();
wire valid; wire valid;
wire [`CSR_ADDR_BITS-1:0] addr; wire [`CSR_ADDR_BITS-1:0] addr;
wire rw; wire rw;
wire [31:0] data; wire [31:0] data;
wire ready; wire ready;
endinterface endinterface

View file

@ -6,7 +6,9 @@
interface VX_csr_io_rsp_if (); interface VX_csr_io_rsp_if ();
wire valid; wire valid;
wire [31:0] data; wire [31:0] data;
wire ready; wire ready;
endinterface endinterface

View file

@ -10,18 +10,15 @@ interface VX_decode_if ();
wire [`NW_BITS-1:0] wid; wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] tmask; wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC; wire [31:0] PC;
wire [`EX_BITS-1:0] ex_type; wire [`EX_BITS-1:0] ex_type;
wire [`OP_BITS-1:0] op_type; wire [`OP_BITS-1:0] op_type;
wire [`MOD_BITS-1:0] op_mod; wire [`MOD_BITS-1:0] op_mod;
wire wb; wire wb;
wire [`NR_BITS-1:0] rd; wire [`NR_BITS-1:0] rd;
wire [`NR_BITS-1:0] rs1; wire [`NR_BITS-1:0] rs1;
wire [`NR_BITS-1:0] rs2; wire [`NR_BITS-1:0] rs2;
wire [`NR_BITS-1:0] rs3; wire [`NR_BITS-1:0] rs3;
wire [31:0] imm; wire [31:0] imm;
wire rs1_is_PC; wire rs1_is_PC;
wire rs2_is_imm; wire rs2_is_imm;
wire use_rs3; wire use_rs3;

View file

@ -5,13 +5,15 @@
interface VX_exu_to_cmt_if (); interface VX_exu_to_cmt_if ();
wire valid; wire valid;
wire [`NW_BITS-1:0] wid; wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] tmask; wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC; wire [31:0] PC;
wire [`NUM_THREADS-1:0][31:0] data; wire [`NUM_THREADS-1:0][31:0] data;
wire [`NR_BITS-1:0] rd; wire [`NR_BITS-1:0] rd;
wire wb; wire wb;
wire ready; wire ready;
endinterface endinterface

View file

@ -5,7 +5,8 @@
interface VX_fpu_to_cmt_if (); interface VX_fpu_to_cmt_if ();
wire valid; wire valid;
wire [`NW_BITS-1:0] wid; wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] tmask; wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC; wire [31:0] PC;
@ -14,6 +15,7 @@ interface VX_fpu_to_cmt_if ();
wire wb; wire wb;
wire has_fflags; wire has_fflags;
fflags_t [`NUM_THREADS-1:0] fflags; fflags_t [`NUM_THREADS-1:0] fflags;
wire ready; wire ready;
endinterface endinterface

View file

@ -9,15 +9,13 @@
interface VX_fpu_to_csr_if (); interface VX_fpu_to_csr_if ();
wire valid; wire valid;
wire [`NW_BITS-1:0] wid; wire [`NW_BITS-1:0] wid;
wire fflags_NV;
wire fflags_NV; wire fflags_DZ;
wire fflags_DZ; wire fflags_OF;
wire fflags_OF; wire fflags_UF;
wire fflags_UF; wire fflags_NX;
wire fflags_NX;
endinterface endinterface

View file

@ -9,7 +9,6 @@ interface VX_gpr_rsp_if ();
wire [`NW_BITS-1:0] wid; wire [`NW_BITS-1:0] wid;
wire [31:0] PC; wire [31:0] PC;
`IGNORE_WARNINGS_END `IGNORE_WARNINGS_END
wire [`NUM_THREADS-1:0][31:0] rs1_data; wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [`NUM_THREADS-1:0][31:0] rs2_data; wire [`NUM_THREADS-1:0][31:0] rs2_data;
wire [`NUM_THREADS-1:0][31:0] rs3_data; wire [`NUM_THREADS-1:0][31:0] rs3_data;

View file

@ -6,9 +6,11 @@
interface VX_ifetch_req_if (); interface VX_ifetch_req_if ();
wire valid; wire valid;
wire [`NUM_THREADS-1:0] tmask; wire [`NUM_THREADS-1:0] tmask;
wire [`NW_BITS-1:0] wid; wire [`NW_BITS-1:0] wid;
wire [31:0] PC; wire [31:0] PC;
wire ready; wire ready;
endinterface endinterface

View file

@ -5,11 +5,13 @@
interface VX_ifetch_rsp_if (); interface VX_ifetch_rsp_if ();
wire valid; wire valid;
wire [`NUM_THREADS-1:0] tmask; wire [`NUM_THREADS-1:0] tmask;
wire [`NW_BITS-1:0] wid; wire [`NW_BITS-1:0] wid;
wire [31:0] PC; wire [31:0] PC;
wire [31:0] instr; wire [31:0] instr;
wire ready; wire ready;
endinterface endinterface

View file

@ -10,14 +10,11 @@ interface VX_lsu_req_if ();
wire [`NW_BITS-1:0] wid; wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] tmask; wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC; wire [31:0] PC;
wire rw; wire rw;
wire [`BYTEEN_BITS-1:0] byteen; wire [`BYTEEN_BITS-1:0] byteen;
wire [`NUM_THREADS-1:0][31:0] store_data; wire [`NUM_THREADS-1:0][31:0] store_data;
wire [`NUM_THREADS-1:0][31:0] base_addr; wire [`NUM_THREADS-1:0][31:0] base_addr;
wire [31:0] offset; wire [31:0] offset;
wire [`NR_BITS-1:0] rd; wire [`NR_BITS-1:0] rd;
wire wb; wire wb;

View file

@ -5,13 +5,12 @@
interface VX_warp_ctl_if (); interface VX_warp_ctl_if ();
wire valid; wire valid;
wire [`NW_BITS-1:0] wid; wire [`NW_BITS-1:0] wid;
gpu_tmc_t tmc;
gpu_tmc_t tmc; gpu_wspawn_t wspawn;
gpu_wspawn_t wspawn; gpu_barrier_t barrier;
gpu_barrier_t barrier; gpu_split_t split;
gpu_split_t split;
endinterface endinterface

View file

@ -6,13 +6,12 @@
interface VX_writeback_if (); interface VX_writeback_if ();
wire valid; wire valid;
wire [`NUM_THREADS-1:0] tmask; wire [`NUM_THREADS-1:0] tmask;
wire [`NW_BITS-1:0] wid; wire [`NW_BITS-1:0] wid;
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
wire [31:0] PC; wire [31:0] PC;
`IGNORE_WARNINGS_END `IGNORE_WARNINGS_END
wire [`NR_BITS-1:0] rd; wire [`NR_BITS-1:0] rd;
wire [`NUM_THREADS-1:0][31:0] data; wire [`NUM_THREADS-1:0][31:0] data;

View file

@ -5,7 +5,7 @@
interface VX_wstall_if(); interface VX_wstall_if();
wire valid; wire valid;
wire [`NW_BITS-1:0] wid; wire [`NW_BITS-1:0] wid;
endinterface endinterface

View file

@ -54,7 +54,7 @@ module VX_cam_buffer #(
end else begin end else begin
for (integer i = 0; i < CPORTS; i++) begin for (integer i = 0; i < CPORTS; i++) begin
if (release_slot[i]) begin if (release_slot[i]) begin
assert(0 == free_slots[release_addr[i]]) else $display("%t: freed slot at port %d", $time, release_addr[i]); assert(0 == free_slots[release_addr[i]]) else $error("%t: releasing invalid slot at port %d", $time, release_addr[i]);
end end
end end
free_slots <= free_slots_n; free_slots <= free_slots_n;
@ -63,7 +63,7 @@ module VX_cam_buffer #(
end end
if (acquire_slot) begin if (acquire_slot) begin
assert(1 == free_slots[write_addr]) else $display("%t: inused slot at port %d", $time, write_addr); assert(1 == free_slots[write_addr]) else $error("%t: acquiring used slot at port %d", $time, write_addr);
entries[write_addr] <= write_data; entries[write_addr] <= write_data;
end end
end end

View file

@ -57,34 +57,24 @@ if args.outc != 'none':
print('\n#endif', file=f) print('\n#endif', file=f)
translation_rules = [ translation_rules = [
(re.compile(r'^$'), r''), # preprocessor directives
(re.compile(r'^(\s*)`ifndef\s+([^ ]+)'), r'\1#ifndef \2'), (re.compile(r'^\s*`include .*$'), r''),
(re.compile(r'^(\s*)`define\s+([^ ]+)'), r'\1#define \2'), (re.compile(r'`ifdef'), r'#ifdef'),
(re.compile(r'^(\s*)`include "VX_user_config\.vh"'), r''), (re.compile(r'`ifndef'), r'#ifndef'),
(re.compile(r'^(\s*)`define\s+([^ ]+) (.+)'), r'\1#define \2 \3'), (re.compile(r'`elif'), r'#elif'),
(re.compile(r'^(\s*)`endif\s+'), r'\1#endif'), (re.compile(r'`else'), r'#else'),
(re.compile(r'^(\s*)//(.*)'), r'\1// \2'), (re.compile(r'`define'), r'#define'),
] (re.compile(r'`endif'), r'#endif'),
post_rules = [ # macro expansion
(re.compile(r"\d+'d(\d+)"), r'\1'),
# non-standard C but supported by GCC and Clang
(re.compile(r"\d+'b([01]+)"), r'0b\1'),
(re.compile(r"\d+'h([\da-fA-F]+)"), r'0x\1'),
# fix macro references (does not support escaped identifiers §5.6.1)
(re.compile(r"`([A-Za-z_][$_0-9A-Za-z]*)"), r'\1'), (re.compile(r"`([A-Za-z_][$_0-9A-Za-z]*)"), r'\1'),
# literals
(re.compile(r"\d+'d(\d+)"), r'\1'),
(re.compile(r"\d+'b([01]+)"), r'0b\1'),
(re.compile(r"\d+'h([\da-fA-F]+)"), r'0x\1')
] ]
def post_process_line(line):
for pat, repl in post_rules:
line = pat.sub(repl, line)
return line
in_expansion = False
if args.outc != 'none': if args.outc != 'none':
with open(args.outc, 'a') as f: with open(args.outc, 'a') as f:
print(''' print('''
@ -96,36 +86,14 @@ if args.outc != 'none':
with open(path.join(script_dir, '../rtl/VX_config.vh'), 'r') as r: with open(path.join(script_dir, '../rtl/VX_config.vh'), 'r') as r:
lineno = 0 lineno = 0
for line in r: for line in r:
if in_expansion: for pat, repl in translation_rules:
f.write(post_process_line(line)) match = pat.search(line)
if not line.strip().endswith('\\'): if match:
in_expansion = False line = re.sub(pat, repl, line)
else: #print("*** match @" + str(lineno) + ": " + match.group() + " => " + line)
for pat, repl in translation_rules: f.write(line)
if pat.match(line):
if line.strip().endswith('\\'):
in_expansion = True
f.write(post_process_line(pat.sub(repl, line)))
break
else:
raise ValueError('failed to find rule for: "' + line + '" (' + str(lineno) + ')')
lineno = lineno + 1 lineno = lineno + 1
print(''' print('''
// Misc
#define THREADS_PER_WARP NUM_THREADS
#define WARPS_PER_CORE NUM_WARPS
#define NUM_WI (NUM_WARPS * NUM_THREADS * NUM_CORES_PER_CLUSTER * NUM_CLUSTERS)
// legacy
#define TOTAL_THREADS NUM_WI
#define TOTAL_WARPS (NUM_WARPS * NUM_CORES_PER_CLUSTER * NUM_CLUSTERS)
// COLORS
#define GREEN "\\033[32m"
#define RED "\\033[31m"
#define DEFAULT "\\033[39m"
'''[1:], file=f) '''[1:], file=f)

View file

@ -1,19 +1,19 @@
SINGLECORE += -DNUM_CLUSTERS=1 -DNUM_CORES=1 SINGLECORE += -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DL2_ENABLE=0
#MULTICORE ?= -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 #MULTICORE ?= -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
#MULTICORE ?= -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1 #MULTICORE ?= -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
MULTICORE ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 MULTICORE ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
# control RTL debug print states # control RTL debug print states
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE

View file

@ -57,11 +57,7 @@ void Simulator::attach_ram(RAM* ram) {
dram_rsp_vec_.clear(); dram_rsp_vec_.clear();
} }
void Simulator::reset() { void Simulator::reset() {
#ifndef NDEBUG
std::cout << timestamp << ": [sim] reset()" << std::endl;
#endif
print_bufs_.clear(); print_bufs_.clear();
dram_rsp_vec_.clear(); dram_rsp_vec_.clear();
@ -96,15 +92,25 @@ void Simulator::reset() {
} }
void Simulator::step() { void Simulator::step() {
vortex_->clk = 0; vortex_->clk = 0;
this->eval(); this->eval();
dram_rsp_ready_ = vortex_->dram_rsp_ready;
snp_req_ready_ = vortex_->snp_req_ready;
csr_io_req_ready_ = vortex_->csr_io_req_ready;
vortex_->clk = 1; vortex_->clk = 1;
this->eval(); this->eval();
this->eval_dram_bus(); this->eval_dram_bus();
this->eval_io_bus(); this->eval_io_bus();
this->eval_csr_bus(); this->eval_csr_bus();
this->eval_snp_bus(); this->eval_snp_bus();
#ifndef NDEBUG
fflush(stdout);
#endif
} }
void Simulator::eval() { void Simulator::eval() {
@ -134,8 +140,7 @@ void Simulator::eval_dram_bus() {
// send DRAM response // send DRAM response
if (dram_rsp_active_ if (dram_rsp_active_
&& vortex_->dram_rsp_valid && vortex_->dram_rsp_valid && dram_rsp_ready_) {
&& vortex_->dram_rsp_ready) {
dram_rsp_active_ = false; dram_rsp_active_ = false;
} }
if (!dram_rsp_active_) { if (!dram_rsp_active_) {
@ -183,7 +188,7 @@ void Simulator::eval_dram_bus() {
} }
} }
vortex_->dram_req_ready = ~dram_stalled; vortex_->dram_req_ready = !dram_stalled;
} }
void Simulator::eval_io_bus() { void Simulator::eval_io_bus() {
@ -207,31 +212,32 @@ void Simulator::eval_io_bus() {
} }
void Simulator::eval_snp_bus() { void Simulator::eval_snp_bus() {
if (snp_req_active_) { if (snp_req_active_) {
if (vortex_->snp_rsp_valid) { if (vortex_->snp_req_valid && snp_req_ready_) {
assert(pending_snp_reqs_ > 0); assert(snp_req_size_);
--pending_snp_reqs_;
#ifdef DBG_PRINT_CACHE_SNP #ifdef DBG_PRINT_CACHE_SNP
std::cout << timestamp << ": [sim] snp rsp: tag=" << vortex_->snp_rsp_tag << " pending=" << pending_snp_reqs_ << std::endl; std::cout << std::dec << timestamp << ": [sim] SNP Req: addr=" << std::hex << vortex_->snp_req_addr << " tag=" << vortex_->snp_req_tag << " remain=" << (snp_req_size_-1) << std::endl;
#endif #endif
} ++vortex_->snp_req_addr;
if (vortex_->snp_req_valid && vortex_->snp_req_ready) { ++vortex_->snp_req_tag;
if (snp_req_size_ != 0) { ++pending_snp_reqs_;
vortex_->snp_req_addr += 1; --snp_req_size_;
vortex_->snp_req_tag += 1; if (0 == snp_req_size_) {
--snp_req_size_; vortex_->snp_req_valid = false;
++pending_snp_reqs_;
#ifdef DBG_PRINT_CACHE_SNP
std::cout << timestamp << ": [sim] snp req: addr=" << std::hex << vortex_->snp_req_addr << " tag=" << vortex_->snp_req_tag << " remain=" << snp_req_size_ << std::endl;
#endif
} else {
vortex_->snp_req_valid = 0;
} }
} }
if (!vortex_->snp_req_valid
&& 0 == pending_snp_reqs_) { if (vortex_->snp_rsp_valid && vortex_->snp_rsp_ready) {
snp_req_active_ = false; assert(pending_snp_reqs_ > 0);
} --pending_snp_reqs_;
if (!vortex_->snp_req_valid && 0 == pending_snp_reqs_) {
vortex_->snp_rsp_ready = false;
snp_req_active_ = false;
}
#ifdef DBG_PRINT_CACHE_SNP
std::cout << std::dec << timestamp << ": [sim] SNP Rsp: tag=" << std::hex << vortex_->snp_rsp_tag << " pending=" << pending_snp_reqs_ << std::endl;
#endif
}
} else { } else {
vortex_->snp_req_valid = 0; vortex_->snp_req_valid = 0;
vortex_->snp_rsp_ready = 0; vortex_->snp_rsp_ready = 0;
@ -240,18 +246,24 @@ void Simulator::eval_snp_bus() {
void Simulator::eval_csr_bus() { void Simulator::eval_csr_bus() {
if (csr_req_active_) { if (csr_req_active_) {
if (vortex_->csr_io_req_rw) { if (vortex_->csr_io_req_valid && csr_io_req_ready_) {
if (vortex_->csr_io_req_ready) { #ifndef NDEBUG
vortex_->snp_req_valid = 0; if (vortex_->csr_io_req_rw)
csr_req_active_ = false; std::cout << std::dec << timestamp << ": [sim] CSR Wr Req: core=" << (int)vortex_->csr_io_req_coreid << ", addr=" << std::hex << vortex_->csr_io_req_addr << ", value=" << vortex_->csr_io_req_data << std::endl;
} else
} else { std::cout << std::dec << timestamp << ": [sim] CSR Rd Req: core=" << (int)vortex_->csr_io_req_coreid << ", addr=" << std::hex << vortex_->csr_io_req_addr << std::endl;
if (vortex_->csr_io_rsp_valid) { #endif
*csr_rsp_value_ = vortex_->csr_io_rsp_data; vortex_->csr_io_req_valid = 0;
vortex_->snp_req_valid = 0; if (vortex_->csr_io_req_rw)
vortex_->csr_io_rsp_ready = 0; csr_req_active_ = false;
csr_req_active_ = false; }
} if (vortex_->csr_io_rsp_valid && vortex_->csr_io_rsp_ready) {
*csr_rsp_value_ = vortex_->csr_io_rsp_data;
vortex_->csr_io_rsp_ready = 0;
csr_req_active_ = false;
#ifndef NDEBUG
std::cout << std::dec << timestamp << ": [sim] CSR Rsp: value=" << vortex_->csr_io_rsp_data << std::endl;
#endif
} }
} else { } else {
vortex_->csr_io_req_valid = 0; vortex_->csr_io_req_valid = 0;
@ -278,33 +290,23 @@ bool Simulator::csr_req_active() const {
} }
void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) { void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
#ifndef NDEBUG
std::cout << timestamp << ": [sim] flush_caches()" << std::endl;
#endif
if (0 == size) if (0 == size)
return; return;
assert(!vortex_->snp_rsp_valid);
vortex_->snp_req_addr = mem_addr / GLOBAL_BLOCK_SIZE; vortex_->snp_req_addr = mem_addr / GLOBAL_BLOCK_SIZE;
vortex_->snp_req_tag = 0; vortex_->snp_req_tag = 0;
vortex_->snp_req_valid = 1; vortex_->snp_req_valid = 1;
vortex_->snp_rsp_ready = 1; vortex_->snp_rsp_ready = 1;
snp_req_size_ = (size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE; snp_req_size_ = (size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE;
--snp_req_size_; pending_snp_reqs_ = 0;
pending_snp_reqs_ = 1;
snp_req_active_ = true; snp_req_active_ = true;
#ifdef DBG_PRINT_CACHE_SNP
std::cout << timestamp << ": [sim] snp req: addr=" << std::hex << vortex_->snp_req_addr << " tag=" << vortex_->snp_req_tag << " remain=" << snp_req_size_ << std::endl;
#endif
} }
void Simulator::set_csr(int core_id, int addr, unsigned value) { void Simulator::set_csr(int core_id, int addr, unsigned value) {
#ifndef NDEBUG
std::cout << timestamp << ": [sim] set_csr()" << std::endl;
#endif
vortex_->csr_io_req_valid = 1; vortex_->csr_io_req_valid = 1;
vortex_->csr_io_req_coreid = core_id; vortex_->csr_io_req_coreid = core_id;
vortex_->csr_io_req_addr = addr; vortex_->csr_io_req_addr = addr;
@ -316,10 +318,6 @@ void Simulator::set_csr(int core_id, int addr, unsigned value) {
} }
void Simulator::get_csr(int core_id, int addr, unsigned *value) { void Simulator::get_csr(int core_id, int addr, unsigned *value) {
#ifndef NDEBUG
std::cout << timestamp << ": [sim] get_csr()" << std::endl;
#endif
vortex_->csr_io_req_valid = 1; vortex_->csr_io_req_valid = 1;
vortex_->csr_io_req_coreid = core_id; vortex_->csr_io_req_coreid = core_id;
vortex_->csr_io_req_addr = addr; vortex_->csr_io_req_addr = addr;
@ -327,12 +325,13 @@ void Simulator::get_csr(int core_id, int addr, unsigned *value) {
vortex_->csr_io_rsp_ready = 1; vortex_->csr_io_rsp_ready = 1;
csr_rsp_value_ = value; csr_rsp_value_ = value;
csr_req_active_ = true; csr_req_active_ = true;
} }
void Simulator::run() { void Simulator::run() {
#ifndef NDEBUG #ifndef NDEBUG
std::cout << timestamp << ": [sim] run()" << std::endl; std::cout << std::dec << timestamp << ": [sim] run()" << std::endl;
#endif #endif
// execute program // execute program

View file

@ -65,7 +65,11 @@ private:
std::list<dram_req_t> dram_rsp_vec_; std::list<dram_req_t> dram_rsp_vec_;
bool dram_rsp_active_; bool dram_rsp_active_;
bool dram_rsp_ready_;
bool snp_req_ready_;
bool csr_io_req_ready_;
bool snp_req_active_; bool snp_req_active_;
bool csr_req_active_; bool csr_req_active_;

View file

@ -3,6 +3,10 @@
#include <fstream> #include <fstream>
#include <iomanip> #include <iomanip>
// ANSI SGR color escape sequences.
// A single backslash is required here: "\033" is the octal escape for the ESC
// (0x1B) control byte, whereas "\\033" embeds the four literal characters
// '\', '0', '3', '3' and the terminal never sees an escape sequence.
#define GREEN "\033[32m"   // SGR 32: green foreground
#define RED "\033[31m"     // SGR 31: red foreground
#define DEFAULT "\033[39m" // SGR 39: default foreground
#define ALL_TESTS #define ALL_TESTS
int main(int argc, char **argv) { int main(int argc, char **argv) {