L2 and L1 using different block size support, RTLsim fixes, dram_rsp_ready optimization

This commit is contained in:
Blaise Tine 2020-11-21 09:47:56 -08:00
parent a7da36c007
commit 1795980a52
50 changed files with 972 additions and 952 deletions

View file

@ -20,19 +20,21 @@ install:
- export PATH=$VERILATOR_ROOT/bin:$PATH
script:
- make -j
- ci/test_runtime.sh
- ci/test_driver.sh
- ci/test_riscv_isa.sh
- ci/test_opencl.sh
- ci/blackbox.sh -run_debug
- ci/blackbox.sh -run_scope
- ci/blackbox.sh -run_1c
- ci/blackbox.sh -run_2c
- ci/blackbox.sh -run_4c
- ci/blackbox.sh -run_4c_l2
- travis_wait 30 ci/blackbox.sh -run_4c_2l2_l3
- travis_wait 30 ci/blackbox.sh -run_8c_4l2_l3
- travis_wait 45 make
- travis_wait 45 ci/test_runtime.sh
- travis_wait 45 ci/test_driver.sh
- travis_wait 45 ci/test_riscv_isa.sh
- travis_wait 45 ci/test_opencl.sh
- travis_wait 45 ci/blackbox.sh --driver=rtlsim
- travis_wait 45 ci/blackbox.sh --driver=vlsim
- travis_wait 45 ci/blackbox.sh --driver=vlsim --scope
- travis_wait 45 ci/blackbox.sh --driver=vlsim --debug
- travis_wait 45 ci/blackbox.sh --driver=vlsim --cores=1
- travis_wait 45 ci/blackbox.sh --driver=vlsim --cores=2
- travis_wait 45 ci/blackbox.sh --driver=vlsim --cores=4
- travis_wait 45 ci/blackbox.sh --driver=vlsim --cores=4 --l2cache
- travis_wait 45 ci/blackbox.sh --driver=vlsim --cores=2 --l2cache --clusters=2
- travis_wait 45 ci/blackbox.sh --driver=vlsim --cores=2 --l2cache --clusters=4
after_success:
# Gather code coverage

View file

@ -3,107 +3,130 @@
# exit when any command fails
set -e
run_1c()
show_usage()
{
# test single core
make -C driver/opae/vlsim clean
CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=1" make -C driver/opae/vlsim > /dev/null 2>&1
make -C benchmarks/opencl/sgemm run-vlsim
echo "Vortex BlackBox Test Driver v1.0"
echo "Usage: [[--clusters=#n] [--cores=#n] [--warps=#n] [--threads=#n] [--l2cache] [[--driver=rtlsim|vlsim] [--debug] [--scope] [--app=vecadd|sgemm|basic|demo|dogfood][--help]]"
}
run_2c()
{
# test 2 cores
make -C driver/opae/vlsim clean
CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0" make -C driver/opae/vlsim > /dev/null 2>&1
make -C benchmarks/opencl/sgemm run-vlsim
}
DRIVER=vlsim
APP=sgemm
CLUSTERS=1
CORES=2
WARPS=4
THREADS=4
L2=0
DEBUG=0
SCOPE=0
run_4c()
{
# test 4 cores
make -C driver/opae/vlsim clean
CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=0" make -C driver/opae/vlsim > /dev/null 2>&1
make -C benchmarks/opencl/sgemm run-vlsim
}
# Parse command-line options. Each option is a single token of the form
# "--name" or "--name=value"; ${i#*=} removes everything up to and including
# the first '=' and so yields the value part.
# The word list of the for loop is expanded once before the first iteration,
# so the shift calls only drop consumed tokens from "$@" and do not affect
# which tokens the loop visits.
for i in "$@"
do
    case $i in
    --driver=*)
        DRIVER=${i#*=}
        shift
        ;;
    --app=*)
        APP=${i#*=}
        shift
        ;;
    --clusters=*)
        CLUSTERS=${i#*=}
        shift
        ;;
    --cores=*)
        CORES=${i#*=}
        shift
        ;;
    --warps=*)
        WARPS=${i#*=}
        shift
        ;;
    --threads=*)
        THREADS=${i#*=}
        shift
        ;;
    --l2cache)
        L2=1
        shift
        ;;
    --debug)
        DEBUG=1
        shift
        ;;
    --scope)
        SCOPE=1
        shift
        ;;
    --help)
        # Explicit request for help is a successful run.
        show_usage
        exit 0
        ;;
    *)
        # Unknown option: fail with a non-zero status so that a mistyped
        # flag in a CI invocation is reported instead of passing silently.
        show_usage
        exit 1
        ;;
    esac
done
run_4c_l2()
{
# test 4 cores with L2
make -C driver/opae/vlsim clean
CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1" make -C driver/opae/vlsim > /dev/null 2>&1
make -C benchmarks/opencl/sgemm run-vlsim
}
# Translate the selected driver name into the directory that is built
# (DRIVER_PATH) and the optional make target inside it (DRIVER_EXTRA).
case $DRIVER in
rtlsim)
    DRIVER_PATH=driver/rtlsim
    DRIVER_EXTRA=
    ;;
vlsim)
    DRIVER_PATH=driver/opae
    DRIVER_EXTRA=vlsim
    ;;
*)
    # A bare "exit" would return the status of the echo (0) and make the
    # failure invisible to the caller, so the error status is explicit.
    echo "invalid driver: $DRIVER"
    exit 1
    ;;
esac
run_4c_2l2_l3()
{
# test 4 cores with L2 and L3
make -C driver/opae/vlsim clean
CONFIGS="-DNUM_CLUSTERS=2 -DNUM_CORES=2 -DL2_ENABLE=1" make -C driver/opae/vlsim > /dev/null 2>&1
make -C benchmarks/opencl/sgemm run-vlsim
}
# Translate the selected application name into the directory whose
# "run-<driver>" make target is executed.
case $APP in
sgemm)
    APP_PATH=benchmarks/opencl/sgemm
    ;;
vecadd)
    # Directory name matches the app name (was misspelled "vacadd").
    APP_PATH=benchmarks/opencl/vecadd
    ;;
basic)
    APP_PATH=driver/tests/basic
    ;;
demo)
    APP_PATH=driver/tests/demo
    ;;
dogfood)
    APP_PATH=driver/tests/dogfood
    ;;
*)
    # A bare "exit" would return the status of the echo (0) and make the
    # failure invisible to the caller, so the error status is explicit.
    echo "invalid app: $APP"
    exit 1
    ;;
esac
run_8c_4l2_l3()
{
# test 8 cores with L2 and L3
make -C driver/opae/vlsim clean
CONFIGS="-DNUM_CLUSTERS=4 -DNUM_CORES=2 -DL2_ENABLE=1" make -C driver/opae/vlsim > /dev/null 2>&1
make -C benchmarks/opencl/sgemm run-vlsim
}
CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DL2_ENABLE=$L2"
run_debug()
{
# test debug build
make -C driver/opae/vlsim clean
DEBUG=1 CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=1" make -C driver/opae/vlsim > /dev/null 2>&1
make -C benchmarks/opencl/sgemm run-vlsim > /dev/null 2>&1
}
echo "CONFIGS=$CONFIGS"
run_scope()
{
# test build with scope analyzer
make -C driver/opae clean
SCOPE=1 CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=1" make -C driver/opae > /dev/null 2>&1
make -C benchmarks/opencl/sgemm run-vlsim
}
make -C $DRIVER_PATH clean
usage()
{
echo "usage: blackbox [[-run_1c] [-run_2c] [-run_4c] [-run_4c_l2] [-run_4c_2l2_l3] [-run_8c_4l2_l3] [-run_debug] [-run_scope] [-all] [-h|--help]]"
}
# Build the selected driver, writing all build output to build.log.
# DEBUG=1 and SCOPE=1 are added to make's environment only when the
# corresponding option was requested; otherwise they are not passed at all.
BUILD_ENV=()
if [[ $DEBUG -eq 1 ]]
then
    BUILD_ENV+=(DEBUG=1)
fi
if [[ $SCOPE -eq 1 ]]
then
    BUILD_ENV+=(SCOPE=1)
fi
env "${BUILD_ENV[@]}" CONFIGS="$CONFIGS" make -C $DRIVER_PATH $DRIVER_EXTRA > build.log 2>&1
while [ "$1" != "" ]; do
case $1 in
-run_1c ) run_1c
;;
-run_2c ) run_2c
;;
-run_4c ) run_4c
;;
-run_4c_l2 ) run_4c_l2
;;
-run_4c_2l2_l3 ) run_4c_2l2_l3
;;
-run_8c_4l2_l3 ) run_8c_4l2_l3
;;
-run_debug ) run_debug
;;
-run_scope ) run_scope
;;
-all ) run_1c
run_2c
run_4c
run_4c_l2
run_4c_2l2_l3
run_8c_4l2_l3
run_debug
run_scope
;;
-h | --help ) usage
exit
;;
* ) usage
exit 1
esac
shift
done
make -C $APP_PATH run-$DRIVER > run.log 2>&1

View file

@ -1,7 +1,7 @@
OPAE_HOME ?= /tools/opae/1.4.0
CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors
#CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
#CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I../include -I$(OPAE_HOME)/include -I../../hw
@ -60,7 +60,7 @@ json: ../../hw/opae/vortex_afu.json
afu_json_mgr json-info --afu-json=$^ --c-hdr=$@
fpga: $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) $(FPGA_LIBS) -o $(PROJECT)
$(CXX) $(CXXFLAGS) -DUSE_FPGA $^ $(LDFLAGS) $(FPGA_LIBS) -o $(PROJECT)
asesim: $(SRCS) $(ASE_DIR)
$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $(PROJECT_ASE)

View file

@ -1,5 +1,5 @@
CFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -Wfatal-errors
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
#CFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -Wfatal-errors
CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
CFLAGS += -I../../../../hw
@ -30,7 +30,7 @@ CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
CFLAGS += -fPIC
CFLAGS += -DUSE_RTLSIM $(CONFIGS)
CFLAGS += -DUSE_VLSIM $(CONFIGS)
CFLAGS += -DDUMP_PERF_STATS
@ -79,7 +79,7 @@ VL_FLAGS += -DNOPAE
CFLAGS += -DNOPAE
# use DPI FPU
#VL_FLAGS += -DFPU_FAST
VL_FLAGS += -DFPU_FAST
RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip

View file

@ -206,11 +206,10 @@ void opae_sim::sRxPort_bus() {
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 1;
memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, cci_rd_it->block.data(), CACHE_BLOCK_SIZE);
vortex_afu_->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata;
/*printf("*** [vlsim] read-rsp: addr=%ld, mdata=%d, data=", cci_rd_it->addr, cci_rd_it->mdata);
/*printf("%0ld: [sim] CCI Rd Rsp: addr=%ld, mdata=%d, data=", timestamp, cci_rd_it->addr, cci_rd_it->mdata);
for (int i = 0; i < CACHE_BLOCK_SIZE; ++i)
printf("%02x", cci_rd_it->block[CACHE_BLOCK_SIZE-1-i]);
printf("\n");*/
fflush(stdout);
cci_reads_.erase(cci_rd_it);
}
}
@ -225,8 +224,7 @@ void opae_sim::sTxPort_bus() {
cci_req.mdata = vortex_afu_->af2cp_sTxPort_c0_hdr_mdata;
auto host_ptr = (uint64_t*)(vortex_afu_->af2cp_sTxPort_c0_hdr_address * CACHE_BLOCK_SIZE);
memcpy(cci_req.block.data(), host_ptr, CACHE_BLOCK_SIZE);
//printf("*** [vlsim] read-req: addr=%ld, mdata=%d\n", vortex_afu_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata);
fflush(stdout);
//printf("%0ld: [sim] CCI Rd Req: addr=%ld, mdata=%d\n", timestamp, vortex_afu_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata);
cci_reads_.emplace_back(cci_req);
}
@ -265,12 +263,12 @@ void opae_sim::avs_bus() {
memcpy(vortex_afu_->avs_readdata, dram_rd_it->block.data(), CACHE_BLOCK_SIZE);
uint32_t tag = dram_rd_it->tag;
dram_reads_.erase(dram_rd_it);
/*printf("%0ld: VLSIM: DRAM rsp: addr=%x, pending={", timestamp, tag);
/*printf("%0ld: [sim] DRAM Rd Rsp: addr=%x, pending={", timestamp, tag);
for (auto& req : dram_reads_) {
if (req.cycles_left != 0)
printf(" !%0x", req.tag);
printf(" !%0x", req.tag);
else
printf(" %0x", req.tag);
printf(" %0x", req.tag);
}
printf("}\n");*/
}
@ -288,7 +286,8 @@ void opae_sim::avs_bus() {
// process DRAM requests
if (!dram_stalled) {
if (vortex_afu_->avs_write) {
assert(!vortex_afu_->avs_read || !vortex_afu_->avs_write);
if (vortex_afu_->avs_write) {
assert(0 == vortex_afu_->mem_bank_select);
uint64_t byteen = vortex_afu_->avs_byteenable;
unsigned base_addr = (vortex_afu_->avs_address * CACHE_BLOCK_SIZE);
@ -307,12 +306,12 @@ void opae_sim::avs_bus() {
ram_.read(base_addr, CACHE_BLOCK_SIZE, dram_req.block.data());
dram_req.tag = base_addr;
dram_reads_.emplace_back(dram_req);
/*printf("%0ld: VLSIM: DRAM req: addr=%x, pending={", timestamp, base_addr);
/*printf("%0ld: [sim] DRAM Rd Req: addr=%x, pending={", timestamp, base_addr);
for (auto& req : dram_reads_) {
if (req.cycles_left != 0)
printf(" !%0x", req.tag);
printf(" !%0x", req.tag);
else
printf(" %0x", req.tag);
printf(" %0x", req.tag);
}
printf("}\n");*/
}

View file

@ -7,11 +7,11 @@
#include <assert.h>
#include <cmath>
#ifdef USE_VLSIM
#include "vlsim/fpga.h"
#else
#if defined(USE_FPGA) || defined(USE_ASE)
#include <opae/fpga.h>
#include <uuid/uuid.h>
#elif defined(USE_VLSIM)
#include "vlsim/fpga.h"
#endif
#include <vortex.h>

View file

@ -1,5 +1,5 @@
CFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -Wfatal-errors
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
#CFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -Wfatal-errors
CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
CFLAGS += -I../../include -I../../../hw/simulate -I../../../hw
@ -65,7 +65,7 @@ else
endif
# use DPI FPU
#VL_FLAGS += -DFPU_FAST
VL_FLAGS += -DFPU_FAST
PROJECT = libvortex.so
# PROJECT = libvortex.dylib

View file

@ -92,7 +92,7 @@ int run_test(const kernel_arg_t& kernel_arg,
}
}
if (errors != 0) {
std::cout << "Found " << errors << " errors!" << std::endl;
std::cout << "Found " << std::dec << errors << " errors!" << std::endl;
std::cout << "FAILED!" << std::endl;
return 1;
}

View file

@ -260,7 +260,7 @@ int main(int argc, char *argv[]) {
(void*)vx_host_ptr(src1_buf),
(void*)vx_host_ptr(src2_buf));
if (errors != 0) {
std::cout << "found " << errors << " errors!" << std::endl;
std::cout << "found " << std::dec << errors << " errors!" << std::endl;
std::cout << "Test" << t << "-" << name << " FAILED!" << std::endl << std::flush;
if (stop_on_error) {
cleanup();

View file

@ -14,29 +14,36 @@ union Float_t {
} parts;
};
inline float fround(float x, int32_t precision = 4) {
inline float fround(float x, int32_t precision = 8) {
auto power_of_10 = std::pow(10, precision);
return std::round(x * power_of_10) / power_of_10;
}
inline bool almost_equal_eps(float a, float b, float eps = std::numeric_limits<float>::epsilon()) {
auto tolerance = std::min(fabs(a), fabs(b)) * eps;
return fabs(a - b) <= tolerance;
// Relative-tolerance float comparison.
// Returns true when a and b are exactly equal, or when both are finite and
// |a - b| <= epsilon * max(|a|, |b|) * ulp, where epsilon is the machine
// epsilon of float. On a mismatch the difference and the tolerance are
// printed to std::cout and false is returned.
inline bool almost_equal_eps(float a, float b, int ulp = 128) {
  // Exact match; also accepts two equal infinities, whose difference is NaN.
  if (a == b)
    return true;
  auto eps = std::numeric_limits<float>::epsilon() * (std::max(std::fabs(a), std::fabs(b)) * ulp);
  auto d = std::fabs(a - b);
  // NaN or infinite operands can never be "almost" equal: without the
  // finiteness check an infinite eps would accept infinity vs. any finite
  // value. The comparison is written as !(d <= eps) so a NaN difference is
  // rejected as well (d > eps is false for NaN).
  if (!std::isfinite(a) || !std::isfinite(b) || !(d <= eps)) {
    std::cout << "*** almost_equal_eps: d=" << d << ", eps=" << eps << std::endl;
    return false;
  }
  return true;
}
inline bool almost_equal_ulp(float a, float b, int32_t ulp = 4) {
inline bool almost_equal_ulp(float a, float b, int32_t ulp = 6) {
Float_t fa{a}, fb{b};
auto d = std::abs(fa.i - fb.i);
if (d > ulp) {
std::cout << "*** float compare: a=" << a << ", b=" << b << ", ulp=" << d << ", ia=" << std::hex << fa.i << ", ib=" << fb.i << std::endl;
std::cout << "*** almost_equal_ulp: a=" << a << ", b=" << b << ", ulp=" << d << ", ia=" << std::hex << fa.i << ", ib=" << fb.i << std::endl;
return false;
}
return true;
}
inline bool almost_equal(float a, float b) {
if (almost_equal_eps(a, b))
if (a == b)
return true;
/*if (almost_equal_eps(a, b))
return true;*/
return almost_equal_ulp(a, b);
}

View file

@ -45,19 +45,19 @@ module VX_avs_wrapper #(
reg [AVS_BANKS_BITS-1:0] avs_bankselect_r;
reg [AVS_BURSTW-1:0] avs_burstcount_r;
wire avs_rtq_push = !dram_req_rw && dram_req_valid && dram_req_ready;
wire avs_rtq_pop = dram_rsp_valid && dram_rsp_ready;
wire avs_reqq_push = dram_req_valid && dram_req_ready && !dram_req_rw;
wire avs_reqq_pop = dram_rsp_valid && dram_rsp_ready;
wire avs_rdq_push = avs_readdatavalid;
wire avs_rdq_pop = avs_rtq_pop;
wire avs_rdq_empty;
wire avs_rspq_push = avs_readdatavalid;
wire avs_rspq_pop = avs_reqq_pop;
wire avs_rspq_empty;
reg [RD_QUEUE_ADDRW-1:0] avs_pending_reads;
wire [RD_QUEUE_ADDRW-1:0] avs_pending_reads_n;
assign avs_pending_reads_n = avs_pending_reads
+ RD_QUEUE_ADDRW'((avs_rtq_push && !avs_rdq_pop) ? 1 :
(avs_rdq_pop && !avs_rtq_push) ? -1 : 0);
+ RD_QUEUE_ADDRW'((avs_reqq_push && !avs_rspq_pop) ? 1 :
(avs_rspq_pop && !avs_reqq_push) ? -1 : 0);
always @(posedge clk) begin
if (reset) begin
@ -75,9 +75,9 @@ module VX_avs_wrapper #(
) rd_req_queue (
.clk (clk),
.reset (reset),
.push (avs_rtq_push),
.push (avs_reqq_push),
.pop (avs_reqq_pop),
.data_in (dram_req_tag),
.pop (avs_rtq_pop),
.data_out (dram_rsp_tag),
`UNUSED_PIN (empty),
`UNUSED_PIN (full),
@ -90,37 +90,38 @@ module VX_avs_wrapper #(
) rd_rsp_queue (
.clk (clk),
.reset (reset),
.push (avs_rdq_push),
.data_in (avs_readdata),
.pop (avs_rdq_pop),
.push (avs_rspq_push),
.pop (avs_rspq_pop),
.data_in (avs_readdata),
.data_out (dram_rsp_data),
.empty (avs_rdq_empty),
.empty (avs_rspq_empty),
`UNUSED_PIN (full),
`UNUSED_PIN (size)
);
assign avs_read = dram_req_valid && !dram_req_rw;
assign avs_write = dram_req_valid && dram_req_rw;
wire rsp_queue_ready = (avs_pending_reads != RD_QUEUE_SIZE);
assign avs_read = dram_req_valid && !dram_req_rw && rsp_queue_ready;
assign avs_write = dram_req_valid && dram_req_rw && rsp_queue_ready;
assign avs_address = dram_req_addr;
assign avs_byteenable = dram_req_byteen;
assign avs_writedata = dram_req_data;
assign dram_req_ready = !avs_waitrequest
&& (avs_pending_reads < RD_QUEUE_SIZE);
assign dram_req_ready = !avs_waitrequest && rsp_queue_ready;
assign avs_burstcount = avs_burstcount_r;
assign avs_bankselect = avs_bankselect_r;
assign dram_rsp_valid = !avs_rdq_empty;
assign dram_rsp_valid = !avs_rspq_empty;
`ifdef DBG_PRINT_AVS
always @(posedge clk) begin
if (dram_req_valid && dram_req_ready) begin
if (dram_req_rw)
$display("%t: AVS Wr Req: addr=%0h, byteen=%0h, tag=%0h, data=%0h", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_byteenable, dram_req_tag, avs_writedata);
$display("%t: AVS Wr Req: addr=%0h, byteen=%0h, tag=%0h, data=%0h", $time, `DRAM_TO_BYTE_ADDR(dram_req_addr), dram_req_byteen, dram_req_tag, dram_req_data);
else
$display("%t: AVS Rd Req: addr=%0h, byteen=%0h, tag=%0h, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_byteenable, dram_req_tag, avs_pending_reads_n);
$display("%t: AVS Rd Req: addr=%0h, byteen=%0h, tag=%0h, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(dram_req_addr), dram_req_byteen, dram_req_tag, avs_pending_reads_n);
end
if (dram_rsp_valid && dram_rsp_ready) begin
$display("%t: AVS Rd Rsp: data=%0h, pending=%0d", $time, avs_readdata, avs_pending_reads_n);
$display("%t: AVS Rd Rsp: tag=%0h, data=%0h, pending=%0d", $time, dram_rsp_tag, dram_rsp_data, avs_pending_reads_n);
end
end
`endif

View file

@ -501,7 +501,6 @@ wire [AVS_REQ_TAGW-1:0] vx_dram_rsp_tag_unqual;
wire cci_dram_rd_req_valid, cci_dram_wr_req_valid;
wire [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr;
wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_dout;
wire [VX_DRAM_LINE_IDX-1:0] vx_dram_req_idx, vx_dram_rsp_idx;
//--
@ -526,20 +525,19 @@ assign vx_dram_req_valid_qual = vx_dram_req_valid && vx_enabled;
assign vx_dram_req_addr_qual = vx_dram_req_addr[`VX_DRAM_ADDR_WIDTH-1:`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH];
if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin
assign vx_dram_req_idx = vx_dram_req_addr[VX_DRAM_LINE_IDX-1:0];
wire [VX_DRAM_LINE_IDX-1:0] vx_dram_req_idx = vx_dram_req_addr[VX_DRAM_LINE_IDX-1:0];
wire [VX_DRAM_LINE_IDX-1:0] vx_dram_rsp_idx = vx_dram_rsp_tag_unqual[VX_DRAM_LINE_IDX-1:0];
assign vx_dram_req_byteen_qual = 64'(vx_dram_req_byteen) << (6'(vx_dram_req_addr[VX_DRAM_LINE_IDX-1:0]) << (VX_DRAM_LINE_LW-3));
assign vx_dram_req_data_qual = DRAM_LINE_WIDTH'(vx_dram_req_data) << ((DRAM_LINE_LW'(vx_dram_req_idx)) << VX_DRAM_LINE_LW);
assign vx_dram_req_tag_qual = {vx_dram_req_tag, vx_dram_req_idx};
assign vx_dram_rsp_data = vx_dram_rsp_data_unqual[vx_dram_rsp_idx];
assign vx_dram_rsp_data = vx_dram_rsp_data_unqual[vx_dram_rsp_idx];
end else begin
assign vx_dram_req_idx = VX_DRAM_LINE_IDX'(0);
assign vx_dram_req_byteen_qual = vx_dram_req_byteen;
assign vx_dram_req_tag_qual = vx_dram_req_tag;
assign vx_dram_req_data_qual = vx_dram_req_data;
assign vx_dram_rsp_data = vx_dram_rsp_data_unqual;
end
assign vx_dram_rsp_idx = vx_dram_rsp_tag_unqual[VX_DRAM_LINE_IDX-1:0];
assign vx_dram_rsp_tag = vx_dram_rsp_tag_unqual[`VX_DRAM_TAG_WIDTH+VX_DRAM_LINE_IDX-1:VX_DRAM_LINE_IDX];
//--
@ -723,15 +721,15 @@ always @(posedge clk) begin
cci_rd_req_wait <= 0; // restart new request batch
end
`ifdef DBG_PRINT_OPAE
$display("%t: CCI Rd Rsp: idx=%0d, ctr=%0d", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr);
$display("%t: CCI Rd Rsp: idx=%0d, ctr=%0d, data=%0h", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data);
`endif
end
if (cci_rdq_pop) begin
/*if (cci_rdq_pop) begin
`ifdef DBG_PRINT_OPAE
$display("%t: CCI Rd Queue Pop: pending=%0d", $time, cci_pending_reads_next);
`endif
end
end*/
if (cci_dram_wr_req_fire) begin
cci_dram_wr_req_addr_unqual <= cci_dram_wr_req_addr_unqual + ((CCI_RD_RQ_TAGW'(cci_dram_wr_req_ctr) == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) ? DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0));
@ -836,15 +834,15 @@ begin
cci_wr_req_addr <= cci_wr_req_addr + t_ccip_clAddr'(1);
cci_wr_req_ctr <= cci_wr_req_ctr - DRAM_ADDR_WIDTH'(1);
`ifdef DBG_PRINT_OPAE
$display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next);
$display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d, data=%0h", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next, af2cp_sTxPort.c1.data);
`endif
end
`ifdef DBG_PRINT_OPAE
/*`ifdef DBG_PRINT_OPAE
if (cci_wr_rsp_fire) begin
$display("%t: CCI Wr Rsp: pending=%0d", $time, cci_pending_writes_next);
end
`endif
`endif*/
if (cci_dram_rd_req_fire) begin
cci_dram_rd_req_addr_unqual <= cci_dram_rd_req_addr_unqual + DRAM_ADDR_WIDTH'(1);

View file

@ -304,30 +304,60 @@ module VX_cluster #(
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] core_snp_fwdin_tag;
wire[`NUM_CORES-1:0] core_snp_fwdin_ready;
wire snp_fwd_rsp_valid;
wire [`L2DRAM_ADDR_WIDTH-1:0] snp_fwd_rsp_addr;
wire snp_fwd_rsp_invalidate;
wire [`L2SNP_TAG_WIDTH-1:0] snp_fwd_rsp_tag;
wire snp_fwd_rsp_ready;
// Aggregated DRAM response ready signals over all L2 request ports.
// A port index with bit 0 clear uses the D-cache ready of core (index/2);
// a port index with bit 0 set uses the I-cache ready of core (index/2).
// A port counts as "not blocking" when it is ready or has no valid response.
//   core_dram_rsp_ready_other[i] : all ports other than i are not blocking.
//   core_dram_rsp_ready_all      : all ports, including i, are not blocking.
reg [`L2NUM_REQUESTS-1:0] core_dram_rsp_ready_other;
reg core_dram_rsp_ready_all;
always @(*) begin
// start from "all clear" and AND in each port's condition below
core_dram_rsp_ready_other = {`L2NUM_REQUESTS{1'b1}};
core_dram_rsp_ready_all = 1'b1;
for (integer i = 0; i < `L2NUM_REQUESTS; i++) begin
// accumulate the condition of every port j except i itself
for (integer j = 0; j < `L2NUM_REQUESTS; j++) begin
if (i != j) begin
if (0 == (j & 1))
core_dram_rsp_ready_other[i] &= (per_core_D_dram_rsp_ready [(j/2)] | !core_dram_rsp_valid [j]);
else
core_dram_rsp_ready_other[i] &= (per_core_I_dram_rsp_ready [(j/2)] | !core_dram_rsp_valid [j]);
end
end
// accumulate port i's own condition into the global ready
if (0 == (i & 1))
core_dram_rsp_ready_all &= (per_core_D_dram_rsp_ready [(i/2)] | !core_dram_rsp_valid [i]);
else
core_dram_rsp_ready_all &= (per_core_I_dram_rsp_ready [(i/2)] | !core_dram_rsp_valid [i]);
end
end
for (genvar i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
assign core_dram_req_valid [i] = per_core_D_dram_req_valid[(i/2)];
assign core_dram_req_valid [i+1] = per_core_I_dram_req_valid[(i/2)];
assign core_dram_req_valid [i] = per_core_D_dram_req_valid [(i/2)];
assign core_dram_req_valid [i+1] = per_core_I_dram_req_valid [(i/2)];
assign core_dram_req_rw [i] = per_core_D_dram_req_rw[(i/2)];
assign core_dram_req_rw [i+1] = per_core_I_dram_req_rw[(i/2)];
assign core_dram_req_rw [i] = per_core_D_dram_req_rw [(i/2)];
assign core_dram_req_rw [i+1] = per_core_I_dram_req_rw [(i/2)];
assign core_dram_req_byteen [i] = per_core_D_dram_req_byteen[(i/2)];
assign core_dram_req_byteen [i+1] = per_core_I_dram_req_byteen[(i/2)];
assign core_dram_req_byteen [i] = per_core_D_dram_req_byteen [(i/2)];
assign core_dram_req_byteen [i+1] = per_core_I_dram_req_byteen [(i/2)];
assign core_dram_req_addr [i] = per_core_D_dram_req_addr[(i/2)];
assign core_dram_req_addr [i+1] = per_core_I_dram_req_addr[(i/2)];
assign core_dram_req_addr [i] = per_core_D_dram_req_addr [(i/2)];
assign core_dram_req_addr [i+1] = per_core_I_dram_req_addr [(i/2)];
assign core_dram_req_data [i] = per_core_D_dram_req_data[(i/2)];
assign core_dram_req_data [i+1] = per_core_I_dram_req_data[(i/2)];
assign core_dram_req_data [i] = per_core_D_dram_req_data [(i/2)];
assign core_dram_req_data [i+1] = per_core_I_dram_req_data [(i/2)];
assign core_dram_req_tag [i] = per_core_D_dram_req_tag[(i/2)];
assign core_dram_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)];
assign core_dram_req_tag [i] = per_core_D_dram_req_tag [(i/2)];
assign core_dram_req_tag [i+1] = per_core_I_dram_req_tag [(i/2)];
assign per_core_D_dram_req_ready [(i/2)] = core_dram_req_ready;
assign per_core_I_dram_req_ready [(i/2)] = core_dram_req_ready;
assign per_core_D_dram_rsp_valid [(i/2)] = core_dram_rsp_valid[i] && core_dram_rsp_ready;
assign per_core_I_dram_rsp_valid [(i/2)] = core_dram_rsp_valid[i+1] && core_dram_rsp_ready;
assign per_core_D_dram_rsp_valid [(i/2)] = core_dram_rsp_valid[i] & core_dram_rsp_ready_other [i];
assign per_core_I_dram_rsp_valid [(i/2)] = core_dram_rsp_valid[i+1] & core_dram_rsp_ready_other [i+1];
assign per_core_D_dram_rsp_data [(i/2)] = core_dram_rsp_data[i];
assign per_core_I_dram_rsp_data [(i/2)] = core_dram_rsp_data[i+1];
@ -346,32 +376,63 @@ module VX_cluster #(
assign per_core_snp_rsp_ready [(i/2)] = core_snp_fwdin_ready [(i/2)];
end
assign core_dram_rsp_ready = (& per_core_D_dram_rsp_ready) && (& per_core_I_dram_rsp_ready);
assign core_dram_rsp_ready = core_dram_rsp_ready_all;
VX_snp_forwarder #(
.CACHE_ID (`L2CACHE_ID),
.NUM_REQUESTS (`NUM_CORES),
.SRC_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH),
.DST_ADDR_WIDTH (`DDRAM_ADDR_WIDTH),
.SNP_TAG_WIDTH (`L2SNP_TAG_WIDTH),
.SNRQ_SIZE (`L2SNRQ_SIZE)
) snp_forwarder (
.clk (clk),
.reset (reset),
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_invalidate (snp_req_invalidate),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
.snp_rsp_valid (snp_fwd_rsp_valid),
.snp_rsp_addr (snp_fwd_rsp_addr),
.snp_rsp_invalidate (snp_fwd_rsp_invalidate),
.snp_rsp_tag (snp_fwd_rsp_tag),
.snp_rsp_ready (snp_fwd_rsp_ready),
.snp_fwdout_valid (core_snp_fwdout_valid),
.snp_fwdout_addr (core_snp_fwdout_addr),
.snp_fwdout_invalidate(core_snp_fwdout_invalidate),
.snp_fwdout_tag (core_snp_fwdout_tag),
.snp_fwdout_ready (core_snp_fwdout_ready),
.snp_fwdin_valid (core_snp_fwdin_valid),
.snp_fwdin_tag (core_snp_fwdin_tag),
.snp_fwdin_ready (core_snp_fwdin_ready)
);
VX_cache #(
.CACHE_ID (`L2CACHE_ID),
.CACHE_SIZE (`L2CACHE_SIZE),
.BANK_LINE_SIZE (`L2BANK_LINE_SIZE),
.NUM_BANKS (`L2NUM_BANKS),
.WORD_SIZE (`L2WORD_SIZE),
.NUM_REQUESTS (`L2NUM_REQUESTS),
.CREQ_SIZE (`L2CREQ_SIZE),
.MRVQ_SIZE (`L2MRVQ_SIZE),
.DRFQ_SIZE (`L2DRFQ_SIZE),
.SNRQ_SIZE (`L2SNRQ_SIZE),
.CWBQ_SIZE (`L2CWBQ_SIZE),
.DREQ_SIZE (`L2DREQ_SIZE),
.SNPQ_SIZE (`L2SNPQ_SIZE),
.DRAM_ENABLE (1),
.FLUSH_ENABLE (1),
.WRITE_ENABLE (1),
.SNOOP_FORWARDING (1),
.CORE_TAG_WIDTH (`DDRAM_TAG_WIDTH),
.CORE_TAG_ID_BITS (0),
.DRAM_TAG_WIDTH (`L2DRAM_TAG_WIDTH),
.NUM_SNP_REQUESTS (`NUM_CORES),
.SNP_REQ_TAG_WIDTH (`L2SNP_TAG_WIDTH),
.SNP_FWD_TAG_WIDTH (`DSNP_TAG_WIDTH)
.CACHE_ID (`L2CACHE_ID),
.CACHE_SIZE (`L2CACHE_SIZE),
.BANK_LINE_SIZE (`L2BANK_LINE_SIZE),
.NUM_BANKS (`L2NUM_BANKS),
.WORD_SIZE (`L2WORD_SIZE),
.NUM_REQUESTS (`L2NUM_REQUESTS),
.CREQ_SIZE (`L2CREQ_SIZE),
.MRVQ_SIZE (`L2MRVQ_SIZE),
.DRFQ_SIZE (`L2DRFQ_SIZE),
.SNRQ_SIZE (`L2SNRQ_SIZE),
.CWBQ_SIZE (`L2CWBQ_SIZE),
.DREQ_SIZE (`L2DREQ_SIZE),
.SNPQ_SIZE (`L2SNPQ_SIZE),
.DRAM_ENABLE (1),
.FLUSH_ENABLE (1),
.WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`DDRAM_TAG_WIDTH),
.CORE_TAG_ID_BITS (0),
.DRAM_TAG_WIDTH (`L2DRAM_TAG_WIDTH),
.SNP_TAG_WIDTH (`L2SNP_TAG_WIDTH)
) l2cache (
`SCOPE_BIND_VX_cluster_l2cache
@ -409,29 +470,17 @@ module VX_cluster #(
.dram_rsp_ready (dram_rsp_ready),
// Snoop request
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_invalidate (snp_req_invalidate),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
.snp_req_valid (snp_fwd_rsp_valid),
.snp_req_addr (snp_fwd_rsp_addr),
.snp_req_invalidate (snp_fwd_rsp_invalidate),
.snp_req_tag (snp_fwd_rsp_tag),
.snp_req_ready (snp_fwd_rsp_ready),
// Snoop response
.snp_rsp_valid (snp_rsp_valid),
.snp_rsp_tag (snp_rsp_tag),
.snp_rsp_ready (snp_rsp_ready),
// Snoop forwarding out
.snp_fwdout_valid (core_snp_fwdout_valid),
.snp_fwdout_addr (core_snp_fwdout_addr),
.snp_fwdout_invalidate(core_snp_fwdout_invalidate),
.snp_fwdout_tag (core_snp_fwdout_tag),
.snp_fwdout_ready (core_snp_fwdout_ready),
// Snoop forwarding in
.snp_fwdin_valid (core_snp_fwdin_valid),
.snp_fwdin_tag (core_snp_fwdin_tag),
.snp_fwdin_ready (core_snp_fwdin_ready),
// Miss status
`UNUSED_PIN (miss_vec)
);
@ -508,11 +557,12 @@ module VX_cluster #(
if (`NUM_CORES > 1) begin
VX_snp_forwarder #(
.CACHE_ID (`L2CACHE_ID),
.BANK_LINE_SIZE (`L2BANK_LINE_SIZE),
.NUM_REQUESTS (`NUM_CORES),
.SNRQ_SIZE (`L2SNRQ_SIZE),
.SNP_REQ_TAG_WIDTH (`L2SNP_TAG_WIDTH)
.CACHE_ID (`L2CACHE_ID),
.NUM_REQUESTS (`NUM_CORES),
.SRC_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH),
.DST_ADDR_WIDTH (`DDRAM_ADDR_WIDTH),
.SNP_TAG_WIDTH (`L2SNP_TAG_WIDTH),
.SNRQ_SIZE (`L2SNRQ_SIZE)
) snp_forwarder (
.clk (clk),
.reset (reset),

View file

@ -8,7 +8,7 @@
`endif
`ifndef NUM_CORES
`define NUM_CORES 2
`define NUM_CORES 4
`endif
`ifndef NUM_WARPS
@ -23,8 +23,20 @@
`define NUM_BARRIERS 4
`endif
`ifndef L2_ENABLE
`define L2_ENABLE (`NUM_CORES > 2)
`endif
`ifndef L3_ENABLE
`define L3_ENABLE (`NUM_CLUSTERS > 1)
`endif
`ifndef GLOBAL_BLOCK_SIZE
`define GLOBAL_BLOCK_SIZE 16
`define GLOBAL_BLOCK_SIZE 64
`endif
`ifndef L1_BLOCK_SIZE
`define L1_BLOCK_SIZE 16
`endif
`ifndef STARTUP_ADDR
@ -57,14 +69,6 @@
`define FRAME_BUFFER_SIZE (FRAME_BUFFER_WIDTH * FRAME_BUFFER_HEIGHT)
`ifndef L2_ENABLE
`define L2_ENABLE 0
`endif
`ifndef L3_ENABLE
`define L3_ENABLE (`NUM_CLUSTERS > 1)
`endif
`ifndef EXT_M_DISABLE
`define EXT_M_ENABLE
`endif
@ -159,7 +163,7 @@
`define CSR_MIMPID 12'hF13
`define CSR_MHARTID 12'hF14
// Pipeline Queues ============================================================
// Pipeline Queues ////////////////////////////////////////////////////////////
// Size of instruction queue
`ifndef IBUF_SIZE
@ -181,28 +185,18 @@
`define FPUQ_SIZE 8
`endif
// Dcache Configurable Knobs ==================================================
// Dcache Configurable Knobs //////////////////////////////////////////////////
// Size of cache in bytes
`ifndef DCACHE_SIZE
`define DCACHE_SIZE 4096
`define DCACHE_SIZE 8192
`endif
// Size of line inside a bank in bytes
`ifndef DBANK_LINE_SIZE
`define DBANK_LINE_SIZE `GLOBAL_BLOCK_SIZE
`endif
// Number of banks {1, 2, 4, 8,...}
// Number of banks
`ifndef DNUM_BANKS
`define DNUM_BANKS 4
`endif
// Size of a word in bytes
`ifndef DWORD_SIZE
`define DWORD_SIZE 4
`endif
// Core Request Queue Size
`ifndef DCREQ_SIZE
`define DCREQ_SIZE `NUM_WARPS
@ -238,21 +232,11 @@
`define DSNRQ_SIZE 8
`endif
// Icache Configurable Knobs ==================================================
// Icache Configurable Knobs //////////////////////////////////////////////////
// Size of cache in bytes
`ifndef ICACHE_SIZE
`define ICACHE_SIZE 2048
`endif
// Size of line inside a bank in bytes
`ifndef IBANK_LINE_SIZE
`define IBANK_LINE_SIZE `GLOBAL_BLOCK_SIZE
`endif
// Size of a word in bytes
`ifndef IWORD_SIZE
`define IWORD_SIZE 4
`define ICACHE_SIZE 8192
`endif
// Core Request Queue Size
@ -280,28 +264,18 @@
`define IDRFQ_SIZE 8
`endif
// SM Configurable Knobs ======================================================
// SM Configurable Knobs //////////////////////////////////////////////////////
// Size of cache in bytes
`ifndef SCACHE_SIZE
`define SCACHE_SIZE 1024
`define SCACHE_SIZE 4096
`endif
// Size of line inside a bank in bytes
`ifndef SBANK_LINE_SIZE
`define SBANK_LINE_SIZE `GLOBAL_BLOCK_SIZE
`endif
// Number of banks {1, 2, 4, 8,...}
// Number of banks
`ifndef SNUM_BANKS
`define SNUM_BANKS 4
`endif
// Size of a word in bytes
`ifndef SWORD_SIZE
`define SWORD_SIZE 4
`endif
// Core Request Queue Size
`ifndef SCREQ_SIZE
`define SCREQ_SIZE `NUM_WARPS
@ -312,28 +286,18 @@
`define SCWBQ_SIZE `SCREQ_SIZE
`endif
// L2cache Configurable Knobs =================================================
// L2cache Configurable Knobs /////////////////////////////////////////////////
// Size of cache in bytes
`ifndef L2CACHE_SIZE
`define L2CACHE_SIZE 4096
`define L2CACHE_SIZE 131072
`endif
// Size of line inside a bank in bytes
`ifndef L2BANK_LINE_SIZE
`define L2BANK_LINE_SIZE `GLOBAL_BLOCK_SIZE
`endif
// Number of banks {1, 2, 4, 8,...}
// Number of banks
`ifndef L2NUM_BANKS
`define L2NUM_BANKS 4
`endif
// Size of a word in bytes
`ifndef L2WORD_SIZE
`define L2WORD_SIZE `L2BANK_LINE_SIZE
`endif
// Core Request Queue Size
`ifndef L2CREQ_SIZE
`define L2CREQ_SIZE 8
@ -369,28 +333,18 @@
`define L2SNPQ_SIZE 8
`endif
// L3cache Configurable Knobs =================================================
// L3cache Configurable Knobs /////////////////////////////////////////////////
// Size of cache in bytes
`ifndef L3CACHE_SIZE
`define L3CACHE_SIZE 8192
`define L3CACHE_SIZE 262144
`endif
// Size of line inside a bank in bytes
`ifndef L3BANK_LINE_SIZE
`define L3BANK_LINE_SIZE `GLOBAL_BLOCK_SIZE
`endif
// Number of banks {1, 2, 4, 8,...}
// Number of banks
`ifndef L3NUM_BANKS
`define L3NUM_BANKS 4
`endif
// Size of a word in bytes
`ifndef L3WORD_SIZE
`define L3WORD_SIZE `L3BANK_LINE_SIZE
`endif
// Core Request Queue Size
`ifndef L3CREQ_SIZE
`define L3CREQ_SIZE 8

View file

@ -6,203 +6,203 @@
///////////////////////////////////////////////////////////////////////////////
`define NW_BITS `LOG2UP(`NUM_WARPS)
`define NW_BITS `LOG2UP(`NUM_WARPS)
`define NT_BITS `LOG2UP(`NUM_THREADS)
`define NT_BITS `LOG2UP(`NUM_THREADS)
`define NC_BITS `LOG2UP(`NUM_CORES)
`define NC_BITS `LOG2UP(`NUM_CORES)
`define NB_BITS `LOG2UP(`NUM_BARRIERS)
`define NB_BITS `LOG2UP(`NUM_BARRIERS)
`define REQS_BITS `LOG2UP(NUM_REQUESTS)
`define REQS_BITS `LOG2UP(NUM_REQUESTS)
`ifdef EXT_F_ENABLE
`define NUM_REGS 64
`define NUM_REGS 64
`else
`define NUM_REGS 32
`define NUM_REGS 32
`endif
`define NR_BITS `LOG2UP(`NUM_REGS)
`define NR_BITS `LOG2UP(`NUM_REGS)
`define CSR_ADDR_BITS 12
`define CSR_ADDR_BITS 12
`define CSR_WIDTH 12
`define CSR_WIDTH 12
///////////////////////////////////////////////////////////////////////////////
`define INST_LUI 7'b0110111
`define INST_AUIPC 7'b0010111
`define INST_JAL 7'b1101111
`define INST_JALR 7'b1100111
`define INST_B 7'b1100011 // branch instructions
`define INST_L 7'b0000011 // load instructions
`define INST_S 7'b0100011 // store instructions
`define INST_I 7'b0010011 // immediate instructions
`define INST_R 7'b0110011 // register instructions
`define INST_F 7'b0001111 // Fence instructions
`define INST_SYS 7'b1110011 // system instructions
`define INST_LUI 7'b0110111
`define INST_AUIPC 7'b0010111
`define INST_JAL 7'b1101111
`define INST_JALR 7'b1100111
`define INST_B 7'b1100011 // branch instructions
`define INST_L 7'b0000011 // load instructions
`define INST_S 7'b0100011 // store instructions
`define INST_I 7'b0010011 // immediate instructions
`define INST_R 7'b0110011 // register instructions
`define INST_F 7'b0001111 // Fence instructions
`define INST_SYS 7'b1110011 // system instructions
`define INST_FL 7'b0000111 // float load instruction
`define INST_FS 7'b0100111 // float store instruction
`define INST_FMADD 7'b1000011
`define INST_FMSUB 7'b1000111
`define INST_FNMSUB 7'b1001011
`define INST_FNMADD 7'b1001111
`define INST_FCI 7'b1010011 // float common instructions
`define INST_FL 7'b0000111 // float load instruction
`define INST_FS 7'b0100111 // float store instruction
`define INST_FMADD 7'b1000011
`define INST_FMSUB 7'b1000111
`define INST_FNMSUB 7'b1001011
`define INST_FNMADD 7'b1001111
`define INST_FCI 7'b1010011 // float common instructions
`define INST_GPU 7'b1101011
`define INST_GPU 7'b1101011
///////////////////////////////////////////////////////////////////////////////
`define BYTEEN_SB 3'h0
`define BYTEEN_SH 3'h1
`define BYTEEN_SW 3'h2
`define BYTEEN_UB 3'h4
`define BYTEEN_UH 3'h5
`define BYTEEN_BITS 3
`define BYTEEN_TYPE(x) x[1:0]
`define BYTEEN_SB 3'h0
`define BYTEEN_SH 3'h1
`define BYTEEN_SW 3'h2
`define BYTEEN_UB 3'h4
`define BYTEEN_UH 3'h5
`define BYTEEN_BITS 3
`define BYTEEN_TYPE(x) x[1:0]
`define FRM_RNE 3'b000 // round to nearest even
`define FRM_RTZ 3'b001 // round to zero
`define FRM_RDN 3'b010 // round to -inf
`define FRM_RUP 3'b011 // round to +inf
`define FRM_RMM 3'b100 // round to nearest max magnitude
`define FRM_DYN 3'b111 // dynamic mode
`define FRM_BITS 3
`define FRM_RNE 3'b000 // round to nearest even
`define FRM_RTZ 3'b001 // round to zero
`define FRM_RDN 3'b010 // round to -inf
`define FRM_RUP 3'b011 // round to +inf
`define FRM_RMM 3'b100 // round to nearest max magnitude
`define FRM_DYN 3'b111 // dynamic mode
`define FRM_BITS 3
///////////////////////////////////////////////////////////////////////////////
`define EX_NOP 3'h0
`define EX_ALU 3'h1
`define EX_LSU 3'h2
`define EX_CSR 3'h3
`define EX_MUL 3'h4
`define EX_FPU 3'h5
`define EX_GPU 3'h6
`define EX_BITS 3
`define EX_NOP 3'h0
`define EX_ALU 3'h1
`define EX_LSU 3'h2
`define EX_CSR 3'h3
`define EX_MUL 3'h4
`define EX_FPU 3'h5
`define EX_GPU 3'h6
`define EX_BITS 3
`define NUM_EXS 6
`define NE_BITS `LOG2UP(`NUM_EXS)
`define NUM_EXS 6
`define NE_BITS `LOG2UP(`NUM_EXS)
///////////////////////////////////////////////////////////////////////////////
`define OP_BITS 4
`define MOD_BITS 3
`define OP_BITS 4
`define MOD_BITS 3
`define ALU_ADD 4'b0000
`define ALU_LUI 4'b0010
`define ALU_AUIPC 4'b0011
`define ALU_SLTU 4'b0100
`define ALU_SLT 4'b0101
`define ALU_SRL 4'b1000
`define ALU_SRA 4'b1001
`define ALU_SUB 4'b1011
`define ALU_AND 4'b1100
`define ALU_OR 4'b1101
`define ALU_XOR 4'b1110
`define ALU_SLL 4'b1111
`define ALU_OTHER 4'b0111
`define ALU_BITS 4
`define ALU_OP(x) x[`ALU_BITS-1:0]
// ALU operation encodings (4 bits wide, see ALU_BITS).
// ALU_SIGNED extracts bit 0: it is what separates SLT (0101) from SLTU (0100)
// and SRA (1001) from SRL (1000).
// ALU_OP_CLASS extracts bits [3:2], which group the encodings as:
//   00 = ADD/LUI/AUIPC, 01 = SLTU/SLT/OTHER, 10 = SRL/SRA/SUB, 11 = AND/OR/XOR/SLL.
`define ALU_ADD 4'b0000
`define ALU_LUI 4'b0010
`define ALU_AUIPC 4'b0011
`define ALU_SLTU 4'b0100
`define ALU_SLT 4'b0101
`define ALU_SRL 4'b1000
`define ALU_SRA 4'b1001
`define ALU_SUB 4'b1011
`define ALU_AND 4'b1100
`define ALU_OR 4'b1101
`define ALU_XOR 4'b1110
`define ALU_SLL 4'b1111
`define ALU_OTHER 4'b0111
`define ALU_BITS 4
// Field extractors over an ALU op value x.
`define ALU_OP(x) x[`ALU_BITS-1:0]
`define ALU_OP_CLASS(x) x[3:2]
`define ALU_SIGNED(x) x[0]
`define ALU_SIGNED(x) x[0]
`define BR_EQ 4'b0000
`define BR_NE 4'b0010
`define BR_LTU 4'b0100
`define BR_GEU 4'b0110
`define BR_LT 4'b0101
`define BR_GE 4'b0111
`define BR_JAL 4'b1000
`define BR_JALR 4'b1001
`define BR_ECALL 4'b1010
`define BR_EBREAK 4'b1011
`define BR_MRET 4'b1100
`define BR_SRET 4'b1101
`define BR_DRET 4'b1110
`define BR_OTHER 4'b1111
`define BR_BITS 4
`define BR_OP(x) x[`BR_BITS-1:0]
`define BR_NEG(x) x[1]
`define BR_LESS(x) x[2]
`define BR_STATIC(x) x[3]
`define ALU_BR_BITS 4
`define ALU_BR_OP(x) x[`ALU_BR_BITS-1:0]
`define IS_BR_MOD(x) x[0]
// Branch operation encodings (4 bits wide, see BR_BITS).
// The extractors below read single bits of the encoding:
//   BR_STATIC = bit 3: clear for the six compare ops (EQ..GE), set for JAL and
//               every later entry (JALR, ECALL, EBREAK, MRET, SRET, DRET, OTHER).
//   BR_LESS   = bit 2: among the compare ops, set for LTU/GEU/LT/GE, clear for EQ/NE.
//   BR_NEG    = bit 1: among the compare ops, set for NE/GEU/GE, clear for EQ/LTU/LT.
// Among the bit-2 compares, bit 0 is set for LT/GE and clear for LTU/GEU.
`define BR_EQ 4'b0000
`define BR_NE 4'b0010
`define BR_LTU 4'b0100
`define BR_GEU 4'b0110
`define BR_LT 4'b0101
`define BR_GE 4'b0111
`define BR_JAL 4'b1000
`define BR_JALR 4'b1001
`define BR_ECALL 4'b1010
`define BR_EBREAK 4'b1011
`define BR_MRET 4'b1100
`define BR_SRET 4'b1101
`define BR_DRET 4'b1110
`define BR_OTHER 4'b1111
`define BR_BITS 4
// Field extractors over a branch op value x.
`define BR_OP(x) x[`BR_BITS-1:0]
`define BR_NEG(x) x[1]
`define BR_LESS(x) x[2]
`define BR_STATIC(x) x[3]
// ALU and branch ops share one 4-bit op field (ALU_BITS and BR_BITS are both 4).
`define ALU_BR_BITS 4
`define ALU_BR_OP(x) x[`ALU_BR_BITS-1:0]
// Reads bit 0 of x. NOTE(review): x is presumably the op modifier field
// (see MOD_BITS), not the op itself -- confirm against the decoder.
`define IS_BR_MOD(x) x[0]
`define LSU_LB {1'b0, `BYTEEN_SB}
`define LSU_LH {1'b0, `BYTEEN_SH}
`define LSU_LW {1'b0, `BYTEEN_SW}
`define LSU_LBU {1'b0, `BYTEEN_UB}
`define LSU_LHU {1'b0, `BYTEEN_UH}
`define LSU_SB {1'b1, `BYTEEN_SB}
`define LSU_SH {1'b1, `BYTEEN_SH}
`define LSU_SW {1'b1, `BYTEEN_SW}
`define LSU_SBU {1'b1, `BYTEEN_UB}
`define LSU_SHU {1'b1, `BYTEEN_UH}
`define LSU_BITS 4
`define LSU_RW(x) x[3]
`define LSU_BE(x) x[2:0]
// LSU operation encodings (4 bits wide, see LSU_BITS), built as
// {1-bit flag, 3-bit BYTEEN_* code}.
// The LSU_L* entries carry 1'b0 in the top bit and the LSU_S* entries carry 1'b1.
`define LSU_LB {1'b0, `BYTEEN_SB}
`define LSU_LH {1'b0, `BYTEEN_SH}
`define LSU_LW {1'b0, `BYTEEN_SW}
`define LSU_LBU {1'b0, `BYTEEN_UB}
`define LSU_LHU {1'b0, `BYTEEN_UH}
`define LSU_SB {1'b1, `BYTEEN_SB}
`define LSU_SH {1'b1, `BYTEEN_SH}
`define LSU_SW {1'b1, `BYTEEN_SW}
`define LSU_SBU {1'b1, `BYTEEN_UB}
`define LSU_SHU {1'b1, `BYTEEN_UH}
`define LSU_BITS 4
// LSU_RW extracts the top flag bit; LSU_BE extracts the 3-bit BYTEEN_* code.
`define LSU_RW(x) x[3]
`define LSU_BE(x) x[2:0]
`define CSR_RW 2'h0
`define CSR_RS 2'h1
`define CSR_RC 2'h2
`define CSR_OTHER 2'h3
`define CSR_BITS 2
`define CSR_OP(x) x[`CSR_BITS-1:0]
`define CSR_RW 2'h0
`define CSR_RS 2'h1
`define CSR_RC 2'h2
`define CSR_OTHER 2'h3
`define CSR_BITS 2
`define CSR_OP(x) x[`CSR_BITS-1:0]
`define MUL_MUL 3'h0
`define MUL_MULH 3'h1
`define MUL_MULHSU 3'h2
`define MUL_MULHU 3'h3
`define MUL_DIV 3'h4
`define MUL_DIVU 3'h5
`define MUL_REM 3'h6
`define MUL_REMU 3'h7
`define MUL_BITS 3
`define MUL_OP(x) x[`MUL_BITS-1:0]
`define IS_DIV_OP(x) x[2]
// Multiply/divide operation encodings (3 bits wide, see MUL_BITS).
// Values 0..3 are the MUL* variants; values 4..7 are DIV/DIVU/REM/REMU.
`define MUL_MUL 3'h0
`define MUL_MULH 3'h1
`define MUL_MULHSU 3'h2
`define MUL_MULHU 3'h3
`define MUL_DIV 3'h4
`define MUL_DIVU 3'h5
`define MUL_REM 3'h6
`define MUL_REMU 3'h7
`define MUL_BITS 3
`define MUL_OP(x) x[`MUL_BITS-1:0]
// Extracts bit 2, which is set exactly for the DIV/DIVU/REM/REMU encodings above.
`define IS_DIV_OP(x) x[2]
`define FPU_ADD 4'h0
`define FPU_SUB 4'h1
`define FPU_MUL 4'h2
`define FPU_DIV 4'h3
`define FPU_SQRT 4'h4
`define FPU_MADD 4'h5
`define FPU_MSUB 4'h6
`define FPU_NMSUB 4'h7
`define FPU_NMADD 4'h8
`define FPU_CVTWS 4'h9 // FCVT.W.S
`define FPU_CVTWUS 4'hA // FCVT.WU.S
`define FPU_CVTSW 4'hB // FCVT.S.W
`define FPU_CVTSWU 4'hC // FCVT.S.WU
`define FPU_CLASS 4'hD
`define FPU_CMP 4'hE
`define FPU_MISC 4'hF // SGNJ, SGNJN, SGNJX, FMIN, FMAX, MVXW, MVWX
`define FPU_BITS 4
`define FPU_OP(x) x[`FPU_BITS-1:0]
`define FPU_ADD 4'h0
`define FPU_SUB 4'h1
`define FPU_MUL 4'h2
`define FPU_DIV 4'h3
`define FPU_SQRT 4'h4
`define FPU_MADD 4'h5
`define FPU_MSUB 4'h6
`define FPU_NMSUB 4'h7
`define FPU_NMADD 4'h8
`define FPU_CVTWS 4'h9 // FCVT.W.S
`define FPU_CVTWUS 4'hA // FCVT.WU.S
`define FPU_CVTSW 4'hB // FCVT.S.W
`define FPU_CVTSWU 4'hC // FCVT.S.WU
`define FPU_CLASS 4'hD
`define FPU_CMP 4'hE
`define FPU_MISC 4'hF // SGNJ, SGNJN, SGNJX, FMIN, FMAX, MVXW, MVWX
`define FPU_BITS 4
`define FPU_OP(x) x[`FPU_BITS-1:0]
`define GPU_TMC 3'h0
`define GPU_WSPAWN 3'h1
`define GPU_SPLIT 3'h2
`define GPU_JOIN 3'h3
`define GPU_BAR 3'h4
`define GPU_OTHER 3'h7
`define GPU_BITS 3
`define GPU_OP(x) x[`GPU_BITS-1:0]
`define GPU_TMC 3'h0
`define GPU_WSPAWN 3'h1
`define GPU_SPLIT 3'h2
`define GPU_JOIN 3'h3
`define GPU_BAR 3'h4
`define GPU_OTHER 3'h7
`define GPU_BITS 3
`define GPU_OP(x) x[`GPU_BITS-1:0]
///////////////////////////////////////////////////////////////////////////////
`ifdef EXT_M_ENABLE
`define ISA_EXT_M (1 << 12)
`define ISA_EXT_M (1 << 12)
`else
`define ISA_EXT_M 0
`define ISA_EXT_M 0
`endif
`ifdef EXT_F_ENABLE
`define ISA_EXT_F (1 << 5)
`define ISA_EXT_F (1 << 5)
`else
`define ISA_EXT_F 0
`define ISA_EXT_F 0
`endif
`define ISA_CODE (0 << 0) // A - Atomic Instructions extension \
@ -234,144 +234,174 @@
///////////////////////////////////////////////////////////////////////////////
`ifdef DBG_CACHE_REQ_INFO // pc, rd, wid
`define DBG_CACHE_REQ_MDATAW (32 + `NR_BITS + `NW_BITS)
`ifdef DBG_CACHE_REQ_INFO // pc, rd, wid
`define DBG_CACHE_REQ_MDATAW (32 + `NR_BITS + `NW_BITS)
`else
`define DBG_CACHE_REQ_MDATAW 0
`define DBG_CACHE_REQ_MDATAW 0
`endif
////////////////////////// Dcache Configurable Knobs //////////////////////////
// Cache ID
`define DCACHE_ID 32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 0
`define DCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 0)
// Block size in bytes
`define DBANK_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `GLOBAL_BLOCK_SIZE)
// Word size in bytes
`define DWORD_SIZE 4
// TAG sharing enable
`define DCORE_TAG_ID_BITS `LOG2UP(`LSUQ_SIZE)
`define DCORE_TAG_ID_BITS `LOG2UP(`LSUQ_SIZE)
// Core request tag bits
`define DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS)
`define DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS)
// DRAM request data bits
`define DDRAM_LINE_WIDTH (`DBANK_LINE_SIZE * 8)
`define DDRAM_LINE_WIDTH (`DBANK_LINE_SIZE * 8)
// DRAM request address bits
`define DDRAM_ADDR_WIDTH (32 - `CLOG2(`DBANK_LINE_SIZE))
`define DDRAM_ADDR_WIDTH (32 - `CLOG2(`DBANK_LINE_SIZE))
// DRAM byte enable bits
`define DDRAM_BYTEEN_WIDTH `DBANK_LINE_SIZE
`define DDRAM_BYTEEN_WIDTH `DBANK_LINE_SIZE
// DRAM request tag bits
`define DDRAM_TAG_WIDTH `DDRAM_ADDR_WIDTH
`define DDRAM_TAG_WIDTH `DDRAM_ADDR_WIDTH
// Number of Word requests per cycle {1, 2, 4, 8, ...}
`define DNUM_REQUESTS `NUM_THREADS
// Core request size
`define DNUM_REQUESTS `NUM_THREADS
// Snoop request tag bits
`define DSNP_TAG_WIDTH ((`NUM_CORES > 1) ? `LOG2UP(`L2SNRQ_SIZE) : `L2SNP_TAG_WIDTH)
`define DSNP_TAG_WIDTH ((`NUM_CORES > 1) ? `LOG2UP(`L2SNRQ_SIZE) : `L2SNP_TAG_WIDTH)
////////////////////////// Icache Configurable Knobs //////////////////////////
// Cache ID
`define ICACHE_ID 32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1
`define ICACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1)
// Block size in bytes
`define IBANK_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `GLOBAL_BLOCK_SIZE)
// Word size in bytes
`define IWORD_SIZE 4
// Number of banks
`define INUM_BANKS 1
`define INUM_BANKS 1
// Core request address bits
`define ICORE_ADDR_WIDTH (32-`CLOG2(`IWORD_SIZE))
`define ICORE_ADDR_WIDTH (32-`CLOG2(`IWORD_SIZE))
// Core request byte enable bits
// Sized from the Icache word size (IWORD_SIZE) rather than the Dcache one, so the
// Icache knobs stay self-consistent if the two word sizes ever diverge.
// Both are defined as 4 in this header, so the resulting width is unchanged.
`define ICORE_BYTEEN_WIDTH `IWORD_SIZE
`define ICORE_BYTEEN_WIDTH `IWORD_SIZE
// TAG sharing enable
`define ICORE_TAG_ID_BITS `NW_BITS
`define ICORE_TAG_ID_BITS `NW_BITS
// Core request tag bits
`define ICORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `ICORE_TAG_ID_BITS)
`define ICORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `ICORE_TAG_ID_BITS)
// DRAM request data bits
`define IDRAM_LINE_WIDTH (`IBANK_LINE_SIZE * 8)
`define IDRAM_LINE_WIDTH (`IBANK_LINE_SIZE * 8)
// DRAM request address bits
`define IDRAM_ADDR_WIDTH (32 - `CLOG2(`IBANK_LINE_SIZE))
`define IDRAM_ADDR_WIDTH (32 - `CLOG2(`IBANK_LINE_SIZE))
// DRAM byte enable bits
`define IDRAM_BYTEEN_WIDTH `IBANK_LINE_SIZE
`define IDRAM_BYTEEN_WIDTH `IBANK_LINE_SIZE
// DRAM request tag bits
`define IDRAM_TAG_WIDTH `IDRAM_ADDR_WIDTH
`define IDRAM_TAG_WIDTH `IDRAM_ADDR_WIDTH
// Number of Word requests per cycle {1, 2, 4, 8, ...}
`define INUM_REQUESTS 1
// Core request size
`define INUM_REQUESTS 1
////////////////////////// SM Configurable Knobs //////////////////////////////
// Cache ID
`define SCACHE_ID 32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 2
`define SCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 2)
// Number of Word requests per cycle {1, 2, 4, 8, ...}
`define SNUM_REQUESTS `NUM_THREADS
// Block size in bytes
`define SBANK_LINE_SIZE 4
// Word size in bytes
`define SWORD_SIZE 4
// Core request size
`define SNUM_REQUESTS `NUM_THREADS
// DRAM request address bits
`define SDRAM_ADDR_WIDTH (32 - `CLOG2(`SBANK_LINE_SIZE))
`define SDRAM_ADDR_WIDTH (32 - `CLOG2(`SBANK_LINE_SIZE))
// DRAM request tag bits
`define SDRAM_TAG_WIDTH `SDRAM_ADDR_WIDTH
// DRAM request tag bits
`define SDRAM_TAG_WIDTH `SDRAM_ADDR_WIDTH
// Number of Word requests per cycle {1, 2, 4, 8, ...}
`define SNUM_REQUESTS `NUM_THREADS
// Core request size
`define SNUM_REQUESTS `NUM_THREADS
////////////////////////// L2cache Configurable Knobs /////////////////////////
// Cache ID
`define L2CACHE_ID 32'(`L3_ENABLE) + CLUSTER_ID
`define L2CACHE_ID (32'(`L3_ENABLE) + CLUSTER_ID)
// Block size in bytes
`define L2BANK_LINE_SIZE `GLOBAL_BLOCK_SIZE
// Word size in bytes
`define L2WORD_SIZE `DBANK_LINE_SIZE
// Core request tag bits
`define L2CORE_TAG_WIDTH (`DCORE_TAG_WIDTH + `CLOG2(`NUM_CORES))
`define L2CORE_TAG_WIDTH (`DCORE_TAG_WIDTH + `CLOG2(`NUM_CORES))
// DRAM request data bits
`define L2DRAM_LINE_WIDTH (`L2_ENABLE ? (`L2BANK_LINE_SIZE * 8) : `DDRAM_LINE_WIDTH)
`define L2DRAM_LINE_WIDTH (`L2BANK_LINE_SIZE * 8)
// DRAM request address bits
`define L2DRAM_ADDR_WIDTH (`L2_ENABLE ? (32 - `CLOG2(`L2BANK_LINE_SIZE)) : `DDRAM_ADDR_WIDTH)
`define L2DRAM_ADDR_WIDTH (32 - `CLOG2(`L2BANK_LINE_SIZE))
// DRAM byte enable bits
`define L2DRAM_BYTEEN_WIDTH (`L2_ENABLE ? `L2BANK_LINE_SIZE : `DDRAM_BYTEEN_WIDTH)
`define L2DRAM_BYTEEN_WIDTH `L2BANK_LINE_SIZE
// DRAM request tag bits
`define L2DRAM_TAG_WIDTH (`L2_ENABLE ? `L2DRAM_ADDR_WIDTH : (`L2DRAM_ADDR_WIDTH+`CLOG2(`NUM_CORES*2)))
`define L2DRAM_TAG_WIDTH (`L2_ENABLE ? `L2DRAM_ADDR_WIDTH : (`L2DRAM_ADDR_WIDTH+`CLOG2(`NUM_CORES*2)))
// Snoop request tag bits
`define L2SNP_TAG_WIDTH (`L3_ENABLE ? `LOG2UP(`L3SNRQ_SIZE) : `L3SNP_TAG_WIDTH)
`define L2SNP_TAG_WIDTH (`L3_ENABLE ? `LOG2UP(`L3SNRQ_SIZE) : `L3SNP_TAG_WIDTH)
// Number of Word requests per cycle {1, 2, 4, 8, ...}
`define L2NUM_REQUESTS (2 * `NUM_CORES)
// Core request size
`define L2NUM_REQUESTS (2 * `NUM_CORES)
////////////////////////// L3cache Configurable Knobs /////////////////////////
// Cache ID
`define L3CACHE_ID 0
`define L3CACHE_ID 0
// Block size in bytes
`define L3BANK_LINE_SIZE `GLOBAL_BLOCK_SIZE
// Word size in bytes
`define L3WORD_SIZE `L2BANK_LINE_SIZE
// Core request tag bits
`define L3CORE_TAG_WIDTH (`L2CORE_TAG_WIDTH + `CLOG2(`NUM_CLUSTERS))
`define L3CORE_TAG_WIDTH (`L2CORE_TAG_WIDTH + `CLOG2(`NUM_CLUSTERS))
// DRAM request data bits
`define L3DRAM_LINE_WIDTH (`L3_ENABLE ? (`L3BANK_LINE_SIZE * 8) : `L2DRAM_LINE_WIDTH)
`define L3DRAM_LINE_WIDTH (`L3BANK_LINE_SIZE * 8)
// DRAM request address bits
`define L3DRAM_ADDR_WIDTH (`L3_ENABLE ? (32 - `CLOG2(`L3BANK_LINE_SIZE)) : `L2DRAM_ADDR_WIDTH)
`define L3DRAM_ADDR_WIDTH (32 - `CLOG2(`L3BANK_LINE_SIZE))
// DRAM byte enable bits
`define L3DRAM_BYTEEN_WIDTH (`L3_ENABLE ? `L3BANK_LINE_SIZE : `L2DRAM_BYTEEN_WIDTH)
`define L3DRAM_BYTEEN_WIDTH `L3BANK_LINE_SIZE
// DRAM request tag bits
`define L3DRAM_TAG_WIDTH (`L3_ENABLE ? `L3DRAM_ADDR_WIDTH : `L2DRAM_TAG_WIDTH)
`define L3DRAM_TAG_WIDTH (`L3_ENABLE ? `L3DRAM_ADDR_WIDTH : `L2DRAM_TAG_WIDTH)
// Snoop request tag bits
`define L3SNP_TAG_WIDTH 16
`define L3SNP_TAG_WIDTH 16
// Number of Word requests per cycle {1, 2, 4, 8, ...}
`define L3NUM_REQUESTS `NUM_CLUSTERS
// Core request size
`define L3NUM_REQUESTS `NUM_CLUSTERS
///////////////////////////////////////////////////////////////////////////////

View file

@ -168,9 +168,9 @@ module VX_ibuffer #(
for (integer i = 0; i < `NUM_WARPS; i++) begin
nw += 32'(q_size[i] != 0);
end
assert(nw == 32'(num_warps)) else $display("%t: error: invalid num_warps: nw=%0d, ref=%0d", $time, num_warps, nw);
assert(~deq_valid || (q_size[deq_wid] != 0)) else $display("%t: error: invalid schedule: wid=%0d", $time, deq_wid);
assert(~deq_fire || (q_size[deq_wid] != 0)) else $display("%t: error: invalid dequeu: wid=%0d", $time, deq_wid);
// Consistency checks; each failure is reported through $error with a timestamp.
// 1) The count of warps with a non-empty queue (nw, accumulated by the loop above)
//    must equal num_warps.
// NOTE(review): the message labels are "nw=" then "ref=", but the arguments are
// passed as (num_warps, nw), so "nw=" prints num_warps and "ref=" prints the local
// nw. Confirm this labelling is intended.
assert(nw == 32'(num_warps)) else $error("%t: error: invalid num_warps: nw=%0d, ref=%0d", $time, num_warps, nw);
// 2) Whenever deq_valid is set, the queue indexed by deq_wid must be non-empty.
assert(~deq_valid || (q_size[deq_wid] != 0)) else $error("%t: error: invalid schedule: wid=%0d", $time, deq_wid);
// 3) Whenever deq_fire is set, the queue indexed by deq_wid must be non-empty.
assert(~deq_fire || (q_size[deq_wid] != 0)) else $error("%t: error: invalid dequeu: wid=%0d", $time, deq_wid);
end
`endif
end

View file

@ -75,7 +75,6 @@ module VX_mem_unit # (
.DRAM_ENABLE (0),
.FLUSH_ENABLE (0),
.WRITE_ENABLE (1),
.SNOOP_FORWARDING (0),
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`SDRAM_TAG_WIDTH)
@ -127,44 +126,31 @@ module VX_mem_unit # (
`UNUSED_PIN (snp_rsp_tag),
.snp_rsp_ready (1'b0),
// Snoop forward out
`UNUSED_PIN (snp_fwdout_valid),
`UNUSED_PIN (snp_fwdout_addr),
`UNUSED_PIN (snp_fwdout_invalidate),
`UNUSED_PIN (snp_fwdout_tag),
.snp_fwdout_ready (1'b0),
// Snoop forward in
.snp_fwdin_valid (1'b0),
.snp_fwdin_tag (0),
`UNUSED_PIN (snp_fwdin_ready),
// Miss status
`UNUSED_PIN (miss_vec)
);
VX_cache #(
.CACHE_ID (`DCACHE_ID),
.CACHE_SIZE (`DCACHE_SIZE),
.BANK_LINE_SIZE (`DBANK_LINE_SIZE),
.NUM_BANKS (`DNUM_BANKS),
.WORD_SIZE (`DWORD_SIZE),
.NUM_REQUESTS (`DNUM_REQUESTS),
.CREQ_SIZE (`DCREQ_SIZE),
.MRVQ_SIZE (`DMRVQ_SIZE),
.DRFQ_SIZE (`DDRFQ_SIZE),
.SNRQ_SIZE (`DSNRQ_SIZE),
.CWBQ_SIZE (`DCWBQ_SIZE),
.DREQ_SIZE (`DDREQ_SIZE),
.SNPQ_SIZE (`DSNPQ_SIZE),
.DRAM_ENABLE (1),
.FLUSH_ENABLE (1),
.WRITE_ENABLE (1),
.SNOOP_FORWARDING (0),
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH),
.SNP_REQ_TAG_WIDTH (`DSNP_TAG_WIDTH)
.CACHE_ID (`DCACHE_ID),
.CACHE_SIZE (`DCACHE_SIZE),
.BANK_LINE_SIZE (`DBANK_LINE_SIZE),
.NUM_BANKS (`DNUM_BANKS),
.WORD_SIZE (`DWORD_SIZE),
.NUM_REQUESTS (`DNUM_REQUESTS),
.CREQ_SIZE (`DCREQ_SIZE),
.MRVQ_SIZE (`DMRVQ_SIZE),
.DRFQ_SIZE (`DDRFQ_SIZE),
.SNRQ_SIZE (`DSNRQ_SIZE),
.CWBQ_SIZE (`DCWBQ_SIZE),
.DREQ_SIZE (`DDREQ_SIZE),
.SNPQ_SIZE (`DSNPQ_SIZE),
.DRAM_ENABLE (1),
.FLUSH_ENABLE (1),
.WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH),
.SNP_TAG_WIDTH (`DSNP_TAG_WIDTH)
) dcache (
`SCOPE_BIND_VX_mem_unit_dcache
@ -212,18 +198,6 @@ module VX_mem_unit # (
.snp_rsp_valid (dcache_snp_rsp_if.valid),
.snp_rsp_tag (dcache_snp_rsp_if.tag),
.snp_rsp_ready (dcache_snp_rsp_if.ready),
// Snoop forward out
`UNUSED_PIN (snp_fwdout_valid),
`UNUSED_PIN (snp_fwdout_addr),
`UNUSED_PIN (snp_fwdout_invalidate),
`UNUSED_PIN (snp_fwdout_tag),
.snp_fwdout_ready (1'b0),
// Snoop forward in
.snp_fwdin_valid (1'b0),
.snp_fwdin_tag (0),
`UNUSED_PIN (snp_fwdin_ready),
// Miss status
`UNUSED_PIN (miss_vec)
@ -246,7 +220,6 @@ module VX_mem_unit # (
.DRAM_ENABLE (1),
.FLUSH_ENABLE (0),
.WRITE_ENABLE (0),
.SNOOP_FORWARDING (0),
.CORE_TAG_WIDTH (`ICORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH)
@ -298,18 +271,6 @@ module VX_mem_unit # (
`UNUSED_PIN (snp_rsp_tag),
.snp_rsp_ready (1'b0),
// Snoop forward out
`UNUSED_PIN (snp_fwdout_valid),
`UNUSED_PIN (snp_fwdout_addr),
`UNUSED_PIN (snp_fwdout_invalidate),
`UNUSED_PIN (snp_fwdout_tag),
.snp_fwdout_ready (1'b0),
// Snoop forward in
.snp_fwdin_valid (1'b0),
.snp_fwdin_tag (0),
`UNUSED_PIN (snp_fwdin_ready),
// Miss status
`UNUSED_PIN (miss_vec)
);

View file

@ -320,56 +320,70 @@ module Vortex (
// L3 Cache ///////////////////////////////////////////////////////////
wire [`L3NUM_REQUESTS-1:0] cluster_dram_req_valid;
wire [`L3NUM_REQUESTS-1:0] cluster_dram_req_rw;
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] cluster_dram_req_byteen;
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_ADDR_WIDTH-1:0] cluster_dram_req_addr;
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] cluster_dram_req_data;
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] cluster_dram_req_tag;
wire [`L3NUM_REQUESTS-1:0] cluster_dram_rsp_valid;
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] cluster_dram_rsp_data;
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] cluster_dram_rsp_tag;
wire cluster_dram_rsp_ready;
wire [`NUM_CLUSTERS-1:0] cluster_snp_fwdout_valid;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] cluster_snp_fwdout_addr;
wire [`NUM_CLUSTERS-1:0] cluster_snp_fwdout_invalidate;
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] cluster_snp_fwdout_tag;
wire [`NUM_CLUSTERS-1:0] cluster_snp_fwdout_ready;
wire snp_fwd_rsp_valid;
wire [`L3DRAM_ADDR_WIDTH-1:0] snp_fwd_rsp_addr;
wire snp_fwd_rsp_invalidate;
wire [`L3SNP_TAG_WIDTH-1:0] snp_fwd_rsp_tag;
wire snp_fwd_rsp_ready;
wire [`NUM_CLUSTERS-1:0] cluster_snp_fwdin_valid;
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] cluster_snp_fwdin_tag;
wire [`NUM_CLUSTERS-1:0] cluster_snp_fwdin_ready;
reg [`L3NUM_REQUESTS-1:0] cluster_dram_rsp_ready_other;
for (genvar i = 0; i < `L3NUM_REQUESTS; i++) begin
// Core Request
assign cluster_dram_req_valid [i] = per_cluster_dram_req_valid [i];
assign cluster_dram_req_rw [i] = per_cluster_dram_req_rw [i];
assign cluster_dram_req_byteen [i] = per_cluster_dram_req_byteen[i];
assign cluster_dram_req_addr [i] = per_cluster_dram_req_addr [i];
assign cluster_dram_req_tag [i] = per_cluster_dram_req_tag [i];
assign cluster_dram_req_data [i] = per_cluster_dram_req_data [i];
// Core Response
assign per_cluster_dram_rsp_valid [i] = cluster_dram_rsp_valid [i] && cluster_dram_rsp_ready;
assign per_cluster_dram_rsp_data [i] = cluster_dram_rsp_data [i];
assign per_cluster_dram_rsp_tag [i] = cluster_dram_rsp_tag [i];
// Snoop Forwarding out
assign per_cluster_snp_req_valid [i] = cluster_snp_fwdout_valid[i];
assign per_cluster_snp_req_addr [i] = cluster_snp_fwdout_addr[i];
assign per_cluster_snp_req_invalidate [i] = cluster_snp_fwdout_invalidate[i];
assign per_cluster_snp_req_tag [i] = cluster_snp_fwdout_tag[i];
assign cluster_snp_fwdout_ready [i] = per_cluster_snp_req_ready[i];
// Snoop Forwarding in
assign cluster_snp_fwdin_valid [i] = per_cluster_snp_rsp_valid [i];
assign cluster_snp_fwdin_tag [i] = per_cluster_snp_rsp_tag [i];
assign per_cluster_snp_rsp_ready [i] = cluster_snp_fwdin_ready [i];
// Combinational: for each requester i, AND together (ready | !valid) of every
// OTHER requester j != i. Bit i therefore ends up 1 only when no other requester
// holds a valid response it is not ready to accept.
always @(*) begin
// Start from all ones so that requesters with no peers keep their bit set.
cluster_dram_rsp_ready_other = {`L3NUM_REQUESTS{1'b1}};
for (integer i = 0; i < `L3NUM_REQUESTS; i++) begin
for (integer j = 0; j < `L3NUM_REQUESTS; j++) begin
// Skip self: a requester's own readiness does not gate its own bit.
if (i != j)
cluster_dram_rsp_ready_other[i] &= (per_cluster_dram_rsp_ready [j] | !cluster_dram_rsp_valid [j]);
end
end
end
assign cluster_dram_rsp_ready = (& per_cluster_dram_rsp_ready);
// Per-requester response wiring: valid is gated by cluster_dram_rsp_ready_other[i]
// (the readiness of the other requesters), while data and tag pass straight through.
for (genvar i = 0; i < `L3NUM_REQUESTS; i++) begin
// Core Response
assign per_cluster_dram_rsp_valid [i] = cluster_dram_rsp_valid [i] & cluster_dram_rsp_ready_other [i];
assign per_cluster_dram_rsp_data [i] = cluster_dram_rsp_data [i];
assign per_cluster_dram_rsp_tag [i] = cluster_dram_rsp_tag [i];
end
// Asserted when every requester is either ready or has no valid response
// (reduction AND over the per-requester vector).
assign cluster_dram_rsp_ready = & (per_cluster_dram_rsp_ready | ~cluster_dram_rsp_valid);
VX_snp_forwarder #(
.CACHE_ID (`L3CACHE_ID),
.NUM_REQUESTS (`NUM_CLUSTERS),
.SRC_ADDR_WIDTH (`L3DRAM_ADDR_WIDTH),
.DST_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH),
.SNP_TAG_WIDTH (`L3SNP_TAG_WIDTH),
.SNRQ_SIZE (`L3SNRQ_SIZE)
) snp_forwarder (
.clk (clk),
.reset (reset),
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_invalidate (snp_req_invalidate),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
.snp_rsp_valid (snp_fwd_rsp_valid),
.snp_rsp_addr (snp_fwd_rsp_addr),
.snp_rsp_invalidate (snp_fwd_rsp_invalidate),
.snp_rsp_tag (snp_fwd_rsp_tag),
.snp_rsp_ready (snp_fwd_rsp_ready),
.snp_fwdout_valid (per_cluster_snp_req_valid),
.snp_fwdout_addr (per_cluster_snp_req_addr),
.snp_fwdout_invalidate(per_cluster_snp_req_invalidate),
.snp_fwdout_tag (per_cluster_snp_req_tag),
.snp_fwdout_ready (per_cluster_snp_req_ready),
.snp_fwdin_valid (per_cluster_snp_rsp_valid),
.snp_fwdin_tag (per_cluster_snp_rsp_tag),
.snp_fwdin_ready (per_cluster_snp_rsp_ready)
);
VX_cache #(
.CACHE_ID (`L3CACHE_ID),
@ -388,13 +402,10 @@ module Vortex (
.DRAM_ENABLE (1),
.FLUSH_ENABLE (1),
.WRITE_ENABLE (1),
.SNOOP_FORWARDING (1),
.CORE_TAG_WIDTH (`L2DRAM_TAG_WIDTH),
.CORE_TAG_ID_BITS (0),
.DRAM_TAG_WIDTH (`L3DRAM_TAG_WIDTH),
.NUM_SNP_REQUESTS (`NUM_CLUSTERS),
.SNP_REQ_TAG_WIDTH (`L3SNP_TAG_WIDTH),
.SNP_FWD_TAG_WIDTH (`L2SNP_TAG_WIDTH)
.SNP_TAG_WIDTH (`L3SNP_TAG_WIDTH)
) l3cache (
`SCOPE_BIND_Vortex_l3cache
@ -402,12 +413,12 @@ module Vortex (
.reset (reset),
// Core request
.core_req_valid (cluster_dram_req_valid),
.core_req_rw (cluster_dram_req_rw),
.core_req_byteen (cluster_dram_req_byteen),
.core_req_addr (cluster_dram_req_addr),
.core_req_data (cluster_dram_req_data),
.core_req_tag (cluster_dram_req_tag),
.core_req_valid (per_cluster_dram_req_valid),
.core_req_rw (per_cluster_dram_req_rw),
.core_req_byteen (per_cluster_dram_req_byteen),
.core_req_addr (per_cluster_dram_req_addr),
.core_req_data (per_cluster_dram_req_data),
.core_req_tag (per_cluster_dram_req_tag),
.core_req_ready (cluster_dram_req_ready),
// Core response
@ -432,29 +443,17 @@ module Vortex (
.dram_rsp_ready (dram_rsp_ready),
// Snoop request
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_invalidate (snp_req_invalidate),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
.snp_req_valid (snp_fwd_rsp_valid),
.snp_req_addr (snp_fwd_rsp_addr),
.snp_req_invalidate (snp_fwd_rsp_invalidate),
.snp_req_tag (snp_fwd_rsp_tag),
.snp_req_ready (snp_fwd_rsp_ready),
// Snoop response
.snp_rsp_valid (snp_rsp_valid),
.snp_rsp_tag (snp_rsp_tag),
.snp_rsp_ready (snp_rsp_ready),
// Snoop forwarding out
.snp_fwdout_valid (cluster_snp_fwdout_valid),
.snp_fwdout_addr (cluster_snp_fwdout_addr),
.snp_fwdout_invalidate(cluster_snp_fwdout_invalidate),
.snp_fwdout_tag (cluster_snp_fwdout_tag),
.snp_fwdout_ready (cluster_snp_fwdout_ready),
// Snoop forwarding in
.snp_fwdin_valid (cluster_snp_fwdin_valid),
.snp_fwdin_tag (cluster_snp_fwdin_tag),
.snp_fwdin_ready (cluster_snp_fwdin_ready),
// Miss status
`UNUSED_PIN (miss_vec)
);
@ -497,4 +496,11 @@ module Vortex (
end
`endif
// Debug-only aid, compiled in only when NDEBUG is not defined:
// $fflush() runs on every rising edge of clk.
`ifndef NDEBUG
always @(posedge clk) begin
$fflush(); // flush stdout buffer
end
`endif
endmodule

View file

@ -47,7 +47,7 @@ module VX_bank #(
parameter CORE_TAG_ID_BITS = 0,
// Snooping request tag width
parameter SNP_REQ_TAG_WIDTH = 1
parameter SNP_TAG_WIDTH = 1
) (
`SCOPE_IO_VX_bank
@ -88,12 +88,12 @@ module VX_bank #(
input wire snp_req_valid,
input wire [`LINE_ADDR_WIDTH-1:0] snp_req_addr,
input wire snp_req_invalidate,
input wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag,
input wire [SNP_TAG_WIDTH-1:0] snp_req_tag,
output wire snp_req_ready,
// Snoop Response
output wire snp_rsp_valid,
output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag,
output wire [SNP_TAG_WIDTH-1:0] snp_rsp_tag,
input wire snp_rsp_ready,
// Misses
@ -142,13 +142,13 @@ module VX_bank #(
wire [`LINE_ADDR_WIDTH-1:0] snrq_addr_st0;
wire snrq_invalidate_st0;
wire [SNP_REQ_TAG_WIDTH-1:0] snrq_tag_st0;
wire [SNP_TAG_WIDTH-1:0] snrq_tag_st0;
wire snp_req_fire = snp_req_valid && snp_req_ready;
assign snp_req_ready = !snrq_full;
VX_generic_queue #(
.DATAW(`LINE_ADDR_WIDTH + 1 + SNP_REQ_TAG_WIDTH),
.DATAW(`LINE_ADDR_WIDTH + 1 + SNP_TAG_WIDTH),
.SIZE(SNRQ_SIZE)
) snp_req_queue (
.clk (clk),
@ -352,7 +352,7 @@ module VX_bank #(
|| ((miss_st3 || force_miss_st3) && (addr_st3 == addr_st0));
`ifdef DBG_CACHE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
assign {debug_pc_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = inst_meta_st0;
end else begin
assign {debug_pc_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = 0;
@ -371,7 +371,7 @@ module VX_bank #(
);
`ifdef DBG_CACHE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = inst_meta_st1;
end else begin
assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = 0;
@ -474,7 +474,7 @@ module VX_bank #(
);
`ifdef DBG_CACHE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
assign {debug_pc_st2, debug_rd_st2, debug_wid_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2;
end else begin
assign {debug_pc_st2, debug_rd_st2, debug_wid_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = 0;
@ -574,7 +574,7 @@ module VX_bank #(
);
`ifdef DBG_CACHE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
assign {debug_pc_st3, debug_rd_st3, debug_wid_st3, debug_tagid_st3, debug_rw_st3, debug_byteen_st3, debug_tid_st3} = inst_meta_st3;
end else begin
assign {debug_pc_st3, debug_rd_st3, debug_wid_st3, debug_tagid_st3, debug_rw_st3, debug_byteen_st3, debug_tid_st3} = 0;
@ -621,7 +621,7 @@ module VX_bank #(
.NUM_REQUESTS (NUM_REQUESTS),
.MRVQ_SIZE (MRVQ_SIZE),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH)
.SNP_TAG_WIDTH (SNP_TAG_WIDTH)
) cache_miss_resrv (
.clk (clk),
.reset (reset),
@ -803,12 +803,12 @@ module VX_bank #(
wire snpq_pop = snp_rsp_valid && snp_rsp_ready;
wire [SNP_REQ_TAG_WIDTH-1:0] snpq_tag_st3 = SNP_REQ_TAG_WIDTH'(req_tag_st3);
wire [SNP_TAG_WIDTH-1:0] snpq_tag_st3 = SNP_TAG_WIDTH'(req_tag_st3);
if (FLUSH_ENABLE) begin
VX_generic_queue #(
.DATAW(SNP_REQ_TAG_WIDTH),
.SIZE(SNPQ_SIZE)
.DATAW (SNP_TAG_WIDTH),
.SIZE (SNPQ_SIZE)
) snp_rsp_queue (
.clk (clk),
.reset (reset),

View file

@ -39,9 +39,6 @@ module VX_cache #(
// Enable cache flush
parameter FLUSH_ENABLE = 1,
// Enable snoop forwarding
parameter SNOOP_FORWARDING = 1,
// core request tag size
parameter CORE_TAG_WIDTH = 4,
@ -51,14 +48,8 @@ module VX_cache #(
// dram request tag size
parameter DRAM_TAG_WIDTH = 28,
// Number of snoop forwarding requests
parameter NUM_SNP_REQUESTS = (SNOOP_FORWARDING ? 4 : 1),
// Snooping request tag width
parameter SNP_REQ_TAG_WIDTH = (SNOOP_FORWARDING ? 4 : 1),
// Snooping forward tag width
parameter SNP_FWD_TAG_WIDTH = (SNOOP_FORWARDING ? 4 : 1)
parameter SNP_TAG_WIDTH = 1
) (
`SCOPE_IO_VX_cache
@ -99,28 +90,14 @@ module VX_cache #(
input wire snp_req_valid,
input wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr,
input wire snp_req_invalidate,
input wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag,
input wire [SNP_TAG_WIDTH-1:0] snp_req_tag,
output wire snp_req_ready,
// Snoop response
output wire snp_rsp_valid,
output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag,
output wire [SNP_TAG_WIDTH-1:0] snp_rsp_tag,
input wire snp_rsp_ready,
// Snoop Forwarding out
output wire [NUM_SNP_REQUESTS-1:0] snp_fwdout_valid,
output wire [NUM_SNP_REQUESTS-1:0][`DRAM_ADDR_WIDTH-1:0] snp_fwdout_addr,
output wire [NUM_SNP_REQUESTS-1:0] snp_fwdout_invalidate,
output wire [NUM_SNP_REQUESTS-1:0][SNP_FWD_TAG_WIDTH-1:0] snp_fwdout_tag,
`IGNORE_WARNINGS_BEGIN
input wire [NUM_SNP_REQUESTS-1:0] snp_fwdout_ready,
// Snoop forwarding in
input wire [NUM_SNP_REQUESTS-1:0] snp_fwdin_valid,
input wire [NUM_SNP_REQUESTS-1:0][SNP_FWD_TAG_WIDTH-1:0] snp_fwdin_tag,
`IGNORE_WARNINGS_END
output wire [NUM_SNP_REQUESTS-1:0] snp_fwdin_ready,
output wire [NUM_BANKS-1:0] miss_vec
);
@ -146,72 +123,16 @@ module VX_cache #(
wire [NUM_BANKS-1:0] per_bank_snp_req_ready;
wire [NUM_BANKS-1:0] per_bank_snp_rsp_valid;
wire [NUM_BANKS-1:0][SNP_REQ_TAG_WIDTH-1:0] per_bank_snp_rsp_tag;
wire [NUM_BANKS-1:0][SNP_TAG_WIDTH-1:0] per_bank_snp_rsp_tag;
wire [NUM_BANKS-1:0] per_bank_snp_rsp_ready;
wire [NUM_BANKS-1:0] per_bank_miss;
assign miss_vec = per_bank_miss;
wire snp_req_valid_qual;
wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr_qual;
wire snp_req_invalidate_qual;
wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag_qual;
wire snp_req_ready_qual;
if (SNOOP_FORWARDING) begin
VX_snp_forwarder #(
.CACHE_ID (CACHE_ID),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_REQUESTS (NUM_SNP_REQUESTS),
.SNRQ_SIZE (SNRQ_SIZE),
.SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH)
) snp_forwarder (
.clk (clk),
.reset (reset),
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_invalidate (snp_req_invalidate),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
.snp_rsp_valid (snp_req_valid_qual),
.snp_rsp_addr (snp_req_addr_qual),
.snp_rsp_invalidate (snp_req_invalidate_qual),
.snp_rsp_tag (snp_req_tag_qual),
.snp_rsp_ready (snp_req_ready_qual),
.snp_fwdout_valid (snp_fwdout_valid),
.snp_fwdout_addr (snp_fwdout_addr),
.snp_fwdout_invalidate(snp_fwdout_invalidate),
.snp_fwdout_tag (snp_fwdout_tag),
.snp_fwdout_ready (snp_fwdout_ready),
.snp_fwdin_valid (snp_fwdin_valid),
.snp_fwdin_tag (snp_fwdin_tag),
.snp_fwdin_ready (snp_fwdin_ready)
);
end else begin
assign snp_fwdout_valid = 0;
assign snp_fwdout_addr = 0;
assign snp_fwdout_invalidate = 0;
assign snp_fwdout_tag = 0;
assign snp_fwdin_ready = 0;
assign snp_req_valid_qual = snp_req_valid;
assign snp_req_addr_qual = snp_req_addr;
assign snp_req_invalidate_qual = snp_req_invalidate;
assign snp_req_tag_qual = snp_req_tag;
assign snp_req_ready = snp_req_ready_qual;
end
assign miss_vec = per_bank_miss;
if (NUM_BANKS == 1) begin
assign snp_req_ready_qual = per_bank_snp_req_ready;
assign snp_req_ready = per_bank_snp_req_ready;
end else begin
assign snp_req_ready_qual = per_bank_snp_req_ready[`DRAM_ADDR_BANK(snp_req_addr_qual)];
assign snp_req_ready = per_bank_snp_req_ready[`DRAM_ADDR_BANK(snp_req_addr)];
end
VX_cache_core_req_bank_sel #(
@ -221,14 +142,18 @@ module VX_cache #(
.NUM_REQUESTS (NUM_REQUESTS)
) cache_core_req_bank_sel (
.core_req_valid (core_req_valid),
.per_bank_ready (per_bank_core_req_ready),
.core_req_addr (core_req_addr),
.core_req_ready (core_req_ready),
.per_bank_valid (per_bank_valid),
.core_req_ready (core_req_ready)
.per_bank_ready (per_bank_core_req_ready)
);
assign dram_req_tag = dram_req_addr;
assign dram_rsp_ready = (& per_bank_dram_rsp_ready);
if (NUM_BANKS == 1) begin
assign dram_rsp_ready = per_bank_dram_rsp_ready;
end else begin
assign dram_rsp_ready = per_bank_dram_rsp_ready[`DRAM_ADDR_BANK(dram_rsp_tag)];
end
for (genvar i = 0; i < NUM_BANKS; i++) begin
wire [NUM_REQUESTS-1:0] curr_bank_core_req_valid;
@ -260,11 +185,11 @@ module VX_cache #(
wire curr_bank_snp_req_valid;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_snp_req_addr;
wire curr_bank_snp_req_invalidate;
wire [SNP_REQ_TAG_WIDTH-1:0] curr_bank_snp_req_tag;
wire [SNP_TAG_WIDTH-1:0] curr_bank_snp_req_tag;
wire curr_bank_snp_req_ready;
wire curr_bank_snp_rsp_valid;
wire [SNP_REQ_TAG_WIDTH-1:0] curr_bank_snp_rsp_tag;
wire [SNP_TAG_WIDTH-1:0] curr_bank_snp_rsp_tag;
wire curr_bank_snp_rsp_ready;
wire curr_bank_miss;
@ -310,14 +235,14 @@ module VX_cache #(
// Snoop request
if (NUM_BANKS == 1) begin
assign curr_bank_snp_req_valid = snp_req_valid_qual;
assign curr_bank_snp_req_addr = snp_req_addr_qual;
assign curr_bank_snp_req_valid = snp_req_valid;
assign curr_bank_snp_req_addr = snp_req_addr;
end else begin
assign curr_bank_snp_req_valid = snp_req_valid_qual && (`DRAM_ADDR_BANK(snp_req_addr_qual) == i);
assign curr_bank_snp_req_addr = `DRAM_TO_LINE_ADDR(snp_req_addr_qual);
assign curr_bank_snp_req_valid = snp_req_valid && (`DRAM_ADDR_BANK(snp_req_addr) == i);
assign curr_bank_snp_req_addr = `DRAM_TO_LINE_ADDR(snp_req_addr);
end
assign curr_bank_snp_req_invalidate = snp_req_invalidate_qual;
assign curr_bank_snp_req_tag = snp_req_tag_qual;
assign curr_bank_snp_req_invalidate = snp_req_invalidate;
assign curr_bank_snp_req_tag = snp_req_tag;
assign per_bank_snp_req_ready[i] = curr_bank_snp_req_ready;
// Snoop response
@ -348,7 +273,7 @@ module VX_cache #(
.WRITE_ENABLE (WRITE_ENABLE),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS),
.SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH)
.SNP_TAG_WIDTH (SNP_TAG_WIDTH)
) bank (
`SCOPE_BIND_VX_cache_bank(i)
@ -459,9 +384,9 @@ module VX_cache #(
if (FLUSH_ENABLE) begin
VX_snp_rsp_arb #(
.NUM_BANKS (NUM_BANKS),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH)
.NUM_BANKS (NUM_BANKS),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.SNP_TAG_WIDTH (SNP_TAG_WIDTH)
) snp_rsp_arb (
.clk (clk),
.reset (reset),

View file

@ -7,7 +7,7 @@
`include "VX_define.vh"
`endif
`define REQ_TAG_WIDTH `MAX(CORE_TAG_WIDTH, SNP_REQ_TAG_WIDTH)
`define REQ_TAG_WIDTH `MAX(CORE_TAG_WIDTH, SNP_TAG_WIDTH)
`define REQS_BITS `LOG2UP(NUM_REQUESTS)

View file

@ -11,27 +11,43 @@ module VX_cache_core_req_bank_sel #(
parameter NUM_REQUESTS = 1
) (
input wire [NUM_REQUESTS-1:0] core_req_valid,
`IGNORE_WARNINGS_BEGIN
input wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
`IGNORE_WARNINGS_END
input wire [NUM_BANKS-1:0] per_bank_ready,
input wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
output wire core_req_ready,
output wire [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid,
output wire core_req_ready
input wire [NUM_BANKS-1:0] per_bank_ready
);
if (NUM_BANKS > 1) begin
reg [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid_r;
reg [NUM_BANKS-1:0] per_bank_ready_sel;
reg [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid_r;
reg [NUM_BANKS-1:0] per_bank_ready_ignore;
reg [NUM_BANKS-1:0] per_bank_ready_other;
always @(*) begin
per_bank_valid_r = 0;
per_bank_ready_sel = {NUM_BANKS{1'b1}};
per_bank_valid_r = 0;
per_bank_ready_other = {NUM_BANKS{1'b1}};
per_bank_ready_ignore = {NUM_BANKS{1'b1}};
for (integer i = 0; i < NUM_BANKS; i++) begin
for (integer j = 0; j < NUM_BANKS; j++) begin
if (i != j)
per_bank_ready_other[i] &= (per_bank_ready[j] | per_bank_ready_ignore[j]);
end
end
for (integer i = 0; i < NUM_REQUESTS; i++) begin
per_bank_valid_r[core_req_addr[i][`BANK_SELECT_ADDR_RNG]][i] = core_req_valid[i];
per_bank_ready_sel[core_req_addr[i][`BANK_SELECT_ADDR_RNG]] = 0;
per_bank_ready_ignore[core_req_addr[i][`BANK_SELECT_ADDR_RNG]] = 1'b0;
end
end
assign per_bank_valid = per_bank_valid_r;
assign core_req_ready = & (per_bank_ready | per_bank_ready_sel);
for (genvar i = 0; i < NUM_BANKS; i++) begin
for (genvar j = 0; j < NUM_REQUESTS; j++) begin
assign per_bank_valid[i][j] = per_bank_valid_r[i][j] & per_bank_ready_other[i];
end
end
assign core_req_ready = & (per_bank_ready | per_bank_ready_ignore);
end else begin
`UNUSED_VAR (core_req_addr)
assign per_bank_valid = core_req_valid;
assign core_req_ready = per_bank_ready;
end

View file

@ -17,7 +17,7 @@ module VX_cache_miss_resrv #(
// core request tag size
parameter CORE_TAG_WIDTH = 1,
// Snooping request tag width
parameter SNP_REQ_TAG_WIDTH = 1,
parameter SNP_TAG_WIDTH = 1,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0
) (

View file

@ -1,33 +1,33 @@
`include "VX_cache_config.vh"
module VX_snp_forwarder #(
parameter CACHE_ID = 0,
parameter BANK_LINE_SIZE = 1,
parameter NUM_REQUESTS = 1,
parameter SNRQ_SIZE = 1,
parameter SNP_REQ_TAG_WIDTH = 1,
parameter SNP_FWD_TAG_WIDTH = 1
parameter CACHE_ID = 0,
parameter SRC_ADDR_WIDTH = 1,
parameter DST_ADDR_WIDTH = 1,
parameter NUM_REQUESTS = 1,
parameter SNP_TAG_WIDTH = 1,
parameter SNRQ_SIZE = 1
) (
input wire clk,
input wire reset,
// Snoop request
input wire snp_req_valid,
input wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr,
input wire [SRC_ADDR_WIDTH-1:0] snp_req_addr,
input wire snp_req_invalidate,
input wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag,
input wire [SNP_TAG_WIDTH-1:0] snp_req_tag,
output wire snp_req_ready,
// Snoop response
output wire snp_rsp_valid,
output wire [`DRAM_ADDR_WIDTH-1:0] snp_rsp_addr,
output wire [SRC_ADDR_WIDTH-1:0] snp_rsp_addr,
output wire snp_rsp_invalidate,
output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag,
output wire [SNP_TAG_WIDTH-1:0] snp_rsp_tag,
input wire snp_rsp_ready,
// Snoop Forwarding out
output wire [NUM_REQUESTS-1:0] snp_fwdout_valid,
output wire [NUM_REQUESTS-1:0][`DRAM_ADDR_WIDTH-1:0] snp_fwdout_addr,
output wire [NUM_REQUESTS-1:0][DST_ADDR_WIDTH-1:0] snp_fwdout_addr,
output wire [NUM_REQUESTS-1:0] snp_fwdout_invalidate,
output wire [NUM_REQUESTS-1:0][`LOG2UP(SNRQ_SIZE)-1:0] snp_fwdout_tag,
input wire [NUM_REQUESTS-1:0] snp_fwdout_ready,
@ -37,30 +37,37 @@ module VX_snp_forwarder #(
input wire [NUM_REQUESTS-1:0][`LOG2UP(SNRQ_SIZE)-1:0] snp_fwdin_tag,
output wire [NUM_REQUESTS-1:0] snp_fwdin_ready
);
localparam ADDR_DIFF = DST_ADDR_WIDTH - SRC_ADDR_WIDTH;
localparam NUM_REQUESTS_QUAL = NUM_REQUESTS * (1 << ADDR_DIFF);
localparam REQ_QUAL_BITS = `LOG2UP(NUM_REQUESTS_QUAL);
`STATIC_ASSERT(NUM_REQUESTS > 1, ("invalid value"))
reg [`REQS_BITS:0] pending_cntrs [SNRQ_SIZE-1:0];
reg [REQ_QUAL_BITS:0] pending_cntrs [SNRQ_SIZE-1:0];
wire [`LOG2UP(SNRQ_SIZE)-1:0] sfq_write_addr, sfq_read_addr;
wire sfq_acquire, sfq_release, sfq_full;
wire [`LOG2UP(SNRQ_SIZE)-1:0] fwdout_tag;
reg [NUM_REQUESTS-1:0] snp_fwdout_ready_other;
wire fwdout_ready;
wire fwdin_valid;
wire [`LOG2UP(SNRQ_SIZE)-1:0] fwdin_tag;
wire fwdin_valid;
wire fwdin_ready = snp_rsp_ready || (1 != pending_cntrs[sfq_read_addr]);
wire fwdin_fire = fwdin_valid && fwdin_ready;
wire fwdout_ready = (& snp_fwdout_ready);
assign snp_rsp_valid = fwdin_valid && (1 == pending_cntrs[sfq_read_addr]); // send response
assign snp_rsp_valid = fwdin_valid && (1 == pending_cntrs[sfq_read_addr]);
assign sfq_read_addr = fwdin_tag;
assign sfq_acquire = snp_req_valid && !sfq_full && fwdout_ready;
assign sfq_release = snp_rsp_valid && snp_rsp_ready;
wire snp_req_ready_unqual = !sfq_full && fwdout_ready;
VX_cam_buffer #(
.DATAW (`DRAM_ADDR_WIDTH + 1 + SNP_REQ_TAG_WIDTH),
.DATAW (SRC_ADDR_WIDTH + 1 + SNP_TAG_WIDTH),
.SIZE (SNRQ_SIZE)
) snp_fwd_cam (
.clk (clk),
@ -75,9 +82,54 @@ module VX_snp_forwarder #(
.full (sfq_full)
);
wire [DST_ADDR_WIDTH-1:0] snp_req_addr_qual;
wire dispatch_ready;
if (ADDR_DIFF != 0) begin
reg [`LOG2UP(SNRQ_SIZE)-1:0] fwdout_tag_r;
reg [DST_ADDR_WIDTH-1:0] snp_req_addr_r;
reg dispatch_ready_r;
reg use_cter_r;
always @(posedge clk) begin
if (reset) begin
dispatch_ready_r <= 0;
use_cter_r <= 0;
end else begin
if (snp_req_valid && snp_req_ready_unqual) begin
if (snp_req_addr_r[ADDR_DIFF-1:0] == ((1 << ADDR_DIFF)-2)) begin
dispatch_ready_r <= 1;
end
if (snp_req_addr_r[ADDR_DIFF-1:0] == ((1 << ADDR_DIFF)-1)) begin
dispatch_ready_r <= 0;
use_cter_r <= 0;
end else begin
use_cter_r <= 1;
end
end
end
if (snp_req_valid && snp_req_ready_unqual) begin
snp_req_addr_r <= snp_req_addr_qual + DST_ADDR_WIDTH'(1'b1);
end
if (!use_cter_r) begin
fwdout_tag_r <= sfq_write_addr;
end
end
assign sfq_acquire = snp_req_valid && snp_req_ready_unqual && !use_cter_r;
assign fwdout_tag = use_cter_r ? fwdout_tag_r : sfq_write_addr;
assign snp_req_addr_qual = use_cter_r ? snp_req_addr_r : {snp_req_addr, ADDR_DIFF'(0)};
assign dispatch_ready = dispatch_ready_r;
end else begin
assign sfq_acquire = snp_req_valid && snp_req_ready;
assign fwdout_tag = sfq_write_addr;
assign snp_req_addr_qual = snp_req_addr;
assign dispatch_ready = 1'b1;
end
always @(posedge clk) begin
if (sfq_acquire) begin
pending_cntrs[sfq_write_addr] <= NUM_REQUESTS;
pending_cntrs[sfq_write_addr] <= NUM_REQUESTS_QUAL;
end
if (fwdin_fire) begin
pending_cntrs[sfq_read_addr] <= pending_cntrs[sfq_read_addr] - 1;
@ -85,13 +137,25 @@ module VX_snp_forwarder #(
end
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign snp_fwdout_valid[i] = snp_req_valid && snp_req_ready;
assign snp_fwdout_addr[i] = snp_req_addr;
assign snp_fwdout_valid[i] = snp_req_valid && snp_fwdout_ready_other[i] && !sfq_full;
assign snp_fwdout_addr[i] = snp_req_addr_qual;
assign snp_fwdout_invalidate[i] = snp_req_invalidate;
assign snp_fwdout_tag[i] = sfq_write_addr;
assign snp_fwdout_tag[i] = fwdout_tag;
end
assign snp_req_ready = !sfq_full && fwdout_ready;
always @(*) begin
snp_fwdout_ready_other = {NUM_REQUESTS{1'b1}};
for (integer i = 0; i < NUM_REQUESTS; i++) begin
for (integer j = 0; j < NUM_REQUESTS; j++) begin
if (i != j)
snp_fwdout_ready_other[i] &= snp_fwdout_ready[j];
end
end
end
assign fwdout_ready = (& snp_fwdout_ready);
assign snp_req_ready = snp_req_ready_unqual && dispatch_ready;
if (NUM_REQUESTS > 1) begin
wire sel_valid;

View file

@ -3,17 +3,17 @@
module VX_snp_rsp_arb #(
parameter NUM_BANKS = 1,
parameter BANK_LINE_SIZE = 1,
parameter SNP_REQ_TAG_WIDTH = 1
parameter SNP_TAG_WIDTH = 1
) (
input wire clk,
input wire reset,
input wire [NUM_BANKS-1:0] per_bank_snp_rsp_valid,
input wire [NUM_BANKS-1:0][SNP_REQ_TAG_WIDTH-1:0] per_bank_snp_rsp_tag,
input wire [NUM_BANKS-1:0][SNP_TAG_WIDTH-1:0] per_bank_snp_rsp_tag,
output wire [NUM_BANKS-1:0] per_bank_snp_rsp_ready,
output wire snp_rsp_valid,
output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag,
output wire [SNP_TAG_WIDTH-1:0] snp_rsp_tag,
input wire snp_rsp_ready
);
if (NUM_BANKS > 1) begin
@ -35,7 +35,7 @@ module VX_snp_rsp_arb #(
wire stall = ~snp_rsp_ready && snp_rsp_valid;
VX_generic_register #(
.N(1 + SNP_REQ_TAG_WIDTH),
.N(1 + SNP_TAG_WIDTH),
.PASSTHRU(NUM_BANKS <= 2)
) pipe_reg (
.clk (clk),

View file

@ -11,8 +11,10 @@ interface VX_cache_core_rsp_if #(
) ();
wire [NUM_REQUESTS-1:0] valid;
wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] data;
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] tag;
wire ready;
endinterface

View file

@ -10,11 +10,13 @@ interface VX_cache_dram_req_if #(
) ();
wire valid;
wire rw;
wire [(DRAM_LINE_WIDTH/8)-1:0] byteen;
wire [DRAM_ADDR_WIDTH-1:0] addr;
wire [DRAM_LINE_WIDTH-1:0] data;
wire [DRAM_TAG_WIDTH-1:0] tag;
wire [DRAM_TAG_WIDTH-1:0] tag;
wire ready;
endinterface

View file

@ -9,8 +9,10 @@ interface VX_cache_dram_rsp_if #(
) ();
wire valid;
wire [DRAM_LINE_WIDTH-1:0] data;
wire [DRAM_TAG_WIDTH-1:0] tag;
wire ready;
endinterface

View file

@ -9,9 +9,11 @@ interface VX_cache_snp_req_if #(
) ();
wire valid;
wire [DRAM_ADDR_WIDTH-1:0] addr;
wire invalidate;
wire [SNP_TAG_WIDTH-1:0] tag;
wire [SNP_TAG_WIDTH-1:0] tag;
wire ready;
endinterface

View file

@ -8,7 +8,9 @@ interface VX_cache_snp_rsp_if #(
) ();
wire valid;
wire [SNP_TAG_WIDTH-1:0] tag;
wire ready;
endinterface

View file

@ -5,14 +5,12 @@
interface VX_cmt_to_csr_if ();
wire valid;
wire valid;
wire [`NW_BITS-1:0] wid;
wire [$clog2(`NUM_THREADS+1)-1:0] commit_size;
wire has_fflags;
fflags_t fflags;
wire has_fflags;
fflags_t fflags;
endinterface

View file

@ -6,9 +6,11 @@
interface VX_csr_io_req_if ();
wire valid;
wire [`CSR_ADDR_BITS-1:0] addr;
wire rw;
wire [31:0] data;
wire ready;
endinterface

View file

@ -6,7 +6,9 @@
interface VX_csr_io_rsp_if ();
wire valid;
wire [31:0] data;
wire ready;
endinterface

View file

@ -10,18 +10,15 @@ interface VX_decode_if ();
wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC;
wire [`EX_BITS-1:0] ex_type;
wire [`OP_BITS-1:0] op_type;
wire [`MOD_BITS-1:0] op_mod;
wire wb;
wire [`NR_BITS-1:0] rd;
wire [`NR_BITS-1:0] rs1;
wire [`NR_BITS-1:0] rs2;
wire [`NR_BITS-1:0] rs3;
wire [31:0] imm;
wire [31:0] imm;
wire rs1_is_PC;
wire rs2_is_imm;
wire use_rs3;

View file

@ -5,13 +5,15 @@
interface VX_exu_to_cmt_if ();
wire valid;
wire valid;
wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC;
wire [`NUM_THREADS-1:0][31:0] data;
wire [`NR_BITS-1:0] rd;
wire wb;
wire ready;
endinterface

View file

@ -5,7 +5,8 @@
interface VX_fpu_to_cmt_if ();
wire valid;
wire valid;
wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC;
@ -14,6 +15,7 @@ interface VX_fpu_to_cmt_if ();
wire wb;
wire has_fflags;
fflags_t [`NUM_THREADS-1:0] fflags;
wire ready;
endinterface

View file

@ -9,15 +9,13 @@
interface VX_fpu_to_csr_if ();
wire valid;
wire valid;
wire [`NW_BITS-1:0] wid;
wire fflags_NV;
wire fflags_DZ;
wire fflags_OF;
wire fflags_UF;
wire fflags_NX;
wire fflags_NV;
wire fflags_DZ;
wire fflags_OF;
wire fflags_UF;
wire fflags_NX;
endinterface

View file

@ -9,7 +9,6 @@ interface VX_gpr_rsp_if ();
wire [`NW_BITS-1:0] wid;
wire [31:0] PC;
`IGNORE_WARNINGS_END
wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [`NUM_THREADS-1:0][31:0] rs2_data;
wire [`NUM_THREADS-1:0][31:0] rs3_data;

View file

@ -6,9 +6,11 @@
interface VX_ifetch_req_if ();
wire valid;
wire [`NUM_THREADS-1:0] tmask;
wire [`NW_BITS-1:0] wid;
wire [31:0] PC;
wire ready;
endinterface

View file

@ -5,11 +5,13 @@
interface VX_ifetch_rsp_if ();
wire valid;
wire valid;
wire [`NUM_THREADS-1:0] tmask;
wire [`NW_BITS-1:0] wid;
wire [31:0] PC;
wire [31:0] instr;
wire ready;
endinterface

View file

@ -10,14 +10,11 @@ interface VX_lsu_req_if ();
wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC;
wire rw;
wire [`BYTEEN_BITS-1:0] byteen;
wire [`NUM_THREADS-1:0][31:0] store_data;
wire [`NUM_THREADS-1:0][31:0] base_addr;
wire [31:0] offset;
wire [31:0] offset;
wire [`NR_BITS-1:0] rd;
wire wb;

View file

@ -5,13 +5,12 @@
interface VX_warp_ctl_if ();
wire valid;
wire valid;
wire [`NW_BITS-1:0] wid;
gpu_tmc_t tmc;
gpu_wspawn_t wspawn;
gpu_barrier_t barrier;
gpu_split_t split;
gpu_tmc_t tmc;
gpu_wspawn_t wspawn;
gpu_barrier_t barrier;
gpu_split_t split;
endinterface

View file

@ -6,13 +6,12 @@
interface VX_writeback_if ();
wire valid;
wire [`NUM_THREADS-1:0] tmask;
wire [`NW_BITS-1:0] wid;
`IGNORE_WARNINGS_BEGIN
wire [31:0] PC;
`IGNORE_WARNINGS_END
wire [`NR_BITS-1:0] rd;
wire [`NUM_THREADS-1:0][31:0] data;

View file

@ -5,7 +5,7 @@
interface VX_wstall_if();
wire valid;
wire valid;
wire [`NW_BITS-1:0] wid;
endinterface

View file

@ -54,7 +54,7 @@ module VX_cam_buffer #(
end else begin
for (integer i = 0; i < CPORTS; i++) begin
if (release_slot[i]) begin
assert(0 == free_slots[release_addr[i]]) else $display("%t: freed slot at port %d", $time, release_addr[i]);
assert(0 == free_slots[release_addr[i]]) else $error("%t: releasing invalid slot at port %d", $time, release_addr[i]);
end
end
free_slots <= free_slots_n;
@ -63,7 +63,7 @@ module VX_cam_buffer #(
end
if (acquire_slot) begin
assert(1 == free_slots[write_addr]) else $display("%t: inused slot at port %d", $time, write_addr);
assert(1 == free_slots[write_addr]) else $error("%t: acquiring used slot at port %d", $time, write_addr);
entries[write_addr] <= write_data;
end
end

View file

@ -57,34 +57,24 @@ if args.outc != 'none':
print('\n#endif', file=f)
translation_rules = [
(re.compile(r'^$'), r''),
(re.compile(r'^(\s*)`ifndef\s+([^ ]+)'), r'\1#ifndef \2'),
(re.compile(r'^(\s*)`define\s+([^ ]+)'), r'\1#define \2'),
(re.compile(r'^(\s*)`include "VX_user_config\.vh"'), r''),
(re.compile(r'^(\s*)`define\s+([^ ]+) (.+)'), r'\1#define \2 \3'),
(re.compile(r'^(\s*)`endif\s+'), r'\1#endif'),
(re.compile(r'^(\s*)//(.*)'), r'\1// \2'),
]
# preprocessor directives
(re.compile(r'^\s*`include .*$'), r''),
(re.compile(r'`ifdef'), r'#ifdef'),
(re.compile(r'`ifndef'), r'#ifndef'),
(re.compile(r'`elif'), r'#elif'),
(re.compile(r'`else'), r'#else'),
(re.compile(r'`define'), r'#define'),
(re.compile(r'`endif'), r'#endif'),
post_rules = [
(re.compile(r"\d+'d(\d+)"), r'\1'),
# non-standard C but supported by GCC and Clang
(re.compile(r"\d+'b([01]+)"), r'0b\1'),
(re.compile(r"\d+'h([\da-fA-F]+)"), r'0x\1'),
# fix macro references (does not support escaped identifiers §5.6.1)
# macro expansion
(re.compile(r"`([A-Za-z_][$_0-9A-Za-z]*)"), r'\1'),
# literals
(re.compile(r"\d+'d(\d+)"), r'\1'),
(re.compile(r"\d+'b([01]+)"), r'0b\1'),
(re.compile(r"\d+'h([\da-fA-F]+)"), r'0x\1')
]
def post_process_line(line):
for pat, repl in post_rules:
line = pat.sub(repl, line)
return line
in_expansion = False
if args.outc != 'none':
with open(args.outc, 'a') as f:
print('''
@ -96,36 +86,14 @@ if args.outc != 'none':
with open(path.join(script_dir, '../rtl/VX_config.vh'), 'r') as r:
lineno = 0
for line in r:
if in_expansion:
f.write(post_process_line(line))
if not line.strip().endswith('\\'):
in_expansion = False
else:
for pat, repl in translation_rules:
if pat.match(line):
if line.strip().endswith('\\'):
in_expansion = True
f.write(post_process_line(pat.sub(repl, line)))
break
else:
raise ValueError('failed to find rule for: "' + line + '" (' + str(lineno) + ')')
for pat, repl in translation_rules:
match = pat.search(line)
if match:
line = re.sub(pat, repl, line)
#print("*** match @" + str(lineno) + ": " + match.group() + " => " + line)
f.write(line)
lineno = lineno + 1
print('''
// Misc
#define THREADS_PER_WARP NUM_THREADS
#define WARPS_PER_CORE NUM_WARPS
#define NUM_WI (NUM_WARPS * NUM_THREADS * NUM_CORES_PER_CLUSTER * NUM_CLUSTERS)
// legacy
#define TOTAL_THREADS NUM_WI
#define TOTAL_WARPS (NUM_WARPS * NUM_CORES_PER_CLUSTER * NUM_CLUSTERS)
// COLORS
#define GREEN "\\033[32m"
#define RED "\\033[31m"
#define DEFAULT "\\033[39m"
'''[1:], file=f)

View file

@ -1,19 +1,19 @@
SINGLECORE += -DNUM_CLUSTERS=1 -DNUM_CORES=1
SINGLECORE += -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DL2_ENABLE=0
#MULTICORE ?= -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
#MULTICORE ?= -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
MULTICORE ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
# control RTL debug print states
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE

View file

@ -57,11 +57,7 @@ void Simulator::attach_ram(RAM* ram) {
dram_rsp_vec_.clear();
}
void Simulator::reset() {
#ifndef NDEBUG
std::cout << timestamp << ": [sim] reset()" << std::endl;
#endif
void Simulator::reset() {
print_bufs_.clear();
dram_rsp_vec_.clear();
@ -96,15 +92,25 @@ void Simulator::reset() {
}
// Advances the simulated design by one clock cycle (a low phase followed by a
// high phase) and then runs the host-side DRAM, I/O, CSR and snoop bus handlers.
void Simulator::step() {
// Low phase: drive the clock low and evaluate the model.
vortex_->clk = 0;
this->eval();
// Save the design's ready outputs before the clock is raised. The DRAM, snoop
// and CSR handlers called below test these saved copies for their handshakes.
// NOTE(review): presumably this is so a transfer is judged with the ready value
// that was in effect at the rising edge, not the value after it - confirm.
dram_rsp_ready_ = vortex_->dram_rsp_ready;
snp_req_ready_ = vortex_->snp_req_ready;
csr_io_req_ready_ = vortex_->csr_io_req_ready;
// High phase: drive the clock high and evaluate the model again.
vortex_->clk = 1;
this->eval();
// Service each bus model once per cycle, after the high-phase evaluation.
this->eval_dram_bus();
this->eval_io_bus();
this->eval_csr_bus();
this->eval_snp_bus();
// In builds without NDEBUG, flush stdout at the end of every step.
#ifndef NDEBUG
fflush(stdout);
#endif
}
void Simulator::eval() {
@ -134,8 +140,7 @@ void Simulator::eval_dram_bus() {
// send DRAM response
if (dram_rsp_active_
&& vortex_->dram_rsp_valid
&& vortex_->dram_rsp_ready) {
&& vortex_->dram_rsp_valid && dram_rsp_ready_) {
dram_rsp_active_ = false;
}
if (!dram_rsp_active_) {
@ -183,7 +188,7 @@ void Simulator::eval_dram_bus() {
}
}
vortex_->dram_req_ready = ~dram_stalled;
vortex_->dram_req_ready = !dram_stalled;
}
void Simulator::eval_io_bus() {
@ -207,31 +212,32 @@ void Simulator::eval_io_bus() {
}
void Simulator::eval_snp_bus() {
if (snp_req_active_) {
if (vortex_->snp_rsp_valid) {
assert(pending_snp_reqs_ > 0);
--pending_snp_reqs_;
if (snp_req_active_) {
if (vortex_->snp_req_valid && snp_req_ready_) {
assert(snp_req_size_);
#ifdef DBG_PRINT_CACHE_SNP
std::cout << timestamp << ": [sim] snp rsp: tag=" << vortex_->snp_rsp_tag << " pending=" << pending_snp_reqs_ << std::endl;
std::cout << std::dec << timestamp << ": [sim] SNP Req: addr=" << std::hex << vortex_->snp_req_addr << " tag=" << vortex_->snp_req_tag << " remain=" << (snp_req_size_-1) << std::endl;
#endif
}
if (vortex_->snp_req_valid && vortex_->snp_req_ready) {
if (snp_req_size_ != 0) {
vortex_->snp_req_addr += 1;
vortex_->snp_req_tag += 1;
--snp_req_size_;
++pending_snp_reqs_;
#ifdef DBG_PRINT_CACHE_SNP
std::cout << timestamp << ": [sim] snp req: addr=" << std::hex << vortex_->snp_req_addr << " tag=" << vortex_->snp_req_tag << " remain=" << snp_req_size_ << std::endl;
#endif
} else {
vortex_->snp_req_valid = 0;
++vortex_->snp_req_addr;
++vortex_->snp_req_tag;
++pending_snp_reqs_;
--snp_req_size_;
if (0 == snp_req_size_) {
vortex_->snp_req_valid = false;
}
}
if (!vortex_->snp_req_valid
&& 0 == pending_snp_reqs_) {
snp_req_active_ = false;
}
if (vortex_->snp_rsp_valid && vortex_->snp_rsp_ready) {
assert(pending_snp_reqs_ > 0);
--pending_snp_reqs_;
if (!vortex_->snp_req_valid && 0 == pending_snp_reqs_) {
vortex_->snp_rsp_ready = false;
snp_req_active_ = false;
}
#ifdef DBG_PRINT_CACHE_SNP
std::cout << std::dec << timestamp << ": [sim] SNP Rsp: tag=" << std::hex << vortex_->snp_rsp_tag << " pending=" << pending_snp_reqs_ << std::endl;
#endif
}
} else {
vortex_->snp_req_valid = 0;
vortex_->snp_rsp_ready = 0;
@ -240,18 +246,24 @@ void Simulator::eval_snp_bus() {
void Simulator::eval_csr_bus() {
if (csr_req_active_) {
if (vortex_->csr_io_req_rw) {
if (vortex_->csr_io_req_ready) {
vortex_->snp_req_valid = 0;
csr_req_active_ = false;
}
} else {
if (vortex_->csr_io_rsp_valid) {
*csr_rsp_value_ = vortex_->csr_io_rsp_data;
vortex_->snp_req_valid = 0;
vortex_->csr_io_rsp_ready = 0;
csr_req_active_ = false;
}
if (vortex_->csr_io_req_valid && csr_io_req_ready_) {
#ifndef NDEBUG
if (vortex_->csr_io_req_rw)
std::cout << std::dec << timestamp << ": [sim] CSR Wr Req: core=" << (int)vortex_->csr_io_req_coreid << ", addr=" << std::hex << vortex_->csr_io_req_addr << ", value=" << vortex_->csr_io_req_data << std::endl;
else
std::cout << std::dec << timestamp << ": [sim] CSR Rd Req: core=" << (int)vortex_->csr_io_req_coreid << ", addr=" << std::hex << vortex_->csr_io_req_addr << std::endl;
#endif
vortex_->csr_io_req_valid = 0;
if (vortex_->csr_io_req_rw)
csr_req_active_ = false;
}
if (vortex_->csr_io_rsp_valid && vortex_->csr_io_rsp_ready) {
*csr_rsp_value_ = vortex_->csr_io_rsp_data;
vortex_->csr_io_rsp_ready = 0;
csr_req_active_ = false;
#ifndef NDEBUG
std::cout << std::dec << timestamp << ": [sim] CSR Rsp: value=" << vortex_->csr_io_rsp_data << std::endl;
#endif
}
} else {
vortex_->csr_io_req_valid = 0;
@ -278,33 +290,23 @@ bool Simulator::csr_req_active() const {
}
void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
#ifndef NDEBUG
std::cout << timestamp << ": [sim] flush_caches()" << std::endl;
#endif
if (0 == size)
return;
assert(!vortex_->snp_rsp_valid);
vortex_->snp_req_addr = mem_addr / GLOBAL_BLOCK_SIZE;
vortex_->snp_req_tag = 0;
vortex_->snp_req_valid = 1;
vortex_->snp_rsp_ready = 1;
snp_req_size_ = (size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE;
--snp_req_size_;
pending_snp_reqs_ = 1;
snp_req_size_ = (size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE;
pending_snp_reqs_ = 0;
snp_req_active_ = true;
#ifdef DBG_PRINT_CACHE_SNP
std::cout << timestamp << ": [sim] snp req: addr=" << std::hex << vortex_->snp_req_addr << " tag=" << vortex_->snp_req_tag << " remain=" << snp_req_size_ << std::endl;
#endif
}
void Simulator::set_csr(int core_id, int addr, unsigned value) {
#ifndef NDEBUG
std::cout << timestamp << ": [sim] set_csr()" << std::endl;
#endif
vortex_->csr_io_req_valid = 1;
vortex_->csr_io_req_coreid = core_id;
vortex_->csr_io_req_addr = addr;
@ -316,10 +318,6 @@ void Simulator::set_csr(int core_id, int addr, unsigned value) {
}
void Simulator::get_csr(int core_id, int addr, unsigned *value) {
#ifndef NDEBUG
std::cout << timestamp << ": [sim] get_csr()" << std::endl;
#endif
vortex_->csr_io_req_valid = 1;
vortex_->csr_io_req_coreid = core_id;
vortex_->csr_io_req_addr = addr;
@ -327,12 +325,13 @@ void Simulator::get_csr(int core_id, int addr, unsigned *value) {
vortex_->csr_io_rsp_ready = 1;
csr_rsp_value_ = value;
csr_req_active_ = true;
}
void Simulator::run() {
#ifndef NDEBUG
std::cout << timestamp << ": [sim] run()" << std::endl;
std::cout << std::dec << timestamp << ": [sim] run()" << std::endl;
#endif
// execute program

View file

@ -65,7 +65,11 @@ private:
std::list<dram_req_t> dram_rsp_vec_;
bool dram_rsp_active_;
bool dram_rsp_ready_;
bool snp_req_ready_;
bool csr_io_req_ready_;
bool snp_req_active_;
bool csr_req_active_;

View file

@ -3,6 +3,10 @@
#include <fstream>
#include <iomanip>
// ANSI SGR colour sequences for terminal output. "\033" is the octal escape for
// the ESC control character (0x1B); a doubled backslash would instead produce
// the literal text "\033[..m" on the terminal.
#define GREEN "\033[32m"
#define RED "\033[31m"
#define DEFAULT "\033[39m"
#define ALL_TESTS
int main(int argc, char **argv) {