per_bank_miss added to VX_cache.v

This commit is contained in:
trmontgomery 2020-11-02 12:07:10 -05:00
commit 4151ee197b
68 changed files with 2410 additions and 1634 deletions

View file

@ -4,10 +4,12 @@ all:
$(MAKE) -C driver
$(MAKE) -C runtime
$(MAKE) -C simX
$(MAKE) -C benchmarks/opencl
clean:
$(MAKE) -C hw clean
$(MAKE) -C driver clean
$(MAKE) -C simX clean
$(MAKE) -C runtime clean
$(MAKE) -C benchmarks/opencl clean

View file

@ -106,7 +106,7 @@ int main (int argc, char **argv) {
size_t kernel_size;
cl_int binary_status;
srand(time(NULL));
srand(50);
// read kernel binary from file
if (0 != read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size))

Binary file not shown.

View file

@ -1,11 +1,14 @@
OPAE_HOME ?= /tools/opae/1.4.0
#CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I../include -I$(OPAE_HOME)/include -I../../hw
LDFLAGS += -L$(OPAE_HOME)/lib
#SCOPE=1
# stack execution protection
LDFLAGS +=-z noexecstack
@ -21,9 +24,6 @@ CXXFLAGS += -fPIC
# Dump perf stats
CXXFLAGS += -DDUMP_PERF_STATS
# Enable scope analyzer
#CXXFLAGS += -DSCOPE
LDFLAGS += -shared
FPGA_LIBS += -luuid -lopae-c
@ -32,8 +32,6 @@ ASE_LIBS += -luuid -lopae-c-ase
VLSIM_LIBS += -lopae-c-vlsim
LIB_DIR=../lib
ASE_DIR = ase
VLSIM_DIR = vlsim
@ -46,7 +44,14 @@ PROJECT_VLSIM = $(VLSIM_DIR)/libvortex.so
AFU_JSON_INFO = vortex_afu.h
SRCS = vortex.cpp vx_scope.cpp ../common/vx_utils.cpp
SRCS = vortex.cpp ../common/vx_utils.cpp
# Enable scope analyzer
ifdef SCOPE
CXXFLAGS += -DSCOPE
SRCS += vx_scope.cpp
SET_SCOPE = SCOPE=1
endif
all: vlsim
@ -57,14 +62,14 @@ json: ../../hw/opae/vortex_afu.json
fpga: $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) $(FPGA_LIBS) -o $(PROJECT)
ase: $(SRCS) $(ASE_DIR)
asesim: $(SRCS) $(ASE_DIR)
$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $(PROJECT_ASE)
vlsim: $(SRCS) opae-vlsim
$(CXX) $(CXXFLAGS) -L./vlsim -DUSE_VLSIM $(SRCS) $(LDFLAGS) $(VLSIM_LIBS) -o $(PROJECT_VLSIM)
vlsim: $(SRCS) vlsim-hw
$(CXX) $(CXXFLAGS) -DUSE_VLSIM $(SRCS) $(LDFLAGS) -L./vlsim $(VLSIM_LIBS) -o $(PROJECT_VLSIM)
opae-vlsim:
$(MAKE) -C vlsim
vlsim-hw:
$(SET_SCOPE) $(MAKE) -C vlsim
vortex.o: vortex.cpp
$(CXX) $(CXXFLAGS) -c vortex.cpp -o $@

1
driver/opae/vlsim/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
/obj_dir/*

View file

@ -12,6 +12,8 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_PRINT_FLAGS += -DDBG_CORE_REQ_INFO
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CORE_REQ_INFO
@ -22,6 +24,7 @@ CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1
#DEBUG=1
#SCOPE=1
CFLAGS += -fPIC
@ -34,7 +37,9 @@ LDFLAGS += -shared -pthread
TOP = vortex_afu_shim
RTL_DIR = ../../../hw/rtl
RTL_DIR=../../../hw/rtl
SCRIPT_DIR=../../../hw/scripts
SRCS = fpga.cpp opae_sim.cpp
SRCS += $(RTL_DIR)/fp_cores/svdpi/float_dpi.cpp
@ -42,7 +47,7 @@ SRCS += $(RTL_DIR)/fp_cores/svdpi/float_dpi.cpp
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/svdpi -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE)
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic $(CONFIGS)
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic $(CONFIGS)
VL_FLAGS += -Wno-DECLFILENAME
VL_FLAGS += --x-initial unique --x-assign unique
VL_FLAGS += verilator.vlt
@ -53,29 +58,42 @@ VL_FLAGS += verilator.vlt
# Debugging
ifdef DEBUG
VL_FLAGS += -DVCD_OUTPUT --assert --trace $(DBG_FLAGS)
VL_FLAGS += -DVCD_OUTPUT --assert --trace-fst --trace-threads 1 $(DBG_FLAGS)
CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG
CFLAGS += -DNDEBUG
endif
# Enable scope analyzer
ifdef SCOPE
VL_FLAGS += -DSCOPE
CFLAGS += -DSCOPE
SCOPE_VH = $(RTL_DIR)/scope-defs.vh
endif
# use our OPAE shim
VL_FLAGS += -DNOPAE
CFLAGS += -DNOPAE
# Enable scope analyzer
#VL_FLAGS += -DSCOPE
#CFLAGS += -DSCOPE
# use DPI FPU
#VL_FLAGS += -DFPU_FAST
RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip
PROJECT = libopae-c-vlsim.so
all: $(PROJECT)
# generate scope data
scope: $(RTL_DIR)/scope-defs.vh
$(RTL_DIR)/scope-defs.vh: $(SCRIPT_DIR)/scope.json
$(SCRIPT_DIR)/scope.py $(RTL_INCLUDE) $(CONFIGS) -cc ../scope-defs.h -vl $(RTL_DIR)/scope-defs.vh $(SCRIPT_DIR)/scope.json
$(PROJECT): $(SRCS)
$(PROJECT): $(SRCS) $(SCOPE_VH)
verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
OPT_FAST="-O0 -g" make -j -C obj_dir -f V$(TOP).mk
clean:
rm -rf $(PROJECT) obj_dir
rm -rf $(PROJECT) obj_dir ../scope-defs.h $(RTL_DIR)/scope-defs.vh

View file

@ -31,9 +31,9 @@ opae_sim::opae_sim() {
#ifdef VCD_OUTPUT
Verilated::traceEverOn(true);
trace_ = new VerilatedVcdC();
trace_ = new VerilatedFstC();
vortex_afu_->trace(trace_, 99);
trace_->open("trace.vcd");
trace_->open("trace.fst");
#endif
this->reset();
@ -85,6 +85,19 @@ void opae_sim::get_io_address(uint64_t wsid, uint64_t *ioaddr) {
*ioaddr = host_buffers_[wsid].ioaddr;
}
// Issues one MMIO read request to the simulated AFU and stores the returned data in *value.
// mmio_num is accepted but not referenced anywhere in this body.
void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) {
// Hold mutex_ for the whole request/response exchange.
std::lock_guard<std::mutex> guard(mutex_);
// Raise the read-valid strobe and fill in the request header.
vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 1;
// The header address is offset / 4 (presumably byte offset -> 32-bit word index; TODO confirm).
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0;
// Advance the simulation by one step() so the design can see the request.
this->step();
// Drop the strobe again so the request is not presented on a later step.
vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 0;
// The response is expected to be valid immediately after that single step.
assert(vortex_afu_->af2cp_sTxPort_c2_mmioRdValid);
*value = vortex_afu_->af2cp_sTxPort_c2_data;
}
void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value) {
std::lock_guard<std::mutex> guard(mutex_);
@ -94,20 +107,7 @@ void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value)
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0;
memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, &value, 8);
this->step();
assert(!vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid);
}
void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) {
std::lock_guard<std::mutex> guard(mutex_);
vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 1;
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0;
this->step();
assert(!vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid);
assert(vortex_afu_->af2cp_sTxPort_c2_mmioRdValid);
*value = vortex_afu_->af2cp_sTxPort_c2_data;
vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 0;
}
void opae_sim::flush() {
@ -117,24 +117,45 @@ void opae_sim::flush() {
///////////////////////////////////////////////////////////////////////////////
void opae_sim::reset() {
vortex_afu_->reset = 1;
this->step();
vortex_afu_->reset = 0;
host_buffers_.clear();
dram_reads_.clear();
cci_reads_.clear();
cci_writes_.clear();
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0;
vortex_afu_->vcp2af_sRxPort_c1_rspValid = 0;
vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = 0;
vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = 0;
vortex_afu_->avs_readdatavalid = 0;
vortex_afu_->avs_waitrequest = 0;
vortex_afu_->reset = 1;
vortex_afu_->clk = 0;
this->eval();
vortex_afu_->clk = 1;
this->eval();
vortex_afu_->reset = 0;
// Turn on assertion after reset
Verilated::assertOn(true);
}
void opae_sim::step() {
vortex_afu_->clk = 0;
this->eval();
vortex_afu_->clk = 1;
this->eval();
this->sRxPort_bus();
this->sTxPort_bus();
this->avs_bus();
vortex_afu_->clk = 0;
this->eval();
vortex_afu_->clk = 1;
this->eval();
#ifndef NDEBUG
fflush(stdout);
#endif
}
void opae_sim::eval() {
@ -145,100 +166,104 @@ void opae_sim::eval() {
++timestamp;
}
void opae_sim::sRxPort_bus() {
void opae_sim::sRxPort_bus() {
// check mmio request
bool mmio_req_enabled = vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid
|| vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid;
// schedule CCI read responses
int cci_rd_index = -1;
for (int i = 0; i < cci_reads_.size(); i++) {
if (cci_reads_[i].cycles_left > 0) {
cci_reads_[i].cycles_left -= 1;
}
if ((cci_rd_index == -1)
&& (cci_reads_[i].cycles_left == 0)) {
cci_rd_index = i;
std::list<cci_rd_req_t>::iterator cci_rd_it(cci_reads_.end());
for (auto it = cci_reads_.begin(), ie = cci_reads_.end(); it != ie; ++it) {
if (it->cycles_left > 0)
it->cycles_left -= 1;
if ((cci_rd_it == ie) && (it->cycles_left == 0)) {
cci_rd_it = it;
}
}
// schedule CCI write responses
int cci_wr_index = -1;
for (int i = 0; i < cci_writes_.size(); i++) {
if (cci_writes_[i].cycles_left > 0) {
cci_writes_[i].cycles_left -= 1;
std::list<cci_wr_req_t>::iterator cci_wr_it(cci_writes_.end());
for (auto it = cci_writes_.begin(), ie = cci_writes_.end(); it != ie; ++it) {
if (it->cycles_left > 0)
it->cycles_left -= 1;
if ((cci_wr_it == ie) && (it->cycles_left == 0)) {
cci_wr_it = it;
}
if ((cci_wr_index == -1)
&& (cci_writes_[i].cycles_left == 0)) {
cci_wr_index = i;
}
}
// send CCI read response
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0;
if (cci_rd_index != -1) {
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 1;
memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, cci_reads_[cci_rd_index].block.data(), CACHE_BLOCK_SIZE);
vortex_afu_->vcp2af_sRxPort_c0_hdr_mdata = cci_reads_[cci_rd_index].mdata;
cci_reads_.erase(cci_reads_.begin() + cci_rd_index);
}
// send CCI write response
vortex_afu_->vcp2af_sRxPort_c1_rspValid = 0;
if (cci_wr_index != -1) {
if (cci_wr_it != cci_writes_.end()) {
vortex_afu_->vcp2af_sRxPort_c1_rspValid = 1;
vortex_afu_->vcp2af_sRxPort_c1_hdr_mdata = cci_writes_[cci_wr_index].mdata;
cci_writes_.erase(cci_writes_.begin() + cci_wr_index);
vortex_afu_->vcp2af_sRxPort_c1_hdr_mdata = cci_wr_it->mdata;
cci_writes_.erase(cci_wr_it);
}
// mmio
vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 0;
vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 0;
// send CCI read response (ensure mmio disabled)
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0;
if (!mmio_req_enabled
&& (cci_rd_it != cci_reads_.end())) {
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 1;
memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, cci_rd_it->block.data(), CACHE_BLOCK_SIZE);
vortex_afu_->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata;
/*printf("*** [vlsim] read-rsp: addr=%ld, mdata=%d, data=", cci_rd_it->addr, cci_rd_it->mdata);
for (int i = 0; i < CACHE_BLOCK_SIZE; ++i)
printf("%02x", cci_rd_it->block[CACHE_BLOCK_SIZE-1-i]);
printf("\n");*/
fflush(stdout);
cci_reads_.erase(cci_rd_it);
}
}
void opae_sim::sTxPort_bus() {
// check read queue size
vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = (cci_reads_.size() >= CCI_RQ_SIZE);
// check write queue size
vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = (cci_writes_.size() >= CCI_WQ_SIZE);
// process read requests
if (vortex_afu_->af2cp_sTxPort_c0_valid && !vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull) {
if (vortex_afu_->af2cp_sTxPort_c0_valid) {
assert(!vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull);
cci_rd_req_t cci_req;
cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD);
cci_req.addr = vortex_afu_->af2cp_sTxPort_c0_hdr_address;
cci_req.mdata = vortex_afu_->af2cp_sTxPort_c0_hdr_mdata;
auto host_ptr = (uint64_t*)(vortex_afu_->af2cp_sTxPort_c0_hdr_address * CACHE_BLOCK_SIZE);
memcpy(cci_req.block.data(), host_ptr, CACHE_BLOCK_SIZE);
cci_reads_.push_back(cci_req);
//printf("*** [vlsim] read-req: addr=%ld, mdata=%d\n", vortex_afu_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata);
fflush(stdout);
cci_reads_.emplace_back(cci_req);
}
// process write requests
if (vortex_afu_->af2cp_sTxPort_c1_valid && !vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull) {
if (vortex_afu_->af2cp_sTxPort_c1_valid) {
assert(!vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull);
cci_wr_req_t cci_req;
cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD);
cci_req.mdata = vortex_afu_->af2cp_sTxPort_c1_hdr_mdata;
auto host_ptr = (uint64_t*)(vortex_afu_->af2cp_sTxPort_c1_hdr_address * CACHE_BLOCK_SIZE);
memcpy(host_ptr, vortex_afu_->af2cp_sTxPort_c1_data, CACHE_BLOCK_SIZE);
cci_writes_.push_back(cci_req);
cci_writes_.emplace_back(cci_req);
}
// check queues overflow
vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = (cci_reads_.size() >= (CCI_RQ_SIZE-1));
vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = (cci_writes_.size() >= (CCI_WQ_SIZE-1));
}
void opae_sim::avs_bus() {
// schedule DRAM read responses
int dram_rd_index = -1;
for (int i = 0; i < dram_reads_.size(); i++) {
if (dram_reads_[i].cycles_left > 0) {
dram_reads_[i].cycles_left -= 1;
std::list<dram_rd_req_t>::iterator dram_rd_it(dram_reads_.end());
for (auto it = dram_reads_.begin(), ie = dram_reads_.end(); it != ie; ++it) {
if (it->cycles_left > 0) {
it->cycles_left -= 1;
}
if ((dram_rd_index == -1)
&& (dram_reads_[i].cycles_left == 0)) {
dram_rd_index = i;
if ((it != ie) && (it->cycles_left == 0)) {
dram_rd_it = it;
}
}
// send DRAM response
vortex_afu_->avs_readdatavalid = 0;
if (dram_rd_index != -1) {
if (dram_rd_it != dram_reads_.end()) {
vortex_afu_->avs_readdatavalid = 1;
memcpy(vortex_afu_->avs_readdata, dram_reads_[dram_rd_index].block.data(), CACHE_BLOCK_SIZE);
dram_reads_.erase(dram_reads_.begin() + dram_rd_index);
memcpy(vortex_afu_->avs_readdata, dram_rd_it->block.data(), CACHE_BLOCK_SIZE);
dram_reads_.erase(dram_rd_it);
}
// handle DRAM stalls
@ -271,7 +296,7 @@ void opae_sim::avs_bus() {
dram_req.cycles_left = DRAM_LATENCY;
unsigned base_addr = (vortex_afu_->avs_address * CACHE_BLOCK_SIZE);
ram_.read(base_addr, CACHE_BLOCK_SIZE, dram_req.block.data());
dram_reads_.push_back(dram_req);
dram_reads_.emplace_back(dram_req);
}
}

View file

@ -5,7 +5,7 @@
#include "verilated.h"
#ifdef VCD_OUTPUT
#include <verilated_vcd_c.h>
#include <verilated_fst_c.h>
#endif
#include <VX_config.h>
@ -13,7 +13,7 @@
#include <ostream>
#include <future>
#include <vector>
#include <list>
#include <unordered_map>
#define CACHE_BLOCK_SIZE 64
@ -41,18 +41,19 @@ private:
typedef struct {
int cycles_left;
std::array<uint8_t, CACHE_BLOCK_SIZE> block;
unsigned tag;
uint32_t tag;
} dram_rd_req_t;
typedef struct {
int cycles_left;
std::array<uint8_t, CACHE_BLOCK_SIZE> block;
unsigned mdata;
uint64_t addr;
uint32_t mdata;
} cci_rd_req_t;
typedef struct {
int cycles_left;
unsigned mdata;
uint32_t mdata;
} cci_wr_req_t;
typedef struct {
@ -76,17 +77,17 @@ private:
std::unordered_map<int64_t, host_buffer_t> host_buffers_;
std::vector<dram_rd_req_t> dram_reads_;
std::list<dram_rd_req_t> dram_reads_;
std::vector<cci_rd_req_t> cci_reads_;
std::list<cci_rd_req_t> cci_reads_;
std::vector<cci_wr_req_t> cci_writes_;
std::list<cci_wr_req_t> cci_writes_;
std::mutex mutex_;
RAM ram_;
Vvortex_afu_shim *vortex_afu_;
#ifdef VCD_OUTPUT
VerilatedVcdC *trace_;
VerilatedFstC *trace_;
#endif
};

View file

@ -87,7 +87,7 @@ t_if_ccip_Tx af2cp_sTxPort;
vortex_afu #(
.NUM_LOCAL_MEM_BANKS(NUM_LOCAL_MEM_BANKS)
) vortex_afu (
) afu (
.clk(clk),
.reset(reset),
.cp2af_sRxPort(cp2af_sRxPort),

View file

@ -31,7 +31,7 @@
fpga_result res = _expr; \
if (res == FPGA_OK) \
break; \
printf("OPAE Error: '%s' returned %d, %s!\n", \
printf("[VXDRV] Error: '%s' returned %d, %s!\n", \
#_expr, (int)res, fpgaErrStr(res)); \
return -1; \
} while (false)
@ -118,7 +118,7 @@ extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
*value = STARTUP_ADDR;
break;
default:
fprintf(stderr, "invalid caps id: %d\n", caps_id);
fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id);
std::abort();
return -1;
}
@ -156,7 +156,7 @@ extern int vx_dev_open(vx_device_h* hdevice) {
fpgaDestroyProperties(&filter);
if (num_matches < 1) {
fprintf(stderr, "Accelerator %s not found!\n", AFU_ACCEL_UUID);
fprintf(stderr, "[VXDRV] Error: accelerator %s not found!\n", AFU_ACCEL_UUID);
return -1;
}
@ -197,9 +197,10 @@ extern int vx_dev_open(vx_device_h* hdevice) {
fpgaClose(accel_handle);
return ret;
}
fprintf(stdout, "DEVCAPS: version=%d, num_cores=%d, num_warps=%d, num_threads=%d\n",
#ifndef NDEBUG
fprintf(stdout, "[VXDRV] DEVCAPS: version=%d, num_cores=%d, num_warps=%d, num_threads=%d\n",
device->implementation_id, device->num_cores, device->num_warps, device->num_threads);
#endif
}
#ifdef SCOPE
@ -236,18 +237,18 @@ extern int vx_dev_close(vx_device_h hdevice) {
int ret = vx_get_perf(hdevice, core_id, &instrs, &cycles);
assert(ret == 0);
float IPC = (float)(double(instrs) / double(cycles));
fprintf(stdout, "PERF: core%d: instrs=%ld, cycles=%ld, IPC=%f\n", core_id, instrs, cycles, IPC);
fprintf(stdout, "[VXDRV] PERF: core%d: instrs=%ld, cycles=%ld, IPC=%f\n", core_id, instrs, cycles, IPC);
total_instrs += instrs;
total_cycles = std::max<uint64_t>(total_cycles, cycles);
}
float IPC = (float)(double(total_instrs) / double(total_cycles));
fprintf(stdout, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", total_instrs, total_cycles, IPC);
fprintf(stdout, "[VXDRV] PERF: instrs=%ld, cycles=%ld, IPC=%f\n", total_instrs, total_cycles, IPC);
} else {
uint64_t instrs, cycles;
int ret = vx_get_perf(hdevice, 0, &instrs, &cycles);
float IPC = (float)(double(instrs) / double(cycles));
assert(ret == 0);
fprintf(stdout, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", instrs, cycles, IPC);
fprintf(stdout, "[VXDRV] PERF: instrs=%ld, cycles=%ld, IPC=%f\n", instrs, cycles, IPC);
}
#endif
@ -373,7 +374,7 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_STATUS, &data));
if (0 == data || 0 == timeout) {
if (data != 0) {
fprintf(stdout, "ready-wait timed out: status=%ld\n", data);
fprintf(stdout, "[VXDRV] ready-wait timed out: status=%ld\n", data);
}
break;
}
@ -509,12 +510,6 @@ extern int vx_start(vx_device_h hdevice) {
// start execution
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_RUN));
#ifdef SCOPE
sleep(15);
vx_scope_stop(device->fpga, 0);
exit(0);
#endif
return 0;
}
@ -547,7 +542,7 @@ extern int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* valu
// Ensure ready for new command
if (vx_ready_wait(hdevice, -1) != 0)
return -1;
return -1;
// write CSR value
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core_id));

View file

@ -4,6 +4,9 @@
#include <chrono>
#include <vector>
#include <assert.h>
#include <chrono>
#include <thread>
#include <mutex>
#ifdef USE_VLSIM
#include "vlsim/fpga.h"
@ -14,6 +17,9 @@
#include <VX_config.h>
#include "vx_scope.h"
#include "vortex_afu.h"
#include "scope-defs.h"
#define SCOPE_FRAME_WIDTH 1768
#define CHECK_RES(_expr) \
do { \
@ -28,140 +34,72 @@
#define MMIO_SCOPE_READ (AFU_IMAGE_MMIO_SCOPE_READ * 4)
#define MMIO_SCOPE_WRITE (AFU_IMAGE_MMIO_SCOPE_WRITE * 4)
struct scope_signal_t {
int width;
const char* name;
};
#define CMD_GET_VALID 0
#define CMD_GET_DATA 1
#define CMD_GET_WIDTH 2
#define CMD_GET_COUNT 3
#define CMD_SET_DELAY 4
#define CMD_SET_STOP 5
#define CMD_GET_OFFSET 6
// Compile-time floor(log2(n)) for n >= 1; any n <= 1 (including zero and negatives) yields 0.
// Kept as a single return expression so it remains a valid C++11 constexpr function.
constexpr int ilog2(int n) {
  return (n <= 1) ? 0 : (ilog2(n >> 1) + 1);
}
static constexpr int num_modules = sizeof(scope_modules) / sizeof(scope_module_t);
static constexpr int NW_BITS = ilog2(NUM_WARPS);
#ifdef EXT_F_ENABLE
static constexpr int NR_BITS = ilog2(64);
#else
static constexpr int NR_BITS = ilog2(32);
#endif
static constexpr int EX_BITS = 3;
static constexpr int OP_BITS = 4;
static constexpr int MOD_BITS = 3;
static constexpr int ICORE_TAG_WIDTH = NW_BITS;
static constexpr int DCORE_TAG_WIDTH = ilog2(LSUQ_SIZE);
static constexpr scope_signal_t scope_signals[] = {
{ 32, "dram_req_addr" },
{ 1, "dram_req_rw" },
{ 16, "dram_req_byteen" },
{ 128, "dram_req_data" },
{ 29, "dram_req_tag" },
{ 128, "dram_rsp_data" },
{ 29, "dram_rsp_tag" },
{ 32, "snp_req_addr" },
{ 1, "snp_req_invalidate" },
{ 16, "snp_req_tag" },
{ 16, "snp_rsp_tag" },
{ NW_BITS, "icache_req_wid" },
{ 32, "icache_req_addr" },
{ ICORE_TAG_WIDTH, "icache_req_tag" },
{ 32, "icache_rsp_data" },
{ ICORE_TAG_WIDTH, "icache_rsp_tag" },
{ NW_BITS, "dcache_req_wid" },
{ 32, "dcache_req_pc" },
{ NUM_THREADS * 32, "dcache_req_addr" },
{ 1, "dcache_req_rw" },
{ NUM_THREADS * 4, "dcache_req_byteen" },
{ NUM_THREADS * 32, "dcache_req_data" },
{ DCORE_TAG_WIDTH, "dcache_req_tag" },
{ NUM_THREADS * 32, "dcache_rsp_data" },
{ DCORE_TAG_WIDTH, "dcache_rsp_tag" },
{ NW_BITS, "issue_wid" },
{ NUM_THREADS, "issue_tmask" },
{ 32, "issue_pc" },
{ EX_BITS, "issue_ex_type" },
{ OP_BITS, "issue_op_type" },
{ MOD_BITS, "issue_op_mod" },
{ 1, "issue_wb" },
{ NR_BITS, "issue_rd" },
{ NR_BITS, "issue_rs1" },
{ NR_BITS, "issue_rs2" },
{ NR_BITS, "issue_rs3" },
{ 32, "issue_imm" },
{ 1, "issue_rs1_is_pc" },
{ 1, "issue_rs2_is_imm" },
{ NW_BITS, "gpr_rsp_wid" },
{ 32, "gpr_rsp_pc" },
{ NUM_THREADS * 32, "gpr_rsp_a" },
{ NUM_THREADS * 32, "gpr_rsp_b" },
{ NUM_THREADS * 32, "gpr_rsp_c" },
{ NW_BITS, "writeback_wid" },
{ 32, "writeback_pc" },
{ NR_BITS, "writeback_rd" },
{ NUM_THREADS * 32, "writeback_data" },
{ 32, "bank_addr_st0" },
{ 32, "bank_addr_st1" },
{ 32, "bank_addr_st2" },
{ 1, "scope_bank_is_mrvq_st1" },
{ 1, "scope_bank_miss_st1" },
{ 1, "scope_bank_dirty_st1" },
{ 1, "scope_bank_force_miss_st1" },
///////////////////////////////////////////////////////////////////////////
{ 1, "dram_req_valid" },
{ 1, "dram_req_ready" },
{ 1, "dram_rsp_valid" },
{ 1, "dram_rsp_ready" },
{ 1, "snp_req_valid" },
{ 1, "snp_req_ready" },
{ 1, "snp_rsp_valid" },
{ 1, "snp_rsp_ready" },
{ 1, "icache_req_valid" },
{ 1, "icache_req_ready" },
{ 1, "icache_rsp_valid" },
{ 1, "icache_rsp_ready" },
{ NUM_THREADS, "dcache_req_valid" },
{ 1, "dcache_req_ready" },
{ NUM_THREADS, "dcache_rsp_valid" },
{ 1, "dcache_rsp_ready" },
{ 1, "bank_valid_st0" },
{ 1, "bank_valid_st1" },
{ 1, "bank_valid_st2" },
{ 1, "bank_stall_pipe" },
{ 1, "issue_valid" },
{ 1, "issue_ready" },
{ 1, "gpr_rsp_valid" },
{ 1, "writeback_valid" },
{ 1, "scoreboard_delay" },
{ 1, "gpr_delay" },
{ 1, "execute_delay" },
{ 1, "busy" },
};
static constexpr int num_signals = sizeof(scope_signals) / sizeof(scope_signal_t);
static constexpr int num_taps = sizeof(scope_taps) / sizeof(scope_tap_t);
constexpr int calcFrameWidth(int index = 0) {
return (index < num_signals) ? (scope_signals[index].width + calcFrameWidth(index + 1)) : 0;
return (index < num_taps) ? (scope_taps[index].width + calcFrameWidth(index + 1)) : 0;
}
static constexpr int fwidth = calcFrameWidth();
static_assert(fwidth == 1766, "invalid size");
#ifdef HANG_TIMEOUT
static std::thread g_timeout_thread;
static std::mutex g_timeout_mutex;
// Hang-timeout handler (vx_scope_start launches it on a detached std::thread).
// Sleeps, then calls vx_scope_stop(), closes the FPGA handle and ends the process.
// NOTE(review): the sleep uses the literal 60 rather than HANG_TIMEOUT, while HANG_TIMEOUT
// is passed to vx_scope_stop() as its `delay` argument — confirm both are intended.
static void timeout_callback(fpga_handle fpga) {
std::this_thread::sleep_for(std::chrono::seconds{60});
vx_scope_stop(fpga, HANG_TIMEOUT);
fpgaClose(fpga);
// Terminates the whole process with exit status 0 once the scope has been stopped.
exit(0);
}
#endif
// Writes `delta` clock periods to the VCD stream: for each period a "b0 0" sample followed by
// a "b1 0" sample (identifier 0 is the one declared as `clk` in the VCD header), each preceded
// by its own "#<timestamp>" line. Returns the first timestamp that was not used, i.e.
// timestamp + 2 * delta.
uint64_t print_clock(std::ofstream& ofs, uint64_t delta, uint64_t timestamp) {
  for (uint64_t period = 0; period < delta; ++period) {
    // low phase
    ofs << '#' << timestamp << std::endl;
    ofs << "b0 0" << std::endl;
    ++timestamp;
    // high phase
    ofs << '#' << timestamp << std::endl;
    ofs << "b1 0" << std::endl;
    ++timestamp;
  }
  return timestamp;
}
// Emits one VCD "$var reg" declaration for every entry of scope_taps whose `module` field
// equals `module`. The VCD identifier is the tap's position in scope_taps plus one
// (identifier 0 is used for `clk` elsewhere in this file).
void dump_taps(std::ofstream& ofs, int module) {
  for (int idx = 0; idx < num_taps; ++idx) {
    auto& entry = scope_taps[idx];
    if (entry.module == module) {
      ofs << "$var reg " << entry.width << " " << (idx + 1) << " " << entry.name << " $end" << std::endl;
    }
  }
}
// Recursively writes the VCD scope hierarchy for every scope_modules entry whose `parent`
// field equals `parent`. An entry whose name starts with '*' does not open a scope of its
// own: it emits the `clk` variable instead, and its children and taps are written at the
// current level. Every other entry is wrapped in "$scope module ... $upscope".
void dump_module(std::ofstream& ofs, int parent) {
  for (auto& mod : scope_modules) {
    if (mod.parent == parent) {
      const bool starred = (mod.name[0] == '*');
      if (starred) {
        ofs << "$var reg 1 0 clk $end" << std::endl;
      } else {
        ofs << "$scope module " << mod.name << " $end" << std::endl;
      }
      // children first, then the signals tapped directly in this module
      dump_module(ofs, mod.index);
      dump_taps(ofs, mod.index);
      if (!starred) {
        ofs << "$upscope $end" << std::endl;
      }
    }
  }
}
int vx_scope_start(fpga_handle hfpga, uint64_t delay) {
if (nullptr == hfpga)
@ -169,36 +107,55 @@ int vx_scope_start(fpga_handle hfpga, uint64_t delay) {
if (delay != uint64_t(-1)) {
// set start delay
uint64_t cmd_delay = ((delay << 3) | 4);
uint64_t cmd_delay = ((delay << 3) | CMD_SET_DELAY);
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, cmd_delay));
std::cout << "scope start delay: " << delay << std::endl;
}
#ifdef HANG_TIMEOUT
g_timeout_thread = std::thread(timeout_callback, hfpga);
g_timeout_thread.detach();
#endif
return 0;
}
int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
#ifdef HANG_TIMEOUT
if (!g_timeout_mutex.try_lock())
return 0;
#endif
if (nullptr == hfpga)
return -1;
if (delay != uint64_t(-1)) {
// stop recording
uint64_t cmd_stop = ((delay << 3) | 5);
uint64_t cmd_stop = ((delay << 3) | CMD_SET_STOP);
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, cmd_stop));
std::cout << "scope stop delay: " << delay << std::endl;
}
std::ofstream ofs("vx_scope.vcd");
ofs << "$version Generated by Vortex Scope $end" << std::endl;
ofs << "$timescale 1 ns $end" << std::endl;
ofs << "$var reg 1 0 clk $end" << std::endl;
ofs << "$scope module TOP $end" << std::endl;
dump_module(ofs, -1);
dump_taps(ofs, -1);
ofs << "$upscope $end" << std::endl;
ofs << "enddefinitions $end" << std::endl;
uint64_t frame_width, max_frames, data_valid;
uint64_t frame_width, max_frames, data_valid, offset, delta;
uint64_t timestamp = 0;
uint64_t frame_offset = 0;
uint64_t frame_no = 0;
int signal_id = 0;
int signal_offset = 0;
// wait for recording to terminate
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, 0));
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_VALID));
do {
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &data_valid));
if (data_valid)
@ -208,65 +165,50 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
std::cout << "scope trace dump begin..." << std::endl;
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, 2));
// get frame width
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_WIDTH));
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &frame_width));
std::cout << "scope::frame_width=" << std::dec << frame_width << std::endl;
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, 3));
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &max_frames));
std::cout << "scope::max_frames=" << std::dec << max_frames << std::endl;
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, 1));
std::cout << "scope::frame_width=" << std::dec << frame_width << std::endl;
if (fwidth != (int)frame_width) {
std::cerr << "invalid frame_width: expecting " << std::dec << fwidth << "!" << std::endl;
std::abort();
}
// get max frames
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_COUNT));
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &max_frames));
std::cout << "scope::max_frames=" << std::dec << max_frames << std::endl;
// get offset
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_OFFSET));
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &offset));
// get data
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_DATA));
// print clock header
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &delta));
timestamp = print_clock(ofs, offset + delta + 2, timestamp);
signal_id = num_taps;
std::vector<char> signal_data(frame_width+1);
uint64_t frame_offset = 0;
uint64_t frame_no = 0;
uint64_t timestamp = 0;
int signal_id = 0;
int signal_offset = 0;
auto print_header = [&] () {
ofs << '#' << timestamp++ << std::endl;
ofs << "b0 0" << std::endl;
ofs << '#' << timestamp++ << std::endl;
ofs << "b1 0" << std::endl;
uint64_t delta;
fpga_result res = fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &delta);
assert(res == FPGA_OK);
while (delta != 0) {
ofs << '#' << timestamp++ << std::endl;
ofs << "b0 0" << std::endl;
ofs << '#' << timestamp++ << std::endl;
ofs << "b1 0" << std::endl;
--delta;
}
signal_id = num_signals;
};
print_header();
do {
if (frame_no == (max_frames-1)) {
// verify last frame is valid
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, 0));
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_VALID));
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &data_valid));
assert(data_valid == 1);
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, 1));
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_DATA));
}
// read next data words
uint64_t word;
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &word));
do {
int signal_width = scope_signals[signal_id-1].width;
int signal_width = scope_taps[signal_id-1].width;
int word_offset = frame_offset % 64;
signal_data[signal_width - signal_offset - 1] = ((word >> word_offset) & 0x1) ? '1' : '0';
@ -285,17 +227,26 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
assert(0 == signal_offset);
frame_offset = 0;
++frame_no;
if (frame_no != max_frames) {
print_header();
}
if (frame_no != max_frames) {
// print clock header
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &delta));
timestamp = print_clock(ofs, delta + 1, timestamp);
signal_id = num_taps;
if (0 == (frame_no % 100)) {
std::cout << "*** " << frame_no << " frames, timestamp=" << timestamp << std::endl;
}
}
}
} while ((frame_offset % 64) != 0);
} while (frame_no != max_frames);
std::cout << "scope trace dump done! - " << (timestamp/2) << " cycles" << std::endl;
// verify data not valid
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, 0));
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_VALID));
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &data_valid));
assert(data_valid == 0);

View file

@ -1,5 +1,7 @@
#pragma once
#define HANG_TIMEOUT 60
int vx_scope_start(fpga_handle hfpga, uint64_t delay = -1);
int vx_scope_stop(fpga_handle hfpga, uint64_t delay = -1);

View file

@ -12,6 +12,8 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_PRINT_FLAGS += -DDBG_CORE_REQ_INFO
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CORE_REQ_INFO
@ -53,7 +55,7 @@ VL_FLAGS += verilator.vlt
# Debugging
ifdef DEBUG
VL_FLAGS += -DVCD_OUTPUT --assert --trace $(DBG_FLAGS)
VL_FLAGS += -DVCD_OUTPUT --assert --trace-fst --trace-threads 1 $(DBG_FLAGS)
CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG

Binary file not shown.

View file

@ -74,7 +74,7 @@ Disassembly of section .text:
800000e0: 0005006b 0x5006b
800000e4: 00002197 auipc gp,0x2
800000e8: c8418193 addi gp,gp,-892 # 80001d68 <__global_pointer$>
800000ec: f14025f3 csrr a1,mhartid
800000ec: 022025f3 csrr a1,0x22
800000f0: 00a59593 slli a1,a1,0xa
800000f4: 02002673 csrr a2,0x20
800000f8: 00261613 slli a2,a2,0x2
@ -122,7 +122,7 @@ Disassembly of section .text:
80000158: 00008067 ret
8000015c <vx_thread_gid>:
8000015c: f1402573 csrr a0,mhartid
8000015c: 02202573 csrr a0,0x22
80000160: 00008067 ret
80000164 <vx_core_id>:
@ -458,13 +458,12 @@ Disassembly of section .comment:
Disassembly of section .riscv.attributes:
00000000 <.riscv.attributes>:
0: 2041 jal 80 <_start-0x7fffff80>
0: 2541 jal 680 <_start-0x7ffff980>
2: 0000 unimp
4: 7200 flw fs0,32(a2)
6: 7369 lui t1,0xffffa
8: 01007663 bgeu zero,a6,14 <_start-0x7fffffec>
c: 0016 c.slli zero,0x5
e: 0000 unimp
c: 0000001b 0x1b
10: 1004 addi s1,sp,32
12: 7205 lui tp,0xfffe1
14: 3376 fld ft6,376(sp)
@ -473,4 +472,4 @@ Disassembly of section .riscv.attributes:
1a: 5f30 lw a2,120(a4)
1c: 326d jal fffff9c6 <__global_pointer$+0x7fffdc5e>
1e: 3070 fld fa2,224(s0)
...
20: 665f 7032 0030 0x307032665f

Binary file not shown.

View file

@ -90,16 +90,20 @@ vx_buffer_h dst_buf = nullptr;
static void show_usage() {
std::cout << "Vortex Driver Test." << std::endl;
std::cout << "Usage: [-s:testid] [-e:testid] [-k: kernel] [-n words] [-c] [-h: help]" << std::endl;
std::cout << "Usage: [-t:testid] [-s:testid] [-e:testid] [-k: kernel] [-n words] [-c] [-h: help]" << std::endl;
}
static void parse_args(int argc, char **argv) {
int c;
while ((c = getopt(argc, argv, "n:s:e:k:ch?")) != -1) {
while ((c = getopt(argc, argv, "n:t:s:e:k:ch?")) != -1) {
switch (c) {
case 'n':
count = atoi(optarg);
break;
case 't':
testid_s = atoi(optarg);
testid_e = atoi(optarg);
break;
case 's':
testid_s = atoi(optarg);
break;

View file

@ -5,5 +5,5 @@ build_config:
$(MAKE) -C simulate
clean:
rm ./rtl/VX_user_config.vh ./VX_config.h
rm -f ./rtl/VX_user_config.vh ./VX_config.h
$(MAKE) -C simulate clean

View file

@ -60,9 +60,9 @@ qsub-sim
make ase
# tests
./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -n 256
./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -n16
./run_ase.sh build_ase_1c ../../driver/tests/demo/demo -n 16
./run_ase.sh build_ase_1c ../../driver/tests/dogfood/dogfood -n1 -s4 -e4
./run_ase.sh build_ase_1c ../../driver/tests/dogfood/dogfood -n16
./run_ase.sh build_ase_1c ../../benchmarks/opencl/vecadd/vecadd
# modify "vsim_run.tcl" to dump VCD trace
@ -76,11 +76,12 @@ tar -zcvf output_files_1c.tar.gz `find ./build_fpga_1c -type f \( -iname \*.rpt
# compress VCD trace
tar -zcvf vortex.vcd.tar.gz ./build_ase_1c/work/vortex.vcd
tar -zcvf trace.vcd.tar.gz obj_dir/trace.vcd
tar -zcvf trace.vcd.tar.gz trace.vcd
tar -zcvf trace.fst.tar.gz trace.fst run.log
tar -zcvf run.log.tar.gz run.log
tar -cvjf vortex.vcd.tar.bz2 build_ase_1c/work/vortex.vcd
tar -zcvf vortex.vcd.tar.gz build_ase_1c/work/vortex.vcd
tar -zcvf run.log.tar.gz build_ase_1c/work/run.log
tar -zcvf vx_scope.vcd.tar.gz vx_scope.vcd
# decompress VCD trace
tar -zxvf /mnt/c/Users/Blaise/Downloads/vortex.vcd.tar.gz
@ -96,7 +97,7 @@ kill -9 <pid>
# fixing device resource busy issue when deleting /build_ase_1c/
lsof +D build_ase_1c
# kick off cache synthesis
# kick off synthesis
make -C pipeline clean && make -C pipeline > pipeline/build.log 2>&1 &
make -C cache clean && make -C cache > cache/build.log 2>&1 &
make -C core clean && make -C core > core/build.log 2>&1 &

View file

@ -104,7 +104,7 @@ module ccip_std_afu #(
vortex_afu #(
.NUM_LOCAL_MEM_BANKS(NUM_LOCAL_MEM_BANKS)
) vortex_afu_inst (
) afu (
.clk (clk),
.reset (reset_T1),

View file

@ -1,9 +1,26 @@
# Analysis & Synthesis Assignments
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
# set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON
set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON
set_global_assignment -name VERILOG_MACRO QUARTUS
set_global_assignment -name VERILOG_MACRO SYNTHESIS
set_global_assignment -name VERILOG_MACRO NDEBUG
set_global_assignment -name MESSAGE_DISABLE 16818
set_global_assignment -name VERILOG_MACRO FPU_FAST
set_global_assignment -name VERILOG_MACRO FPU_FAST
set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)"
set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON
set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON
set_global_assignment -name TIMEQUEST_DO_CCPP_REMOVAL ON
set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
set_global_assignment -name POWER_USE_TA_VALUE 65
set_global_assignment -name SEED 1
set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM
set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"

View file

@ -74,111 +74,112 @@ localparam MMIO_CSR_ADDR = `AFU_IMAGE_MMIO_CSR_ADDR;
localparam MMIO_CSR_DATA = `AFU_IMAGE_MMIO_CSR_DATA;
localparam MMIO_CSR_READ = `AFU_IMAGE_MMIO_CSR_READ;
logic [127:0] afu_id = `AFU_ACCEL_UUID;
localparam CCI_RD_RQ_TAGW = $clog2(CCI_RD_WINDOW_SIZE);
localparam CCI_RD_RQ_DATAW = $bits(t_ccip_clData) + CCI_RD_RQ_TAGW;
typedef enum logic[3:0] {
STATE_IDLE,
STATE_READ,
STATE_WRITE,
STATE_START,
STATE_RUN,
STATE_CLFLUSH,
STATE_CSR_READ,
STATE_CSR_WRITE
} state_t;
typedef logic [$clog2(CCI_RD_WINDOW_SIZE)-1:0] t_cci_rdq_tag;
typedef logic [$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:0] t_cci_rdq_data;
state_t state;
localparam STATE_IDLE = 0;
localparam STATE_READ = 1;
localparam STATE_WRITE = 2;
localparam STATE_START = 3;
localparam STATE_RUN = 4;
localparam STATE_CLFLUSH = 5;
localparam STATE_CSR_READ = 6;
localparam STATE_CSR_WRITE = 7;
localparam STATE_MAX_VALUE = 8;
localparam STATE_WIDTH = $clog2(STATE_MAX_VALUE);
`ifdef SCOPE
`SCOPE_SIGNALS_DECL
`SCOPE_DECL_SIGNALS
`endif
wire [127:0] afu_id = `AFU_ACCEL_UUID;
reg [STATE_WIDTH-1:0] state;
// Vortex ports ///////////////////////////////////////////////////////////////
logic vx_dram_req_valid;
logic vx_dram_req_rw;
logic [`VX_DRAM_BYTEEN_WIDTH-1:0] vx_dram_req_byteen;
logic [`VX_DRAM_ADDR_WIDTH-1:0] vx_dram_req_addr;
logic [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_req_data;
logic [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_req_tag;
logic vx_dram_req_ready;
wire vx_dram_req_valid;
wire vx_dram_req_rw;
wire [`VX_DRAM_BYTEEN_WIDTH-1:0] vx_dram_req_byteen;
wire [`VX_DRAM_ADDR_WIDTH-1:0] vx_dram_req_addr;
wire [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_req_data;
wire [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_req_tag;
wire vx_dram_req_ready;
logic vx_dram_rsp_valid;
logic [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data;
logic [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_rsp_tag;
logic vx_dram_rsp_ready;
wire vx_dram_rsp_valid;
wire [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data;
wire [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_rsp_tag;
wire vx_dram_rsp_ready;
logic vx_snp_req_valid;
logic [`VX_DRAM_ADDR_WIDTH-1:0] vx_snp_req_addr;
logic vx_snp_req_invalidate = 0;
logic [`VX_SNP_TAG_WIDTH-1:0] vx_snp_req_tag;
logic vx_snp_req_ready;
reg vx_snp_req_valid;
reg [`VX_DRAM_ADDR_WIDTH-1:0] vx_snp_req_addr;
wire vx_snp_req_invalidate = 0;
reg [`VX_SNP_TAG_WIDTH-1:0] vx_snp_req_tag;
wire vx_snp_req_ready;
logic vx_snp_rsp_valid;
reg vx_snp_rsp_valid;
`DEBUG_BEGIN
logic [`VX_SNP_TAG_WIDTH-1:0] vx_snp_rsp_tag;
reg [`VX_SNP_TAG_WIDTH-1:0] vx_snp_rsp_tag;
`DEBUG_END
logic vx_snp_rsp_ready;
reg vx_snp_rsp_ready;
logic vx_csr_io_req_valid;
logic [`VX_CSR_ID_WIDTH-1:0] vx_csr_io_req_coreid;
logic [11:0] vx_csr_io_req_addr;
logic vx_csr_io_req_rw;
logic [31:0] vx_csr_io_req_data;
logic vx_csr_io_req_ready;
wire vx_csr_io_req_valid;
wire [`VX_CSR_ID_WIDTH-1:0] vx_csr_io_req_coreid;
wire [11:0] vx_csr_io_req_addr;
wire vx_csr_io_req_rw;
wire [31:0] vx_csr_io_req_data;
wire vx_csr_io_req_ready;
logic vx_csr_io_rsp_valid;
logic [31:0] vx_csr_io_rsp_data;
logic vx_csr_io_rsp_ready;
wire vx_csr_io_rsp_valid;
wire [31:0] vx_csr_io_rsp_data;
wire vx_csr_io_rsp_ready;
logic vx_reset;
logic vx_busy;
reg vx_reset;
wire vx_busy;
// AVS Queues /////////////////////////////////////////////////////////////////
logic avs_rtq_push;
logic avs_rtq_pop;
wire avs_rtq_push;
wire avs_rtq_pop;
`DEBUG_BEGIN
logic avs_rtq_empty;
logic avs_rtq_full;
wire avs_rtq_empty;
wire avs_rtq_full;
`DEBUG_BEGIN
logic avs_rdq_push;
logic avs_rdq_pop;
wire avs_rdq_push;
wire avs_rdq_pop;
t_local_mem_data avs_rdq_dout;
logic avs_rdq_empty;
wire avs_rdq_empty;
`DEBUG_BEGIN
logic avs_rdq_full;
wire avs_rdq_full;
`DEBUG_END
// CMD variables //////////////////////////////////////////////////////////////
t_ccip_clAddr cmd_io_addr;
logic[DRAM_ADDR_WIDTH-1:0] cmd_mem_addr;
logic[DRAM_ADDR_WIDTH-1:0] cmd_data_size;
reg [DRAM_ADDR_WIDTH-1:0] cmd_mem_addr;
reg [DRAM_ADDR_WIDTH-1:0] cmd_data_size;
`ifdef SCOPE
logic [63:0] cmd_scope_rdata;
logic [63:0] cmd_scope_wdata;
logic cmd_scope_read;
logic cmd_scope_write;
wire [63:0] cmd_scope_rdata;
wire [63:0] cmd_scope_wdata;
wire cmd_scope_read;
wire cmd_scope_write;
`endif
logic [`VX_CSR_ID_WIDTH-1:0] cmd_csr_core;
logic [11:0] cmd_csr_addr;
logic [31:0] cmd_csr_rdata;
logic [31:0] cmd_csr_wdata;
reg [`VX_CSR_ID_WIDTH-1:0] cmd_csr_core;
reg [11:0] cmd_csr_addr;
reg [31:0] cmd_csr_rdata;
reg [31:0] cmd_csr_wdata;
// MMIO controller ////////////////////////////////////////////////////////////
`IGNORE_WARNINGS_BEGIN
t_ccip_c0_ReqMmioHdr mmio_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr);
t_ccip_c0_ReqMmioHdr mmio_hdr;
`IGNORE_WARNINGS_END
assign mmio_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr);
`STATIC_ASSERT(($bits(t_ccip_c0_ReqMmioHdr)-$bits(mmio_hdr.address)) == 12, "Oops!")
`STATIC_ASSERT(($bits(t_ccip_c0_ReqMmioHdr)-$bits(mmio_hdr.address)) == 12, ("Oops!"))
t_if_ccip_c2_Tx mmio_tx;
assign af2cp_sTxPort.c2 = mmio_tx;
@ -192,6 +193,10 @@ assign cmd_scope_write = cp2af_sRxPort.c0.mmioWrValid && (MMIO_SCOPE_WRITE == mm
`DEBUG_BEGIN
wire cp2af_sRxPort_c0_mmioWrValid = cp2af_sRxPort.c0.mmioWrValid;
wire cp2af_sRxPort_c0_mmioRdValid = cp2af_sRxPort.c0.mmioRdValid;
wire cp2af_sRxPort_c0_rspValid = cp2af_sRxPort.c0.rspValid;
wire cp2af_sRxPort_c1_rspValid = cp2af_sRxPort.c1.rspValid;
wire cp2af_sRxPort_c0TxAlmFull = cp2af_sRxPort.c0TxAlmFull;
wire cp2af_sRxPort_c1TxAlmFull = cp2af_sRxPort.c1TxAlmFull;
wire[$bits(mmio_hdr.address)-1:0] mmio_hdr_address = mmio_hdr.address;
wire[$bits(mmio_hdr.length)-1:0] mmio_hdr_length = mmio_hdr.length;
wire[$bits(mmio_hdr.tid)-1:0] mmio_hdr_tid = mmio_hdr.tid;
@ -200,75 +205,93 @@ wire[$bits(cp2af_sRxPort.c0.hdr.mdata)-1:0] cp2af_sRxPort_c0_hdr_mdata = cp2af_s
wire [2:0] cmd_type = (cp2af_sRxPort.c0.mmioWrValid && (MMIO_CMD_TYPE == mmio_hdr.address)) ? 3'(cp2af_sRxPort.c0.data) : 3'h0;
always_ff @(posedge clk)
begin
`ifdef SCOPE
reg scope_start;
`endif
// disable assertions until reset
`ifndef VERILATOR
initial begin
$assertoff;
end
`endif
always @(posedge clk) begin
if (reset) begin
`ifndef VERILATOR
$asserton; // enable assertions
`endif
mmio_tx.hdr <= 0;
mmio_tx.data <= 0;
mmio_tx.mmioRdValid <= 0;
cmd_io_addr <= 0;
cmd_mem_addr <= 0;
cmd_data_size <= 0;
`ifdef SCOPE
scope_start <= 0;
`endif
end
else begin
mmio_tx.mmioRdValid <= 0;
// serve MMIO write request
if (cp2af_sRxPort.c0.mmioWrValid)
begin
`ifdef SCOPE
scope_start <= 1;
`endif
case (mmio_hdr.address)
MMIO_IO_ADDR: begin
cmd_io_addr <= t_ccip_clAddr'(cp2af_sRxPort.c0.data);
`ifdef DBG_PRINT_OPAE
$display("%t: MMIO_IO_ADDR: 0x%0h", $time, t_ccip_clAddr'(cp2af_sRxPort.c0.data));
$display("%t: MMIO_IO_ADDR: addr=%0h, data=0x%0h", $time, mmio_hdr.address, t_ccip_clAddr'(cp2af_sRxPort.c0.data));
`endif
end
MMIO_MEM_ADDR: begin
cmd_mem_addr <= t_local_mem_addr'(cp2af_sRxPort.c0.data);
`ifdef DBG_PRINT_OPAE
$display("%t: MMIO_MEM_ADDR: 0x%0h", $time, t_local_mem_addr'(cp2af_sRxPort.c0.data));
$display("%t: MMIO_MEM_ADDR: addr=%0h, data=0x%0h", $time, mmio_hdr.address, t_local_mem_addr'(cp2af_sRxPort.c0.data));
`endif
end
MMIO_DATA_SIZE: begin
cmd_data_size <= $bits(cmd_data_size)'(cp2af_sRxPort.c0.data);
`ifdef DBG_PRINT_OPAE
$display("%t: MMIO_DATA_SIZE: %0d", $time, $bits(cmd_data_size)'(cp2af_sRxPort.c0.data));
$display("%t: MMIO_DATA_SIZE: addr=%0h, data=%0d", $time, mmio_hdr.address, $bits(cmd_data_size)'(cp2af_sRxPort.c0.data));
`endif
end
MMIO_CMD_TYPE: begin
`ifdef DBG_PRINT_OPAE
$display("%t: MMIO_CMD_TYPE: %0d", $time, $bits(cmd_type)'(cp2af_sRxPort.c0.data));
$display("%t: MMIO_CMD_TYPE: addr=%0h, data=%0d", $time, mmio_hdr.address, $bits(cmd_type)'(cp2af_sRxPort.c0.data));
`endif
end
`ifdef SCOPE
MMIO_SCOPE_WRITE: begin
`ifdef DBG_PRINT_OPAE
$display("%t: MMIO_SCOPE_WRITE: %0h", $time, 64'(cp2af_sRxPort.c0.data));
$display("%t: MMIO_SCOPE_WRITE: addr=%0h, data=%0h", $time, mmio_hdr.address, 64'(cp2af_sRxPort.c0.data));
`endif
end
`endif
MMIO_CSR_CORE: begin
cmd_csr_core <= $bits(cmd_csr_core)'(cp2af_sRxPort.c0.data);
`ifdef DBG_PRINT_OPAE
$display("%t: MMIO_CSR_CORE: %0h", $time, $bits(cmd_csr_core)'(cp2af_sRxPort.c0.data));
$display("%t: MMIO_CSR_CORE: addr=%0h, %0h", $time, mmio_hdr.address, $bits(cmd_csr_core)'(cp2af_sRxPort.c0.data));
`endif
end
MMIO_CSR_ADDR: begin
cmd_csr_addr <= $bits(cmd_csr_addr)'(cp2af_sRxPort.c0.data);
`ifdef DBG_PRINT_OPAE
$display("%t: MMIO_CSR_ADDR: %0h", $time, $bits(cmd_csr_addr)'(cp2af_sRxPort.c0.data));
$display("%t: MMIO_CSR_ADDR: addr=%0h, %0h", $time, mmio_hdr.address, $bits(cmd_csr_addr)'(cp2af_sRxPort.c0.data));
`endif
end
MMIO_CSR_DATA: begin
cmd_csr_wdata <= $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data);
`ifdef DBG_PRINT_OPAE
$display("%t: MMIO_CSR_DATA: %0h", $time, $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data));
$display("%t: MMIO_CSR_DATA: addr=%0h, %0h", $time, mmio_hdr.address, $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data));
`endif
end
default: begin
`ifdef DBG_PRINT_OPAE
$display("%t: MMIO_WR: addr=%0h, data=%0h", $time, mmio_hdr.address, $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data));
$display("%t: Unknown MMIO Wr: addr=%0h, data=%0h", $time, mmio_hdr.address, $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data));
`endif
end
endcase
@ -296,28 +319,29 @@ begin
MMIO_STATUS: begin
mmio_tx.data <= 64'(state);
`ifdef DBG_PRINT_OPAE
if (state != state_t'(mmio_tx.data)) begin
$display("%t: MMIO_STATUS: state=%0d", $time, state);
if (state != STATE_WIDTH'(mmio_tx.data)) begin
$display("%t: MMIO_STATUS: addr=%0h, state=%0d", $time, mmio_hdr.address, state);
end
`endif
end
MMIO_CSR_READ: begin
mmio_tx.data <= 64'(cmd_csr_rdata);
`ifdef DBG_PRINT_OPAE
$display("%t: MMIO_CSR_READ: data=%0h", $time, cmd_csr_rdata);
$display("%t: MMIO_CSR_READ: addr=%0h, data=%0h", $time, mmio_hdr.address, cmd_csr_rdata);
`endif
end
`ifdef SCOPE
MMIO_SCOPE_READ: begin
mmio_tx.data <= cmd_scope_rdata;
`ifdef DBG_PRINT_OPAE
$display("%t: MMIO_SCOPE_READ: data=%0h", $time, cmd_scope_rdata);
$display("%t: MMIO_SCOPE_READ: addr=%0h, data=%0h", $time, mmio_hdr.address, cmd_scope_rdata);
`endif
end
`endif
default: begin
`ifdef DBG_PRINT_OPAE
$display("%t: MMIO_RD: addr=%0h", $time, mmio_hdr.address);
mmio_tx.data <= 64'h0;
`ifdef DBG_PRINT_OPAE
$display("%t: Unknown MMIO Rd: addr=%0h", $time, mmio_hdr.address);
`endif
end
endcase
@ -328,14 +352,13 @@ end
// COMMAND FSM ////////////////////////////////////////////////////////////////
logic cmd_read_done;
logic cmd_write_done;
logic cmd_clflush_done;
logic cmd_csr_done;
logic cmd_run_done;
wire cmd_read_done;
wire cmd_write_done;
wire cmd_clflush_done;
wire cmd_csr_done;
wire cmd_run_done;
always_ff @(posedge clk)
begin
always @(posedge clk) begin
if (reset) begin
state <= STATE_IDLE;
vx_reset <= 0;
@ -458,27 +481,28 @@ end
// AVS Controller /////////////////////////////////////////////////////////////
logic vortex_enabled;
logic cci_rdq_empty;
t_cci_rdq_data cci_rdq_dout;
wire vortex_enabled;
wire cci_rdq_empty;
wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_dout;
logic cci_dram_rd_req_fire;
logic cci_dram_wr_req_fire;
logic vx_dram_rd_req_fire;
wire cci_dram_rd_req_fire;
wire cci_dram_wr_req_fire;
wire vx_dram_rd_req_fire;
`DEBUG_BEGIN
logic vx_dram_wr_req_fire;
wire vx_dram_wr_req_fire;
`DEBUG_END
logic vx_dram_rd_rsp_fire;
wire vx_dram_rd_rsp_fire;
t_local_mem_byte_mask vx_dram_req_byteen_;
logic [$clog2(AVS_RD_QUEUE_SIZE+1)-1:0] avs_pending_reads, avs_pending_reads_next;
logic [DRAM_LINE_LW-1:0] vx_dram_req_offset, vx_dram_rsp_offset;
logic [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr;
reg [$clog2(AVS_RD_QUEUE_SIZE+1)-1:0] avs_pending_reads;
wire [$clog2(AVS_RD_QUEUE_SIZE+1)-1:0] avs_pending_reads_next;
wire [DRAM_LINE_LW-1:0] vx_dram_req_offset, vx_dram_rsp_offset;
reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr;
logic cci_dram_rd_req_enable, cci_dram_wr_req_enable;
logic vx_dram_req_enable, vx_dram_rd_req_enable, vx_dram_wr_req_enable;
wire cci_dram_rd_req_enable, cci_dram_wr_req_enable;
wire vx_dram_req_enable, vx_dram_rd_req_enable, vx_dram_wr_req_enable;
logic [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_ctr, cci_dram_wr_req_ctr;
reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_ctr, cci_dram_wr_req_ctr;
assign vortex_enabled = (STATE_RUN == state) || (STATE_CLFLUSH == state);
@ -503,8 +527,8 @@ assign vx_dram_wr_req_fire = vx_dram_wr_req_enable && !avs_waitrequest;
assign vx_dram_rd_rsp_fire = vx_dram_rsp_valid && vx_dram_rsp_ready;
assign avs_pending_reads_next = avs_pending_reads
+ (((cci_dram_rd_req_fire || vx_dram_rd_req_fire) && !avs_rdq_pop) ? 1 :
(~(cci_dram_rd_req_fire || vx_dram_rd_req_fire) && avs_rdq_pop) ? -1 : 0);
+ $bits(avs_pending_reads)'(((cci_dram_rd_req_fire || vx_dram_rd_req_fire) && !avs_rdq_pop) ? 1 :
(~(cci_dram_rd_req_fire || vx_dram_rd_req_fire) && avs_rdq_pop) ? -1 : 0);
if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin
assign vx_dram_req_offset = ((DRAM_LINE_LW)'(vx_dram_req_addr[(DRAM_LINE_LW-VX_DRAM_LINE_LW)-1:0])) << VX_DRAM_LINE_LW;
@ -514,11 +538,10 @@ end else begin
assign vx_dram_req_byteen_ = vx_dram_req_byteen;
end
always_comb
begin
always @(*) begin
case (state)
CMD_MEM_READ: avs_address = cci_dram_rd_req_addr;
CMD_MEM_WRITE: avs_address = cci_dram_wr_req_addr + ((DRAM_ADDR_WIDTH)'(t_cci_rdq_tag'(cci_rdq_dout)));
CMD_MEM_WRITE: avs_address = cci_dram_wr_req_addr + (DRAM_ADDR_WIDTH'(CCI_RD_RQ_TAGW'(cci_rdq_dout)));
default: avs_address = vx_dram_req_addr[`VX_DRAM_ADDR_WIDTH-1:`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH];
endcase
@ -529,8 +552,8 @@ begin
endcase
case (state)
CMD_MEM_WRITE: avs_writedata = cci_rdq_dout[$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:$bits(t_cci_rdq_tag)];
default: avs_writedata = (DRAM_LINE_WIDTH)'(vx_dram_req_data) << vx_dram_req_offset;
CMD_MEM_WRITE: avs_writedata = cci_rdq_dout[CCI_RD_RQ_DATAW-1:CCI_RD_RQ_TAGW];
default: avs_writedata = DRAM_LINE_WIDTH'(vx_dram_req_data) << vx_dram_req_offset;
endcase
end
@ -539,8 +562,7 @@ assign avs_write = cci_dram_wr_req_enable || vx_dram_wr_req_enable;
assign cmd_write_done = (cci_dram_wr_req_ctr >= cmd_data_size);
always_ff @(posedge clk)
begin
always @(posedge clk) begin
if (reset)
begin
mem_bank_select <= 0;
@ -565,16 +587,16 @@ begin
end
if (cci_dram_rd_req_fire) begin
cci_dram_rd_req_addr <= cci_dram_rd_req_addr + 1;
cci_dram_rd_req_ctr <= cci_dram_rd_req_ctr - 1;
cci_dram_rd_req_addr <= cci_dram_rd_req_addr + DRAM_ADDR_WIDTH'(1);
cci_dram_rd_req_ctr <= cci_dram_rd_req_ctr - DRAM_ADDR_WIDTH'(1);
`ifdef DBG_PRINT_OPAE
$display("%t: AVS Rd Req: addr=%0h, rem=%0d, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), (cci_dram_rd_req_ctr - 1), avs_pending_reads_next);
`endif
end
if (cci_dram_wr_req_fire) begin
cci_dram_wr_req_addr <= cci_dram_wr_req_addr + ((t_cci_rdq_tag'(cci_dram_wr_req_ctr) == $bits(t_cci_rdq_tag)'(CCI_RD_WINDOW_SIZE-1)) ? DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0));
cci_dram_wr_req_ctr <= cci_dram_wr_req_ctr + 1;
cci_dram_wr_req_addr <= cci_dram_wr_req_addr + ((CCI_RD_RQ_TAGW'(cci_dram_wr_req_ctr) == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) ? DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0));
cci_dram_wr_req_ctr <= cci_dram_wr_req_ctr + DRAM_ADDR_WIDTH'(1);
`ifdef DBG_PRINT_OPAE
$display("%t: AVS Wr Req: addr=%0h, data=%0h, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_writedata, (cci_dram_wr_req_ctr + 1));
`endif
@ -633,7 +655,7 @@ VX_generic_queue #(
// AVS data read response queue ///////////////////////////////////////////////
logic cci_wr_req_fire;
wire cci_wr_req_fire;
assign avs_rdq_push = avs_readdatavalid;
assign avs_rdq_pop = vx_dram_rd_rsp_fire || cci_wr_req_fire;
@ -655,41 +677,46 @@ VX_generic_queue #(
// CCI-P Read Request ///////////////////////////////////////////////////////////
logic [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads, cci_pending_reads_next;
logic [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr, cci_rd_req_ctr_next;
reg [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads;
wire [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads_next;
reg [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr;
wire [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr_next;
wire [CCI_RD_RQ_TAGW-1:0] cci_rd_req_tag, cci_rd_rsp_tag;
reg [CCI_RD_RQ_TAGW-1:0] cci_rd_rsp_ctr;
t_ccip_clAddr cci_rd_req_addr;
t_cci_rdq_tag cci_rd_rsp_ctr;
logic cci_rd_req_fire, cci_rd_rsp_fire;
logic cci_rd_req_enable, cci_rd_req_wait;
wire cci_rd_req_fire, cci_rd_rsp_fire;
reg cci_rd_req_enable, cci_rd_req_wait;
logic cci_rdq_push, cci_rdq_pop;
t_cci_rdq_data cci_rdq_din;
wire cci_rdq_push, cci_rdq_pop;
wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_din;
always_comb begin
always @(*) begin
af2cp_sTxPort.c0.hdr = t_ccip_c0_ReqMemHdr'(0);
af2cp_sTxPort.c0.hdr.address = cci_rd_req_addr;
af2cp_sTxPort.c0.hdr.mdata = t_ccip_mdata'(t_cci_rdq_tag'(cci_rd_req_ctr));
af2cp_sTxPort.c0.hdr.mdata = t_ccip_mdata'(cci_rd_req_tag);
end
assign cci_rd_req_fire = af2cp_sTxPort.c0.valid && !cp2af_sRxPort.c0TxAlmFull;
assign cci_rd_rsp_fire = (STATE_WRITE == state) && cp2af_sRxPort.c0.rspValid;
assign cci_rd_req_ctr_next = cci_rd_req_ctr + (cci_rd_req_fire ? 1 : 0);
assign cci_rd_req_tag = CCI_RD_RQ_TAGW'(cci_rd_req_ctr);
assign cci_rd_rsp_tag = CCI_RD_RQ_TAGW'(cp2af_sRxPort.c0.hdr.mdata);
assign cci_rd_req_ctr_next = cci_rd_req_ctr + DRAM_ADDR_WIDTH'(cci_rd_req_fire ? 1 : 0);
assign cci_rdq_pop = cci_dram_wr_req_fire;
assign cci_rdq_push = cci_rd_rsp_fire;
assign cci_rdq_din = {cp2af_sRxPort.c0.data, t_cci_rdq_tag'(cp2af_sRxPort.c0.hdr.mdata)};
assign cci_rdq_din = {cp2af_sRxPort.c0.data, cci_rd_rsp_tag};
assign cci_pending_reads_next = cci_pending_reads
+ ((cci_rd_req_fire && !cci_rdq_pop) ? 1 :
(!cci_rd_req_fire && cci_rdq_pop) ? -1 : 0);
+ $bits(cci_pending_reads)'((cci_rd_req_fire && !cci_rdq_pop) ? 1 :
(!cci_rd_req_fire && cci_rdq_pop) ? -1 : 0);
assign af2cp_sTxPort.c0.valid = cci_rd_req_enable && !cci_rd_req_wait;
// Send read requests to CCI
always_ff @(posedge clk)
begin
always @(posedge clk) begin
if (reset) begin
cci_rd_req_addr <= 0;
cci_rd_req_ctr <= 0;
@ -717,21 +744,21 @@ begin
if (cci_rd_req_fire) begin
cci_rd_req_addr <= cci_rd_req_addr + 1;
cci_rd_req_ctr <= cci_rd_req_ctr_next;
if (t_cci_rdq_tag'(cci_rd_req_ctr) == $bits(t_cci_rdq_tag)'(CCI_RD_WINDOW_SIZE-1)) begin
cci_rd_req_wait <= 1; // end current request batch
if (cci_rd_req_tag == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) begin
cci_rd_req_wait <= 1; // end current request batch
end
`ifdef DBG_PRINT_OPAE
$display("%t: CCI Rd Req: addr=%0h, rem=%0d, pending=%0d", $time, cci_rd_req_addr, (cmd_data_size - cci_rd_req_ctr_next), cci_pending_reads_next);
$display("%t: CCI Rd Req: addr=%0h, tag=%0h, rem=%0d, pending=%0d", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr_next), cci_pending_reads_next);
`endif
end
if (cci_rd_rsp_fire) begin
cci_rd_rsp_ctr <= cci_rd_rsp_ctr + 1;
if (cci_rd_rsp_ctr == $bits(t_cci_rdq_tag)'(CCI_RD_WINDOW_SIZE-1)) begin
cci_rd_req_wait <= 0; // restart new request batch
cci_rd_rsp_ctr <= cci_rd_rsp_ctr + CCI_RD_RQ_TAGW'(1);
if (cci_rd_rsp_ctr == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) begin
cci_rd_req_wait <= 0; // restart new request batch
end
`ifdef DBG_PRINT_OPAE
$display("%t: CCI Rd Rsp: idx=%0d, ctr=%0d", $time, t_cci_rdq_tag'(cp2af_sRxPort.c0.hdr.mdata), cci_rd_rsp_ctr);
$display("%t: CCI Rd Rsp: idx=%0d, ctr=%0d", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr);
`endif
end
@ -742,12 +769,11 @@ begin
end
cci_pending_reads <= cci_pending_reads_next;
end
end
VX_generic_queue #(
.DATAW($bits(t_ccip_clData) + $bits(t_cci_rdq_tag)),
.DATAW(CCI_RD_RQ_DATAW),
.SIZE(CCI_RD_QUEUE_SIZE)
) cci_rd_req_queue (
.clk (clk),
@ -761,14 +787,36 @@ VX_generic_queue #(
`UNUSED_PIN (size)
);
// Debug-only checker for CCI read responses; compiled only when VERILATOR is defined.
// dbg_cci_rd_rsp_mask keeps one bit per tag value (CCI_RD_WINDOW_SIZE bits), cleared on reset.
// On every read response (cci_rd_rsp_fire):
//   - when cci_rd_rsp_ctr is 0, the mask is restarted with only the bit for
//     cci_rd_rsp_tag set (presumably the first response of a request batch,
//     since cci_rd_rsp_ctr is advanced on each response elsewhere in this file);
//   - otherwise the bit for cci_rd_rsp_tag must still be clear (assert) and is
//     then set, so a tag seen twice since the last restart trips the assertion.
`ifdef VERILATOR
`DEBUG_BLOCK(
reg [CCI_RD_WINDOW_SIZE-1:0] dbg_cci_rd_rsp_mask;
always @(posedge clk) begin
if (reset) begin
dbg_cci_rd_rsp_mask <= 0;
end else begin
if (cci_rd_rsp_fire) begin
if (cci_rd_rsp_ctr == 0) begin
dbg_cci_rd_rsp_mask <= (CCI_RD_WINDOW_SIZE'(1) << cci_rd_rsp_tag);
end else begin
assert(!dbg_cci_rd_rsp_mask[cci_rd_rsp_tag]);
dbg_cci_rd_rsp_mask[cci_rd_rsp_tag] <= 1;
end
end
end
end
)
`endif
// CCI-P Write Request //////////////////////////////////////////////////////////
logic [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes, cci_pending_writes_next;
logic [DRAM_ADDR_WIDTH-1:0] cci_wr_req_ctr;
reg [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes;
wire [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes_next;
reg [DRAM_ADDR_WIDTH-1:0] cci_wr_req_ctr;
t_ccip_clAddr cci_wr_req_addr;
logic cci_wr_req_enable, cci_wr_rsp_fire;
reg cci_wr_req_enable;
wire cci_wr_rsp_fire;
always_comb begin
always @(*) begin
af2cp_sTxPort.c1.hdr = t_ccip_c1_ReqMemHdr'(0);
af2cp_sTxPort.c1.hdr.address = cci_wr_req_addr;
af2cp_sTxPort.c1.hdr.sop = 1; // single line write mode
@ -779,15 +827,15 @@ assign cci_wr_req_fire = af2cp_sTxPort.c1.valid && !cp2af_sRxPort.c1TxAlmFull;
assign cci_wr_rsp_fire = (STATE_READ == state) && cp2af_sRxPort.c1.rspValid;
assign cci_pending_writes_next = cci_pending_writes
+ ((cci_wr_req_fire && !cci_wr_rsp_fire) ? 1 :
(!cci_wr_req_fire && cci_wr_rsp_fire) ? -1 : 0);
+ $bits(cci_pending_writes)'((cci_wr_req_fire && !cci_wr_rsp_fire) ? 1 :
(!cci_wr_req_fire && cci_wr_rsp_fire) ? -1 : 0);
assign cmd_read_done = (0 == cci_wr_req_ctr) && (0 == cci_pending_writes);
assign af2cp_sTxPort.c1.valid = cci_wr_req_enable && !avs_rdq_empty;
// Send write requests to CCI
always_ff @(posedge clk)
always @(posedge clk)
begin
if (reset) begin
cci_wr_req_addr <= 0;
@ -809,10 +857,10 @@ begin
if (cci_wr_req_fire) begin
assert(cci_wr_req_ctr != 0);
cci_wr_req_addr <= cci_wr_req_addr + 1;
cci_wr_req_ctr <= cci_wr_req_ctr - 1;
cci_wr_req_addr <= cci_wr_req_addr + t_ccip_clAddr'(1);
cci_wr_req_ctr <= cci_wr_req_ctr - DRAM_ADDR_WIDTH'(1);
`ifdef DBG_PRINT_OPAE
$display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next);
$display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d, data=%0h", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next, avs_rdq_dout);
`endif
end
@ -828,12 +876,12 @@ end
// Vortex cache snooping //////////////////////////////////////////////////////
logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_size;
logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_baseaddr;
logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_ctr, snp_req_ctr_next;
logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_rsp_ctr, snp_rsp_ctr_next;
wire [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_size;
wire [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_baseaddr;
reg [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_ctr, snp_rsp_ctr;
wire [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_ctr_next, snp_rsp_ctr_next;
logic vx_snp_req_fire, vx_snp_rsp_fire;
wire vx_snp_req_fire, vx_snp_rsp_fire;
if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin
assign snp_req_baseaddr = {cmd_mem_addr, (`VX_DRAM_ADDR_WIDTH - DRAM_ADDR_WIDTH)'(0)};
@ -846,13 +894,12 @@ end
assign vx_snp_req_fire = vx_snp_req_valid && vx_snp_req_ready;
assign vx_snp_rsp_fire = vx_snp_rsp_valid && vx_snp_rsp_ready;
assign snp_req_ctr_next = vx_snp_req_fire ? (snp_req_ctr + 1) : snp_req_ctr;
assign snp_rsp_ctr_next = vx_snp_rsp_fire ? (snp_rsp_ctr - 1) : snp_rsp_ctr;
assign snp_req_ctr_next = vx_snp_req_fire ? (snp_req_ctr + `VX_DRAM_ADDR_WIDTH'(1)) : snp_req_ctr;
assign snp_rsp_ctr_next = vx_snp_rsp_fire ? (snp_rsp_ctr - `VX_DRAM_ADDR_WIDTH'(1)) : snp_rsp_ctr;
assign cmd_clflush_done = (0 == snp_rsp_ctr);
always_ff @(posedge clk)
begin
always @(posedge clk) begin
if (reset) begin
vx_snp_req_valid <= 0;
vx_snp_req_addr <= 0;
@ -886,11 +933,11 @@ begin
if (vx_snp_req_fire)
begin
assert(snp_req_ctr < snp_req_size);
vx_snp_req_addr <= vx_snp_req_addr + 1;
vx_snp_req_addr <= vx_snp_req_addr + `VX_DRAM_ADDR_WIDTH'(1);
vx_snp_req_tag <= (`VX_SNP_TAG_WIDTH)'(snp_req_ctr_next);
snp_req_ctr <= snp_req_ctr_next;
`ifdef DBG_PRINT_OPAE
$display("%t: AFU Snp Req: addr=%0h, tag=%0d, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(vx_snp_req_addr), (`VX_SNP_TAG_WIDTH)'(snp_req_ctr_next), (snp_req_size - snp_req_ctr_next));
$display("%t: AFU Snp Req: addr=%0h, tag=%0d, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(vx_snp_req_addr), (`VX_SNP_TAG_WIDTH)'(vx_snp_req_tag), (snp_req_size - snp_req_ctr_next));
`endif
end
@ -907,7 +954,7 @@ end
// CSRs///////////////////////////////////////////////////////////////////////
logic csr_io_req_sent;
reg csr_io_req_sent;
assign vx_csr_io_req_valid = !csr_io_req_sent
&& ((STATE_CSR_READ == state || STATE_CSR_WRITE == state));
@ -920,8 +967,7 @@ assign vx_csr_io_rsp_ready = 1;
assign cmd_csr_done = (STATE_CSR_WRITE == state) ? vx_csr_io_req_ready : vx_csr_io_rsp_valid;
always_ff @(posedge clk)
begin
always @(posedge clk) begin
if (reset) begin
csr_io_req_sent <= 0;
cmd_csr_rdata <= 0;
@ -946,11 +992,7 @@ end
assign cmd_run_done = !vx_busy;
Vortex #() vortex (
`SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_CACHE_BIND
`SCOPE_SIGNALS_ISSUE_BIND
`SCOPE_SIGNALS_EXECUTE_BIND
`SCOPE_BIND_top_vortex
.clk (clk),
.reset (reset | vx_reset),
@ -989,10 +1031,10 @@ Vortex #() vortex (
`UNUSED_PIN (io_req_addr),
`UNUSED_PIN (io_req_data),
`UNUSED_PIN (io_req_tag),
.io_req_ready (1),
.io_req_ready (1'b1),
// I/O response
.io_rsp_valid (0),
.io_rsp_valid (1'b0),
.io_rsp_data (0),
.io_rsp_tag (0),
`UNUSED_PIN (io_rsp_ready),
@ -1026,10 +1068,7 @@ end
`ifdef SCOPE
localparam SCOPE_DATAW = $bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST});
localparam SCOPE_SR_DEPTH = 2;
`STATIC_ASSERT(SCOPE_DATAW == 1766, "invalid size")
`SCOPE_ASSIGN (scope_reset, vx_reset);
`SCOPE_ASSIGN (scope_dram_req_valid, vx_dram_req_valid);
`SCOPE_ASSIGN (scope_dram_req_addr, {vx_dram_req_addr, 4'b0});
@ -1060,57 +1099,20 @@ localparam SCOPE_SR_DEPTH = 2;
`SCOPE_ASSIGN (scope_busy, vx_busy);
wire scope_changed = (scope_icache_req_valid && scope_icache_req_ready)
|| (scope_icache_rsp_valid && scope_icache_rsp_ready)
|| ((| scope_dcache_req_valid) && scope_dcache_req_ready)
|| ((| scope_dcache_rsp_valid) && scope_dcache_rsp_ready)
|| (scope_dram_req_valid && scope_dram_req_ready)
|| (scope_dram_rsp_valid && scope_dram_rsp_ready)
|| (scope_snp_req_valid && scope_snp_req_ready)
|| (scope_snp_rsp_valid && scope_snp_rsp_ready)
|| (scope_issue_valid && scope_issue_ready)
|| scope_gpr_rsp_valid
|| scope_bank_valid_st0
|| scope_bank_valid_st1
|| scope_bank_valid_st2
|| scope_bank_stall_pipe
|| scope_scoreboard_delay
|| scope_gpr_delay
|| scope_execute_delay
|| scope_busy;
wire scope_start = vx_reset;
wire [SCOPE_DATAW+1:0] scope_data_in_st[SCOPE_SR_DEPTH-1:0];
wire [SCOPE_DATAW+1:0] scope_data_in_ste;
assign scope_data_in_st[0] = {`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST, scope_changed, scope_start};
assign scope_data_in_ste = scope_data_in_st[SCOPE_SR_DEPTH-1];
for (genvar i = 1; i < SCOPE_SR_DEPTH; i++) begin
VX_generic_register #(
.N (SCOPE_DATAW+2)
) scope_sr (
.clk (clk),
.reset (reset),
.stall (0),
.flush (0),
.in (scope_data_in_st[i-1]),
.out (scope_data_in_st[i])
);
end
wire scope_changed = `SCOPE_TRIGGER;
VX_scope #(
.DATAW (SCOPE_DATAW),
.DATAW ($bits({`SCOPE_DATA_LIST,`SCOPE_UPDATE_LIST})),
.BUSW (64),
.SIZE (4096),
.UPDW ($bits({`SCOPE_SIGNALS_UPD_LIST}))
.UPDW ($bits({`SCOPE_UPDATE_LIST}))
) scope (
.clk (clk),
.reset (reset),
.start (scope_data_in_ste[0]),
.stop (0),
.changed (scope_data_in_ste[1]),
.data_in (scope_data_in_ste[SCOPE_DATAW+1:2]),
.start (scope_start),
.stop (1'b0),
.changed (scope_changed),
.data_in ({`SCOPE_DATA_LIST,`SCOPE_UPDATE_LIST}),
.bus_in (cmd_scope_wdata),
.bus_out (cmd_scope_rdata),
.bus_read (cmd_scope_read),

View file

@ -3,11 +3,7 @@
module VX_cluster #(
parameter CLUSTER_ID = 0
) (
`SCOPE_SIGNALS_ISTAGE_IO
`SCOPE_SIGNALS_LSU_IO
`SCOPE_SIGNALS_CACHE_IO
`SCOPE_SIGNALS_ISSUE_IO
`SCOPE_SIGNALS_EXECUTE_IO
`SCOPE_IO_VX_cluster
// Clock
input wire clk,
@ -138,11 +134,7 @@ module VX_cluster #(
VX_core #(
.CORE_ID(i + (CLUSTER_ID * `NUM_CORES))
) core (
`SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_CACHE_BIND
`SCOPE_SIGNALS_ISSUE_BIND
`SCOPE_SIGNALS_EXECUTE_BIND
`SCOPE_BIND_VX_cluster_core(i)
.clk (clk),
.reset (reset),
@ -380,7 +372,7 @@ module VX_cluster #(
.SNP_REQ_TAG_WIDTH (`L2SNP_TAG_WIDTH),
.SNP_FWD_TAG_WIDTH (`DSNP_TAG_WIDTH)
) l2cache (
`SCOPE_SIGNALS_CACHE_UNBIND
`SCOPE_BIND_VX_cluster_l2cache
.clk (clk),
.reset (reset),

View file

@ -59,8 +59,6 @@
`define EXT_F_ENABLE
`endif
//`define FPU_FAST
// Device identification
`define VENDOR_ID 0
`define ARCHITECTURE_ID 0

View file

@ -3,11 +3,7 @@
module VX_core #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_ISTAGE_IO
`SCOPE_SIGNALS_LSU_IO
`SCOPE_SIGNALS_CACHE_IO
`SCOPE_SIGNALS_ISSUE_IO
`SCOPE_SIGNALS_EXECUTE_IO
`SCOPE_IO_VX_core
// Clock
input wire clk,
@ -179,10 +175,7 @@ module VX_core #(
VX_pipeline #(
.CORE_ID(CORE_ID)
) pipeline (
`SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_ISSUE_BIND
`SCOPE_SIGNALS_EXECUTE_BIND
`SCOPE_BIND_VX_core_pipeline
.clk(clk),
.reset(reset),
@ -258,7 +251,7 @@ module VX_core #(
VX_mem_unit #(
.CORE_ID(CORE_ID)
) mem_unit (
`SCOPE_SIGNALS_CACHE_BIND
`SCOPE_BIND_VX_core_mem_unit
.clk (clk),
.reset (reset),

View file

@ -7,7 +7,7 @@ module VX_csr_unit #(
input wire reset,
VX_cmt_to_csr_if cmt_to_csr_if,
VX_csr_to_issue_if csr_to_issue_if,
VX_csr_to_issue_if csr_to_issue_if,
VX_csr_io_req_if csr_io_req_if,
VX_csr_io_rsp_if csr_io_rsp_if,
@ -15,8 +15,8 @@ module VX_csr_unit #(
VX_csr_req_if csr_req_if,
VX_exu_to_cmt_if csr_commit_if
);
VX_csr_req_if csr_pipe_req_if();
VX_exu_to_cmt_if csr_pipe_rsp_if();
VX_csr_req_if csr_pipe_req_if();
VX_exu_to_cmt_if csr_pipe_rsp_if();
wire select_io_req = csr_io_req_if.valid;
wire select_io_rsp;

View file

@ -347,7 +347,7 @@ module VX_decode #(
assign decode_if.rd = rd;
assign decode_if.rs1 = rs1_qual;
assign decode_if.rs2 = rs2;
assign decode_if.rs3 = rs3;
assign decode_if.rs3 = 0;
`endif
assign decode_if.use_rs3 = use_rs3;

View file

@ -6,11 +6,6 @@
///////////////////////////////////////////////////////////////////////////////
// `define SYNTHESIS 1
// `define ASIC 1
///////////////////////////////////////////////////////////////////////////////
`define NW_BITS `LOG2UP(`NUM_WARPS)
`define NT_BITS `LOG2UP(`NUM_THREADS)
@ -248,7 +243,7 @@
////////////////////////// Dcache Configurable Knobs //////////////////////////
// Cache ID
`define DCACHE_ID (((`L3_ENABLE && `L2_ENABLE) ? 2 : `L2_ENABLE ? 1 : 0) + (CORE_ID * 3) + 0)
`define DCACHE_ID 32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 0
// TAG sharing enable
`define DCORE_TAG_ID_BITS `LOG2UP(`LSUQ_SIZE)
@ -277,7 +272,7 @@
////////////////////////// Icache Configurable Knobs //////////////////////////
// Cache ID
`define ICACHE_ID (((`L3_ENABLE && `L2_ENABLE) ? 2 : `L2_ENABLE ? 1 : 0) + (CORE_ID * 3) + 1)
`define ICACHE_ID 32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1
// Core request address bits
`define ICORE_ADDR_WIDTH (32-`CLOG2(`IWORD_SIZE))
@ -309,7 +304,7 @@
////////////////////////// SM Configurable Knobs //////////////////////////////
// Cache ID
`define SCACHE_ID (((`L3_ENABLE && `L2_ENABLE) ? 2 : `L2_ENABLE ? 1 : 0) + (CORE_ID * 3) + 2)
`define SCACHE_ID 32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 2
// Number of Word requests per cycle {1, 2, 4, 8, ...}
`define SNUM_REQUESTS `NUM_THREADS
@ -326,7 +321,7 @@
////////////////////////// L2cache Configurable Knobs /////////////////////////
// Cache ID
`define L2CACHE_ID (`L3_ENABLE ? 1 : 0)
`define L2CACHE_ID 32'(`L3_ENABLE) + CLUSTER_ID
// Core request tag bits
`define L2CORE_TAG_WIDTH (`DCORE_TAG_WIDTH + `CLOG2(`NUM_CORES))

View file

@ -3,8 +3,7 @@
module VX_execute #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_LSU_IO
`SCOPE_SIGNALS_EXECUTE_IO
`SCOPE_IO_VX_execute
input wire clk,
input wire reset,
@ -55,7 +54,7 @@ module VX_execute #(
VX_lsu_unit #(
.CORE_ID(CORE_ID)
) lsu_unit (
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_BIND_VX_execute_lsu_unit
.clk (clk),
.reset (reset),
.dcache_req_if (dcache_req_if),
@ -122,6 +121,7 @@ module VX_execute #(
VX_gpu_unit #(
.CORE_ID(CORE_ID)
) gpu_unit (
`SCOPE_BIND_VX_execute_gpu_unit
.clk (clk),
.reset (reset),
.gpu_req_if (gpu_req_if),

View file

@ -3,7 +3,7 @@
module VX_fetch #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_ISTAGE_IO
`SCOPE_IO_VX_fetch
input wire clk,
input wire reset,
@ -29,6 +29,8 @@ module VX_fetch #(
VX_warp_sched #(
.CORE_ID(CORE_ID)
) warp_sched (
`SCOPE_BIND_VX_fetch_warp_sched
.clk (clk),
.reset (reset),
.warp_ctl_if (warp_ctl_if),
@ -43,7 +45,7 @@ module VX_fetch #(
VX_icache_stage #(
.CORE_ID(CORE_ID)
) icache_stage (
`SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_BIND_VX_fetch_icache_stage
.clk (clk),
.reset (reset),

View file

@ -1,74 +0,0 @@
`include "VX_define.vh"
// Control module that sequences register-file reads for requests that need a
// third source operand (rs3), using the single `raddr1` read address.
//
// Ports:
//   clk, reset   - clock and synchronous reset.
//   rs1_data     - read data returned for address `raddr1`.
//   rs2_data     - read data returned for the rs2 address (driven elsewhere).
//   gpr_req_if   - incoming read request (valid, wid, PC, rs1, rs3, use_rs3);
//                  this module drives its `ready`.
//   raddr1       - {wid, register index} address for the first read port.
//   gpr_rsp_if   - registered response (valid, wid, PC, rs1/rs2/rs3 data);
//                  this module reads its `ready`.
//
// Sequencing: `read_rs1` is 1 after reset. While it is 1, `raddr1` selects
// rs1 and `rs1_data` is captured into the rs1 response register. A valid
// request with `use_rs3` in that state deasserts `gpr_req_if.ready` for the
// cycle and clears `read_rs1`, so `raddr1` then selects rs3 and the value
// returned on `rs1_data` is captured as rs3 data.
// NOTE(review): this presumes the register file presents read data in the same
// cycle as the address - confirm against the RAM implementation.
module VX_gpr_fp_ctrl (
    input wire clk,
    input wire reset,

    input wire [`NUM_THREADS-1:0][31:0] rs1_data,
    input wire [`NUM_THREADS-1:0][31:0] rs2_data,
    VX_gpr_req_if gpr_req_if,

    // outputs
    output wire [`NW_BITS+`NR_BITS-1:0] raddr1,
    VX_gpr_rsp_if gpr_rsp_if
);

    // Registered response fields.
    reg [`NUM_THREADS-1:0][31:0] rsp_rs1_data, rsp_rs2_data, rsp_rs3_data;
    reg rsp_valid;
    reg [31:0] rsp_pc;
    reg [`NW_BITS-1:0] rsp_wid;

    // 1: raddr1 addresses rs1; 0: raddr1 addresses rs3.
    reg read_rs1;

    // A valid rs3 request arrived while the port is still reading rs1.
    wire rs3_delay = gpr_req_if.valid && gpr_req_if.use_rs3 && read_rs1;
    // A valid request is present and the response side is ready.
    wire read_fire = gpr_req_if.valid && gpr_rsp_if.ready;

    always @(posedge clk) begin
        if (reset) begin
            rsp_valid    <= 0;
            rsp_pc       <= 0;
            rsp_rs1_data <= 0;
            rsp_rs2_data <= 0;
            rsp_rs3_data <= 0;
            rsp_wid      <= 0;
            read_rs1     <= 1;
        end else begin
            if (rs3_delay) begin
                // Switch the read port to rs3 for the following cycle.
                read_rs1 <= 0;
            end else if (read_fire) begin
                // Request consumed: go back to reading rs1.
                read_rs1 <= 1;
            end

            rsp_valid <= gpr_req_if.valid;
            rsp_wid   <= gpr_req_if.wid;
            rsp_pc    <= gpr_req_if.PC;

            // Hold the captured rs1 value while the port is used for rs3.
            if (read_rs1) begin
                rsp_rs1_data <= rs1_data;
            end

            rsp_rs2_data <= rs2_data;
            // The rs3 value arrives on the first read port's data output.
            rsp_rs3_data <= rs1_data;

            // During the rs3 cycle the request must still be for the same warp.
            assert(read_rs1 || rsp_wid == gpr_req_if.wid);
        end
    end

    // outputs
    wire [`NR_BITS-1:0] rs1 = read_rs1 ? gpr_req_if.rs1 : gpr_req_if.rs3;
    assign raddr1 = {gpr_req_if.wid, rs1};
    // Stall the requester for the cycle in which rs1 is read for an rs3 request.
    assign gpr_req_if.ready = ~rs3_delay;

    assign gpr_rsp_if.valid    = rsp_valid;
    assign gpr_rsp_if.wid      = rsp_wid;
    assign gpr_rsp_if.PC       = rsp_pc;
    assign gpr_rsp_if.rs1_data = rsp_rs1_data;
    assign gpr_rsp_if.rs2_data = rsp_rs2_data;
    assign gpr_rsp_if.rs3_data = rsp_rs3_data;

endmodule

View file

@ -10,136 +10,24 @@ module VX_gpr_ram (
output wire [`NUM_THREADS-1:0][31:0] rs1_data,
output wire [`NUM_THREADS-1:0][31:0] rs2_data
);
`ifndef ASIC
reg [`NUM_THREADS-1:0][3:0][7:0] ram [(`NUM_WARPS * `NUM_REGS)-1:0];
initial begin // initialize ram: set r0 = 0
for (integer j = 0; j < `NUM_WARPS; j++) begin
for (integer i = 0; i < `NUM_REGS; i++) begin
ram[j * `NUM_REGS + i] = (i == 0) ? {`NUM_THREADS{32'h0}} : {`NUM_THREADS{32'hx}};
end
end
end
always @(posedge clk) begin
for (integer i = 0; i < `NUM_THREADS; i++) begin
if (we[i]) begin
ram[waddr][i][0] <= wdata[i][07:00];
ram[waddr][i][1] <= wdata[i][15:08];
ram[waddr][i][2] <= wdata[i][23:16];
ram[waddr][i][3] <= wdata[i][31:24];
end
end
end
assign rs1_data = ram[rs1];
assign rs2_data = ram[rs2];
`else
wire [`NUM_THREADS-1:0][31:0] write_bit_mask;
reg [`NUM_THREADS-1:0][3:0][7:0] mem [(`NUM_WARPS * `NUM_REGS)-1:0];
reg [`NUM_THREADS-1:0][31:0] q1, q2;
always @(posedge clk) begin
for (integer i = 0; i < `NUM_THREADS; i++) begin
assign write_bit_mask[i] = {32{~we[i]}};
end
wire cenb = 0;
wire cena_1 = 0;
wire cena_2 = 0;
wire [`NUM_THREADS-1:0][31:0] tmp_a;
wire [`NUM_THREADS-1:0][31:0] tmp_b;
`ifndef SYNTHESIS
for (integer i = 0; i < `NUM_THREADS; i++) begin
for (integer j = 0; j < 32; j++) begin
assign rs1_data[i][j] = ((tmp_a[i][j] === 1'dx) || cena_1) ? 1'b0 : tmp_a[i][j];
assign rs2_data[i][j] = ((tmp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : tmp_b[i][j];
if (we[i]) begin
mem[waddr][i][0] <= wdata[i][07:00];
mem[waddr][i][1] <= wdata[i][15:08];
mem[waddr][i][2] <= wdata[i][23:16];
mem[waddr][i][3] <= wdata[i][31:24];
end
end
`else
assign rs1_data = tmp_a;
assign rs2_data = tmp_b;
`endif
for (integer i = 0; i < 'NT; i=i+4) begin
`IGNORE_WARNINGS_BEGIN
rf2_32x128_wm1 first_ram (
.CENYA(),
.AYA(),
.CENYB(),
.WENYB(),
.AYB(),
.QA(tmp_a[(i+3):(i)]),
.SOA(),
.SOB(),
.CLKA(clk),
.CENA(cena_1),
.AA(rs1[(i+3):(i)]),
.CLKB(clk),
.CENB(cenb),
.WENB(write_bit_mask[(i+3):(i)]),
.AB(waddr[(i+3):(i)]),
.DB(wdata[(i+3):(i)]),
.EMAA(3'b011),
.EMASA(1'b0),
.EMAB(3'b011),
.TENA(1'b1),
.TCENA(1'b0),
.TAA(5'b0),
.TENB(1'b1),
.TCENB(1'b0),
.TWENB(128'b0),
.TAB(5'b0),
.TDB(128'b0),
.RET1N(1'b1),
.SIA(2'b0),
.SEA(1'b0),
.DFTRAMBYP(1'b0),
.SIB(2'b0),
.SEB(1'b0),
.COLLDISN(1'b1)
);
q1 <= mem[rs1];
q2 <= mem[rs2];
end
rf2_`NUM_GPRSx128_wm1 second_ram (
.CENYA(),
.AYA(),
.CENYB(),
.WENYB(),
.AYB(),
.QA(tmp_b[(i+3):(i)]),
.SOA(),
.SOB(),
.CLKA(clk),
.CENA(cena_2),
.AA(rs2[(i+3):(i)]),
.CLKB(clk),
.CENB(cenb),
.WENB(write_bit_mask[(i+3):(i)]),
.AB(waddr[(i+3):(i)]),
.DB(wdata[(i+3):(i)]),
.EMAA(3'b011),
.EMASA(1'b0),
.EMAB(3'b011),
.TENA(1'b1),
.TCENA(1'b0),
.TAA(5'b0),
.TENB(1'b1),
.TCENB(1'b0),
.TWENB(128'b0),
.TAB(5'b0),
.TDB(128'b0),
.RET1N(1'b1),
.SIA(2'b0),
.SEA(1'b0),
.DFTRAMBYP(1'b0),
.SIB(2'b0),
.SEB(1'b0),
.COLLDISN(1'b1)
);
`IGNORE_WARNINGS_END
end
assign rs1_data = q1;
assign rs2_data = q2;
`endif
endmodule
endmodule

View file

@ -15,9 +15,15 @@ module VX_gpr_stage #(
);
`UNUSED_VAR (reset)
wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [`NUM_THREADS-1:0][31:0] rs2_data;
wire [`NW_BITS+`NR_BITS-1:0] raddr1;
reg rsp_valid;
reg [`NW_BITS-1:0] rsp_wid;
reg [31:0] rsp_pc;
reg rs1_is_zero, rs2_is_zero;
wire [`NUM_THREADS-1:0][31:0] rs1_data, rs2_data;
wire [`NW_BITS+`NR_BITS-1:0] raddr1, raddr2;
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
VX_gpr_ram gpr_ram (
.clk (clk),
@ -25,60 +31,77 @@ module VX_gpr_stage #(
.waddr ({writeback_if.wid, writeback_if.rd}),
.wdata (writeback_if.data),
.rs1 (raddr1),
.rs2 ({gpr_req_if.wid, gpr_req_if.rs2}),
.rs2 (raddr2),
.rs1_data (rs1_data),
.rs2_data (rs2_data)
);
);
`ifdef EXT_F_ENABLE
VX_gpr_fp_ctrl VX_gpr_fp_ctrl (
.clk (clk),
.reset (reset),
.rs1_data (rs1_data),
.rs2_data (rs2_data),
.raddr1 (raddr1),
.gpr_req_if (gpr_req_if),
.gpr_rsp_if (gpr_rsp_if)
);
`else
reg [`NUM_THREADS-1:0][31:0] rsp_rs1_data, rsp_rs2_data;
reg rsp_valid;
reg [`NW_BITS-1:0] rsp_wid;
reg [31:0] rsp_pc;
always @(posedge clk) begin
if (reset) begin
rsp_valid <= 0;
rsp_wid <= 0;
rsp_pc <= 0;
rsp_rs1_data <= 0;
rsp_rs2_data <= 0;
rsp_valid <= 0;
rsp_wid <= 0;
rsp_pc <= 0;
rs1_is_zero <= 0;
rs2_is_zero <= 0;
end else begin
rsp_valid <= gpr_req_if.valid;
rsp_wid <= gpr_req_if.wid;
rsp_pc <= gpr_req_if.PC;
rsp_rs1_data <= rs1_data;
rsp_rs2_data <= rs2_data;
rsp_valid <= gpr_req_if.valid;
rsp_wid <= gpr_req_if.wid;
rsp_pc <= gpr_req_if.PC;
rs1_is_zero <= (0 == gpr_req_if.rs1);
rs2_is_zero <= (0 == gpr_req_if.rs2);
end
end
`ifdef EXT_F_ENABLE
reg [`NUM_THREADS-1:0][31:0] rs3_data;
reg read_rs3, save_rs3;
wire rs3_delay = gpr_req_if.valid && gpr_req_if.use_rs3 && !read_rs3;
wire read_fire = gpr_req_if.valid && gpr_rsp_if.ready;
always @(posedge clk) begin
if (reset) begin
rs3_data <= 0;
read_rs3 <= 0;
end else begin
if (rs3_delay) begin
read_rs3 <= 1;
save_rs3 <= 1;
end else if (read_fire) begin
read_rs3 <= 0;
end
if (save_rs3) begin
rs3_data <= rs1_data;
save_rs3 <= 0;
end
assert(!read_rs3 || rsp_wid == gpr_req_if.wid);
end
end
assign raddr1 = {gpr_req_if.wid, (rs3_delay ? gpr_req_if.rs3 : gpr_req_if.rs1)};
assign gpr_req_if.ready = ~rs3_delay;
assign gpr_rsp_if.rs3_data = rs3_data;
`else
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
assign gpr_req_if.ready = 1;
assign gpr_rsp_if.valid = rsp_valid;
assign gpr_rsp_if.wid = rsp_wid;
assign gpr_rsp_if.PC = rsp_pc;
assign gpr_rsp_if.rs1_data = rsp_rs1_data;
assign gpr_rsp_if.rs2_data = rsp_rs2_data;
assign gpr_rsp_if.rs3_data = 0;
`UNUSED_VAR (gpr_req_if.valid);
`UNUSED_VAR (gpr_req_if.rs3);
`UNUSED_VAR (gpr_req_if.use_rs3);
`UNUSED_VAR (gpr_rsp_if.ready);
`endif
assign gpr_rsp_if.rs1_data = rs1_is_zero ? (`NUM_THREADS*32)'(0) : rs1_data;
assign gpr_rsp_if.rs2_data = rs2_is_zero ? (`NUM_THREADS*32)'(0) : rs2_data;
assign gpr_rsp_if.valid = rsp_valid;
assign gpr_rsp_if.wid = rsp_wid;
assign gpr_rsp_if.PC = rsp_pc;
assign writeback_if.ready = 1'b1;
endmodule
endmodule

View file

@ -3,6 +3,8 @@
module VX_gpu_unit #(
parameter CORE_ID = 0
) (
`SCOPE_IO_VX_gpu_unit
input wire clk,
input wire reset,
@ -88,4 +90,18 @@ module VX_gpu_unit #(
// can accept new request?
assign gpu_req_if.ready = gpu_commit_if.ready;
`SCOPE_ASSIGN (scope_gpu_req_valid, gpu_req_if.valid);
`SCOPE_ASSIGN (scope_gpu_req_wid, gpu_req_if.wid);
`SCOPE_ASSIGN (scope_gpu_req_tmask, gpu_req_if.tmask);
`SCOPE_ASSIGN (scope_gpu_req_op_type, gpu_req_if.op_type);
`SCOPE_ASSIGN (scope_gpu_req_rs1, gpu_req_if.rs1_data[0]);
`SCOPE_ASSIGN (scope_gpu_req_rs2, gpu_req_if.rs2_data);
`SCOPE_ASSIGN (scope_gpu_req_ready, gpu_req_if.ready);
`SCOPE_ASSIGN (scope_gpu_rsp_valid, warp_ctl_if.valid);
`SCOPE_ASSIGN (scope_gpu_rsp_wid, warp_ctl_if.wid);
`SCOPE_ASSIGN (scope_gpu_rsp_tmc, warp_ctl_if.tmc);
`SCOPE_ASSIGN (scope_gpu_rsp_wspawn, warp_ctl_if.wspawn);
`SCOPE_ASSIGN (scope_gpu_rsp_split, warp_ctl_if.split);
`SCOPE_ASSIGN (scope_gpu_rsp_barrier, warp_ctl_if.barrier);
endmodule

View file

@ -20,15 +20,12 @@ module VX_ibuffer #(
localparam ADDRW = $clog2(SIZE);
localparam NWARPSW = $clog2(`NUM_WARPS+1);
`USE_FAST_BRAM reg [DATAW-1:0] entries [`NUM_WARPS-1:0][SIZE-1:0];
reg [SIZEW-1:0] size_r [`NUM_WARPS-1:0];
reg [ADDRW:0] rd_ptr_r [`NUM_WARPS-1:0];
reg [ADDRW:0] wr_ptr_r [`NUM_WARPS-1:0];
wire [`NUM_WARPS-1:0] q_full;
wire [`NUM_WARPS-1:0][SIZEW-1:0] q_size;
wire [DATAW-1:0] q_data_in;
wire [`NUM_WARPS-1:0][DATAW-1:0] q_data_prev;
wire [`NUM_WARPS-1:0][DATAW-1:0] q_data_prev;
reg [`NUM_WARPS-1:0][DATAW-1:0] q_data_out;
wire enq_fire = ibuf_enq_if.valid && ibuf_enq_if.ready;
@ -39,21 +36,33 @@ module VX_ibuffer #(
wire writing = enq_fire && (i == ibuf_enq_if.wid);
wire reading = deq_fire && (i == ibuf_deq_if.wid);
wire [ADDRW-1:0] rd_ptr_a = rd_ptr_r[i][ADDRW-1:0];
wire [ADDRW-1:0] wr_ptr_a = wr_ptr_r[i][ADDRW-1:0];
wire is_slot0 = (0 == size_r[i]) || ((1 == size_r[i]) && reading);
wire push = writing && !is_slot0;
wire pop = reading && (size_r[i] != 1);
VX_generic_queue #(
.DATAW(DATAW),
.SIZE(SIZE)
) queue (
.clk (clk),
.reset (reset),
.push (push),
.pop (pop),
.data_in (q_data_in),
.data_out (q_data_prev[i]),
`UNUSED_PIN (empty),
`UNUSED_PIN (full),
`UNUSED_PIN (size)
);
always @(posedge clk) begin
if (reset) begin
rd_ptr_r[i] <= 0;
wr_ptr_r[i] <= 0;
size_r[i] <= 0;
end else begin
if (writing) begin
if ((0 == size_r[i]) || ((1 == size_r[i]) && reading)) begin
size_r[i] <= 0;
end else begin
if (writing) begin
if (is_slot0) begin
q_data_out[i] <= q_data_in;
end else begin
entries[i][wr_ptr_a] <= q_data_in;
wr_ptr_r[i] <= wr_ptr_r[i] + ADDRW'(1);
end
if (!reading) begin
size_r[i] <= size_r[i] + SIZEW'(1);
@ -62,18 +71,16 @@ module VX_ibuffer #(
if (reading) begin
if (size_r[i] != 1) begin
q_data_out[i] <= q_data_prev[i];
rd_ptr_r[i] <= rd_ptr_r[i] + ADDRW'(1);
end
if (!writing) begin
size_r[i] <= size_r[i] - SIZEW'(1);
end
end
end
end
assign q_data_prev[i] = entries[i][rd_ptr_a];
assign q_full[i] = (size_r[i] == SIZE);
assign q_size[i] = size_r[i];
end
assign q_full[i] = (size_r[i] == SIZE);
assign q_size[i] = size_r[i];
end
///////////////////////////////////////////////////////////////////////////
@ -144,9 +151,9 @@ module VX_ibuffer #(
schedule_table[deq_wid_n] <= 0;
end
deq_valid <= deq_valid_n;
deq_wid <= deq_wid_n;
deq_instr <= deq_instr_n;
deq_valid <= deq_valid_n;
deq_wid <= deq_wid_n;
deq_instr <= deq_instr_n;
if (warp_added && !warp_removed) begin
num_warps <= num_warps + NWARPSW'(1);

View file

@ -3,7 +3,7 @@
module VX_icache_stage #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_ISTAGE_IO
`SCOPE_IO_VX_icache_stage
input wire clk,
input wire reset,
@ -20,8 +20,8 @@ module VX_icache_stage #(
);
`UNUSED_VAR (reset)
reg [31:0] rsp_PC_buf [`NUM_WARPS-1:0];
reg [`NUM_THREADS-1:0] rsp_tmask_buf [`NUM_WARPS-1:0];
`NO_RW_RAM_CHECK reg [31:0] rsp_PC_buf [`NUM_WARPS-1:0];
`NO_RW_RAM_CHECK reg [`NUM_THREADS-1:0] rsp_tmask_buf [`NUM_WARPS-1:0];
wire icache_req_fire = icache_req_if.valid && icache_req_if.ready;
@ -29,8 +29,8 @@ module VX_icache_stage #(
wire [`NW_BITS-1:0] rsp_tag = icache_rsp_if.tag[0][`NW_BITS-1:0];
always @(posedge clk) begin
if (icache_req_fire) begin
rsp_PC_buf[req_tag] <= ifetch_req_if.PC;
if (icache_req_fire) begin
rsp_PC_buf[req_tag] <= ifetch_req_if.PC;
rsp_tmask_buf[req_tag] <= ifetch_req_if.tmask;
end
end

View file

@ -1,4 +1,3 @@
`include "VX_platform.vh"
module VX_ipdom_stack #(
@ -17,33 +16,55 @@ module VX_ipdom_stack #(
);
localparam STACK_SIZE = 2 ** DEPTH;
`USE_FAST_BRAM reg [WIDTH-1:0] stack_1 [0:STACK_SIZE-1];
`USE_FAST_BRAM reg [WIDTH-1:0] stack_2 [0:STACK_SIZE-1];
`USE_FAST_BRAM reg is_part [0:STACK_SIZE-1];
`NO_RW_RAM_CHECK reg [WIDTH-1:0] stack_1 [0:STACK_SIZE-1];
`NO_RW_RAM_CHECK reg [WIDTH-1:0] stack_2 [0:STACK_SIZE-1];
reg is_part [0:STACK_SIZE-1];
reg [DEPTH-1:0] rd_ptr, wr_ptr;
reg [WIDTH - 1:0] d1, d2;
reg p;
always @(posedge clk) begin
if (reset) begin
rd_ptr <= 0;
wr_ptr <= 0;
end else begin
if (push) begin
stack_1[wr_ptr] <= q1;
stack_2[wr_ptr] <= q2;
is_part[wr_ptr] <= 0;
rd_ptr <= wr_ptr;
wr_ptr <= wr_ptr + DEPTH'(1);
end else if (pop) begin
wr_ptr <= wr_ptr - DEPTH'(is_part[rd_ptr]);
rd_ptr <= rd_ptr - DEPTH'(is_part[rd_ptr]);
is_part[rd_ptr] <= 1;
end
end
end
assign d = is_part[rd_ptr] ? stack_1[rd_ptr] : stack_2[rd_ptr];
always @(posedge clk) begin
if (push) begin
stack_1[wr_ptr] <= q1;
end
end
assign d1 = stack_1[rd_ptr];
assign empty = (0 == wr_ptr);
always @(posedge clk) begin
if (push) begin
stack_2[wr_ptr] <= q2;
end
end
assign d2 = stack_2[rd_ptr];
always @(posedge clk) begin
if (push) begin
is_part[wr_ptr] <= 0;
end else if (pop) begin
is_part[rd_ptr] <= 1;
end
end
assign p = is_part[rd_ptr];
assign d = p ? d1 : d2;
assign empty = ~(| wr_ptr);
assign full = ((STACK_SIZE-1) == wr_ptr);
endmodule

View file

@ -3,7 +3,7 @@
module VX_issue #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_ISSUE_IO
`SCOPE_IO_VX_issue
input wire clk,
input wire reset,

View file

@ -3,7 +3,7 @@
module VX_lsu_unit #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_LSU_IO
`SCOPE_IO_VX_lsu_unit
input wire clk,
input wire reset,

View file

@ -3,7 +3,7 @@
module VX_mem_unit # (
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_CACHE_IO
`SCOPE_IO_VX_mem_unit
input wire clk,
input wire reset,
@ -77,7 +77,7 @@ module VX_mem_unit # (
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`SDRAM_TAG_WIDTH)
) smem (
`SCOPE_SIGNALS_CACHE_UNBIND
`SCOPE_BIND_VX_mem_unit_smem
.clk (clk),
.reset (reset),
@ -104,7 +104,7 @@ module VX_mem_unit # (
`UNUSED_PIN (dram_req_addr),
`UNUSED_PIN (dram_req_data),
`UNUSED_PIN (dram_req_tag),
.dram_req_ready (0),
.dram_req_ready (1'b0),
// DRAM response
.dram_rsp_valid (0),
@ -113,7 +113,7 @@ module VX_mem_unit # (
`UNUSED_PIN (dram_rsp_ready),
// Snoop request
.snp_req_valid (0),
.snp_req_valid (1'b0),
.snp_req_addr (0),
.snp_req_invalidate (0),
.snp_req_tag (0),
@ -122,17 +122,17 @@ module VX_mem_unit # (
// Snoop response
`UNUSED_PIN (snp_rsp_valid),
`UNUSED_PIN (snp_rsp_tag),
.snp_rsp_ready (0),
.snp_rsp_ready (1'b0),
// Snoop forward out
`UNUSED_PIN (snp_fwdout_valid),
`UNUSED_PIN (snp_fwdout_addr),
`UNUSED_PIN (snp_fwdout_invalidate),
`UNUSED_PIN (snp_fwdout_tag),
.snp_fwdout_ready (0),
.snp_fwdout_ready (1'b0),
// Snoop forward in
.snp_fwdin_valid (0),
.snp_fwdin_valid (1'b0),
.snp_fwdin_tag (0),
`UNUSED_PIN (snp_fwdin_ready)
);
@ -159,7 +159,7 @@ module VX_mem_unit # (
.DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH),
.SNP_REQ_TAG_WIDTH (`DSNP_TAG_WIDTH)
) dcache (
`SCOPE_SIGNALS_CACHE_BIND
`SCOPE_BIND_VX_mem_unit_dcache
.clk (clk),
.reset (reset),
@ -211,10 +211,10 @@ module VX_mem_unit # (
`UNUSED_PIN (snp_fwdout_addr),
`UNUSED_PIN (snp_fwdout_invalidate),
`UNUSED_PIN (snp_fwdout_tag),
.snp_fwdout_ready (0),
.snp_fwdout_ready (1'b0),
// Snoop forward in
.snp_fwdin_valid (0),
.snp_fwdin_valid (1'b0),
.snp_fwdin_tag (0),
`UNUSED_PIN (snp_fwdin_ready)
);
@ -240,7 +240,7 @@ module VX_mem_unit # (
.CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH)
) icache (
`SCOPE_SIGNALS_CACHE_UNBIND
`SCOPE_BIND_VX_mem_unit_icache
.clk (clk),
.reset (reset),
@ -276,26 +276,26 @@ module VX_mem_unit # (
.dram_rsp_ready (icache_dram_rsp_if.ready),
// Snoop request
.snp_req_valid (0),
.snp_req_valid (1'b0),
.snp_req_addr (0),
.snp_req_invalidate (0),
.snp_req_invalidate (1'b0),
.snp_req_tag (0),
`UNUSED_PIN (snp_req_ready),
// Snoop response
`UNUSED_PIN (snp_rsp_valid),
`UNUSED_PIN (snp_rsp_tag),
.snp_rsp_ready (0),
.snp_rsp_ready (1'b0),
// Snoop forward out
`UNUSED_PIN (snp_fwdout_valid),
`UNUSED_PIN (snp_fwdout_addr),
`UNUSED_PIN (snp_fwdout_invalidate),
`UNUSED_PIN (snp_fwdout_tag),
.snp_fwdout_ready (0),
.snp_fwdout_ready (1'b0),
// Snoop forward in
.snp_fwdin_valid (0),
.snp_fwdin_valid (1'b0),
.snp_fwdin_tag (0),
`UNUSED_PIN (snp_fwdin_ready)
);

View file

@ -3,10 +3,7 @@
module VX_pipeline #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_ISTAGE_IO
`SCOPE_SIGNALS_LSU_IO
`SCOPE_SIGNALS_ISSUE_IO
`SCOPE_SIGNALS_EXECUTE_IO
`SCOPE_IO_VX_pipeline
// Clock
input wire clk,
@ -126,7 +123,7 @@ module VX_pipeline #(
VX_fetch #(
.CORE_ID(CORE_ID)
) fetch (
`SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_BIND_VX_pipeline_fetch
.clk (clk),
.reset (reset),
.icache_req_if (core_icache_req_if),
@ -153,7 +150,7 @@ module VX_pipeline #(
VX_issue #(
.CORE_ID(CORE_ID)
) issue (
`SCOPE_SIGNALS_ISSUE_BIND
`SCOPE_BIND_VX_pipeline_issue
.clk (clk),
.reset (reset),
@ -173,8 +170,8 @@ module VX_pipeline #(
VX_execute #(
.CORE_ID(CORE_ID)
) execute (
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_EXECUTE_BIND
`SCOPE_BIND_VX_pipeline_execute
.clk (clk),
.reset (reset),

View file

@ -22,14 +22,16 @@
/* verilator lint_off WIDTH */ \
/* verilator lint_off UNOPTFLAT */ \
/* verilator lint_off UNDRIVEN */ \
/* verilator lint_off DECLFILENAME */
/* verilator lint_off DECLFILENAME */ \
/* verilator lint_off IMPLICIT */
`define IGNORE_WARNINGS_END /* verilator lint_on UNUSED */ \
/* verilator lint_on PINCONNECTEMPTY */ \
/* verilator lint_on WIDTH */ \
/* verilator lint_on UNOPTFLAT */ \
/* verilator lint_on UNDRIVEN */ \
/* verilator lint_on DECLFILENAME */
/* verilator lint_on DECLFILENAME */ \
/* verilator lint_on IMPLICIT */
`define UNUSED_VAR(x) always @(x) begin end
@ -39,9 +41,9 @@
`define STRINGIFY(x) `"x`"
`define STATIC_ASSERT(cond, msg) \
generate \
if (!(cond)) $error(msg); \
`define STATIC_ASSERT(cond, msg) \
generate \
if (!(cond)) $error msg; \
endgenerate
`define ENABLE_TRACING /* verilator tracing_on */
@ -49,8 +51,8 @@
///////////////////////////////////////////////////////////////////////////////
`define USE_FAST_BRAM (* syn_ramstyle = "mlab" *)
`define RELAX_BRAM_RW (* syn_ramstyle = "no_rw_check" *)
`define USE_FAST_BRAM (* ramstyle="mlab" *)
`define NO_RW_RAM_CHECK (* ramstyle="mlab, no_rw_check" *)
///////////////////////////////////////////////////////////////////////////////

View file

@ -3,398 +3,85 @@
`ifdef SCOPE
// Comma-separated list of scope signal names (addresses, data, tags and other
// non-handshake fields). Every entry, including the last one, is followed by a
// comma, so the expansion must be followed directly by another list item.
// NOTE(review): presumably concatenated in front of SCOPE_SIGNALS_UPD_LIST;
// entry order would then define the bit layout - confirm at the use site.
`define SCOPE_SIGNALS_DATA_LIST \
scope_dram_req_addr, \
scope_dram_req_rw, \
scope_dram_req_byteen, \
scope_dram_req_data, \
scope_dram_req_tag, \
scope_dram_rsp_data, \
scope_dram_rsp_tag, \
scope_snp_req_addr, \
scope_snp_req_invalidate, \
scope_snp_req_tag, \
scope_snp_rsp_tag, \
scope_icache_req_wid, \
scope_icache_req_addr, \
scope_icache_req_tag, \
scope_icache_rsp_data, \
scope_icache_rsp_tag, \
scope_dcache_req_wid, \
scope_dcache_req_pc, \
scope_dcache_req_addr, \
scope_dcache_req_rw, \
scope_dcache_req_byteen, \
scope_dcache_req_data, \
scope_dcache_req_tag, \
scope_dcache_rsp_data, \
scope_dcache_rsp_tag, \
scope_issue_wid, \
scope_issue_tmask, \
scope_issue_pc, \
scope_issue_ex_type, \
scope_issue_op_type, \
scope_issue_op_mod, \
scope_issue_wb, \
scope_issue_rd, \
scope_issue_rs1, \
scope_issue_rs2, \
scope_issue_rs3, \
scope_issue_imm, \
scope_issue_rs1_is_pc, \
scope_issue_rs2_is_imm, \
scope_gpr_rsp_wid, \
scope_gpr_rsp_pc, \
scope_gpr_rsp_a, \
scope_gpr_rsp_b, \
scope_gpr_rsp_c, \
scope_writeback_wid, \
scope_writeback_pc, \
scope_writeback_rd, \
scope_writeback_data, \
scope_bank_addr_st0, \
scope_bank_addr_st1, \
scope_bank_addr_st2, \
scope_bank_is_mrvq_st1, \
scope_bank_miss_st1, \
scope_bank_dirty_st1, \
scope_bank_force_miss_st1,
// Comma-separated list of the valid/ready/stall/delay/busy scope signals.
// The scope instance sizes its UPDW parameter from $bits({...}) of this list.
// Unlike SCOPE_SIGNALS_DATA_LIST, the last entry has no trailing comma.
`define SCOPE_SIGNALS_UPD_LIST \
scope_dram_req_valid, \
scope_dram_req_ready, \
scope_dram_rsp_valid, \
scope_dram_rsp_ready, \
scope_snp_req_valid, \
scope_snp_req_ready, \
scope_snp_rsp_valid, \
scope_snp_rsp_ready, \
scope_icache_req_valid, \
scope_icache_req_ready, \
scope_icache_rsp_valid, \
scope_icache_rsp_ready, \
scope_dcache_req_valid, \
scope_dcache_req_ready, \
scope_dcache_rsp_valid, \
scope_dcache_rsp_ready, \
scope_bank_valid_st0, \
scope_bank_valid_st1, \
scope_bank_valid_st2, \
scope_bank_stall_pipe, \
scope_issue_valid, \
scope_issue_ready, \
scope_gpr_rsp_valid, \
scope_writeback_valid, \
scope_scoreboard_delay, \
scope_gpr_delay, \
scope_execute_delay, \
scope_busy
`include "scope-defs.vh"
// Declares one wire per scope signal, with the width each signal carries.
// Expands to a sequence of complete `wire ...;` declarations, so it is meant
// to be placed where net declarations are legal.
`define SCOPE_SIGNALS_DECL \
wire scope_dram_req_valid; \
wire [31:0] scope_dram_req_addr; \
wire scope_dram_req_rw; \
wire [15:0] scope_dram_req_byteen; \
wire [127:0] scope_dram_req_data; \
wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_req_tag; \
wire scope_dram_req_ready; \
wire scope_dram_rsp_valid; \
wire [127:0] scope_dram_rsp_data; \
wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_rsp_tag; \
wire scope_dram_rsp_ready; \
wire scope_snp_req_valid; \
wire [31:0] scope_snp_req_addr; \
wire scope_snp_req_invalidate; \
wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_req_tag; \
wire scope_snp_req_ready; \
wire scope_snp_rsp_valid; \
wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_rsp_tag; \
wire scope_icache_req_valid; \
wire [`NW_BITS-1:0] scope_icache_req_wid; \
wire [31:0] scope_icache_req_addr; \
wire [`ICORE_TAG_ID_BITS-1:0] scope_icache_req_tag; \
wire scope_icache_req_ready; \
wire scope_icache_rsp_valid; \
wire [31:0] scope_icache_rsp_data; \
wire [`ICORE_TAG_ID_BITS-1:0] scope_icache_rsp_tag; \
wire scope_icache_rsp_ready; \
wire [`NUM_THREADS-1:0] scope_dcache_req_valid; \
wire [`NW_BITS-1:0] scope_dcache_req_wid; \
wire [31:0] scope_dcache_req_pc; \
wire [`NUM_THREADS-1:0][31:0] scope_dcache_req_addr; \
wire scope_dcache_req_rw; \
wire [`NUM_THREADS-1:0][3:0] scope_dcache_req_byteen; \
wire [`NUM_THREADS-1:0][31:0] scope_dcache_req_data; \
wire [`DCORE_TAG_ID_BITS-1:0] scope_dcache_req_tag; \
wire scope_dcache_req_ready; \
wire [`NUM_THREADS-1:0] scope_dcache_rsp_valid; \
wire [`NUM_THREADS-1:0][31:0] scope_dcache_rsp_data; \
wire [`DCORE_TAG_ID_BITS-1:0] scope_dcache_rsp_tag; \
wire scope_dcache_rsp_ready; \
wire scope_snp_rsp_ready; \
wire [`NW_BITS-1:0] scope_issue_wid; \
wire [`NUM_THREADS-1:0] scope_issue_tmask; \
wire [31:0] scope_issue_pc; \
wire [`EX_BITS-1:0] scope_issue_ex_type; \
wire [`OP_BITS-1:0] scope_issue_op_type; \
wire [`MOD_BITS-1:0] scope_issue_op_mod; \
wire scope_issue_wb; \
wire [`NR_BITS-1:0] scope_issue_rd; \
wire [`NR_BITS-1:0] scope_issue_rs1; \
wire [`NR_BITS-1:0] scope_issue_rs2; \
wire [`NR_BITS-1:0] scope_issue_rs3; \
wire [31:0] scope_issue_imm; \
wire scope_issue_rs1_is_pc; \
wire scope_issue_rs2_is_imm; \
wire scope_gpr_rsp_valid; \
wire [`NW_BITS-1:0] scope_gpr_rsp_wid; \
wire [31:0] scope_gpr_rsp_pc; \
wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_a; \
wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_b; \
wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_c; \
wire scope_writeback_valid; \
wire [`NW_BITS-1:0] scope_writeback_wid; \
wire [31:0] scope_writeback_pc; \
wire [`NR_BITS-1:0] scope_writeback_rd; \
wire [`NUM_THREADS-1:0][31:0] scope_writeback_data; \
wire scope_bank_valid_st0; \
wire scope_bank_valid_st1; \
wire scope_bank_valid_st2; \
wire [31:0] scope_bank_addr_st0; \
wire [31:0] scope_bank_addr_st1; \
wire [31:0] scope_bank_addr_st2; \
wire scope_bank_is_mrvq_st1; \
wire scope_bank_miss_st1; \
wire scope_bank_dirty_st1; \
wire scope_bank_force_miss_st1; \
wire scope_bank_stall_pipe; \
wire scope_issue_valid; \
wire scope_issue_ready; \
wire scope_scoreboard_delay; \
wire scope_gpr_delay; \
wire scope_execute_delay; \
wire scope_busy;
// Continuous assignment of source expression `s` to scope signal `d`.
// The expansion has no trailing semicolon; the caller supplies it.
`define SCOPE_ASSIGN(d,s) assign d = s
// Output port declarations for the icache request/response scope signals.
// Every declaration ends with a comma, so the macro is placed at the head of
// a module port list, ahead of the module's regular ports.
`define SCOPE_SIGNALS_ISTAGE_IO \
output wire scope_icache_req_valid, \
output wire [`NW_BITS-1:0] scope_icache_req_wid, \
output wire [31:0] scope_icache_req_addr, \
output wire [`ICORE_TAG_ID_BITS-1:0] scope_icache_req_tag, \
output wire scope_icache_req_ready, \
output wire scope_icache_rsp_valid, \
output wire [31:0] scope_icache_rsp_data, \
output wire [`ICORE_TAG_ID_BITS-1:0] scope_icache_rsp_tag, \
output wire scope_icache_rsp_ready,
// Output port declarations for the dcache request/response scope signals.
// Every declaration ends with a comma, so the macro is placed at the head of
// a module port list, ahead of the module's regular ports.
`define SCOPE_SIGNALS_LSU_IO \
output wire [`NUM_THREADS-1:0] scope_dcache_req_valid, \
output wire [`NW_BITS-1:0] scope_dcache_req_wid, \
output wire [31:0] scope_dcache_req_pc, \
output wire [`NUM_THREADS-1:0][31:0] scope_dcache_req_addr, \
output wire scope_dcache_req_rw, \
output wire [`NUM_THREADS-1:0][3:0] scope_dcache_req_byteen, \
output wire [`NUM_THREADS-1:0][31:0] scope_dcache_req_data, \
output wire [`DCORE_TAG_ID_BITS-1:0] scope_dcache_req_tag, \
output wire scope_dcache_req_ready, \
output wire [`NUM_THREADS-1:0] scope_dcache_rsp_valid, \
output wire [`NUM_THREADS-1:0][31:0] scope_dcache_rsp_data, \
output wire [`DCORE_TAG_ID_BITS-1:0] scope_dcache_rsp_tag, \
output wire scope_dcache_rsp_ready,
// Output port declarations for the cache-bank scope signals (st0/st1/st2
// valid and address, st1 status flags, and the pipe stall).
// Every declaration ends with a comma, so the macro is placed at the head of
// a module port list, ahead of the module's regular ports.
`define SCOPE_SIGNALS_CACHE_IO \
output wire scope_bank_valid_st0, \
output wire scope_bank_valid_st1, \
output wire scope_bank_valid_st2, \
output wire [31:0] scope_bank_addr_st0, \
output wire [31:0] scope_bank_addr_st1, \
output wire [31:0] scope_bank_addr_st2, \
output wire scope_bank_is_mrvq_st1, \
output wire scope_bank_miss_st1, \
output wire scope_bank_dirty_st1, \
output wire scope_bank_force_miss_st1, \
output wire scope_bank_stall_pipe,
// Output port declarations for the issue, GPR-response, writeback and
// delay scope signals.
// Every declaration ends with a comma, so the macro is placed at the head of
// a module port list, ahead of the module's regular ports.
`define SCOPE_SIGNALS_ISSUE_IO \
output wire scope_issue_valid, \
output wire [`NW_BITS-1:0] scope_issue_wid, \
output wire [`NUM_THREADS-1:0] scope_issue_tmask, \
output wire [31:0] scope_issue_pc, \
output wire [`EX_BITS-1:0] scope_issue_ex_type, \
output wire [`OP_BITS-1:0] scope_issue_op_type, \
output wire [`MOD_BITS-1:0] scope_issue_op_mod, \
output wire scope_issue_wb, \
output wire [`NR_BITS-1:0] scope_issue_rd, \
output wire [`NR_BITS-1:0] scope_issue_rs1, \
output wire [`NR_BITS-1:0] scope_issue_rs2, \
output wire [`NR_BITS-1:0] scope_issue_rs3, \
output wire [31:0] scope_issue_imm, \
output wire scope_issue_rs1_is_pc, \
output wire scope_issue_rs2_is_imm, \
output wire scope_writeback_valid, \
output wire scope_gpr_rsp_valid, \
output wire [`NW_BITS-1:0] scope_gpr_rsp_wid, \
output wire [31:0] scope_gpr_rsp_pc, \
output wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_a, \
output wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_b, \
output wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_c, \
output wire [`NW_BITS-1:0] scope_writeback_wid, \
output wire [31:0] scope_writeback_pc, \
output wire [`NR_BITS-1:0] scope_writeback_rd, \
output wire [`NUM_THREADS-1:0][31:0] scope_writeback_data, \
output wire scope_issue_ready, \
output wire scope_scoreboard_delay, \
output wire scope_gpr_delay, \
output wire scope_execute_delay,
// Defined with an empty body: expands to nothing, adding no ports.
`define SCOPE_SIGNALS_EXECUTE_IO
// Named port connections that tie each icache scope port of an instance to the
// same-named signal in the instantiating module. Every connection ends with a
// comma, so the macro goes ahead of the instance's regular connections.
`define SCOPE_SIGNALS_ISTAGE_BIND \
.scope_icache_req_valid (scope_icache_req_valid), \
.scope_icache_req_wid (scope_icache_req_wid), \
.scope_icache_req_addr (scope_icache_req_addr), \
.scope_icache_req_tag (scope_icache_req_tag), \
.scope_icache_req_ready (scope_icache_req_ready), \
.scope_icache_rsp_valid (scope_icache_rsp_valid), \
.scope_icache_rsp_data (scope_icache_rsp_data), \
.scope_icache_rsp_tag (scope_icache_rsp_tag), \
.scope_icache_rsp_ready (scope_icache_rsp_ready),
// Named port connections that tie each dcache scope port of an instance to the
// same-named signal in the instantiating module. Every connection ends with a
// comma, so the macro goes ahead of the instance's regular connections.
`define SCOPE_SIGNALS_LSU_BIND \
.scope_dcache_req_valid (scope_dcache_req_valid), \
.scope_dcache_req_wid (scope_dcache_req_wid), \
.scope_dcache_req_pc (scope_dcache_req_pc), \
.scope_dcache_req_addr (scope_dcache_req_addr), \
.scope_dcache_req_rw (scope_dcache_req_rw), \
.scope_dcache_req_byteen(scope_dcache_req_byteen), \
.scope_dcache_req_data (scope_dcache_req_data), \
.scope_dcache_req_tag (scope_dcache_req_tag), \
.scope_dcache_req_ready (scope_dcache_req_ready), \
.scope_dcache_rsp_valid (scope_dcache_rsp_valid), \
.scope_dcache_rsp_data (scope_dcache_rsp_data), \
.scope_dcache_rsp_tag (scope_dcache_rsp_tag), \
.scope_dcache_rsp_ready (scope_dcache_rsp_ready),
// Named port connections that tie each cache-bank scope port of an instance to
// the same-named signal in the instantiating module. Every connection ends
// with a comma, so the macro goes ahead of the instance's regular connections.
`define SCOPE_SIGNALS_CACHE_BIND \
.scope_bank_valid_st0 (scope_bank_valid_st0), \
.scope_bank_valid_st1 (scope_bank_valid_st1), \
.scope_bank_valid_st2 (scope_bank_valid_st2), \
.scope_bank_addr_st0 (scope_bank_addr_st0), \
.scope_bank_addr_st1 (scope_bank_addr_st1), \
.scope_bank_addr_st2 (scope_bank_addr_st2), \
.scope_bank_is_mrvq_st1 (scope_bank_is_mrvq_st1), \
.scope_bank_miss_st1 (scope_bank_miss_st1), \
.scope_bank_dirty_st1 (scope_bank_dirty_st1), \
.scope_bank_force_miss_st1(scope_bank_force_miss_st1), \
.scope_bank_stall_pipe (scope_bank_stall_pipe),
// Named port connections that leave every cache-bank scope port of an instance
// unconnected (empty parentheses). The Verilator PINCONNECTEMPTY lint warning
// is switched off around the connections and back on afterwards. Every
// connection ends with a comma, so regular connections must follow.
`define SCOPE_SIGNALS_CACHE_UNBIND \
/* verilator lint_off PINCONNECTEMPTY */ \
.scope_bank_valid_st0 (), \
.scope_bank_valid_st1 (), \
.scope_bank_valid_st2 (), \
.scope_bank_addr_st0 (), \
.scope_bank_addr_st1 (), \
.scope_bank_addr_st2 (), \
.scope_bank_is_mrvq_st1 (), \
.scope_bank_miss_st1 (), \
.scope_bank_dirty_st1 (), \
.scope_bank_force_miss_st1 (), \
.scope_bank_stall_pipe (), \
/* verilator lint_on PINCONNECTEMPTY */
`define SCOPE_SIGNALS_CACHE_BANK_SELECT \
/* verilator lint_off UNUSED */ \
wire [NUM_BANKS-1:0] scope_per_bank_valid_st0; \
wire [NUM_BANKS-1:0] scope_per_bank_valid_st1; \
wire [NUM_BANKS-1:0] scope_per_bank_valid_st2; \
wire [NUM_BANKS-1:0][31:0] scope_per_bank_addr_st0; \
wire [NUM_BANKS-1:0][31:0] scope_per_bank_addr_st1; \
wire [NUM_BANKS-1:0][31:0] scope_per_bank_addr_st2; \
wire [NUM_BANKS-1:0] scope_per_bank_is_mrvq_st1; \
wire [NUM_BANKS-1:0] scope_per_bank_miss_st1; \
wire [NUM_BANKS-1:0] scope_per_bank_dirty_st1; \
wire [NUM_BANKS-1:0] scope_per_bank_force_miss_st1; \
wire [NUM_BANKS-1:0] scope_per_bank_stall_pipe; \
/* verilator lint_on UNUSED */ \
assign scope_bank_valid_st0 = scope_per_bank_valid_st0[0]; \
assign scope_bank_valid_st1 = scope_per_bank_valid_st1[0]; \
assign scope_bank_valid_st2 = scope_per_bank_valid_st2[0]; \
assign scope_bank_addr_st0 = scope_per_bank_addr_st0[0]; \
assign scope_bank_addr_st1 = scope_per_bank_addr_st1[0]; \
assign scope_bank_addr_st2 = scope_per_bank_addr_st2[0]; \
assign scope_bank_is_mrvq_st1 = scope_per_bank_is_mrvq_st1[0]; \
assign scope_bank_miss_st1 = scope_per_bank_miss_st1[0]; \
assign scope_bank_dirty_st1 = scope_per_bank_dirty_st1[0]; \
assign scope_bank_force_miss_st1 = scope_per_bank_force_miss_st1[0]; \
assign scope_bank_stall_pipe = scope_per_bank_stall_pipe[0];
`define SCOPE_SIGNALS_CACHE_BANK_BIND \
.scope_bank_valid_st0 (scope_per_bank_valid_st0[i]), \
.scope_bank_valid_st1 (scope_per_bank_valid_st1[i]), \
.scope_bank_valid_st2 (scope_per_bank_valid_st2[i]), \
.scope_bank_addr_st0 (scope_per_bank_addr_st0[i]), \
.scope_bank_addr_st1 (scope_per_bank_addr_st1[i]), \
.scope_bank_addr_st2 (scope_per_bank_addr_st2[i]), \
.scope_bank_is_mrvq_st1 (scope_per_bank_is_mrvq_st1[i]), \
.scope_bank_miss_st1 (scope_per_bank_miss_st1[i]), \
.scope_bank_dirty_st1 (scope_per_bank_dirty_st1[i]), \
.scope_bank_force_miss_st1 (scope_per_bank_force_miss_st1[i]), \
.scope_bank_stall_pipe (scope_per_bank_stall_pipe[i]),
`define SCOPE_SIGNALS_ISSUE_BIND \
.scope_issue_valid (scope_issue_valid), \
.scope_issue_wid (scope_issue_wid), \
.scope_issue_tmask (scope_issue_tmask), \
.scope_issue_pc (scope_issue_pc), \
.scope_issue_ex_type (scope_issue_ex_type), \
.scope_issue_op_type (scope_issue_op_type), \
.scope_issue_op_mod (scope_issue_op_mod), \
.scope_issue_wb (scope_issue_wb), \
.scope_issue_rd (scope_issue_rd), \
.scope_issue_rs1 (scope_issue_rs1), \
.scope_issue_rs2 (scope_issue_rs2), \
.scope_issue_rs3 (scope_issue_rs3), \
.scope_issue_imm (scope_issue_imm), \
.scope_issue_rs1_is_pc (scope_issue_rs1_is_pc), \
.scope_issue_rs2_is_imm (scope_issue_rs2_is_imm), \
.scope_writeback_valid (scope_writeback_valid), \
.scope_writeback_wid (scope_writeback_wid), \
.scope_writeback_pc (scope_writeback_pc), \
.scope_writeback_rd (scope_writeback_rd), \
.scope_writeback_data (scope_writeback_data), \
.scope_issue_ready (scope_issue_ready), \
.scope_gpr_rsp_valid (scope_gpr_rsp_valid), \
.scope_gpr_rsp_wid (scope_gpr_rsp_wid), \
.scope_gpr_rsp_pc (scope_gpr_rsp_pc), \
.scope_gpr_rsp_a (scope_gpr_rsp_a), \
.scope_gpr_rsp_b (scope_gpr_rsp_b), \
.scope_gpr_rsp_c (scope_gpr_rsp_c), \
.scope_scoreboard_delay (scope_scoreboard_delay), \
.scope_gpr_delay (scope_gpr_delay), \
.scope_execute_delay (scope_execute_delay), \
`define SCOPE_SIGNALS_EXECUTE_BIND
`define SCOPE_ASSIGN(d,s) assign d = s
`else
`define SCOPE_SIGNALS_ISTAGE_IO
`define SCOPE_SIGNALS_LSU_IO
`define SCOPE_SIGNALS_CACHE_IO
`define SCOPE_SIGNALS_ISSUE_IO
`define SCOPE_SIGNALS_EXECUTE_IO
`define SCOPE_SIGNALS_ISTAGE_BIND
`define SCOPE_SIGNALS_LSU_BIND
`define SCOPE_SIGNALS_CACHE_BIND
`define SCOPE_SIGNALS_ISSUE_BIND
`define SCOPE_SIGNALS_EXECUTE_BIND
`define SCOPE_SIGNALS_CACHE_UNBIND
`define SCOPE_SIGNALS_CACHE_BANK_SELECT
`define SCOPE_SIGNALS_CACHE_BANK_BIND
`define SCOPE_ASSIGN(d,s)
`define SCOPE_IO_VX_icache_stage
`define SCOPE_IO_VX_fetch
`define SCOPE_BIND_VX_fetch_icache_stage
`define SCOPE_BIND_VX_fetch_warp_sched
`define SCOPE_IO_VX_warp_sched
`define SCOPE_IO_VX_pipeline
`define SCOPE_BIND_VX_pipeline_fetch
`define SCOPE_IO_VX_core
`define SCOPE_BIND_VX_core_pipeline
`define SCOPE_IO_VX_cluster
`define SCOPE_BIND_VX_cluster_core(__i__)
`define SCOPE_IO_Vortex
`define SCOPE_BIND_Vortex_cluster(__i__)
`define SCOPE_BIND_top_vortex
`define SCOPE_IO_VX_lsu_unit
`define SCOPE_IO_VX_gpu_unit
`define SCOPE_IO_VX_execute
`define SCOPE_BIND_VX_execute_lsu_unit
`define SCOPE_BIND_VX_execute_gpu_unit
`define SCOPE_BIND_VX_pipeline_execute
`define SCOPE_IO_VX_issue
`define SCOPE_BIND_VX_pipeline_issue
`define SCOPE_IO_VX_bank
`define SCOPE_IO_VX_cache
`define SCOPE_BIND_VX_cache_bank(__i__)
`define SCOPE_BIND_Vortex_l3cache
`define SCOPE_BIND_VX_cluster_l2cache
`define SCOPE_IO_VX_mem_unit
`define SCOPE_BIND_VX_mem_unit_dcache
`define SCOPE_BIND_VX_core_mem_unit
`define SCOPE_BIND_VX_mem_unit_icache
`define SCOPE_BIND_VX_mem_unit_smem
`define SCOPE_DECL_SIGNALS
`define SCOPE_DATA_LIST
`define SCOPE_UPDATE_LIST
`define SCOPE_TRIGGER
`define SCOPE_ASSIGN(d,s)
`endif
// VX_SCOPE
`endif

View file

@ -28,12 +28,16 @@ typedef struct packed {
logic [`NUM_THREADS-1:0] tmask;
} gpu_tmc_t;
`define GPU_TMC_SIZE (1+`NUM_THREADS)
// Packed record describing a warp-spawn request.
typedef struct packed {
logic valid; // 1-bit valid flag for this record
logic [`NUM_WARPS-1:0] wmask; // one bit per warp (`NUM_WARPS bits)
logic [31:0] pc; // 32-bit program counter value carried with the request
} gpu_wspawn_t;
// Packed width of gpu_wspawn_t in bits: valid (1) + wmask (`NUM_WARPS) + pc (32).
`define GPU_WSPAWN_SIZE (1+`NUM_WARPS+32)
typedef struct packed {
logic valid;
logic diverged;
@ -42,10 +46,14 @@ typedef struct packed {
logic [31:0] pc;
} gpu_split_t;
`define GPU_SPLIT_SIZE (1+1+`NUM_THREADS+`NUM_THREADS+32)
// Packed record describing a barrier request.
typedef struct packed {
    logic                valid;   // 1-bit valid flag for this record
    logic [`NB_BITS-1:0] id;      // barrier identifier (`NB_BITS bits)
    logic [`NW_BITS-1:0] size_m1; // `NW_BITS bits; presumably "barrier size minus one" - confirm with the producer
} gpu_barrier_t;
// Packed width of gpu_barrier_t in bits: valid (1) + id (`NB_BITS) + size_m1 (`NW_BITS).
// The last term must be `NW_BITS (the declared width of size_m1), not `NB_BITS,
// otherwise the macro disagrees with $bits(gpu_barrier_t) whenever the two differ.
`define GPU_BARRIER_SIZE (1+`NB_BITS+`NW_BITS)
`endif

View file

@ -3,6 +3,8 @@
module VX_warp_sched #(
parameter CORE_ID = 0
) (
`SCOPE_IO_VX_warp_sched
input wire clk,
input wire reset,
@ -248,4 +250,11 @@ module VX_warp_sched #(
assign busy = (active_warps != 0);
`SCOPE_ASSIGN (scope_wsched_scheduled_warp, scheduled_warp);
`SCOPE_ASSIGN (scope_wsched_active_warps, active_warps);
`SCOPE_ASSIGN (scope_wsched_schedule_table, schedule_table);
`SCOPE_ASSIGN (scope_wsched_schedule_ready, schedule_ready);
`SCOPE_ASSIGN (scope_wsched_warp_to_schedule, warp_to_schedule);
`SCOPE_ASSIGN (scope_wsched_warp_pc, warp_pc);
endmodule

View file

@ -25,6 +25,7 @@ module VX_writeback #(
wire wb_valid;
wire [`NW_BITS-1:0] wb_wid;
wire [31:0] wb_PC;
wire [`NUM_THREADS-1:0] wb_tmask;
wire [`NR_BITS-1:0] wb_rd;
wire [`NUM_THREADS-1:0][31:0] wb_data;
@ -42,6 +43,13 @@ module VX_writeback #(
mul_valid ? mul_commit_if.wid :
fpu_valid ? fpu_commit_if.wid :
0;
assign wb_PC = alu_valid ? alu_commit_if.PC :
lsu_valid ? lsu_commit_if.PC :
csr_valid ? csr_commit_if.PC :
mul_valid ? mul_commit_if.PC :
fpu_valid ? fpu_commit_if.PC :
0;
assign wb_tmask = alu_valid ? alu_commit_if.tmask :
lsu_valid ? lsu_commit_if.tmask :
@ -68,16 +76,16 @@ module VX_writeback #(
wire stall = 0/*~writeback_if.ready && writeback_if.valid*/;
VX_generic_register #(
.N(1 + `NW_BITS + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32))
.N(1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32))
) wb_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({wb_valid, wb_wid, wb_tmask, wb_rd, wb_data}),
.out ({writeback_if.valid, writeback_if.wid, writeback_if.tmask, writeback_if.rd, writeback_if.data})
.in ({wb_valid, wb_wid, wb_PC, wb_tmask, wb_rd, wb_data}),
.out ({writeback_if.valid, writeback_if.wid, writeback_if.PC, writeback_if.tmask, writeback_if.rd, writeback_if.data})
);
assign alu_commit_if.ready = !stall;
assign lsu_commit_if.ready = !stall && !alu_valid;
assign csr_commit_if.ready = !stall && !alu_valid && !lsu_valid;

View file

@ -1,11 +1,7 @@
`include "VX_define.vh"
module Vortex (
`SCOPE_SIGNALS_ISTAGE_IO
`SCOPE_SIGNALS_LSU_IO
`SCOPE_SIGNALS_CACHE_IO
`SCOPE_SIGNALS_ISSUE_IO
`SCOPE_SIGNALS_EXECUTE_IO
`SCOPE_IO_Vortex
// Clock
input wire clk,
@ -75,11 +71,7 @@ module Vortex (
VX_cluster #(
.CLUSTER_ID(0)
) cluster (
`SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_CACHE_BIND
`SCOPE_SIGNALS_ISSUE_BIND
`SCOPE_SIGNALS_EXECUTE_BIND
`SCOPE_BIND_Vortex_cluster(0)
.clk (clk),
.reset (reset),
@ -193,11 +185,7 @@ module Vortex (
VX_cluster #(
.CLUSTER_ID(i)
) cluster (
`SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_CACHE_BIND
`SCOPE_SIGNALS_ISSUE_BIND
`SCOPE_SIGNALS_EXECUTE_BIND
`SCOPE_BIND_Vortex_cluster(i)
.clk (clk),
.reset (reset),
@ -384,7 +372,7 @@ module Vortex (
assign l3_core_rsp_ready = (& per_cluster_dram_rsp_ready);
VX_cache #(
.CACHE_ID (0),
.CACHE_ID (`L3CACHE_ID),
.CACHE_SIZE (`L3CACHE_SIZE),
.BANK_LINE_SIZE (`L3BANK_LINE_SIZE),
.NUM_BANKS (`L3NUM_BANKS),
@ -407,7 +395,7 @@ module Vortex (
.SNP_REQ_TAG_WIDTH (`L3SNP_TAG_WIDTH),
.SNP_FWD_TAG_WIDTH (`L2SNP_TAG_WIDTH)
) l3cache (
`SCOPE_SIGNALS_CACHE_UNBIND
`SCOPE_BIND_Vortex_l3cache
.clk (clk),
.reset (reset),

View file

@ -50,7 +50,7 @@ module VX_bank #(
// Snooping request tag width
parameter SNP_REQ_TAG_WIDTH = 0
) (
`SCOPE_SIGNALS_CACHE_IO
`SCOPE_IO_VX_bank
input wire clk,
input wire reset,
@ -146,7 +146,7 @@ module VX_bank #(
) snp_req_queue (
.clk (clk),
.reset (reset),
.push (snp_req_valid),
.push (snp_req_valid && snp_req_ready),
.data_in ({snp_req_addr, snp_req_invalidate, snp_req_tag}),
.pop (snrq_pop),
.data_out({snrq_addr_st0, snrq_invalidate_st0, snrq_tag_st0}),
@ -169,7 +169,7 @@ module VX_bank #(
) dfp_queue (
.clk (clk),
.reset (reset),
.push (dram_fill_rsp_valid),
.push (dram_fill_rsp_valid && dram_fill_rsp_ready),
.data_in ({dram_fill_rsp_addr, dram_fill_rsp_data}),
.pop (dfpq_pop),
.data_out({dfpq_addr_st0, dfpq_filldata_st0}),
@ -266,7 +266,9 @@ module VX_bank #(
`DEBUG_BEGIN
wire going_to_write_st1;
`DEBUG_END
//determines whether it is time to pop a req from each of the queues
//unqual - the pop decision before it is qualified by the bank pipeline stall (pop = pop_unqual && !stall_bank_pipe)
wire mrvq_pop_unqual = mrvq_valid_st0;
wire dfpq_pop_unqual = !mrvq_pop_unqual && !dfpq_empty;
wire reqq_pop_unqual = !mrvq_stop && !mrvq_pop_unqual && !dfpq_pop_unqual && !reqq_empty && reqq_req_st0 && !is_fill_st1 && !is_fill_st1;
@ -276,7 +278,8 @@ module VX_bank #(
assign dfpq_pop = dfpq_pop_unqual && !stall_bank_pipe;
assign reqq_pop = reqq_pop_unqual && !stall_bank_pipe;
assign snrq_pop = snrq_pop_unqual && !stall_bank_pipe;
//signals to progress to the next stage
wire qual_is_fill_st0;
wire qual_valid_st0;
wire [`LINE_ADDR_WIDTH-1:0] qual_addr_st0;
@ -289,7 +292,8 @@ module VX_bank #(
wire qual_going_to_write_st0;
wire qual_is_snp_st0;
wire qual_snp_invalidate_st0;
//signals to be *used* in the next stage
wire valid_st1;
wire [`LINE_ADDR_WIDTH-1:0] addr_st1;
wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st1;
@ -300,15 +304,19 @@ module VX_bank #(
wire snp_invalidate_st1;
wire is_mrvq_st1;
assign qual_is_fill_st0 = dfpq_pop_unqual;
//Determine which req will progress to the next stage
assign qual_is_fill_st0 = dfpq_pop_unqual; //dram is filling a request
assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop || snrq_pop;
assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop || snrq_pop; //valid if something is being popped
assign qual_addr_st0 = dfpq_pop_unqual ? dfpq_addr_st0 :
mrvq_pop_unqual ? mrvq_addr_st0 :
//Decides which request to deal with. Priority: 1) Miss reserve 2) DRAM fill 3) Core req 4) Snp req
assign qual_addr_st0 = mrvq_pop_unqual ? mrvq_addr_st0 :
dfpq_pop_unqual ? dfpq_addr_st0 :
reqq_pop_unqual ? reqq_req_addr_st0[`LINE_SELECT_ADDR_RNG] :
snrq_pop_unqual ? snrq_addr_st0 :
0;
//Word select does ? Does this just pick a specific word from the line instead of the whole line?
if (`WORD_SELECT_WIDTH != 0) begin
assign qual_wsel_st0 = reqq_pop_unqual ? reqq_req_addr_st0[`WORD_SELECT_WIDTH-1:0] :
mrvq_pop_unqual ? mrvq_wsel_st0 :
@ -318,30 +326,35 @@ module VX_bank #(
assign qual_wsel_st0 = 0;
end
//if you are filling from dram then that is the write data? What about core? What is 57?
assign qual_writedata_st0 = dfpq_pop_unqual ? dfpq_filldata_st0 : 57;
//note that this is stored even if a DRAM fill is processed
assign qual_inst_meta_st0 = mrvq_pop_unqual ? {`REQ_TAG_WIDTH'(mrvq_tag_st0) , mrvq_rw_st0, mrvq_byteen_st0, mrvq_tid_st0} :
reqq_pop_unqual ? {`REQ_TAG_WIDTH'(reqq_req_tag_st0), reqq_req_rw_st0, reqq_req_byteen_st0, reqq_req_tid_st0} :
snrq_pop_unqual ? {`REQ_TAG_WIDTH'(snrq_tag_st0), 1'b0, WORD_SIZE'(0), `REQS_BITS'(0)} :
0;
assign qual_going_to_write_st0 = dfpq_pop_unqual ? 1 :
(mrvq_pop_unqual && mrvq_rw_st0) ? 1 :
(reqq_pop_unqual && reqq_req_rw_st0) ? 1 :
0;
//snp signals: first check whether the request popped from the miss reserve is a snp; otherwise a pop from the snoop queue is a snp.
assign qual_is_snp_st0 = mrvq_pop_unqual ? mrvq_is_snp_st0 :
snrq_pop_unqual ? 1 :
0;
//if we are popping from the miss reserve then assign to the mrvq invalidate. If not and popping from the snoop queue use the snoop invalidate. Else this is 0
assign qual_snp_invalidate_st0 = mrvq_pop_unqual ? mrvq_snp_invalidate_st0 :
snrq_pop_unqual ? snrq_invalidate_st0 :
0;
//choose which word of the line is being written to
assign qual_writeword_st0 = mrvq_pop_unqual ? mrvq_writeword_st0 :
reqq_pop_unqual ? reqq_req_writeword_st0 :
0;
assign qual_is_mrvq_st0 = mrvq_pop_unqual;
`ifdef DBG_CORE_REQ_INFO
@ -356,7 +369,7 @@ module VX_bank #(
.clk (clk),
.reset (reset),
.stall (stall_bank_pipe),
.flush (0),
.flush (1'b0),
.in ({qual_is_mrvq_st0, qual_is_snp_st0, qual_snp_invalidate_st0, qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_wsel_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0}),
.out ({is_mrvq_st1 , is_snp_st1, snp_invalidate_st1, going_to_write_st1, valid_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1})
);
@ -453,6 +466,8 @@ module VX_bank #(
`ifdef DBG_CORE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = inst_meta_st1;
end else begin
assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = 0;
end
`endif
@ -486,7 +501,7 @@ module VX_bank #(
.clk (clk),
.reset (reset),
.stall (stall_bank_pipe),
.flush (0),
.flush (1'b0),
.in ({mrvq_recover_ready_state_st1, is_mrvq_st1_st2, mrvq_init_ready_state_st1, snp_to_mrvq_st1, is_snp_st1, snp_invalidate_st1, fill_saw_dirty_st1, is_fill_st1, qual_valid_st1_2, addr_st1, wsel_st1, writeword_st1, readword_st1, readdata_st1, readtag_st1, miss_st1, dirty_st1, dirtyb_st1, inst_meta_st1}),
.out ({mrvq_recover_ready_state_st2 , is_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , snp_invalidate_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirty_st2, dirtyb_st2, inst_meta_st2})
);
@ -728,18 +743,18 @@ module VX_bank #(
end
`endif
`SCOPE_ASSIGN (scope_bank_valid_st0, qual_valid_st0);
`SCOPE_ASSIGN (scope_bank_valid_st1, valid_st1);
`SCOPE_ASSIGN (scope_bank_valid_st2, valid_st2);
`SCOPE_ASSIGN (scope_valid_st0, qual_valid_st0);
`SCOPE_ASSIGN (scope_valid_st1, valid_st1);
`SCOPE_ASSIGN (scope_valid_st2, valid_st2);
`SCOPE_ASSIGN (scope_bank_is_mrvq_st1, is_mrvq_st1);
`SCOPE_ASSIGN (scope_bank_miss_st1, miss_st1);
`SCOPE_ASSIGN (scope_bank_dirty_st1, dirty_st1);
`SCOPE_ASSIGN (scope_bank_force_miss_st1, force_request_miss_st1);
`SCOPE_ASSIGN (scope_bank_stall_pipe, stall_bank_pipe);
`SCOPE_ASSIGN (scope_is_mrvq_st1, is_mrvq_st1);
`SCOPE_ASSIGN (scope_miss_st1, miss_st1);
`SCOPE_ASSIGN (scope_dirty_st1, dirty_st1);
`SCOPE_ASSIGN (scope_force_miss_st1, force_request_miss_st1);
`SCOPE_ASSIGN (scope_stall_pipe, stall_bank_pipe);
`SCOPE_ASSIGN (scope_bank_addr_st0, `LINE_TO_BYTE_ADDR(qual_addr_st0, BANK_ID));
`SCOPE_ASSIGN (scope_bank_addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
`SCOPE_ASSIGN (scope_bank_addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID));
`SCOPE_ASSIGN (scope_addr_st0, `LINE_TO_BYTE_ADDR(qual_addr_st0, BANK_ID));
`SCOPE_ASSIGN (scope_addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
`SCOPE_ASSIGN (scope_addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID));
endmodule

View file

@ -51,15 +51,15 @@ module VX_cache #(
parameter DRAM_TAG_WIDTH = 28,
// Number of snoop forwarding requests
parameter NUM_SNP_REQUESTS = 2,
parameter NUM_SNP_REQUESTS = 1,
// Snooping request tag width
parameter SNP_REQ_TAG_WIDTH = 28,
parameter SNP_REQ_TAG_WIDTH = 1,
// Snooping forward tag width
parameter SNP_FWD_TAG_WIDTH = 1
) (
`SCOPE_SIGNALS_CACHE_IO
`SCOPE_IO_VX_cache
input wire clk,
input wire reset,
@ -167,7 +167,7 @@ module VX_cache #(
wire [NUM_BANKS-1:0] per_bank_miss;
assign miss_vec = per_bank_miss;
`SCOPE_SIGNALS_CACHE_BANK_SELECT
wire snp_req_valid_qual;
wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr_qual;
@ -376,7 +376,7 @@ module VX_cache #(
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS),
.SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH)
) bank (
`SCOPE_SIGNALS_CACHE_BANK_BIND
`SCOPE_BIND_VX_cache_bank(i)
.clk (clk),
.reset (reset),

View file

@ -2,7 +2,6 @@
`define VX_CACHE_CONFIG
`include "VX_platform.vh"
`include "VX_scope.vh"
`ifdef DBG_CORE_REQ_INFO
`include "VX_define.vh"

View file

@ -91,7 +91,7 @@ module VX_cache_core_rsp_merge #(
.clk (clk),
.reset (reset),
.stall (stall),
.flush (0),
.flush (1'b0),
.in ({core_rsp_valid_unqual, core_rsp_data_unqual, core_rsp_tag_unqual}),
.out ({core_rsp_valid, core_rsp_data, core_rsp_tag})
);

View file

@ -56,8 +56,9 @@ module VX_cache_miss_resrv #(
output wire miss_resrv_is_snp_st0,
output wire miss_resrv_snp_invalidate_st0
);
reg [`MRVQ_METADATA_WIDTH-1:0] metadata_table[MRVQ_SIZE-1:0];
wire [`MRVQ_METADATA_WIDTH-1:0] metadata_table;
reg [MRVQ_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table;
reg [MRVQ_SIZE-1:0] valid_table;
reg [MRVQ_SIZE-1:0] ready_table;
reg [`LOG2UP(MRVQ_SIZE)-1:0] schedule_ptr;
@ -66,13 +67,13 @@ module VX_cache_miss_resrv #(
reg [`LOG2UP(MRVQ_SIZE+1)-1:0] size;
`STATIC_ASSERT(MRVQ_SIZE > 5, "invalid size")
`STATIC_ASSERT(MRVQ_SIZE > 5, ("invalid size"))
assign miss_resrv_full = (size == $bits(size)'(MRVQ_SIZE));
assign miss_resrv_stop = (size > $bits(size)'(MRVQ_SIZE-5)); // need to add 5 cycles to prevent pipeline lock
wire enqueue_possible = !miss_resrv_full;
wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr;
wire enqueue_possible = !miss_resrv_full;
wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr;
reg [MRVQ_SIZE-1:0] make_ready;
reg [MRVQ_SIZE-1:0] make_ready_push;
@ -85,11 +86,11 @@ module VX_cache_miss_resrv #(
assign pending_hazard_st1 = |(valid_address_match);
wire dequeue_possible = valid_table[schedule_ptr] && ready_table[schedule_ptr];
wire dequeue_possible = valid_table[schedule_ptr] && ready_table[schedule_ptr];
wire [`LOG2UP(MRVQ_SIZE)-1:0] dequeue_index = schedule_ptr;
assign miss_resrv_valid_st0 = dequeue_possible;
assign miss_resrv_addr_st0 = addr_table[dequeue_index];
assign miss_resrv_addr_st0 = addr_table[dequeue_index];
assign {miss_resrv_data_st0,
miss_resrv_tid_st0,
miss_resrv_tag_st0,
@ -97,7 +98,7 @@ module VX_cache_miss_resrv #(
miss_resrv_byteen_st0,
miss_resrv_wsel_st0,
miss_resrv_is_snp_st0,
miss_resrv_snp_invalidate_st0} = metadata_table[dequeue_index];
miss_resrv_snp_invalidate_st0} = metadata_table;
wire mrvq_push = miss_add && enqueue_possible && !is_mrvq;
wire mrvq_pop = miss_resrv_pop && dequeue_possible;
@ -124,13 +125,12 @@ module VX_cache_miss_resrv #(
valid_table[enqueue_index] <= 1;
ready_table[enqueue_index] <= mrvq_init_ready_state;
addr_table[enqueue_index] <= miss_add_addr;
metadata_table[enqueue_index] <= {miss_add_data, miss_add_tid, miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_wsel, miss_add_is_snp, miss_add_snp_invalidate};
tail_ptr <= tail_ptr + 1;
tail_ptr <= tail_ptr + $bits(tail_ptr)'(1);
end else if (increment_head) begin
valid_table[head_ptr] <= 0;
head_ptr <= head_ptr + 1;
head_ptr <= head_ptr + $bits(head_ptr)'(1);
end else if (recover_state) begin
schedule_ptr <= schedule_ptr - 1;
schedule_ptr <= schedule_ptr - $bits(schedule_ptr)'(1);
end
// update entry as 'ready' during DRAM fill response
@ -140,20 +140,36 @@ module VX_cache_miss_resrv #(
if (mrvq_pop) begin
ready_table[dequeue_index] <= 0;
schedule_ptr <= schedule_ptr + 1;
schedule_ptr <= schedule_ptr + $bits(schedule_ptr)'(1);
end
if (!(mrvq_push && increment_head)) begin
if (mrvq_push) begin
size <= size + 1;
size <= size + $bits(size)'(1);
end
if (increment_head) begin
size <= size - 1;
size <= size - $bits(size)'(1);
end
end
end
end
VX_dp_ram #(
.DATAW(`MRVQ_METADATA_WIDTH),
.SIZE(MRVQ_SIZE),
.BYTEENW(1),
.BUFFERED(0),
.RWCHECK(1)
) metadata_ram (
.clk(clk),
.waddr(enqueue_index),
.raddr(dequeue_index),
.wren(mrvq_push),
.rden(1'b1),
.din({miss_add_data, miss_add_tid, miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_wsel, miss_add_is_snp, miss_add_snp_invalidate}),
.dout(metadata_table)
);
`ifdef DBG_PRINT_CACHE_MSRQ
always @(posedge clk) begin
if (mrvq_push || mrvq_pop || increment_head || recover_state) begin

View file

@ -37,7 +37,7 @@ module VX_snp_forwarder #(
input wire [NUM_REQUESTS-1:0][`LOG2UP(SNRQ_SIZE)-1:0] snp_fwdin_tag,
output wire [NUM_REQUESTS-1:0] snp_fwdin_ready
);
`STATIC_ASSERT(NUM_REQUESTS > 1, "invalid value")
`STATIC_ASSERT(NUM_REQUESTS > 1, ("invalid value"))
reg [`REQS_BITS:0] pending_cntrs [SNRQ_SIZE-1:0];

View file

@ -183,15 +183,15 @@ module VX_tag_data_access #(
if (valid_req_st1) begin
if ((| use_write_enable)) begin
if (writefill_st1) begin
$display("%t: cache%0d:%0d store-fill: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, use_write_data);
$display("%t: cache%0d:%0d data-fill: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, use_write_data);
end else begin
$display("%t: cache%0d:%0d store-write: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, wordsel_st1, writeword_st1);
$display("%t: cache%0d:%0d data-write: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, wordsel_st1, writeword_st1);
end
end else
if (miss_st1) begin
$display("%t: cache%0d:%0d store-miss: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1);
$display("%t: cache%0d:%0d data-miss: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1);
end else begin
$display("%t: cache%0d:%0d store-read: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, readaddr_st1, qual_read_tag_st1, wordsel_st1, qual_read_data_st1);
$display("%t: cache%0d:%0d data-read: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, readaddr_st1, qual_read_tag_st1, wordsel_st1, qual_read_data_st1);
end
end
end

View file

@ -6,7 +6,7 @@ module VX_tag_data_store #(
// Size of line inside a bank in bytes
parameter BANK_LINE_SIZE = 0,
// Number of banks {1, 2, 4, 8,...}
parameter NUM_BANKS = 0,
parameter NUM_BANKS = 0, //unused parameter?
// Size of a word in bytes
parameter WORD_SIZE = 0
) (
@ -30,7 +30,6 @@ module VX_tag_data_store #(
input wire fill_sent
);
reg [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0][7:0] data [`BANK_LINE_COUNT-1:0];
reg [`TAG_SELECT_BITS-1:0] tag [`BANK_LINE_COUNT-1:0];
reg [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] dirtyb[`BANK_LINE_COUNT-1:0];
reg [`BANK_LINE_COUNT-1:0] dirty;
@ -40,8 +39,7 @@ module VX_tag_data_store #(
assign read_dirty = dirty [read_addr];
assign read_dirtyb = dirtyb [read_addr];
assign read_tag = tag [read_addr];
assign read_data = data [read_addr];
wire do_write = (| write_enable);
always @(posedge clk) begin
@ -69,15 +67,26 @@ module VX_tag_data_store #(
if (invalidate) begin
valid[write_addr] <= 0;
end
for (integer j = 0; j < `BANK_LINE_WORDS; j++) begin
for (integer i = 0; i < WORD_SIZE; i++) begin
if (write_enable[j][i]) begin
data[write_addr][j][i] <= write_data[j * `WORD_WIDTH + i * 8 +: 8];
end
end
end
end
end
endmodule
wire [(`BANK_LINE_WORDS * WORD_SIZE)-1:0] ram_wren;
assign ram_wren = write_enable & {(`BANK_LINE_WORDS * WORD_SIZE){!stall_bank_pipe}};
VX_dp_ram #(
.DATAW(`BANK_LINE_WORDS * WORD_SIZE * 8),
.SIZE(`BANK_LINE_COUNT),
.BYTEENW(`BANK_LINE_WORDS * WORD_SIZE),
.BUFFERED(0),
.RWCHECK(1)
) dp_ram (
.clk(clk),
.waddr(write_addr),
.raddr(read_addr),
.wren(ram_wren),
.rden(1'b1),
.din(write_data),
.dout(read_data)
);
endmodule

145
hw/rtl/libs/VX_dp_ram.v Normal file
View file

@ -0,0 +1,145 @@
`include "VX_platform.vh"
// Dual-port RAM with one write port (waddr/wren/din) and one read port (raddr/rden/dout).
//
// Generate-time parameters select one of three implementations:
//   BUFFERED != 0               : dout is registered; it is updated on posedge clk when rden is set.
//   BUFFERED == 0, RWCHECK != 0 : dout is a combinational read of mem[raddr]; rden is unused.
//   BUFFERED == 0, RWCHECK == 0 : same combinational read, but mem is declared with the
//                                 `NO_RW_RAM_CHECK prefix (macro defined outside this file;
//                                 presumably a synthesis attribute - confirm in VX_platform.vh).
//
// When BYTEENW > 1, bit i of wren enables the write of bits [i*8 +: 8] of din; otherwise wren
// is a single whole-word write enable.
// NOTE(review): the lane indexing uses i*8, so DATAW is presumably expected to be 8*BYTEENW
// when BYTEENW > 1 - confirm against the instantiations.
//
// RWBYPASS is only consulted in the BUFFERED branch and in the (BUFFERED == 0, RWCHECK != 0)
// branch. It adds a registered copy of the written word that drives dout on the cycle after
// a write whose waddr equalled raddr.
module VX_dp_ram #(
parameter DATAW = 1, // width of one memory word in bits
parameter SIZE = 1, // number of words in the memory
parameter BYTEENW = 1, // width of wren; > 1 selects per-byte write enables
parameter BUFFERED = 1, // non-zero: registered read output
parameter RWCHECK = 1, // zero (with BUFFERED == 0): declare mem with `NO_RW_RAM_CHECK
parameter RWBYPASS = 0, // non-zero: add the write-to-read bypass register
parameter ADDRW = $clog2(SIZE), // address width derived from SIZE
parameter SIZEW = $clog2(SIZE+1) // not referenced inside this module
) (
input wire clk,
input wire [ADDRW-1:0] waddr, // write address
input wire [ADDRW-1:0] raddr, // read address
input wire [BYTEENW-1:0] wren, // write enable (per byte lane when BYTEENW > 1)
input wire rden, // read enable; only used when BUFFERED != 0
input wire [DATAW-1:0] din, // write data
output wire [DATAW-1:0] dout // read data
);
// ---- Registered-read implementation ----
if (BUFFERED) begin
reg [DATAW-1:0] mem [SIZE-1:0];
reg [DATAW-1:0] dout_r;
if (BYTEENW > 1) begin
// Per-byte write: only the lanes whose wren bit is set are updated.
always @(posedge clk) begin
for (integer i = 0; i < BYTEENW; i++) begin
if (wren[i])
mem[waddr][i * 8 +: 8] <= din[i * 8 +: 8];
end
end
end else begin
// Whole-word write.
always @(posedge clk) begin
if (wren)
mem[waddr] <= din;
end
end
// Synchronous read: dout_r holds its value while rden is low.
always @(posedge clk) begin
if (rden)
dout_r <= mem[raddr];
end
if (RWBYPASS) begin
reg [DATAW-1:0] din_r;
wire writing;
if (BYTEENW > 1) begin
assign writing = (| wren);
// Capture the word as it will look after the write: enabled lanes come from din,
// disabled lanes from the currently stored word at waddr.
always @(posedge clk) begin
for (integer i = 0; i < BYTEENW; i++) begin
din_r[i * 8 +: 8] <= wren[i] ? din[i * 8 +: 8] : mem[waddr][i * 8 +: 8];
end
end
end else begin
assign writing = wren;
always @(posedge clk) begin
din_r <= din;
end
end
// bypass_r records that the previous clock edge saw a write to the address being read.
// Note that this comparison does not depend on rden.
reg bypass_r;
always @(posedge clk) begin
bypass_r <= writing && (raddr == waddr);
end
assign dout = bypass_r ? din_r : dout_r;
end else begin
assign dout = dout_r;
end
end else begin
// ---- Combinational-read implementations (rden is not used) ----
`UNUSED_VAR(rden)
if (RWCHECK) begin
reg [DATAW-1:0] mem [SIZE-1:0];
if (BYTEENW > 1) begin
// Per-byte write: only the lanes whose wren bit is set are updated.
always @(posedge clk) begin
for (integer i = 0; i < BYTEENW; i++) begin
if (wren[i])
mem[waddr][i * 8 +: 8] <= din[i * 8 +: 8];
end
end
end else begin
// Whole-word write.
always @(posedge clk) begin
if (wren)
mem[waddr] <= din;
end
end
if (RWBYPASS) begin
reg [DATAW-1:0] din_r;
wire writing;
if (BYTEENW > 1) begin
assign writing = (| wren);
// Capture the merged word (written lanes from din, other lanes from the stored word).
always @(posedge clk) begin
for (integer i = 0; i < BYTEENW; i++) begin
din_r[i * 8 +: 8] <= wren[i] ? din[i * 8 +: 8] : mem[waddr][i * 8 +: 8];
end
end
end else begin
assign writing = wren;
always @(posedge clk) begin
din_r <= din;
end
end
// bypass_r reflects the waddr/raddr comparison made on the previous clock edge,
// while mem[raddr] below is read with the current raddr.
reg bypass_r;
always @(posedge clk) begin
bypass_r <= writing && (raddr == waddr);
end
assign dout = bypass_r ? din_r : mem[raddr];
end else begin
assign dout = mem[raddr];
end
end else begin
// Same storage and write logic as above, but the array carries the `NO_RW_RAM_CHECK prefix.
// RWBYPASS is not consulted in this branch.
`NO_RW_RAM_CHECK reg [DATAW-1:0] mem [SIZE-1:0];
if (BYTEENW > 1) begin
// Per-byte write: only the lanes whose wren bit is set are updated.
always @(posedge clk) begin
for (integer i = 0; i < BYTEENW; i++) begin
if (wren[i])
mem[waddr][i * 8 +: 8] <= din[i * 8 +: 8];
end
end
end else begin
// Whole-word write.
always @(posedge clk) begin
if (wren)
mem[waddr] <= din;
end
end
assign dout = mem[raddr];
end
end
endmodule

View file

@ -3,7 +3,7 @@
module VX_generic_queue #(
parameter DATAW = 1,
parameter SIZE = 2,
parameter BUFFERED = 0,
parameter BUFFERED = 1,
parameter ADDRW = $clog2(SIZE),
parameter SIZEW = $clog2(SIZE+1)
) (
@ -17,30 +17,26 @@ module VX_generic_queue #(
output wire full,
output wire [SIZEW-1:0] size
);
`STATIC_ASSERT(`ISPOW2(SIZE), "must be 0 or power of 2!")
reg [SIZEW-1:0] size_r;
wire reading;
wire writing;
assign reading = pop && !empty;
assign writing = push && !full;
if (SIZE == 1) begin // (SIZE == 1)
`STATIC_ASSERT(`ISPOW2(SIZE), ("must be 0 or power of 2!"))
if (SIZE == 1) begin
reg [DATAW-1:0] head_r;
reg size_r;
always @(posedge clk) begin
if (reset) begin
head_r <= 0;
size_r <= 0;
end else begin
if (writing && !reading) begin
if (push && !pop) begin
assert(!full);
size_r <= 1;
end else if (reading && !writing) begin
end else if (pop && !push) begin
assert(!empty);
size_r <= 0;
end
if (writing) begin
if (push) begin
head_r <= data_in;
end
end
@ -51,15 +47,14 @@ module VX_generic_queue #(
assign full = (size_r != 0);
assign size = size_r;
end else begin // (SIZE > 1)
end else begin
`USE_FAST_BRAM reg [DATAW-1:0] data [SIZE-1:0];
if (0 == BUFFERED) begin
if (0 == BUFFERED) begin
reg [ADDRW:0] rd_ptr_r;
reg [ADDRW:0] wr_ptr_r;
reg [ADDRW-1:0] used_r;
wire [ADDRW-1:0] rd_ptr_a = rd_ptr_r[ADDRW-1:0];
wire [ADDRW-1:0] wr_ptr_a = wr_ptr_r[ADDRW-1:0];
@ -67,96 +62,126 @@ module VX_generic_queue #(
if (reset) begin
rd_ptr_r <= 0;
wr_ptr_r <= 0;
size_r <= 0;
used_r <= 0;
end else begin
if (writing) begin
data[wr_ptr_a] <= data_in;
wr_ptr_r <= wr_ptr_r + 1;
if (!reading) begin
size_r <= size_r + 1;
if (push) begin
assert(!full);
wr_ptr_r <= wr_ptr_r + (ADDRW+1)'(1);
if (!pop) begin
used_r <= used_r + ADDRW'(1);
end
end
if (reading) begin
rd_ptr_r <= rd_ptr_r + 1;
if (!writing) begin
size_r <= size_r - 1;
if (pop) begin
assert(!empty);
rd_ptr_r <= rd_ptr_r + (ADDRW+1)'(1);
if (!push) begin
used_r <= used_r - ADDRW'(1);
end
end
end
end
end
assign data_out = data[rd_ptr_a];
assign empty = (wr_ptr_r == rd_ptr_r);
assign full = (wr_ptr_a == rd_ptr_a) && (wr_ptr_r[ADDRW] != rd_ptr_r[ADDRW]);
assign size = size_r;
VX_dp_ram #(
.DATAW(DATAW),
.SIZE(SIZE),
.BUFFERED(0),
.RWCHECK(1)
) dp_ram (
.clk(clk),
.waddr(wr_ptr_a),
.raddr(rd_ptr_a),
.wren(push),
.rden(pop),
.din(data_in),
.dout(data_out)
);
assign empty = (wr_ptr_r == rd_ptr_r);
assign full = (wr_ptr_a == rd_ptr_a) && (wr_ptr_r[ADDRW] != rd_ptr_r[ADDRW]);
assign size = {full, used_r};
end else begin
reg [DATAW-1:0] head_r;
reg [DATAW-1:0] curr_r;
wire [DATAW-1:0] dout;
reg [DATAW-1:0] din_r;
reg [ADDRW-1:0] wr_ptr_r;
reg [ADDRW-1:0] rd_ptr_r;
reg [ADDRW-1:0] rd_ptr_next_r;
reg [ADDRW-1:0] rd_ptr_n_r;
reg [ADDRW-1:0] used_r;
reg empty_r;
reg full_r;
reg bypass_r;
always @(posedge clk) begin
if (reset) begin
size_r <= 0;
head_r <= 0;
curr_r <= 0;
wr_ptr_r <= 0;
rd_ptr_r <= 0;
rd_ptr_next_r <= 1;
empty_r <= 1;
full_r <= 0;
if (reset) begin
wr_ptr_r <= 0;
rd_ptr_r <= 0;
rd_ptr_n_r <= 1;
empty_r <= 1;
full_r <= 0;
used_r <= 0;
end else begin
if (writing) begin
data[wr_ptr_r] <= data_in;
wr_ptr_r <= wr_ptr_r + 1;
if (push) begin
wr_ptr_r <= wr_ptr_r + ADDRW'(1);
if (!reading) begin
if (!pop) begin
empty_r <= 0;
if (size_r == ($bits(size_r)'(SIZE-1))) begin
if (used_r == ADDRW'(SIZE-1)) begin
full_r <= 1;
end
size_r <= size_r + 1;
used_r <= used_r + ADDRW'(1);
end
end
if (reading) begin
rd_ptr_r <= rd_ptr_next_r;
if (pop) begin
rd_ptr_r <= rd_ptr_n_r;
if (SIZE > 2) begin
rd_ptr_next_r <= rd_ptr_r + $bits(rd_ptr_r)'(2);
rd_ptr_n_r <= rd_ptr_r + ADDRW'(2);
end else begin // (SIZE == 2);
rd_ptr_next_r <= ~rd_ptr_next_r;
rd_ptr_n_r <= ~rd_ptr_n_r;
end
if (!writing) begin
if (size_r == 1) begin
assert(rd_ptr_next_r == wr_ptr_r);
if (!push) begin
full_r <= 0;
if (used_r == ADDRW'(1)) begin
assert(rd_ptr_n_r == wr_ptr_r);
empty_r <= 1;
end;
full_r <= 0;
size_r <= size_r - 1;
end;
used_r <= used_r - ADDRW'(1);
end
end
bypass_r <= writing
&& (empty_r || ((1 == size_r) && reading)); // empty or about to go empty
curr_r <= data_in;
head_r <= data[reading ? rd_ptr_next_r : rd_ptr_r];
end
end
end
assign data_out = bypass_r ? curr_r : head_r;
always @(posedge clk) begin
if (push && (empty_r || ((used_r == ADDRW'(1)) && pop))) begin
bypass_r <= 1;
din_r <= data_in;
end else if (pop)
bypass_r <= 0;
end
VX_dp_ram #(
.DATAW(DATAW),
.SIZE(SIZE),
.BUFFERED(1),
.RWCHECK(0)
) dp_ram (
.clk(clk),
.waddr(wr_ptr_r),
.raddr(rd_ptr_n_r),
.wren(push),
.rden(pop),
.din(data_in),
.dout(dout)
);
assign data_out = bypass_r ? din_r : dout;
assign empty = empty_r;
assign full = full_r;
assign size = size_r;
assign size = {full_r, used_r};
end
end

View file

@ -28,9 +28,13 @@ module VX_index_queue #(
assign empty = (wr_ptr == rd_ptr);
assign full = (wr_a == rd_a) && (wr_ptr[`LOG2UP(SIZE)] != rd_ptr[`LOG2UP(SIZE)]);
assign enqueue = push && !full;
assign enqueue = push;
assign dequeue = !empty && !valid[rd_a]; // auto-remove when head is invalid
always @(*) begin
assert(!push || !full);
end
always @(posedge clk) begin
if (reset) begin
rd_ptr <= 0;

View file

@ -18,7 +18,7 @@ module VX_scope #(
input wire bus_write,
input wire bus_read
);
localparam DELTA_ENABLE = (UPDW != 0);
localparam UPDW_ENABLE = (UPDW != 0);
localparam MAX_DELTA = (2 ** DELTAW) - 1;
localparam CMD_GET_VALID = 3'd0;
@ -27,19 +27,22 @@ module VX_scope #(
localparam CMD_GET_COUNT = 3'd3;
localparam CMD_SET_DELAY = 3'd4;
localparam CMD_SET_STOP = 3'd5;
localparam CMD_RESERVED1 = 3'd6;
localparam CMD_GET_OFFSET= 3'd6;
localparam CMD_RESERVED2 = 3'd7;
localparam GET_VALID = 2'd0;
localparam GET_DATA = 2'd1;
localparam GET_WIDTH = 2'd2;
localparam GET_COUNT = 2'd3;
localparam GET_VALID = 3'd0;
localparam GET_DATA = 3'd1;
localparam GET_WIDTH = 3'd2;
localparam GET_COUNT = 3'd3;
localparam GET_OFFSET = 3'd6;
`NO_RW_RAM_CHECK reg [DATAW-1:0] data_store [SIZE-1:0];
`NO_RW_RAM_CHECK reg [DELTAW-1:0] delta_store [SIZE-1:0];
reg [DATAW-1:0] data_store [SIZE-1:0];
reg [DELTAW-1:0] delta_store [SIZE-1:0];
reg [UPDW-1:0] prev_trigger_id;
reg [DELTAW-1:0] delta;
reg [BUSW-1:0] bus_out_r;
reg [63:0] timestamp, start_time;
reg [`CLOG2(SIZE)-1:0] raddr, waddr, waddr_end;
@ -49,8 +52,7 @@ module VX_scope #(
reg [BUSW-3:0] delay_val, delay_cntr;
reg [1:0] out_cmd;
reg [2:0] get_cmd;
wire [2:0] cmd_type;
wire [BUSW-4:0] cmd_data;
assign {cmd_data, cmd_type} = bus_in;
@ -59,7 +61,7 @@ module VX_scope #(
always @(posedge clk) begin
if (reset) begin
out_cmd <= $bits(out_cmd)'(CMD_GET_VALID);
get_cmd <= $bits(get_cmd)'(CMD_GET_VALID);
raddr <= 0;
waddr <= 0;
waddr_end <= $bits(waddr)'(SIZE-1);
@ -74,13 +76,18 @@ module VX_scope #(
read_offset <= 0;
read_delta <= 0;
data_valid <= 0;
timestamp <= 0;
end else begin
timestamp <= timestamp + 1;
if (bus_write) begin
case (cmd_type)
CMD_GET_VALID,
CMD_GET_DATA,
CMD_GET_WIDTH,
CMD_GET_COUNT: out_cmd <= $bits(out_cmd)'(cmd_type);
CMD_GET_OFFSET,
CMD_GET_COUNT: get_cmd <= $bits(get_cmd)'(cmd_type);
CMD_SET_DELAY: delay_val <= $bits(delay_val)'(cmd_data);
CMD_SET_STOP: waddr_end <= $bits(waddr)'(cmd_data);
default:;
@ -92,8 +99,10 @@ module VX_scope #(
delta_flush <= 1;
if (0 == delay_val) begin
start_wait <= 0;
recording <= 1;
delay_cntr <= 0;
recording <= 1;
delta <= 0;
delay_cntr <= 0;
start_time <= timestamp;
end else begin
start_wait <= 1;
recording <= 0;
@ -106,26 +115,29 @@ module VX_scope #(
if (1 == delay_cntr) begin
start_wait <= 0;
recording <= 1;
delta <= 0;
start_time <= timestamp;
end
end
if (recording) begin
if (DELTA_ENABLE) begin
if (UPDW_ENABLE) begin
if (delta_flush
|| changed
|| (trigger_id != prev_trigger_id)) begin
data_store[waddr] <= data_in;
delta_store[waddr] <= delta;
waddr <= waddr + 1;
data_store[waddr] <= data_in;
waddr <= waddr + $bits(waddr)'(1);
delta <= 0;
delta_flush <= 0;
end else begin
delta <= delta + 1;
delta <= delta + DELTAW'(1);
delta_flush <= (delta == (MAX_DELTA-1));
end
prev_trigger_id <= trigger_id;
end else begin
data_store[waddr] <= data_in;
delta_store[waddr] <= 0;
data_store[waddr] <= data_in;
waddr <= waddr + 1;
end
@ -134,12 +146,12 @@ module VX_scope #(
waddr <= waddr; // keep last address
recording <= 0;
data_valid <= 1;
read_delta <= DELTA_ENABLE;
read_delta <= 1;
end
end
if (bus_read
&& (out_cmd == GET_DATA)
&& (get_cmd == GET_DATA)
&& data_valid) begin
if (read_delta) begin
read_delta <= 0;
@ -148,16 +160,16 @@ module VX_scope #(
if (read_offset < $bits(read_offset)'(DATAW-BUSW)) begin
read_offset <= read_offset + $bits(read_offset)'(BUSW);
end else begin
raddr <= raddr + 1;
raddr <= raddr + $bits(raddr)'(1);
read_offset <= 0;
read_delta <= DELTA_ENABLE;
read_delta <= 1;
if (raddr == waddr) begin
data_valid <= 0;
end
end
end else begin
raddr <= raddr + 1;
read_delta <= DELTA_ENABLE;
read_delta <= 1;
if (raddr == waddr) begin
data_valid <= 0;
end
@ -168,11 +180,14 @@ module VX_scope #(
end
always @(*) begin
case (out_cmd)
case (get_cmd)
GET_VALID : bus_out_r = BUSW'(data_valid);
GET_WIDTH : bus_out_r = BUSW'(DATAW);
GET_COUNT : bus_out_r = BUSW'(waddr) + BUSW'(1);
GET_OFFSET: bus_out_r = BUSW'(start_time);
/* verilator lint_off WIDTH */
GET_DATA : bus_out_r = read_delta ? BUSW'(delta_store[raddr]) : BUSW'(data_store[raddr] >> read_offset);
/* verilator lint_on WIDTH */
default : bus_out_r = 0;
endcase
end
@ -182,7 +197,7 @@ module VX_scope #(
`ifdef DBG_PRINT_SCOPE
always @(posedge clk) begin
if (bus_read) begin
$display("%t: scope-read: cmd=%0d, out=%0h, addr=%0d", $time, out_cmd, bus_out, raddr);
$display("%t: scope-read: cmd=%0d, addr=%0d, value=%0h", $time, get_cmd, raddr, bus_out);
end
if (bus_write) begin
$display("%t: scope-write: cmd=%0d, value=%0d", $time, cmd_type, cmd_data);

View file

@ -1,48 +0,0 @@
#!/usr/bin/env python3
import os
import glob
config_location = 'configs'
name_template = '{clusters}cl-{cores}c-{warps}w-{threads}t-{l2}Kl2-{dcache}Kd-{icache}Ki{name_suffix}.sh'
template = """
export V_NT={threads}
export V_NW={warps}
export V_NUM_CORES_PER_CLUSTER={cores}
export V_NUM_CLUSTERS={clusters}
export V_DCACHE_SIZE_BYTES={dcachek}
export V_ICACHE_SIZE_BYTES={icachek}
# L2 Cache size
export V_L2CACHE_SIZE_BYTES={l2k}
{codegen}
"""
# cluster, cores, warps, threads, l2, dcache, icache
configs = [
(1, 2, 8, 4, 8, 4, 1),
(1, 2, 8, 8, 8, 4, 1),
(1, 2, 8, 8, 16, 8, 1),
(1, 4, 8, 8, 16, 4, 1),
(1, 4, 8, 8, 16, 8, 1),
(1, 4, 16, 8, 16, 8, 1),
(2, 4, 8, 4, 8, 4, 1),
(2, 4, 8, 8, 16, 8, 1),
]
files = glob.glob(config_location + '/*.sh')
for f in files:
os.remove(f)
for clusters, cores, warps, threads, l2, dcache, icache in configs:
l2k, dcachek, icachek = 1024 * l2, 1024 * dcache, 1024 * icache
name_suffix = ''
with open(config_location + '/' + name_template.format(**locals()), 'w') as f:
codegen = ''
f.write(template.format(**locals()))

195
hw/scripts/scope.json Normal file
View file

@ -0,0 +1,195 @@
{
"version": 1,
"includes":[
"../rtl/VX_config.vh",
"../rtl/VX_platform.vh",
"../rtl/VX_define.vh",
"../rtl/cache/VX_cache_config.vh"
],
"modules": {
"top": {
"submodules": {
"vortex": {"type":"Vortex"}
}
},
"Vortex": {
"submodules": {
"cluster": {"type":"VX_cluster", "count":"`NUM_CLUSTERS"},
"l3cache": {"type":"VX_cache", "enabled":"`L3_ENABLE", "params":{"NUM_BANKS":"`L3NUM_BANKS"}}
}
},
"VX_cluster": {
"submodules": {
"core": {"type":"VX_core", "count":"`NUM_CORES", "enabled":true},
"l2cache": {"type":"VX_cache", "enabled":"`L2_ENABLE", "params":{"NUM_BANKS":"`L2NUM_BANKS"}}
}
},
"VX_core": {
"submodules": {
"pipeline": {"type":"VX_pipeline", "enabled":true},
"mem_unit": {"type":"VX_mem_unit", "enabled":true}
}
},
"VX_pipeline": {
"submodules": {
"fetch": {"type":"VX_fetch", "enabled":true},
"decode": {"type":"VX_decode", "enabled":true},
"issue": {"type":"VX_issue", "enabled":true},
"execute": {"type":"VX_execute", "enabled":true},
"commit": {"type":"VX_commit", "enabled":true}
}
},
"VX_fetch": {
"submodules": {
"warp_sched": {"type":"VX_warp_sched"},
"icache_stage": {"type":"VX_icache_stage"}
}
},
"VX_warp_sched": {},
"VX_icache_stage": {},
"VX_decode": {},
"VX_issue": {},
"VX_execute": {
"submodules": {
"lsu_unit": {"type":"VX_lsu_unit"},
"gpu_unit": {"type":"VX_gpu_unit"}
}
},
"VX_commit": {},
"VX_lsu_unit": {},
"VX_gpu_unit": {},
"VX_mem_unit": {
"submodules": {
"smem": {"type":"VX_cache", "params":{"NUM_BANKS":"`SNUM_BANKS"}},
"dcache": {"type":"VX_cache", "params":{"NUM_BANKS":"`DNUM_BANKS"}},
"icache": {"type":"VX_cache", "params":{"NUM_BANKS":"`INUM_BANKS"}}
}
},
"VX_cache": {
"submodules": {
"bank": {"type":"VX_bank", "count":"NUM_BANKS"}
}
},
"VX_bank": {}
},
"taps": {
"top": {
"!reset": 1,
"?dram_req_valid": 1,
"dram_req_addr": 32,
"dram_req_rw": 1,
"dram_req_byteen":"`VX_DRAM_BYTEEN_WIDTH",
"dram_req_data":"`VX_DRAM_LINE_WIDTH",
"dram_req_tag":"`VX_DRAM_TAG_WIDTH",
"?dram_req_ready": 1,
"?dram_rsp_valid": 1,
"dram_rsp_data":"`VX_DRAM_LINE_WIDTH",
"dram_rsp_tag":"`VX_DRAM_TAG_WIDTH",
"?dram_rsp_ready": 1,
"?snp_req_valid": 1,
"snp_req_addr": 32,
"snp_req_invalidate": 1,
"snp_req_tag":"`VX_SNP_TAG_WIDTH",
"?snp_req_ready": 1,
"?snp_rsp_valid": 1,
"snp_rsp_tag":"`VX_SNP_TAG_WIDTH",
"?snp_rsp_ready": 1,
"busy": 1
},
"top/vortex/cluster/core/pipeline/fetch/icache_stage": {
"?icache_req_valid": 1,
"icache_req_wid":"`NW_BITS",
"icache_req_addr": 32,
"icache_req_tag":"`ICORE_TAG_ID_BITS",
"?icache_req_ready": 1,
"?icache_rsp_valid": 1,
"icache_rsp_data": 32,
"icache_rsp_tag":"`ICORE_TAG_ID_BITS",
"?icache_rsp_ready": 1
},
"top/vortex/cluster/core/pipeline/fetch/warp_sched": {
"?wsched_scheduled_warp": 1,
"wsched_active_warps": "`NUM_WARPS",
"wsched_schedule_table": "`NUM_WARPS",
"wsched_schedule_ready": "`NUM_WARPS",
"wsched_warp_to_schedule": "`NW_BITS",
"wsched_warp_pc": "32"
},
"top/vortex/cluster/core/pipeline/execute/gpu_unit": {
"?gpu_req_valid": 1,
"gpu_req_wid": "`NW_BITS",
"gpu_req_tmask": "`NUM_THREADS",
"gpu_req_op_type": "`GPU_BITS",
"gpu_req_rs1": "32",
"gpu_req_rs2": "32",
"?gpu_req_ready": 1,
"?gpu_rsp_valid": 1,
"gpu_rsp_wid": "`NW_BITS",
"gpu_rsp_tmc": "`GPU_TMC_SIZE",
"gpu_rsp_wspawn": "`GPU_WSPAWN_SIZE",
"gpu_rsp_split": "`GPU_SPLIT_SIZE",
"gpu_rsp_barrier": "`GPU_BARRIER_SIZE"
},
"top/vortex/cluster/core/pipeline/execute/lsu_unit": {
"?dcache_req_valid":"`NUM_THREADS",
"dcache_req_wid":"`NW_BITS",
"dcache_req_pc": 32,
"dcache_req_addr":"`NUM_THREADS * 32",
"dcache_req_rw": 1,
"dcache_req_byteen":"`NUM_THREADS * 4",
"dcache_req_data": "`NUM_THREADS * 32",
"dcache_req_tag":"`DCORE_TAG_ID_BITS",
"?dcache_req_ready": 1,
"?dcache_rsp_valid":"`NUM_THREADS",
"dcache_rsp_data":"`NUM_THREADS * 32",
"dcache_rsp_tag":"`DCORE_TAG_ID_BITS",
"?dcache_rsp_ready": 1
},
"top/vortex/cluster/core/pipeline/issue": {
"?issue_valid": 1,
"issue_wid":"`NW_BITS",
"issue_tmask":"`NUM_THREADS",
"issue_pc": 32,
"issue_ex_type":"`EX_BITS",
"issue_op_type":"`OP_BITS",
"issue_op_mod":"`MOD_BITS",
"issue_wb": 1,
"issue_rd":"`NR_BITS",
"issue_rs1":"`NR_BITS",
"issue_rs2":"`NR_BITS",
"issue_rs3":"`NR_BITS",
"issue_imm": 32,
"issue_rs1_is_pc": 1,
"issue_rs2_is_imm": 1,
"?issue_ready": 1,
"?gpr_rsp_valid": 1,
"gpr_rsp_wid":"`NW_BITS",
"gpr_rsp_pc": 32,
"gpr_rsp_a":"`NUM_THREADS * 32",
"gpr_rsp_b":"`NUM_THREADS * 32",
"gpr_rsp_c":"`NUM_THREADS * 32",
"!gpr_delay": 1,
"?writeback_valid": 1,
"writeback_wid":"`NW_BITS",
"writeback_pc": 32,
"writeback_rd":"`NR_BITS",
"writeback_data":"`NUM_THREADS * 32",
"!scoreboard_delay": 1,
"!execute_delay": 1
},
"top/vortex/l3cache/bank, top/vortex/cluster/l2cache/bank, top/vortex/cluster/core/mem_unit/dcache/bank, top/vortex/cluster/core/mem_unit/icache/bank, top/vortex/cluster/core/mem_unit/smem/bank": {
"?valid_st0": 1,
"?valid_st1": 1,
"?valid_st2": 1,
"addr_st0": 32,
"addr_st1": 32,
"addr_st2": 32,
"is_mrvq_st1": 1,
"miss_st1": 1,
"dirty_st1": 1,
"!force_miss_st1": 1,
"!stall_pipe": 1
}
}
}

830
hw/scripts/scope.py Executable file
View file

@ -0,0 +1,830 @@
#!/usr/bin/env python3
import os
import re
import json
import argparse
import math
vl_include_re = re.compile(r"^\s*`include\s+\"(.+)\"")
vl_define_re = re.compile(r"^\s*`define\s+(\w+)(\([\w\s,]*\))?(.*)")
vl_ifdef_re = re.compile(r"^\s*`(ifdef|ifndef|elsif)\s+(\w+)\s*$")
vl_endif_re = re.compile(r"^\s*`(endif|else)\s*$")
vl_expand_re = re.compile(r"`([0-9a-zA-Z_]+)")
exclude_files = []
include_dirs = []
macros = []
br_stack = []
def translate_ternary(text):
    """Rewrite C/Verilog ternaries in ``text`` as Python conditionals.

    Each ``cond ? a : b`` becomes ``a if cond else b``.  Operands are either a
    single identifier/number token or a parenthesised group (optionally
    preceded, reading in the scan direction, by an identifier, which covers
    call-like forms).  The text is rescanned after every rewrite until no
    '?' remains.

    Args:
        text: expression string to translate.

    Returns:
        The translated string; ``text`` unchanged if it holds no ternary.

    Raises:
        Exception: if an operand or the ':' separator cannot be located.
    """

    def skip_space(text, i, ln, step):
        # Advance i by `step` until it leaves the string or hits non-space.
        while (i >= 0) and (i < ln):
            if not text[i].isspace():
                break
            i += step
        return i

    def skip_expr(text, i, ln, step):
        # Walk over one operand starting at i, moving by `step` (+1 or -1),
        # and return the index of the operand's last character in that
        # direction.  "Opening" and "closing" parentheses swap roles when
        # scanning backwards.
        opener = '(' if step > 0 else ')'
        closer = ')' if step > 0 else '('
        paren = 0
        checkparen = True
        while (i >= 0) and (i < ln):
            c = text[i]
            if checkparen and (c == opener):
                paren += 1
            elif checkparen and (c == closer):
                if 0 == paren:
                    break
                paren -= 1
                if 0 == paren:
                    # Group closed: allow one adjacent bare token, but do not
                    # enter another parenthesised group.
                    i = skip_space(text, i + step, ln, step)
                    checkparen = False
                    continue
            elif (0 == paren) and not (c.isalnum() or (c == '_')):
                break
            i += step
        return i - step

    def parse_ternary(text):
        # Locate the first '?' (never at index 0) and return the operand
        # boundaries (i0, i1, i2, i3, i5, i6), or None if there is no '?'.
        ln = len(text)
        for i in range(1, ln):
            if text[i] != '?':
                continue
            # condition: text[i1 .. i0], scanned right-to-left
            i0 = skip_space(text, i - 1, ln, -1)
            if i0 < 0:
                # only whitespace precedes the '?'
                raise Exception("invalid condition expression")
            i1 = skip_expr(text, i0, ln, -1)
            if i1 > i0:
                raise Exception("invalid condition expression")
            # true branch: text[i2 .. i3]
            i2 = skip_space(text, i + 1, ln, 1)
            if i2 >= ln:
                raise Exception("invalid true expression")
            i3 = skip_expr(text, i2, ln, 1)
            if i3 < i2:
                raise Exception("invalid true expression")
            # separator
            i4 = skip_space(text, i3 + 1, ln, 1)
            if i4 >= ln:
                raise Exception("invalid colon")
            if text[i4] != ':':
                raise Exception("missing colon")
            # false branch: text[i5 .. i6]
            i5 = skip_space(text, i4 + 1, ln, 1)
            if i5 >= ln:
                raise Exception("invalid false expression")
            i6 = skip_expr(text, i5, ln, 1)
            if i6 < i5:
                raise Exception("invalid false expression")
            return (i0, i1, i2, i3, i5, i6)
        return None

    while True:
        pos = parse_ternary(text)
        if pos is None:
            break
        cond_end, cond_start, true_start, true_end, false_start, false_end = pos
        # convert to python ternary: <true> if <cond> else <false>
        text = (text[:cond_start]
                + text[true_start:true_end + 1]
                + " if " + text[cond_start:cond_end + 1]
                + " else " + text[false_start:false_end + 1]
                + text[false_end + 1:])
    return text
def parse_func_args(text):
    """Split the argument list of a function-like macro invocation.

    ``text`` must start with '('.  Arguments are separated by commas at the
    outermost nesting level; commas inside nested parentheses stay part of
    the argument.  Arguments are returned verbatim (not stripped), and
    arguments that are empty or whitespace-only are dropped.

    Args:
        text: string beginning at the opening parenthesis; anything after the
            matching closing parenthesis is ignored.

    Returns:
        ``(args, end)`` where ``args`` is the list of argument strings and
        ``end`` is the index in ``text`` of the matching ')'.

    Raises:
        Exception: if ``text`` does not start with '(' (including empty
            input) or the closing parenthesis is missing.
    """
    # startswith() also covers the empty string, which text[0] would turn
    # into an unrelated IndexError.
    if not text.startswith('('):
        raise Exception("missing leading parenthesis: " + text)
    args = []
    arg = ''
    end = len(text)
    paren = 1
    for i in range(1, len(text)):
        c = text[i]
        if c == '(':
            paren += 1
        elif c == ')':
            paren -= 1
            if paren == 0:
                # matching close of the leading '(' -- stop scanning
                end = i
                break
        if c == ',' and paren == 1:
            if arg.strip():
                args.append(arg)
            arg = ''
        else:
            arg += c
    if paren != 0:
        raise Exception("missing closing parenthesis: " + text)
    if arg.strip():
        args.append(arg)
    return (args, end)
def resolve_include_path(filename, parent_dir):
    """Resolve an `include file name to an absolute path.

    Lookup order: the name as given (relative to the current directory),
    then every directory in the module-level ``include_dirs`` list.

    ``parent_dir`` (the directory of the including file) is added to
    ``include_dirs`` so that it is searched for this and for later includes.
    It is added at most once; previously every call appended another copy,
    so the list grew without bound and the same directories were probed
    repeatedly.

    Args:
        filename: name from the `include directive.
        parent_dir: directory of the including file; may be empty.

    Returns:
        Absolute path of the file, or None when the file's base name is
        listed in ``exclude_files``.

    Raises:
        Exception: if the file cannot be found.
    """
    if os.path.basename(filename) in exclude_files:
        return None
    if os.path.isfile(filename):
        return os.path.abspath(filename)
    if parent_dir and parent_dir not in include_dirs:
        include_dirs.append(parent_dir)
    for search_dir in include_dirs:
        filepath = os.path.join(search_dir, filename)
        if os.path.isfile(filepath):
            return os.path.abspath(filepath)
    raise Exception("couldn't find include file: " + filename)
def remove_comments(text):
    """Strip Verilog/C-style comments from ``text``.

    Block comments (``/* ... */``, possibly spanning lines) are removed
    entirely.  Line comments (``// ...``) are removed up to, but not
    including, the end of the line, so line structure is preserved; a line
    comment on the final line is removed even when the text does not end
    with a newline.

    No attempt is made to recognise string literals: a ``//`` or ``/*``
    inside a quoted string is treated as a comment start.
    """
    text = re.sub(r"/\*.*?\*/", "", text, flags=re.DOTALL)  # multiline
    text = re.sub(r"//[^\n]*", "", text)                    # singleline
    return text
def add_macro(name, args, value):
    """Append a macro definition to the global ``macros`` list and log it.

    Args:
        name: macro name.
        args: list of formal parameter names for a function-like macro
            (may be empty), or None for an object-like macro.
        value: replacement text.
    """
    macros.append((name, args, value))
    if args is None:
        print("*** token: " + name + "=" + value)
        return
    formals = ', '.join(str(a) for a in args)
    print("*** token: " + name + "(" + formals + ")=" + value)
def find_macro(name):
    """Return the first ``(name, args, value)`` entry of the global ``macros``
    list whose name equals ``name``, or None if there is none."""
    return next((entry for entry in macros if entry[0] == name), None)
def expand_text(text, params):
    """Expand module parameters and `macros in ``text``.

    Substitution is repeated until nothing changes:
      1. every whole-word occurrence of a key of ``params`` is replaced by
         its value;
      2. every `NAME referring to an object-like macro is replaced by the
         macro value;
      3. if a function-like macro was seen, the first `NAME(...) in the text
         is expanded with its actual arguments, and the process restarts.

    Args:
        text: string to expand.
        params: dict mapping parameter name to replacement text, or None.

    Returns:
        The expanded string, or None when nothing was expanded.

    Raises:
        Exception: on an argument-count mismatch for a function-like macro,
            or when expansion does not settle ("Macro recursion!").
    """

    def re_pattern_args(args):
        # Regex matching any of `args` as a whole identifier.
        return "(?<![0-9a-zA-Z_])(" + "|".join(args) + ")(?![0-9a-zA-Z_])"

    class DoReplParam(object):
        # re.sub callback: substitute a parameter name with its value.
        def __init__(self, params):
            self.params = params
            self.expanded = False

        def __call__(self, match):
            self.expanded = True
            return self.params[match.group(1)]

    class DoReplMacro(object):
        # re.sub callback: substitute object-like macros; function-like
        # macros are only flagged (has_func) and left in place.
        def __init__(self):
            self.expanded = False
            self.has_func = False

        def __call__(self, match):
            name = match.group(1)
            macro = find_macro(name)
            if macro:
                if macro[1] is not None:
                    self.has_func = True
                else:
                    self.expanded = True
                    return macro[2]
            return "`" + name

    def repl_func_macro(text):
        # Expand the first `NAME in `text` if it is a function-like macro.
        # Returns the new text, or None if nothing was expanded.
        match = re.search(vl_expand_re, text)
        if not match:
            return None
        name = match.group(1)
        macro = find_macro(name)
        if not macro:
            return None
        args = macro[1]
        value = macro[2]
        if args is None:
            return None
        str_args = text[match.end():].strip()
        f_args = parse_func_args(str_args)
        if len(args) == 0:
            if len(f_args[0]) != 0:
                raise Exception("invalid argments for macro '" + name + "': value=" + text)
        else:
            if len(args) != len(f_args[0]):
                # str() is required: concatenating the raw ints raised a
                # TypeError that hid this message.
                raise Exception("mismatch number of argments for macro '" + name
                                + "': actual=" + str(len(f_args[0]))
                                + ", expected=" + str(len(args)))
            actuals = dict(zip(args, f_args[0]))
            value = re.sub(re_pattern_args(args), DoReplParam(actuals), value)
        str_head = text[0:match.start()]
        str_tail = text[match.end() + f_args[1] + 1:]
        return str_head + value + str_tail

    changed = False
    iter = 0
    while True:
        if iter > 99:
            raise Exception("Macro recursion!")
        has_func = False
        passes = 0
        while True:
            # A self-referential object macro would otherwise spin here
            # forever; apply the same bound as for function-like macros.
            if passes > 99:
                raise Exception("Macro recursion!")
            passes += 1
            params_updated = False
            # `if params` (rather than `is not None`) also skips an empty
            # dict, whose pattern would match the empty string everywhere.
            if params:
                do_repl = DoReplParam(params)
                pattern = re_pattern_args(params)
                new_text = re.sub(pattern, do_repl, text)
                if do_repl.expanded:
                    text = new_text
                    params_updated = True
            do_repl = DoReplMacro()
            new_text = re.sub(vl_expand_re, do_repl, text)
            has_func = do_repl.has_func
            if not (params_updated or do_repl.expanded):
                break
            text = new_text
            changed = True
        if not has_func:
            break
        expanded = repl_func_macro(text)
        if not expanded:
            break
        text = expanded
        changed = True
        iter += 1
    if changed:
        return text
    return None
def parse_include(filename, nesting):
    """Parse one Verilog header and record its `define macros.

    Handles comment removal, backslash line continuation,
    `ifdef/`ifndef/`elsif/`else/`endif (via the global ``br_stack``), nested
    `include directives (recursively) and `define (via ``add_macro``).
    Conditions are tested only for "macro is defined".

    Args:
        filename: path of the header to read.
        nesting: current include depth; more than 99 is treated as runaway
            recursion.

    Raises:
        Exception: on include recursion or an include that cannot be found.
    """
    if nesting > 99:
        raise Exception("include recursion!")
    print("*** parsing '" + filename + "'...")
    with open(filename, "r") as f:
        content = f.read()
    # remove comments
    content = remove_comments(content)
    # For every conditional chain opened in this file: True once any branch
    # of the chain has been selected.  Needed so that `elsif / `else are not
    # taken after an earlier branch already was.
    chain_taken = []
    # parse content
    prev_line = None
    for line in content.splitlines(False):
        # skip empty lines
        if re.match(r'^\s*$', line):
            continue
        # merge multi-line lines
        if line.endswith('\\'):
            if prev_line:
                prev_line += line[:len(line) - 1]
            else:
                prev_line = line[:len(line) - 1]
            continue
        if prev_line:
            line = prev_line + line
            prev_line = None
        # parse ifdef / ifndef / elsif
        m = re.match(vl_ifdef_re, line)
        if m:
            key = m.group(1)
            cond = m.group(2)
            defined = find_macro(cond) is not None
            if key == 'elsif':
                # Replace the previous branch of the same chain instead of
                # opening a new nesting level.
                br_stack.pop()
                already = chain_taken.pop() if chain_taken else False
                taken = defined and not already
                chain_taken.append(already or taken)
            else:
                taken = (not defined) if key == 'ifndef' else defined
                chain_taken.append(taken)
            br_stack.append(taken)
            print("*** " + key + "(" + cond + ") => " + str(taken))
            continue
        # parse endif / else
        m = re.match(vl_endif_re, line)
        if m:
            key = m.group(1)
            top = br_stack.pop()
            # Fall back to the plain inversion when the chain was opened in
            # another file and is therefore not tracked here.
            already = chain_taken.pop() if chain_taken else top
            if key == 'else':
                br_stack.append(not already)
                chain_taken.append(True)
            print("*** " + key)
            continue
        # skip disabled blocks
        if not all(br_stack):
            continue
        # parse include
        m = re.match(vl_include_re, line)
        if m:
            include = m.group(1)
            include = resolve_include_path(include, os.path.dirname(filename))
            if include:
                parse_include(include, nesting + 1)
            continue
        # parse define
        m = re.match(vl_define_re, line)
        if m:
            name = m.group(1)
            args = m.group(2)
            if args:
                # strip the surrounding parentheses of the formal list
                args = args[1:len(args)-1].strip()
                if args != '':
                    args = [a.strip() for a in args.split(',')]
                else:
                    args = []
            value = m.group(3)
            add_macro(name, args, value.strip())
            continue
def parse_includes(includes):
    """Parse every header in ``includes``.

    Paths are interpreted relative to the directory containing this script:
    the working directory is switched there for the duration of the parse
    and restored afterwards, also when parsing raises.
    """
    old_dir = os.getcwd()
    script_dir = os.path.dirname(os.path.realpath(__file__))
    os.chdir(script_dir)
    try:
        for include in includes:
            parse_include(include, 0)
    finally:
        # restore current directory
        os.chdir(old_dir)
def load_include_dirs(dirs):
    """Log each directory in ``dirs`` and append it to the global
    ``include_dirs`` search list, in the order given."""
    for path in dirs:
        print("*** include dir: " + path)
        include_dirs.append(path)
def load_defines(defines):
    """Register command-line macro definitions.

    Each entry has the form ``NAME`` or ``NAME=VALUE``.  Only the first '='
    separates name from value, so a value may itself contain '='.  A bare
    ``NAME`` is registered with an empty value.  Empty/None entries (e.g. an
    option given without an argument) are ignored.
    """
    for define in defines:
        if not define:
            continue
        name, _, value = define.partition('=')
        add_macro(name, None, value)
def load_config(filename):
    """Read the JSON configuration file ``filename``, echo the parsed
    object to stdout and return it."""
    with open(filename, "r") as cfg_file:
        parsed = json.loads(cfg_file.read())
    print("condfig=", parsed)
    return parsed
def eval_node(text, params):
    """Evaluate a configuration value.

    Non-string values are returned unchanged.  Strings are macro/parameter
    expanded, ``$clog2`` is mapped to a local helper, ternaries are
    translated to Python syntax, and the result is evaluated as a Python
    expression.  If evaluation fails with NameError or SyntaxError, the
    expanded string is returned instead.

    NOTE(review): the expression is passed to eval(); this looks acceptable
    only because the input is the project's own config file -- confirm it is
    never fed untrusted data.
    """
    if type(text) != str:
        return text

    def clog2(x):
        return int(math.ceil(math.log2(x)))

    expansion = expand_text(text, params)
    source = expansion if expansion else text
    try:
        expr = translate_ternary(source.replace('$clog2', '__clog2'))
        return eval(expr, {'__clog2': clog2})
    except (NameError, SyntaxError):
        return source
def gen_vl_header(file, modules, taps):
header = '''
`ifndef VX_SCOPE_DEFS
`define VX_SCOPE_DEFS
'''
footer = '`endif'
def signal_size(size, mn):
    """Return the Verilog range suffix for a dimension of ``size``.

    For an int: "" when ``size`` equals ``mn``, else "[size-1:0]".
    For anything else (an unevaluated expression string): "[<size>-1:0]".
    """
    if type(size) is not int:
        return "[" + size + "-1:0]"
    if size == mn:
        return ""
    return "[" + str(size - 1) + ":0]"
def create_signal(key, ports):
    """Return the list stored under ``key`` in ``ports``, creating an empty
    one first if the key is absent."""
    return ports.setdefault(key, [])
def dic_insert(gdic, ldic, key, value, enabled):
    """Record ``key`` in the local and global tap dictionaries.

    When ``enabled``, ``ldic[key]`` is always set to ``value``.  Returns
    False if ``key`` was already present in ``gdic``; otherwise returns True
    and, when ``enabled``, marks the key in ``gdic`` (value None).
    """
    is_new = key not in gdic
    if enabled:
        ldic[key] = value
        if is_new:
            gdic[key] = None
    return is_new
def trigger_name(name, size):
    """Return the trigger expression for signal ``name``: the bare name when
    ``size`` is the int 1, otherwise an OR-reduction "(| name)"."""
    if type(size) is int and size == 1:
        return name
    return "(| " + name + ")"
def trigger_subscripts(asize):
    """Enumerate every index suffix for the array dimensions in ``asize``.

    A dimension of 0 contributes no subscript; a dimension n contributes
    "[0]" .. "[n-1]".  The first dimension varies slowest.  ``None`` or an
    empty list yields a single empty suffix.
    """
    if asize is None or len(asize) == 0:
        return [""]
    suffixes = [""]
    for dim in asize:
        if dim != 0:
            suffixes = [head + '[' + str(k) + ']'
                        for head in suffixes
                        for k in range(dim)]
    return suffixes
def visit_path(alltaps, ports, ntype, paths, modules, taps):
curtaps = {}
if (len(paths) != 0):
spath = paths.pop(0)
snodes = modules[ntype]["submodules"]
if not spath in snodes:
raise Exception("invalid path: " + spath + " in " + ntype)
snode = snodes[spath]
stype = snode["type"]
enabled = True
if "enabled" in snode:
enabled = eval_node(snode["enabled"], None)
subtaps = visit_path(alltaps, ports, stype, paths, modules, taps)
scount = 0
if "count" in snode:
scount = eval_node(snode["count"], None)
params = None
if "params" in snode:
params = snode["params"]
new_staps = []
nn = "SCOPE_IO_" + ntype
pp = create_signal(nn, ports)
for key in subtaps:
subtap = subtaps[key]
s = subtap[0]
a = subtap[1]
t = subtap[2]
aa = [scount]
sa = signal_size(scount, 0)
if a:
for i in a:
x = eval_node(i, params)
aa.append(x)
sa += signal_size(x, 0)
if dic_insert(alltaps, curtaps, spath + '/' + key, (s, aa, t), enabled):
skey = key.replace('/', '_')
if enabled:
pp.append("\toutput wire" + sa + signal_size(s, 1) + " scope_" + spath + '_' + skey + ',')
new_staps.append(skey)
ports[nn] = pp
if (0 == scount):
nn = "SCOPE_BIND_" + ntype + '_' + spath
pp = create_signal(nn, ports)
for st in new_staps:
if enabled:
pp.append("\t.scope_" + st + "(scope_" + spath + '_' + st + "),")
else:
pp.append("\t`UNUSED_PIN (scope_" + st + "),")
ports[nn] = pp
else:
nn = "SCOPE_BIND_" + ntype + '_' + spath + "(__i__)"
pp = create_signal(nn, ports)
for st in new_staps:
if enabled:
pp.append("\t.scope_" + st + "(scope_" + spath + '_' + st + "[__i__]),")
else:
pp.append("\t`UNUSED_PIN (scope_" + st + "),")
ports[nn] = pp
else:
nn = "SCOPE_IO_" + ntype
pp = create_signal(nn, ports)
for tk in taps:
trigger = 0
name = tk
size = eval_node(taps[tk], None)
if name[0] == '!':
name = name[1:]
trigger = 1
elif name[0] == '?':
name = name[1:]
trigger = 2
if dic_insert(alltaps, curtaps, name, (size, None, trigger), True):
pp.append("\toutput wire" + signal_size(size, 1) + " scope_" + name + ',')
ports[nn] = pp
return curtaps
toptaps = {}
with open(file, 'w') as f:
ports = {}
alltaps = {}
for key in taps:
skey_list = key.split(',')
_taps = taps[key]
for skey in skey_list:
print('processing node: ' + skey + ' ...')
paths = skey.strip().split('/')
ntype = paths.pop(0)
curtaps = visit_path(alltaps, ports, ntype, paths, modules, _taps)
for tk in curtaps:
toptaps[tk] = curtaps[tk]
print(header, file=f)
for key in ports:
print("`define " + key + ' \\', file=f)
for port in ports[key]:
print(port + ' \\', file=f)
print("", file=f)
print("`define SCOPE_DECL_SIGNALS \\", file=f)
i = 0
for key in toptaps:
tap = toptaps[key]
name = key.replace('/', '_')
size = tap[0]
asize = tap[1]
sa = ""
if asize:
for a in asize:
sa += signal_size(a, 0)
if i > 0:
print(" \\", file=f)
print('\t wire' + sa + signal_size(size, 1) + " scope_" + name + ';', file=f, end='')
i += 1
print("", file=f)
print("", file=f)
print("`define SCOPE_DATA_LIST \\", file=f)
i = 0
for key in toptaps:
tap = toptaps[key]
trigger = tap[2]
if trigger != 0:
continue
name = key.replace('/', '_')
if i > 0:
print(", \\", file=f)
print("\t scope_" + name, file=f, end='')
i += 1
print("", file=f)
print("", file=f)
print("`define SCOPE_UPDATE_LIST \\", file=f)
i = 0
for key in toptaps:
tap = toptaps[key]
trigger = tap[2]
if trigger == 0:
continue
name = key.replace('/', '_')
if i > 0:
print(", \\", file=f)
print("\t scope_" + name, file=f, end='')
i += 1
print("", file=f)
print("", file=f)
print("`define SCOPE_TRIGGER \\", file=f)
i = 0
excluded_list = []
for key in toptaps:
if key in excluded_list:
continue
tap = toptaps[key]
if tap[2] != 2:
continue
size = tap[0]
asize = tap[1]
sus = trigger_subscripts(asize)
for su in sus:
if i > 0:
print(" | \\", file=f)
print("\t(", file=f, end='')
name = trigger_name("scope_" + key.replace('/', '_') + su, size)
if key.endswith("_valid"):
ready_signal = key[:-6] + "_ready"
if ready_signal in toptaps:
rname = trigger_name("scope_" + ready_signal.replace('/', '_') + su, size)
print(name + " && " + rname, file=f, end='')
excluded_list.append(ready_signal)
else:
print(name, file=f, end='')
else:
print(name, file=f, end='')
print(")", file=f, end='')
i += 1
print("", file=f)
print("", file=f)
print(footer, file=f)
return toptaps
def gen_cc_header(file, taps):
header = '''
#pragma once
struct scope_module_t {
const char* name;
int index;
int parent;
};
struct scope_tap_t {
int width;
const char* name;
int module;
};
'''
def flatten_path(paths, sizes):
    """Expand a module path with per-level instance counts into concrete
    instance paths.

    ``paths[i]`` is the name of level i and ``sizes[i]`` its instance
    count.  A count of 0 keeps the plain name; a count n produces
    ``name_0`` .. ``name_{n-1}``.  Levels are joined with '/', the first
    level varying slowest.

    The function now uses its own ``sizes`` argument; it previously read a
    variable named ``asize`` from the enclosing scope, which only worked
    because every caller happened to pass that same variable.

    Args:
        paths: list of level names.
        sizes: list of instance counts, one per level.

    Returns:
        List of flattened path strings.
    """
    def Q(arr, ss, idx, N, paths, sizes):
        # Depth-first expansion of level `idx`, appending finished paths to
        # `arr`; `ss` is the path built so far.
        prefix = ss + ('/' if (ss != '') else '')
        size = sizes[idx]
        if size != 0:
            names = [paths[idx] + '_' + str(i) for i in range(size)]
        else:
            names = [paths[idx]]
        for name in names:
            tmp = prefix + name
            if (idx + 1) < N:
                Q(arr, tmp, idx + 1, N, paths, sizes)
            else:
                arr.append(tmp)
    arr = []
    Q(arr, "", 0, len(sizes), paths, sizes)
    return arr
# flatten the taps
fdic = {}
for key in taps:
tap = taps[key]
size = str(tap[0])
trigger = tap[2]
if (trigger != 0):
continue
paths = key.split('/')
if (len(paths) > 1):
name = paths.pop(-1)
asize = tap[1]
for ss in flatten_path(paths, asize):
fdic[ss + '/' + name ] = [size, 0]
else:
fdic[key] = [size, 0]
for key in taps:
tap = taps[key]
size = str(tap[0])
trigger = tap[2]
if (trigger == 0):
continue
paths = key.split('/')
if (len(paths) > 1):
name = paths.pop(-1)
asize = tap[1]
for ss in flatten_path(paths, asize):
fdic[ss + '/' + name ] = [size, 0]
else:
fdic[key] = [size, 0]
# generate module dic
mdic = {}
mdic["*"] = ("*", 0, -1)
for key in fdic:
paths = key.split('/')
if len(paths) == 1:
continue
paths.pop(-1)
parent = 0
mk = ""
for path in paths:
mk += '/' + path
if not mk in mdic:
index = len(mdic)
mdic[mk] = (path, index, parent)
parent = index
else:
parent = mdic[mk][1]
fdic[key][1] = parent
with open(file, 'w') as f:
print(header, file=f)
print("static constexpr scope_module_t scope_modules[] = {", file=f)
i = 0
for key in mdic:
m = mdic[key]
if i > 0:
print(',', file=f)
print("\t{\"" + m[0] + "\", " + str(m[1]) + ", " + str(m[2]) + "}", file=f, end='')
i += 1
print("", file=f)
print("};", file=f)
print("", file=f)
print("static constexpr scope_tap_t scope_taps[] = {", file=f)
i = 0
for key in fdic:
size = fdic[key][0]
parent = fdic[key][1]
paths = key.split('/')
if len(paths) > 1:
name = paths.pop(-1)
else:
name = key
if i > 0:
print(',', file=f)
print("\t{" + size + ", \"" + name + "\", " + str(parent) + "}", file=f, end='')
i += 1
print("", file=f)
print("};", file=f)
def main():
    """Command-line entry point: read the JSON config, collect macros from
    -D/-I options and the configured include files, then emit the Verilog
    and C++ scope headers."""
    cli = argparse.ArgumentParser(description='Scope headers generator.')
    cli.add_argument('-vl', nargs='?', default='scope-defs.vh', metavar='file', help='Output Verilog header')
    cli.add_argument('-cc', nargs='?', default='scope-defs.h', metavar='file', help='Output C++ header')
    cli.add_argument('-D', nargs='?', action='append', metavar='macro[=value]', help='define macro')
    cli.add_argument('-I', nargs='?', action='append', metavar='<includedir>', help='include directory')
    cli.add_argument('config', help='Json config file')
    args = cli.parse_args()
    print("args=", args)

    # The module-level lists are only mutated in place, never rebound, so no
    # `global` declarations are needed here.
    if args.I:
        load_include_dirs(args.I)

    if args.D:
        load_defines(args.D)

    config = load_config(args.config)

    # The generated Verilog header must not be parsed as an input.
    exclude_files.append(os.path.basename(args.vl))

    if "includes" in config:
        parse_includes(config["includes"])

    tap_table = gen_vl_header(args.vl, config["modules"], config["taps"])
    gen_cc_header(args.cc, tap_table)
if __name__ == '__main__':
main()

View file

@ -13,6 +13,8 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_PRINT_FLAGS += -DDBG_CORE_REQ_INFO
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CORE_REQ_INFO
@ -42,7 +44,7 @@ gen-s:
verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG $(SINGLECORE)'
gen-sd:
verilator $(VF) -O0 $(SINGLECORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(SINGLECORE)' --trace $(DBG)
verilator $(VF) -O0 $(SINGLECORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(SINGLECORE)' --trace-fst --trace-threads 1 $(DBG)
gen-st:
verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(SINGLECORE)' --threads $(THREADS)
@ -51,7 +53,7 @@ gen-m:
verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)'
gen-md:
verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(MULTICORE)' --trace $(DBG)
verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(MULTICORE)' --trace-fst --trace-threads 1 $(DBG)
gen-mt:
verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS)
@ -75,11 +77,12 @@ build-mt: gen-mt
(cd obj_dir && make -j -f VVortex.mk)
run: run-s
run-s: build-s
(cd obj_dir && ./VVortex)
run-sd: build-sd
(cd obj_dir && valgrind ./VVortex)
(cd obj_dir && ./VVortex)
run-st: build-st
(cd obj_dir && ./VVortex)

View file

@ -28,15 +28,11 @@ Simulator::Simulator() {
ram_ = nullptr;
vortex_ = new VVortex();
dram_rsp_active_ = false;
snp_req_active_ = false;
csr_req_active_ = false;
#ifdef VCD_OUTPUT
Verilated::traceEverOn(true);
trace_ = new VerilatedVcdC();
trace_ = new VerilatedFstC();
vortex_->trace(trace_, 99);
trace_->open("trace.vcd");
trace_->open("trace.fst");
#endif
// reset the device
@ -66,12 +62,35 @@ void Simulator::reset() {
std::cout << timestamp << ": [sim] reset()" << std::endl;
#endif
vortex_->reset = 1;
this->step();
vortex_->reset = 0;
print_bufs_.clear();
dram_rsp_vec_.clear();
dram_rsp_active_ = false;
snp_req_active_ = false;
csr_req_active_ = false;
snp_req_size_ = 0;
pending_snp_reqs_ = 0;
csr_rsp_value_ = nullptr;
vortex_->dram_rsp_valid = 0;
vortex_->dram_req_ready = 0;
vortex_->io_req_ready = 0;
vortex_->io_rsp_valid = 0;
vortex_->snp_req_valid = 0;
vortex_->snp_rsp_ready = 0;
vortex_->csr_io_req_valid = 0;
vortex_->csr_io_rsp_ready = 0;
vortex_->reset = 1;
vortex_->clk = 0;
this->eval();
vortex_->clk = 1;
this->eval();
vortex_->reset = 0;
// Turn on assertion after reset
Verilated::assertOn(true);
}
@ -79,10 +98,9 @@ void Simulator::reset() {
void Simulator::step() {
vortex_->clk = 0;
this->eval();
vortex_->clk = 1;
this->eval();
this->eval_dram_bus();
this->eval_io_bus();
this->eval_csr_bus();
@ -104,14 +122,13 @@ void Simulator::eval_dram_bus() {
}
// schedule DRAM responses
int dequeue_index = -1;
for (int i = 0; i < dram_rsp_vec_.size(); i++) {
if (dram_rsp_vec_[i].cycles_left > 0) {
dram_rsp_vec_[i].cycles_left -= 1;
std::list<dram_req_t>::iterator dram_rsp_it(dram_rsp_vec_.end());
for (auto it = dram_rsp_vec_.begin(), ie = dram_rsp_vec_.end(); it != ie; ++it) {
if (it->cycles_left > 0) {
it->cycles_left -= 1;
}
if ((dequeue_index == -1)
&& (dram_rsp_vec_[i].cycles_left == 0)) {
dequeue_index = i;
if ((dram_rsp_it == ie) && (it->cycles_left == 0)) {
dram_rsp_it = it;
}
}
@ -122,11 +139,11 @@ void Simulator::eval_dram_bus() {
dram_rsp_active_ = false;
}
if (!dram_rsp_active_) {
if (dequeue_index != -1) {
if (dram_rsp_it != dram_rsp_vec_.end()) {
vortex_->dram_rsp_valid = 1;
memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_vec_[dequeue_index].block.data(), GLOBAL_BLOCK_SIZE);
vortex_->dram_rsp_tag = dram_rsp_vec_[dequeue_index].tag;
dram_rsp_vec_.erase(dram_rsp_vec_.begin() + dequeue_index);
memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_it->block.data(), GLOBAL_BLOCK_SIZE);
vortex_->dram_rsp_tag = dram_rsp_it->tag;
dram_rsp_vec_.erase(dram_rsp_it);
dram_rsp_active_ = true;
} else {
vortex_->dram_rsp_valid = 0;
@ -161,7 +178,7 @@ void Simulator::eval_dram_bus() {
dram_req.cycles_left = DRAM_LATENCY;
dram_req.tag = vortex_->dram_req_tag;
ram_->read(vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.block.data());
dram_rsp_vec_.push_back(dram_req);
dram_rsp_vec_.emplace_back(dram_req);
}
}
}
@ -199,7 +216,7 @@ void Simulator::eval_snp_bus() {
#endif
}
if (vortex_->snp_req_valid && vortex_->snp_req_ready) {
if (snp_req_size_) {
if (snp_req_size_ != 0) {
vortex_->snp_req_addr += 1;
vortex_->snp_req_tag += 1;
--snp_req_size_;
@ -272,7 +289,7 @@ void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
vortex_->snp_req_valid = 1;
vortex_->snp_rsp_ready = 1;
snp_req_size_ = (size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE;
snp_req_size_ = (size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE;
--snp_req_size_;
pending_snp_reqs_ = 1;

View file

@ -5,13 +5,14 @@
#include "verilated.h"
#ifdef VCD_OUTPUT
#include <verilated_vcd_c.h>
#include <verilated_fst_c.h>
#endif
#include <VX_config.h>
#include "ram.h"
#include <ostream>
#include <list>
#include <vector>
#include <sstream>
#include <unordered_map>
@ -62,7 +63,7 @@ private:
void eval_csr_bus();
void eval_snp_bus();
std::vector<dram_req_t> dram_rsp_vec_;
std::list<dram_req_t> dram_rsp_vec_;
bool dram_rsp_active_;
bool snp_req_active_;
@ -75,6 +76,6 @@ private:
RAM *ram_;
VVortex *vortex_;
#ifdef VCD_OUTPUT
VerilatedVcdC *trace_;
VerilatedFstC *trace_;
#endif
};

View file

@ -33,7 +33,7 @@ set_global_assignment -name TOP_LEVEL_ENTITY $opts(top)
set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS OFF
set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON
set_global_assignment -name VERILOG_MACRO QUARTUS
set_global_assignment -name VERILOG_MACRO SYNTHESIS
set_global_assignment -name VERILOG_MACRO NDEBUG
@ -43,17 +43,19 @@ set_global_assignment -name VERILOG_MACRO FPU_FAST
set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)"
set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON
set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON
set_global_assignment -name TIMEQUEST_DO_CCPP_REMOVAL ON
set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
set_global_assignment -name POWER_USE_TA_VALUE 65
set_global_assignment -name SEED 1
set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM
set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
set idx 0
foreach arg $q_args_orig {

View file

@ -51,7 +51,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)"
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE"
syn.chg:
$(STAMP) syn.chg

View file

@ -1,17 +1,22 @@
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_bypass_buffer.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_cam_buffer.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_countones.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_divide.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_encoder_onehot.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_elastic_buffer.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_fair_arbiter.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_fixed_arbiter.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_generic_queue.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_generic_register.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_generic_stack.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_indexable_queue.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_index_queue.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_matrix_arbiter.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_mult.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_multiplier.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_onehot_encooder.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_priority_encoder.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_rr_arbiter.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_scope.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_serial_div.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_shift_register.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_skid_buffer.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_bank.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_bank_core_req_arb.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache.v
@ -20,114 +25,72 @@ read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I..
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_dram_fill_arb.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_dram_req_arb.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_miss_resrv.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_prefetcher.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_snp_forwarder.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_snp_rsp_arb.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_tag_data_access.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_tag_data_structure.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_backend_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_branch_rsp_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_tag_data_store.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_alu_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_branch_ctl_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_core_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_core_rsp_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_dram_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_dram_rsp_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_snp_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_snp_rsp_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cmt_to_csr_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_csr_io_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_csr_io_rsp_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_csr_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_exec_unit_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_gpr_read_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_gpu_inst_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_inst_meta_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_jal_rsp_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_csr_to_issue_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_decode_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_exu_to_cmt_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_fpu_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_fpu_to_cmt_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_fpu_to_csr_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_gpr_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_gpr_rsp_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_gpu_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_ifetch_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_ifetch_rsp_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_join_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_lsu_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_mul_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_warp_ctl_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_wb_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_writeback_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_wstall_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_alu_unit.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_back_end.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_cluster.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_commit.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_core.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_csr_arb.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_csr_data.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_csr_io_arb.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_csr_pipe.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_d_e_reg.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_csr_unit.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_dcache_arb.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_decode.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_exec_unit.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_f_d_reg.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_execute.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_fetch.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_front_end.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_fpu_unit.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_gpr_bypass.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_gpr_fp_ctrl.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_gpr_ram.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_gpr_stage.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_gpr_wrapper.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_gpu_inst.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_i_d_reg.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_gpu_unit.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_ibuffer.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_icache_stage.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_inst_multiplex.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_instr_demux.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_io_arb.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_ipdom_stack.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_issue.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_lsu_unit.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_mem_arb.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_mem_unit.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_mul_unit.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_pipeline.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_scheduler.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_user_config.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_warp.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_scoreboard.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_warp_sched.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_writeback.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/Vortex.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_bank.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_bank_core_req_arb.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_core_req_bank_sel.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_core_rsp_merge.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_dram_fill_arb.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_dram_req_arb.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_miss_resrv.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_prefetcher.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_snp_forwarder.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_snp_rsp_arb.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_tag_data_access.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_tag_data_structure.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_backend_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_branch_rsp_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_core_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_core_rsp_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_dram_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_dram_rsp_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_snp_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_snp_rsp_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_csr_io_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_csr_io_rsp_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_csr_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_exec_unit_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_gpr_read_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_gpu_inst_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_inst_meta_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_jal_rsp_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_join_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_lsu_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_warp_ctl_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_wb_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_wstall_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_countones.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_divide.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_encoder_onehot.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_fair_arbiter.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_fixed_arbiter.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_generic_queue.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_generic_register.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_generic_stack.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_indexable_queue.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_matrix_arbiter.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_mult.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_priority_encoder.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_rr_arbiter.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_scope.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/tex_unit/VX_tex_mgr.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/tex_unit/VX_tex_unit.v
hierarchy -check -top Vortex
add -global_input reset 1
proc -global_arst reset