mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
adding support for verilator-driven AFU driver: vlsim
This commit is contained in:
parent
8eee036295
commit
0fab1ddd92
17 changed files with 742 additions and 66 deletions
|
@ -1,4 +1,4 @@
|
|||
all: stub rtlsim simx
|
||||
all: stub rtlsim simx opae
|
||||
|
||||
stub:
|
||||
$(MAKE) -C stub
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
OPAE_HOME ?= /tools/opae/1.4.0
|
||||
|
||||
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -I../include -I/tools/opae/1.4.0/include -I../../hw
|
||||
CXXFLAGS += -I../include -I$(OPAE_HOME)/include -I../../hw
|
||||
|
||||
LDFLAGS += -L/tools/opae/1.4.0/lib
|
||||
LDFLAGS += -L$(OPAE_HOME)/lib
|
||||
|
||||
# stack execution protection
|
||||
LDFLAGS +=-z noexecstack
|
||||
|
@ -23,49 +24,60 @@ CXXFLAGS += -DDUMP_PERF_STATS
|
|||
# Enable scope analyzer
|
||||
#CXXFLAGS += -DSCOPE
|
||||
|
||||
LDFLAGS += -luuid
|
||||
|
||||
LDFLAGS += -shared
|
||||
|
||||
FPGA_LIBS += -lopae-c
|
||||
FPGA_LIBS += -luuid -lopae-c
|
||||
|
||||
ASE_LIBS += -lopae-c-ase
|
||||
ASE_LIBS += -luuid -lopae-c-ase
|
||||
|
||||
VLSIM_LIBS += -lopae-c-vlsim
|
||||
|
||||
LIB_DIR=../lib
|
||||
|
||||
ASE_DIR = ase
|
||||
|
||||
VLSIM_DIR = vlsim
|
||||
|
||||
PROJECT = libvortex.so
|
||||
|
||||
PROJECT_ASE = $(ASE_DIR)/libvortex.so
|
||||
|
||||
PROJECT_VLSIM = $(VLSIM_DIR)/libvortex.so
|
||||
|
||||
AFU_JSON_INFO = vortex_afu.h
|
||||
|
||||
SRCS = vortex.cpp scope.cpp ../common/vx_utils.cpp
|
||||
SRCS = vortex.cpp vx_scope.cpp ../common/vx_utils.cpp
|
||||
|
||||
all: $(PROJECT) $(PROJECT_ASE)
|
||||
all: vlsim
|
||||
|
||||
# AFU info from JSON file, including AFU UUID
|
||||
$(AFU_JSON_INFO): ../../hw/opae/vortex_afu.json
|
||||
json: ../../hw/opae/vortex_afu.json
|
||||
afu_json_mgr json-info --afu-json=$^ --c-hdr=$@
|
||||
|
||||
$(PROJECT): $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) $(FPGA_LIBS) -o $@
|
||||
fpga: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) $(FPGA_LIBS) -o $(PROJECT)
|
||||
|
||||
$(PROJECT_ASE): $(SRCS) $(ASE_DIR)
|
||||
$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $@
|
||||
ase: $(SRCS) $(ASE_DIR)
|
||||
$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $(PROJECT_ASE)
|
||||
|
||||
vortex.o: vortex.cpp $(AFU_JSON_INFO)
|
||||
vlsim: $(SRCS) opae-vlsim
|
||||
$(CXX) $(CXXFLAGS) -L./vlsim -DUSE_VLSIM $(SRCS) $(LDFLAGS) $(VLSIM_LIBS) -o $(PROJECT_VLSIM)
|
||||
|
||||
opae-vlsim:
|
||||
$(MAKE) -C vlsim
|
||||
|
||||
vortex.o: vortex.cpp
|
||||
$(CXX) $(CXXFLAGS) -c vortex.cpp -o $@
|
||||
|
||||
$(ASE_DIR):
|
||||
mkdir -p ase
|
||||
|
||||
.depend: $(SRCS) $(AFU_JSON_INFO)
|
||||
.depend: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) -MM $(SRCS) > .depend;
|
||||
|
||||
clean:
|
||||
rm -rf $(PROJECT) $(PROJECT_ASE) $(AFU_JSON_INFO) *.o .depend
|
||||
rm -rf $(PROJECT) $(PROJECT_ASE) $(PROJECT_VLSIM) *.o .depend
|
||||
$(MAKE) -C vlsim clean
|
||||
|
||||
ifneq ($(MAKECMDGOALS),clean)
|
||||
-include .depend
|
||||
|
|
78
driver/opae/vlsim/Makefile
Normal file
78
driver/opae/vlsim/Makefile
Normal file
|
@ -0,0 +1,78 @@
|
|||
#CFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -Wfatal-errors
|
||||
CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
|
||||
|
||||
CFLAGS += -I../../../../hw
|
||||
|
||||
# control RTL debug print states
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
||||
|
||||
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
||||
CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
||||
|
||||
#DEBUG=1
|
||||
|
||||
CFLAGS += -fPIC
|
||||
|
||||
CFLAGS += -DUSE_RTLSIM $(CONFIGS)
|
||||
|
||||
CFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
LDFLAGS += -shared -pthread
|
||||
# LDFLAGS += -dynamiclib -pthread
|
||||
|
||||
TOP = vortex_afu_shim
|
||||
|
||||
RTL_DIR = ../../../hw/rtl
|
||||
|
||||
SRCS = fpga.cpp opae_sim.cpp
|
||||
SRCS += $(RTL_DIR)/fp_cores/svdpi/float_dpi.cpp
|
||||
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/svdpi -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE)
|
||||
|
||||
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic $(CONFIGS)
|
||||
VL_FLAGS += -Wno-DECLFILENAME
|
||||
VL_FLAGS += --x-initial unique --x-assign unique
|
||||
VL_FLAGS += verilator.vlt
|
||||
|
||||
# Enable Verilator multithreaded simulation
|
||||
#THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
|
||||
#VL_FLAGS += --threads $(THREADS)
|
||||
|
||||
# Debugigng
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += -DVCD_OUTPUT --assert --trace $(DBG_FLAGS)
|
||||
CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
|
||||
else
|
||||
VL_FLAGS += -DNDEBUG
|
||||
CFLAGS += -DNDEBUG
|
||||
endif
|
||||
|
||||
VL_FLAGS += -DNOPAE
|
||||
CFLAGS += -DNOPAE
|
||||
VL_FLAGS += -DSCOPE
|
||||
CFLAGS += -DSCOPE
|
||||
RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip
|
||||
|
||||
PROJECT = libopae-c-vlsim.so
|
||||
|
||||
all: $(PROJECT)
|
||||
|
||||
$(PROJECT): $(SRCS)
|
||||
verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
|
||||
make -j -C obj_dir -f V$(TOP).mk
|
||||
|
||||
clean:
|
||||
rm -rf $(PROJECT) obj_dir
|
82
driver/opae/vlsim/fpga.cpp
Normal file
82
driver/opae/vlsim/fpga.cpp
Normal file
|
@ -0,0 +1,82 @@
|
|||
#include <stdint.h>
|
||||
#include <iostream>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <cstdlib>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include "fpga.h"
|
||||
#include "opae_sim.h"
|
||||
#include <VX_config.h>
|
||||
|
||||
extern fpga_result fpgaOpen(fpga_token token, fpga_handle *handle, int flags) {
|
||||
if (NULL == handle || flags != 0)
|
||||
return FPGA_INVALID_PARAM;
|
||||
auto sim = new opae_sim();
|
||||
*handle = reinterpret_cast<fpga_handle>(sim);
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaClose(fpga_handle handle) {
|
||||
if (NULL == handle)
|
||||
return FPGA_INVALID_PARAM;
|
||||
|
||||
auto sim = reinterpret_cast<opae_sim*>(handle);
|
||||
delete sim;
|
||||
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaPrepareBuffer(fpga_handle handle, uint64_t len, void **buf_addr, uint64_t *wsid, int flags) {
|
||||
if (NULL == handle || len == 0 || buf_addr == NULL || wsid == NULL)
|
||||
return FPGA_INVALID_PARAM;
|
||||
|
||||
auto sim = reinterpret_cast<opae_sim*>(handle);
|
||||
sim->prepare_buffer(len, buf_addr, wsid, flags);
|
||||
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaReleaseBuffer(fpga_handle handle, uint64_t wsid) {
|
||||
if (NULL == handle)
|
||||
return FPGA_INVALID_PARAM;
|
||||
|
||||
auto sim = reinterpret_cast<opae_sim*>(handle);
|
||||
sim->release_buffer(wsid);
|
||||
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaGetIOAddress(fpga_handle handle, uint64_t wsid, uint64_t *ioaddr) {
|
||||
if (NULL == handle || ioaddr == NULL)
|
||||
return FPGA_INVALID_PARAM;
|
||||
|
||||
auto sim = reinterpret_cast<opae_sim*>(handle);
|
||||
sim->get_io_address(wsid, ioaddr);
|
||||
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaWriteMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t value) {
|
||||
if (NULL == handle || mmio_num != 0)
|
||||
return FPGA_INVALID_PARAM;
|
||||
|
||||
auto sim = reinterpret_cast<opae_sim*>(handle);
|
||||
sim->write_mmio64(mmio_num, offset, value);
|
||||
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaReadMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t *value) {
|
||||
if (NULL == handle || mmio_num != 0 || value == NULL)
|
||||
return FPGA_INVALID_PARAM;
|
||||
|
||||
auto sim = reinterpret_cast<opae_sim*>(handle);
|
||||
sim->read_mmio64(mmio_num, offset, value);
|
||||
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern const char *fpgaErrStr(fpga_result e) {
|
||||
return "";
|
||||
}
|
48
driver/opae/vlsim/fpga.h
Normal file
48
driver/opae/vlsim/fpga.h
Normal file
|
@ -0,0 +1,48 @@
|
|||
#ifndef __FPGA_H__
|
||||
#define __FPGA_H__
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef enum {
|
||||
FPGA_OK = 0, /**< Operation completed successfully */
|
||||
FPGA_INVALID_PARAM, /**< Invalid parameter supplied */
|
||||
FPGA_BUSY, /**< Resource is busy */
|
||||
FPGA_EXCEPTION, /**< An exception occurred */
|
||||
FPGA_NOT_FOUND, /**< A required resource was not found */
|
||||
FPGA_NO_MEMORY, /**< Not enough memory to complete operation */
|
||||
FPGA_NOT_SUPPORTED, /**< Requested operation is not supported */
|
||||
FPGA_NO_DRIVER, /**< Driver is not loaded */
|
||||
FPGA_NO_DAEMON, /**< FPGA Daemon (fpgad) is not running */
|
||||
FPGA_NO_ACCESS, /**< Insufficient privileges or permissions */
|
||||
FPGA_RECONF_ERROR /**< Error while reconfiguring FPGA */
|
||||
} fpga_result;
|
||||
|
||||
typedef void *fpga_handle;
|
||||
|
||||
typedef void *fpga_token;
|
||||
|
||||
fpga_result fpgaOpen(fpga_token token, fpga_handle *handle, int flags);
|
||||
|
||||
fpga_result fpgaClose(fpga_handle handle);
|
||||
|
||||
fpga_result fpgaPrepareBuffer(fpga_handle handle, uint64_t len, void **buf_addr, uint64_t *wsid, int flags);
|
||||
|
||||
fpga_result fpgaReleaseBuffer(fpga_handle handle, uint64_t wsid);
|
||||
|
||||
fpga_result fpgaGetIOAddress(fpga_handle handle, uint64_t wsid, uint64_t *ioaddr);
|
||||
|
||||
fpga_result fpgaWriteMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t value);
|
||||
|
||||
fpga_result fpgaReadMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t *value);
|
||||
|
||||
const char *fpgaErrStr(fpga_result e);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif // __cplusplus
|
||||
|
||||
#endif // __FPGA_H__
|
261
driver/opae/vlsim/opae_sim.cpp
Normal file
261
driver/opae/vlsim/opae_sim.cpp
Normal file
|
@ -0,0 +1,261 @@
|
|||
#include "opae_sim.h"
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
|
||||
#define CCI_LATENCY 8
|
||||
#define CCI_RQ_SIZE 16
|
||||
#define CCI_WQ_SIZE 16
|
||||
|
||||
#define ENABLE_DRAM_STALLS
|
||||
#define DRAM_LATENCY 4
|
||||
#define DRAM_RQ_SIZE 16
|
||||
#define DRAM_STALLS_MODULO 16
|
||||
|
||||
uint64_t timestamp = 0;
|
||||
|
||||
double sc_time_stamp() {
|
||||
return timestamp;
|
||||
}
|
||||
|
||||
opae_sim::opae_sim() {
|
||||
// force random values for unitialized signals
|
||||
Verilated::randReset(2);
|
||||
Verilated::randSeed(50);
|
||||
|
||||
// Turn off assertion before reset
|
||||
Verilated::assertOn(false);
|
||||
|
||||
stop_ = false;
|
||||
vortex_afu_ = new Vvortex_afu_shim();
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
Verilated::traceEverOn(true);
|
||||
trace_ = new VerilatedVcdC();
|
||||
vortex_afu_->trace(trace_, 99);
|
||||
trace_->open("trace.vcd");
|
||||
#endif
|
||||
|
||||
future_ = std::async(std::launch::async, [&]{
|
||||
this->reset();
|
||||
while (stop_) {
|
||||
this->step();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
opae_sim::~opae_sim() {
|
||||
stop_ = true;
|
||||
if (future_.valid()) {
|
||||
future_.wait();
|
||||
}
|
||||
#ifdef VCD_OUTPUT
|
||||
trace_->close();
|
||||
#endif
|
||||
delete vortex_afu_;
|
||||
}
|
||||
|
||||
void opae_sim::prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags) {
|
||||
host_alloc_t alloc;
|
||||
alloc.data = new uint8_t[len];
|
||||
alloc.size = len;
|
||||
*wsid = host_allocs_.size();
|
||||
host_allocs_.push_back(alloc);
|
||||
}
|
||||
|
||||
void opae_sim::release_buffer(uint64_t wsid) {
|
||||
delete [] host_allocs_[wsid].data;
|
||||
host_allocs_.erase(host_allocs_.begin() + wsid);
|
||||
}
|
||||
|
||||
void opae_sim::get_io_address(uint64_t wsid, uint64_t *ioaddr) {
|
||||
*ioaddr = (intptr_t)host_allocs_[wsid].data / GLOBAL_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value) {
|
||||
vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 1;
|
||||
vortex_afu_->vcp2af_sRxPort_c0_hdr_resp_type = offset;
|
||||
memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, &value, 8);
|
||||
}
|
||||
|
||||
void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) {
|
||||
vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 1;
|
||||
vortex_afu_->vcp2af_sRxPort_c0_hdr_resp_type = offset;
|
||||
while (0 == vortex_afu_->af2cp_sTxPort_c2_mmioRdValid);
|
||||
*value = vortex_afu_->af2cp_sTxPort_c2_data;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void opae_sim::reset() {
|
||||
vortex_afu_->clk = 0;
|
||||
this->eval();
|
||||
|
||||
vortex_afu_->clk = 1;
|
||||
this->eval();
|
||||
}
|
||||
|
||||
void opae_sim::step() {
|
||||
vortex_afu_->clk = 0;
|
||||
this->eval();
|
||||
|
||||
vortex_afu_->clk = 1;
|
||||
this->eval();
|
||||
|
||||
this->sRxPort_bus();
|
||||
this->sTxPort_bus();
|
||||
this->avs_bus();
|
||||
}
|
||||
|
||||
void opae_sim::eval() {
|
||||
vortex_afu_->eval();
|
||||
#ifdef VCD_OUTPUT
|
||||
trace_->dump(timestamp);
|
||||
#endif
|
||||
++timestamp;
|
||||
}
|
||||
|
||||
void opae_sim::sRxPort_bus() {
|
||||
// schedule CCI read responses
|
||||
int cci_rd_index = -1;
|
||||
for (int i = 0; i < cci_reads_.size(); i++) {
|
||||
if (cci_reads_[i].cycles_left > 0) {
|
||||
cci_reads_[i].cycles_left -= 1;
|
||||
}
|
||||
if ((cci_rd_index == -1)
|
||||
&& (cci_reads_[i].cycles_left == 0)) {
|
||||
cci_rd_index = i;
|
||||
}
|
||||
}
|
||||
|
||||
// schedule CCI write responses
|
||||
int cci_wr_index = -1;
|
||||
for (int i = 0; i < cci_writes_.size(); i++) {
|
||||
if (cci_writes_[i].cycles_left > 0) {
|
||||
cci_writes_[i].cycles_left -= 1;
|
||||
}
|
||||
if ((cci_wr_index == -1)
|
||||
&& (cci_writes_[i].cycles_left == 0)) {
|
||||
cci_wr_index = i;
|
||||
}
|
||||
}
|
||||
|
||||
// send CCI read response
|
||||
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0;
|
||||
if (cci_rd_index != -1) {
|
||||
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 1;
|
||||
memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, cci_reads_[cci_rd_index].block.data(), GLOBAL_BLOCK_SIZE);
|
||||
vortex_afu_->vcp2af_sRxPort_c0_hdr_mdata = cci_reads_[cci_rd_index].mdata;
|
||||
cci_reads_.erase(cci_reads_.begin() + cci_rd_index);
|
||||
}
|
||||
|
||||
// send CCI write response
|
||||
vortex_afu_->vcp2af_sRxPort_c1_rspValid = 0;
|
||||
if (cci_wr_index != -1) {
|
||||
vortex_afu_->vcp2af_sRxPort_c1_rspValid = 1;
|
||||
vortex_afu_->vcp2af_sRxPort_c1_hdr_mdata = cci_writes_[cci_wr_index].mdata;
|
||||
cci_writes_.erase(cci_writes_.begin() + cci_wr_index);
|
||||
}
|
||||
|
||||
// mmio
|
||||
vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 0;
|
||||
vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 0;
|
||||
}
|
||||
|
||||
void opae_sim::sTxPort_bus() {
|
||||
// check read queue size
|
||||
vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = (cci_reads_.size() >= CCI_RQ_SIZE);
|
||||
|
||||
// check write queue size
|
||||
vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = (cci_writes_.size() >= CCI_WQ_SIZE);
|
||||
|
||||
// process read requests
|
||||
if (vortex_afu_->af2cp_sTxPort_c0_valid && !vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull) {
|
||||
cci_rd_req_t cci_req;
|
||||
cci_req.cycles_left = CCI_LATENCY;
|
||||
cci_req.mdata = vortex_afu_->af2cp_sTxPort_c0_hdr_mdata;
|
||||
auto host_ptr = this->find_host_ptr(vortex_afu_->af2cp_sTxPort_c0_hdr_address);
|
||||
memcpy(cci_req.block.data(), host_ptr, GLOBAL_BLOCK_SIZE);
|
||||
cci_reads_.push_back(cci_req);
|
||||
}
|
||||
|
||||
// process write requests
|
||||
if (vortex_afu_->af2cp_sTxPort_c1_valid && !vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull) {
|
||||
cci_wr_req_t cci_req;
|
||||
cci_req.cycles_left = CCI_LATENCY;
|
||||
cci_req.mdata = vortex_afu_->af2cp_sTxPort_c1_hdr_mdata;
|
||||
auto host_ptr = this->find_host_ptr(vortex_afu_->af2cp_sTxPort_c1_hdr_address);
|
||||
memcpy(host_ptr, vortex_afu_->af2cp_sTxPort_c1_data, GLOBAL_BLOCK_SIZE);
|
||||
cci_writes_.push_back(cci_req);
|
||||
}
|
||||
}
|
||||
|
||||
void opae_sim::avs_bus() {
|
||||
// schedule DRAM read responses
|
||||
int dram_rd_index = -1;
|
||||
for (int i = 0; i < dram_reads_.size(); i++) {
|
||||
if (dram_reads_[i].cycles_left > 0) {
|
||||
dram_reads_[i].cycles_left -= 1;
|
||||
}
|
||||
if ((dram_rd_index == -1)
|
||||
&& (dram_reads_[i].cycles_left == 0)) {
|
||||
dram_rd_index = i;
|
||||
}
|
||||
}
|
||||
|
||||
// send DRAM response
|
||||
vortex_afu_->avs_readdatavalid = 0;
|
||||
if (dram_rd_index != -1) {
|
||||
vortex_afu_->avs_readdatavalid = 1;
|
||||
memcpy(vortex_afu_->avs_readdata, dram_reads_[dram_rd_index].block.data(), GLOBAL_BLOCK_SIZE);
|
||||
dram_reads_.erase(dram_reads_.begin() + dram_rd_index);
|
||||
}
|
||||
|
||||
// handle DRAM stalls
|
||||
bool dram_stalled = false;
|
||||
#ifdef ENABLE_DRAM_STALLS
|
||||
if (0 == ((timestamp/2) % DRAM_STALLS_MODULO)) {
|
||||
dram_stalled = true;
|
||||
} else
|
||||
if (dram_reads_.size() >= DRAM_RQ_SIZE) {
|
||||
dram_stalled = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
// process DRAM requests
|
||||
if (!dram_stalled) {
|
||||
if (vortex_afu_->avs_write) {
|
||||
assert(0 == vortex_afu_->mem_bank_select);
|
||||
uint64_t byteen = vortex_afu_->avs_byteenable;
|
||||
unsigned base_addr = (vortex_afu_->avs_address * GLOBAL_BLOCK_SIZE);
|
||||
uint8_t* data = (uint8_t*)(vortex_afu_->avs_writedata);
|
||||
for (int i = 0; i < GLOBAL_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
ram_[base_addr + i] = data[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
if (vortex_afu_->avs_read) {
|
||||
assert(0 == vortex_afu_->mem_bank_select);
|
||||
dram_rd_req_t dram_req;
|
||||
dram_req.cycles_left = DRAM_LATENCY;
|
||||
ram_.read(vortex_afu_->avs_address * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.block.data());
|
||||
dram_reads_.push_back(dram_req);
|
||||
}
|
||||
}
|
||||
|
||||
vortex_afu_->avs_waitrequest = dram_stalled;
|
||||
}
|
||||
|
||||
uint8_t* opae_sim::find_host_ptr(uint64_t addr) {
|
||||
auto b_addr = addr * GLOBAL_BLOCK_SIZE;
|
||||
for (auto& host_alloc : host_allocs_) {
|
||||
auto alloc_addr = (intptr_t)host_alloc.data;
|
||||
if (b_addr >= alloc_addr
|
||||
&& b_addr < (alloc_addr + host_alloc.size)) {
|
||||
return (uint8_t*)b_addr;
|
||||
}
|
||||
}
|
||||
assert(false);
|
||||
return nullptr;
|
||||
}
|
87
driver/opae/vlsim/opae_sim.h
Normal file
87
driver/opae/vlsim/opae_sim.h
Normal file
|
@ -0,0 +1,87 @@
|
|||
#pragma once
|
||||
|
||||
#include "Vvortex_afu_shim.h"
|
||||
#include "Vvortex_afu_shim__Syms.h"
|
||||
#include "verilated.h"
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
#include <verilated_vcd_c.h>
|
||||
#endif
|
||||
|
||||
#include <VX_config.h>
|
||||
#include "ram.h"
|
||||
|
||||
#include <ostream>
|
||||
#include <future>
|
||||
#include <vector>
|
||||
|
||||
class opae_sim {
|
||||
public:
|
||||
|
||||
opae_sim();
|
||||
virtual ~opae_sim();
|
||||
|
||||
void prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags);
|
||||
|
||||
void release_buffer(uint64_t wsid);
|
||||
|
||||
void get_io_address(uint64_t wsid, uint64_t *ioaddr);
|
||||
|
||||
void write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value);
|
||||
|
||||
void read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value);
|
||||
|
||||
private:
|
||||
|
||||
typedef struct {
|
||||
int cycles_left;
|
||||
std::array<uint8_t, GLOBAL_BLOCK_SIZE> block;
|
||||
unsigned tag;
|
||||
} dram_rd_req_t;
|
||||
|
||||
typedef struct {
|
||||
int cycles_left;
|
||||
std::array<uint8_t, GLOBAL_BLOCK_SIZE> block;
|
||||
unsigned mdata;
|
||||
} cci_rd_req_t;
|
||||
|
||||
typedef struct {
|
||||
int cycles_left;
|
||||
std::array<uint8_t, GLOBAL_BLOCK_SIZE> block;
|
||||
unsigned mdata;
|
||||
} cci_wr_req_t;
|
||||
|
||||
typedef struct {
|
||||
uint8_t* data;
|
||||
size_t size;
|
||||
} host_alloc_t;
|
||||
|
||||
void reset();
|
||||
|
||||
void eval();
|
||||
|
||||
void step();
|
||||
|
||||
void sRxPort_bus();
|
||||
void sTxPort_bus();
|
||||
void avs_bus();
|
||||
|
||||
uint8_t* find_host_ptr(uint64_t addr);
|
||||
|
||||
std::future<void> future_;
|
||||
bool stop_;
|
||||
|
||||
std::vector<host_alloc_t> host_allocs_;
|
||||
|
||||
std::vector<dram_rd_req_t> dram_reads_;
|
||||
|
||||
std::vector<cci_rd_req_t> cci_reads_;
|
||||
|
||||
std::vector<cci_wr_req_t> cci_writes_;
|
||||
|
||||
RAM ram_;
|
||||
Vvortex_afu_shim *vortex_afu_;
|
||||
#ifdef VCD_OUTPUT
|
||||
VerilatedVcdC *trace_;
|
||||
#endif
|
||||
};
|
64
driver/opae/vlsim/ram.h
Normal file
64
driver/opae/vlsim/ram.h
Normal file
|
@ -0,0 +1,64 @@
|
|||
#pragma once
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
class RAM {
|
||||
private:
|
||||
|
||||
mutable uint8_t *mem_[(1 << 12)];
|
||||
|
||||
uint8_t *get(uint32_t address) const {
|
||||
uint32_t block_addr = address >> 20;
|
||||
uint32_t block_offset = address & 0x000FFFFF;
|
||||
if (mem_[block_addr] == NULL) {
|
||||
mem_[block_addr] = new uint8_t[(1 << 20)];
|
||||
}
|
||||
return mem_[block_addr] + block_offset;
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
RAM() {
|
||||
for (uint32_t i = 0; i < (1 << 12); i++) {
|
||||
mem_[i] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
~RAM() {
|
||||
this->clear();
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
return (1ull << 32);
|
||||
}
|
||||
|
||||
void clear() {
|
||||
for (uint32_t i = 0; i < (1 << 12); i++) {
|
||||
if (mem_[i]) {
|
||||
delete mem_[i];
|
||||
mem_[i] = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void read(uint32_t address, uint32_t length, uint8_t *data) const {
|
||||
for (unsigned i = 0; i < length; i++) {
|
||||
data[i] = *this->get(address + i);
|
||||
}
|
||||
}
|
||||
|
||||
void write(uint32_t address, uint32_t length, const uint8_t *data) {
|
||||
for (unsigned i = 0; i < length; i++) {
|
||||
*this->get(address + i) = data[i];
|
||||
}
|
||||
}
|
||||
|
||||
uint8_t& operator[](uint32_t address) {
|
||||
return *get(address);
|
||||
}
|
||||
|
||||
const uint8_t& operator[](uint32_t address) const {
|
||||
return *get(address);
|
||||
}
|
||||
};
|
9
driver/opae/vlsim/verilator.vlt
Normal file
9
driver/opae/vlsim/verilator.vlt
Normal file
|
@ -0,0 +1,9 @@
|
|||
`verilator_config
|
||||
|
||||
lint_off -rule BLKANDNBLK -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule UNOPTFLAT -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule WIDTH -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule UNUSED -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule LITENDIAN -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule IMPORTSTAR -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule PINCONNECTEMPTY -file "../../../hw/rtl/fp_cores/fpnew/*"
|
|
@ -1,11 +1,11 @@
|
|||
`include "vortex_afu.vh"
|
||||
|
||||
`include "VX_define.vh"
|
||||
/* verilator lint_off IMPORTSTAR */
|
||||
import ccip_if_pkg::*;
|
||||
import local_mem_cfg_pkg::*;
|
||||
/* verilator lint_on IMPORTSTAR */
|
||||
|
||||
module vortex_afu_sim #(
|
||||
module vortex_afu_shim #(
|
||||
parameter NUM_LOCAL_MEM_BANKS = 2
|
||||
) (
|
||||
// global signals
|
|
@ -6,13 +6,20 @@
|
|||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <cmath>
|
||||
#include <uuid/uuid.h>
|
||||
|
||||
#ifdef USE_VLSIM
|
||||
#include "vlsim/fpga.h"
|
||||
#else
|
||||
#include <opae/fpga.h>
|
||||
#include <uuid/uuid.h>
|
||||
#endif
|
||||
|
||||
#include <vortex.h>
|
||||
#include <VX_config.h>
|
||||
#include "vortex_afu.h"
|
||||
|
||||
#ifdef SCOPE
|
||||
#include "scope.h"
|
||||
#include "vx_scope.h"
|
||||
#endif
|
||||
|
||||
#define CACHE_LINESIZE 64
|
||||
|
@ -122,14 +129,16 @@ extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
|
|||
extern int vx_dev_open(vx_device_h* hdevice) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
fpga_properties filter = nullptr;
|
||||
fpga_result res;
|
||||
fpga_guid guid;
|
||||
fpga_token accel_token;
|
||||
uint32_t num_matches;
|
||||
fpga_handle accel_handle;
|
||||
|
||||
fpga_result res;
|
||||
fpga_handle accel_handle;
|
||||
vx_device_t* device;
|
||||
|
||||
#ifndef USE_VLSIM
|
||||
fpga_token accel_token;
|
||||
fpga_properties filter = nullptr;
|
||||
fpga_guid guid;
|
||||
uint32_t num_matches;
|
||||
|
||||
// Set up a filter that will search for an accelerator
|
||||
fpgaGetProperties(nullptr, &filter);
|
||||
|
@ -159,6 +168,13 @@ extern int vx_dev_open(vx_device_h* hdevice) {
|
|||
|
||||
// Done with token
|
||||
fpgaDestroyToken(&accel_token);
|
||||
#else
|
||||
// Open accelerator
|
||||
res = fpgaOpen(NULL, &accel_handle, 0);
|
||||
if (FPGA_OK != res) {
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
|
||||
// allocate device object
|
||||
device = (vx_device_t*)malloc(sizeof(vx_device_t));
|
||||
|
|
30
driver/opae/vortex_afu.h
Normal file
30
driver/opae/vortex_afu.h
Normal file
|
@ -0,0 +1,30 @@
|
|||
//
|
||||
// Generated by afu_json_mgr from ../../hw/opae/vortex_afu.json
|
||||
//
|
||||
|
||||
#ifndef __AFU_JSON_INFO__
|
||||
#define __AFU_JSON_INFO__
|
||||
|
||||
#define AFU_ACCEL_NAME "vortex_afu"
|
||||
#define AFU_ACCEL_UUID "35F9452B-25C2-434C-93D5-6F8C60DB361C"
|
||||
#define AFU_IMAGE_CMD_CLFLUSH 4
|
||||
#define AFU_IMAGE_CMD_CSR_READ 5
|
||||
#define AFU_IMAGE_CMD_CSR_WRITE 6
|
||||
#define AFU_IMAGE_CMD_MEM_READ 1
|
||||
#define AFU_IMAGE_CMD_MEM_WRITE 2
|
||||
#define AFU_IMAGE_CMD_RUN 3
|
||||
#define AFU_IMAGE_MMIO_CMD_TYPE 10
|
||||
#define AFU_IMAGE_MMIO_CSR_ADDR 26
|
||||
#define AFU_IMAGE_MMIO_CSR_CORE 24
|
||||
#define AFU_IMAGE_MMIO_CSR_DATA 28
|
||||
#define AFU_IMAGE_MMIO_CSR_READ 30
|
||||
#define AFU_IMAGE_MMIO_DATA_SIZE 16
|
||||
#define AFU_IMAGE_MMIO_IO_ADDR 12
|
||||
#define AFU_IMAGE_MMIO_MEM_ADDR 14
|
||||
#define AFU_IMAGE_MMIO_SCOPE_READ 20
|
||||
#define AFU_IMAGE_MMIO_SCOPE_WRITE 22
|
||||
#define AFU_IMAGE_MMIO_STATUS 18
|
||||
#define AFU_IMAGE_POWER 0
|
||||
#define AFU_TOP_IFC "ccip_std_afu_avalon_mm"
|
||||
|
||||
#endif // __AFU_JSON_INFO__
|
|
@ -4,8 +4,15 @@
|
|||
#include <chrono>
|
||||
#include <vector>
|
||||
#include <assert.h>
|
||||
|
||||
#ifdef USE_VLSIM
|
||||
#include "vlsim/fpga.h"
|
||||
#else
|
||||
#include <opae/fpga.h>
|
||||
#endif
|
||||
|
||||
#include <VX_config.h>
|
||||
#include "scope.h"
|
||||
#include "vx_scope.h"
|
||||
#include "vortex_afu.h"
|
||||
|
||||
#define CHECK_RES(_expr) \
|
||||
|
@ -18,12 +25,6 @@
|
|||
return -1; \
|
||||
} while (false)
|
||||
|
||||
|
||||
template<int N>
|
||||
constexpr bool static_print() {
|
||||
return (0 < N < 100);
|
||||
}
|
||||
|
||||
#define MMIO_SCOPE_READ (AFU_IMAGE_MMIO_SCOPE_READ * 4)
|
||||
#define MMIO_SCOPE_WRITE (AFU_IMAGE_MMIO_SCOPE_WRITE * 4)
|
||||
|
||||
|
@ -33,8 +34,7 @@ struct scope_signal_t {
|
|||
};
|
||||
|
||||
constexpr int ilog2(int n) {
|
||||
return (n > 1) ? 1 +
|
||||
ilog2(n >> 1) : 0;
|
||||
return (n > 1) ? 1 + ilog2(n >> 1) : 0;
|
||||
}
|
||||
|
||||
static constexpr int NW_BITS = ilog2(NUM_WARPS);
|
|
@ -1,7 +1,5 @@
|
|||
#pragma once
|
||||
|
||||
#include <opae/fpga.h>
|
||||
|
||||
int vx_scope_start(fpga_handle hfpga, uint64_t delay = -1);
|
||||
|
||||
int vx_scope_stop(fpga_handle hfpga, uint64_t delay = -1);
|
|
@ -22,7 +22,6 @@ DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
|||
CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
||||
|
||||
#DEBUG=1
|
||||
#AFU=1
|
||||
|
||||
CFLAGS += -fPIC
|
||||
|
||||
|
@ -35,11 +34,13 @@ LDFLAGS += -shared -pthread
|
|||
|
||||
TOP = Vortex
|
||||
|
||||
SRCS = vortex.cpp ../common/vx_utils.cpp ../../hw/simulate/simulator.cpp
|
||||
SRCS += ../../hw/rtl/fp_cores/svdpi/float_dpi.cpp
|
||||
RTL_DIR = ../../hw/rtl
|
||||
|
||||
FPU_INCLUDE = -I../../hw/rtl/fp_cores -I../../hw/rtl/fp_cores/svdpi -I../../hw/rtl/fp_cores/fpnew/src/common_cells/include -I../../hw/rtl/fp_cores/fpnew/src/common_cells/src -I../../hw/rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I../../hw/rtl/fp_cores/fpnew/src
|
||||
RTL_INCLUDE = -I../../hw/rtl -I../../hw/rtl/libs -I../../hw/rtl/interfaces -I../../hw/rtl/cache $(FPU_INCLUDE)
|
||||
SRCS = vortex.cpp ../common/vx_utils.cpp ../../hw/simulate/simulator.cpp
|
||||
SRCS += $(RTL_DIR)/fp_cores/svdpi/float_dpi.cpp
|
||||
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/svdpi -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE)
|
||||
|
||||
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic $(CONFIGS)
|
||||
VL_FLAGS += -Wno-DECLFILENAME
|
||||
|
@ -59,16 +60,6 @@ else
|
|||
CFLAGS += -DNDEBUG
|
||||
endif
|
||||
|
||||
# AFU
|
||||
ifdef AFU
|
||||
TOP = vortex_afu_sim
|
||||
VL_FLAGS += -DNOPAE
|
||||
CFLAGS += -DNOPAE
|
||||
VL_FLAGS += -DSCOPE
|
||||
CFLAGS += -DSCOPE
|
||||
RTL_INCLUDE += -I../../hw/opae -I../../hw/opae/ccip
|
||||
endif
|
||||
|
||||
PROJECT = libvortex.so
|
||||
# PROJECT = libvortex.dylib
|
||||
|
||||
|
|
|
@ -3,6 +3,11 @@
|
|||
#include <fstream>
|
||||
#include <iomanip>
|
||||
|
||||
#define ENABLE_DRAM_STALLS
|
||||
#define DRAM_LATENCY 4
|
||||
#define DRAM_RQ_SIZE 16
|
||||
#define DRAM_STALLS_MODULO 16
|
||||
|
||||
uint64_t timestamp = 0;
|
||||
|
||||
double sc_time_stamp() {
|
||||
|
|
|
@ -14,17 +14,6 @@
|
|||
#include <ostream>
|
||||
#include <vector>
|
||||
|
||||
#define ENABLE_DRAM_STALLS
|
||||
#define DRAM_LATENCY 4
|
||||
#define DRAM_RQ_SIZE 16
|
||||
#define DRAM_STALLS_MODULO 16
|
||||
|
||||
typedef struct {
|
||||
int cycles_left;
|
||||
std::array<uint8_t, GLOBAL_BLOCK_SIZE> block;
|
||||
unsigned tag;
|
||||
} dram_req_t;
|
||||
|
||||
class Simulator {
|
||||
public:
|
||||
|
||||
|
@ -53,6 +42,12 @@ public:
|
|||
|
||||
private:
|
||||
|
||||
typedef struct {
|
||||
int cycles_left;
|
||||
std::array<uint8_t, GLOBAL_BLOCK_SIZE> block;
|
||||
unsigned tag;
|
||||
} dram_req_t;
|
||||
|
||||
void eval();
|
||||
|
||||
void eval_dram_bus();
|
||||
|
@ -61,7 +56,7 @@ private:
|
|||
void eval_snp_bus();
|
||||
|
||||
std::vector<dram_req_t> dram_rsp_vec_;
|
||||
int dram_rsp_active_;
|
||||
bool dram_rsp_active_;
|
||||
|
||||
bool snp_req_active_;
|
||||
bool csr_req_active_;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue