mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
opae driver refactoring
This commit is contained in:
parent
d0da975b26
commit
50c7adbd3c
16 changed files with 474 additions and 416 deletions
|
@ -9,7 +9,7 @@ show_usage()
|
|||
SCRIPT_DIR=$(dirname "$0")
|
||||
VORTEX_HOME=$SCRIPT_DIR/..
|
||||
|
||||
DRIVER=vlsim
|
||||
DRIVER=simx
|
||||
APP=sgemm
|
||||
CLUSTERS=1
|
||||
CORES=1
|
||||
|
@ -95,20 +95,14 @@ esac
|
|||
done
|
||||
|
||||
case $DRIVER in
|
||||
simx)
|
||||
DRIVER_PATH=$VORTEX_HOME/runtime/simx
|
||||
;;
|
||||
rtlsim)
|
||||
DRIVER_PATH=$VORTEX_HOME/runtime/rtlsim
|
||||
;;
|
||||
vlsim)
|
||||
DRIVER_PATH=$VORTEX_HOME/runtime/vlsim
|
||||
;;
|
||||
asesim)
|
||||
DRIVER_PATH=$VORTEX_HOME/runtime/asesim
|
||||
;;
|
||||
fpga)
|
||||
DRIVER_PATH=$VORTEX_HOME/runtime/fpga
|
||||
;;
|
||||
simx)
|
||||
DRIVER_PATH=$VORTEX_HOME/runtime/simx
|
||||
opae)
|
||||
DRIVER_PATH=$VORTEX_HOME/runtime/opae
|
||||
;;
|
||||
xrt)
|
||||
DRIVER_PATH=$VORTEX_HOME/runtime/xrt
|
||||
|
|
|
@ -37,7 +37,9 @@ translation_rules = [
|
|||
# literals
|
||||
(re.compile(r"\d+'d(\d+)"), r'\1'),
|
||||
(re.compile(r"\d+'b([01]+)"), r'0b\1'),
|
||||
(re.compile(r"128'h([\da-fA-F_]+)"), r'"\1"'),
|
||||
(re.compile(r"\d+'h([\da-fA-F]+)"), r'0x\1')
|
||||
|
||||
]
|
||||
|
||||
with open(args.output, 'w') as f:
|
||||
|
|
|
@ -161,9 +161,9 @@ def add_macro(name, args, value):
|
|||
print(args[i], end='')
|
||||
print(")=" + value)
|
||||
else:
|
||||
print("*** token: " + name + "=" + value)
|
||||
print("*** token: " + name + "=" + value)
|
||||
'''
|
||||
|
||||
|
||||
def find_macro(name):
|
||||
for macro in macros:
|
||||
if macro[0] == name:
|
||||
|
@ -278,7 +278,7 @@ def expand_text(text, params):
|
|||
return None
|
||||
|
||||
def parse_include(filename, nesting):
|
||||
print("*** parsing: " + filename + "...")
|
||||
print("*** parsing: " + filename + "...")
|
||||
if nesting > 99:
|
||||
raise Exception("include recursion!")
|
||||
#print("*** parsing '" + filename + "'...")
|
||||
|
@ -311,7 +311,7 @@ def parse_include(filename, nesting):
|
|||
taken = find_macro(cond) is not None
|
||||
if key == 'ifndef':
|
||||
taken = not taken
|
||||
elif key == '"elsif':
|
||||
elif key == 'elsif':
|
||||
br_stack.pop()
|
||||
br_stack.append(taken)
|
||||
#print("*** " + key + "(" + cond + ") => " + str(taken))
|
||||
|
@ -353,6 +353,7 @@ def parse_include(filename, nesting):
|
|||
value = m.group(3)
|
||||
add_macro(name, args, value.strip())
|
||||
continue
|
||||
print("*** exiting: " + filename + "...")
|
||||
|
||||
def parse_includes(includes):
|
||||
# change current directory to include directory
|
||||
|
@ -666,8 +667,7 @@ def gen_vl_header(file, modules, taps):
|
|||
|
||||
def gen_cc_header(file, taps):
|
||||
|
||||
header = '''
|
||||
#pragma once
|
||||
header = '''#pragma once
|
||||
|
||||
struct scope_module_t {
|
||||
const char* name;
|
||||
|
|
|
@ -1,73 +0,0 @@
|
|||
OPAE_HOME ?= /tools/opae/1.4.0
|
||||
|
||||
RTL_DIR=../../hw/rtl
|
||||
|
||||
SCRIPT_DIR=../../hw/scripts
|
||||
|
||||
OPAE_SYN_DIR=../../hw/syn/altera/opae
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -I. -I../include -I../../hw -I$(OPAE_HOME)/include -I$(OPAE_SYN_DIR)
|
||||
|
||||
LDFLAGS += -L$(OPAE_HOME)/lib -luuid -lopae-c-ase
|
||||
|
||||
# stack execution protection
|
||||
LDFLAGS +=-z noexecstack
|
||||
|
||||
# data relocation and protection
|
||||
LDFLAGS +=-z relro -z now
|
||||
|
||||
# stack buffer overrun detection
|
||||
CXXFLAGS +=-fstack-protector
|
||||
|
||||
# Position independent code
|
||||
CXXFLAGS += -fPIC
|
||||
|
||||
# Add external configuration
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
|
||||
# Dump perf stats
|
||||
CXXFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
LDFLAGS += -shared
|
||||
|
||||
PROJECT = libvortex.so
|
||||
|
||||
SRCS = ../common/opae.cpp ../common/vx_utils.cpp
|
||||
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
CXXFLAGS += -g -O0
|
||||
else
|
||||
CXXFLAGS += -O2 -DNDEBUG
|
||||
endif
|
||||
|
||||
# Enable scope analyzer
|
||||
ifdef SCOPE
|
||||
CXXFLAGS += -DSCOPE
|
||||
SRCS += ../common/vx_scope.cpp
|
||||
SCOPE_H = scope-defs.h
|
||||
endif
|
||||
|
||||
# Enable perf counters
|
||||
ifdef PERF
|
||||
CXXFLAGS += -DPERF_ENABLE
|
||||
endif
|
||||
|
||||
all: $(PROJECT)
|
||||
|
||||
$(OPAE_SYN_DIR)/vortex_afu.h:
|
||||
$(MAKE) -C $(OPAE_SYN_DIR) vortex_afu.h
|
||||
|
||||
scope-defs.h: $(SCRIPT_DIR)/scope.json
|
||||
$(SCRIPT_DIR)/scope.py $(CONFIGS) -cc scope-defs.h -vl $(RTL_DIR)/scope-defs.vh $(SCRIPT_DIR)/scope.json
|
||||
|
||||
# generate scope data
|
||||
scope: scope-defs.h
|
||||
|
||||
$(PROJECT): $(SRCS) $(OPAE_SYN_DIR)/vortex_afu.h $(SCOPE_H)
|
||||
$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) -o $(PROJECT)
|
||||
|
||||
clean:
|
||||
rm -rf $(PROJECT) *.o scope-defs.h
|
|
@ -1,19 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef USE_VLSIM
|
||||
#include <fpga.h>
|
||||
#else
|
||||
#include <opae/fpga.h>
|
||||
#endif
|
||||
|
||||
#if defined(USE_FPGA)
|
||||
#define HANG_TIMEOUT 60
|
||||
#else
|
||||
#define HANG_TIMEOUT (30*60)
|
||||
#endif
|
||||
|
||||
int vx_scope_start(fpga_handle hfpga, uint64_t start_time = 0, uint64_t stop_time = -1);
|
||||
|
||||
int vx_scope_stop(fpga_handle hfpga);
|
|
@ -1,75 +0,0 @@
|
|||
OPAE_HOME ?= /tools/opae/1.4.0
|
||||
|
||||
RTL_DIR=../../hw/rtl
|
||||
|
||||
SCRIPT_DIR=../../hw/scripts
|
||||
|
||||
OPAE_SYN_DIR=../../hw/syn/altera/opae
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -I. -I../include -I../../hw -I$(OPAE_HOME)/include -I$(OPAE_SYN_DIR)
|
||||
|
||||
LDFLAGS += -L$(OPAE_HOME)/lib -luuid -lopae-c
|
||||
|
||||
#SCOPE=1
|
||||
|
||||
# stack execution protection
|
||||
LDFLAGS +=-z noexecstack
|
||||
|
||||
# data relocation and protection
|
||||
LDFLAGS +=-z relro -z now
|
||||
|
||||
# stack buffer overrun detection
|
||||
CXXFLAGS +=-fstack-protector
|
||||
|
||||
# Position independent code
|
||||
CXXFLAGS += -fPIC
|
||||
|
||||
# Add external configuration
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
|
||||
# Dump perf stats
|
||||
CXXFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
LDFLAGS += -shared
|
||||
|
||||
PROJECT = libvortex.so
|
||||
|
||||
SRCS = ../common/opae.cpp ../common/vx_utils.cpp
|
||||
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
CXXFLAGS += -g -O0
|
||||
else
|
||||
CXXFLAGS += -O2 -DNDEBUG
|
||||
endif
|
||||
|
||||
# Enable scope analyzer
|
||||
ifdef SCOPE
|
||||
CXXFLAGS += -DSCOPE
|
||||
SRCS += ../common/vx_scope.cpp
|
||||
SCOPE_H = scope-defs.h
|
||||
endif
|
||||
|
||||
# Enable perf counters
|
||||
ifdef PERF
|
||||
CXXFLAGS += -DPERF_ENABLE
|
||||
endif
|
||||
|
||||
all: $(PROJECT)
|
||||
|
||||
$(OPAE_SYN_DIR)/vortex_afu.h:
|
||||
$(MAKE) -C $(OPAE_SYN_DIR) vortex_afu.h
|
||||
|
||||
scope-defs.h: $(SCRIPT_DIR)/scope.json
|
||||
$(SCRIPT_DIR)/scope.py $(CONFIGS) -cc scope-defs.h -vl $(RTL_DIR)/scope-defs.vh $(SCRIPT_DIR)/scope.json
|
||||
|
||||
# generate scope data
|
||||
scope: scope-defs.h
|
||||
|
||||
$(PROJECT): $(SRCS) $(OPAE_SYN_DIR)/vortex_afu.h $(SCOPE_H)
|
||||
$(CXX) $(CXXFLAGS) -DUSE_FPGA $^ $(LDFLAGS) -o $(PROJECT)
|
||||
|
||||
clean:
|
||||
rm -rf $(PROJECT) *.o scope-defs.h
|
|
@ -1,4 +1,4 @@
|
|||
VLSIM_DIR = ../../sim/vlsim
|
||||
OPAESIM_DIR = ../../sim/opaesim
|
||||
|
||||
RTL_DIR=../../hw/rtl
|
||||
|
||||
|
@ -6,7 +6,7 @@ SCRIPT_DIR=../../hw/scripts
|
|||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -I. -I../include -I../../hw -I$(VLSIM_DIR)
|
||||
CXXFLAGS += -I. -I../include -I../common/ -I../../hw -I$(OPAESIM_DIR)
|
||||
|
||||
# Position independent code
|
||||
CXXFLAGS += -fPIC
|
||||
|
@ -17,10 +17,9 @@ CXXFLAGS += $(CONFIGS)
|
|||
# Dump perf stats
|
||||
CXXFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
LDFLAGS += -shared -pthread
|
||||
LDFLAGS += -L. -lopae-c-vlsim
|
||||
LDFLAGS += -shared -luuid -ldl -pthread
|
||||
|
||||
SRCS = ../common/opae.cpp ../common/vx_utils.cpp
|
||||
SRCS = vortex.cpp api_init.cpp ../common/vx_utils.cpp
|
||||
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
|
@ -32,7 +31,7 @@ endif
|
|||
# Enable scope analyzer
|
||||
ifdef SCOPE
|
||||
CXXFLAGS += -DSCOPE
|
||||
SRCS += ../common/vx_scope.cpp
|
||||
SRCS += scope.cpp
|
||||
SCOPE_H = scope-defs.h
|
||||
endif
|
||||
|
||||
|
@ -46,15 +45,13 @@ PROJECT = libvortex.so
|
|||
all: $(PROJECT)
|
||||
|
||||
scope-defs.h: $(SCRIPT_DIR)/scope.json
|
||||
$(SCRIPT_DIR)/scope.py -DSIMULATION $(CONFIGS) -cc scope-defs.h -vl $(RTL_DIR)/scope-defs.vh $(SCRIPT_DIR)/scope.json
|
||||
$(SCRIPT_DIR)/scope.py $(CONFIGS) -D NOGLOBALS -cc scope-defs.h -vl $(RTL_DIR)/scope-defs.vh $(SCRIPT_DIR)/scope.json
|
||||
|
||||
# generate scope data
|
||||
scope: scope-defs.h
|
||||
|
||||
$(PROJECT): $(SRCS) $(SCOPE_H)
|
||||
DESTDIR=../../runtime/vlsim $(MAKE) -C $(VLSIM_DIR) ../../runtime/vlsim/libopae-c-vlsim.so
|
||||
$(CXX) $(CXXFLAGS) -DUSE_VLSIM $(SRCS) $(LDFLAGS) -o $(PROJECT)
|
||||
$(CXX) $(CXXFLAGS) $(SRCS) $(LDFLAGS) -o $(PROJECT)
|
||||
|
||||
clean:
|
||||
DESTDIR=../../runtime/vlsim $(MAKE) -C $(VLSIM_DIR) clean
|
||||
rm -rf libopae-c-vlsim.so $(PROJECT) *.o scope-defs.h
|
||||
rm -rf $(PROJECT) *.o scope-defs.h
|
70
runtime/opae/api_init.cpp
Normal file
70
runtime/opae/api_init.cpp
Normal file
|
@ -0,0 +1,70 @@
|
|||
#include "common.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <linux/limits.h>
|
||||
#include <dlfcn.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include<sstream>
|
||||
|
||||
// Comma-separated list of candidate locations for the OPAE C runtime library.
// api_init() falls back to this list when the OPAE_API_PATHS environment
// variable is not set and tries the entries in order.
#define DEFAULT_OPAE_API_PATHS "/usr/lib64/opae/libopae-c.so,/usr/lib/opae/libopae-c.so,libopae-c.so"
|
||||
|
||||
// Resolves the symbol named `func` from `dl_handle` with dlsym() and stores it
// in the same-named member of `opae_api_funcs`, cast to `pfn_<func>`.
// When the lookup yields nullptr it prints the dlerror() text, closes
// `dl_handle` and returns -1 from the enclosing function. It can therefore
// only be used where `opae_api_funcs` and `dl_handle` are in scope and the
// enclosing function can return -1.
#define SET_API(func) \
|
||||
opae_api_funcs->func = (pfn_##func)dlsym(dl_handle, #func); \
|
||||
if (opae_api_funcs->func == nullptr) { \
|
||||
printf("dlsym failed: %s\n", dlerror()); \
|
||||
dlclose(dl_handle); \
|
||||
return -1; \
|
||||
}
|
||||
|
||||
// Loads the OPAE C runtime library and fills `opae_api_funcs` with the entry
// points used by the driver.
//
// Candidate library locations are read from the comma-separated
// OPAE_API_PATHS environment variable, or from DEFAULT_OPAE_API_PATHS when the
// variable is unset. They are tried in order and the first one dlopen()
// accepts is used. Empty entries (e.g. from a trailing comma) are ignored.
//
// Returns 0 on success. Returns -1 when `opae_api_funcs` is null, when no
// library could be opened, or when a required symbol is missing.
int api_init(opae_api_funcs_t* opae_api_funcs) {
  if (opae_api_funcs == nullptr)
    return -1;

  const char* api_path_s = getenv("OPAE_API_PATHS");
  if (api_path_s == nullptr) {
    api_path_s = DEFAULT_OPAE_API_PATHS;
  }

  // Split the comma-separated list into individual paths.
  std::vector<std::string> api_paths;
  {
    std::stringstream ss(api_path_s);
    std::string path;
    while (std::getline(ss, path, ',')) {
      if (!path.empty())
        api_paths.push_back(path);
    }
  }

  void* dl_handle = nullptr;
  for (auto& api_path : api_paths) {
    dl_handle = dlopen(api_path.c_str(), RTLD_LAZY | RTLD_LOCAL);
    if (dl_handle)
      break;
  }
  if (dl_handle == nullptr) {
    // dlerror() returns NULL when dlopen() was never attempted (empty list).
    const char* err = dlerror();
    printf("dlopen failed: %s\n", err ? err : "no library path specified");
    return -1;
  }

  SET_API(fpgaGetProperties);
  SET_API(fpgaPropertiesSetObjectType);
  SET_API(fpgaPropertiesSetGUID);
  SET_API(fpgaDestroyProperties);
  SET_API(fpgaEnumerate);
  SET_API(fpgaDestroyToken);
  SET_API(fpgaOpen);
  SET_API(fpgaClose);
  SET_API(fpgaPrepareBuffer);
  SET_API(fpgaReleaseBuffer);
  SET_API(fpgaGetIOAddress);
  SET_API(fpgaWriteMMIO64);
  SET_API(fpgaReadMMIO64);
  SET_API(fpgaErrStr);

  // The library is intentionally left open: the pointers stored above refer to
  // code inside it, and dlclose() could unload it and leave them dangling.

  return 0;
}
|
96
runtime/opae/common.h
Normal file
96
runtime/opae/common.h
Normal file
|
@ -0,0 +1,96 @@
|
|||
#pragma once
|
||||
|
||||
#include <vortex.h>
|
||||
#include <fpga.h>
|
||||
#include "vx_utils.h"
|
||||
#include "vx_malloc.h"
|
||||
|
||||
// Declares a local variable `handle` initialized from `_expr`. When it
// compares equal to nullptr, prints a "[VXDRV] Error" line naming the
// expression and then runs the `_cleanup` statements.
// The expansion is not wrapped in its own scope, so `handle` remains visible
// to the code following the macro.
#define CHECK_HANDLE(handle, _expr, _cleanup) \
|
||||
auto handle = _expr; \
|
||||
if (handle == nullptr) { \
|
||||
printf("[VXDRV] Error: '%s' returned NULL!\n", #_expr); \
|
||||
_cleanup \
|
||||
}
|
||||
|
||||
// Evaluates `_expr` once and treats a result of 0 as success. On any other
// result it prints a "[VXDRV] Error" line with the expression text, the
// numeric code and the text from api.fpgaErrStr(err), then runs the
// `_cleanup` statements.
// Requires an object named `api` with an fpgaErrStr member to be in scope at
// the point of use.
#define CHECK_ERR(_expr, _cleanup) \
|
||||
do { \
|
||||
auto err = _expr; \
|
||||
if (err == 0) \
|
||||
break; \
|
||||
printf("[VXDRV] Error: '%s' returned %d, %s!\n", #_expr, (int)err, api.fpgaErrStr(err)); \
|
||||
_cleanup \
|
||||
} while (false)
|
||||
|
||||
typedef fpga_result (*pfn_fpgaGetProperties)(fpga_token token, fpga_properties *prop);
|
||||
typedef fpga_result (*pfn_fpgaPropertiesSetObjectType)(fpga_properties prop, fpga_objtype objtype);
|
||||
typedef fpga_result (*pfn_fpgaPropertiesSetGUID)(fpga_properties prop, fpga_guid guid);
|
||||
typedef fpga_result (*pfn_fpgaDestroyProperties)(fpga_properties *prop);
|
||||
typedef fpga_result (*pfn_fpgaEnumerate)(const fpga_properties *filters, uint32_t num_filters, fpga_token *tokens, uint32_t max_tokens, uint32_t *num_matches);
|
||||
typedef fpga_result (*pfn_fpgaDestroyToken)(fpga_token *token);
|
||||
|
||||
typedef fpga_result (*pfn_fpgaOpen)(fpga_token token, fpga_handle *handle, int flags);
|
||||
typedef fpga_result (*pfn_fpgaClose)(fpga_handle handle);
|
||||
typedef fpga_result (*pfn_fpgaPrepareBuffer)(fpga_handle handle, uint64_t len, void **buf_addr, uint64_t *wsid, int flags);
|
||||
typedef fpga_result (*pfn_fpgaReleaseBuffer)(fpga_handle handle, uint64_t wsid);
|
||||
typedef fpga_result (*pfn_fpgaGetIOAddress)(fpga_handle handle, uint64_t wsid, uint64_t *ioaddr);
|
||||
typedef fpga_result (*pfn_fpgaWriteMMIO64)(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t value);
|
||||
typedef fpga_result (*pfn_fpgaReadMMIO64)(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t *value);
|
||||
typedef const char *(*pfn_fpgaErrStr)(fpga_result e);
|
||||
|
||||
// Table of OPAE entry points: one function pointer per API call, each typed
// with the matching pfn_* signature. api_init() populates every member.
struct opae_api_funcs_t {
|
||||
pfn_fpgaGetProperties fpgaGetProperties;
|
||||
pfn_fpgaPropertiesSetObjectType fpgaPropertiesSetObjectType;
|
||||
pfn_fpgaPropertiesSetGUID fpgaPropertiesSetGUID;
|
||||
pfn_fpgaDestroyProperties fpgaDestroyProperties;
|
||||
pfn_fpgaEnumerate fpgaEnumerate;
|
||||
pfn_fpgaDestroyToken fpgaDestroyToken;
|
||||
|
||||
pfn_fpgaOpen fpgaOpen;
|
||||
pfn_fpgaClose fpgaClose;
|
||||
pfn_fpgaPrepareBuffer fpgaPrepareBuffer;
|
||||
pfn_fpgaReleaseBuffer fpgaReleaseBuffer;
|
||||
pfn_fpgaGetIOAddress fpgaGetIOAddress;
|
||||
pfn_fpgaWriteMMIO64 fpgaWriteMMIO64;
|
||||
pfn_fpgaReadMMIO64 fpgaReadMMIO64;
|
||||
pfn_fpgaErrStr fpgaErrStr;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Per-device driver state: the OPAE function table, the opened FPGA handle,
// the device memory allocator and the capability values read from the device.
// vx_dev_open() stores `api` and `fpga`, reads `isa_caps` from MMIO_ISA_CAPS
// and decodes `version`, `num_cores` and `num_warps` from MMIO_DEV_CAPS.
// NOTE(review): `num_threads` and `dcrs` are set outside the visible code -
// confirm where they are initialized.
class vx_device {
|
||||
public:
|
||||
// The allocator is constructed with ALLOC_BASE_ADDR,
// ALLOC_BASE_ADDR + LOCAL_MEM_SIZE, 4096 and CACHE_BLOCK_SIZE.
// NOTE(review): presumably base address, end address, page size and block
// alignment - confirm against vortex::MemoryAllocator.
vx_device()
|
||||
: mem_allocator(
|
||||
ALLOC_BASE_ADDR,
|
||||
ALLOC_BASE_ADDR + LOCAL_MEM_SIZE,
|
||||
4096,
|
||||
CACHE_BLOCK_SIZE)
|
||||
{}
|
||||
|
||||
~vx_device() {}
|
||||
|
||||
opae_api_funcs_t api;
|
||||
|
||||
fpga_handle fpga;
|
||||
vortex::MemoryAllocator mem_allocator;
|
||||
DeviceConfig dcrs;
|
||||
unsigned version;
|
||||
unsigned num_cores;
|
||||
unsigned num_warps;
|
||||
unsigned num_threads;
|
||||
uint64_t isa_caps;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Host-side record describing one buffer: a workspace id, a host pointer, an
// IO address, the owning device handle and a size.
// NOTE(review): presumably filled by vx_buf_alloc() from the results of
// fpgaPrepareBuffer() / fpgaGetIOAddress() - confirm against that function.
typedef struct vx_buffer_ {
|
||||
uint64_t wsid;
|
||||
void* host_ptr;
|
||||
uint64_t io_addr;
|
||||
vx_device_h hdevice;
|
||||
uint64_t size;
|
||||
} vx_buffer_t;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
int api_init(opae_api_funcs_t* opae_api_funcs);
|
|
@ -1,4 +1,5 @@
|
|||
#include "vx_scope.h"
|
||||
#include "common.h"
|
||||
#include "scope.h"
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <thread>
|
||||
|
@ -10,20 +11,10 @@
|
|||
#include <mutex>
|
||||
#include <VX_config.h>
|
||||
#include <vortex_afu.h>
|
||||
#include <scope-defs.h>
|
||||
#include "scope-defs.h"
|
||||
|
||||
#define FRAME_FLUSH_SIZE 100
|
||||
|
||||
#define CHECK_RES(_expr) \
|
||||
do { \
|
||||
fpga_result res = _expr; \
|
||||
if (res == FPGA_OK) \
|
||||
break; \
|
||||
printf("OPAE Error: '%s' returned %d, %s!\n", \
|
||||
#_expr, (int)res, fpgaErrStr(res)); \
|
||||
return -1; \
|
||||
} while (false)
|
||||
|
||||
#define MMIO_SCOPE_READ (AFU_IMAGE_MMIO_SCOPE_READ * 4)
|
||||
#define MMIO_SCOPE_WRITE (AFU_IMAGE_MMIO_SCOPE_WRITE * 4)
|
||||
|
||||
|
@ -49,10 +40,12 @@ static constexpr int fwidth = calcFrameWidth();
|
|||
static std::thread g_timeout_thread;
|
||||
static std::mutex g_timeout_mutex;
|
||||
|
||||
static void timeout_callback(fpga_handle fpga) {
|
||||
static void timeout_callback(vx_device_h hdevice) {
|
||||
auto device = ((vx_device*)hdevice);
|
||||
auto api = device->api;
|
||||
std::this_thread::sleep_for(std::chrono::seconds{HANG_TIMEOUT});
|
||||
vx_scope_stop(fpga);
|
||||
fpgaClose(fpga);
|
||||
vx_scope_stop(hdevice);
|
||||
api.fpgaClose(device->fpga);
|
||||
exit(0);
|
||||
}
|
||||
#endif
|
||||
|
@ -94,42 +87,54 @@ void dump_module(std::ofstream& ofs, int parent) {
|
|||
}
|
||||
}
|
||||
|
||||
int vx_scope_start(fpga_handle hfpga, uint64_t start_time, uint64_t stop_time) {
|
||||
if (nullptr == hfpga)
|
||||
return -1;
|
||||
int vx_scope_start(vx_device_h hdevice, uint64_t start_time, uint64_t stop_time) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
auto device = ((vx_device*)hdevice);
|
||||
auto api = device->api;
|
||||
|
||||
if (stop_time != uint64_t(-1)) {
|
||||
// set stop time
|
||||
uint64_t cmd_stop = ((stop_time << 3) | CMD_SET_STOP);
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, cmd_stop));
|
||||
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, cmd_stop), {
|
||||
return -1;
|
||||
});
|
||||
std::cout << "scope stop time: " << std::dec << stop_time << "s" << std::endl;
|
||||
}
|
||||
|
||||
// start recording
|
||||
uint64_t cmd_delay = ((start_time << 3) | CMD_SET_START);
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, cmd_delay));
|
||||
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, cmd_delay), {
|
||||
return -1;
|
||||
});
|
||||
std::cout << "scope start time: " << std::dec << start_time << "s" << std::endl;
|
||||
|
||||
#ifdef HANG_TIMEOUT
|
||||
g_timeout_thread = std::thread(timeout_callback, hfpga);
|
||||
g_timeout_thread = std::thread(timeout_callback, device);
|
||||
g_timeout_thread.detach();
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int vx_scope_stop(fpga_handle hfpga) {
|
||||
int vx_scope_stop(vx_device_h hdevice) {
|
||||
#ifdef HANG_TIMEOUT
|
||||
if (!g_timeout_mutex.try_lock())
|
||||
return 0;
|
||||
#endif
|
||||
|
||||
if (nullptr == hfpga)
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
auto device = ((vx_device*)hdevice);
|
||||
auto api = device->api;
|
||||
|
||||
// forced stop
|
||||
uint64_t cmd_stop = ((0 << 3) | CMD_SET_STOP);
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, cmd_stop));
|
||||
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, cmd_stop), {
|
||||
return -1;
|
||||
});
|
||||
|
||||
std::cout << "scope trace dump begin..." << std::endl;
|
||||
|
||||
|
@ -152,17 +157,25 @@ int vx_scope_stop(fpga_handle hfpga) {
|
|||
int signal_offset = 0;
|
||||
|
||||
// wait for recording to terminate
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_VALID));
|
||||
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, CMD_GET_VALID), {
|
||||
return -1;
|
||||
});
|
||||
do {
|
||||
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &data_valid));
|
||||
CHECK_ERR(api.fpgaReadMMIO64(device->fpga, 0, MMIO_SCOPE_READ, &data_valid), {
|
||||
return -1;
|
||||
});
|
||||
if (data_valid)
|
||||
break;
|
||||
std::this_thread::sleep_for(std::chrono::seconds(1));
|
||||
} while (true);
|
||||
|
||||
// get frame width
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_WIDTH));
|
||||
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &frame_width));
|
||||
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, CMD_GET_WIDTH), {
|
||||
return -1;
|
||||
});
|
||||
CHECK_ERR(api.fpgaReadMMIO64(device->fpga, 0, MMIO_SCOPE_READ, &frame_width), {
|
||||
return -1;
|
||||
});
|
||||
std::cout << "scope::frame_width=" << std::dec << frame_width << std::endl;
|
||||
|
||||
if (fwidth != (int)frame_width) {
|
||||
|
@ -171,19 +184,31 @@ int vx_scope_stop(fpga_handle hfpga) {
|
|||
}
|
||||
|
||||
// get max frames
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_COUNT));
|
||||
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &max_frames));
|
||||
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, CMD_GET_COUNT), {
|
||||
return -1;
|
||||
});
|
||||
CHECK_ERR(api.fpgaReadMMIO64(device->fpga, 0, MMIO_SCOPE_READ, &max_frames), {
|
||||
return -1;
|
||||
});
|
||||
std::cout << "scope::max_frames=" << std::dec << max_frames << std::endl;
|
||||
|
||||
// get offset
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_OFFSET));
|
||||
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &offset));
|
||||
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, CMD_GET_OFFSET), {
|
||||
return -1;
|
||||
});
|
||||
CHECK_ERR(api.fpgaReadMMIO64(device->fpga, 0, MMIO_SCOPE_READ, &offset), {
|
||||
return -1;
|
||||
});
|
||||
|
||||
// get data
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_DATA));
|
||||
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, CMD_GET_DATA), {
|
||||
return -1;
|
||||
});
|
||||
|
||||
// print clock header
|
||||
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &delta));
|
||||
CHECK_ERR(api.fpgaReadMMIO64(device->fpga, 0, MMIO_SCOPE_READ, &delta), {
|
||||
return -1;
|
||||
});
|
||||
timestamp = print_clock(ofs, offset + delta + 2, timestamp);
|
||||
signal_id = num_taps;
|
||||
|
||||
|
@ -192,15 +217,23 @@ int vx_scope_stop(fpga_handle hfpga) {
|
|||
do {
|
||||
if (frame_no == (max_frames-1)) {
|
||||
// verify last frame is valid
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_VALID));
|
||||
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &data_valid));
|
||||
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, CMD_GET_VALID), {
|
||||
return -1;
|
||||
});
|
||||
CHECK_ERR(api.fpgaReadMMIO64(device->fpga, 0, MMIO_SCOPE_READ, &data_valid), {
|
||||
return -1;
|
||||
});
|
||||
assert(data_valid == 1);
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_DATA));
|
||||
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, CMD_GET_DATA), {
|
||||
return -1;
|
||||
});
|
||||
}
|
||||
|
||||
// read next data words
|
||||
uint64_t word;
|
||||
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &word));
|
||||
CHECK_ERR(api.fpgaReadMMIO64(device->fpga, 0, MMIO_SCOPE_READ, &word), {
|
||||
return -1;
|
||||
});
|
||||
|
||||
do {
|
||||
int signal_width = scope_taps[signal_id-1].width;
|
||||
|
@ -225,7 +258,9 @@ int vx_scope_stop(fpga_handle hfpga) {
|
|||
|
||||
if (frame_no != max_frames) {
|
||||
// print clock header
|
||||
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &delta));
|
||||
CHECK_ERR(api.fpgaReadMMIO64(device->fpga, 0, MMIO_SCOPE_READ, &delta), {
|
||||
return -1;
|
||||
});
|
||||
timestamp = print_clock(ofs, delta + 1, timestamp);
|
||||
signal_id = num_taps;
|
||||
if (0 == (frame_no % FRAME_FLUSH_SIZE)) {
|
||||
|
@ -242,8 +277,12 @@ int vx_scope_stop(fpga_handle hfpga) {
|
|||
std::cout << "scope trace dump done! - " << (timestamp/2) << " cycles" << std::endl;
|
||||
|
||||
// verify data not valid
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_VALID));
|
||||
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &data_valid));
|
||||
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, CMD_GET_VALID), {
|
||||
return -1;
|
||||
});
|
||||
CHECK_ERR(api.fpgaReadMMIO64(device->fpga, 0, MMIO_SCOPE_READ, &data_valid), {
|
||||
return -1;
|
||||
});
|
||||
assert(data_valid == 0);
|
||||
|
||||
return 0;
|
13
runtime/opae/scope.h
Normal file
13
runtime/opae/scope.h
Normal file
|
@ -0,0 +1,13 @@
|
|||
#pragma once
|
||||
|
||||
#include <vortex.h>
|
||||
|
||||
#if defined(USE_FPGA)
|
||||
#define HANG_TIMEOUT 60
|
||||
#else
|
||||
#define HANG_TIMEOUT (30*60)
|
||||
#endif
|
||||
|
||||
int vx_scope_start(vx_device_h hdevice, uint64_t start_time = 0, uint64_t stop_time = -1);
|
||||
|
||||
int vx_scope_stop(vx_device_h hdevice);
|
|
@ -1,8 +1,10 @@
|
|||
#include <stdint.h>
|
||||
#include "common.h"
|
||||
#include <iostream>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <uuid/uuid.h>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <cmath>
|
||||
|
@ -10,34 +12,14 @@
|
|||
#include <unordered_map>
|
||||
#include <list>
|
||||
|
||||
#if defined(USE_FPGA) || defined(USE_ASE)
|
||||
#include <opae/fpga.h>
|
||||
#include <uuid/uuid.h>
|
||||
#elif defined(USE_VLSIM)
|
||||
#include <fpga.h>
|
||||
#endif
|
||||
|
||||
#include "vx_utils.h"
|
||||
#include "vx_malloc.h"
|
||||
#include <vortex.h>
|
||||
#include <VX_config.h>
|
||||
#include <VX_types.h>
|
||||
#include "vortex_afu.h"
|
||||
|
||||
#ifdef SCOPE
|
||||
#include "vx_scope.h"
|
||||
#include "scope.h"
|
||||
#endif
|
||||
|
||||
#define CHECK_RES(_expr) \
|
||||
do { \
|
||||
fpga_result res = _expr; \
|
||||
if (res == FPGA_OK) \
|
||||
break; \
|
||||
printf("[VXDRV] Error: '%s' returned %d, %s!\n", \
|
||||
#_expr, (int)res, fpgaErrStr(res)); \
|
||||
return -1; \
|
||||
} while (false)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define CMD_MEM_READ AFU_IMAGE_CMD_MEM_READ
|
||||
|
@ -57,45 +39,11 @@
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class vx_device {
|
||||
public:
|
||||
vx_device()
|
||||
: mem_allocator(
|
||||
ALLOC_BASE_ADDR,
|
||||
ALLOC_BASE_ADDR + LOCAL_MEM_SIZE,
|
||||
4096,
|
||||
CACHE_BLOCK_SIZE)
|
||||
{}
|
||||
|
||||
~vx_device() {}
|
||||
|
||||
fpga_handle fpga;
|
||||
vortex::MemoryAllocator mem_allocator;
|
||||
DeviceConfig dcrs;
|
||||
unsigned version;
|
||||
unsigned num_cores;
|
||||
unsigned num_warps;
|
||||
unsigned num_threads;
|
||||
uint64_t isa_caps;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
typedef struct vx_buffer_ {
|
||||
uint64_t wsid;
|
||||
void* host_ptr;
|
||||
uint64_t io_addr;
|
||||
vx_device_h hdevice;
|
||||
uint64_t size;
|
||||
} vx_buffer_t;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
vx_device *device = ((vx_device*)hdevice);
|
||||
auto device = ((vx_device*)hdevice);
|
||||
|
||||
switch (caps_id) {
|
||||
case VX_CAPS_VERSION:
|
||||
|
@ -138,91 +86,92 @@ extern int vx_dev_open(vx_device_h* hdevice) {
|
|||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
fpga_handle accel_handle;
|
||||
vx_device* device;
|
||||
vx_device* device;
|
||||
|
||||
#ifndef USE_VLSIM
|
||||
fpga_result res;
|
||||
fpga_handle accel_handle;
|
||||
fpga_token accel_token;
|
||||
fpga_properties filter = nullptr;
|
||||
fpga_properties filter;
|
||||
fpga_guid guid;
|
||||
|
||||
uint32_t num_matches;
|
||||
|
||||
opae_api_funcs_t api;
|
||||
memset(&api, 0, sizeof(opae_api_funcs_t));
|
||||
if (api_init(&api) !=0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Set up a filter that will search for an accelerator
|
||||
CHECK_RES(fpgaGetProperties(nullptr, &filter));
|
||||
res = fpgaPropertiesSetObjectType(filter, FPGA_ACCELERATOR);
|
||||
if (res != FPGA_OK) {
|
||||
fprintf(stderr, "[VXDRV] Error: fpgaGetProperties() returned %d, %s!\n", (int)res, fpgaErrStr(res));
|
||||
fpgaDestroyProperties(&filter);
|
||||
CHECK_ERR(api.fpgaGetProperties(nullptr, &filter), {
|
||||
return -1;
|
||||
}
|
||||
});
|
||||
|
||||
CHECK_ERR(api.fpgaPropertiesSetObjectType(filter, FPGA_ACCELERATOR), {
|
||||
api.fpgaDestroyProperties(&filter);
|
||||
return -1;
|
||||
});
|
||||
|
||||
// Add the desired UUID to the filter
|
||||
uuid_parse(AFU_ACCEL_UUID, guid);
|
||||
res = fpgaPropertiesSetGUID(filter, guid);
|
||||
if (res != FPGA_OK) {
|
||||
fprintf(stderr, "[VXDRV] Error: fpgaPropertiesSetGUID() returned %d, %s!\n", (int)res, fpgaErrStr(res));
|
||||
fpgaDestroyProperties(&filter);
|
||||
uuid_parse(AFU_ACCEL_UUID, guid);
|
||||
CHECK_ERR(api.fpgaPropertiesSetGUID(filter, guid), {
|
||||
api.fpgaDestroyProperties(&filter);
|
||||
return -1;
|
||||
}
|
||||
});
|
||||
|
||||
// Do the search across the available FPGA contexts
|
||||
num_matches = 1;
|
||||
res = fpgaEnumerate(&filter, 1, &accel_token, 1, &num_matches);
|
||||
if (res != FPGA_OK) {
|
||||
fprintf(stderr, "[VXDRV] Error: fpgaEnumerate() returned %d, %s!\n", (int)res, fpgaErrStr(res));
|
||||
fpgaDestroyProperties(&filter);
|
||||
CHECK_ERR(api.fpgaEnumerate(&filter, 1, &accel_token, 1, &num_matches), {
|
||||
api.fpgaDestroyProperties(&filter);
|
||||
return -1;
|
||||
}
|
||||
});
|
||||
|
||||
// Not needed anymore
|
||||
fpgaDestroyProperties(&filter);
|
||||
CHECK_ERR(api.fpgaDestroyProperties(&filter), {
|
||||
api.fpgaDestroyToken(&accel_token);
|
||||
return -1;
|
||||
});
|
||||
|
||||
if (num_matches < 1) {
|
||||
fprintf(stderr, "[VXDRV] Error: accelerator %s not found!\n", AFU_ACCEL_UUID);
|
||||
fpgaDestroyToken(&accel_token);
|
||||
api.fpgaDestroyToken(&accel_token);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Open accelerator
|
||||
res = fpgaOpen(accel_token, &accel_handle, 0);
|
||||
if (res != FPGA_OK) {
|
||||
fprintf(stderr, "[VXDRV] Error: fpgaOpen() returned %d, %s!\n", (int)res, fpgaErrStr(res));
|
||||
fpgaDestroyToken(&accel_token);
|
||||
CHECK_ERR(api.fpgaOpen(accel_token, &accel_handle, 0), {
|
||||
api.fpgaDestroyToken(&accel_token);
|
||||
return -1;
|
||||
}
|
||||
});
|
||||
|
||||
// Done with token
|
||||
fpgaDestroyToken(&accel_token);
|
||||
#else
|
||||
// Open accelerator
|
||||
CHECK_RES(fpgaOpen(NULL, &accel_handle, 0));
|
||||
#endif
|
||||
CHECK_ERR(api.fpgaDestroyToken(&accel_token), {
|
||||
api.fpgaClose(accel_handle);
|
||||
return -1;
|
||||
});
|
||||
|
||||
// allocate device object
|
||||
device = new vx_device();
|
||||
if (nullptr == device) {
|
||||
fpgaClose(accel_handle);
|
||||
api.fpgaClose(accel_handle);
|
||||
return -1;
|
||||
}
|
||||
|
||||
device->fpga = accel_handle;
|
||||
|
||||
device->api = api;
|
||||
device->fpga = accel_handle;
|
||||
|
||||
{
|
||||
// Load ISA CAPS
|
||||
int ret = fpgaReadMMIO64(device->fpga, 0, MMIO_ISA_CAPS, &device->isa_caps);
|
||||
if (ret != FPGA_OK) {
|
||||
fpgaClose(accel_handle);
|
||||
return ret;
|
||||
}
|
||||
CHECK_ERR(api.fpgaReadMMIO64(device->fpga, 0, MMIO_ISA_CAPS, &device->isa_caps), {
|
||||
api.fpgaClose(accel_handle);
|
||||
return -1;
|
||||
});
|
||||
|
||||
// Load device CAPS
|
||||
uint64_t dev_caps;
|
||||
ret = fpgaReadMMIO64(device->fpga, 0, MMIO_DEV_CAPS, &dev_caps);
|
||||
if (ret != FPGA_OK) {
|
||||
fpgaClose(accel_handle);
|
||||
return ret;
|
||||
}
|
||||
CHECK_ERR(api.fpgaReadMMIO64(device->fpga, 0, MMIO_DEV_CAPS, &dev_caps), {
|
||||
api.fpgaClose(accel_handle);
|
||||
return -1;
|
||||
});
|
||||
|
||||
device->version = (dev_caps >> 0) & 0xffff;
|
||||
device->num_cores = (dev_caps >> 16) & 0xffff;
|
||||
device->num_warps = (dev_caps >> 32) & 0xffff;
|
||||
|
@ -235,9 +184,9 @@ extern int vx_dev_open(vx_device_h* hdevice) {
|
|||
|
||||
#ifdef SCOPE
|
||||
{
|
||||
int ret = vx_scope_start(accel_handle, 0, -1);
|
||||
int ret = vx_scope_start(device, 0, -1);
|
||||
if (ret != 0) {
|
||||
fpgaClose(accel_handle);
|
||||
api.fpgaClose(accel_handle);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
@ -262,17 +211,18 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
|||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
vx_device *device = ((vx_device*)hdevice);
|
||||
auto device = ((vx_device*)hdevice);
|
||||
auto api = device->api;
|
||||
|
||||
#ifdef SCOPE
|
||||
vx_scope_stop(device->fpga);
|
||||
vx_scope_stop(hdevice);
|
||||
#endif
|
||||
|
||||
#ifdef DUMP_PERF_STATS
|
||||
perf_remove_device(hdevice);
|
||||
#endif
|
||||
|
||||
fpgaClose(device->fpga);
|
||||
api.fpgaClose(device->fpga);
|
||||
|
||||
delete device;
|
||||
|
||||
|
@ -285,7 +235,7 @@ extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr)
|
|||
|| 0 == size)
|
||||
return -1;
|
||||
|
||||
vx_device *device = ((vx_device*)hdevice);
|
||||
auto device = ((vx_device*)hdevice);
|
||||
return device->mem_allocator.allocate(size, dev_maddr);
|
||||
}
|
||||
|
||||
|
@ -293,12 +243,11 @@ extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_maddr) {
|
|||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
vx_device *device = ((vx_device*)hdevice);
|
||||
auto device = ((vx_device*)hdevice);
|
||||
return device->mem_allocator.release(dev_maddr);
|
||||
}
|
||||
|
||||
extern int vx_buf_alloc(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) {
|
||||
fpga_result res;
|
||||
void* host_ptr;
|
||||
uint64_t wsid;
|
||||
uint64_t io_addr;
|
||||
|
@ -309,26 +258,25 @@ extern int vx_buf_alloc(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer
|
|||
|| nullptr == hbuffer)
|
||||
return -1;
|
||||
|
||||
vx_device *device = ((vx_device*)hdevice);
|
||||
auto device = ((vx_device*)hdevice);
|
||||
auto api = device->api;
|
||||
|
||||
size_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
|
||||
res = fpgaPrepareBuffer(device->fpga, asize, &host_ptr, &wsid, 0);
|
||||
if (FPGA_OK != res) {
|
||||
CHECK_ERR(api.fpgaPrepareBuffer(device->fpga, asize, &host_ptr, &wsid, 0), {
|
||||
return -1;
|
||||
}
|
||||
});
|
||||
|
||||
// Get the physical address of the buffer in the accelerator
|
||||
res = fpgaGetIOAddress(device->fpga, wsid, &io_addr);
|
||||
if (FPGA_OK != res) {
|
||||
fpgaReleaseBuffer(device->fpga, wsid);
|
||||
CHECK_ERR(api.fpgaGetIOAddress(device->fpga, wsid, &io_addr), {
|
||||
api.fpgaReleaseBuffer(device->fpga, wsid);
|
||||
return -1;
|
||||
}
|
||||
});
|
||||
|
||||
// allocate buffer object
|
||||
buffer = (vx_buffer_t*)malloc(sizeof(vx_buffer_t));
|
||||
if (nullptr == buffer) {
|
||||
fpgaReleaseBuffer(device->fpga, wsid);
|
||||
api.fpgaReleaseBuffer(device->fpga, wsid);
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -347,7 +295,7 @@ extern void* vx_host_ptr(vx_buffer_h hbuffer) {
|
|||
if (nullptr == hbuffer)
|
||||
return nullptr;
|
||||
|
||||
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
|
||||
auto buffer = ((vx_buffer_t*)hbuffer);
|
||||
return buffer->host_ptr;
|
||||
}
|
||||
|
||||
|
@ -355,10 +303,11 @@ extern int vx_buf_free(vx_buffer_h hbuffer) {
|
|||
if (nullptr == hbuffer)
|
||||
return -1;
|
||||
|
||||
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
|
||||
vx_device *device = ((vx_device*)buffer->hdevice);
|
||||
auto buffer = ((vx_buffer_t*)hbuffer);
|
||||
auto device = ((vx_device*)buffer->hdevice);
|
||||
auto api = device->api;
|
||||
|
||||
fpgaReleaseBuffer(device->fpga, buffer->wsid);
|
||||
api.fpgaReleaseBuffer(device->fpga, buffer->wsid);
|
||||
|
||||
free(buffer);
|
||||
|
||||
|
@ -371,24 +320,22 @@ extern int vx_ready_wait(vx_device_h hdevice, uint64_t timeout) {
|
|||
|
||||
std::unordered_map<uint32_t, std::stringstream> print_bufs;
|
||||
|
||||
vx_device *device = ((vx_device*)hdevice);
|
||||
auto device = ((vx_device*)hdevice);
|
||||
auto api = device->api;
|
||||
|
||||
struct timespec sleep_time;
|
||||
|
||||
#if defined(USE_ASE)
|
||||
sleep_time.tv_sec = 1;
|
||||
sleep_time.tv_nsec = 0;
|
||||
#else
|
||||
sleep_time.tv_sec = 0;
|
||||
sleep_time.tv_nsec = 1000000;
|
||||
#endif
|
||||
|
||||
// to milliseconds
|
||||
uint64_t sleep_time_ms = (sleep_time.tv_sec * 1000) + (sleep_time.tv_nsec / 1000000);
|
||||
|
||||
for (;;) {
|
||||
uint64_t status;
|
||||
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_STATUS, &status));
|
||||
CHECK_ERR(api.fpgaReadMMIO64(device->fpga, 0, MMIO_STATUS, &status), {
|
||||
return -1;
|
||||
});
|
||||
|
||||
// check for console data
|
||||
uint32_t cout_data = status >> STATUS_STATE_BITS;
|
||||
|
@ -403,7 +350,9 @@ extern int vx_ready_wait(vx_device_h hdevice, uint64_t timeout) {
|
|||
std::cout << std::dec << "#" << cout_tid << ": " << ss_buf.str() << std::flush;
|
||||
ss_buf.str("");
|
||||
}
|
||||
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_STATUS, &status));
|
||||
CHECK_ERR(api.fpgaReadMMIO64(device->fpga, 0, MMIO_STATUS, &status), {
|
||||
return -1;
|
||||
});
|
||||
cout_data = status >> STATUS_STATE_BITS;
|
||||
} while (cout_data & 0x1);
|
||||
}
|
||||
|
@ -435,8 +384,9 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size
|
|||
|| 0 >= size)
|
||||
return -1;
|
||||
|
||||
vx_buffer_t *buffer = ((vx_buffer_t*)hbuffer);
|
||||
vx_device *device = ((vx_device*)buffer->hdevice);
|
||||
auto buffer = ((vx_buffer_t*)hbuffer);
|
||||
auto device = ((vx_device*)buffer->hdevice);
|
||||
auto api = device->api;
|
||||
|
||||
uint64_t dev_mem_size = LOCAL_MEM_SIZE;
|
||||
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
|
@ -459,10 +409,18 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size
|
|||
|
||||
auto ls_shift = (int)std::log2(CACHE_BLOCK_SIZE);
|
||||
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_ARG0, (buffer->io_addr + src_offset) >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_ARG1, dev_maddr >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_ARG2, asize >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_MEM_WRITE));
|
||||
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_ARG0, (buffer->io_addr + src_offset) >> ls_shift), {
|
||||
return -1;
|
||||
});
|
||||
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_ARG1, dev_maddr >> ls_shift), {
|
||||
return -1;
|
||||
});
|
||||
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_ARG2, asize >> ls_shift), {
|
||||
return -1;
|
||||
});
|
||||
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_MEM_WRITE), {
|
||||
return -1;
|
||||
});
|
||||
|
||||
// Wait for the write operation to finish
|
||||
if (vx_ready_wait(buffer->hdevice, MAX_TIMEOUT) != 0)
|
||||
|
@ -476,8 +434,9 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t si
|
|||
|| 0 >= size)
|
||||
return -1;
|
||||
|
||||
vx_buffer_t *buffer = ((vx_buffer_t*)hbuffer);
|
||||
vx_device *device = ((vx_device*)buffer->hdevice);
|
||||
auto buffer = ((vx_buffer_t*)hbuffer);
|
||||
auto device = ((vx_device*)buffer->hdevice);
|
||||
auto api = device->api;
|
||||
|
||||
uint64_t dev_mem_size = LOCAL_MEM_SIZE;
|
||||
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
|
@ -500,10 +459,18 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t si
|
|||
|
||||
auto ls_shift = (int)std::log2(CACHE_BLOCK_SIZE);
|
||||
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_ARG0, (buffer->io_addr + dest_offset) >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_ARG1, dev_maddr >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_ARG2, asize >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_MEM_READ));
|
||||
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_ARG0, (buffer->io_addr + dest_offset) >> ls_shift), {
|
||||
return -1;
|
||||
});
|
||||
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_ARG1, dev_maddr >> ls_shift), {
|
||||
return -1;
|
||||
});
|
||||
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_ARG2, asize >> ls_shift), {
|
||||
return -1;
|
||||
});
|
||||
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_MEM_READ), {
|
||||
return -1;
|
||||
});
|
||||
|
||||
// Wait for the read operation to finish
|
||||
if (vx_ready_wait(buffer->hdevice, MAX_TIMEOUT) != 0)
|
||||
|
@ -516,14 +483,17 @@ extern int vx_start(vx_device_h hdevice) {
|
|||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
vx_device *device = ((vx_device*)hdevice);
|
||||
auto device = ((vx_device*)hdevice);
|
||||
auto api = device->api;
|
||||
|
||||
// Ensure ready for new command
|
||||
if (vx_ready_wait(hdevice, MAX_TIMEOUT) != 0)
|
||||
return -1;
|
||||
|
||||
// start execution
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_RUN));
|
||||
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_RUN), {
|
||||
return -1;
|
||||
});
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -532,16 +502,23 @@ extern int vx_dcr_write(vx_device_h hdevice, uint32_t addr, uint64_t value) {
|
|||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
vx_device *device = ((vx_device*)hdevice);
|
||||
auto device = ((vx_device*)hdevice);
|
||||
auto api = device->api;
|
||||
|
||||
// Ensure ready for new command
|
||||
if (vx_ready_wait(hdevice, -1) != 0)
|
||||
return -1;
|
||||
|
||||
// write DCR value
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_ARG0, addr));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_ARG1, value));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_DCR_WRITE));
|
||||
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_ARG0, addr), {
|
||||
return -1;
|
||||
});
|
||||
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_ARG1, value), {
|
||||
return -1;
|
||||
});
|
||||
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_DCR_WRITE), {
|
||||
return -1;
|
||||
});
|
||||
|
||||
// save the value
|
||||
device->dcrs.write(addr, value);
|
|
@ -12,7 +12,7 @@ CXXFLAGS += -I../$(THIRD_PARTY_DIR)/softfloat/source/include
|
|||
CXXFLAGS += -I../$(THIRD_PARTY_DIR)
|
||||
|
||||
LDFLAGS += -shared ../$(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
|
||||
LDFLAGS += -L../$(THIRD_PARTY_DIR)/ramulator -lramulator
|
||||
LDFLAGS += -L../$(THIRD_PARTY_DIR)/ramulator -lramulator -pthread
|
||||
|
||||
# control RTL debug tracing states
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_PIPELINE
|
||||
|
|
|
@ -8,10 +8,49 @@
|
|||
#include "fpga.h"
|
||||
#include "opae_sim.h"
|
||||
#include <VX_config.h>
|
||||
#include <util.h>
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern fpga_result fpgaGetProperties(fpga_token token, fpga_properties *prop) {
|
||||
__unused (token, prop);
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaPropertiesSetObjectType(fpga_properties prop, fpga_objtype objtype) {
|
||||
__unused (prop, objtype);
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaPropertiesSetGUID(fpga_properties prop, fpga_guid guid) {
|
||||
__unused (prop, guid);
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaDestroyProperties(fpga_properties *prop) {
|
||||
__unused (prop);
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaEnumerate(const fpga_properties *filters, uint32_t num_filters, fpga_token *tokens, uint32_t max_tokens, uint32_t *num_matches) {
|
||||
__unused (filters, num_filters, num_filters, tokens, max_tokens);
|
||||
if (num_matches) {
|
||||
*num_matches = 1;
|
||||
}
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaDestroyToken(fpga_token *token) {
|
||||
__unused (token);
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaOpen(fpga_token token, fpga_handle *handle, int flags) {
|
||||
__unused (token);
|
||||
if (NULL == handle || flags != 0)
|
||||
return FPGA_INVALID_PARAM;
|
||||
auto sim = new opae_sim();
|
||||
|
@ -83,4 +122,8 @@ extern fpga_result fpgaReadMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_
|
|||
|
||||
extern const char *fpgaErrStr(fpga_result e) {
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
|
@ -1,7 +1,7 @@
|
|||
#ifndef __FPGA_H__
|
||||
#define __FPGA_H__
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
@ -21,28 +21,21 @@ typedef enum {
|
|||
FPGA_RECONF_ERROR /**< Error while reconfiguring FPGA */
|
||||
} fpga_result;
|
||||
|
||||
typedef enum {
|
||||
FPGA_DEVICE = 0,
|
||||
FPGA_ACCELERATOR
|
||||
} fpga_objtype;
|
||||
|
||||
typedef void *fpga_handle;
|
||||
|
||||
typedef void *fpga_token;
|
||||
|
||||
fpga_result fpgaOpen(fpga_token token, fpga_handle *handle, int flags);
|
||||
typedef void *fpga_properties;
|
||||
|
||||
fpga_result fpgaClose(fpga_handle handle);
|
||||
|
||||
fpga_result fpgaPrepareBuffer(fpga_handle handle, uint64_t len, void **buf_addr, uint64_t *wsid, int flags);
|
||||
|
||||
fpga_result fpgaReleaseBuffer(fpga_handle handle, uint64_t wsid);
|
||||
|
||||
fpga_result fpgaGetIOAddress(fpga_handle handle, uint64_t wsid, uint64_t *ioaddr);
|
||||
|
||||
fpga_result fpgaWriteMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t value);
|
||||
|
||||
fpga_result fpgaReadMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t *value);
|
||||
|
||||
const char *fpgaErrStr(fpga_result e);
|
||||
typedef uint8_t fpga_guid[16];
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif // __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __FPGA_H__
|
||||
|
|
|
@ -48,6 +48,13 @@ else
|
|||
CXXFLAGS += -O2 -DNDEBUG
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), asesim)
|
||||
OPAE_API_PATHS=libopae-c-ase.so
|
||||
endif
|
||||
ifeq ($(TARGET), opaesim)
|
||||
OPAE_API_PATHS=/nethome/btine3/dev/vortex-gfx/sim/opaesim/libopae-c-sim.so
|
||||
endif
|
||||
|
||||
all: $(PROJECT) kernel.bin kernel.dump
|
||||
|
||||
kernel.dump: kernel.elf
|
||||
|
@ -65,14 +72,8 @@ $(PROJECT): $(SRCS)
|
|||
run-simx: $(PROJECT) kernel.bin
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_RT_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
run-fpga: $(PROJECT) kernel.bin
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_RT_PATH)/fpga:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
run-asesim: $(PROJECT) kernel.bin
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_RT_PATH)/asesim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
run-vlsim: $(PROJECT) kernel.bin
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_RT_PATH)/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
run-opae: $(PROJECT) kernel.bin
|
||||
OPAE_API_PATHS=$(OPAE_API_PATHS) LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_RT_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
run-rtlsim: $(PROJECT) kernel.bin
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_RT_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue