runtime refactoring

This commit is contained in:
Blaise Tine 2024-05-27 15:59:41 -07:00
parent 405d6b468f
commit c1000f6a3b
13 changed files with 317 additions and 484 deletions

77
runtime/common/common.h Normal file
View file

@ -0,0 +1,77 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vortex.h>
#include <VX_config.h>
#include <VX_types.h>
#include <callbacks.h>
#include <malloc.h>
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <unordered_map>
#define CACHE_BLOCK_SIZE 64
#define RAM_PAGE_SIZE 4096
#define ALLOC_BASE_ADDR CACHE_BLOCK_SIZE
#if (XLEN == 64)
#define GLOBAL_MEM_SIZE 0x200000000 // 8 GB
#else
#define GLOBAL_MEM_SIZE 0x100000000 // 4 GB
#endif
// Debug trace helper: printf-style logging prefixed with "[VXDRV] ".
// Expands to a no-op expression when NDEBUG is defined, so the arguments
// are not evaluated in release builds.
#ifdef NDEBUG
#define DBGPRINT(format, ...) ((void)0)
#else
#define DBGPRINT(format, ...) \
  do { \
    printf("[VXDRV] " format "", ##__VA_ARGS__); \
  } while (0)
#endif
// Evaluates `_expr` once and stores the result in a local named `err`.
// When `err` is non-zero, logs the failing expression text together with the
// code and then runs `_cleanup`, which may reference `err` (e.g. `return err;`).
// A zero result skips both the log and the cleanup.
#define CHECK_ERR(_expr, _cleanup) \
  do { \
    auto err = _expr; \
    if (err != 0) { \
      printf("[VXDRV] Error: '%s' returned %d!\n", #_expr, (int)err); \
      _cleanup \
    } \
  } while (false)
// In-memory table mapping a 32-bit address to a 32-bit value.
// NOTE(review): presumably used to shadow device configuration register (DCR)
// writes so they can be read back later - confirm against the drivers.
class DeviceConfig {
public:
  // Records `value` at `addr`, replacing any value stored there before.
  void write(uint32_t addr, uint32_t value) {
    store_[addr] = value;
  }

  // Looks up `addr`. On a hit copies the stored value into `*value` and
  // returns 0; on a miss returns -1 and leaves `*value` untouched.
  // `value` is dereferenced without a null check on a hit.
  int read(uint32_t addr, uint32_t* value) const {
    const auto entry = store_.find(addr);
    if (entry != store_.end()) {
      *value = entry->second;
      return 0;
    }
    return -1;
  }

private:
  std::unordered_map<uint32_t, uint32_t> store_;
};
// Rounds `size` up to the nearest multiple of `alignment`.
// `alignment` must be a non-zero power of two (checked by assert in debug
// builds). No overflow check is performed: a `size` within `alignment - 1`
// of UINT64_MAX wraps around.
inline uint64_t aligned_size(uint64_t size, uint64_t alignment) {
  // Zero passes the (x & (x - 1)) test but is not a power of two; it would
  // turn the mask below into all-ones and collapse every size to 0.
  assert(alignment != 0 && 0 == (alignment & (alignment - 1)));
  const uint64_t mask = alignment - 1;
  return (size + mask) & ~mask;
}
// Returns true when `addr` is a multiple of `alignment`.
// `alignment` must be a non-zero power of two (checked by assert in debug
// builds).
inline bool is_aligned(uint64_t addr, uint64_t alignment) {
  // Zero passes the (x & (x - 1)) test but is not a valid alignment: the mask
  // below would become all-ones and only address 0 would count as aligned.
  assert(alignment != 0 && 0 == (alignment & (alignment - 1)));
  const uint64_t mask = alignment - 1;
  return (addr & mask) == 0;
}

View file

@ -1,50 +0,0 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vortex.h>
#include <cstdint>
#include <unordered_map>
#include <VX_config.h>
#include <VX_types.h>
// Table mapping a 32-bit address to a 32-bit value; the member functions are
// only declared here and defined out of line.
class DeviceConfig {
public:
// Stores `value` at `addr`, overwriting any previous entry.
void write(uint32_t addr, uint32_t value);
// Copies the entry at `addr` into `*value` and returns 0, or returns -1 when
// no entry exists for `addr`.
int read(uint32_t addr, uint32_t* value) const;
private:
// Backing storage keyed by address.
std::unordered_map<uint32_t, uint32_t> store_;
};
int dcr_initialize(vx_device_h device);
uint64_t aligned_size(uint64_t size, uint64_t alignment);
bool is_aligned(uint64_t addr, uint64_t alignment);
int profiling_add(vx_device_h device);
void profiling_remove(int id);
void profiling_begin(int id);
void profiling_end(int id);
#define CACHE_BLOCK_SIZE 64
#define ALLOC_BASE_ADDR CACHE_BLOCK_SIZE
#if (XLEN == 64)
#define GLOBAL_MEM_SIZE 0x200000000 // 8 GB
#else
#define GLOBAL_MEM_SIZE 0x100000000 // 4 GB
#endif

View file

@ -20,7 +20,7 @@ CXXFLAGS += $(CONFIGS)
LDFLAGS += -shared -luuid -ldl -pthread
SRCS = $(SRC_DIR)/vortex.cpp $(SRC_DIR)/driver.cpp $(COMMON_DIR)/utils.cpp
SRCS = $(SRC_DIR)/vortex.cpp $(SRC_DIR)/driver.cpp
# set up target types
ifeq ($(TARGET), opaesim)
@ -49,11 +49,6 @@ ifdef SCOPE
SRCS += $(COMMON_DIR)/scope.cpp
endif
# Enable perf counters
ifdef PERF
CXXFLAGS += -DPERF_ENABLE
endif
PROJECT := libvortex-opae.so
all: $(DESTDIR)/$(PROJECT)

View file

@ -11,9 +11,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <utils.h>
#include <malloc.h>
#include <common.h>
#include <vortex_afu.h>
#include "driver.h"
#ifdef SCOPE
#include "scope.h"
#endif
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
@ -29,16 +34,6 @@
#include <memory>
#include <list>
#include <VX_config.h>
#include <VX_types.h>
#include <vortex_afu.h>
#ifdef SCOPE
#include "scope.h"
#endif
#include <callbacks.h>
using namespace vortex;
#define CMD_MEM_READ AFU_IMAGE_CMD_MEM_READ
@ -58,14 +53,6 @@ using namespace vortex;
#define STATUS_STATE_BITS 8
#define RAM_PAGE_SIZE 4096
#ifndef NDEBUG
#define DBGPRINT(format, ...) do { printf("[VXDRV] " format "", ##__VA_ARGS__); } while (0)
#else
#define DBGPRINT(format, ...) ((void)0)
#endif
#define CHECK_HANDLE(handle, _expr, _cleanup) \
auto handle = _expr; \
if (handle == nullptr) { \
@ -82,15 +69,6 @@ using namespace vortex;
_cleanup \
} while (false)
#define CHECK_ERR(_expr, _cleanup) \
do { \
auto err = _expr; \
if (err == 0) \
break; \
printf("[VXDRV] Error: '%s' returned %d!\n", #_expr, (int)err); \
_cleanup \
} while (false)
///////////////////////////////////////////////////////////////////////////////
class vx_device {
@ -113,8 +91,6 @@ public:
}
api_.fpgaClose(fpga_);
}
profiling_remove(profiling_id_);
}
int init() {
@ -211,12 +187,6 @@ public:
}
#endif
CHECK_ERR(dcr_initialize(this), {
return err;
});
profiling_id_ = profiling_add(this);
return 0;
}
@ -406,8 +376,6 @@ public:
return err;
});
profiling_begin(profiling_id_);
// start execution
CHECK_FPGA_ERR(api_.fpgaWriteMMIO64(fpga_, 0, MMIO_CMD_TYPE, CMD_RUN), {
return -1;
@ -475,8 +443,6 @@ public:
timeout -= sleep_time_ms;
};
profiling_end(profiling_id_);
return 0;
}
@ -553,7 +519,6 @@ private:
uint8_t* staging_ptr_;
uint64_t staging_size_;
std::unordered_map<uint32_t, std::array<uint64_t, 32>> mpm_cache_;
int profiling_id_;
};
struct vx_buffer {

View file

@ -17,7 +17,7 @@ CXXFLAGS += $(CONFIGS)
LDFLAGS += -shared -pthread
LDFLAGS += -L$(DESTDIR) -lrtlsim
SRCS := $(SRC_DIR)/vortex.cpp $(COMMON_DIR)/utils.cpp
SRCS := $(SRC_DIR)/vortex.cpp
# Debugging
ifdef DEBUG
@ -26,11 +26,6 @@ else
CXXFLAGS += -O2 -DNDEBUG
endif
# Enable perf counters
ifdef PERF
CXXFLAGS += -DPERF_ENABLE
endif
PROJECT := libvortex-rtlsim.so
all: $(DESTDIR)/$(PROJECT)

View file

@ -11,6 +11,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <common.h>
#include <mem.h>
#include <util.h>
#include <processor.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
@ -20,39 +26,8 @@
#include <list>
#include <chrono>
#include <vortex.h>
#include <malloc.h>
#include <utils.h>
#include <VX_config.h>
#include <VX_types.h>
#include <mem.h>
#include <util.h>
#include <processor.h>
#include <callbacks.h>
using namespace vortex;
#define RAM_PAGE_SIZE 4096
#ifndef NDEBUG
#define DBGPRINT(format, ...) do { printf("[VXDRV] " format "", ##__VA_ARGS__); } while (0)
#else
#define DBGPRINT(format, ...) ((void)0)
#endif
#define CHECK_ERR(_expr, _cleanup) \
do { \
auto err = _expr; \
if (err == 0) \
break; \
printf("[VXDRV] Error: '%s' returned %d!\n", #_expr, (int)err); \
_cleanup \
} while (false)
///////////////////////////////////////////////////////////////////////////////
class vx_device {
public:
vx_device()
@ -66,14 +41,9 @@ public:
if (future_.valid()) {
future_.wait();
}
profiling_remove(profiling_id_);
}
int init() {
CHECK_ERR(dcr_initialize(this), {
return err;
});
profiling_id_ = profiling_add(this);
return 0;
}
@ -219,8 +189,6 @@ public:
this->dcr_write(VX_DCR_BASE_STARTUP_ARG0, args_addr & 0xffffffff);
this->dcr_write(VX_DCR_BASE_STARTUP_ARG1, args_addr >> 32);
profiling_begin(profiling_id_);
// start new run
future_ = std::async(std::launch::async, [&]{
processor_.run();
@ -245,7 +213,6 @@ public:
if (0 == timeout_sec--)
return -1;
}
profiling_end(profiling_id_);
return 0;
}
@ -284,7 +251,6 @@ private:
DeviceConfig dcrs_;
std::future<void> future_;
std::unordered_map<uint32_t, std::array<uint64_t, 32>> mpm_cache_;
int profiling_id_;
};
struct vx_buffer {

View file

@ -13,7 +13,7 @@ CXXFLAGS += -DXLEN_$(XLEN)
LDFLAGS += -shared -pthread
LDFLAGS += -L$(DESTDIR) -lsimx
SRCS := $(SRC_DIR)/vortex.cpp $(COMMON_DIR)/utils.cpp
SRCS := $(SRC_DIR)/vortex.cpp
# Debugging
ifdef DEBUG

View file

@ -11,6 +11,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <common.h>
#include <util.h>
#include <processor.h>
#include <arch.h>
#include <mem.h>
#include <constants.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
@ -19,40 +27,8 @@
#include <future>
#include <chrono>
#include <utils.h>
#include <malloc.h>
#include <VX_config.h>
#include <VX_types.h>
#include <util.h>
#include <processor.h>
#include <arch.h>
#include <mem.h>
#include <constants.h>
#include <callbacks.h>
using namespace vortex;
#ifndef NDEBUG
#define DBGPRINT(format, ...) do { printf("[VXDRV] " format "", ##__VA_ARGS__); } while (0)
#else
#define DBGPRINT(format, ...) ((void)0)
#endif
#define CHECK_ERR(_expr, _cleanup) \
do { \
auto err = _expr; \
if (err == 0) \
break; \
printf("[VXDRV] Error: '%s' returned %d!\n", #_expr, (int)err); \
_cleanup \
} while (false)
///////////////////////////////////////////////////////////////////////////////
class vx_device {
public:
vx_device()
@ -69,14 +45,9 @@ public:
if (future_.valid()) {
future_.wait();
}
profiling_remove(profiling_id_);
}
int init() {
CHECK_ERR(dcr_initialize(this), {
return err;
});
profiling_id_ = profiling_add(this);
return 0;
}
@ -213,8 +184,6 @@ public:
this->dcr_write(VX_DCR_BASE_STARTUP_ARG0, args_addr & 0xffffffff);
this->dcr_write(VX_DCR_BASE_STARTUP_ARG1, args_addr >> 32);
profiling_begin(profiling_id_);
// start new run
future_ = std::async(std::launch::async, [&]{
processor_.run();
@ -239,7 +208,6 @@ public:
if (0 == timeout_sec--)
return -1;
}
profiling_end(profiling_id_);
return 0;
}
@ -278,7 +246,6 @@ private:
DeviceConfig dcrs_;
std::future<void> future_;
std::unordered_map<uint32_t, std::array<uint64_t, 32>> mpm_cache_;
int profiling_id_;
};
struct vx_buffer {

View file

@ -4,13 +4,20 @@ DESTDIR ?= $(CURDIR)/..
SRC_DIR := $(VORTEX_HOME)/runtime/stub
CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(SIM_DIR)/common
CXXFLAGS += -fPIC
LDFLAGS += -shared -pthread -ldl
SRCS := $(SRC_DIR)/vortex.cpp $(COMMON_DIR)/utils.cpp
SRCS := $(SRC_DIR)/vortex.cpp $(SRC_DIR)/utils.cpp
# Debugging
ifdef DEBUG
CXXFLAGS += -g -O0
else
CXXFLAGS += -O2 -DNDEBUG
endif
PROJECT := libvortex.so

View file

@ -11,7 +11,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "utils.h"
#include <common.h>
#include <iostream>
#include <fstream>
#include <list>
@ -21,129 +22,30 @@
#include <vortex.h>
#include <assert.h>
#define RT_CHECK(_expr, _cleanup) \
do { \
int _ret = _expr; \
if (0 == _ret) \
break; \
printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \
_cleanup \
} while (false)
uint64_t aligned_size(uint64_t size, uint64_t alignment) {
assert(0 == (alignment & (alignment - 1)));
return (size + alignment - 1) & ~(alignment - 1);
}
bool is_aligned(uint64_t addr, uint64_t alignment) {
assert(0 == (alignment & (alignment - 1)));
return 0 == (addr & (alignment - 1));
}
///////////////////////////////////////////////////////////////////////////////
class AutoPerfDump {
class ProfilingMode {
public:
AutoPerfDump() : perf_class_(0) {
ProfilingMode() : perf_class_(0) {
auto profiling_s = getenv("VORTEX_PROFILING");
if (profiling_s) {
perf_class_ = std::atoi(profiling_s);
}
}
~AutoPerfDump() {}
int add(vx_device_h hdevice) {
int ret = devices_.size();
devices_[ret] = hdevice;
return ret;
}
void remove(int id) {
devices_.erase(id);
}
void begin(int id) {
auto device = devices_.at(id);
vx_dcr_write(device, VX_DCR_BASE_MPM_CLASS, perf_class_);
}
void end(int id) {
auto device = devices_.at(id);
vx_dump_perf(device, stdout);
}
~ProfilingMode() {}
int perf_class() const {
return perf_class_;
}
private:
std::unordered_map<int, vx_device_h> devices_;
int perf_class_;
};
static AutoPerfDump gAutoPerfDump;
int profiling_add(vx_device_h hdevice) {
return gAutoPerfDump.add(hdevice);
int get_profiling_mode() {
static ProfilingMode gProfilingMode;
return gProfilingMode.perf_class();
}
void profiling_remove(int id) {
gAutoPerfDump.remove(id);
}
void profiling_begin(int id) {
gAutoPerfDump.begin(id);
}
void profiling_end(int id) {
gAutoPerfDump.end(id);
}
///////////////////////////////////////////////////////////////////////////////
void DeviceConfig::write(uint32_t addr, uint32_t value) {
store_[addr] = value;
}
int DeviceConfig::read(uint32_t addr, uint32_t* value) const {
auto it = store_.find(addr);
if (it == store_.end())
return -1;
*value = it->second;
return 0;
}
///////////////////////////////////////////////////////////////////////////////
int dcr_initialize(vx_device_h hdevice) {
const uint64_t startup_addr(STARTUP_ADDR);
RT_CHECK(vx_dcr_write(hdevice, VX_DCR_BASE_STARTUP_ADDR0, startup_addr & 0xffffffff), {
return _ret;
});
RT_CHECK(vx_dcr_write(hdevice, VX_DCR_BASE_STARTUP_ADDR1, startup_addr >> 32), {
return _ret;
});
RT_CHECK(vx_dcr_write(hdevice, VX_DCR_BASE_STARTUP_ARG0, 0), {
return _ret;
});
RT_CHECK(vx_dcr_write(hdevice, VX_DCR_BASE_STARTUP_ARG1, 0), {
return _ret;
});
RT_CHECK(vx_dcr_write(hdevice, VX_DCR_BASE_MPM_CLASS, 0), {
return _ret;
});
return 0;
}
///////////////////////////////////////////////////////////////////////////////
extern int vx_upload_kernel_bytes(vx_device_h hdevice, const void* content, uint64_t size, vx_buffer_h* hbuffer) {
if (nullptr == hdevice || nullptr == content || size <= 8 || nullptr == hbuffer)
return -1;
@ -157,30 +59,30 @@ extern int vx_upload_kernel_bytes(vx_device_h hdevice, const void* content, uint
vx_buffer_h _hbuffer;
#ifndef NDEBUG
RT_CHECK(vx_mem_reserve(hdevice, min_vma, runtime_size, 0, &_hbuffer), {
return _ret;
CHECK_ERR(vx_mem_reserve(hdevice, min_vma, runtime_size, 0, &_hbuffer), {
return err;
});
#else
RT_CHECK(vx_mem_alloc(hdevice, runtime_size, 0, &_hbuffer), {
return _ret;
CHECK_ERR(vx_mem_alloc(hdevice, runtime_size, 0, &_hbuffer), {
return err;
});
#endif
// mark binary region as read-only
RT_CHECK(vx_mem_access(_hbuffer, 0, bin_size, VX_MEM_READ), {
CHECK_ERR(vx_mem_access(_hbuffer, 0, bin_size, VX_MEM_READ), {
vx_mem_free(_hbuffer);
return _ret;
return err;
});
// mark global variables region as read-write
RT_CHECK(vx_mem_access(_hbuffer, bin_size, runtime_size - bin_size, VX_MEM_READ_WRITE), {
CHECK_ERR(vx_mem_access(_hbuffer, bin_size, runtime_size - bin_size, VX_MEM_READ_WRITE), {
vx_mem_free(_hbuffer);
return _ret;
return err;
});
RT_CHECK(vx_copy_to_dev(_hbuffer, bytes, 0, bin_size), {
CHECK_ERR(vx_copy_to_dev(_hbuffer, bytes, 0, bin_size), {
vx_mem_free(_hbuffer);
return _ret;
return err;
});
*hbuffer = _hbuffer;
@ -206,8 +108,8 @@ extern int vx_upload_kernel_file(vx_device_h hdevice, const char* filename, vx_b
ifs.read(content.data(), size);
// upload buffer
RT_CHECK(vx_upload_kernel_bytes(hdevice, content.data(), size, hbuffer), {
return _ret;
CHECK_ERR(vx_upload_kernel_bytes(hdevice, content.data(), size, hbuffer), {
return err;
});
return 0;
@ -219,13 +121,13 @@ extern int vx_upload_bytes(vx_device_h hdevice, const void* content, uint64_t si
vx_buffer_h _hbuffer;
RT_CHECK(vx_mem_alloc(hdevice, size, VX_MEM_READ, &_hbuffer), {
return _ret;
CHECK_ERR(vx_mem_alloc(hdevice, size, VX_MEM_READ, &_hbuffer), {
return err;
});
RT_CHECK(vx_copy_to_dev(_hbuffer, content, 0, size), {
CHECK_ERR(vx_copy_to_dev(_hbuffer, content, 0, size), {
vx_mem_free(_hbuffer);
return _ret;
return err;
});
*hbuffer = _hbuffer;
@ -251,8 +153,8 @@ extern int vx_upload_file(vx_device_h hdevice, const char* filename, vx_buffer_h
ifs.read(content.data(), size);
// upload buffer
RT_CHECK(vx_upload_bytes(hdevice, content.data(), size, hbuffer), {
return _ret;
CHECK_ERR(vx_upload_bytes(hdevice, content.data(), size, hbuffer), {
return err;
});
return 0;
@ -265,8 +167,6 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
uint64_t total_cycles = 0;
uint64_t max_cycles = 0;
#ifdef PERF_ENABLE
auto calcRatio = [&](uint64_t part, uint64_t total)->int {
if (total == 0)
return 0;
@ -283,8 +183,6 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
return int(caclAverage(part, total) * 100);
};
auto perf_class = gAutoPerfDump.perf_class();
// PERF: pipeline stalls
uint64_t sched_idles = 0;
uint64_t sched_stalls = 0;
@ -319,45 +217,44 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
uint64_t mem_reads = 0;
uint64_t mem_writes = 0;
uint64_t mem_lat = 0;
#endif
uint64_t num_cores;
RT_CHECK(vx_dev_caps(hdevice, VX_CAPS_NUM_CORES, &num_cores), {
return _ret;
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_CORES, &num_cores), {
return err;
});
#ifdef PERF_ENABLE
uint64_t isa_flags;
RT_CHECK(vx_dev_caps(hdevice, VX_CAPS_ISA_FLAGS, &isa_flags), {
return _ret;
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_ISA_FLAGS, &isa_flags), {
return err;
});
bool icache_enable = isa_flags & VX_ISA_EXT_ICACHE;
bool dcache_enable = isa_flags & VX_ISA_EXT_DCACHE;
bool l2cache_enable = isa_flags & VX_ISA_EXT_L2CACHE;
bool l3cache_enable = isa_flags & VX_ISA_EXT_L3CACHE;
bool lmem_enable = isa_flags & VX_ISA_EXT_LMEM;
#endif
auto perf_class = get_profiling_mode();
for (unsigned core_id = 0; core_id < num_cores; ++core_id) {
uint64_t cycles_per_core;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MCYCLE, core_id, &cycles_per_core), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MCYCLE, core_id, &cycles_per_core), {
return err;
});
uint64_t instrs_per_core;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MINSTRET, core_id, &instrs_per_core), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MINSTRET, core_id, &instrs_per_core), {
return err;
});
#ifdef PERF_ENABLE
switch (perf_class) {
case VX_DCR_MPM_CLASS_CORE: {
// PERF: pipeline
// scheduler idles
{
uint64_t sched_idles_per_core;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_SCHED_ID, core_id, &sched_idles_per_core), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_SCHED_ID, core_id, &sched_idles_per_core), {
return err;
});
if (num_cores > 1) {
int idles_percent_per_core = calcAvgPercent(sched_idles_per_core, cycles_per_core);
@ -368,8 +265,8 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
// scheduler stalls
{
uint64_t sched_stalls_per_core;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_SCHED_ST, core_id, &sched_stalls_per_core), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_SCHED_ST, core_id, &sched_stalls_per_core), {
return err;
});
if (num_cores > 1) {
int stalls_percent_per_core = calcAvgPercent(sched_stalls_per_core, cycles_per_core);
@ -380,8 +277,8 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
// ibuffer_stalls
{
uint64_t ibuffer_stalls_per_core;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_IBUF_ST, core_id, &ibuffer_stalls_per_core), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_IBUF_ST, core_id, &ibuffer_stalls_per_core), {
return err;
});
if (num_cores > 1) {
int ibuffer_percent_per_core = calcAvgPercent(ibuffer_stalls_per_core, cycles_per_core);
@ -392,24 +289,24 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
// issue_stalls
{
uint64_t scrb_stalls_per_core;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_SCRB_ST, core_id, &scrb_stalls_per_core), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_SCRB_ST, core_id, &scrb_stalls_per_core), {
return err;
});
uint64_t scrb_alu_per_core;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_SCRB_ALU, core_id, &scrb_alu_per_core), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_SCRB_ALU, core_id, &scrb_alu_per_core), {
return err;
});
uint64_t scrb_fpu_per_core;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_SCRB_FPU, core_id, &scrb_fpu_per_core), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_SCRB_FPU, core_id, &scrb_fpu_per_core), {
return err;
});
uint64_t scrb_lsu_per_core;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_SCRB_LSU, core_id, &scrb_lsu_per_core), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_SCRB_LSU, core_id, &scrb_lsu_per_core), {
return err;
});
uint64_t scrb_sfu_per_core;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_SCRB_SFU, core_id, &scrb_sfu_per_core), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_SCRB_SFU, core_id, &scrb_sfu_per_core), {
return err;
});
scrb_alu += scrb_alu_per_core;
scrb_fpu += scrb_fpu_per_core;
@ -428,16 +325,16 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
// sfu_stalls
{
uint64_t scrb_sfu_per_core;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_SCRB_SFU, core_id, &scrb_sfu_per_core), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_SCRB_SFU, core_id, &scrb_sfu_per_core), {
return err;
});
uint64_t scrb_wctl_per_core;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_SCRB_WCTL, core_id, &scrb_wctl_per_core), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_SCRB_WCTL, core_id, &scrb_wctl_per_core), {
return err;
});
uint64_t scrb_csrs_per_core;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_SCRB_CSRS, core_id, &scrb_csrs_per_core), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_SCRB_CSRS, core_id, &scrb_csrs_per_core), {
return err;
});
if (num_cores > 1) {
uint64_t sfu_total = scrb_wctl_per_core + scrb_csrs_per_core;
@ -455,15 +352,15 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
// ifetches
{
uint64_t ifetches_per_core;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_IFETCHES, core_id, &ifetches_per_core), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_IFETCHES, core_id, &ifetches_per_core), {
return err;
});
if (num_cores > 1) fprintf(stream, "PERF: core%d: ifetches=%ld\n", core_id, ifetches_per_core);
ifetches += ifetches_per_core;
uint64_t ifetch_lat_per_core;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_IFETCH_LT, core_id, &ifetch_lat_per_core), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_IFETCH_LT, core_id, &ifetch_lat_per_core), {
return err;
});
if (num_cores > 1) {
int mem_avg_lat = caclAverage(ifetch_lat_per_core, ifetches_per_core);
@ -474,15 +371,15 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
// loads
{
uint64_t loads_per_core;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_LOADS, core_id, &loads_per_core), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_LOADS, core_id, &loads_per_core), {
return err;
});
if (num_cores > 1) fprintf(stream, "PERF: core%d: loads=%ld\n", core_id, loads_per_core);
loads += loads_per_core;
uint64_t load_lat_per_core;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_LOAD_LT, core_id, &load_lat_per_core), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_LOAD_LT, core_id, &load_lat_per_core), {
return err;
});
if (num_cores > 1) {
int mem_avg_lat = caclAverage(load_lat_per_core, loads_per_core);
@ -493,8 +390,8 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
// stores
{
uint64_t stores_per_core;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_STORES, core_id, &stores_per_core), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_STORES, core_id, &stores_per_core), {
return err;
});
if (num_cores > 1) fprintf(stream, "PERF: core%d: stores=%ld\n", core_id, stores_per_core);
stores += stores_per_core;
@ -504,16 +401,16 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
if (lmem_enable) {
// PERF: lmem
uint64_t lmem_reads;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_LMEM_READS, core_id, &lmem_reads), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_LMEM_READS, core_id, &lmem_reads), {
return err;
});
uint64_t lmem_writes;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_LMEM_WRITES, core_id, &lmem_writes), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_LMEM_WRITES, core_id, &lmem_writes), {
return err;
});
uint64_t lmem_bank_stalls;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_LMEM_BANK_ST, core_id, &lmem_bank_stalls), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_LMEM_BANK_ST, core_id, &lmem_bank_stalls), {
return err;
});
int lmem_bank_utilization = calcAvgPercent(lmem_reads + lmem_writes, lmem_reads + lmem_writes + lmem_bank_stalls);
fprintf(stream, "PERF: core%d: lmem reads=%ld\n", core_id, lmem_reads);
@ -524,16 +421,16 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
if (icache_enable) {
// PERF: Icache
uint64_t icache_reads;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_ICACHE_READS, core_id, &icache_reads), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_ICACHE_READS, core_id, &icache_reads), {
return err;
});
uint64_t icache_read_misses;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_ICACHE_MISS_R, core_id, &icache_read_misses), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_ICACHE_MISS_R, core_id, &icache_read_misses), {
return err;
});
uint64_t icache_mshr_stalls;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_ICACHE_MSHR_ST, core_id, &icache_mshr_stalls), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_ICACHE_MSHR_ST, core_id, &icache_mshr_stalls), {
return err;
});
int icache_read_hit_ratio = calcRatio(icache_read_misses, icache_reads);
int mshr_utilization = calcAvgPercent(icache_read_misses, icache_read_misses + icache_mshr_stalls);
@ -545,28 +442,28 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
if (dcache_enable) {
// PERF: Dcache
uint64_t dcache_reads;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_DCACHE_READS, core_id, &dcache_reads), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_DCACHE_READS, core_id, &dcache_reads), {
return err;
});
uint64_t dcache_writes;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_DCACHE_WRITES, core_id, &dcache_writes), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_DCACHE_WRITES, core_id, &dcache_writes), {
return err;
});
uint64_t dcache_read_misses;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_DCACHE_MISS_R, core_id, &dcache_read_misses), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_DCACHE_MISS_R, core_id, &dcache_read_misses), {
return err;
});
uint64_t dcache_write_misses;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_DCACHE_MISS_W, core_id, &dcache_write_misses), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_DCACHE_MISS_W, core_id, &dcache_write_misses), {
return err;
});
uint64_t dcache_bank_stalls;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_DCACHE_BANK_ST, core_id, &dcache_bank_stalls), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_DCACHE_BANK_ST, core_id, &dcache_bank_stalls), {
return err;
});
uint64_t dcache_mshr_stalls;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_DCACHE_MSHR_ST, core_id, &dcache_mshr_stalls), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_DCACHE_MSHR_ST, core_id, &dcache_mshr_stalls), {
return err;
});
int dcache_read_hit_ratio = calcRatio(dcache_read_misses, dcache_reads);
int dcache_write_hit_ratio = calcRatio(dcache_write_misses, dcache_writes);
@ -583,74 +480,73 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
if (l2cache_enable) {
// PERF: L2cache
uint64_t tmp;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_L2CACHE_READS, core_id, &tmp), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_L2CACHE_READS, core_id, &tmp), {
return err;
});
l2cache_reads += tmp;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_L2CACHE_WRITES, core_id, &tmp), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_L2CACHE_WRITES, core_id, &tmp), {
return err;
});
l2cache_writes += tmp;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_L2CACHE_MISS_R, core_id, &tmp), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_L2CACHE_MISS_R, core_id, &tmp), {
return err;
});
l2cache_read_misses += tmp;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_L2CACHE_MISS_W, core_id, &tmp), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_L2CACHE_MISS_W, core_id, &tmp), {
return err;
});
l2cache_write_misses += tmp;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_L2CACHE_BANK_ST, core_id, &tmp), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_L2CACHE_BANK_ST, core_id, &tmp), {
return err;
});
l2cache_bank_stalls += tmp;
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_L2CACHE_MSHR_ST, core_id, &tmp), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_L2CACHE_MSHR_ST, core_id, &tmp), {
return err;
});
l2cache_mshr_stalls += tmp;
}
if (0 == core_id) {
if (l3cache_enable) {
// PERF: L3cache
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_L3CACHE_READS, core_id, &l3cache_reads), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_L3CACHE_READS, core_id, &l3cache_reads), {
return err;
});
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_L3CACHE_WRITES, core_id, &l3cache_writes), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_L3CACHE_WRITES, core_id, &l3cache_writes), {
return err;
});
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_L3CACHE_MISS_R, core_id, &l3cache_read_misses), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_L3CACHE_MISS_R, core_id, &l3cache_read_misses), {
return err;
});
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_L3CACHE_MISS_W, core_id, &l3cache_write_misses), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_L3CACHE_MISS_W, core_id, &l3cache_write_misses), {
return err;
});
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_L3CACHE_BANK_ST, core_id, &l3cache_bank_stalls), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_L3CACHE_BANK_ST, core_id, &l3cache_bank_stalls), {
return err;
});
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_L3CACHE_MSHR_ST, core_id, &l3cache_mshr_stalls), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_L3CACHE_MSHR_ST, core_id, &l3cache_mshr_stalls), {
return err;
});
}
// PERF: memory
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_READS, core_id, &mem_reads), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_READS, core_id, &mem_reads), {
return err;
});
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_WRITES, core_id, &mem_writes), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_WRITES, core_id, &mem_writes), {
return err;
});
RT_CHECK(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_LT, core_id, &mem_lat), {
return _ret;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_LT, core_id, &mem_lat), {
return err;
});
}
} break;
default:
break;
}
#endif
float IPC = (float)(double(instrs_per_core) / double(cycles_per_core));
if (num_cores > 1) fprintf(stream, "PERF: core%d: instrs=%ld, cycles=%ld, IPC=%f\n", core_id, instrs_per_core, cycles_per_core, IPC);
@ -659,7 +555,6 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
max_cycles = std::max<uint64_t>(cycles_per_core, max_cycles);
}
#ifdef PERF_ENABLE
switch (perf_class) {
case VX_DCR_MPM_CLASS_CORE: {
int sched_idles_percent = calcAvgPercent(sched_idles, total_cycles);
@ -728,7 +623,6 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
default:
break;
}
#endif
float IPC = (float)(double(total_instrs) / double(max_cycles));
fprintf(stream, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", total_instrs, max_cycles, IPC);
@ -741,11 +635,11 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
int vx_check_occupancy(vx_device_h hdevice, uint32_t group_size, uint32_t* max_barriers, uint32_t* max_localmem) {
// check group size
uint64_t warps_per_core, threads_per_warp;
RT_CHECK(vx_dev_caps(hdevice, VX_CAPS_NUM_WARPS, &warps_per_core), {
return _ret;
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_WARPS, &warps_per_core), {
return err;
});
RT_CHECK(vx_dev_caps(hdevice, VX_CAPS_NUM_THREADS, &threads_per_warp), {
return _ret;
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_THREADS, &threads_per_warp), {
return err;
});
uint32_t threads_per_core = warps_per_core * threads_per_warp;
if (group_size > threads_per_core) {
@ -760,8 +654,8 @@ int vx_check_occupancy(vx_device_h hdevice, uint32_t group_size, uint32_t* max_b
// check barriers capacity
if (max_barriers) {
uint64_t num_barriers;
RT_CHECK(vx_dev_caps(hdevice, VX_CAPS_NUM_BARRIERS, &num_barriers), {
return _ret;
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_BARRIERS, &num_barriers), {
return err;
});
if (warps_per_group < 2) {
*max_barriers = -1;
@ -773,8 +667,8 @@ int vx_check_occupancy(vx_device_h hdevice, uint32_t group_size, uint32_t* max_b
// check local memory capacity
if (max_localmem) {
uint64_t local_mem_size;
RT_CHECK(vx_dev_caps(hdevice, VX_CAPS_LOCAL_MEM_SIZE, &local_mem_size), {
return _ret;
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_LOCAL_MEM_SIZE, &local_mem_size), {
return err;
});
*max_localmem = local_mem_size / groups_per_core;
}

View file

@ -11,7 +11,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <callbacks.h>
#include <common.h>
#include <unistd.h>
#include <string.h>
#include <string>
@ -19,12 +20,42 @@
#include <dlfcn.h>
#include <iostream>
// Forward declaration; the definition is not in this file.
// vx_start() calls it and, when the result is non-zero, writes that value to
// the VX_DCR_BASE_MPM_CLASS register before starting execution.
int get_profiling_mode();
// Writes the initial values of the base DCRs through vx_dcr_write():
//  - the 64-bit STARTUP_ADDR is split across VX_DCR_BASE_STARTUP_ADDR0
//    (low 32 bits) and VX_DCR_BASE_STARTUP_ADDR1 (high 32 bits);
//  - VX_DCR_BASE_STARTUP_ARG0, VX_DCR_BASE_STARTUP_ARG1 and
//    VX_DCR_BASE_MPM_CLASS are all set to 0.
// Returns 0 when every write succeeds; otherwise returns the first non-zero
// code reported by vx_dcr_write() and skips the remaining writes.
// NOTE: CHECK_ERR prints the stringified call expression on failure, so the
// exact text of each call below is part of the emitted diagnostic.
static int dcr_initialize(vx_device_h hdevice) {
const uint64_t startup_addr(STARTUP_ADDR);
// low half of the startup address
CHECK_ERR(vx_dcr_write(hdevice, VX_DCR_BASE_STARTUP_ADDR0, startup_addr & 0xffffffff), {
return err;
});
// high half of the startup address
CHECK_ERR(vx_dcr_write(hdevice, VX_DCR_BASE_STARTUP_ADDR1, startup_addr >> 32), {
return err;
});
// both startup arguments are cleared
CHECK_ERR(vx_dcr_write(hdevice, VX_DCR_BASE_STARTUP_ARG0, 0), {
return err;
});
CHECK_ERR(vx_dcr_write(hdevice, VX_DCR_BASE_STARTUP_ARG1, 0), {
return err;
});
// MPM class starts at 0; vx_start() overwrites it when a profiling mode is set
CHECK_ERR(vx_dcr_write(hdevice, VX_DCR_BASE_MPM_CLASS, 0), {
return err;
});
return 0;
}
///////////////////////////////////////////////////////////////////////////////
// Table of driver entry points; every vx_* wrapper in this file dispatches
// through one of its members.
static callbacks_t g_callbacks;
// Handle of the loaded driver library; set in vx_dev_open() and released with
// dlclose() in vx_dev_close().
static void* g_drv_handle = nullptr;
// Pointer to a function that takes a callbacks_t* and returns int.
// NOTE(review): presumably the driver's init entry point that fills
// g_callbacks - confirm against vx_dev_open().
typedef int (*vx_dev_init_t)(callbacks_t*);
int vx_dev_open(vx_device_h* hdevice) {
extern int vx_dev_open(vx_device_h* hdevice) {
{
const char* driverName = getenv("VORTEX_DRIVER");
if (driverName == nullptr) {
@ -50,67 +81,86 @@ int vx_dev_open(vx_device_h* hdevice) {
g_drv_handle = handle;
}
return (g_callbacks.dev_open)(hdevice);
vx_device_h _hdevice;
CHECK_ERR((g_callbacks.dev_open)(&_hdevice), {
return err;
});
CHECK_ERR(dcr_initialize(_hdevice), {
return err;
});
*hdevice = _hdevice;
return 0;
}
// Closes a device in three steps: dumps the performance report to stdout via
// vx_dump_perf(), forwards the close to the driver's dev_close callback, and
// finally releases the driver library with dlclose().
// Returns the value produced by the dev_close callback.
// NOTE(review): the results of vx_dump_perf() and dlclose() are not checked,
// and g_drv_handle keeps its old value after dlclose() - confirm intended.
int vx_dev_close(vx_device_h hdevice) {
extern int vx_dev_close(vx_device_h hdevice) {
vx_dump_perf(hdevice, stdout);
int ret = (g_callbacks.dev_close)(hdevice);
// the library handle is released only after the driver callback has returned
dlclose(g_drv_handle);
return ret;
}
// Dispatch wrapper: passes hdevice, caps_id and value straight to the driver's
// dev_caps callback and returns its result unchanged.
int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t* value) {
extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t* value) {
return (g_callbacks.dev_caps)(hdevice, caps_id, value);
}
// Dispatch wrapper: passes hdevice, size, flags and hbuffer straight to the
// driver's mem_alloc callback and returns its result unchanged.
int vx_mem_alloc(vx_device_h hdevice, uint64_t size, int flags, vx_buffer_h* hbuffer) {
extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, int flags, vx_buffer_h* hbuffer) {
return (g_callbacks.mem_alloc)(hdevice, size, flags, hbuffer);
}
// Dispatch wrapper: passes hdevice, address, size, flags and hbuffer straight
// to the driver's mem_reserve callback and returns its result unchanged.
int vx_mem_reserve(vx_device_h hdevice, uint64_t address, uint64_t size, int flags, vx_buffer_h* hbuffer) {
extern int vx_mem_reserve(vx_device_h hdevice, uint64_t address, uint64_t size, int flags, vx_buffer_h* hbuffer) {
return (g_callbacks.mem_reserve)(hdevice, address, size, flags, hbuffer);
}
// Dispatch wrapper: passes hbuffer to the driver's mem_free callback and
// returns its result unchanged.
int vx_mem_free(vx_buffer_h hbuffer) {
extern int vx_mem_free(vx_buffer_h hbuffer) {
return (g_callbacks.mem_free)(hbuffer);
}
// Dispatch wrapper: passes hbuffer, offset, size and flags straight to the
// driver's mem_access callback and returns its result unchanged.
int vx_mem_access(vx_buffer_h hbuffer, uint64_t offset, uint64_t size, int flags) {
extern int vx_mem_access(vx_buffer_h hbuffer, uint64_t offset, uint64_t size, int flags) {
return (g_callbacks.mem_access)(hbuffer, offset, size, flags);
}
// Dispatch wrapper: passes hbuffer and the address out-pointer to the driver's
// mem_address callback and returns its result unchanged.
int vx_mem_address(vx_buffer_h hbuffer, uint64_t* address) {
extern int vx_mem_address(vx_buffer_h hbuffer, uint64_t* address) {
return (g_callbacks.mem_address)(hbuffer, address);
}
// Dispatch wrapper: passes hdevice and the mem_free / mem_used out-pointers to
// the driver's mem_info callback and returns its result unchanged.
int vx_mem_info(vx_device_h hdevice, uint64_t* mem_free, uint64_t* mem_used) {
extern int vx_mem_info(vx_device_h hdevice, uint64_t* mem_free, uint64_t* mem_used) {
return (g_callbacks.mem_info)(hdevice, mem_free, mem_used);
}
// Dispatch wrapper: passes hbuffer, host_ptr, dst_offset and size straight to
// the driver's copy_to_dev callback and returns its result unchanged.
int vx_copy_to_dev(vx_buffer_h hbuffer, const void* host_ptr, uint64_t dst_offset, uint64_t size) {
extern int vx_copy_to_dev(vx_buffer_h hbuffer, const void* host_ptr, uint64_t dst_offset, uint64_t size) {
return (g_callbacks.copy_to_dev)(hbuffer, host_ptr, dst_offset, size);
}
// Dispatch wrapper: passes host_ptr, hbuffer, src_offset and size straight to
// the driver's copy_from_dev callback and returns its result unchanged.
int vx_copy_from_dev(void* host_ptr, vx_buffer_h hbuffer, uint64_t src_offset, uint64_t size) {
extern int vx_copy_from_dev(void* host_ptr, vx_buffer_h hbuffer, uint64_t src_offset, uint64_t size) {
return (g_callbacks.copy_from_dev)(host_ptr, hbuffer, src_offset, size);
}
// Starts execution by forwarding hdevice, hkernel and harguments to the
// driver's start callback, and returns that callback's result.
// Before dispatching it queries get_profiling_mode(); a non-zero mode is
// written to the VX_DCR_BASE_MPM_CLASS register first. If that write fails,
// its error code is returned and the start callback is not invoked.
int vx_start(vx_device_h hdevice, vx_buffer_h hkernel, vx_buffer_h harguments) {
extern int vx_start(vx_device_h hdevice, vx_buffer_h hkernel, vx_buffer_h harguments) {
int profiling_mode = get_profiling_mode();
// mode 0 leaves the register as it was last written
if (profiling_mode != 0) {
CHECK_ERR(vx_dcr_write(hdevice, VX_DCR_BASE_MPM_CLASS, profiling_mode), {
return err;
});
}
return (g_callbacks.start)(hdevice, hkernel, harguments);
}
// Dispatch wrapper: passes hdevice and timeout straight to the driver's
// ready_wait callback and returns its result unchanged.
int vx_ready_wait(vx_device_h hdevice, uint64_t timeout) {
extern int vx_ready_wait(vx_device_h hdevice, uint64_t timeout) {
return (g_callbacks.ready_wait)(hdevice, timeout);
}
// Dispatch wrapper: passes hdevice, addr and the value out-pointer to the
// driver's dcr_read callback and returns its result unchanged.
int vx_dcr_read(vx_device_h hdevice, uint32_t addr, uint32_t* value) {
extern int vx_dcr_read(vx_device_h hdevice, uint32_t addr, uint32_t* value) {
return (g_callbacks.dcr_read)(hdevice, addr, value);
}
// Dispatch wrapper: passes hdevice, addr and value straight to the driver's
// dcr_write callback and returns its result unchanged.
int vx_dcr_write(vx_device_h hdevice, uint32_t addr, uint32_t value) {
extern int vx_dcr_write(vx_device_h hdevice, uint32_t addr, uint32_t value) {
return (g_callbacks.dcr_write)(hdevice, addr, value);
}
// Dispatch wrapper: passes hdevice, addr, core_id and the value out-pointer to
// the driver's mpm_query callback and returns its result unchanged.
int vx_mpm_query(vx_device_h hdevice, uint32_t addr, uint32_t core_id, uint64_t* value) {
extern int vx_mpm_query(vx_device_h hdevice, uint32_t addr, uint32_t core_id, uint64_t* value) {
return (g_callbacks.mpm_query)(hdevice, addr, core_id, value);
}

View file

@ -13,7 +13,7 @@ CXXFLAGS += -fPIC
LDFLAGS += -shared -pthread
LDFLAGS += -L$(XILINX_XRT)/lib
SRCS := $(SRC_DIR)/vortex.cpp $(COMMON_DIR)/utils.cpp $(SIM_DIR)/common/util.cpp
SRCS := $(SRC_DIR)/vortex.cpp $(SIM_DIR)/common/util.cpp
# set up target types
ifeq ($(TARGET), xrtsim)

View file

@ -11,16 +11,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <malloc.h>
#include <utils.h>
#include <VX_config.h>
#include <VX_types.h>
#include <stdarg.h>
#include <util.h>
#include <limits>
#include <vector>
#include <string>
#include <unordered_map>
#include <common.h>
#ifdef SCOPE
#include "scope.h"
@ -38,7 +29,12 @@
#include <fpga.h>
#endif
#include <callbacks.h>
#include <stdarg.h>
#include <util.h>
#include <limits>
#include <vector>
#include <string>
#include <unordered_map>
using namespace vortex;
@ -91,20 +87,12 @@ static const platform_info_t g_platforms [] = {
#endif
#define RAM_PAGE_SIZE 4096
#define DEFAULT_DEVICE_INDEX 0
#define DEFAULT_XCLBIN_PATH "vortex_afu.xclbin"
#define KERNEL_NAME "vortex_afu"
// Debug logging helper. When NDEBUG is not defined, DBGPRINT forwards its
// printf-style arguments to printf with the "[VXDRV] " prefix prepended to the
// format string; when NDEBUG is defined it expands to a no-op expression.
#ifndef NDEBUG
#define DBGPRINT(format, ...) do { printf("[VXDRV] " format "", ##__VA_ARGS__); } while (0)
#else
#define DBGPRINT(format, ...) ((void)0)
#endif
#define CHECK_HANDLE(handle, _expr, _cleanup) \
auto handle = _expr; \
if (handle == nullptr) { \
@ -112,15 +100,6 @@ static const platform_info_t g_platforms [] = {
_cleanup \
}
// Evaluates _expr once and stores the result in a local named `err`.
// A zero result leaves the macro immediately; any other value prints the
// stringified expression together with the result cast to int, then runs the
// caller-supplied _cleanup statements, which may refer to `err`.
// The do { ... } while (false) wrapper makes the expansion a single statement.
#define CHECK_ERR(_expr, _cleanup) \
do { \
auto err = _expr; \
if (err == 0) \
break; \
printf("[VXDRV] Error: '%s' returned %d!\n", #_expr, (int)err); \
_cleanup \
} while (false)
#ifndef CPP_API
static void dump_xrt_error(xrtDeviceHandle xrtDevice, xrtErrorCode err) {
@ -164,7 +143,6 @@ public:
#ifndef CPP_API
~vx_device() {
profiling_remove(profiling_id_);
for (auto& entry : xrtBuffers_) {
#ifdef BANK_INTERLEAVE
xrtBOFree(entry);
@ -238,12 +216,6 @@ public:
}
#endif
CHECK_ERR(dcr_initialize(this), {
return err;
});
profiling_id_ = profiling_add(this);
return 0;
}
@ -522,8 +494,6 @@ public:
return err;
});
profiling_begin(profiling_id_);
// start execution
CHECK_ERR(this->write_register(MMIO_CTL_ADDR, CTL_AP_START), {
return err;
@ -563,8 +533,6 @@ public:
timeout -= sleep_time_ms;
};
profiling_end(profiling_id_);
return 0;
}
@ -608,7 +576,6 @@ private:
uint64_t global_mem_size_;
DeviceConfig dcrs_;
std::unordered_map<uint32_t, std::array<uint64_t, 32>> mpm_cache_;
int profiling_id_;
#ifdef BANK_INTERLEAVE