mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
perf counters profiling refactoring
This commit is contained in:
parent
dc27d3c014
commit
98f080340a
10 changed files with 161 additions and 137 deletions
|
@ -1,12 +1,12 @@
|
|||
#!/bin/sh
|
||||
|
||||
# Copyright © 2019-2023
|
||||
#
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -91,12 +91,12 @@ case $i in
|
|||
;;
|
||||
--scope)
|
||||
SCOPE=1
|
||||
CORES=1
|
||||
CORES=1
|
||||
shift
|
||||
;;
|
||||
--perf=*)
|
||||
PERF_FLAG=-DPERF_ENABLE
|
||||
PERF_CLASS=${i#*=}
|
||||
PERF_CLASS=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--args=*)
|
||||
|
@ -117,8 +117,8 @@ case $i in
|
|||
exit 0
|
||||
;;
|
||||
*)
|
||||
show_usage
|
||||
exit -1
|
||||
show_usage
|
||||
exit -1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
@ -162,7 +162,7 @@ else
|
|||
exit -1
|
||||
fi
|
||||
|
||||
if [ "$DRIVER" = "gpu" ];
|
||||
if [ "$DRIVER" = "gpu" ];
|
||||
then
|
||||
# running application
|
||||
if [ $HAS_ARGS -eq 1 ]
|
||||
|
@ -183,11 +183,11 @@ CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_TH
|
|||
|
||||
echo "CONFIGS=$CONFIGS"
|
||||
|
||||
if [ $REBUILD -ne 0 ]
|
||||
if [ $REBUILD -ne 0 ]
|
||||
then
|
||||
BLACKBOX_CACHE=blackbox.$DRIVER.cache
|
||||
BLACKBOX_CACHE=blackbox.$DRIVER.cache
|
||||
if [ -f "$BLACKBOX_CACHE" ]
|
||||
then
|
||||
then
|
||||
LAST_CONFIGS=`cat $BLACKBOX_CACHE`
|
||||
fi
|
||||
|
||||
|
@ -199,7 +199,7 @@ then
|
|||
fi
|
||||
|
||||
# export performance monitor class identifier
|
||||
export PERF_CLASS=$PERF_CLASS
|
||||
export VORTEX_PROFILING=$PERF_CLASS
|
||||
|
||||
status=0
|
||||
|
||||
|
@ -210,7 +210,7 @@ make -C $ROOT_DIR/hw config > /dev/null
|
|||
make -C $ROOT_DIR/runtime/stub > /dev/null
|
||||
|
||||
if [ $DEBUG -ne 0 ]
|
||||
then
|
||||
then
|
||||
# running application
|
||||
if [ $TEMPBUILD -eq 1 ]
|
||||
then
|
||||
|
@ -265,18 +265,18 @@ then
|
|||
status=$?
|
||||
fi
|
||||
fi
|
||||
|
||||
|
||||
if [ -f "$APP_PATH/trace.vcd" ]
|
||||
then
|
||||
then
|
||||
mv -f $APP_PATH/trace.vcd .
|
||||
fi
|
||||
else
|
||||
else
|
||||
if [ $TEMPBUILD -eq 1 ]
|
||||
then
|
||||
# setup temp directory
|
||||
TEMPDIR=$(mktemp -d)
|
||||
mkdir -p "$TEMPDIR/$DRIVER"
|
||||
|
||||
|
||||
# driver initialization
|
||||
if [ $SCOPE -eq 1 ]
|
||||
then
|
||||
|
@ -286,7 +286,7 @@ else
|
|||
echo "running: DESTDIR=$TEMPDIR/$DRIVER CONFIGS=$CONFIGS make -C $DRIVER_PATH"
|
||||
DESTDIR="$TEMPDIR/$DRIVER" CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null
|
||||
fi
|
||||
|
||||
|
||||
# running application
|
||||
if [ $HAS_ARGS -eq 1 ]
|
||||
then
|
||||
|
@ -302,7 +302,7 @@ else
|
|||
# cleanup temp directory
|
||||
trap "rm -rf $TEMPDIR" EXIT
|
||||
else
|
||||
|
||||
|
||||
# driver initialization
|
||||
if [ $SCOPE -eq 1 ]
|
||||
then
|
||||
|
|
|
@ -17,81 +17,87 @@
|
|||
#include <list>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
#include <vortex.h>
|
||||
#include <assert.h>
|
||||
|
||||
#define RT_CHECK(_expr, _cleanup) \
|
||||
do { \
|
||||
int _ret = _expr; \
|
||||
if (0 == _ret) \
|
||||
break; \
|
||||
printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \
|
||||
_cleanup \
|
||||
} while (false)
|
||||
do { \
|
||||
int _ret = _expr; \
|
||||
if (0 == _ret) \
|
||||
break; \
|
||||
printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \
|
||||
_cleanup \
|
||||
} while (false)
|
||||
|
||||
uint64_t aligned_size(uint64_t size, uint64_t alignment) {
|
||||
assert(0 == (alignment & (alignment - 1)));
|
||||
return (size + alignment - 1) & ~(alignment - 1);
|
||||
assert(0 == (alignment & (alignment - 1)));
|
||||
return (size + alignment - 1) & ~(alignment - 1);
|
||||
}
|
||||
|
||||
bool is_aligned(uint64_t addr, uint64_t alignment) {
|
||||
assert(0 == (alignment & (alignment - 1)));
|
||||
return 0 == (addr & (alignment - 1));
|
||||
assert(0 == (alignment & (alignment - 1)));
|
||||
return 0 == (addr & (alignment - 1));
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class AutoPerfDump {
|
||||
public:
|
||||
AutoPerfDump() : perf_class_(0) {}
|
||||
|
||||
~AutoPerfDump() {
|
||||
for (auto hdevice : hdevices_) {
|
||||
vx_dump_perf(hdevice, stdout);
|
||||
}
|
||||
AutoPerfDump() : perf_class_(0) {
|
||||
auto profiling_s = getenv("VORTEX_PROFILING");
|
||||
if (profiling_s) {
|
||||
perf_class_ = std::atoi(profiling_s);
|
||||
}
|
||||
}
|
||||
|
||||
void add_device(vx_device_h hdevice) {
|
||||
auto perf_class_s = getenv("PERF_CLASS");
|
||||
if (perf_class_s) {
|
||||
perf_class_ = std::atoi(perf_class_s);
|
||||
vx_dcr_write(hdevice, VX_DCR_BASE_MPM_CLASS, perf_class_);
|
||||
}
|
||||
hdevices_.push_back(hdevice);
|
||||
}
|
||||
~AutoPerfDump() {}
|
||||
|
||||
void remove_device(vx_device_h hdevice) {
|
||||
hdevices_.remove(hdevice);
|
||||
vx_dump_perf(hdevice, stdout);
|
||||
}
|
||||
int add(vx_device_h hdevice) {
|
||||
int ret = devices_.size();
|
||||
devices_[ret] = hdevice;
|
||||
return ret;
|
||||
}
|
||||
|
||||
int get_perf_class() const {
|
||||
return perf_class_;
|
||||
}
|
||||
void remove(int id) {
|
||||
devices_.erase(id);
|
||||
}
|
||||
|
||||
void begin(int id) {
|
||||
auto device = devices_.at(id);
|
||||
vx_dcr_write(device, VX_DCR_BASE_MPM_CLASS, perf_class_);
|
||||
}
|
||||
|
||||
void end(int id) {
|
||||
auto device = devices_.at(id);
|
||||
vx_dump_perf(device, stdout);
|
||||
}
|
||||
|
||||
int get_perf_class() const {
|
||||
return perf_class_;
|
||||
}
|
||||
|
||||
private:
|
||||
std::list<vx_device_h> hdevices_;
|
||||
int perf_class_;
|
||||
std::unordered_map<int, vx_device_h> devices_;
|
||||
int perf_class_;
|
||||
};
|
||||
|
||||
#ifdef DUMP_PERF_STATS
|
||||
AutoPerfDump gAutoPerfDump;
|
||||
#endif
|
||||
|
||||
void perf_add_device(vx_device_h hdevice) {
|
||||
#ifdef DUMP_PERF_STATS
|
||||
gAutoPerfDump.add_device(hdevice);
|
||||
#else
|
||||
(void)hdevice;
|
||||
#endif
|
||||
int profiling_add(vx_device_h hdevice) {
|
||||
return gAutoPerfDump.add(hdevice);
|
||||
}
|
||||
|
||||
void perf_remove_device(vx_device_h hdevice) {
|
||||
#ifdef DUMP_PERF_STATS
|
||||
gAutoPerfDump.remove_device(hdevice);
|
||||
#else
|
||||
(void)hdevice;
|
||||
#endif
|
||||
void profiling_remove(int id) {
|
||||
gAutoPerfDump.remove(id);
|
||||
}
|
||||
|
||||
void profiling_begin(int id) {
|
||||
gAutoPerfDump.begin(id);
|
||||
}
|
||||
|
||||
void profiling_end(int id) {
|
||||
gAutoPerfDump.end(id);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -33,9 +33,13 @@ uint64_t aligned_size(uint64_t size, uint64_t alignment);
|
|||
|
||||
bool is_aligned(uint64_t addr, uint64_t alignment);
|
||||
|
||||
void perf_add_device(vx_device_h device);
|
||||
int profiling_add(vx_device_h device);
|
||||
|
||||
void perf_remove_device(vx_device_h device);
|
||||
void profiling_remove(int id);
|
||||
|
||||
void profiling_begin(int id);
|
||||
|
||||
void profiling_end(int id);
|
||||
|
||||
#define CACHE_BLOCK_SIZE 64
|
||||
#define ALLOC_BASE_ADDR CACHE_BLOCK_SIZE
|
||||
|
|
|
@ -18,9 +18,6 @@ CXXFLAGS += -fPIC
|
|||
# Add external configuration
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
|
||||
# Dump perf stats
|
||||
CXXFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
LDFLAGS += -shared -luuid -ldl -pthread
|
||||
|
||||
SRCS = $(SRC_DIR)/vortex.cpp $(SRC_DIR)/driver.cpp $(COMMON_DIR)/utils.cpp
|
||||
|
@ -42,13 +39,13 @@ endif
|
|||
# Debugging
|
||||
ifdef DEBUG
|
||||
CXXFLAGS += -g -O0
|
||||
else
|
||||
else
|
||||
CXXFLAGS += -O2 -DNDEBUG
|
||||
endif
|
||||
|
||||
# Enable scope logic analyzer
|
||||
ifdef SCOPE
|
||||
CXXFLAGS += -DSCOPE
|
||||
CXXFLAGS += -DSCOPE
|
||||
SRCS += $(COMMON_DIR)/scope.cpp
|
||||
endif
|
||||
|
||||
|
|
|
@ -112,6 +112,8 @@ public:
|
|||
}
|
||||
api_.fpgaClose(fpga_);
|
||||
}
|
||||
|
||||
profiling_remove(profiling_id_);
|
||||
}
|
||||
|
||||
int init() {
|
||||
|
@ -208,7 +210,13 @@ public:
|
|||
}
|
||||
#endif
|
||||
|
||||
return dcr_initialize(this);
|
||||
CHECK_ERR(dcr_initialize(this), {
|
||||
return err;
|
||||
});
|
||||
|
||||
profiling_id_ = profiling_add(this);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int get_caps(uint32_t caps_id, uint64_t *value) {
|
||||
|
@ -397,6 +405,8 @@ public:
|
|||
return err;
|
||||
});
|
||||
|
||||
profiling_begin(profiling_id_);
|
||||
|
||||
// start execution
|
||||
CHECK_FPGA_ERR(api_.fpgaWriteMMIO64(fpga_, 0, MMIO_CMD_TYPE, CMD_RUN), {
|
||||
return -1;
|
||||
|
@ -455,6 +465,7 @@ public:
|
|||
}
|
||||
if (state != 0) {
|
||||
fprintf(stdout, "[VXDRV] ready-wait timed out: state=%d\n", state);
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -462,6 +473,9 @@ public:
|
|||
nanosleep(&sleep_time, nullptr);
|
||||
timeout -= sleep_time_ms;
|
||||
};
|
||||
|
||||
profiling_end(profiling_id_);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -538,6 +552,7 @@ private:
|
|||
uint8_t* staging_ptr_;
|
||||
uint64_t staging_size_;
|
||||
std::unordered_map<uint32_t, std::array<uint64_t, 32>> mpm_cache_;
|
||||
int profiling_id_;
|
||||
};
|
||||
|
||||
struct vx_buffer {
|
||||
|
@ -569,10 +584,6 @@ extern int vx_dev_open(vx_device_h* hdevice) {
|
|||
return err;
|
||||
});
|
||||
|
||||
#ifdef DUMP_PERF_STATS
|
||||
perf_add_device(device);
|
||||
#endif
|
||||
|
||||
DBGPRINT("DEV_OPEN: hdevice=%p\n", (void*)device);
|
||||
|
||||
*hdevice = device;
|
||||
|
@ -592,10 +603,6 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
|||
vx_scope_stop(hdevice);
|
||||
#endif
|
||||
|
||||
#ifdef DUMP_PERF_STATS
|
||||
perf_remove_device(hdevice);
|
||||
#endif
|
||||
|
||||
delete device;
|
||||
|
||||
drv_close();
|
||||
|
@ -758,11 +765,7 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, const void* host_ptr, uint64_t ds
|
|||
|
||||
DBGPRINT("COPY_TO_DEV: hbuffer=%p, host_addr=%p, dst_offset=%ld, size=%ld\n", hbuffer, host_ptr, dst_offset, size);
|
||||
|
||||
CHECK_ERR(device->upload(buffer->addr + dst_offset, host_ptr, size), {
|
||||
return err;
|
||||
});
|
||||
|
||||
return 0;
|
||||
return device->upload(buffer->addr + dst_offset, host_ptr, size);
|
||||
}
|
||||
|
||||
extern int vx_copy_from_dev(void* host_ptr, vx_buffer_h hbuffer, uint64_t src_offset, uint64_t size) {
|
||||
|
@ -777,11 +780,7 @@ extern int vx_copy_from_dev(void* host_ptr, vx_buffer_h hbuffer, uint64_t src_of
|
|||
|
||||
DBGPRINT("COPY_FROM_DEV: hbuffer=%p, host_addr=%p, src_offset=%ld, size=%ld\n", hbuffer, host_ptr, src_offset, size);
|
||||
|
||||
CHECK_ERR(device->download(host_ptr, buffer->addr + src_offset, size), {
|
||||
return err;
|
||||
});
|
||||
|
||||
return 0;
|
||||
return device->download(host_ptr, buffer->addr + src_offset, size);
|
||||
}
|
||||
|
||||
extern int vx_start(vx_device_h hdevice, vx_buffer_h hkernel, vx_buffer_h harguments) {
|
||||
|
|
|
@ -14,9 +14,6 @@ CXXFLAGS += -fPIC
|
|||
# Add external configuration
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
|
||||
# Dump perf stats
|
||||
CXXFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
LDFLAGS += -shared -pthread
|
||||
LDFLAGS += -L$(DESTDIR) -lrtlsim
|
||||
|
||||
|
@ -25,7 +22,7 @@ SRCS := $(SRC_DIR)/vortex.cpp $(COMMON_DIR)/utils.cpp
|
|||
# Debugging
|
||||
ifdef DEBUG
|
||||
CXXFLAGS += -g -O0
|
||||
else
|
||||
else
|
||||
CXXFLAGS += -O2 -DNDEBUG
|
||||
endif
|
||||
|
||||
|
@ -37,7 +34,7 @@ endif
|
|||
PROJECT := libvortex.so
|
||||
|
||||
all: $(DESTDIR)/$(PROJECT)
|
||||
|
||||
|
||||
$(DESTDIR)/$(PROJECT): $(SRCS)
|
||||
DESTDIR=$(DESTDIR) $(MAKE) -C $(ROOT_DIR)/sim/rtlsim $(DESTDIR)/librtlsim.so
|
||||
$(CXX) $(CXXFLAGS) $(SRCS) $(LDFLAGS) -o $@
|
||||
|
|
|
@ -64,6 +64,15 @@ public:
|
|||
if (future_.valid()) {
|
||||
future_.wait();
|
||||
}
|
||||
profiling_remove(profiling_id_);
|
||||
}
|
||||
|
||||
int init() {
|
||||
CHECK_ERR(dcr_initialize(this), {
|
||||
return err;
|
||||
});
|
||||
profiling_id_ = profiling_add(this);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int get_caps(uint32_t caps_id, uint64_t *value) {
|
||||
|
@ -208,6 +217,8 @@ public:
|
|||
this->dcr_write(VX_DCR_BASE_STARTUP_ARG0, args_addr & 0xffffffff);
|
||||
this->dcr_write(VX_DCR_BASE_STARTUP_ARG1, args_addr >> 32);
|
||||
|
||||
profiling_begin(profiling_id_);
|
||||
|
||||
// start new run
|
||||
future_ = std::async(std::launch::async, [&]{
|
||||
processor_.run();
|
||||
|
@ -227,10 +238,12 @@ public:
|
|||
for (;;) {
|
||||
// wait for 1 sec and check status
|
||||
auto status = future_.wait_for(wait_time);
|
||||
if (status == std::future_status::ready
|
||||
|| 0 == timeout_sec--)
|
||||
if (status == std::future_status::ready)
|
||||
break;
|
||||
if (0 == timeout_sec--)
|
||||
return -1;
|
||||
}
|
||||
profiling_end(profiling_id_);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -269,6 +282,7 @@ private:
|
|||
DeviceConfig dcrs_;
|
||||
std::future<void> future_;
|
||||
std::unordered_map<uint32_t, std::array<uint64_t, 32>> mpm_cache_;
|
||||
int profiling_id_;
|
||||
};
|
||||
|
||||
struct vx_buffer {
|
||||
|
@ -287,15 +301,10 @@ extern int vx_dev_open(vx_device_h* hdevice) {
|
|||
if (device == nullptr)
|
||||
return -1;
|
||||
|
||||
int err = dcr_initialize(device);
|
||||
if (err != 0) {
|
||||
CHECK_ERR(device->init(), {
|
||||
delete device;
|
||||
return err;
|
||||
}
|
||||
|
||||
#ifdef DUMP_PERF_STATS
|
||||
perf_add_device(device);
|
||||
#endif
|
||||
});
|
||||
|
||||
DBGPRINT("DEV_OPEN: hdevice=%p\n", (void*)device);
|
||||
|
||||
|
@ -310,11 +319,7 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
|||
|
||||
DBGPRINT("DEV_CLOSE: hdevice=%p\n", hdevice);
|
||||
|
||||
#ifdef DUMP_PERF_STATS
|
||||
perf_remove_device(hdevice);
|
||||
#endif
|
||||
|
||||
vx_device *device = ((vx_device*)hdevice);
|
||||
auto device = ((vx_device*)hdevice);
|
||||
|
||||
delete device;
|
||||
|
||||
|
@ -512,6 +517,7 @@ extern int vx_ready_wait(vx_device_h hdevice, uint64_t timeout) {
|
|||
DBGPRINT("READY_WAIT: hdevice=%p, timeout=%ld\n", hdevice, timeout);
|
||||
|
||||
auto device = ((vx_device*)hdevice);
|
||||
|
||||
return device->ready_wait(timeout);
|
||||
}
|
||||
|
||||
|
|
|
@ -8,7 +8,6 @@ CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors
|
|||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I$(INC_DIR) -I../common -I$(ROOT_DIR)/hw -I$(SIM_DIR)/simx -I$(COMMON_DIR) -I$(SIM_DIR)/common
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
CXXFLAGS += -DDUMP_PERF_STATS
|
||||
CXXFLAGS += -DXLEN_$(XLEN)
|
||||
|
||||
LDFLAGS += -shared -pthread
|
||||
|
@ -19,7 +18,7 @@ SRCS := $(SRC_DIR)/vortex.cpp $(COMMON_DIR)/utils.cpp
|
|||
# Debugging
|
||||
ifdef DEBUG
|
||||
CXXFLAGS += -g -O0
|
||||
else
|
||||
else
|
||||
CXXFLAGS += -O2 -DNDEBUG
|
||||
endif
|
||||
|
||||
|
|
|
@ -68,6 +68,15 @@ public:
|
|||
if (future_.valid()) {
|
||||
future_.wait();
|
||||
}
|
||||
profiling_remove(profiling_id_);
|
||||
}
|
||||
|
||||
int init() {
|
||||
CHECK_ERR(dcr_initialize(this), {
|
||||
return err;
|
||||
});
|
||||
profiling_id_ = profiling_add(this);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int get_caps(uint32_t caps_id, uint64_t *value) {
|
||||
|
@ -203,6 +212,8 @@ public:
|
|||
this->dcr_write(VX_DCR_BASE_STARTUP_ARG0, args_addr & 0xffffffff);
|
||||
this->dcr_write(VX_DCR_BASE_STARTUP_ARG1, args_addr >> 32);
|
||||
|
||||
profiling_begin(profiling_id_);
|
||||
|
||||
// start new run
|
||||
future_ = std::async(std::launch::async, [&]{
|
||||
processor_.run();
|
||||
|
@ -222,10 +233,12 @@ public:
|
|||
for (;;) {
|
||||
// wait for 1 sec and check status
|
||||
auto status = future_.wait_for(wait_time);
|
||||
if (status == std::future_status::ready
|
||||
|| 0 == timeout_sec--)
|
||||
if (status == std::future_status::ready)
|
||||
break;
|
||||
if (0 == timeout_sec--)
|
||||
return -1;
|
||||
}
|
||||
profiling_end(profiling_id_);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -264,6 +277,7 @@ private:
|
|||
DeviceConfig dcrs_;
|
||||
std::future<void> future_;
|
||||
std::unordered_map<uint32_t, std::array<uint64_t, 32>> mpm_cache_;
|
||||
int profiling_id_;
|
||||
};
|
||||
|
||||
struct vx_buffer {
|
||||
|
@ -282,15 +296,10 @@ extern int vx_dev_open(vx_device_h* hdevice) {
|
|||
if (device == nullptr)
|
||||
return -1;
|
||||
|
||||
int err = dcr_initialize(device);
|
||||
if (err != 0) {
|
||||
CHECK_ERR(device->init(), {
|
||||
delete device;
|
||||
return err;
|
||||
}
|
||||
|
||||
#ifdef DUMP_PERF_STATS
|
||||
perf_add_device(device);
|
||||
#endif
|
||||
});
|
||||
|
||||
DBGPRINT("DEV_OPEN: hdevice=%p\n", (void*)device);
|
||||
|
||||
|
@ -307,10 +316,6 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
|||
|
||||
auto device = ((vx_device*)hdevice);
|
||||
|
||||
#ifdef DUMP_PERF_STATS
|
||||
perf_remove_device(hdevice);
|
||||
#endif
|
||||
|
||||
delete device;
|
||||
|
||||
return 0;
|
||||
|
@ -507,6 +512,7 @@ extern int vx_ready_wait(vx_device_h hdevice, uint64_t timeout) {
|
|||
DBGPRINT("READY_WAIT: hdevice=%p, timeout=%ld\n", hdevice, timeout);
|
||||
|
||||
auto device = ((vx_device*)hdevice);
|
||||
|
||||
return device->ready_wait(timeout);
|
||||
}
|
||||
|
||||
|
|
|
@ -154,6 +154,7 @@ public:
|
|||
#ifndef CPP_API
|
||||
|
||||
~vx_device() {
|
||||
profiling_remove(profiling_id_);
|
||||
for (auto& entry : xrtBuffers_) {
|
||||
#ifdef BANK_INTERLEAVE
|
||||
xrtBOFree(entry);
|
||||
|
@ -227,6 +228,12 @@ public:
|
|||
}
|
||||
#endif
|
||||
|
||||
CHECK_ERR(dcr_initialize(this), {
|
||||
return err;
|
||||
});
|
||||
|
||||
profiling_id_ = profiling_add(this);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -505,6 +512,8 @@ public:
|
|||
return err;
|
||||
});
|
||||
|
||||
profiling_begin(profiling_id_);
|
||||
|
||||
// start execution
|
||||
CHECK_ERR(device->write_register(MMIO_CTL_ADDR, CTL_AP_START), {
|
||||
return err;
|
||||
|
@ -535,12 +544,17 @@ public:
|
|||
return err;
|
||||
});
|
||||
bool is_done = (status & CTL_AP_DONE) == CTL_AP_DONE;
|
||||
if (is_done || 0 == timeout) {
|
||||
if (is_done)
|
||||
break;
|
||||
if (0 == timeout) {
|
||||
return -1;
|
||||
}
|
||||
nanosleep(&sleep_time, nullptr);
|
||||
timeout -= sleep_time_ms;
|
||||
};
|
||||
|
||||
profiling_end(profiling_id_);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -584,6 +598,7 @@ private:
|
|||
uint64_t global_mem_size_;
|
||||
DeviceConfig dcrs_;
|
||||
std::unordered_map<uint32_t, std::array<uint64_t, 32>> mpm_cache_;
|
||||
int profiling_id_;
|
||||
|
||||
#ifdef BANK_INTERLEAVE
|
||||
|
||||
|
@ -841,15 +856,6 @@ extern int vx_dev_open(vx_device_h* hdevice) {
|
|||
}
|
||||
#endif
|
||||
|
||||
CHECK_ERR(dcr_initialize(device), {
|
||||
delete device;
|
||||
return err;
|
||||
});
|
||||
|
||||
#ifdef DUMP_PERF_STATS
|
||||
perf_add_device(device);
|
||||
#endif
|
||||
|
||||
DBGPRINT("DEV_OPEN: hdevice=%p\n", (void*)device);
|
||||
|
||||
*hdevice = device;
|
||||
|
@ -1078,7 +1084,11 @@ extern int vx_ready_wait(vx_device_h hdevice, uint64_t timeout) {
|
|||
|
||||
auto device = ((vx_device*)hdevice);
|
||||
|
||||
return device->ready_wait(timeout);
|
||||
CHECK_ERR(device->ready_wait(timeout), {
|
||||
return err;
|
||||
});
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int vx_dcr_read(vx_device_h hdevice, uint32_t addr, uint32_t* value) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue