mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
local memory runtime refactoring
This commit is contained in:
parent
2776f2cdf0
commit
35a782a7ba
18 changed files with 139 additions and 232 deletions
|
@ -32,8 +32,9 @@ typedef void* vx_device_h;
|
|||
#define VX_CAPS_CACHE_LINE_SIZE 0x4
|
||||
#define VX_CAPS_GLOBAL_MEM_SIZE 0x5
|
||||
#define VX_CAPS_LOCAL_MEM_SIZE 0x6
|
||||
#define VX_CAPS_KERNEL_BASE_ADDR 0x7
|
||||
#define VX_CAPS_ISA_FLAGS 0x8
|
||||
#define VX_CAPS_LOCAL_MEM_ADDR 0x7
|
||||
#define VX_CAPS_KERNEL_BASE_ADDR 0x8
|
||||
#define VX_CAPS_ISA_FLAGS 0x9
|
||||
|
||||
// device isa flags
|
||||
#define VX_ISA_STD_A (1ull << 0)
|
||||
|
@ -54,10 +55,6 @@ typedef void* vx_device_h;
|
|||
#define VX_ISA_EXT_L3CACHE (1ull << 35)
|
||||
#define VX_ISA_EXT_LMEM (1ull << 36)
|
||||
|
||||
// device memory types
|
||||
#define VX_MEM_TYPE_GLOBAL 0
|
||||
#define VX_MEM_TYPE_LOCAL 1
|
||||
|
||||
// ready wait timeout
|
||||
#define VX_MAX_TIMEOUT (24*60*60*1000) // 24 Hr
|
||||
|
||||
|
@ -71,13 +68,13 @@ int vx_dev_close(vx_device_h hdevice);
|
|||
int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value);
|
||||
|
||||
// allocate device memory and return address
|
||||
int vx_mem_alloc(vx_device_h hdevice, uint64_t size, int type, uint64_t* dev_addr);
|
||||
int vx_mem_alloc(vx_device_h hdevice, uint64_t size, uint64_t* dev_addr);
|
||||
|
||||
// release device memory
|
||||
int vx_mem_free(vx_device_h hdevice, uint64_t dev_addr);
|
||||
|
||||
// get device memory info
|
||||
int vx_mem_info(vx_device_h hdevice, int type, uint64_t* mem_free, uint64_t* mem_used);
|
||||
int vx_mem_info(vx_device_h hdevice, uint64_t* mem_free, uint64_t* mem_used);
|
||||
|
||||
// Copy bytes from host to device memory
|
||||
int vx_copy_to_dev(vx_device_h hdevice, uint64_t dev_addr, const void* host_ptr, uint64_t size);
|
||||
|
|
|
@ -118,7 +118,6 @@ public:
|
|||
opae_drv_api_t api;
|
||||
fpga_handle fpga;
|
||||
std::shared_ptr<vortex::MemoryAllocator> global_mem;
|
||||
std::shared_ptr<vortex::MemoryAllocator> local_mem;
|
||||
DeviceConfig dcrs;
|
||||
uint64_t dev_caps;
|
||||
uint64_t isa_caps;
|
||||
|
@ -159,6 +158,9 @@ extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) {
|
|||
case VX_CAPS_LOCAL_MEM_SIZE:
|
||||
*value = 1ull << ((device->dev_caps >> 40) & 0xff);
|
||||
break;
|
||||
case VX_CAPS_LOCAL_MEM_ADDR:
|
||||
*value = LMEM_BASE_ADDR;
|
||||
break;
|
||||
case VX_CAPS_KERNEL_BASE_ADDR:
|
||||
*value = (uint64_t(device->dcrs.read(VX_DCR_BASE_STARTUP_ADDR1)) << 32) |
|
||||
device->dcrs.read(VX_DCR_BASE_STARTUP_ADDR0);
|
||||
|
@ -275,13 +277,6 @@ extern int vx_dev_open(vx_device_h* hdevice) {
|
|||
|
||||
device->global_mem = std::make_shared<vortex::MemoryAllocator>(
|
||||
ALLOC_BASE_ADDR, ALLOC_MAX_ADDR - ALLOC_BASE_ADDR, RAM_PAGE_SIZE, CACHE_BLOCK_SIZE);
|
||||
|
||||
uint64_t local_mem_size = 0;
|
||||
vx_dev_caps(device, VX_CAPS_LOCAL_MEM_SIZE, &local_mem_size);
|
||||
if (local_mem_size <= 1) {
|
||||
device->local_mem = std::make_shared<vortex::MemoryAllocator>(
|
||||
LMEM_BASE_ADDR, local_mem_size, RAM_PAGE_SIZE, 1);
|
||||
}
|
||||
|
||||
#ifdef SCOPE
|
||||
{
|
||||
|
@ -348,19 +343,14 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, int type, uint64_t* dev_addr) {
|
||||
extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, uint64_t* dev_addr) {
|
||||
if (nullptr == hdevice
|
||||
|| nullptr == dev_addr
|
||||
|| 0 == size)
|
||||
return -1;
|
||||
|
||||
auto device = ((vx_device*)hdevice);
|
||||
if (type == VX_MEM_TYPE_GLOBAL) {
|
||||
return device->global_mem->allocate(size, dev_addr);
|
||||
} else if (type == VX_MEM_TYPE_LOCAL) {
|
||||
return device->local_mem->allocate(size, dev_addr);
|
||||
}
|
||||
return -1;
|
||||
return device->global_mem->allocate(size, dev_addr);
|
||||
}
|
||||
|
||||
extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_addr) {
|
||||
|
@ -371,31 +361,18 @@ extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_addr) {
|
|||
return 0;
|
||||
|
||||
auto device = ((vx_device*)hdevice);
|
||||
if (dev_addr >= LMEM_BASE_ADDR) {
|
||||
return device->local_mem->release(dev_addr);
|
||||
} else {
|
||||
return device->global_mem->release(dev_addr);
|
||||
}
|
||||
return device->global_mem->release(dev_addr);
|
||||
}
|
||||
|
||||
extern int vx_mem_info(vx_device_h hdevice, int type, uint64_t* mem_free, uint64_t* mem_used) {
|
||||
extern int vx_mem_info(vx_device_h hdevice, uint64_t* mem_free, uint64_t* mem_used) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
auto device = ((vx_device*)hdevice);
|
||||
if (type == VX_MEM_TYPE_GLOBAL) {
|
||||
if (mem_free)
|
||||
*mem_free = device->global_mem->free();
|
||||
if (mem_used)
|
||||
*mem_used = device->global_mem->allocated();
|
||||
} else if (type == VX_MEM_TYPE_LOCAL) {
|
||||
if (mem_free)
|
||||
*mem_free = device->local_mem->free();
|
||||
if (mem_used)
|
||||
*mem_free = device->local_mem->allocated();
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
|
||||
auto device = ((vx_device*)hdevice);
|
||||
if (mem_free)
|
||||
*mem_free = device->global_mem->free();
|
||||
if (mem_used)
|
||||
*mem_used = device->global_mem->allocated();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -45,11 +45,6 @@ public:
|
|||
ALLOC_MAX_ADDR - ALLOC_BASE_ADDR,
|
||||
RAM_PAGE_SIZE,
|
||||
CACHE_BLOCK_SIZE)
|
||||
, local_mem_(
|
||||
LMEM_BASE_ADDR,
|
||||
(1ull << LMEM_LOG_SIZE),
|
||||
RAM_PAGE_SIZE,
|
||||
1)
|
||||
{
|
||||
processor_.attach_ram(&ram_);
|
||||
}
|
||||
|
@ -60,37 +55,19 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
int mem_alloc(uint64_t size, int type, uint64_t* dev_addr) {
|
||||
if (type == VX_MEM_TYPE_GLOBAL) {
|
||||
return global_mem_.allocate(size, dev_addr);
|
||||
} else if (type == VX_MEM_TYPE_LOCAL) {
|
||||
return local_mem_.allocate(size, dev_addr);
|
||||
}
|
||||
return -1;
|
||||
int mem_alloc(uint64_t size, uint64_t* dev_addr) {
|
||||
return global_mem_.allocate(size, dev_addr);
|
||||
}
|
||||
|
||||
int mem_free(uint64_t dev_addr) {
|
||||
if (dev_addr >= LMEM_BASE_ADDR) {
|
||||
return local_mem_.release(dev_addr);
|
||||
} else {
|
||||
return global_mem_.release(dev_addr);
|
||||
}
|
||||
return global_mem_.release(dev_addr);
|
||||
}
|
||||
|
||||
int mem_info(int type, uint64_t* mem_free, uint64_t* mem_used) const {
|
||||
if (type == VX_MEM_TYPE_GLOBAL) {
|
||||
if (mem_free)
|
||||
*mem_free = global_mem_.free();
|
||||
if (mem_used)
|
||||
*mem_used = global_mem_.allocated();
|
||||
} else if (type == VX_MEM_TYPE_LOCAL) {
|
||||
if (mem_free)
|
||||
*mem_free = local_mem_.free();
|
||||
if (mem_used)
|
||||
*mem_free = local_mem_.allocated();
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
int mem_info(uint64_t* mem_free, uint64_t* mem_used) const {
|
||||
if (mem_free)
|
||||
*mem_free = global_mem_.free();
|
||||
if (mem_used)
|
||||
*mem_used = global_mem_.allocated();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -176,7 +153,6 @@ private:
|
|||
RAM ram_;
|
||||
Processor processor_;
|
||||
MemoryAllocator global_mem_;
|
||||
MemoryAllocator local_mem_;
|
||||
DeviceConfig dcrs_;
|
||||
std::future<void> future_;
|
||||
};
|
||||
|
@ -208,6 +184,12 @@ extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) {
|
|||
case VX_CAPS_GLOBAL_MEM_SIZE:
|
||||
*value = GLOBAL_MEM_SIZE;
|
||||
break;
|
||||
case VX_CAPS_LOCAL_MEM_SIZE:
|
||||
*value = (1 << LMEM_LOG_SIZE);
|
||||
break;
|
||||
case VX_CAPS_LOCAL_MEM_ADDR:
|
||||
*value = LMEM_BASE_ADDR;
|
||||
break;
|
||||
case VX_CAPS_KERNEL_BASE_ADDR:
|
||||
*value = (uint64_t(device->read_dcr(VX_DCR_BASE_STARTUP_ADDR1)) << 32)
|
||||
| device->read_dcr(VX_DCR_BASE_STARTUP_ADDR0);
|
||||
|
@ -262,14 +244,14 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, int type, uint64_t* dev_addr) {
|
||||
extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, uint64_t* dev_addr) {
|
||||
if (nullptr == hdevice
|
||||
|| nullptr == dev_addr
|
||||
|| 0 == size)
|
||||
return -1;
|
||||
|
||||
vx_device *device = ((vx_device*)hdevice);
|
||||
return device->mem_alloc(size, type, dev_addr);
|
||||
return device->mem_alloc(size, dev_addr);
|
||||
}
|
||||
|
||||
extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_addr) {
|
||||
|
@ -283,12 +265,12 @@ extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_addr) {
|
|||
return device->mem_free(dev_addr);
|
||||
}
|
||||
|
||||
extern int vx_mem_info(vx_device_h hdevice, int type, uint64_t* mem_free, uint64_t* mem_used) {
|
||||
extern int vx_mem_info(vx_device_h hdevice, uint64_t* mem_free, uint64_t* mem_used) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
auto device = ((vx_device*)hdevice);
|
||||
return device->mem_info(type, mem_free, mem_used);
|
||||
return device->mem_info(mem_free, mem_used);
|
||||
}
|
||||
|
||||
extern int vx_copy_to_dev(vx_device_h hdevice, uint64_t dev_addr, const void* host_ptr, uint64_t size) {
|
||||
|
|
|
@ -95,11 +95,6 @@ public:
|
|||
ALLOC_MAX_ADDR - ALLOC_BASE_ADDR,
|
||||
RAM_PAGE_SIZE,
|
||||
CACHE_BLOCK_SIZE)
|
||||
, local_mem_(
|
||||
LMEM_BASE_ADDR,
|
||||
(1ull << LMEM_LOG_SIZE),
|
||||
RAM_PAGE_SIZE,
|
||||
1)
|
||||
{
|
||||
// attach memory module
|
||||
processor_.attach_ram(&ram_);
|
||||
|
@ -111,37 +106,19 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
int mem_alloc(uint64_t size, int type, uint64_t* dev_addr) {
|
||||
if (type == VX_MEM_TYPE_GLOBAL) {
|
||||
return global_mem_.allocate(size, dev_addr);
|
||||
} else if (type == VX_MEM_TYPE_LOCAL) {
|
||||
return local_mem_.allocate(size, dev_addr);
|
||||
}
|
||||
return -1;
|
||||
int mem_alloc(uint64_t size, uint64_t* dev_addr) {
|
||||
return global_mem_.allocate(size, dev_addr);
|
||||
}
|
||||
|
||||
int mem_free(uint64_t dev_addr) {
|
||||
if (dev_addr >= LMEM_BASE_ADDR) {
|
||||
return local_mem_.release(dev_addr);
|
||||
} else {
|
||||
return global_mem_.release(dev_addr);
|
||||
}
|
||||
return global_mem_.release(dev_addr);
|
||||
}
|
||||
|
||||
int mem_info(int type, uint64_t* mem_free, uint64_t* mem_used) const {
|
||||
if (type == VX_MEM_TYPE_GLOBAL) {
|
||||
if (mem_free)
|
||||
*mem_free = global_mem_.free();
|
||||
if (mem_used)
|
||||
*mem_used = global_mem_.allocated();
|
||||
} else if (type == VX_MEM_TYPE_LOCAL) {
|
||||
if (mem_free)
|
||||
*mem_free = local_mem_.free();
|
||||
if (mem_used)
|
||||
*mem_free = local_mem_.allocated();
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
int mem_info(uint64_t* mem_free, uint64_t* mem_used) const {
|
||||
if (mem_free)
|
||||
*mem_free = global_mem_.free();
|
||||
if (mem_used)
|
||||
*mem_used = global_mem_.allocated();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -222,7 +199,6 @@ private:
|
|||
RAM ram_;
|
||||
Processor processor_;
|
||||
MemoryAllocator global_mem_;
|
||||
MemoryAllocator local_mem_;
|
||||
DeviceConfig dcrs_;
|
||||
std::future<void> future_;
|
||||
};
|
||||
|
@ -296,6 +272,12 @@ extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) {
|
|||
case VX_CAPS_GLOBAL_MEM_SIZE:
|
||||
*value = GLOBAL_MEM_SIZE;
|
||||
break;
|
||||
case VX_CAPS_LOCAL_MEM_SIZE:
|
||||
*value = (1 << LMEM_LOG_SIZE);
|
||||
break;
|
||||
case VX_CAPS_LOCAL_MEM_ADDR:
|
||||
*value = LMEM_BASE_ADDR;
|
||||
break;
|
||||
case VX_CAPS_KERNEL_BASE_ADDR:
|
||||
*value = (uint64_t(device->read_dcr(VX_DCR_BASE_STARTUP_ADDR1)) << 32)
|
||||
| device->read_dcr(VX_DCR_BASE_STARTUP_ADDR0);
|
||||
|
@ -312,14 +294,14 @@ extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, int type, uint64_t* dev_addr) {
|
||||
extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, uint64_t* dev_addr) {
|
||||
if (nullptr == hdevice
|
||||
|| nullptr == dev_addr
|
||||
|| 0 == size)
|
||||
return -1;
|
||||
|
||||
vx_device *device = ((vx_device*)hdevice);
|
||||
return device->mem_alloc(size, type, dev_addr);
|
||||
return device->mem_alloc(size, dev_addr);
|
||||
}
|
||||
|
||||
extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_addr) {
|
||||
|
@ -333,12 +315,12 @@ extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_addr) {
|
|||
return device->mem_free(dev_addr);
|
||||
}
|
||||
|
||||
extern int vx_mem_info(vx_device_h hdevice, int type, uint64_t* mem_free, uint64_t* mem_used) {
|
||||
extern int vx_mem_info(vx_device_h hdevice, uint64_t* mem_free, uint64_t* mem_used) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
auto device = ((vx_device*)hdevice);
|
||||
return device->mem_info(type, mem_free, mem_used);
|
||||
return device->mem_info(mem_free, mem_used);
|
||||
}
|
||||
|
||||
extern int vx_copy_to_dev(vx_device_h hdevice, uint64_t dev_addr, const void* host_ptr, uint64_t size) {
|
||||
|
|
|
@ -25,7 +25,7 @@ extern int vx_dev_caps(vx_device_h /*hdevice*/, uint32_t /*caps_id*/, uint64_t*
|
|||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_mem_alloc(vx_device_h /*hdevice*/, uint64_t /*size*/, int /*type*/, uint64_t* /*dev_addr*/) {
|
||||
extern int vx_mem_alloc(vx_device_h /*hdevice*/, uint64_t /*size*/, uint64_t* /*dev_addr*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -33,7 +33,7 @@ extern int vx_mem_free(vx_device_h /*hdevice*/, uint64_t /*dev_addr*/) {
|
|||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_mem_info(vx_device_h /*hdevice*/, int /*type*/, uint64_t* /*mem_free*/, uint64_t* /*mem_used*/) {
|
||||
extern int vx_mem_info(vx_device_h /*hdevice*/, uint64_t* /*mem_free*/, uint64_t* /*mem_used*/) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -213,13 +213,6 @@ public:
|
|||
this->global_mem_ = std::make_shared<vortex::MemoryAllocator>(
|
||||
ALLOC_BASE_ADDR, ALLOC_MAX_ADDR, RAM_PAGE_SIZE, CACHE_BLOCK_SIZE);
|
||||
|
||||
uint64_t local_mem_size = 0;
|
||||
vx_dev_caps(this, VX_CAPS_LOCAL_MEM_SIZE, &local_mem_size);
|
||||
if (local_mem_size <= 1) {
|
||||
this->local_mem_ = std::make_shared<vortex::MemoryAllocator>(
|
||||
LMEM_BASE_ADDR, local_mem_size, RAM_PAGE_SIZE, 1);
|
||||
}
|
||||
|
||||
#ifdef BANK_INTERLEAVE
|
||||
xrtBuffers_.reserve(num_banks);
|
||||
for (uint32_t i = 0; i < num_banks; ++i) {
|
||||
|
@ -238,91 +231,66 @@ public:
|
|||
return 0;
|
||||
}
|
||||
|
||||
int mem_alloc(uint64_t size, int type, uint64_t* dev_addr) {
|
||||
int mem_alloc(uint64_t size, uint64_t* dev_addr) {
|
||||
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
|
||||
uint64_t addr;
|
||||
|
||||
if (type == VX_MEM_TYPE_GLOBAL) {
|
||||
CHECK_ERR(global_mem_->allocate(asize, &addr), {
|
||||
return -1;
|
||||
});
|
||||
#ifndef BANK_INTERLEAVE
|
||||
uint32_t bank_id;
|
||||
CHECK_ERR(this->get_bank_info(addr, &bank_id, nullptr), {
|
||||
return -1;
|
||||
});
|
||||
CHECK_ERR(get_buffer(bank_id, nullptr), {
|
||||
return -1;
|
||||
});
|
||||
#endif
|
||||
} else if (type == VX_MEM_TYPE_LOCAL) {
|
||||
if CHECK_ERR(local_mem_->allocate(asize, &addr), {
|
||||
return -1;
|
||||
});
|
||||
} else {
|
||||
CHECK_ERR(global_mem_->allocate(asize, &addr), {
|
||||
return -1;
|
||||
}
|
||||
});
|
||||
#ifndef BANK_INTERLEAVE
|
||||
uint32_t bank_id;
|
||||
CHECK_ERR(this->get_bank_info(addr, &bank_id, nullptr), {
|
||||
return -1;
|
||||
});
|
||||
CHECK_ERR(get_buffer(bank_id, nullptr), {
|
||||
return -1;
|
||||
});
|
||||
#endif
|
||||
*dev_addr = addr;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mem_free(uint64_t dev_addr) {
|
||||
if (dev_addr >= LMEM_BASE_ADDR) {
|
||||
CHECK_ERR(local_mem_->release(dev_addr), {
|
||||
return -1;
|
||||
});
|
||||
} else {
|
||||
CHECK_ERR(global_mem_->release(dev_addr), {
|
||||
return -1;
|
||||
});
|
||||
#ifdef BANK_INTERLEAVE
|
||||
if (0 == global_mem_->allocated()) {
|
||||
#ifndef CPP_API
|
||||
for (auto& entry : xrtBuffers_) {
|
||||
xrtBOFree(entry);
|
||||
}
|
||||
#endif
|
||||
xrtBuffers_.clear();
|
||||
}
|
||||
#else
|
||||
uint32_t bank_id;
|
||||
CHECK_ERR(this->get_bank_info(dev_addr, &bank_id, nullptr), {
|
||||
return -1;
|
||||
});
|
||||
auto it = xrtBuffers_.find(bank_id);
|
||||
if (it != xrtBuffers_.end()) {
|
||||
auto count = --it->second.count;
|
||||
if (0 == count) {
|
||||
printf("freeing bank%d...\n", bank_id);
|
||||
#ifndef CPP_API
|
||||
xrtBOFree(it->second.xrtBuffer);
|
||||
#endif
|
||||
xrtBuffers_.erase(it);
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "[VXDRV] Error: invalid device memory address: 0x%lx\n", dev_addr);
|
||||
return -1;
|
||||
CHECK_ERR(global_mem_->release(dev_addr), {
|
||||
return -1;
|
||||
});
|
||||
#ifdef BANK_INTERLEAVE
|
||||
if (0 == global_mem_->allocated()) {
|
||||
#ifndef CPP_API
|
||||
for (auto& entry : xrtBuffers_) {
|
||||
xrtBOFree(entry);
|
||||
}
|
||||
#endif
|
||||
xrtBuffers_.clear();
|
||||
}
|
||||
#else
|
||||
uint32_t bank_id;
|
||||
CHECK_ERR(this->get_bank_info(dev_addr, &bank_id, nullptr), {
|
||||
return -1;
|
||||
});
|
||||
auto it = xrtBuffers_.find(bank_id);
|
||||
if (it != xrtBuffers_.end()) {
|
||||
auto count = --it->second.count;
|
||||
if (0 == count) {
|
||||
printf("freeing bank%d...\n", bank_id);
|
||||
#ifndef CPP_API
|
||||
xrtBOFree(it->second.xrtBuffer);
|
||||
#endif
|
||||
xrtBuffers_.erase(it);
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "[VXDRV] Error: invalid device memory address: 0x%lx\n", dev_addr);
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mem_info(int type, uint64_t* mem_free, uint64_t* mem_used) const {
|
||||
if (type == VX_MEM_TYPE_GLOBAL) {
|
||||
if (mem_free)
|
||||
*mem_free = global_mem_->free();
|
||||
if (mem_used)
|
||||
*mem_used = global_mem_->allocated();
|
||||
} else if (type == VX_MEM_TYPE_LOCAL) {
|
||||
if (mem_free)
|
||||
*mem_free = local_mem_->free();
|
||||
if (mem_used)
|
||||
*mem_free = local_mem_->allocated();
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
int mem_info(uint64_t* mem_free, uint64_t* mem_used) const {
|
||||
if (mem_free)
|
||||
*mem_free = global_mem_->free();
|
||||
if (mem_used)
|
||||
*mem_used = global_mem_->allocated();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -433,8 +401,6 @@ private:
|
|||
xrt_kernel_t xrtKernel_;
|
||||
const platform_info_t platform_;
|
||||
std::shared_ptr<vortex::MemoryAllocator> global_mem_;
|
||||
std::shared_ptr<vortex::MemoryAllocator> local_mem_;
|
||||
|
||||
#ifdef BANK_INTERLEAVE
|
||||
|
||||
std::vector<xrt_buffer_t> xrtBuffers_;
|
||||
|
@ -549,6 +515,9 @@ extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) {
|
|||
case VX_CAPS_LOCAL_MEM_SIZE:
|
||||
*value = 1ull << ((device->dev_caps >> 40) & 0xff);
|
||||
break;
|
||||
case VX_CAPS_LOCAL_MEM_ADDR:
|
||||
*value = LMEM_BASE_ADDR;
|
||||
break;
|
||||
case VX_CAPS_KERNEL_BASE_ADDR:
|
||||
*value = (uint64_t(device->dcrs.read(VX_DCR_BASE_STARTUP_ADDR1)) << 32) |
|
||||
device->dcrs.read(VX_DCR_BASE_STARTUP_ADDR0);
|
||||
|
@ -762,14 +731,14 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, int type, uint64_t* dev_addr) {
|
||||
extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, uint64_t* dev_addr) {
|
||||
if (nullptr == hdevice
|
||||
|| nullptr == dev_addr
|
||||
|| 0 == size)
|
||||
return -1;
|
||||
|
||||
auto device = ((vx_device*)hdevice);
|
||||
return device->mem_alloc(size, type, dev_addr);
|
||||
return device->mem_alloc(size, dev_addr);
|
||||
}
|
||||
|
||||
extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_addr) {
|
||||
|
@ -783,12 +752,12 @@ extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_addr) {
|
|||
return device->mem_free(dev_addr);
|
||||
}
|
||||
|
||||
extern int vx_mem_info(vx_device_h hdevice, int type, uint64_t* mem_free, uint64_t* mem_used) {
|
||||
extern int vx_mem_info(vx_device_h hdevice, uint64_t* mem_free, uint64_t* mem_used) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
auto device = (vx_device*)hdevice;
|
||||
return device->mem_info(type, mem_free, mem_used);
|
||||
return device->mem_info(mem_free, mem_used);
|
||||
}
|
||||
|
||||
extern int vx_copy_to_dev(vx_device_h hdevice, uint64_t dev_addr, const void* host_ptr, uint64_t size) {
|
||||
|
|
|
@ -236,8 +236,8 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.src_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.dst_addr));
|
||||
|
||||
kernel_arg.count = num_points;
|
||||
|
||||
|
|
|
@ -179,9 +179,9 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src0_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src1_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.src0_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.src1_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.dst_addr));
|
||||
|
||||
kernel_arg.num_tasks = num_tasks;
|
||||
kernel_arg.task_size = count;
|
||||
|
|
|
@ -216,8 +216,8 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
RT_CHECK(vx_mem_alloc(device, src_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, dst_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, src_buf_size, &kernel_arg.src_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, dst_buf_size, &kernel_arg.dst_addr));
|
||||
|
||||
kernel_arg.num_points = num_points;
|
||||
|
||||
|
|
|
@ -119,9 +119,9 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src0_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src1_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.src0_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.src1_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.dst_addr));
|
||||
|
||||
kernel_arg.num_tasks = num_tasks;
|
||||
kernel_arg.task_size = count;
|
||||
|
|
|
@ -129,9 +129,9 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src0_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src1_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.src0_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.src1_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.dst_addr));
|
||||
|
||||
kernel_arg.num_tasks = num_tasks;
|
||||
kernel_arg.task_size = count;
|
||||
|
|
|
@ -152,7 +152,7 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
uint32_t num_points = count;
|
||||
|
||||
RT_CHECK(vx_mem_alloc(device, NUM_ADDRS * sizeof(int32_t), VX_MEM_TYPE_GLOBAL, &usr_test_mem));
|
||||
RT_CHECK(vx_mem_alloc(device, NUM_ADDRS * sizeof(int32_t), &usr_test_mem));
|
||||
|
||||
// generate input data
|
||||
gen_src_addrs(num_points);
|
||||
|
@ -172,9 +172,9 @@ int main(int argc, char *argv[]) {
|
|||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
|
||||
RT_CHECK(vx_mem_alloc(device, src_buf_size, VX_MEM_TYPE_GLOBAL, &value));
|
||||
RT_CHECK(vx_mem_alloc(device, src_buf_size, &value));
|
||||
kernel_arg.src_addr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, dst_buf_size, VX_MEM_TYPE_GLOBAL, &value));
|
||||
RT_CHECK(vx_mem_alloc(device, dst_buf_size, &value));
|
||||
kernel_arg.dst_addr = value;
|
||||
kernel_arg.num_points = num_points;
|
||||
|
||||
|
|
|
@ -216,9 +216,9 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
RT_CHECK(vx_mem_alloc(device, addr_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src0_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, src_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src1_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, dst_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, addr_buf_size, &kernel_arg.src0_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, src_buf_size, &kernel_arg.src1_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, dst_buf_size, &kernel_arg.dst_addr));
|
||||
|
||||
kernel_arg.num_tasks = num_tasks;
|
||||
kernel_arg.stride = count;
|
||||
|
|
|
@ -122,8 +122,8 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.src_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.dst_addr));
|
||||
|
||||
kernel_arg.size = num_points;
|
||||
|
||||
|
|
|
@ -98,7 +98,7 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.src_addr));
|
||||
|
||||
kernel_arg.num_points = num_points;
|
||||
|
||||
|
|
|
@ -146,9 +146,9 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.A_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.B_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.C_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.A_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.B_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.C_addr));
|
||||
|
||||
kernel_arg.num_tasks = num_points;
|
||||
kernel_arg.size = size;
|
||||
|
|
|
@ -160,8 +160,8 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
RT_CHECK(vx_mem_alloc(device, src_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, dst_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, src_buf_size, &kernel_arg.src_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, dst_buf_size, &kernel_arg.dst_addr));
|
||||
|
||||
kernel_arg.num_points = num_points;
|
||||
|
||||
|
|
|
@ -181,9 +181,9 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src0_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src1_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.src0_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.src1_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.dst_addr));
|
||||
|
||||
kernel_arg.num_points = num_points;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue