local memory runtime refactoring

This commit is contained in:
Blaise Tine 2024-03-21 09:31:55 -07:00
parent 2776f2cdf0
commit 35a782a7ba
18 changed files with 139 additions and 232 deletions

View file

@ -32,8 +32,9 @@ typedef void* vx_device_h;
#define VX_CAPS_CACHE_LINE_SIZE 0x4
#define VX_CAPS_GLOBAL_MEM_SIZE 0x5
#define VX_CAPS_LOCAL_MEM_SIZE 0x6
#define VX_CAPS_KERNEL_BASE_ADDR 0x7
#define VX_CAPS_ISA_FLAGS 0x8
#define VX_CAPS_LOCAL_MEM_ADDR 0x7
#define VX_CAPS_KERNEL_BASE_ADDR 0x8
#define VX_CAPS_ISA_FLAGS 0x9
// device isa flags
#define VX_ISA_STD_A (1ull << 0)
@ -54,10 +55,6 @@ typedef void* vx_device_h;
#define VX_ISA_EXT_L3CACHE (1ull << 35)
#define VX_ISA_EXT_LMEM (1ull << 36)
// device memory types
#define VX_MEM_TYPE_GLOBAL 0
#define VX_MEM_TYPE_LOCAL 1
// ready wait timeout
#define VX_MAX_TIMEOUT (24*60*60*1000) // 24 Hr
@ -71,13 +68,13 @@ int vx_dev_close(vx_device_h hdevice);
int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value);
// allocate device memory and return address
int vx_mem_alloc(vx_device_h hdevice, uint64_t size, int type, uint64_t* dev_addr);
int vx_mem_alloc(vx_device_h hdevice, uint64_t size, uint64_t* dev_addr);
// release device memory
int vx_mem_free(vx_device_h hdevice, uint64_t dev_addr);
// get device memory info
int vx_mem_info(vx_device_h hdevice, int type, uint64_t* mem_free, uint64_t* mem_used);
int vx_mem_info(vx_device_h hdevice, uint64_t* mem_free, uint64_t* mem_used);
// Copy bytes from host to device memory
int vx_copy_to_dev(vx_device_h hdevice, uint64_t dev_addr, const void* host_ptr, uint64_t size);

View file

@ -118,7 +118,6 @@ public:
opae_drv_api_t api;
fpga_handle fpga;
std::shared_ptr<vortex::MemoryAllocator> global_mem;
std::shared_ptr<vortex::MemoryAllocator> local_mem;
DeviceConfig dcrs;
uint64_t dev_caps;
uint64_t isa_caps;
@ -159,6 +158,9 @@ extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) {
case VX_CAPS_LOCAL_MEM_SIZE:
*value = 1ull << ((device->dev_caps >> 40) & 0xff);
break;
case VX_CAPS_LOCAL_MEM_ADDR:
*value = LMEM_BASE_ADDR;
break;
case VX_CAPS_KERNEL_BASE_ADDR:
*value = (uint64_t(device->dcrs.read(VX_DCR_BASE_STARTUP_ADDR1)) << 32) |
device->dcrs.read(VX_DCR_BASE_STARTUP_ADDR0);
@ -275,13 +277,6 @@ extern int vx_dev_open(vx_device_h* hdevice) {
device->global_mem = std::make_shared<vortex::MemoryAllocator>(
ALLOC_BASE_ADDR, ALLOC_MAX_ADDR - ALLOC_BASE_ADDR, RAM_PAGE_SIZE, CACHE_BLOCK_SIZE);
uint64_t local_mem_size = 0;
vx_dev_caps(device, VX_CAPS_LOCAL_MEM_SIZE, &local_mem_size);
if (local_mem_size <= 1) {
device->local_mem = std::make_shared<vortex::MemoryAllocator>(
LMEM_BASE_ADDR, local_mem_size, RAM_PAGE_SIZE, 1);
}
#ifdef SCOPE
{
@ -348,19 +343,14 @@ extern int vx_dev_close(vx_device_h hdevice) {
return 0;
}
extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, int type, uint64_t* dev_addr) {
extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, uint64_t* dev_addr) {
if (nullptr == hdevice
|| nullptr == dev_addr
|| 0 == size)
return -1;
auto device = ((vx_device*)hdevice);
if (type == VX_MEM_TYPE_GLOBAL) {
return device->global_mem->allocate(size, dev_addr);
} else if (type == VX_MEM_TYPE_LOCAL) {
return device->local_mem->allocate(size, dev_addr);
}
return -1;
return device->global_mem->allocate(size, dev_addr);
}
extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_addr) {
@ -371,31 +361,18 @@ extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_addr) {
return 0;
auto device = ((vx_device*)hdevice);
if (dev_addr >= LMEM_BASE_ADDR) {
return device->local_mem->release(dev_addr);
} else {
return device->global_mem->release(dev_addr);
}
return device->global_mem->release(dev_addr);
}
extern int vx_mem_info(vx_device_h hdevice, int type, uint64_t* mem_free, uint64_t* mem_used) {
extern int vx_mem_info(vx_device_h hdevice, uint64_t* mem_free, uint64_t* mem_used) {
if (nullptr == hdevice)
return -1;
auto device = ((vx_device*)hdevice);
if (type == VX_MEM_TYPE_GLOBAL) {
if (mem_free)
*mem_free = device->global_mem->free();
if (mem_used)
*mem_used = device->global_mem->allocated();
} else if (type == VX_MEM_TYPE_LOCAL) {
if (mem_free)
*mem_free = device->local_mem->free();
if (mem_used)
*mem_free = device->local_mem->allocated();
} else {
return -1;
}
auto device = ((vx_device*)hdevice);
if (mem_free)
*mem_free = device->global_mem->free();
if (mem_used)
*mem_used = device->global_mem->allocated();
return 0;
}

View file

@ -45,11 +45,6 @@ public:
ALLOC_MAX_ADDR - ALLOC_BASE_ADDR,
RAM_PAGE_SIZE,
CACHE_BLOCK_SIZE)
, local_mem_(
LMEM_BASE_ADDR,
(1ull << LMEM_LOG_SIZE),
RAM_PAGE_SIZE,
1)
{
processor_.attach_ram(&ram_);
}
@ -60,37 +55,19 @@ public:
}
}
int mem_alloc(uint64_t size, int type, uint64_t* dev_addr) {
if (type == VX_MEM_TYPE_GLOBAL) {
return global_mem_.allocate(size, dev_addr);
} else if (type == VX_MEM_TYPE_LOCAL) {
return local_mem_.allocate(size, dev_addr);
}
return -1;
// Reserve `size` bytes from the global-memory allocator.
// The allocator writes the resulting device address through `dev_addr`;
// its status code is handed back to the caller unchanged.
int mem_alloc(uint64_t size, uint64_t* dev_addr) {
  const auto status = global_mem_.allocate(size, dev_addr);
  return status;
}
int mem_free(uint64_t dev_addr) {
if (dev_addr >= LMEM_BASE_ADDR) {
return local_mem_.release(dev_addr);
} else {
return global_mem_.release(dev_addr);
}
return global_mem_.release(dev_addr);
}
int mem_info(int type, uint64_t* mem_free, uint64_t* mem_used) const {
if (type == VX_MEM_TYPE_GLOBAL) {
if (mem_free)
*mem_free = global_mem_.free();
if (mem_used)
*mem_used = global_mem_.allocated();
} else if (type == VX_MEM_TYPE_LOCAL) {
if (mem_free)
*mem_free = local_mem_.free();
if (mem_used)
*mem_free = local_mem_.allocated();
} else {
return -1;
}
// Report usage of the global-memory allocator.
// Each output pointer is optional: a null pointer is skipped.
//   mem_free <- global_mem_.free()
//   mem_used <- global_mem_.allocated()
// Always returns 0.
int mem_info(uint64_t* mem_free, uint64_t* mem_used) const {
  if (mem_free != nullptr) {
    *mem_free = global_mem_.free();
  }
  if (mem_used != nullptr) {
    *mem_used = global_mem_.allocated();
  }
  return 0;
}
@ -176,7 +153,6 @@ private:
RAM ram_;
Processor processor_;
MemoryAllocator global_mem_;
MemoryAllocator local_mem_;
DeviceConfig dcrs_;
std::future<void> future_;
};
@ -208,6 +184,12 @@ extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) {
case VX_CAPS_GLOBAL_MEM_SIZE:
*value = GLOBAL_MEM_SIZE;
break;
case VX_CAPS_LOCAL_MEM_SIZE:
*value = (1 << LMEM_LOG_SIZE);
break;
case VX_CAPS_LOCAL_MEM_ADDR:
*value = LMEM_BASE_ADDR;
break;
case VX_CAPS_KERNEL_BASE_ADDR:
*value = (uint64_t(device->read_dcr(VX_DCR_BASE_STARTUP_ADDR1)) << 32)
| device->read_dcr(VX_DCR_BASE_STARTUP_ADDR0);
@ -262,14 +244,14 @@ extern int vx_dev_close(vx_device_h hdevice) {
return 0;
}
extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, int type, uint64_t* dev_addr) {
extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, uint64_t* dev_addr) {
if (nullptr == hdevice
|| nullptr == dev_addr
|| 0 == size)
return -1;
vx_device *device = ((vx_device*)hdevice);
return device->mem_alloc(size, type, dev_addr);
return device->mem_alloc(size, dev_addr);
}
extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_addr) {
@ -283,12 +265,12 @@ extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_addr) {
return device->mem_free(dev_addr);
}
extern int vx_mem_info(vx_device_h hdevice, int type, uint64_t* mem_free, uint64_t* mem_used) {
extern int vx_mem_info(vx_device_h hdevice, uint64_t* mem_free, uint64_t* mem_used) {
if (nullptr == hdevice)
return -1;
auto device = ((vx_device*)hdevice);
return device->mem_info(type, mem_free, mem_used);
return device->mem_info(mem_free, mem_used);
}
extern int vx_copy_to_dev(vx_device_h hdevice, uint64_t dev_addr, const void* host_ptr, uint64_t size) {

View file

@ -95,11 +95,6 @@ public:
ALLOC_MAX_ADDR - ALLOC_BASE_ADDR,
RAM_PAGE_SIZE,
CACHE_BLOCK_SIZE)
, local_mem_(
LMEM_BASE_ADDR,
(1ull << LMEM_LOG_SIZE),
RAM_PAGE_SIZE,
1)
{
// attach memory module
processor_.attach_ram(&ram_);
@ -111,37 +106,19 @@ public:
}
}
int mem_alloc(uint64_t size, int type, uint64_t* dev_addr) {
if (type == VX_MEM_TYPE_GLOBAL) {
return global_mem_.allocate(size, dev_addr);
} else if (type == VX_MEM_TYPE_LOCAL) {
return local_mem_.allocate(size, dev_addr);
}
return -1;
// Reserve `size` bytes from the global-memory allocator.
// The allocator writes the resulting device address through `dev_addr`;
// its status code is handed back to the caller unchanged.
int mem_alloc(uint64_t size, uint64_t* dev_addr) {
  const auto status = global_mem_.allocate(size, dev_addr);
  return status;
}
int mem_free(uint64_t dev_addr) {
if (dev_addr >= LMEM_BASE_ADDR) {
return local_mem_.release(dev_addr);
} else {
return global_mem_.release(dev_addr);
}
return global_mem_.release(dev_addr);
}
int mem_info(int type, uint64_t* mem_free, uint64_t* mem_used) const {
if (type == VX_MEM_TYPE_GLOBAL) {
if (mem_free)
*mem_free = global_mem_.free();
if (mem_used)
*mem_used = global_mem_.allocated();
} else if (type == VX_MEM_TYPE_LOCAL) {
if (mem_free)
*mem_free = local_mem_.free();
if (mem_used)
*mem_free = local_mem_.allocated();
} else {
return -1;
}
// Report usage of the global-memory allocator.
// Each output pointer is optional: a null pointer is skipped.
//   mem_free <- global_mem_.free()
//   mem_used <- global_mem_.allocated()
// Always returns 0.
int mem_info(uint64_t* mem_free, uint64_t* mem_used) const {
  if (mem_free != nullptr) {
    *mem_free = global_mem_.free();
  }
  if (mem_used != nullptr) {
    *mem_used = global_mem_.allocated();
  }
  return 0;
}
@ -222,7 +199,6 @@ private:
RAM ram_;
Processor processor_;
MemoryAllocator global_mem_;
MemoryAllocator local_mem_;
DeviceConfig dcrs_;
std::future<void> future_;
};
@ -296,6 +272,12 @@ extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) {
case VX_CAPS_GLOBAL_MEM_SIZE:
*value = GLOBAL_MEM_SIZE;
break;
case VX_CAPS_LOCAL_MEM_SIZE:
*value = (1 << LMEM_LOG_SIZE);
break;
case VX_CAPS_LOCAL_MEM_ADDR:
*value = LMEM_BASE_ADDR;
break;
case VX_CAPS_KERNEL_BASE_ADDR:
*value = (uint64_t(device->read_dcr(VX_DCR_BASE_STARTUP_ADDR1)) << 32)
| device->read_dcr(VX_DCR_BASE_STARTUP_ADDR0);
@ -312,14 +294,14 @@ extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) {
return 0;
}
extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, int type, uint64_t* dev_addr) {
extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, uint64_t* dev_addr) {
if (nullptr == hdevice
|| nullptr == dev_addr
|| 0 == size)
return -1;
vx_device *device = ((vx_device*)hdevice);
return device->mem_alloc(size, type, dev_addr);
return device->mem_alloc(size, dev_addr);
}
extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_addr) {
@ -333,12 +315,12 @@ extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_addr) {
return device->mem_free(dev_addr);
}
extern int vx_mem_info(vx_device_h hdevice, int type, uint64_t* mem_free, uint64_t* mem_used) {
extern int vx_mem_info(vx_device_h hdevice, uint64_t* mem_free, uint64_t* mem_used) {
if (nullptr == hdevice)
return -1;
auto device = ((vx_device*)hdevice);
return device->mem_info(type, mem_free, mem_used);
return device->mem_info(mem_free, mem_used);
}
extern int vx_copy_to_dev(vx_device_h hdevice, uint64_t dev_addr, const void* host_ptr, uint64_t size) {

View file

@ -25,7 +25,7 @@ extern int vx_dev_caps(vx_device_h /*hdevice*/, uint32_t /*caps_id*/, uint64_t*
return -1;
}
extern int vx_mem_alloc(vx_device_h /*hdevice*/, uint64_t /*size*/, int /*type*/, uint64_t* /*dev_addr*/) {
// Stub entry point: ignores every argument, leaves *dev_addr untouched and
// unconditionally reports failure (-1).
extern int vx_mem_alloc(vx_device_h /*hdevice*/, uint64_t /*size*/, uint64_t* /*dev_addr*/) {
  const int failure = -1;
  return failure;
}
@ -33,7 +33,7 @@ extern int vx_mem_free(vx_device_h /*hdevice*/, uint64_t /*dev_addr*/) {
return -1;
}
extern int vx_mem_info(vx_device_h /*hdevice*/, int /*type*/, uint64_t* /*mem_free*/, uint64_t* /*mem_used*/) {
// Stub entry point: ignores every argument and writes neither *mem_free nor
// *mem_used.
// Returns -1 rather than 0: reporting success while leaving both outputs
// unwritten would let a caller consume indeterminate values, and the other
// stub entry points in this file (vx_dev_caps, vx_mem_alloc, vx_mem_free)
// already signal failure the same way.
extern int vx_mem_info(vx_device_h /*hdevice*/, uint64_t* /*mem_free*/, uint64_t* /*mem_used*/) {
  return -1;
}

View file

@ -213,13 +213,6 @@ public:
this->global_mem_ = std::make_shared<vortex::MemoryAllocator>(
ALLOC_BASE_ADDR, ALLOC_MAX_ADDR, RAM_PAGE_SIZE, CACHE_BLOCK_SIZE);
uint64_t local_mem_size = 0;
vx_dev_caps(this, VX_CAPS_LOCAL_MEM_SIZE, &local_mem_size);
if (local_mem_size <= 1) {
this->local_mem_ = std::make_shared<vortex::MemoryAllocator>(
LMEM_BASE_ADDR, local_mem_size, RAM_PAGE_SIZE, 1);
}
#ifdef BANK_INTERLEAVE
xrtBuffers_.reserve(num_banks);
for (uint32_t i = 0; i < num_banks; ++i) {
@ -238,91 +231,66 @@ public:
return 0;
}
int mem_alloc(uint64_t size, int type, uint64_t* dev_addr) {
int mem_alloc(uint64_t size, uint64_t* dev_addr) {
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
uint64_t addr;
if (type == VX_MEM_TYPE_GLOBAL) {
CHECK_ERR(global_mem_->allocate(asize, &addr), {
return -1;
});
#ifndef BANK_INTERLEAVE
uint32_t bank_id;
CHECK_ERR(this->get_bank_info(addr, &bank_id, nullptr), {
return -1;
});
CHECK_ERR(get_buffer(bank_id, nullptr), {
return -1;
});
#endif
} else if (type == VX_MEM_TYPE_LOCAL) {
if CHECK_ERR(local_mem_->allocate(asize, &addr), {
return -1;
});
} else {
CHECK_ERR(global_mem_->allocate(asize, &addr), {
return -1;
}
});
#ifndef BANK_INTERLEAVE
uint32_t bank_id;
CHECK_ERR(this->get_bank_info(addr, &bank_id, nullptr), {
return -1;
});
CHECK_ERR(get_buffer(bank_id, nullptr), {
return -1;
});
#endif
*dev_addr = addr;
return 0;
}
int mem_free(uint64_t dev_addr) {
if (dev_addr >= LMEM_BASE_ADDR) {
CHECK_ERR(local_mem_->release(dev_addr), {
return -1;
});
} else {
CHECK_ERR(global_mem_->release(dev_addr), {
return -1;
});
#ifdef BANK_INTERLEAVE
if (0 == global_mem_->allocated()) {
#ifndef CPP_API
for (auto& entry : xrtBuffers_) {
xrtBOFree(entry);
}
#endif
xrtBuffers_.clear();
}
#else
uint32_t bank_id;
CHECK_ERR(this->get_bank_info(dev_addr, &bank_id, nullptr), {
return -1;
});
auto it = xrtBuffers_.find(bank_id);
if (it != xrtBuffers_.end()) {
auto count = --it->second.count;
if (0 == count) {
printf("freeing bank%d...\n", bank_id);
#ifndef CPP_API
xrtBOFree(it->second.xrtBuffer);
#endif
xrtBuffers_.erase(it);
}
} else {
fprintf(stderr, "[VXDRV] Error: invalid device memory address: 0x%lx\n", dev_addr);
return -1;
CHECK_ERR(global_mem_->release(dev_addr), {
return -1;
});
#ifdef BANK_INTERLEAVE
if (0 == global_mem_->allocated()) {
#ifndef CPP_API
for (auto& entry : xrtBuffers_) {
xrtBOFree(entry);
}
#endif
xrtBuffers_.clear();
}
#else
uint32_t bank_id;
CHECK_ERR(this->get_bank_info(dev_addr, &bank_id, nullptr), {
return -1;
});
auto it = xrtBuffers_.find(bank_id);
if (it != xrtBuffers_.end()) {
auto count = --it->second.count;
if (0 == count) {
printf("freeing bank%d...\n", bank_id);
#ifndef CPP_API
xrtBOFree(it->second.xrtBuffer);
#endif
xrtBuffers_.erase(it);
}
} else {
fprintf(stderr, "[VXDRV] Error: invalid device memory address: 0x%lx\n", dev_addr);
return -1;
}
#endif
return 0;
}
int mem_info(int type, uint64_t* mem_free, uint64_t* mem_used) const {
if (type == VX_MEM_TYPE_GLOBAL) {
if (mem_free)
*mem_free = global_mem_->free();
if (mem_used)
*mem_used = global_mem_->allocated();
} else if (type == VX_MEM_TYPE_LOCAL) {
if (mem_free)
*mem_free = local_mem_->free();
if (mem_used)
*mem_free = local_mem_->allocated();
} else {
return -1;
}
// Report usage of the global-memory allocator held in global_mem_.
// Each output pointer is optional: a null pointer is skipped.
//   mem_free <- global_mem_->free()
//   mem_used <- global_mem_->allocated()
// Always returns 0.
int mem_info(uint64_t* mem_free, uint64_t* mem_used) const {
  if (mem_free != nullptr) {
    *mem_free = global_mem_->free();
  }
  if (mem_used != nullptr) {
    *mem_used = global_mem_->allocated();
  }
  return 0;
}
@ -433,8 +401,6 @@ private:
xrt_kernel_t xrtKernel_;
const platform_info_t platform_;
std::shared_ptr<vortex::MemoryAllocator> global_mem_;
std::shared_ptr<vortex::MemoryAllocator> local_mem_;
#ifdef BANK_INTERLEAVE
std::vector<xrt_buffer_t> xrtBuffers_;
@ -549,6 +515,9 @@ extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) {
case VX_CAPS_LOCAL_MEM_SIZE:
*value = 1ull << ((device->dev_caps >> 40) & 0xff);
break;
case VX_CAPS_LOCAL_MEM_ADDR:
*value = LMEM_BASE_ADDR;
break;
case VX_CAPS_KERNEL_BASE_ADDR:
*value = (uint64_t(device->dcrs.read(VX_DCR_BASE_STARTUP_ADDR1)) << 32) |
device->dcrs.read(VX_DCR_BASE_STARTUP_ADDR0);
@ -762,14 +731,14 @@ extern int vx_dev_close(vx_device_h hdevice) {
return 0;
}
extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, int type, uint64_t* dev_addr) {
extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, uint64_t* dev_addr) {
if (nullptr == hdevice
|| nullptr == dev_addr
|| 0 == size)
return -1;
auto device = ((vx_device*)hdevice);
return device->mem_alloc(size, type, dev_addr);
return device->mem_alloc(size, dev_addr);
}
extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_addr) {
@ -783,12 +752,12 @@ extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_addr) {
return device->mem_free(dev_addr);
}
extern int vx_mem_info(vx_device_h hdevice, int type, uint64_t* mem_free, uint64_t* mem_used) {
extern int vx_mem_info(vx_device_h hdevice, uint64_t* mem_free, uint64_t* mem_used) {
if (nullptr == hdevice)
return -1;
auto device = (vx_device*)hdevice;
return device->mem_info(type, mem_free, mem_used);
return device->mem_info(mem_free, mem_used);
}
extern int vx_copy_to_dev(vx_device_h hdevice, uint64_t dev_addr, const void* host_ptr, uint64_t size) {

View file

@ -236,8 +236,8 @@ int main(int argc, char *argv[]) {
// allocate device memory
std::cout << "allocate device memory" << std::endl;
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.src_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.dst_addr));
kernel_arg.count = num_points;

View file

@ -179,9 +179,9 @@ int main(int argc, char *argv[]) {
// allocate device memory
std::cout << "allocate device memory" << std::endl;
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src0_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src1_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.src0_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.src1_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.dst_addr));
kernel_arg.num_tasks = num_tasks;
kernel_arg.task_size = count;

View file

@ -216,8 +216,8 @@ int main(int argc, char *argv[]) {
// allocate device memory
std::cout << "allocate device memory" << std::endl;
RT_CHECK(vx_mem_alloc(device, src_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src_addr));
RT_CHECK(vx_mem_alloc(device, dst_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr));
RT_CHECK(vx_mem_alloc(device, src_buf_size, &kernel_arg.src_addr));
RT_CHECK(vx_mem_alloc(device, dst_buf_size, &kernel_arg.dst_addr));
kernel_arg.num_points = num_points;

View file

@ -119,9 +119,9 @@ int main(int argc, char *argv[]) {
// allocate device memory
std::cout << "allocate device memory" << std::endl;
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src0_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src1_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.src0_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.src1_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.dst_addr));
kernel_arg.num_tasks = num_tasks;
kernel_arg.task_size = count;

View file

@ -129,9 +129,9 @@ int main(int argc, char *argv[]) {
// allocate device memory
std::cout << "allocate device memory" << std::endl;
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src0_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src1_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.src0_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.src1_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.dst_addr));
kernel_arg.num_tasks = num_tasks;
kernel_arg.task_size = count;

View file

@ -152,7 +152,7 @@ int main(int argc, char *argv[]) {
uint32_t num_points = count;
RT_CHECK(vx_mem_alloc(device, NUM_ADDRS * sizeof(int32_t), VX_MEM_TYPE_GLOBAL, &usr_test_mem));
RT_CHECK(vx_mem_alloc(device, NUM_ADDRS * sizeof(int32_t), &usr_test_mem));
// generate input data
gen_src_addrs(num_points);
@ -172,9 +172,9 @@ int main(int argc, char *argv[]) {
// allocate device memory
std::cout << "allocate device memory" << std::endl;
RT_CHECK(vx_mem_alloc(device, src_buf_size, VX_MEM_TYPE_GLOBAL, &value));
RT_CHECK(vx_mem_alloc(device, src_buf_size, &value));
kernel_arg.src_addr = value;
RT_CHECK(vx_mem_alloc(device, dst_buf_size, VX_MEM_TYPE_GLOBAL, &value));
RT_CHECK(vx_mem_alloc(device, dst_buf_size, &value));
kernel_arg.dst_addr = value;
kernel_arg.num_points = num_points;

View file

@ -216,9 +216,9 @@ int main(int argc, char *argv[]) {
// allocate device memory
std::cout << "allocate device memory" << std::endl;
RT_CHECK(vx_mem_alloc(device, addr_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src0_addr));
RT_CHECK(vx_mem_alloc(device, src_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src1_addr));
RT_CHECK(vx_mem_alloc(device, dst_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr));
RT_CHECK(vx_mem_alloc(device, addr_buf_size, &kernel_arg.src0_addr));
RT_CHECK(vx_mem_alloc(device, src_buf_size, &kernel_arg.src1_addr));
RT_CHECK(vx_mem_alloc(device, dst_buf_size, &kernel_arg.dst_addr));
kernel_arg.num_tasks = num_tasks;
kernel_arg.stride = count;

View file

@ -122,8 +122,8 @@ int main(int argc, char *argv[]) {
// allocate device memory
std::cout << "allocate device memory" << std::endl;
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.src_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.dst_addr));
kernel_arg.size = num_points;

View file

@ -98,7 +98,7 @@ int main(int argc, char *argv[]) {
// allocate device memory
std::cout << "allocate device memory" << std::endl;
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.src_addr));
kernel_arg.num_points = num_points;

View file

@ -146,9 +146,9 @@ int main(int argc, char *argv[]) {
// allocate device memory
std::cout << "allocate device memory" << std::endl;
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.A_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.B_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.C_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.A_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.B_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.C_addr));
kernel_arg.num_tasks = num_points;
kernel_arg.size = size;

View file

@ -160,8 +160,8 @@ int main(int argc, char *argv[]) {
// allocate device memory
std::cout << "allocate device memory" << std::endl;
RT_CHECK(vx_mem_alloc(device, src_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src_addr));
RT_CHECK(vx_mem_alloc(device, dst_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr));
RT_CHECK(vx_mem_alloc(device, src_buf_size, &kernel_arg.src_addr));
RT_CHECK(vx_mem_alloc(device, dst_buf_size, &kernel_arg.dst_addr));
kernel_arg.num_points = num_points;

View file

@ -181,9 +181,9 @@ int main(int argc, char *argv[]) {
// allocate device memory
std::cout << "allocate device memory" << std::endl;
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src0_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src1_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.src0_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.src1_addr));
RT_CHECK(vx_mem_alloc(device, buf_size, &kernel_arg.dst_addr));
kernel_arg.num_points = num_points;