mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 22:07:41 -04:00
refactoring device memory allocation and cleanup
This commit is contained in:
parent
29df0da8b5
commit
f7887d8720
49 changed files with 875 additions and 373 deletions
|
@ -18,6 +18,7 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "vx_utils.h"
|
#include "vx_utils.h"
|
||||||
|
#include "vx_malloc.h"
|
||||||
#include <vortex.h>
|
#include <vortex.h>
|
||||||
#include <VX_config.h>
|
#include <VX_config.h>
|
||||||
#include "vortex_afu.h"
|
#include "vortex_afu.h"
|
||||||
|
@ -51,14 +52,25 @@
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
typedef struct vx_device_ {
|
class vx_device {
|
||||||
|
public:
|
||||||
|
vx_device()
|
||||||
|
: mem_allocator(
|
||||||
|
ALLOC_BASE_ADDR,
|
||||||
|
ALLOC_BASE_ADDR + LOCAL_MEM_SIZE,
|
||||||
|
4096,
|
||||||
|
CACHE_BLOCK_SIZE)
|
||||||
|
{}
|
||||||
|
|
||||||
|
~vx_device() {}
|
||||||
|
|
||||||
fpga_handle fpga;
|
fpga_handle fpga;
|
||||||
uint64_t mem_allocation;
|
vortex::MemoryAllocator mem_allocator;
|
||||||
unsigned version;
|
unsigned version;
|
||||||
unsigned num_cores;
|
unsigned num_cores;
|
||||||
unsigned num_warps;
|
unsigned num_warps;
|
||||||
unsigned num_threads;
|
unsigned num_threads;
|
||||||
} vx_device_t;
|
};
|
||||||
|
|
||||||
typedef struct vx_buffer_ {
|
typedef struct vx_buffer_ {
|
||||||
uint64_t wsid;
|
uint64_t wsid;
|
||||||
|
@ -102,7 +114,7 @@ extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) {
|
||||||
if (nullptr == hdevice)
|
if (nullptr == hdevice)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
vx_device_t *device = ((vx_device_t*)hdevice);
|
vx_device *device = ((vx_device*)hdevice);
|
||||||
|
|
||||||
switch (caps_id) {
|
switch (caps_id) {
|
||||||
case VX_CAPS_VERSION:
|
case VX_CAPS_VERSION:
|
||||||
|
@ -143,7 +155,7 @@ extern int vx_dev_open(vx_device_h* hdevice) {
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
fpga_handle accel_handle;
|
fpga_handle accel_handle;
|
||||||
vx_device_t* device;
|
vx_device* device;
|
||||||
|
|
||||||
#ifndef USE_VLSIM
|
#ifndef USE_VLSIM
|
||||||
fpga_result res;
|
fpga_result res;
|
||||||
|
@ -204,14 +216,13 @@ extern int vx_dev_open(vx_device_h* hdevice) {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// allocate device object
|
// allocate device object
|
||||||
device = (vx_device_t*)malloc(sizeof(vx_device_t));
|
device = new vx_device();
|
||||||
if (nullptr == device) {
|
if (nullptr == device) {
|
||||||
fpgaClose(accel_handle);
|
fpgaClose(accel_handle);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
device->fpga = accel_handle;
|
device->fpga = accel_handle;
|
||||||
device->mem_allocation = ALLOC_BASE_ADDR;
|
|
||||||
|
|
||||||
{
|
{
|
||||||
// Load device CAPS
|
// Load device CAPS
|
||||||
|
@ -254,7 +265,7 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
||||||
if (nullptr == hdevice)
|
if (nullptr == hdevice)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
vx_device_t *device = ((vx_device_t*)hdevice);
|
vx_device *device = ((vx_device*)hdevice);
|
||||||
|
|
||||||
#ifdef SCOPE
|
#ifdef SCOPE
|
||||||
vx_scope_stop(device->fpga);
|
vx_scope_stop(device->fpga);
|
||||||
|
@ -267,30 +278,30 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
||||||
|
|
||||||
fpgaClose(device->fpga);
|
fpgaClose(device->fpga);
|
||||||
|
|
||||||
|
delete device;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) {
|
extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) {
|
||||||
if (nullptr == hdevice
|
if (nullptr == hdevice
|
||||||
|| nullptr == dev_maddr
|
|| nullptr == dev_maddr
|
||||||
|| 0 >= size)
|
|| 0 >= size)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
vx_device_t *device = ((vx_device_t*)hdevice);
|
vx_device *device = ((vx_device*)hdevice);
|
||||||
|
return device->mem_allocator.allocate(size, dev_maddr);
|
||||||
size_t dev_mem_size = LOCAL_MEM_SIZE;
|
|
||||||
size_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
|
||||||
|
|
||||||
if (device->mem_allocation + asize > dev_mem_size)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
*dev_maddr = device->mem_allocation;
|
|
||||||
device->mem_allocation += asize;
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) {
|
// Releases the device-memory block at dev_maddr through the device's
// memory allocator. Returns -1 when the device handle is null; otherwise
// returns whatever the allocator's release() reports.
extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_maddr) {
    auto *dev = (vx_device*)hdevice;
    if (dev == nullptr) {
        return -1;
    }
    auto &allocator = dev->mem_allocator;
    return allocator.release(dev_maddr);
}
|
||||||
|
|
||||||
|
extern int vx_buf_alloc(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) {
|
||||||
fpga_result res;
|
fpga_result res;
|
||||||
void* host_ptr;
|
void* host_ptr;
|
||||||
uint64_t wsid;
|
uint64_t wsid;
|
||||||
|
@ -302,7 +313,7 @@ extern int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h*
|
||||||
|| nullptr == hbuffer)
|
|| nullptr == hbuffer)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
vx_device_t *device = ((vx_device_t*)hdevice);
|
vx_device *device = ((vx_device*)hdevice);
|
||||||
|
|
||||||
size_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
size_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||||
|
|
||||||
|
@ -344,12 +355,12 @@ extern void* vx_host_ptr(vx_buffer_h hbuffer) {
|
||||||
return buffer->host_ptr;
|
return buffer->host_ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int vx_buf_release(vx_buffer_h hbuffer) {
|
extern int vx_buf_free(vx_buffer_h hbuffer) {
|
||||||
if (nullptr == hbuffer)
|
if (nullptr == hbuffer)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
|
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
|
||||||
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
|
vx_device *device = ((vx_device*)buffer->hdevice);
|
||||||
|
|
||||||
fpgaReleaseBuffer(device->fpga, buffer->wsid);
|
fpgaReleaseBuffer(device->fpga, buffer->wsid);
|
||||||
|
|
||||||
|
@ -364,7 +375,7 @@ extern int vx_ready_wait(vx_device_h hdevice, uint64_t timeout) {
|
||||||
|
|
||||||
std::unordered_map<int, std::stringstream> print_bufs;
|
std::unordered_map<int, std::stringstream> print_bufs;
|
||||||
|
|
||||||
vx_device_t *device = ((vx_device_t*)hdevice);
|
vx_device *device = ((vx_device*)hdevice);
|
||||||
|
|
||||||
struct timespec sleep_time;
|
struct timespec sleep_time;
|
||||||
|
|
||||||
|
@ -427,7 +438,7 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
vx_buffer_t *buffer = ((vx_buffer_t*)hbuffer);
|
vx_buffer_t *buffer = ((vx_buffer_t*)hbuffer);
|
||||||
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
|
vx_device *device = ((vx_device*)buffer->hdevice);
|
||||||
|
|
||||||
uint64_t dev_mem_size = LOCAL_MEM_SIZE;
|
uint64_t dev_mem_size = LOCAL_MEM_SIZE;
|
||||||
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||||
|
@ -468,7 +479,7 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t si
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
vx_buffer_t *buffer = ((vx_buffer_t*)hbuffer);
|
vx_buffer_t *buffer = ((vx_buffer_t*)hbuffer);
|
||||||
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
|
vx_device *device = ((vx_device*)buffer->hdevice);
|
||||||
|
|
||||||
uint64_t dev_mem_size = LOCAL_MEM_SIZE;
|
uint64_t dev_mem_size = LOCAL_MEM_SIZE;
|
||||||
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||||
|
@ -507,7 +518,7 @@ extern int vx_start(vx_device_h hdevice) {
|
||||||
if (nullptr == hdevice)
|
if (nullptr == hdevice)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
vx_device_t *device = ((vx_device_t*)hdevice);
|
vx_device *device = ((vx_device*)hdevice);
|
||||||
|
|
||||||
// Ensure ready for new command
|
// Ensure ready for new command
|
||||||
if (vx_ready_wait(hdevice, MAX_TIMEOUT) != 0)
|
if (vx_ready_wait(hdevice, MAX_TIMEOUT) != 0)
|
||||||
|
|
399
driver/common/vx_malloc.h
Normal file
399
driver/common/vx_malloc.h
Normal file
|
@ -0,0 +1,399 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
namespace vortex {
|
||||||
|
|
||||||
|
// Allocator for a linear address range [minAddress, maxAddress).
//
// The range is carved into pages (sizes rounded up to pageAlign) taken from a
// bump pointer, and each page is subdivided into blocks (sizes rounded up to
// blockAlign). Only bookkeeping objects live on the host heap; the managed
// addresses themselves are never dereferenced by this class.
//
// Both alignments must be powers of two (checked by assert in AlignSize).
// The class owns raw linked lists, so it is not copyable.
class MemoryAllocator {
public:
  MemoryAllocator(
    uint64_t minAddress,
    uint64_t maxAddress,
    uint32_t pageAlign,
    uint32_t blockAlign)
    : nextAddress_(minAddress)
    , maxAddress_(maxAddress)
    , pageAlign_(pageAlign)
    , blockAlign_(blockAlign)
    , pages_(nullptr)
  {}

  // Frees all bookkeeping, including pages that still hold used blocks.
  // (Outstanding allocations at teardown are not treated as an error.)
  ~MemoryAllocator() {
    page_t* pCurPage = pages_;
    while (pCurPage) {
      auto nextPage = pCurPage->next;
      delete pCurPage; // page_t's destructor releases its blocks
      pCurPage = nextPage;
    }
  }

  // Copying would alias the page/block lists and double-free them.
  MemoryAllocator(const MemoryAllocator&) = delete;
  MemoryAllocator& operator=(const MemoryAllocator&) = delete;

  // Reserves `size` bytes (rounded up to blockAlign) and stores the start
  // address in *addr.
  // Returns 0 on success, -1 if size is 0, addr is null, the rounded size
  // overflows, or the address range is exhausted.
  int allocate(uint64_t size, uint64_t* addr) {
    if (size == 0 || addr == nullptr)
      return -1;

    // Align allocation size; a result smaller than the request means the
    // rounding wrapped around 2^64.
    const uint64_t alignedSize = AlignSize(size, blockAlign_);
    if (alignedSize < size)
      return -1;
    size = alignedSize;

    // Walk thru all pages to find a free block
    block_t* pFreeBlock = nullptr;
    auto pCurPage = pages_;
    while (pCurPage) {
      auto pCurBlock = pCurPage->pFreeSList;
      // The free S-list is sorted with the biggest block first, so the head
      // tells us whether this page can satisfy the request at all.
      if (pCurBlock && pCurBlock->size >= size) {
        // Find the smallest matching block
        while (pCurBlock->nextFreeS
            && (pCurBlock->nextFreeS->size >= size)) {
          pCurBlock = pCurBlock->nextFreeS;
        }
        pFreeBlock = pCurBlock;
        break;
      }
      pCurPage = pCurPage->next;
    }

    if (nullptr == pFreeBlock) {
      // Allocate a new page for this request
      pCurPage = this->NewPage(size);
      if (nullptr == pCurPage)
        return -1;
      pFreeBlock = pCurPage->pFreeSList;
    }

    // Remove the block from the free lists
    assert(pFreeBlock->size >= size);
    pCurPage->RemoveFreeMBlock(pFreeBlock);
    pCurPage->RemoveFreeSBlock(pFreeBlock);

    // If the free block we have found is larger than what we are looking for,
    // split it and return the tail to the free lists.
    uint64_t extraBytes = pFreeBlock->size - size;
    if (extraBytes >= blockAlign_) {
      // Reduce the free block size to the requested value
      pFreeBlock->size = size;

      // Allocate a new block to contain the extra buffer
      auto nextAddr = pFreeBlock->addr + size;
      auto pNewBlock = new block_t(nextAddr, extraBytes);

      // Add the new block to the free lists
      pCurPage->InsertFreeMBlock(pNewBlock);
      pCurPage->InsertFreeSBlock(pNewBlock);
    }

    // Insert the free block into the used list
    pCurPage->InsertUsedBlock(pFreeBlock);

    // Return the free block address
    *addr = pFreeBlock->addr;

    return 0;
  }

  // Returns the block starting at `addr` to its page, merging it with
  // adjacent free blocks, and gives back empty pages at the top of the
  // address range.
  // Returns 0 on success, -1 if `addr` is not the start of a used block.
  int release(uint64_t addr) {
    // Walk all pages to find the pointer
    block_t* pUsedBlock = nullptr;
    auto pCurPage = pages_;
    while (pCurPage) {
      // Inclusive lower bound: the first block of a page starts exactly at
      // the page address.
      if ((pCurPage->addr <= addr)
       && ((pCurPage->addr + pCurPage->size) > addr)) {
        auto pCurBlock = pCurPage->pUsedList;
        while (pCurBlock) {
          if (pCurBlock->addr == addr) {
            pUsedBlock = pCurBlock;
            break;
          }
          pCurBlock = pCurBlock->nextUsed;
        }
        if (pUsedBlock)
          break;
      }
      pCurPage = pCurPage->next;
    }

    // found the corresponding block?
    if (nullptr == pUsedBlock)
      return -1;

    // Remove the block from the used list
    pCurPage->RemoveUsedBlock(pUsedBlock);

    // Insert the block into the free M-list.
    pCurPage->InsertFreeMBlock(pUsedBlock);

    // Check if we can merge adjacent free blocks from the left.
    if (pUsedBlock->prevFreeM) {
      // Calculate the previous block's end address
      auto prevAddr = pUsedBlock->prevFreeM->addr + pUsedBlock->prevFreeM->size;
      if (pUsedBlock->addr == prevAddr) {
        auto pMergedBlock = pUsedBlock->prevFreeM;

        // Detach left block from the free S-list (its size is about to change)
        pCurPage->RemoveFreeSBlock(pMergedBlock);

        // Merge the blocks to the left
        pMergedBlock->size += pUsedBlock->size;
        pMergedBlock->nextFreeM = pUsedBlock->nextFreeM;
        if (pMergedBlock->nextFreeM) {
          pMergedBlock->nextFreeM->prevFreeM = pMergedBlock;
        }

        // The released block is now unlinked from every list.
        delete pUsedBlock;
        pUsedBlock = pMergedBlock;
      }
    }

    // Check if we can merge adjacent free blocks from the right.
    if (pUsedBlock->nextFreeM) {
      // Calculate the next allocation start address
      auto nextMem = pUsedBlock->addr + pUsedBlock->size;
      if (pUsedBlock->nextFreeM->addr == nextMem) {
        auto nextBlock = pUsedBlock->nextFreeM;

        // Detach right block from the free S-list
        pCurPage->RemoveFreeSBlock(nextBlock);

        // Merge the blocks to the right
        pUsedBlock->size += nextBlock->size;
        pUsedBlock->nextFreeM = nextBlock->nextFreeM;
        if (pUsedBlock->nextFreeM) {
          pUsedBlock->nextFreeM->prevFreeM = pUsedBlock;
        }

        // The absorbed block is now unlinked from every list.
        delete nextBlock;
      }
    }

    // Insert the block into the free S-list.
    pCurPage->InsertFreeSBlock(pUsedBlock);

    // Check if we can free empty pages
    if (nullptr == pCurPage->pUsedList) {
      // DeletePage destroys the page on success, so pCurPage is reassigned
      // before it is looked at again.
      while (pCurPage && this->DeletePage(pCurPage)) {
        pCurPage = this->NextEmptyPage();
      }
    }

    return 0;
  }

private:

  // Bookkeeping record for one address span inside a page. A block is either
  // on the page's used list, or on both of its free lists (S and M).
  struct block_t {
    block_t* nextFreeS;
    block_t* prevFreeS;

    block_t* nextFreeM;
    block_t* prevFreeM;

    block_t* nextUsed;
    block_t* prevUsed;

    uint64_t addr;
    uint64_t size;

    block_t(uint64_t addr, uint64_t size)
      : nextFreeS(nullptr)
      , prevFreeS(nullptr)
      , nextFreeM(nullptr)
      , prevFreeM(nullptr)
      , nextUsed(nullptr)
      , prevUsed(nullptr)
      , addr(addr)
      , size(size)
    {}
  };

  // A contiguous span of the address range, subdivided into blocks.
  // Owns every block_t linked from its lists.
  struct page_t {
    page_t* next;

    // List of used blocks
    block_t* pUsedList;

    // List with blocks sorted by decreasing sizes
    // Used for block lookup during memory allocation.
    block_t* pFreeSList;

    // List with blocks sorted by increasing memory addresses
    // Used for block merging during memory release.
    block_t* pFreeMList;

    uint64_t addr;
    uint64_t size;

    // A new page starts out as a single free block covering the whole page.
    page_t(uint64_t addr, uint64_t size) :
      next(nullptr),
      pUsedList(nullptr),
      addr(addr),
      size(size) {
      pFreeSList = pFreeMList = new block_t(addr, size);
    }

    // Releases all block records. Between operations every free block is on
    // the M-list, so walking the used list and the M-list visits each block
    // exactly once.
    ~page_t() {
      for (block_t* pBlock = pUsedList; pBlock;) {
        block_t* pNext = pBlock->nextUsed;
        delete pBlock;
        pBlock = pNext;
      }
      for (block_t* pBlock = pFreeMList; pBlock;) {
        block_t* pNext = pBlock->nextFreeM;
        delete pBlock;
        pBlock = pNext;
      }
    }

    page_t(const page_t&) = delete;
    page_t& operator=(const page_t&) = delete;

    // Pushes pBlock at the head of the used list.
    void InsertUsedBlock(block_t* pBlock) {
      pBlock->nextUsed = pUsedList;
      if (pUsedList) {
        pUsedList->prevUsed = pBlock;
      }
      pUsedList = pBlock;
    }

    // Unlinks pBlock from the used list and clears its used-list links.
    void RemoveUsedBlock(block_t* pBlock) {
      if (pBlock->prevUsed) {
        pBlock->prevUsed->nextUsed = pBlock->nextUsed;
      } else {
        pUsedList = pBlock->nextUsed;
      }
      if (pBlock->nextUsed) {
        pBlock->nextUsed->prevUsed = pBlock->prevUsed;
      }
      pBlock->nextUsed = nullptr;
      pBlock->prevUsed = nullptr;
    }

    // Inserts pBlock into the M-list, keeping it sorted by increasing address.
    void InsertFreeMBlock(block_t* pBlock) {
      block_t* pCurBlock = pFreeMList;
      block_t* prevBlock = nullptr;
      while (pCurBlock && (pCurBlock->addr < pBlock->addr)) {
        prevBlock = pCurBlock;
        pCurBlock = pCurBlock->nextFreeM;
      }
      pBlock->nextFreeM = pCurBlock;
      pBlock->prevFreeM = prevBlock;
      if (prevBlock) {
        prevBlock->nextFreeM = pBlock;
      } else {
        pFreeMList = pBlock;
      }
      if (pCurBlock) {
        pCurBlock->prevFreeM = pBlock;
      }
    }

    // Unlinks pBlock from the M-list and clears its M-list links.
    void RemoveFreeMBlock(block_t* pBlock) {
      if (pBlock->prevFreeM) {
        pBlock->prevFreeM->nextFreeM = pBlock->nextFreeM;
      } else {
        pFreeMList = pBlock->nextFreeM;
      }
      if (pBlock->nextFreeM) {
        pBlock->nextFreeM->prevFreeM = pBlock->prevFreeM;
      }
      pBlock->nextFreeM = nullptr;
      pBlock->prevFreeM = nullptr;
    }

    // Inserts pBlock into the S-list, keeping it sorted by decreasing size.
    void InsertFreeSBlock(block_t* pBlock) {
      block_t* pCurBlock = this->pFreeSList;
      block_t* prevBlock = nullptr;
      while (pCurBlock && (pCurBlock->size > pBlock->size)) {
        prevBlock = pCurBlock;
        pCurBlock = pCurBlock->nextFreeS;
      }
      pBlock->nextFreeS = pCurBlock;
      pBlock->prevFreeS = prevBlock;
      if (prevBlock) {
        prevBlock->nextFreeS = pBlock;
      } else {
        this->pFreeSList = pBlock;
      }
      if (pCurBlock) {
        pCurBlock->prevFreeS = pBlock;
      }
    }

    // Unlinks pBlock from the S-list and clears its S-list links.
    void RemoveFreeSBlock(block_t* pBlock) {
      if (pBlock->prevFreeS) {
        pBlock->prevFreeS->nextFreeS = pBlock->nextFreeS;
      } else {
        pFreeSList = pBlock->nextFreeS;
      }
      if (pBlock->nextFreeS) {
        pBlock->nextFreeS->prevFreeS = pBlock->prevFreeS;
      }
      pBlock->nextFreeS = nullptr;
      pBlock->prevFreeS = nullptr;
    }
  };

  // Takes a new page of at least `size` bytes (rounded up to pageAlign) from
  // the bump pointer and links it at the head of the page list.
  // Returns nullptr, leaving the allocator state untouched, when the page
  // does not fit below maxAddress_.
  page_t* NewPage(uint64_t size) {
    // Add padding to ensure page alignment; reject wrap-around.
    const uint64_t pageSize = AlignSize(size, pageAlign_);
    if (pageSize < size)
      return nullptr;

    // Capacity check, written so the arithmetic cannot overflow and done
    // before nextAddress_ is modified.
    if (nextAddress_ > maxAddress_
     || pageSize > (maxAddress_ - nextAddress_))
      return nullptr;

    // Allocate the page
    auto pNewPage = new page_t(nextAddress_, pageSize);
    nextAddress_ += pageSize;

    // Insert the new page into the list
    pNewPage->next = pages_;
    pages_ = pNewPage;

    return pNewPage;
  }

  // Destroys an empty page if it is the topmost one (it ends at
  // nextAddress_) and rewinds the bump pointer to its start.
  // Returns false, leaving the page alive, otherwise.
  bool DeletePage(page_t* pPage) {
    // The page should be empty
    assert(nullptr == pPage->pUsedList);
    assert(pPage->pFreeMList && (nullptr == pPage->pFreeMList->nextFreeM));

    // Only delete top-level pages
    auto nextAddr = pPage->addr + pPage->size;
    if (nextAddr != nextAddress_)
      return false;

    // Remove the page from the list
    page_t** ppLink = &pages_;
    while (*ppLink && (*ppLink != pPage)) {
      ppLink = &(*ppLink)->next;
    }
    if (*ppLink) {
      *ppLink = pPage->next;
    }

    // Update next allocation address
    nextAddress_ = pPage->addr;

    // Release the page record and its remaining free block
    delete pPage;

    return true;
  }

  // Returns the first page in the list that has no used blocks, or nullptr.
  page_t* NextEmptyPage() {
    auto pCurPage = pages_;
    while (pCurPage) {
      if (nullptr == pCurPage->pUsedList)
        return pCurPage;
      pCurPage = pCurPage->next;
    }
    return nullptr;
  }

  // Rounds size up to a multiple of alignment (a power of two).
  // The result wraps around for sizes within alignment-1 of 2^64; callers
  // detect this by comparing the result against the input.
  static uint64_t AlignSize(uint64_t size, uint64_t alignment) {
    assert(0 == (alignment & (alignment - 1)));
    return (size + alignment - 1) & ~(alignment - 1);
  }

  uint64_t nextAddress_; // start address of the next page to hand out
  uint64_t maxAddress_;  // exclusive upper bound of the managed range
  uint32_t pageAlign_;
  uint32_t blockAlign_;
  page_t*  pages_;       // head of the page list (most recently created first)
};
|
||||||
|
|
||||||
|
} // namespace vortex
|
|
@ -22,7 +22,7 @@ extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, uint6
|
||||||
if (NULL == content || 0 == size)
|
if (NULL == content || 0 == size)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
uint32_t buffer_transfer_size = 65536;
|
uint32_t buffer_transfer_size = 65536; // 64 KB
|
||||||
uint64_t kernel_base_addr;
|
uint64_t kernel_base_addr;
|
||||||
err = vx_dev_caps(device, VX_CAPS_KERNEL_BASE_ADDR, &kernel_base_addr);
|
err = vx_dev_caps(device, VX_CAPS_KERNEL_BASE_ADDR, &kernel_base_addr);
|
||||||
if (err != 0)
|
if (err != 0)
|
||||||
|
@ -30,7 +30,7 @@ extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, uint6
|
||||||
|
|
||||||
// allocate device buffer
|
// allocate device buffer
|
||||||
vx_buffer_h buffer;
|
vx_buffer_h buffer;
|
||||||
err = vx_alloc_shared_mem(device, buffer_transfer_size, &buffer);
|
err = vx_buf_alloc(device, buffer_transfer_size, &buffer);
|
||||||
if (err != 0)
|
if (err != 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
@ -54,13 +54,13 @@ extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, uint6
|
||||||
|
|
||||||
err = vx_copy_to_dev(buffer, kernel_base_addr + offset, chunk_size, 0);
|
err = vx_copy_to_dev(buffer, kernel_base_addr + offset, chunk_size, 0);
|
||||||
if (err != 0) {
|
if (err != 0) {
|
||||||
vx_buf_release(buffer);
|
vx_buf_free(buffer);
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
offset += chunk_size;
|
offset += chunk_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
vx_buf_release(buffer);
|
vx_buf_free(buffer);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -149,7 +149,7 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
vx_buffer_h staging_buf;
|
vx_buffer_h staging_buf;
|
||||||
ret = vx_alloc_shared_mem(device, 64 * sizeof(uint32_t), &staging_buf);
|
ret = vx_buf_alloc(device, 64 * sizeof(uint32_t), &staging_buf);
|
||||||
if (ret != 0)
|
if (ret != 0)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
|
@ -158,7 +158,7 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
||||||
for (unsigned core_id = 0; core_id < num_cores; ++core_id) {
|
for (unsigned core_id = 0; core_id < num_cores; ++core_id) {
|
||||||
ret = vx_copy_from_dev(staging_buf, IO_CSR_ADDR + 64 * sizeof(uint32_t) * core_id, 64 * sizeof(uint32_t), 0);
|
ret = vx_copy_from_dev(staging_buf, IO_CSR_ADDR + 64 * sizeof(uint32_t) * core_id, 64 * sizeof(uint32_t), 0);
|
||||||
if (ret != 0) {
|
if (ret != 0) {
|
||||||
vx_buf_release(staging_buf);
|
vx_buf_free(staging_buf);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -336,7 +336,21 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// release allocated resources
|
// release allocated resources
|
||||||
vx_buf_release(staging_buf);
|
vx_buf_free(staging_buf);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Deprecated API functions
|
||||||
|
|
||||||
|
// Deprecated API: forwards its arguments unchanged to vx_buf_alloc() and
// returns that call's result.
extern int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) {
    const int status = vx_buf_alloc(hdevice, size, hbuffer);
    return status;
}
|
||||||
|
|
||||||
|
// Deprecated API: forwards the buffer handle to vx_buf_free() and returns
// that call's result.
extern int vx_buf_release(vx_buffer_h hbuffer) {
    const int status = vx_buf_free(hbuffer);
    return status;
}
|
||||||
|
|
||||||
|
// Deprecated API: forwards its arguments unchanged to vx_mem_alloc() and
// returns that call's result.
extern int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) {
|
||||||
|
return vx_mem_alloc(hdevice, size, dev_maddr);
|
||||||
}
|
}
|
|
@ -6,8 +6,7 @@ SCRIPT_DIR=../../hw/scripts
|
||||||
|
|
||||||
OPAE_SYN_DIR=../../hw/syn/opae
|
OPAE_SYN_DIR=../../hw/syn/opae
|
||||||
|
|
||||||
CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors
|
CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors
|
||||||
#CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
|
|
||||||
|
|
||||||
CXXFLAGS += -I. -I../include -I../../hw -I$(OPAE_HOME)/include -I$(OPAE_SYN_DIR)
|
CXXFLAGS += -I. -I../include -I../../hw -I$(OPAE_HOME)/include -I$(OPAE_SYN_DIR)
|
||||||
|
|
||||||
|
@ -39,6 +38,13 @@ PROJECT = libvortex.so
|
||||||
|
|
||||||
SRCS = ../common/opae.cpp ../common/vx_utils.cpp
|
SRCS = ../common/opae.cpp ../common/vx_utils.cpp
|
||||||
|
|
||||||
|
# Debugging
|
||||||
|
ifdef DEBUG
|
||||||
|
CXXFLAGS += -g -O0
|
||||||
|
else
|
||||||
|
CXXFLAGS += -O2 -DNDEBUG
|
||||||
|
endif
|
||||||
|
|
||||||
# Enable scope analyzer
|
# Enable scope analyzer
|
||||||
ifdef SCOPE
|
ifdef SCOPE
|
||||||
CXXFLAGS += -DSCOPE
|
CXXFLAGS += -DSCOPE
|
||||||
|
|
|
@ -35,16 +35,19 @@ int vx_dev_close(vx_device_h hdevice);
|
||||||
int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value);
|
int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value);
|
||||||
|
|
||||||
// Allocate shared buffer with device
|
// Allocate shared buffer with device
|
||||||
int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer);
|
int vx_buf_alloc(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer);
|
||||||
|
|
||||||
|
// release buffer
|
||||||
|
int vx_buf_free(vx_buffer_h hbuffer);
|
||||||
|
|
||||||
// Get host pointer address
|
// Get host pointer address
|
||||||
void* vx_host_ptr(vx_buffer_h hbuffer);
|
void* vx_host_ptr(vx_buffer_h hbuffer);
|
||||||
|
|
||||||
// release buffer
|
|
||||||
int vx_buf_release(vx_buffer_h hbuffer);
|
|
||||||
|
|
||||||
// allocate device memory and return address
|
// allocate device memory and return address
|
||||||
int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr);
|
int vx_mem_alloc(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr);
|
||||||
|
|
||||||
|
// release device memory
|
||||||
|
int vx_mem_free(vx_device_h hdevice, uint64_t dev_maddr);
|
||||||
|
|
||||||
// Copy bytes from buffer to device local memory
|
// Copy bytes from buffer to device local memory
|
||||||
int vx_copy_to_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size, uint64_t src_offset);
|
int vx_copy_to_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size, uint64_t src_offset);
|
||||||
|
@ -69,6 +72,11 @@ int vx_upload_kernel_file(vx_device_h device, const char* filename);
|
||||||
// dump performance counters
|
// dump performance counters
|
||||||
int vx_dump_perf(vx_device_h device, FILE* stream);
|
int vx_dump_perf(vx_device_h device, FILE* stream);
|
||||||
|
|
||||||
|
//////////////////////////// DEPRECATED FUNCTIONS /////////////////////////////
|
||||||
|
int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr);
|
||||||
|
int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer);
|
||||||
|
int vx_buf_release(vx_buffer_h hbuffer);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
RTLSIM_DIR = ../../sim/rtlsim
|
RTLSIM_DIR = ../../sim/rtlsim
|
||||||
|
|
||||||
CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors
|
CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors
|
||||||
#CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
|
|
||||||
|
|
||||||
CXXFLAGS += -I../include -I../common -I../../hw -I$(RTLSIM_DIR) -I$(RTLSIM_DIR)/../common
|
CXXFLAGS += -I../include -I../common -I../../hw -I$(RTLSIM_DIR) -I$(RTLSIM_DIR)/../common
|
||||||
|
|
||||||
|
@ -19,6 +18,13 @@ LDFLAGS += -L. -lrtlsim
|
||||||
|
|
||||||
SRCS = vortex.cpp ../common/vx_utils.cpp
|
SRCS = vortex.cpp ../common/vx_utils.cpp
|
||||||
|
|
||||||
|
# Debugging
|
||||||
|
ifdef DEBUG
|
||||||
|
CXXFLAGS += -g -O0
|
||||||
|
else
|
||||||
|
CXXFLAGS += -O2 -DNDEBUG
|
||||||
|
endif
|
||||||
|
|
||||||
# Enable perf counters
|
# Enable perf counters
|
||||||
ifdef PERF
|
ifdef PERF
|
||||||
CXXFLAGS += -DPERF_ENABLE
|
CXXFLAGS += -DPERF_ENABLE
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
|
|
||||||
#include <vortex.h>
|
#include <vortex.h>
|
||||||
|
#include <vx_malloc.h>
|
||||||
#include <vx_utils.h>
|
#include <vx_utils.h>
|
||||||
#include <VX_config.h>
|
#include <VX_config.h>
|
||||||
#include <mem.h>
|
#include <mem.h>
|
||||||
|
@ -60,7 +61,11 @@ class vx_device {
|
||||||
public:
|
public:
|
||||||
vx_device()
|
vx_device()
|
||||||
: ram_(RAM_PAGE_SIZE)
|
: ram_(RAM_PAGE_SIZE)
|
||||||
, mem_allocation_(ALLOC_BASE_ADDR)
|
, mem_allocator_(
|
||||||
|
ALLOC_BASE_ADDR,
|
||||||
|
ALLOC_BASE_ADDR + LOCAL_MEM_SIZE,
|
||||||
|
RAM_PAGE_SIZE,
|
||||||
|
CACHE_BLOCK_SIZE)
|
||||||
{
|
{
|
||||||
processor_.attach_ram(&ram_);
|
processor_.attach_ram(&ram_);
|
||||||
}
|
}
|
||||||
|
@ -72,13 +77,11 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
int alloc_local_mem(uint64_t size, uint64_t* dev_maddr) {
|
int alloc_local_mem(uint64_t size, uint64_t* dev_maddr) {
|
||||||
uint64_t dev_mem_size = LOCAL_MEM_SIZE;
|
return mem_allocator_.allocate(size, dev_maddr);
|
||||||
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
}
|
||||||
if (mem_allocation_ + asize > dev_mem_size)
|
|
||||||
return -1;
|
int free_local_mem(uint64_t dev_maddr) {
|
||||||
*dev_maddr = mem_allocation_;
|
return mem_allocator_.release(dev_maddr);
|
||||||
mem_allocation_ += asize;
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int upload(const void* src, uint64_t dest_addr, uint64_t size, uint64_t src_offset) {
|
int upload(const void* src, uint64_t dest_addr, uint64_t size, uint64_t src_offset) {
|
||||||
|
@ -149,7 +152,7 @@ private:
|
||||||
|
|
||||||
RAM ram_;
|
RAM ram_;
|
||||||
Processor processor_;
|
Processor processor_;
|
||||||
uint64_t mem_allocation_;
|
MemoryAllocator mem_allocator_;
|
||||||
std::future<void> future_;
|
std::future<void> future_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -250,7 +253,7 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) {
|
extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) {
|
||||||
if (nullptr == hdevice
|
if (nullptr == hdevice
|
||||||
|| nullptr == dev_maddr
|
|| nullptr == dev_maddr
|
||||||
|| 0 >= size)
|
|| 0 >= size)
|
||||||
|
@ -260,8 +263,15 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_ma
|
||||||
return device->alloc_local_mem(size, dev_maddr);
|
return device->alloc_local_mem(size, dev_maddr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_maddr) {
|
||||||
|
if (nullptr == hdevice)
|
||||||
|
return -1;
|
||||||
|
|
||||||
extern int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) {
|
vx_device *device = ((vx_device*)hdevice);
|
||||||
|
return device->free_local_mem(dev_maddr);
|
||||||
|
}
|
||||||
|
|
||||||
|
extern int vx_buf_alloc(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) {
|
||||||
if (nullptr == hdevice
|
if (nullptr == hdevice
|
||||||
|| 0 >= size
|
|| 0 >= size
|
||||||
|| nullptr == hbuffer)
|
|| nullptr == hbuffer)
|
||||||
|
@ -289,7 +299,7 @@ extern void* vx_host_ptr(vx_buffer_h hbuffer) {
|
||||||
return buffer->data();
|
return buffer->data();
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int vx_buf_release(vx_buffer_h hbuffer) {
|
extern int vx_buf_free(vx_buffer_h hbuffer) {
|
||||||
if (nullptr == hbuffer)
|
if (nullptr == hbuffer)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
SIMX_DIR = ../../sim/simx
|
SIMX_DIR = ../../sim/simx
|
||||||
|
|
||||||
CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
|
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors
|
||||||
#CXXFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
|
|
||||||
|
|
||||||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||||
CXXFLAGS += -I../include -I../common -I../../hw -I$(SIMX_DIR) -I$(SIMX_DIR)/../common
|
CXXFLAGS += -I../include -I../common -I../../hw -I$(SIMX_DIR) -I$(SIMX_DIR)/../common
|
||||||
|
@ -11,7 +10,14 @@ CXXFLAGS += -DDUMP_PERF_STATS
|
||||||
LDFLAGS += -shared -pthread
|
LDFLAGS += -shared -pthread
|
||||||
LDFLAGS += -L. -lsimx
|
LDFLAGS += -L. -lsimx
|
||||||
|
|
||||||
SRCS = vortex.cpp ../common/vx_utils.cpp
|
SRCS = vortex.cpp ../common/vx_utils.cpp
|
||||||
|
|
||||||
|
# Debugging
|
||||||
|
ifdef DEBUG
|
||||||
|
CXXFLAGS += -g -O0
|
||||||
|
else
|
||||||
|
CXXFLAGS += -O2 -DNDEBUG
|
||||||
|
endif
|
||||||
|
|
||||||
PROJECT = libvortex.so
|
PROJECT = libvortex.so
|
||||||
|
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
|
|
||||||
#include <vortex.h>
|
#include <vortex.h>
|
||||||
#include <vx_utils.h>
|
#include <vx_utils.h>
|
||||||
|
#include <vx_malloc.h>
|
||||||
|
|
||||||
#include <VX_config.h>
|
#include <VX_config.h>
|
||||||
|
|
||||||
|
@ -66,7 +67,11 @@ public:
|
||||||
: arch_("rv32i", NUM_CORES * NUM_CLUSTERS, NUM_WARPS, NUM_THREADS)
|
: arch_("rv32i", NUM_CORES * NUM_CLUSTERS, NUM_WARPS, NUM_THREADS)
|
||||||
, ram_(RAM_PAGE_SIZE)
|
, ram_(RAM_PAGE_SIZE)
|
||||||
, processor_(arch_)
|
, processor_(arch_)
|
||||||
, mem_allocation_(ALLOC_BASE_ADDR)
|
, mem_allocator_(
|
||||||
|
ALLOC_BASE_ADDR,
|
||||||
|
ALLOC_BASE_ADDR + LOCAL_MEM_SIZE,
|
||||||
|
RAM_PAGE_SIZE,
|
||||||
|
CACHE_BLOCK_SIZE)
|
||||||
{
|
{
|
||||||
// attach memory module
|
// attach memory module
|
||||||
processor_.attach_ram(&ram_);
|
processor_.attach_ram(&ram_);
|
||||||
|
@ -76,16 +81,14 @@ public:
|
||||||
if (future_.valid()) {
|
if (future_.valid()) {
|
||||||
future_.wait();
|
future_.wait();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int alloc_local_mem(uint64_t size, uint64_t* dev_maddr) {
|
int alloc_local_mem(uint64_t size, uint64_t* dev_maddr) {
|
||||||
uint64_t dev_mem_size = LOCAL_MEM_SIZE;
|
return mem_allocator_.allocate(size, dev_maddr);
|
||||||
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
}
|
||||||
if (mem_allocation_ + asize > dev_mem_size)
|
|
||||||
return -1;
|
int free_local_mem(uint64_t dev_maddr) {
|
||||||
*dev_maddr = mem_allocation_;
|
return mem_allocator_.release(dev_maddr);
|
||||||
mem_allocation_ += asize;
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int upload(const void* src, uint64_t dest_addr, uint64_t size, uint64_t src_offset) {
|
int upload(const void* src, uint64_t dest_addr, uint64_t size, uint64_t src_offset) {
|
||||||
|
@ -151,7 +154,7 @@ private:
|
||||||
ArchDef arch_;
|
ArchDef arch_;
|
||||||
RAM ram_;
|
RAM ram_;
|
||||||
Processor processor_;
|
Processor processor_;
|
||||||
uint64_t mem_allocation_;
|
MemoryAllocator mem_allocator_;
|
||||||
std::future<void> future_;
|
std::future<void> future_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -252,7 +255,7 @@ extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) {
|
extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) {
|
||||||
if (nullptr == hdevice
|
if (nullptr == hdevice
|
||||||
|| nullptr == dev_maddr
|
|| nullptr == dev_maddr
|
||||||
|| 0 >= size)
|
|| 0 >= size)
|
||||||
|
@ -262,7 +265,15 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_ma
|
||||||
return device->alloc_local_mem(size, dev_maddr);
|
return device->alloc_local_mem(size, dev_maddr);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) {
|
extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_maddr) {
|
||||||
|
if (nullptr == hdevice)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
vx_device *device = ((vx_device*)hdevice);
|
||||||
|
return device->free_local_mem(dev_maddr);
|
||||||
|
}
|
||||||
|
|
||||||
|
extern int vx_buf_alloc(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) {
|
||||||
if (nullptr == hdevice
|
if (nullptr == hdevice
|
||||||
|| 0 >= size
|
|| 0 >= size
|
||||||
|| nullptr == hbuffer)
|
|| nullptr == hbuffer)
|
||||||
|
@ -290,7 +301,7 @@ extern void* vx_host_ptr(vx_buffer_h hbuffer) {
|
||||||
return buffer->data();
|
return buffer->data();
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int vx_buf_release(vx_buffer_h hbuffer) {
|
extern int vx_buf_free(vx_buffer_h hbuffer) {
|
||||||
if (nullptr == hbuffer)
|
if (nullptr == hbuffer)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
CXXFLAGS += -std=c++11 -O3 -Wall -Wextra -pedantic -Wfatal-errors
|
CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -pedantic -Wfatal-errors
|
||||||
#CXXFLAGS += -std=c++11 -g -O0 -Wall -Wextra -pedantic -Wfatal-errors
|
|
||||||
|
|
||||||
CXXFLAGS += -I../include -I../../runtime -I../../hw
|
CXXFLAGS += -I../include -I../../runtime -I../../hw
|
||||||
|
|
||||||
|
|
|
@ -12,11 +12,15 @@ extern int vx_dev_caps(vx_device_h /*hdevice*/, uint32_t /*caps_id*/, uint64_t*
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int vx_alloc_dev_mem(vx_device_h /*hdevice*/, uint64_t /*size*/, uint64_t* /*dev_maddr*/) {
|
extern int vx_mem_alloc(vx_device_h /*hdevice*/, uint64_t /*size*/, uint64_t* /*dev_maddr*/) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int vx_alloc_shared_mem(vx_device_h /*hdevice*/, uint64_t /*size*/, vx_buffer_h* /*hbuffer*/) {
|
int vx_mem_free(vx_device_h /*hdevice*/, uint64_t /*dev_maddr*/) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
extern int vx_buf_alloc(vx_device_h /*hdevice*/, uint64_t /*size*/, vx_buffer_h* /*hbuffer*/) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -24,7 +28,7 @@ extern void* vx_host_ptr(vx_buffer_h /*hbuffer*/) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int vx_buf_release(vx_buffer_h /*hbuffer*/) {
|
extern int vx_buf_free(vx_buffer_h /*hbuffer*/) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -13,8 +13,6 @@ LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
|
||||||
LDFLAGS += -L$(THIRD_PARTY_DIR)/cocogfx -lcocogfx
|
LDFLAGS += -L$(THIRD_PARTY_DIR)/cocogfx -lcocogfx
|
||||||
LDFLAGS += -L$(THIRD_PARTY_DIR)/ramulator -lramulator
|
LDFLAGS += -L$(THIRD_PARTY_DIR)/ramulator -lramulator
|
||||||
|
|
||||||
TOP = vx_cache_sim
|
|
||||||
|
|
||||||
SRCS = ../common/util.cpp ../common/mem.cpp ../common/rvfloats.cpp
|
SRCS = ../common/util.cpp ../common/mem.cpp ../common/rvfloats.cpp
|
||||||
SRCS += args.cpp cache.cpp memsim.cpp warp.cpp core.cpp decode.cpp execute.cpp exeunit.cpp tex_unit.cpp processor.cpp
|
SRCS += args.cpp cache.cpp memsim.cpp warp.cpp core.cpp decode.cpp execute.cpp exeunit.cpp tex_unit.cpp processor.cpp
|
||||||
|
|
||||||
|
|
|
@ -5,8 +5,8 @@
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
uint32_t count;
|
uint32_t count;
|
||||||
uint32_t src_ptr;
|
uint32_t src_addr;
|
||||||
uint32_t dst_ptr;
|
uint32_t dst_addr;
|
||||||
} kernel_arg_t;
|
} kernel_arg_t;
|
||||||
|
|
||||||
#endif
|
#endif
|
|
@ -5,8 +5,8 @@
|
||||||
void main() {
|
void main() {
|
||||||
kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
|
kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
|
||||||
uint32_t count = arg->count;
|
uint32_t count = arg->count;
|
||||||
int32_t* src_ptr = (int32_t*)arg->src_ptr;
|
int32_t* src_ptr = (int32_t*)arg->src_addr;
|
||||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||||
|
|
||||||
uint32_t offset = vx_core_id() * count;
|
uint32_t offset = vx_core_id() * count;
|
||||||
|
|
||||||
|
|
|
@ -24,6 +24,7 @@ uint32_t count = 0;
|
||||||
|
|
||||||
vx_device_h device = nullptr;
|
vx_device_h device = nullptr;
|
||||||
vx_buffer_h staging_buf = nullptr;
|
vx_buffer_h staging_buf = nullptr;
|
||||||
|
kernel_arg_t kernel_arg;
|
||||||
|
|
||||||
static void show_usage() {
|
static void show_usage() {
|
||||||
std::cout << "Vortex Test." << std::endl;
|
std::cout << "Vortex Test." << std::endl;
|
||||||
|
@ -57,9 +58,11 @@ static void parse_args(int argc, char **argv) {
|
||||||
|
|
||||||
void cleanup() {
|
void cleanup() {
|
||||||
if (staging_buf) {
|
if (staging_buf) {
|
||||||
vx_buf_release(staging_buf);
|
vx_buf_free(staging_buf);
|
||||||
}
|
}
|
||||||
if (device) {
|
if (device) {
|
||||||
|
vx_mem_free(device, kernel_arg.src_addr);
|
||||||
|
vx_mem_free(device, kernel_arg.dst_addr);
|
||||||
vx_dev_close(device);
|
vx_dev_close(device);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -152,7 +155,7 @@ int run_kernel_test(const kernel_arg_t& kernel_arg,
|
||||||
}
|
}
|
||||||
std::cout << "upload source buffer" << std::endl;
|
std::cout << "upload source buffer" << std::endl;
|
||||||
auto t0 = std::chrono::high_resolution_clock::now();
|
auto t0 = std::chrono::high_resolution_clock::now();
|
||||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_addr, buf_size, 0));
|
||||||
auto t1 = std::chrono::high_resolution_clock::now();
|
auto t1 = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
// clear destination buffer
|
// clear destination buffer
|
||||||
|
@ -163,7 +166,7 @@ int run_kernel_test(const kernel_arg_t& kernel_arg,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << "clear destination buffer" << std::endl;
|
std::cout << "clear destination buffer" << std::endl;
|
||||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||||
|
|
||||||
// start device
|
// start device
|
||||||
std::cout << "start execution" << std::endl;
|
std::cout << "start execution" << std::endl;
|
||||||
|
@ -175,7 +178,7 @@ int run_kernel_test(const kernel_arg_t& kernel_arg,
|
||||||
// read destination buffer from local memory
|
// read destination buffer from local memory
|
||||||
std::cout << "read destination buffer from local memory" << std::endl;
|
std::cout << "read destination buffer from local memory" << std::endl;
|
||||||
auto t4 = std::chrono::high_resolution_clock::now();
|
auto t4 = std::chrono::high_resolution_clock::now();
|
||||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||||
auto t5 = std::chrono::high_resolution_clock::now();
|
auto t5 = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
|
||||||
|
@ -215,8 +218,7 @@ int run_kernel_test(const kernel_arg_t& kernel_arg,
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
|
|
||||||
size_t value;
|
size_t value;
|
||||||
kernel_arg_t kernel_arg;
|
|
||||||
|
|
||||||
// parse command arguments
|
// parse command arguments
|
||||||
parse_args(argc, argv);
|
parse_args(argc, argv);
|
||||||
|
|
||||||
|
@ -238,25 +240,25 @@ int main(int argc, char *argv[]) {
|
||||||
std::cout << "buffer size: " << buf_size << " bytes" << std::endl;
|
std::cout << "buffer size: " << buf_size << " bytes" << std::endl;
|
||||||
|
|
||||||
// allocate device memory
|
// allocate device memory
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||||
kernel_arg.src_ptr = value;
|
kernel_arg.src_addr = value;
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||||
kernel_arg.dst_ptr = value;
|
kernel_arg.dst_addr = value;
|
||||||
|
|
||||||
kernel_arg.count = num_points;
|
kernel_arg.count = num_points;
|
||||||
|
|
||||||
std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl;
|
std::cout << "dev_src=" << std::hex << kernel_arg.src_addr << std::endl;
|
||||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
|
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl;
|
||||||
|
|
||||||
// allocate shared memory
|
// allocate shared memory
|
||||||
std::cout << "allocate shared memory" << std::endl;
|
std::cout << "allocate shared memory" << std::endl;
|
||||||
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
|
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
|
||||||
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &staging_buf));
|
RT_CHECK(vx_buf_alloc(device, alloc_size, &staging_buf));
|
||||||
|
|
||||||
// run tests
|
// run tests
|
||||||
if (0 == test || -1 == test) {
|
if (0 == test || -1 == test) {
|
||||||
std::cout << "run memcopy test" << std::endl;
|
std::cout << "run memcopy test" << std::endl;
|
||||||
RT_CHECK(run_memcopy_test(kernel_arg.src_ptr, 0x0badf00d40ff40ff, num_blocks));
|
RT_CHECK(run_memcopy_test(kernel_arg.src_addr, 0x0badf00d40ff40ff, num_blocks));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (1 == test || -1 == test) {
|
if (1 == test || -1 == test) {
|
||||||
|
|
|
@ -6,9 +6,9 @@
|
||||||
typedef struct {
|
typedef struct {
|
||||||
uint32_t num_tasks;
|
uint32_t num_tasks;
|
||||||
uint32_t task_size;
|
uint32_t task_size;
|
||||||
uint32_t src0_ptr;
|
uint32_t src0_addr;
|
||||||
uint32_t src1_ptr;
|
uint32_t src1_addr;
|
||||||
uint32_t dst_ptr;
|
uint32_t dst_addr;
|
||||||
} kernel_arg_t;
|
} kernel_arg_t;
|
||||||
|
|
||||||
#endif
|
#endif
|
|
@ -5,9 +5,9 @@
|
||||||
|
|
||||||
void kernel_body(int task_id, kernel_arg_t* arg) {
|
void kernel_body(int task_id, kernel_arg_t* arg) {
|
||||||
uint32_t count = arg->task_size;
|
uint32_t count = arg->task_size;
|
||||||
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
|
int32_t* src0_ptr = (int32_t*)arg->src0_addr;
|
||||||
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
|
int32_t* src1_ptr = (int32_t*)arg->src1_addr;
|
||||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||||
|
|
||||||
uint32_t offset = task_id * count;
|
uint32_t offset = task_id * count;
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,7 @@ uint32_t count = 0;
|
||||||
|
|
||||||
vx_device_h device = nullptr;
|
vx_device_h device = nullptr;
|
||||||
vx_buffer_h staging_buf = nullptr;
|
vx_buffer_h staging_buf = nullptr;
|
||||||
|
kernel_arg_t kernel_arg;
|
||||||
|
|
||||||
static void show_usage() {
|
static void show_usage() {
|
||||||
std::cout << "Vortex Test." << std::endl;
|
std::cout << "Vortex Test." << std::endl;
|
||||||
|
@ -51,9 +52,12 @@ static void parse_args(int argc, char **argv) {
|
||||||
|
|
||||||
void cleanup() {
|
void cleanup() {
|
||||||
if (staging_buf) {
|
if (staging_buf) {
|
||||||
vx_buf_release(staging_buf);
|
vx_buf_free(staging_buf);
|
||||||
}
|
}
|
||||||
if (device) {
|
if (device) {
|
||||||
|
vx_mem_free(device, kernel_arg.src0_addr);
|
||||||
|
vx_mem_free(device, kernel_arg.src1_addr);
|
||||||
|
vx_mem_free(device, kernel_arg.dst_addr);
|
||||||
vx_dev_close(device);
|
vx_dev_close(device);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -71,7 +75,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||||
|
|
||||||
// download destination buffer
|
// download destination buffer
|
||||||
std::cout << "download destination buffer" << std::endl;
|
std::cout << "download destination buffer" << std::endl;
|
||||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||||
|
|
||||||
// verify result
|
// verify result
|
||||||
std::cout << "verify result" << std::endl;
|
std::cout << "verify result" << std::endl;
|
||||||
|
@ -99,7 +103,6 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
size_t value;
|
size_t value;
|
||||||
kernel_arg_t kernel_arg;
|
|
||||||
|
|
||||||
// parse command arguments
|
// parse command arguments
|
||||||
parse_args(argc, argv);
|
parse_args(argc, argv);
|
||||||
|
@ -131,24 +134,24 @@ int main(int argc, char *argv[]) {
|
||||||
// allocate device memory
|
// allocate device memory
|
||||||
std::cout << "allocate device memory" << std::endl;
|
std::cout << "allocate device memory" << std::endl;
|
||||||
|
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||||
kernel_arg.src0_ptr = value;
|
kernel_arg.src0_addr = value;
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||||
kernel_arg.src1_ptr = value;
|
kernel_arg.src1_addr = value;
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||||
kernel_arg.dst_ptr = value;
|
kernel_arg.dst_addr = value;
|
||||||
|
|
||||||
kernel_arg.num_tasks = num_tasks;
|
kernel_arg.num_tasks = num_tasks;
|
||||||
kernel_arg.task_size = count;
|
kernel_arg.task_size = count;
|
||||||
|
|
||||||
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::endl;
|
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_addr << std::endl;
|
||||||
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::endl;
|
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_addr << std::endl;
|
||||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
|
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl;
|
||||||
|
|
||||||
// allocate shared memory
|
// allocate shared memory
|
||||||
std::cout << "allocate shared memory" << std::endl;
|
std::cout << "allocate shared memory" << std::endl;
|
||||||
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
|
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
|
||||||
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &staging_buf));
|
RT_CHECK(vx_buf_alloc(device, alloc_size, &staging_buf));
|
||||||
|
|
||||||
// upload kernel argument
|
// upload kernel argument
|
||||||
std::cout << "upload kernel argument" << std::endl;
|
std::cout << "upload kernel argument" << std::endl;
|
||||||
|
@ -166,7 +169,7 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << "upload source buffer0" << std::endl;
|
std::cout << "upload source buffer0" << std::endl;
|
||||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src0_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src0_addr, buf_size, 0));
|
||||||
|
|
||||||
// upload source buffer1
|
// upload source buffer1
|
||||||
{
|
{
|
||||||
|
@ -176,7 +179,7 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << "upload source buffer1" << std::endl;
|
std::cout << "upload source buffer1" << std::endl;
|
||||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src1_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src1_addr, buf_size, 0));
|
||||||
|
|
||||||
// clear destination buffer
|
// clear destination buffer
|
||||||
{
|
{
|
||||||
|
@ -186,7 +189,7 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << "clear destination buffer" << std::endl;
|
std::cout << "clear destination buffer" << std::endl;
|
||||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||||
|
|
||||||
// run tests
|
// run tests
|
||||||
std::cout << "run tests" << std::endl;
|
std::cout << "run tests" << std::endl;
|
||||||
|
|
|
@ -5,8 +5,8 @@
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
uint32_t num_points;
|
uint32_t num_points;
|
||||||
uint32_t src_ptr;
|
uint32_t src_addr;
|
||||||
uint32_t dst_ptr;
|
uint32_t dst_addr;
|
||||||
} kernel_arg_t;
|
} kernel_arg_t;
|
||||||
|
|
||||||
#endif
|
#endif
|
|
@ -6,8 +6,8 @@
|
||||||
// Parallel Selection sort
|
// Parallel Selection sort
|
||||||
|
|
||||||
void kernel_body(int task_id, kernel_arg_t* arg) {
|
void kernel_body(int task_id, kernel_arg_t* arg) {
|
||||||
int32_t* src_ptr = (int32_t*)arg->src_ptr;
|
int32_t* src_ptr = (int32_t*)arg->src_addr;
|
||||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||||
|
|
||||||
int value = src_ptr[task_id];
|
int value = src_ptr[task_id];
|
||||||
|
|
||||||
|
|
|
@ -25,6 +25,7 @@ std::vector<int> ref_data;
|
||||||
|
|
||||||
vx_device_h device = nullptr;
|
vx_device_h device = nullptr;
|
||||||
vx_buffer_h staging_buf = nullptr;
|
vx_buffer_h staging_buf = nullptr;
|
||||||
|
kernel_arg_t kernel_arg;
|
||||||
|
|
||||||
static void show_usage() {
|
static void show_usage() {
|
||||||
std::cout << "Vortex Test." << std::endl;
|
std::cout << "Vortex Test." << std::endl;
|
||||||
|
@ -55,9 +56,11 @@ static void parse_args(int argc, char **argv) {
|
||||||
|
|
||||||
void cleanup() {
|
void cleanup() {
|
||||||
if (staging_buf) {
|
if (staging_buf) {
|
||||||
vx_buf_release(staging_buf);
|
vx_buf_free(staging_buf);
|
||||||
}
|
}
|
||||||
if (device) {
|
if (device) {
|
||||||
|
vx_mem_free(device, kernel_arg.src_addr);
|
||||||
|
vx_mem_free(device, kernel_arg.dst_addr);
|
||||||
vx_dev_close(device);
|
vx_dev_close(device);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -125,7 +128,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||||
|
|
||||||
// download destination buffer
|
// download destination buffer
|
||||||
std::cout << "download destination buffer" << std::endl;
|
std::cout << "download destination buffer" << std::endl;
|
||||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||||
|
|
||||||
// verify result
|
// verify result
|
||||||
std::cout << "verify result" << std::endl;
|
std::cout << "verify result" << std::endl;
|
||||||
|
@ -153,7 +156,6 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
size_t value;
|
size_t value;
|
||||||
kernel_arg_t kernel_arg;
|
|
||||||
|
|
||||||
// parse command arguments
|
// parse command arguments
|
||||||
parse_args(argc, argv);
|
parse_args(argc, argv);
|
||||||
|
@ -189,22 +191,22 @@ int main(int argc, char *argv[]) {
|
||||||
// allocate device memory
|
// allocate device memory
|
||||||
std::cout << "allocate device memory" << std::endl;
|
std::cout << "allocate device memory" << std::endl;
|
||||||
|
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, src_buf_size, &value));
|
RT_CHECK(vx_mem_alloc(device, src_buf_size, &value));
|
||||||
kernel_arg.src_ptr = value;
|
kernel_arg.src_addr = value;
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, dst_buf_size, &value));
|
RT_CHECK(vx_mem_alloc(device, dst_buf_size, &value));
|
||||||
kernel_arg.dst_ptr = value;
|
kernel_arg.dst_addr = value;
|
||||||
|
|
||||||
kernel_arg.num_points = num_points;
|
kernel_arg.num_points = num_points;
|
||||||
|
|
||||||
std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl;
|
std::cout << "dev_src=" << std::hex << kernel_arg.src_addr << std::endl;
|
||||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
|
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl;
|
||||||
|
|
||||||
// allocate shared memory
|
// allocate shared memory
|
||||||
std::cout << "allocate shared memory" << std::endl;
|
std::cout << "allocate shared memory" << std::endl;
|
||||||
uint32_t staging_buf_size = std::max<uint32_t>(src_buf_size,
|
uint32_t staging_buf_size = std::max<uint32_t>(src_buf_size,
|
||||||
std::max<uint32_t>(dst_buf_size,
|
std::max<uint32_t>(dst_buf_size,
|
||||||
sizeof(kernel_arg_t)));
|
sizeof(kernel_arg_t)));
|
||||||
RT_CHECK(vx_alloc_shared_mem(device, staging_buf_size, &staging_buf));
|
RT_CHECK(vx_buf_alloc(device, staging_buf_size, &staging_buf));
|
||||||
|
|
||||||
// upload kernel argument
|
// upload kernel argument
|
||||||
std::cout << "upload kernel argument" << std::endl;
|
std::cout << "upload kernel argument" << std::endl;
|
||||||
|
@ -222,7 +224,7 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << "upload source buffer" << std::endl;
|
std::cout << "upload source buffer" << std::endl;
|
||||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, src_buf_size, 0));
|
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_addr, src_buf_size, 0));
|
||||||
|
|
||||||
// clear destination buffer
|
// clear destination buffer
|
||||||
{
|
{
|
||||||
|
@ -232,7 +234,7 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << "clear destination buffer" << std::endl;
|
std::cout << "clear destination buffer" << std::endl;
|
||||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, dst_buf_size, 0));
|
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, dst_buf_size, 0));
|
||||||
|
|
||||||
// run tests
|
// run tests
|
||||||
std::cout << "run tests" << std::endl;
|
std::cout << "run tests" << std::endl;
|
||||||
|
|
|
@ -7,9 +7,9 @@ typedef struct {
|
||||||
uint32_t testid;
|
uint32_t testid;
|
||||||
uint32_t num_tasks;
|
uint32_t num_tasks;
|
||||||
uint32_t task_size;
|
uint32_t task_size;
|
||||||
uint32_t src0_ptr;
|
uint32_t src0_addr;
|
||||||
uint32_t src1_ptr;
|
uint32_t src1_addr;
|
||||||
uint32_t dst_ptr;
|
uint32_t dst_addr;
|
||||||
} kernel_arg_t;
|
} kernel_arg_t;
|
||||||
|
|
||||||
#endif
|
#endif
|
|
@ -13,9 +13,9 @@ inline float __ieee754_sqrtf (float x) {
|
||||||
|
|
||||||
void kernel_iadd(int task_id, kernel_arg_t* arg) {
|
void kernel_iadd(int task_id, kernel_arg_t* arg) {
|
||||||
uint32_t count = arg->task_size;
|
uint32_t count = arg->task_size;
|
||||||
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
|
int32_t* src0_ptr = (int32_t*)arg->src0_addr;
|
||||||
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
|
int32_t* src1_ptr = (int32_t*)arg->src1_addr;
|
||||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||||
uint32_t offset = task_id * count;
|
uint32_t offset = task_id * count;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < count; ++i) {
|
for (uint32_t i = 0; i < count; ++i) {
|
||||||
|
@ -28,9 +28,9 @@ void kernel_iadd(int task_id, kernel_arg_t* arg) {
|
||||||
|
|
||||||
void kernel_imul(int task_id, kernel_arg_t* arg) {
|
void kernel_imul(int task_id, kernel_arg_t* arg) {
|
||||||
uint32_t count = arg->task_size;
|
uint32_t count = arg->task_size;
|
||||||
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
|
int32_t* src0_ptr = (int32_t*)arg->src0_addr;
|
||||||
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
|
int32_t* src1_ptr = (int32_t*)arg->src1_addr;
|
||||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||||
uint32_t offset = task_id * count;
|
uint32_t offset = task_id * count;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < count; ++i) {
|
for (uint32_t i = 0; i < count; ++i) {
|
||||||
|
@ -43,9 +43,9 @@ void kernel_imul(int task_id, kernel_arg_t* arg) {
|
||||||
|
|
||||||
void kernel_idiv(int task_id, kernel_arg_t* arg) {
|
void kernel_idiv(int task_id, kernel_arg_t* arg) {
|
||||||
uint32_t count = arg->task_size;
|
uint32_t count = arg->task_size;
|
||||||
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
|
int32_t* src0_ptr = (int32_t*)arg->src0_addr;
|
||||||
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
|
int32_t* src1_ptr = (int32_t*)arg->src1_addr;
|
||||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||||
uint32_t offset = task_id * count;
|
uint32_t offset = task_id * count;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < count; ++i) {
|
for (uint32_t i = 0; i < count; ++i) {
|
||||||
|
@ -58,9 +58,9 @@ void kernel_idiv(int task_id, kernel_arg_t* arg) {
|
||||||
|
|
||||||
void kernel_idiv_mul(int task_id, kernel_arg_t* arg) {
|
void kernel_idiv_mul(int task_id, kernel_arg_t* arg) {
|
||||||
uint32_t count = arg->task_size;
|
uint32_t count = arg->task_size;
|
||||||
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
|
int32_t* src0_ptr = (int32_t*)arg->src0_addr;
|
||||||
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
|
int32_t* src1_ptr = (int32_t*)arg->src1_addr;
|
||||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||||
uint32_t offset = task_id * count;
|
uint32_t offset = task_id * count;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < count; ++i) {
|
for (uint32_t i = 0; i < count; ++i) {
|
||||||
|
@ -75,9 +75,9 @@ void kernel_idiv_mul(int task_id, kernel_arg_t* arg) {
|
||||||
|
|
||||||
void kernel_fadd(int task_id, kernel_arg_t* arg) {
|
void kernel_fadd(int task_id, kernel_arg_t* arg) {
|
||||||
uint32_t count = arg->task_size;
|
uint32_t count = arg->task_size;
|
||||||
float* src0_ptr = (float*)arg->src0_ptr;
|
float* src0_ptr = (float*)arg->src0_addr;
|
||||||
float* src1_ptr = (float*)arg->src1_ptr;
|
float* src1_ptr = (float*)arg->src1_addr;
|
||||||
float* dst_ptr = (float*)arg->dst_ptr;
|
float* dst_ptr = (float*)arg->dst_addr;
|
||||||
uint32_t offset = task_id * count;
|
uint32_t offset = task_id * count;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < count; ++i) {
|
for (uint32_t i = 0; i < count; ++i) {
|
||||||
|
@ -90,9 +90,9 @@ void kernel_fadd(int task_id, kernel_arg_t* arg) {
|
||||||
|
|
||||||
void kernel_fsub(int task_id, kernel_arg_t* arg) {
|
void kernel_fsub(int task_id, kernel_arg_t* arg) {
|
||||||
uint32_t count = arg->task_size;
|
uint32_t count = arg->task_size;
|
||||||
float* src0_ptr = (float*)arg->src0_ptr;
|
float* src0_ptr = (float*)arg->src0_addr;
|
||||||
float* src1_ptr = (float*)arg->src1_ptr;
|
float* src1_ptr = (float*)arg->src1_addr;
|
||||||
float* dst_ptr = (float*)arg->dst_ptr;
|
float* dst_ptr = (float*)arg->dst_addr;
|
||||||
uint32_t offset = task_id * count;
|
uint32_t offset = task_id * count;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < count; ++i) {
|
for (uint32_t i = 0; i < count; ++i) {
|
||||||
|
@ -105,9 +105,9 @@ void kernel_fsub(int task_id, kernel_arg_t* arg) {
|
||||||
|
|
||||||
void kernel_fmul(int task_id, kernel_arg_t* arg) {
|
void kernel_fmul(int task_id, kernel_arg_t* arg) {
|
||||||
uint32_t count = arg->task_size;
|
uint32_t count = arg->task_size;
|
||||||
float* src0_ptr = (float*)arg->src0_ptr;
|
float* src0_ptr = (float*)arg->src0_addr;
|
||||||
float* src1_ptr = (float*)arg->src1_ptr;
|
float* src1_ptr = (float*)arg->src1_addr;
|
||||||
float* dst_ptr = (float*)arg->dst_ptr;
|
float* dst_ptr = (float*)arg->dst_addr;
|
||||||
uint32_t offset = task_id * count;
|
uint32_t offset = task_id * count;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < count; ++i) {
|
for (uint32_t i = 0; i < count; ++i) {
|
||||||
|
@ -120,9 +120,9 @@ void kernel_fmul(int task_id, kernel_arg_t* arg) {
|
||||||
|
|
||||||
void kernel_fmadd(int task_id, kernel_arg_t* arg) {
|
void kernel_fmadd(int task_id, kernel_arg_t* arg) {
|
||||||
uint32_t count = arg->task_size;
|
uint32_t count = arg->task_size;
|
||||||
float* src0_ptr = (float*)arg->src0_ptr;
|
float* src0_ptr = (float*)arg->src0_addr;
|
||||||
float* src1_ptr = (float*)arg->src1_ptr;
|
float* src1_ptr = (float*)arg->src1_addr;
|
||||||
float* dst_ptr = (float*)arg->dst_ptr;
|
float* dst_ptr = (float*)arg->dst_addr;
|
||||||
uint32_t offset = task_id * count;
|
uint32_t offset = task_id * count;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < count; ++i) {
|
for (uint32_t i = 0; i < count; ++i) {
|
||||||
|
@ -135,9 +135,9 @@ void kernel_fmadd(int task_id, kernel_arg_t* arg) {
|
||||||
|
|
||||||
void kernel_fmsub(int task_id, kernel_arg_t* arg) {
|
void kernel_fmsub(int task_id, kernel_arg_t* arg) {
|
||||||
uint32_t count = arg->task_size;
|
uint32_t count = arg->task_size;
|
||||||
float* src0_ptr = (float*)arg->src0_ptr;
|
float* src0_ptr = (float*)arg->src0_addr;
|
||||||
float* src1_ptr = (float*)arg->src1_ptr;
|
float* src1_ptr = (float*)arg->src1_addr;
|
||||||
float* dst_ptr = (float*)arg->dst_ptr;
|
float* dst_ptr = (float*)arg->dst_addr;
|
||||||
uint32_t offset = task_id * count;
|
uint32_t offset = task_id * count;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < count; ++i) {
|
for (uint32_t i = 0; i < count; ++i) {
|
||||||
|
@ -150,9 +150,9 @@ void kernel_fmsub(int task_id, kernel_arg_t* arg) {
|
||||||
|
|
||||||
void kernel_fnmadd(int task_id, kernel_arg_t* arg) {
|
void kernel_fnmadd(int task_id, kernel_arg_t* arg) {
|
||||||
uint32_t count = arg->task_size;
|
uint32_t count = arg->task_size;
|
||||||
float* src0_ptr = (float*)arg->src0_ptr;
|
float* src0_ptr = (float*)arg->src0_addr;
|
||||||
float* src1_ptr = (float*)arg->src1_ptr;
|
float* src1_ptr = (float*)arg->src1_addr;
|
||||||
float* dst_ptr = (float*)arg->dst_ptr;
|
float* dst_ptr = (float*)arg->dst_addr;
|
||||||
uint32_t offset = task_id * count;
|
uint32_t offset = task_id * count;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < count; ++i) {
|
for (uint32_t i = 0; i < count; ++i) {
|
||||||
|
@ -165,9 +165,9 @@ void kernel_fnmadd(int task_id, kernel_arg_t* arg) {
|
||||||
|
|
||||||
void kernel_fnmsub(int task_id, kernel_arg_t* arg) {
|
void kernel_fnmsub(int task_id, kernel_arg_t* arg) {
|
||||||
uint32_t count = arg->task_size;
|
uint32_t count = arg->task_size;
|
||||||
float* src0_ptr = (float*)arg->src0_ptr;
|
float* src0_ptr = (float*)arg->src0_addr;
|
||||||
float* src1_ptr = (float*)arg->src1_ptr;
|
float* src1_ptr = (float*)arg->src1_addr;
|
||||||
float* dst_ptr = (float*)arg->dst_ptr;
|
float* dst_ptr = (float*)arg->dst_addr;
|
||||||
uint32_t offset = task_id * count;
|
uint32_t offset = task_id * count;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < count; ++i) {
|
for (uint32_t i = 0; i < count; ++i) {
|
||||||
|
@ -180,9 +180,9 @@ void kernel_fnmsub(int task_id, kernel_arg_t* arg) {
|
||||||
|
|
||||||
void kernel_fnmadd_madd(int task_id, kernel_arg_t* arg) {
|
void kernel_fnmadd_madd(int task_id, kernel_arg_t* arg) {
|
||||||
uint32_t count = arg->task_size;
|
uint32_t count = arg->task_size;
|
||||||
float* src0_ptr = (float*)arg->src0_ptr;
|
float* src0_ptr = (float*)arg->src0_addr;
|
||||||
float* src1_ptr = (float*)arg->src1_ptr;
|
float* src1_ptr = (float*)arg->src1_addr;
|
||||||
float* dst_ptr = (float*)arg->dst_ptr;
|
float* dst_ptr = (float*)arg->dst_addr;
|
||||||
uint32_t offset = task_id * count;
|
uint32_t offset = task_id * count;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < count; ++i) {
|
for (uint32_t i = 0; i < count; ++i) {
|
||||||
|
@ -197,9 +197,9 @@ void kernel_fnmadd_madd(int task_id, kernel_arg_t* arg) {
|
||||||
|
|
||||||
void kernel_fdiv(int task_id, kernel_arg_t* arg) {
|
void kernel_fdiv(int task_id, kernel_arg_t* arg) {
|
||||||
uint32_t count = arg->task_size;
|
uint32_t count = arg->task_size;
|
||||||
float* src0_ptr = (float*)arg->src0_ptr;
|
float* src0_ptr = (float*)arg->src0_addr;
|
||||||
float* src1_ptr = (float*)arg->src1_ptr;
|
float* src1_ptr = (float*)arg->src1_addr;
|
||||||
float* dst_ptr = (float*)arg->dst_ptr;
|
float* dst_ptr = (float*)arg->dst_addr;
|
||||||
uint32_t offset = task_id * count;
|
uint32_t offset = task_id * count;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < count; ++i) {
|
for (uint32_t i = 0; i < count; ++i) {
|
||||||
|
@ -212,9 +212,9 @@ void kernel_fdiv(int task_id, kernel_arg_t* arg) {
|
||||||
|
|
||||||
void kernel_fdiv2(int task_id, kernel_arg_t* arg) {
|
void kernel_fdiv2(int task_id, kernel_arg_t* arg) {
|
||||||
uint32_t count = arg->task_size;
|
uint32_t count = arg->task_size;
|
||||||
float* src0_ptr = (float*)arg->src0_ptr;
|
float* src0_ptr = (float*)arg->src0_addr;
|
||||||
float* src1_ptr = (float*)arg->src1_ptr;
|
float* src1_ptr = (float*)arg->src1_addr;
|
||||||
float* dst_ptr = (float*)arg->dst_ptr;
|
float* dst_ptr = (float*)arg->dst_addr;
|
||||||
uint32_t offset = task_id * count;
|
uint32_t offset = task_id * count;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < count; ++i) {
|
for (uint32_t i = 0; i < count; ++i) {
|
||||||
|
@ -229,9 +229,9 @@ void kernel_fdiv2(int task_id, kernel_arg_t* arg) {
|
||||||
|
|
||||||
void kernel_fsqrt(int task_id, kernel_arg_t* arg) {
|
void kernel_fsqrt(int task_id, kernel_arg_t* arg) {
|
||||||
uint32_t count = arg->task_size;
|
uint32_t count = arg->task_size;
|
||||||
float* src0_ptr = (float*)arg->src0_ptr;
|
float* src0_ptr = (float*)arg->src0_addr;
|
||||||
float* src1_ptr = (float*)arg->src1_ptr;
|
float* src1_ptr = (float*)arg->src1_addr;
|
||||||
float* dst_ptr = (float*)arg->dst_ptr;
|
float* dst_ptr = (float*)arg->dst_addr;
|
||||||
uint32_t offset = task_id * count;
|
uint32_t offset = task_id * count;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < count; ++i) {
|
for (uint32_t i = 0; i < count; ++i) {
|
||||||
|
@ -244,9 +244,9 @@ void kernel_fsqrt(int task_id, kernel_arg_t* arg) {
|
||||||
|
|
||||||
void kernel_ftoi(int task_id, kernel_arg_t* arg) {
|
void kernel_ftoi(int task_id, kernel_arg_t* arg) {
|
||||||
uint32_t count = arg->task_size;
|
uint32_t count = arg->task_size;
|
||||||
float* src0_ptr = (float*)arg->src0_ptr;
|
float* src0_ptr = (float*)arg->src0_addr;
|
||||||
float* src1_ptr = (float*)arg->src1_ptr;
|
float* src1_ptr = (float*)arg->src1_addr;
|
||||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||||
uint32_t offset = task_id * count;
|
uint32_t offset = task_id * count;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < count; ++i) {
|
for (uint32_t i = 0; i < count; ++i) {
|
||||||
|
@ -260,9 +260,9 @@ void kernel_ftoi(int task_id, kernel_arg_t* arg) {
|
||||||
|
|
||||||
void kernel_ftou(int task_id, kernel_arg_t* arg) {
|
void kernel_ftou(int task_id, kernel_arg_t* arg) {
|
||||||
uint32_t count = arg->task_size;
|
uint32_t count = arg->task_size;
|
||||||
float* src0_ptr = (float*)arg->src0_ptr;
|
float* src0_ptr = (float*)arg->src0_addr;
|
||||||
float* src1_ptr = (float*)arg->src1_ptr;
|
float* src1_ptr = (float*)arg->src1_addr;
|
||||||
uint32_t* dst_ptr = (uint32_t*)arg->dst_ptr;
|
uint32_t* dst_ptr = (uint32_t*)arg->dst_addr;
|
||||||
uint32_t offset = task_id * count;
|
uint32_t offset = task_id * count;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < count; ++i) {
|
for (uint32_t i = 0; i < count; ++i) {
|
||||||
|
@ -276,9 +276,9 @@ void kernel_ftou(int task_id, kernel_arg_t* arg) {
|
||||||
|
|
||||||
void kernel_itof(int task_id, kernel_arg_t* arg) {
|
void kernel_itof(int task_id, kernel_arg_t* arg) {
|
||||||
uint32_t count = arg->task_size;
|
uint32_t count = arg->task_size;
|
||||||
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
|
int32_t* src0_ptr = (int32_t*)arg->src0_addr;
|
||||||
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
|
int32_t* src1_ptr = (int32_t*)arg->src1_addr;
|
||||||
float* dst_ptr = (float*)arg->dst_ptr;
|
float* dst_ptr = (float*)arg->dst_addr;
|
||||||
uint32_t offset = task_id * count;
|
uint32_t offset = task_id * count;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < count; ++i) {
|
for (uint32_t i = 0; i < count; ++i) {
|
||||||
|
@ -292,9 +292,9 @@ void kernel_itof(int task_id, kernel_arg_t* arg) {
|
||||||
|
|
||||||
void kernel_utof(int task_id, kernel_arg_t* arg) {
|
void kernel_utof(int task_id, kernel_arg_t* arg) {
|
||||||
uint32_t count = arg->task_size;
|
uint32_t count = arg->task_size;
|
||||||
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
|
int32_t* src0_ptr = (int32_t*)arg->src0_addr;
|
||||||
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
|
int32_t* src1_ptr = (int32_t*)arg->src1_addr;
|
||||||
float* dst_ptr = (float*)arg->dst_ptr;
|
float* dst_ptr = (float*)arg->dst_addr;
|
||||||
uint32_t offset = task_id * count;
|
uint32_t offset = task_id * count;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < count; ++i) {
|
for (uint32_t i = 0; i < count; ++i) {
|
||||||
|
|
|
@ -87,6 +87,7 @@ vx_buffer_h arg_buf = nullptr;
|
||||||
vx_buffer_h src1_buf = nullptr;
|
vx_buffer_h src1_buf = nullptr;
|
||||||
vx_buffer_h src2_buf = nullptr;
|
vx_buffer_h src2_buf = nullptr;
|
||||||
vx_buffer_h dst_buf = nullptr;
|
vx_buffer_h dst_buf = nullptr;
|
||||||
|
kernel_arg_t kernel_arg;
|
||||||
|
|
||||||
static void show_usage() {
|
static void show_usage() {
|
||||||
std::cout << "Vortex Test." << std::endl;
|
std::cout << "Vortex Test." << std::endl;
|
||||||
|
@ -130,26 +131,28 @@ static void parse_args(int argc, char **argv) {
|
||||||
|
|
||||||
void cleanup() {
|
void cleanup() {
|
||||||
if (arg_buf) {
|
if (arg_buf) {
|
||||||
vx_buf_release(arg_buf);
|
vx_buf_free(arg_buf);
|
||||||
}
|
}
|
||||||
if (src1_buf) {
|
if (src1_buf) {
|
||||||
vx_buf_release(src1_buf);
|
vx_buf_free(src1_buf);
|
||||||
}
|
}
|
||||||
if (src2_buf) {
|
if (src2_buf) {
|
||||||
vx_buf_release(src2_buf);
|
vx_buf_free(src2_buf);
|
||||||
}
|
}
|
||||||
if (dst_buf) {
|
if (dst_buf) {
|
||||||
vx_buf_release(dst_buf);
|
vx_buf_free(dst_buf);
|
||||||
}
|
}
|
||||||
if (device) {
|
if (device) {
|
||||||
|
vx_mem_free(device, kernel_arg.src0_addr);
|
||||||
|
vx_mem_free(device, kernel_arg.src1_addr);
|
||||||
|
vx_mem_free(device, kernel_arg.dst_addr);
|
||||||
vx_dev_close(device);
|
vx_dev_close(device);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
int exitcode = 0;
|
int exitcode = 0;
|
||||||
size_t value;
|
size_t value;
|
||||||
kernel_arg_t kernel_arg;
|
|
||||||
|
|
||||||
// parse command arguments
|
// parse command arguments
|
||||||
parse_args(argc, argv);
|
parse_args(argc, argv);
|
||||||
|
@ -187,26 +190,26 @@ int main(int argc, char *argv[]) {
|
||||||
// allocate device memory
|
// allocate device memory
|
||||||
std::cout << "allocate device memory" << std::endl;
|
std::cout << "allocate device memory" << std::endl;
|
||||||
|
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||||
kernel_arg.src0_ptr = value;
|
kernel_arg.src0_addr = value;
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||||
kernel_arg.src1_ptr = value;
|
kernel_arg.src1_addr = value;
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||||
kernel_arg.dst_ptr = value;
|
kernel_arg.dst_addr = value;
|
||||||
|
|
||||||
kernel_arg.num_tasks = num_tasks;
|
kernel_arg.num_tasks = num_tasks;
|
||||||
kernel_arg.task_size = count;
|
kernel_arg.task_size = count;
|
||||||
|
|
||||||
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::dec << std::endl;
|
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_addr << std::dec << std::endl;
|
||||||
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::dec << std::endl;
|
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_addr << std::dec << std::endl;
|
||||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::dec << std::endl;
|
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::dec << std::endl;
|
||||||
|
|
||||||
// allocate shared memory
|
// allocate shared memory
|
||||||
std::cout << "allocate shared memory" << std::endl;
|
std::cout << "allocate shared memory" << std::endl;
|
||||||
RT_CHECK(vx_alloc_shared_mem(device, sizeof(kernel_arg_t), &arg_buf));
|
RT_CHECK(vx_buf_alloc(device, sizeof(kernel_arg_t), &arg_buf));
|
||||||
RT_CHECK(vx_alloc_shared_mem(device, buf_size, &src1_buf));
|
RT_CHECK(vx_buf_alloc(device, buf_size, &src1_buf));
|
||||||
RT_CHECK(vx_alloc_shared_mem(device, buf_size, &src2_buf));
|
RT_CHECK(vx_buf_alloc(device, buf_size, &src2_buf));
|
||||||
RT_CHECK(vx_alloc_shared_mem(device, buf_size, &dst_buf));
|
RT_CHECK(vx_buf_alloc(device, buf_size, &dst_buf));
|
||||||
|
|
||||||
for (int t = testid_s; t <= testid_e; ++t) {
|
for (int t = testid_s; t <= testid_e; ++t) {
|
||||||
auto name = testMngr.get_name(t);
|
auto name = testMngr.get_name(t);
|
||||||
|
@ -226,18 +229,18 @@ int main(int argc, char *argv[]) {
|
||||||
|
|
||||||
// upload source buffer0
|
// upload source buffer0
|
||||||
std::cout << "upload source buffer0" << std::endl;
|
std::cout << "upload source buffer0" << std::endl;
|
||||||
RT_CHECK(vx_copy_to_dev(src1_buf, kernel_arg.src0_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_to_dev(src1_buf, kernel_arg.src0_addr, buf_size, 0));
|
||||||
|
|
||||||
// upload source buffer1
|
// upload source buffer1
|
||||||
std::cout << "upload source buffer1" << std::endl;
|
std::cout << "upload source buffer1" << std::endl;
|
||||||
RT_CHECK(vx_copy_to_dev(src2_buf, kernel_arg.src1_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_to_dev(src2_buf, kernel_arg.src1_addr, buf_size, 0));
|
||||||
|
|
||||||
// clear destination buffer
|
// clear destination buffer
|
||||||
std::cout << "clear destination buffer" << std::endl;
|
std::cout << "clear destination buffer" << std::endl;
|
||||||
for (int i = 0; i < num_points; ++i) {
|
for (int i = 0; i < num_points; ++i) {
|
||||||
((uint32_t*)vx_host_ptr(dst_buf))[i] = 0xdeadbeef;
|
((uint32_t*)vx_host_ptr(dst_buf))[i] = 0xdeadbeef;
|
||||||
}
|
}
|
||||||
RT_CHECK(vx_copy_to_dev(dst_buf, kernel_arg.dst_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_to_dev(dst_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||||
|
|
||||||
// start device
|
// start device
|
||||||
std::cout << "start device" << std::endl;
|
std::cout << "start device" << std::endl;
|
||||||
|
@ -249,7 +252,7 @@ int main(int argc, char *argv[]) {
|
||||||
|
|
||||||
// download destination buffer
|
// download destination buffer
|
||||||
std::cout << "download destination buffer" << std::endl;
|
std::cout << "download destination buffer" << std::endl;
|
||||||
RT_CHECK(vx_copy_from_dev(dst_buf, kernel_arg.dst_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_from_dev(dst_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||||
|
|
||||||
// verify destination
|
// verify destination
|
||||||
std::cout << "verify test result" << std::endl;
|
std::cout << "verify test result" << std::endl;
|
||||||
|
|
|
@ -6,9 +6,9 @@
|
||||||
typedef struct {
|
typedef struct {
|
||||||
uint32_t num_tasks;
|
uint32_t num_tasks;
|
||||||
uint32_t task_size;
|
uint32_t task_size;
|
||||||
uint32_t src0_ptr;
|
uint32_t src0_addr;
|
||||||
uint32_t src1_ptr;
|
uint32_t src1_addr;
|
||||||
uint32_t dst_ptr;
|
uint32_t dst_addr;
|
||||||
} kernel_arg_t;
|
} kernel_arg_t;
|
||||||
|
|
||||||
#endif
|
#endif
|
|
@ -5,9 +5,9 @@
|
||||||
|
|
||||||
void kernel_body(int task_id, kernel_arg_t* arg) {
|
void kernel_body(int task_id, kernel_arg_t* arg) {
|
||||||
uint32_t count = arg->task_size;
|
uint32_t count = arg->task_size;
|
||||||
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
|
int32_t* src0_ptr = (int32_t*)arg->src0_addr;
|
||||||
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
|
int32_t* src1_ptr = (int32_t*)arg->src1_addr;
|
||||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||||
|
|
||||||
uint32_t offset = task_id * count;
|
uint32_t offset = task_id * count;
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,7 @@ uint32_t count = 0;
|
||||||
|
|
||||||
vx_device_h device = nullptr;
|
vx_device_h device = nullptr;
|
||||||
vx_buffer_h staging_buf = nullptr;
|
vx_buffer_h staging_buf = nullptr;
|
||||||
|
kernel_arg_t kernel_arg;
|
||||||
|
|
||||||
static void show_usage() {
|
static void show_usage() {
|
||||||
std::cout << "Vortex Test." << std::endl;
|
std::cout << "Vortex Test." << std::endl;
|
||||||
|
@ -51,9 +52,12 @@ static void parse_args(int argc, char **argv) {
|
||||||
|
|
||||||
void cleanup() {
|
void cleanup() {
|
||||||
if (staging_buf) {
|
if (staging_buf) {
|
||||||
vx_buf_release(staging_buf);
|
vx_buf_free(staging_buf);
|
||||||
}
|
}
|
||||||
if (device) {
|
if (device) {
|
||||||
|
vx_mem_free(device, kernel_arg.src0_addr);
|
||||||
|
vx_mem_free(device, kernel_arg.src1_addr);
|
||||||
|
vx_mem_free(device, kernel_arg.dst_addr);
|
||||||
vx_dev_close(device);
|
vx_dev_close(device);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -71,7 +75,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||||
|
|
||||||
// download destination buffer
|
// download destination buffer
|
||||||
std::cout << "download destination buffer" << std::endl;
|
std::cout << "download destination buffer" << std::endl;
|
||||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||||
|
|
||||||
// verify result
|
// verify result
|
||||||
std::cout << "verify result" << std::endl;
|
std::cout << "verify result" << std::endl;
|
||||||
|
@ -98,8 +102,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
size_t value;
|
size_t value;
|
||||||
kernel_arg_t kernel_arg;
|
|
||||||
|
|
||||||
// parse command arguments
|
// parse command arguments
|
||||||
parse_args(argc, argv);
|
parse_args(argc, argv);
|
||||||
|
@ -131,24 +134,24 @@ int main(int argc, char *argv[]) {
|
||||||
// allocate device memory
|
// allocate device memory
|
||||||
std::cout << "allocate device memory" << std::endl;
|
std::cout << "allocate device memory" << std::endl;
|
||||||
|
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||||
kernel_arg.src0_ptr = value;
|
kernel_arg.src0_addr = value;
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||||
kernel_arg.src1_ptr = value;
|
kernel_arg.src1_addr = value;
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||||
kernel_arg.dst_ptr = value;
|
kernel_arg.dst_addr = value;
|
||||||
|
|
||||||
kernel_arg.num_tasks = num_tasks;
|
kernel_arg.num_tasks = num_tasks;
|
||||||
kernel_arg.task_size = count;
|
kernel_arg.task_size = count;
|
||||||
|
|
||||||
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::endl;
|
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_addr << std::endl;
|
||||||
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::endl;
|
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_addr << std::endl;
|
||||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
|
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl;
|
||||||
|
|
||||||
// allocate shared memory
|
// allocate shared memory
|
||||||
std::cout << "allocate shared memory" << std::endl;
|
std::cout << "allocate shared memory" << std::endl;
|
||||||
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
|
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
|
||||||
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &staging_buf));
|
RT_CHECK(vx_buf_alloc(device, alloc_size, &staging_buf));
|
||||||
|
|
||||||
// upload kernel argument
|
// upload kernel argument
|
||||||
std::cout << "upload kernel argument" << std::endl;
|
std::cout << "upload kernel argument" << std::endl;
|
||||||
|
@ -166,7 +169,7 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << "upload source buffer0" << std::endl;
|
std::cout << "upload source buffer0" << std::endl;
|
||||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src0_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src0_addr, buf_size, 0));
|
||||||
|
|
||||||
// upload source buffer1
|
// upload source buffer1
|
||||||
{
|
{
|
||||||
|
@ -176,7 +179,7 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << "upload source buffer1" << std::endl;
|
std::cout << "upload source buffer1" << std::endl;
|
||||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src1_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src1_addr, buf_size, 0));
|
||||||
|
|
||||||
// clear destination buffer
|
// clear destination buffer
|
||||||
{
|
{
|
||||||
|
@ -186,7 +189,7 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << "clear destination buffer" << std::endl;
|
std::cout << "clear destination buffer" << std::endl;
|
||||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||||
|
|
||||||
// run tests
|
// run tests
|
||||||
std::cout << "run tests" << std::endl;
|
std::cout << "run tests" << std::endl;
|
||||||
|
|
|
@ -5,8 +5,8 @@
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
uint32_t num_points;
|
uint32_t num_points;
|
||||||
uint32_t src_ptr;
|
uint32_t src_addr;
|
||||||
uint32_t dst_ptr;
|
uint32_t dst_addr;
|
||||||
} kernel_arg_t;
|
} kernel_arg_t;
|
||||||
|
|
||||||
#endif
|
#endif
|
|
@ -4,8 +4,8 @@
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
void kernel_body(int task_id, kernel_arg_t* arg) {
|
void kernel_body(int task_id, kernel_arg_t* arg) {
|
||||||
uint32_t* src_ptr = (uint32_t*)arg->src_ptr;
|
uint32_t* src_ptr = (uint32_t*)arg->src_addr;
|
||||||
uint32_t* dst_ptr = (uint32_t*)arg->dst_ptr;
|
uint32_t* dst_ptr = (uint32_t*)arg->dst_addr;
|
||||||
|
|
||||||
int32_t* addr_ptr = (int32_t*)(src_ptr[task_id]);
|
int32_t* addr_ptr = (int32_t*)(src_ptr[task_id]);
|
||||||
|
|
||||||
|
|
|
@ -30,6 +30,7 @@ std::vector<int32_t> ref_data;
|
||||||
|
|
||||||
vx_device_h device = nullptr;
|
vx_device_h device = nullptr;
|
||||||
vx_buffer_h staging_buf = nullptr;
|
vx_buffer_h staging_buf = nullptr;
|
||||||
|
kernel_arg_t kernel_arg;
|
||||||
|
|
||||||
static void show_usage() {
|
static void show_usage() {
|
||||||
std::cout << "Vortex Test." << std::endl;
|
std::cout << "Vortex Test." << std::endl;
|
||||||
|
@ -60,9 +61,11 @@ static void parse_args(int argc, char **argv) {
|
||||||
|
|
||||||
void cleanup() {
|
void cleanup() {
|
||||||
if (staging_buf) {
|
if (staging_buf) {
|
||||||
vx_buf_release(staging_buf);
|
vx_buf_free(staging_buf);
|
||||||
}
|
}
|
||||||
if (device) {
|
if (device) {
|
||||||
|
vx_mem_free(device, kernel_arg.src_addr);
|
||||||
|
vx_mem_free(device, kernel_arg.dst_addr);
|
||||||
vx_dev_close(device);
|
vx_dev_close(device);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -105,7 +108,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||||
|
|
||||||
// download destination buffer
|
// download destination buffer
|
||||||
std::cout << "download destination buffer" << std::endl;
|
std::cout << "download destination buffer" << std::endl;
|
||||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||||
|
|
||||||
// verify result
|
// verify result
|
||||||
std::cout << "verify result" << std::endl;
|
std::cout << "verify result" << std::endl;
|
||||||
|
@ -132,8 +135,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
size_t value;
|
size_t value;
|
||||||
kernel_arg_t kernel_arg;
|
|
||||||
|
|
||||||
// parse command arguments
|
// parse command arguments
|
||||||
parse_args(argc, argv);
|
parse_args(argc, argv);
|
||||||
|
@ -150,7 +152,7 @@ int main(int argc, char *argv[]) {
|
||||||
|
|
||||||
uint32_t num_points = count;
|
uint32_t num_points = count;
|
||||||
|
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, NUM_ADDRS * sizeof(uint32_t), &usr_test_mem));
|
RT_CHECK(vx_mem_alloc(device, NUM_ADDRS * sizeof(uint32_t), &usr_test_mem));
|
||||||
|
|
||||||
// generate input data
|
// generate input data
|
||||||
gen_input_data(num_points);
|
gen_input_data(num_points);
|
||||||
|
@ -171,15 +173,15 @@ int main(int argc, char *argv[]) {
|
||||||
// allocate device memory
|
// allocate device memory
|
||||||
std::cout << "allocate device memory" << std::endl;
|
std::cout << "allocate device memory" << std::endl;
|
||||||
|
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, src_buf_size, &value));
|
RT_CHECK(vx_mem_alloc(device, src_buf_size, &value));
|
||||||
kernel_arg.src_ptr = value;
|
kernel_arg.src_addr = value;
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, dst_buf_size, &value));
|
RT_CHECK(vx_mem_alloc(device, dst_buf_size, &value));
|
||||||
kernel_arg.dst_ptr = value;
|
kernel_arg.dst_addr = value;
|
||||||
|
|
||||||
kernel_arg.num_points = num_points;
|
kernel_arg.num_points = num_points;
|
||||||
|
|
||||||
std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl;
|
std::cout << "dev_src=" << std::hex << kernel_arg.src_addr << std::endl;
|
||||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
|
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl;
|
||||||
|
|
||||||
// allocate shared memory
|
// allocate shared memory
|
||||||
std::cout << "allocate shared memory" << std::endl;
|
std::cout << "allocate shared memory" << std::endl;
|
||||||
|
@ -187,7 +189,7 @@ int main(int argc, char *argv[]) {
|
||||||
std::max<uint32_t>(src_buf_size,
|
std::max<uint32_t>(src_buf_size,
|
||||||
std::max<uint32_t>(dst_buf_size,
|
std::max<uint32_t>(dst_buf_size,
|
||||||
sizeof(kernel_arg_t))));
|
sizeof(kernel_arg_t))));
|
||||||
RT_CHECK(vx_alloc_shared_mem(device, staging_buf_size, &staging_buf));
|
RT_CHECK(vx_buf_alloc(device, staging_buf_size, &staging_buf));
|
||||||
|
|
||||||
// upload kernel argument
|
// upload kernel argument
|
||||||
std::cout << "upload kernel argument" << std::endl;
|
std::cout << "upload kernel argument" << std::endl;
|
||||||
|
@ -215,7 +217,7 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << "upload source buffer" << std::endl;
|
std::cout << "upload source buffer" << std::endl;
|
||||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, src_buf_size, 0));
|
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_addr, src_buf_size, 0));
|
||||||
|
|
||||||
// clear destination buffer
|
// clear destination buffer
|
||||||
{
|
{
|
||||||
|
@ -225,7 +227,7 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << "clear destination buffer" << std::endl;
|
std::cout << "clear destination buffer" << std::endl;
|
||||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, dst_buf_size, 0));
|
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, dst_buf_size, 0));
|
||||||
|
|
||||||
// run tests
|
// run tests
|
||||||
std::cout << "run tests" << std::endl;
|
std::cout << "run tests" << std::endl;
|
||||||
|
|
|
@ -9,9 +9,9 @@ typedef struct {
|
||||||
uint32_t num_tasks;
|
uint32_t num_tasks;
|
||||||
uint32_t size;
|
uint32_t size;
|
||||||
uint32_t stride;
|
uint32_t stride;
|
||||||
uint32_t addr_ptr;
|
uint32_t src0_addr;
|
||||||
uint32_t src_ptr;
|
uint32_t src1_addr;
|
||||||
uint32_t dst_ptr;
|
uint32_t dst_addr;
|
||||||
} kernel_arg_t;
|
} kernel_arg_t;
|
||||||
|
|
||||||
#endif
|
#endif
|
|
@ -5,9 +5,9 @@
|
||||||
|
|
||||||
void kernel_body(int task_id, kernel_arg_t* arg) {
|
void kernel_body(int task_id, kernel_arg_t* arg) {
|
||||||
uint32_t stride = arg->stride;
|
uint32_t stride = arg->stride;
|
||||||
uint32_t* addr_ptr = (uint32_t*)arg->addr_ptr;
|
uint32_t* addr_ptr = (uint32_t*)arg->src0_addr;
|
||||||
float* src_ptr = (float*)arg->src_ptr;
|
float* src_ptr = (float*)arg->src1_addr;
|
||||||
float* dst_ptr = (float*)arg->dst_ptr;
|
float* dst_ptr = (float*)arg->dst_addr;
|
||||||
|
|
||||||
uint32_t offset = task_id * stride;
|
uint32_t offset = task_id * stride;
|
||||||
|
|
||||||
|
|
|
@ -73,6 +73,7 @@ std::vector<uint32_t> addr_table;
|
||||||
|
|
||||||
vx_device_h device = nullptr;
|
vx_device_h device = nullptr;
|
||||||
vx_buffer_h staging_buf = nullptr;
|
vx_buffer_h staging_buf = nullptr;
|
||||||
|
kernel_arg_t kernel_arg;
|
||||||
|
|
||||||
static void show_usage() {
|
static void show_usage() {
|
||||||
std::cout << "Vortex Test." << std::endl;
|
std::cout << "Vortex Test." << std::endl;
|
||||||
|
@ -103,9 +104,12 @@ static void parse_args(int argc, char **argv) {
|
||||||
|
|
||||||
void cleanup() {
|
void cleanup() {
|
||||||
if (staging_buf) {
|
if (staging_buf) {
|
||||||
vx_buf_release(staging_buf);
|
vx_buf_free(staging_buf);
|
||||||
}
|
}
|
||||||
if (device) {
|
if (device) {
|
||||||
|
vx_mem_free(device, kernel_arg.src0_addr);
|
||||||
|
vx_mem_free(device, kernel_arg.src1_addr);
|
||||||
|
vx_mem_free(device, kernel_arg.dst_addr);
|
||||||
vx_dev_close(device);
|
vx_dev_close(device);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -140,7 +144,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||||
|
|
||||||
// download destination buffer
|
// download destination buffer
|
||||||
std::cout << "download destination buffer" << std::endl;
|
std::cout << "download destination buffer" << std::endl;
|
||||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, dst_buf_size, 0));
|
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, dst_buf_size, 0));
|
||||||
|
|
||||||
// verify result
|
// verify result
|
||||||
std::cout << "verify result" << std::endl;
|
std::cout << "verify result" << std::endl;
|
||||||
|
@ -178,8 +182,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
size_t value;
|
size_t value;
|
||||||
kernel_arg_t kernel_arg;
|
|
||||||
|
|
||||||
// parse command arguments
|
// parse command arguments
|
||||||
parse_args(argc, argv);
|
parse_args(argc, argv);
|
||||||
|
@ -219,19 +222,19 @@ int main(int argc, char *argv[]) {
|
||||||
// allocate device memory
|
// allocate device memory
|
||||||
std::cout << "allocate device memory" << std::endl;
|
std::cout << "allocate device memory" << std::endl;
|
||||||
|
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, addr_buf_size, &value));
|
RT_CHECK(vx_mem_alloc(device, addr_buf_size, &value));
|
||||||
kernel_arg.addr_ptr = value;
|
kernel_arg.src0_addr = value;
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, src_buf_size, &value));
|
RT_CHECK(vx_mem_alloc(device, src_buf_size, &value));
|
||||||
kernel_arg.src_ptr = value;
|
kernel_arg.src1_addr = value;
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, dst_buf_size, &value));
|
RT_CHECK(vx_mem_alloc(device, dst_buf_size, &value));
|
||||||
kernel_arg.dst_ptr = value;
|
kernel_arg.dst_addr = value;
|
||||||
|
|
||||||
kernel_arg.num_tasks = num_tasks;
|
kernel_arg.num_tasks = num_tasks;
|
||||||
kernel_arg.stride = count;
|
kernel_arg.stride = count;
|
||||||
|
|
||||||
std::cout << "dev_addr=" << std::hex << kernel_arg.addr_ptr << std::endl;
|
std::cout << "dev_addr=" << std::hex << kernel_arg.src0_addr << std::endl;
|
||||||
std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl;
|
std::cout << "dev_src=" << std::hex << kernel_arg.src1_addr << std::endl;
|
||||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
|
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl;
|
||||||
|
|
||||||
// allocate shared memory
|
// allocate shared memory
|
||||||
std::cout << "allocate shared memory" << std::endl;
|
std::cout << "allocate shared memory" << std::endl;
|
||||||
|
@ -239,7 +242,7 @@ int main(int argc, char *argv[]) {
|
||||||
std::max<uint32_t>(addr_buf_size,
|
std::max<uint32_t>(addr_buf_size,
|
||||||
std::max<uint32_t>(dst_buf_size,
|
std::max<uint32_t>(dst_buf_size,
|
||||||
sizeof(kernel_arg_t))));
|
sizeof(kernel_arg_t))));
|
||||||
RT_CHECK(vx_alloc_shared_mem(device, staging_buf_size, &staging_buf));
|
RT_CHECK(vx_buf_alloc(device, staging_buf_size, &staging_buf));
|
||||||
|
|
||||||
// upload kernel argument
|
// upload kernel argument
|
||||||
std::cout << "upload kernel argument" << std::endl;
|
std::cout << "upload kernel argument" << std::endl;
|
||||||
|
@ -257,7 +260,7 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << "upload address buffer" << std::endl;
|
std::cout << "upload address buffer" << std::endl;
|
||||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.addr_ptr, addr_buf_size, 0));
|
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src0_addr, addr_buf_size, 0));
|
||||||
|
|
||||||
// upload source buffer1
|
// upload source buffer1
|
||||||
{
|
{
|
||||||
|
@ -267,7 +270,7 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << "upload source buffer" << std::endl;
|
std::cout << "upload source buffer" << std::endl;
|
||||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, src_buf_size, 0));
|
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src1_addr, src_buf_size, 0));
|
||||||
|
|
||||||
// clear destination buffer
|
// clear destination buffer
|
||||||
{
|
{
|
||||||
|
@ -277,7 +280,7 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << "clear destination buffer" << std::endl;
|
std::cout << "clear destination buffer" << std::endl;
|
||||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, dst_buf_size, 0));
|
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, dst_buf_size, 0));
|
||||||
|
|
||||||
// run tests
|
// run tests
|
||||||
std::cout << "run tests" << std::endl;
|
std::cout << "run tests" << std::endl;
|
||||||
|
|
|
@ -5,8 +5,8 @@
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
uint32_t size;
|
uint32_t size;
|
||||||
uint32_t src_ptr;
|
uint32_t src_addr;
|
||||||
uint32_t dst_ptr;
|
uint32_t dst_addr;
|
||||||
} kernel_arg_t;
|
} kernel_arg_t;
|
||||||
|
|
||||||
#endif
|
#endif
|
|
@ -7,8 +7,8 @@ void main() {
|
||||||
kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
|
kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
|
||||||
|
|
||||||
uint32_t size = arg->size;
|
uint32_t size = arg->size;
|
||||||
int32_t* src_ptr = (int32_t*)arg->src_ptr;
|
int32_t* src_ptr = (int32_t*)arg->src_addr;
|
||||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < size; ++i) {
|
for (uint32_t i = 0; i < size; ++i) {
|
||||||
dst_ptr[i] = src_ptr[i];
|
dst_ptr[i] = src_ptr[i];
|
||||||
|
|
|
@ -21,6 +21,7 @@ uint32_t count = 0;
|
||||||
|
|
||||||
vx_device_h device = nullptr;
|
vx_device_h device = nullptr;
|
||||||
vx_buffer_h staging_buf = nullptr;
|
vx_buffer_h staging_buf = nullptr;
|
||||||
|
kernel_arg_t kernel_arg;
|
||||||
|
|
||||||
static void show_usage() {
|
static void show_usage() {
|
||||||
std::cout << "Vortex Test." << std::endl;
|
std::cout << "Vortex Test." << std::endl;
|
||||||
|
@ -51,9 +52,11 @@ static void parse_args(int argc, char **argv) {
|
||||||
|
|
||||||
void cleanup() {
|
void cleanup() {
|
||||||
if (staging_buf) {
|
if (staging_buf) {
|
||||||
vx_buf_release(staging_buf);
|
vx_buf_free(staging_buf);
|
||||||
}
|
}
|
||||||
if (device) {
|
if (device) {
|
||||||
|
vx_mem_free(device, kernel_arg.src_addr);
|
||||||
|
vx_mem_free(device, kernel_arg.dst_addr);
|
||||||
vx_dev_close(device);
|
vx_dev_close(device);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -71,7 +74,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||||
|
|
||||||
// download destination buffer
|
// download destination buffer
|
||||||
std::cout << "download destination buffer" << std::endl;
|
std::cout << "download destination buffer" << std::endl;
|
||||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||||
|
|
||||||
// verify result
|
// verify result
|
||||||
std::cout << "verify result" << std::endl;
|
std::cout << "verify result" << std::endl;
|
||||||
|
@ -98,8 +101,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
size_t value;
|
size_t value;
|
||||||
kernel_arg_t kernel_arg;
|
|
||||||
|
|
||||||
// parse command arguments
|
// parse command arguments
|
||||||
parse_args(argc, argv);
|
parse_args(argc, argv);
|
||||||
|
@ -125,21 +127,21 @@ int main(int argc, char *argv[]) {
|
||||||
// allocate device memory
|
// allocate device memory
|
||||||
std::cout << "allocate device memory" << std::endl;
|
std::cout << "allocate device memory" << std::endl;
|
||||||
|
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||||
kernel_arg.src_ptr = value;
|
kernel_arg.src_addr = value;
|
||||||
|
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||||
kernel_arg.dst_ptr = value;
|
kernel_arg.dst_addr = value;
|
||||||
|
|
||||||
kernel_arg.size = num_points;
|
kernel_arg.size = num_points;
|
||||||
|
|
||||||
std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl;
|
std::cout << "dev_src=" << std::hex << kernel_arg.src_addr << std::endl;
|
||||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
|
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl;
|
||||||
|
|
||||||
// allocate shared memory
|
// allocate shared memory
|
||||||
std::cout << "allocate shared memory" << std::endl;
|
std::cout << "allocate shared memory" << std::endl;
|
||||||
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
|
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
|
||||||
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &staging_buf));
|
RT_CHECK(vx_buf_alloc(device, alloc_size, &staging_buf));
|
||||||
|
|
||||||
// upload kernel argument
|
// upload kernel argument
|
||||||
std::cout << "upload kernel argument" << std::endl;
|
std::cout << "upload kernel argument" << std::endl;
|
||||||
|
@ -157,7 +159,7 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << "upload source buffer" << std::endl;
|
std::cout << "upload source buffer" << std::endl;
|
||||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_addr, buf_size, 0));
|
||||||
|
|
||||||
// clear destination buffer
|
// clear destination buffer
|
||||||
{
|
{
|
||||||
|
@ -167,7 +169,7 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << "clear destination buffer" << std::endl;
|
std::cout << "clear destination buffer" << std::endl;
|
||||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||||
|
|
||||||
// run tests
|
// run tests
|
||||||
std::cout << "run tests" << std::endl;
|
std::cout << "run tests" << std::endl;
|
||||||
|
|
|
@ -5,8 +5,8 @@
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
uint32_t size;
|
uint32_t size;
|
||||||
uint32_t src_ptr;
|
uint32_t src_addr;
|
||||||
uint32_t dst_ptr;
|
uint32_t dst_addr;
|
||||||
} kernel_arg_t;
|
} kernel_arg_t;
|
||||||
|
|
||||||
#endif
|
#endif
|
|
@ -7,8 +7,8 @@ void main() {
|
||||||
kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
|
kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
|
||||||
|
|
||||||
uint32_t size = arg->size;
|
uint32_t size = arg->size;
|
||||||
int32_t* src_ptr = (int32_t*)arg->src_ptr;
|
int32_t* src_ptr = (int32_t*)arg->src_addr;
|
||||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < size; ++i) {
|
for (uint32_t i = 0; i < size; ++i) {
|
||||||
dst_ptr[i] = src_ptr[i];
|
dst_ptr[i] = src_ptr[i];
|
||||||
|
|
|
@ -21,6 +21,7 @@ uint32_t count = 0;
|
||||||
|
|
||||||
vx_device_h device = nullptr;
|
vx_device_h device = nullptr;
|
||||||
vx_buffer_h staging_buf = nullptr;
|
vx_buffer_h staging_buf = nullptr;
|
||||||
|
kernel_arg_t kernel_arg;
|
||||||
|
|
||||||
static void show_usage() {
|
static void show_usage() {
|
||||||
std::cout << "Vortex Test." << std::endl;
|
std::cout << "Vortex Test." << std::endl;
|
||||||
|
@ -51,9 +52,11 @@ static void parse_args(int argc, char **argv) {
|
||||||
|
|
||||||
void cleanup() {
|
void cleanup() {
|
||||||
if (staging_buf) {
|
if (staging_buf) {
|
||||||
vx_buf_release(staging_buf);
|
vx_buf_free(staging_buf);
|
||||||
}
|
}
|
||||||
if (device) {
|
if (device) {
|
||||||
|
vx_mem_free(device, kernel_arg.src_addr);
|
||||||
|
vx_mem_free(device, kernel_arg.dst_addr);
|
||||||
vx_dev_close(device);
|
vx_dev_close(device);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -71,7 +74,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||||
|
|
||||||
// download destination buffer
|
// download destination buffer
|
||||||
std::cout << "download destination buffer" << std::endl;
|
std::cout << "download destination buffer" << std::endl;
|
||||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||||
|
|
||||||
// verify result
|
// verify result
|
||||||
std::cout << "verify result" << std::endl;
|
std::cout << "verify result" << std::endl;
|
||||||
|
@ -98,8 +101,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
size_t value;
|
size_t value;
|
||||||
kernel_arg_t kernel_arg;
|
|
||||||
|
|
||||||
// parse command arguments
|
// parse command arguments
|
||||||
parse_args(argc, argv);
|
parse_args(argc, argv);
|
||||||
|
@ -125,21 +127,21 @@ int main(int argc, char *argv[]) {
|
||||||
// allocate device memory
|
// allocate device memory
|
||||||
std::cout << "allocate device memory" << std::endl;
|
std::cout << "allocate device memory" << std::endl;
|
||||||
|
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||||
kernel_arg.src_ptr = value;
|
kernel_arg.src_addr = value;
|
||||||
|
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||||
kernel_arg.dst_ptr = value;
|
kernel_arg.dst_addr = value;
|
||||||
|
|
||||||
kernel_arg.size = num_points;
|
kernel_arg.size = num_points;
|
||||||
|
|
||||||
std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl;
|
std::cout << "dev_src=" << std::hex << kernel_arg.src_addr << std::endl;
|
||||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
|
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl;
|
||||||
|
|
||||||
// allocate shared memory
|
// allocate shared memory
|
||||||
std::cout << "allocate shared memory" << std::endl;
|
std::cout << "allocate shared memory" << std::endl;
|
||||||
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
|
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
|
||||||
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &staging_buf));
|
RT_CHECK(vx_buf_alloc(device, alloc_size, &staging_buf));
|
||||||
|
|
||||||
// upload kernel argument
|
// upload kernel argument
|
||||||
std::cout << "upload kernel argument" << std::endl;
|
std::cout << "upload kernel argument" << std::endl;
|
||||||
|
@ -157,7 +159,7 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << "upload source buffer" << std::endl;
|
std::cout << "upload source buffer" << std::endl;
|
||||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_addr, buf_size, 0));
|
||||||
|
|
||||||
// clear destination buffer
|
// clear destination buffer
|
||||||
{
|
{
|
||||||
|
@ -167,7 +169,7 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << "clear destination buffer" << std::endl;
|
std::cout << "clear destination buffer" << std::endl;
|
||||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||||
|
|
||||||
// run tests
|
// run tests
|
||||||
std::cout << "run tests" << std::endl;
|
std::cout << "run tests" << std::endl;
|
||||||
|
|
|
@ -6,9 +6,9 @@
|
||||||
typedef struct {
|
typedef struct {
|
||||||
uint32_t num_tasks;
|
uint32_t num_tasks;
|
||||||
uint32_t task_size;
|
uint32_t task_size;
|
||||||
uint32_t src0_ptr;
|
uint32_t src0_addr;
|
||||||
uint32_t src1_ptr;
|
uint32_t src1_addr;
|
||||||
uint32_t dst_ptr;
|
uint32_t dst_addr;
|
||||||
} kernel_arg_t;
|
} kernel_arg_t;
|
||||||
|
|
||||||
#endif
|
#endif
|
|
@ -11,9 +11,9 @@ void kernel_body(int task_id, kernel_arg_t* arg) {
|
||||||
uint32_t offset = task_id * count;
|
uint32_t offset = task_id * count;
|
||||||
uint32_t num_blocks = (count * 4 + BLOCK_SIZE-1) / BLOCK_SIZE;
|
uint32_t num_blocks = (count * 4 + BLOCK_SIZE-1) / BLOCK_SIZE;
|
||||||
|
|
||||||
int32_t* src0_ptr = (int32_t*)arg->src0_ptr + offset;
|
int32_t* src0_ptr = (int32_t*)arg->src0_addr + offset;
|
||||||
int32_t* src1_ptr = (int32_t*)arg->src1_ptr + offset;
|
int32_t* src1_ptr = (int32_t*)arg->src1_addr + offset;
|
||||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr + offset;
|
int32_t* dst_ptr = (int32_t*)arg->dst_addr + offset;
|
||||||
|
|
||||||
uint32_t src0_end = (uint32_t)(src0_ptr + count);
|
uint32_t src0_end = (uint32_t)(src0_ptr + count);
|
||||||
uint32_t src1_end = (uint32_t)(src1_ptr + count);
|
uint32_t src1_end = (uint32_t)(src1_ptr + count);
|
||||||
|
|
|
@ -21,6 +21,7 @@ uint32_t count = 0;
|
||||||
|
|
||||||
vx_device_h device = nullptr;
|
vx_device_h device = nullptr;
|
||||||
vx_buffer_h staging_buf = nullptr;
|
vx_buffer_h staging_buf = nullptr;
|
||||||
|
kernel_arg_t kernel_arg;
|
||||||
|
|
||||||
static void show_usage() {
|
static void show_usage() {
|
||||||
std::cout << "Vortex Test." << std::endl;
|
std::cout << "Vortex Test." << std::endl;
|
||||||
|
@ -51,9 +52,12 @@ static void parse_args(int argc, char **argv) {
|
||||||
|
|
||||||
void cleanup() {
|
void cleanup() {
|
||||||
if (staging_buf) {
|
if (staging_buf) {
|
||||||
vx_buf_release(staging_buf);
|
vx_buf_free(staging_buf);
|
||||||
}
|
}
|
||||||
if (device) {
|
if (device) {
|
||||||
|
vx_mem_free(device, kernel_arg.src0_addr);
|
||||||
|
vx_mem_free(device, kernel_arg.src1_addr);
|
||||||
|
vx_mem_free(device, kernel_arg.dst_addr);
|
||||||
vx_dev_close(device);
|
vx_dev_close(device);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -71,7 +75,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||||
|
|
||||||
// download destination buffer
|
// download destination buffer
|
||||||
std::cout << "download destination buffer" << std::endl;
|
std::cout << "download destination buffer" << std::endl;
|
||||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||||
|
|
||||||
// verify result
|
// verify result
|
||||||
std::cout << "verify result" << std::endl;
|
std::cout << "verify result" << std::endl;
|
||||||
|
@ -98,8 +102,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
size_t value;
|
size_t value;
|
||||||
kernel_arg_t kernel_arg;
|
|
||||||
|
|
||||||
// parse command arguments
|
// parse command arguments
|
||||||
parse_args(argc, argv);
|
parse_args(argc, argv);
|
||||||
|
@ -131,24 +134,24 @@ int main(int argc, char *argv[]) {
|
||||||
// allocate device memory
|
// allocate device memory
|
||||||
std::cout << "allocate device memory" << std::endl;
|
std::cout << "allocate device memory" << std::endl;
|
||||||
|
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||||
kernel_arg.src0_ptr = value;
|
kernel_arg.src0_addr = value;
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||||
kernel_arg.src1_ptr = value;
|
kernel_arg.src1_addr = value;
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||||
kernel_arg.dst_ptr = value;
|
kernel_arg.dst_addr = value;
|
||||||
|
|
||||||
kernel_arg.num_tasks = num_tasks;
|
kernel_arg.num_tasks = num_tasks;
|
||||||
kernel_arg.task_size = count;
|
kernel_arg.task_size = count;
|
||||||
|
|
||||||
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::endl;
|
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_addr << std::endl;
|
||||||
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::endl;
|
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_addr << std::endl;
|
||||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
|
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl;
|
||||||
|
|
||||||
// allocate shared memory
|
// allocate shared memory
|
||||||
std::cout << "allocate shared memory" << std::endl;
|
std::cout << "allocate shared memory" << std::endl;
|
||||||
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
|
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
|
||||||
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &staging_buf));
|
RT_CHECK(vx_buf_alloc(device, alloc_size, &staging_buf));
|
||||||
|
|
||||||
// upload kernel argument
|
// upload kernel argument
|
||||||
std::cout << "upload kernel argument" << std::endl;
|
std::cout << "upload kernel argument" << std::endl;
|
||||||
|
@ -166,7 +169,7 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << "upload source buffer0" << std::endl;
|
std::cout << "upload source buffer0" << std::endl;
|
||||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src0_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src0_addr, buf_size, 0));
|
||||||
|
|
||||||
// upload source buffer1
|
// upload source buffer1
|
||||||
{
|
{
|
||||||
|
@ -176,7 +179,7 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << "upload source buffer1" << std::endl;
|
std::cout << "upload source buffer1" << std::endl;
|
||||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src1_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src1_addr, buf_size, 0));
|
||||||
|
|
||||||
// clear destination buffer
|
// clear destination buffer
|
||||||
{
|
{
|
||||||
|
@ -186,7 +189,7 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << "clear destination buffer" << std::endl;
|
std::cout << "clear destination buffer" << std::endl;
|
||||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||||
|
|
||||||
// run tests
|
// run tests
|
||||||
std::cout << "run tests" << std::endl;
|
std::cout << "run tests" << std::endl;
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
uint32_t num_points;
|
uint32_t num_points;
|
||||||
uint32_t src_ptr;
|
uint32_t src_addr;
|
||||||
} kernel_arg_t;
|
} kernel_arg_t;
|
||||||
|
|
||||||
#endif
|
#endif
|
|
@ -5,7 +5,7 @@
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
void kernel_body(int task_id, kernel_arg_t* arg) {
|
void kernel_body(int task_id, kernel_arg_t* arg) {
|
||||||
int* src_ptr = (int*)arg->src_ptr;
|
int* src_ptr = (int*)arg->src_addr;
|
||||||
vx_printf("task=%d, value=%d\n", task_id, src_ptr[task_id]);
|
vx_printf("task=%d, value=%d\n", task_id, src_ptr[task_id]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,7 @@ uint32_t count = 4;
|
||||||
|
|
||||||
vx_device_h device = nullptr;
|
vx_device_h device = nullptr;
|
||||||
vx_buffer_h staging_buf = nullptr;
|
vx_buffer_h staging_buf = nullptr;
|
||||||
|
kernel_arg_t kernel_arg;
|
||||||
|
|
||||||
static void show_usage() {
|
static void show_usage() {
|
||||||
std::cout << "Vortex Test." << std::endl;
|
std::cout << "Vortex Test." << std::endl;
|
||||||
|
@ -51,9 +52,10 @@ static void parse_args(int argc, char **argv) {
|
||||||
|
|
||||||
void cleanup() {
|
void cleanup() {
|
||||||
if (staging_buf) {
|
if (staging_buf) {
|
||||||
vx_buf_release(staging_buf);
|
vx_buf_free(staging_buf);
|
||||||
}
|
}
|
||||||
if (device) {
|
if (device) {
|
||||||
|
vx_mem_free(device, kernel_arg.src_addr);
|
||||||
vx_dev_close(device);
|
vx_dev_close(device);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -71,8 +73,7 @@ int run_test() {
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
size_t value;
|
size_t value;
|
||||||
kernel_arg_t kernel_arg;
|
|
||||||
|
|
||||||
// parse command arguments
|
// parse command arguments
|
||||||
parse_args(argc, argv);
|
parse_args(argc, argv);
|
||||||
|
@ -103,17 +104,17 @@ int main(int argc, char *argv[]) {
|
||||||
// allocate device memory
|
// allocate device memory
|
||||||
std::cout << "allocate device memory" << std::endl;
|
std::cout << "allocate device memory" << std::endl;
|
||||||
|
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||||
kernel_arg.src_ptr = value;
|
kernel_arg.src_addr = value;
|
||||||
|
|
||||||
kernel_arg.num_points = num_points;
|
kernel_arg.num_points = num_points;
|
||||||
|
|
||||||
std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl;
|
std::cout << "dev_src=" << std::hex << kernel_arg.src_addr << std::endl;
|
||||||
|
|
||||||
// allocate shared memory
|
// allocate shared memory
|
||||||
std::cout << "allocate shared memory" << std::endl;
|
std::cout << "allocate shared memory" << std::endl;
|
||||||
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
|
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
|
||||||
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &staging_buf));
|
RT_CHECK(vx_buf_alloc(device, alloc_size, &staging_buf));
|
||||||
|
|
||||||
// upload kernel argument
|
// upload kernel argument
|
||||||
std::cout << "upload kernel argument" << std::endl;
|
std::cout << "upload kernel argument" << std::endl;
|
||||||
|
@ -131,7 +132,7 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << "upload source buffer" << std::endl;
|
std::cout << "upload source buffer" << std::endl;
|
||||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_addr, buf_size, 0));
|
||||||
|
|
||||||
// run tests
|
// run tests
|
||||||
std::cout << "run tests" << std::endl;
|
std::cout << "run tests" << std::endl;
|
||||||
|
|
|
@ -5,8 +5,8 @@
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
uint32_t num_points;
|
uint32_t num_points;
|
||||||
uint32_t src_ptr;
|
uint32_t src_addr;
|
||||||
uint32_t dst_ptr;
|
uint32_t dst_addr;
|
||||||
} kernel_arg_t;
|
} kernel_arg_t;
|
||||||
|
|
||||||
#endif
|
#endif
|
|
@ -5,8 +5,8 @@
|
||||||
|
|
||||||
void kernel_body(int __DIVERGENT__ task_id, kernel_arg_t* arg) {
|
void kernel_body(int __DIVERGENT__ task_id, kernel_arg_t* arg) {
|
||||||
uint32_t num_points = arg->num_points;
|
uint32_t num_points = arg->num_points;
|
||||||
int32_t* src_ptr = (int32_t*)arg->src_ptr;
|
int32_t* src_ptr = (int32_t*)arg->src_addr;
|
||||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||||
|
|
||||||
int32_t ref_value = src_ptr[task_id];
|
int32_t ref_value = src_ptr[task_id];
|
||||||
|
|
||||||
|
|
|
@ -25,6 +25,7 @@ std::vector<int32_t> ref_data;
|
||||||
|
|
||||||
vx_device_h device = nullptr;
|
vx_device_h device = nullptr;
|
||||||
vx_buffer_h staging_buf = nullptr;
|
vx_buffer_h staging_buf = nullptr;
|
||||||
|
kernel_arg_t kernel_arg;
|
||||||
|
|
||||||
static void show_usage() {
|
static void show_usage() {
|
||||||
std::cout << "Vortex Test." << std::endl;
|
std::cout << "Vortex Test." << std::endl;
|
||||||
|
@ -55,9 +56,11 @@ static void parse_args(int argc, char **argv) {
|
||||||
|
|
||||||
void cleanup() {
|
void cleanup() {
|
||||||
if (staging_buf) {
|
if (staging_buf) {
|
||||||
vx_buf_release(staging_buf);
|
vx_buf_free(staging_buf);
|
||||||
}
|
}
|
||||||
if (device) {
|
if (device) {
|
||||||
|
vx_mem_free(device, kernel_arg.src_addr);
|
||||||
|
vx_mem_free(device, kernel_arg.dst_addr);
|
||||||
vx_dev_close(device);
|
vx_dev_close(device);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -102,7 +105,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||||
|
|
||||||
// download destination buffer
|
// download destination buffer
|
||||||
std::cout << "download destination buffer" << std::endl;
|
std::cout << "download destination buffer" << std::endl;
|
||||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||||
|
|
||||||
// verify result
|
// verify result
|
||||||
std::cout << "verify result" << std::endl;
|
std::cout << "verify result" << std::endl;
|
||||||
|
@ -129,8 +132,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
size_t value;
|
size_t value;
|
||||||
kernel_arg_t kernel_arg;
|
|
||||||
|
|
||||||
// parse command arguments
|
// parse command arguments
|
||||||
parse_args(argc, argv);
|
parse_args(argc, argv);
|
||||||
|
@ -166,22 +168,22 @@ int main(int argc, char *argv[]) {
|
||||||
// allocate device memory
|
// allocate device memory
|
||||||
std::cout << "allocate device memory" << std::endl;
|
std::cout << "allocate device memory" << std::endl;
|
||||||
|
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, src_buf_size, &value));
|
RT_CHECK(vx_mem_alloc(device, src_buf_size, &value));
|
||||||
kernel_arg.src_ptr = value;
|
kernel_arg.src_addr = value;
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, dst_buf_size, &value));
|
RT_CHECK(vx_mem_alloc(device, dst_buf_size, &value));
|
||||||
kernel_arg.dst_ptr = value;
|
kernel_arg.dst_addr = value;
|
||||||
|
|
||||||
kernel_arg.num_points = num_points;
|
kernel_arg.num_points = num_points;
|
||||||
|
|
||||||
std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl;
|
std::cout << "dev_src=" << std::hex << kernel_arg.src_addr << std::endl;
|
||||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
|
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl;
|
||||||
|
|
||||||
// allocate shared memory
|
// allocate shared memory
|
||||||
std::cout << "allocate shared memory" << std::endl;
|
std::cout << "allocate shared memory" << std::endl;
|
||||||
uint32_t staging_buf_size = std::max<uint32_t>(src_buf_size,
|
uint32_t staging_buf_size = std::max<uint32_t>(src_buf_size,
|
||||||
std::max<uint32_t>(dst_buf_size,
|
std::max<uint32_t>(dst_buf_size,
|
||||||
sizeof(kernel_arg_t)));
|
sizeof(kernel_arg_t)));
|
||||||
RT_CHECK(vx_alloc_shared_mem(device, staging_buf_size, &staging_buf));
|
RT_CHECK(vx_buf_alloc(device, staging_buf_size, &staging_buf));
|
||||||
|
|
||||||
// upload kernel argument
|
// upload kernel argument
|
||||||
std::cout << "upload kernel argument" << std::endl;
|
std::cout << "upload kernel argument" << std::endl;
|
||||||
|
@ -199,7 +201,7 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << "upload source buffer" << std::endl;
|
std::cout << "upload source buffer" << std::endl;
|
||||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, src_buf_size, 0));
|
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_addr, src_buf_size, 0));
|
||||||
|
|
||||||
// clear destination buffer
|
// clear destination buffer
|
||||||
{
|
{
|
||||||
|
@ -209,7 +211,7 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << "clear destination buffer" << std::endl;
|
std::cout << "clear destination buffer" << std::endl;
|
||||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, dst_buf_size, 0));
|
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, dst_buf_size, 0));
|
||||||
|
|
||||||
// run tests
|
// run tests
|
||||||
std::cout << "run tests" << std::endl;
|
std::cout << "run tests" << std::endl;
|
||||||
|
|
|
@ -35,6 +35,7 @@ ePixelFormat eformat = FORMAT_A8R8G8B8;
|
||||||
|
|
||||||
vx_device_h device = nullptr;
|
vx_device_h device = nullptr;
|
||||||
vx_buffer_h buffer = nullptr;
|
vx_buffer_h buffer = nullptr;
|
||||||
|
kernel_arg_t kernel_arg;
|
||||||
|
|
||||||
static void show_usage() {
|
static void show_usage() {
|
||||||
std::cout << "Vortex Texture Test." << std::endl;
|
std::cout << "Vortex Texture Test." << std::endl;
|
||||||
|
@ -95,9 +96,11 @@ static void parse_args(int argc, char **argv) {
|
||||||
|
|
||||||
void cleanup() {
|
void cleanup() {
|
||||||
if (buffer) {
|
if (buffer) {
|
||||||
vx_buf_release(buffer);
|
vx_buf_free(buffer);
|
||||||
}
|
}
|
||||||
if (device) {
|
if (device) {
|
||||||
|
vx_mem_free(device, kernel_arg.src_addr);
|
||||||
|
vx_mem_free(device, kernel_arg.dst_addr);
|
||||||
vx_dev_close(device);
|
vx_dev_close(device);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -141,7 +144,6 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
kernel_arg_t kernel_arg;
|
|
||||||
std::vector<uint8_t> src_pixels;
|
std::vector<uint8_t> src_pixels;
|
||||||
std::vector<uint32_t> mip_offsets;
|
std::vector<uint32_t> mip_offsets;
|
||||||
uint32_t src_width;
|
uint32_t src_width;
|
||||||
|
@ -196,8 +198,8 @@ int main(int argc, char *argv[]) {
|
||||||
// allocate device memory
|
// allocate device memory
|
||||||
std::cout << "allocate device memory" << std::endl;
|
std::cout << "allocate device memory" << std::endl;
|
||||||
uint64_t src_addr, dst_addr;
|
uint64_t src_addr, dst_addr;
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, src_bufsize, &src_addr));
|
RT_CHECK(vx_mem_alloc(device, src_bufsize, &src_addr));
|
||||||
RT_CHECK(vx_alloc_dev_mem(device, dst_bufsize, &dst_addr));
|
RT_CHECK(vx_mem_alloc(device, dst_bufsize, &dst_addr));
|
||||||
|
|
||||||
std::cout << "src_addr=0x" << std::hex << src_addr << std::endl;
|
std::cout << "src_addr=0x" << std::hex << src_addr << std::endl;
|
||||||
std::cout << "dst_addr=0x" << std::hex << dst_addr << std::endl;
|
std::cout << "dst_addr=0x" << std::hex << dst_addr << std::endl;
|
||||||
|
@ -206,7 +208,7 @@ int main(int argc, char *argv[]) {
|
||||||
std::cout << "allocate shared memory" << std::endl;
|
std::cout << "allocate shared memory" << std::endl;
|
||||||
uint32_t alloc_size = std::max<uint32_t>(sizeof(kernel_arg_t),
|
uint32_t alloc_size = std::max<uint32_t>(sizeof(kernel_arg_t),
|
||||||
std::max<uint32_t>(src_bufsize, dst_bufsize));
|
std::max<uint32_t>(src_bufsize, dst_bufsize));
|
||||||
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &buffer));
|
RT_CHECK(vx_buf_alloc(device, alloc_size, &buffer));
|
||||||
|
|
||||||
// upload kernel argument
|
// upload kernel argument
|
||||||
std::cout << "upload kernel argument" << std::endl;
|
std::cout << "upload kernel argument" << std::endl;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue