mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
driver refactoring
This commit is contained in:
parent
808bddb586
commit
27a65fdee7
27 changed files with 200 additions and 198 deletions
|
@ -17,6 +17,7 @@
|
|||
#include <fpga.h>
|
||||
#endif
|
||||
|
||||
#include "vx_utils.h"
|
||||
#include <vortex.h>
|
||||
#include <VX_config.h>
|
||||
#include "vortex_afu.h"
|
||||
|
@ -52,7 +53,7 @@
|
|||
|
||||
typedef struct vx_device_ {
|
||||
fpga_handle fpga;
|
||||
size_t mem_allocation;
|
||||
uint64_t mem_allocation;
|
||||
unsigned version;
|
||||
unsigned num_cores;
|
||||
unsigned num_warps;
|
||||
|
@ -64,19 +65,9 @@ typedef struct vx_buffer_ {
|
|||
void* host_ptr;
|
||||
uint64_t io_addr;
|
||||
vx_device_h hdevice;
|
||||
size_t size;
|
||||
uint64_t size;
|
||||
} vx_buffer_t;
|
||||
|
||||
inline size_t aligned_size(size_t size, size_t alignment) {
|
||||
assert(0 == (alignment & (alignment - 1)));
|
||||
return (size + alignment - 1) & ~(alignment - 1);
|
||||
}
|
||||
|
||||
inline bool is_aligned(size_t addr, size_t alignment) {
|
||||
assert(0 == (alignment & (alignment - 1)));
|
||||
return 0 == (addr & (alignment - 1));
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifdef DUMP_PERF_STATS
|
||||
|
@ -107,7 +98,7 @@ AutoPerfDump gAutoPerfDump;
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
|
||||
extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
|
@ -279,7 +270,7 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
|
||||
extern int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) {
|
||||
if (nullptr == hdevice
|
||||
|| nullptr == dev_maddr
|
||||
|| 0 >= size)
|
||||
|
@ -299,7 +290,7 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr)
|
|||
return 0;
|
||||
}
|
||||
|
||||
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
|
||||
extern int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) {
|
||||
fpga_result res;
|
||||
void* host_ptr;
|
||||
uint64_t wsid;
|
||||
|
@ -367,7 +358,7 @@ extern int vx_buf_release(vx_buffer_h hbuffer) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
|
||||
extern int vx_ready_wait(vx_device_h hdevice, uint64_t timeout) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
|
@ -386,7 +377,7 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
|
|||
#endif
|
||||
|
||||
// to milliseconds
|
||||
long long sleep_time_ms = (sleep_time.tv_sec * 1000) + (sleep_time.tv_nsec / 1000000);
|
||||
uint64_t sleep_time_ms = (sleep_time.tv_sec * 1000) + (sleep_time.tv_nsec / 1000000);
|
||||
|
||||
for (;;) {
|
||||
uint64_t status;
|
||||
|
@ -430,7 +421,7 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) {
|
||||
extern int vx_copy_to_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size, uint64_t src_offset) {
|
||||
if (nullptr == hbuffer
|
||||
|| 0 >= size)
|
||||
return -1;
|
||||
|
@ -438,8 +429,8 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, si
|
|||
vx_buffer_t *buffer = ((vx_buffer_t*)hbuffer);
|
||||
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
|
||||
|
||||
size_t dev_mem_size = LOCAL_MEM_SIZE;
|
||||
size_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
uint64_t dev_mem_size = LOCAL_MEM_SIZE;
|
||||
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
|
||||
// check alignment
|
||||
if (!is_aligned(dev_maddr, CACHE_BLOCK_SIZE))
|
||||
|
@ -454,7 +445,7 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, si
|
|||
return -1;
|
||||
|
||||
// Ensure ready for new command
|
||||
if (vx_ready_wait(buffer->hdevice, -1) != 0)
|
||||
if (vx_ready_wait(buffer->hdevice, MAX_TIMEOUT) != 0)
|
||||
return -1;
|
||||
|
||||
auto ls_shift = (int)std::log2(CACHE_BLOCK_SIZE);
|
||||
|
@ -465,13 +456,13 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, si
|
|||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_MEM_WRITE));
|
||||
|
||||
// Wait for the write operation to finish
|
||||
if (vx_ready_wait(buffer->hdevice, -1) != 0)
|
||||
if (vx_ready_wait(buffer->hdevice, MAX_TIMEOUT) != 0)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) {
|
||||
extern int vx_copy_from_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size, uint64_t dest_offset) {
|
||||
if (nullptr == hbuffer
|
||||
|| 0 >= size)
|
||||
return -1;
|
||||
|
@ -479,8 +470,8 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size,
|
|||
vx_buffer_t *buffer = ((vx_buffer_t*)hbuffer);
|
||||
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
|
||||
|
||||
size_t dev_mem_size = LOCAL_MEM_SIZE;
|
||||
size_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
uint64_t dev_mem_size = LOCAL_MEM_SIZE;
|
||||
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
|
||||
// check alignment
|
||||
if (!is_aligned(dev_maddr, CACHE_BLOCK_SIZE))
|
||||
|
@ -495,7 +486,7 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size,
|
|||
return -1;
|
||||
|
||||
// Ensure ready for new command
|
||||
if (vx_ready_wait(buffer->hdevice, -1) != 0)
|
||||
if (vx_ready_wait(buffer->hdevice, MAX_TIMEOUT) != 0)
|
||||
return -1;
|
||||
|
||||
auto ls_shift = (int)std::log2(CACHE_BLOCK_SIZE);
|
||||
|
@ -506,7 +497,7 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size,
|
|||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_MEM_READ));
|
||||
|
||||
// Wait for the read operation to finish
|
||||
if (vx_ready_wait(buffer->hdevice, -1) != 0)
|
||||
if (vx_ready_wait(buffer->hdevice, MAX_TIMEOUT) != 0)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
|
@ -519,7 +510,7 @@ extern int vx_start(vx_device_h hdevice) {
|
|||
vx_device_t *device = ((vx_device_t*)hdevice);
|
||||
|
||||
// Ensure ready for new command
|
||||
if (vx_ready_wait(hdevice, -1) != 0)
|
||||
if (vx_ready_wait(hdevice, MAX_TIMEOUT) != 0)
|
||||
return -1;
|
||||
|
||||
// start execution
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef USE_VLSIM
|
||||
#include <fpga.h>
|
||||
|
|
|
@ -1,17 +1,29 @@
|
|||
#include "vx_utils.h"
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <cstring>
|
||||
#include <vortex.h>
|
||||
#include <VX_config.h>
|
||||
#include <assert.h>
|
||||
|
||||
extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size) {
|
||||
uint64_t aligned_size(uint64_t size, uint64_t alignment) {
|
||||
assert(0 == (alignment & (alignment - 1)));
|
||||
return (size + alignment - 1) & ~(alignment - 1);
|
||||
}
|
||||
|
||||
bool is_aligned(uint64_t addr, uint64_t alignment) {
|
||||
assert(0 == (alignment & (alignment - 1)));
|
||||
return 0 == (addr & (alignment - 1));
|
||||
}
|
||||
|
||||
extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, uint64_t size) {
|
||||
int err = 0;
|
||||
|
||||
if (NULL == content || 0 == size)
|
||||
return -1;
|
||||
|
||||
uint32_t buffer_transfer_size = 65536;
|
||||
unsigned kernel_base_addr;
|
||||
uint64_t kernel_base_addr;
|
||||
err = vx_dev_caps(device, VX_CAPS_KERNEL_BASE_ADDR, &kernel_base_addr);
|
||||
if (err != 0)
|
||||
return -1;
|
||||
|
@ -29,9 +41,9 @@ extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_
|
|||
// upload content
|
||||
//
|
||||
|
||||
size_t offset = 0;
|
||||
uint64_t offset = 0;
|
||||
while (offset < size) {
|
||||
auto chunk_size = std::min<size_t>(buffer_transfer_size, size - offset);
|
||||
auto chunk_size = std::min<uint64_t>(buffer_transfer_size, size - offset);
|
||||
std::memcpy(buf_ptr, (uint8_t*)content + offset, chunk_size);
|
||||
|
||||
/*printf("*** Upload Kernel to 0x%0x: data=", kernel_base_addr + offset);
|
||||
|
@ -127,7 +139,7 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||
uint64_t mem_lat = 0;
|
||||
#endif
|
||||
|
||||
unsigned num_cores;
|
||||
uint64_t num_cores;
|
||||
ret = vx_dev_caps(device, VX_CAPS_MAX_CORES, &num_cores);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
|
11
driver/common/vx_utils.h
Normal file
11
driver/common/vx_utils.h
Normal file
|
@ -0,0 +1,11 @@
|
|||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
uint64_t aligned_size(uint64_t size, uint64_t alignment);
|
||||
|
||||
bool is_aligned(uint64_t addr, uint64_t alignment);
|
||||
|
||||
#define CACHE_BLOCK_SIZE 64
|
||||
#define ALLOC_BASE_ADDR 0x00000000
|
||||
#define LOCAL_MEM_SIZE 4294967296 // 4 GB
|
|
@ -2,6 +2,7 @@
|
|||
#define __VX_DRIVER_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -22,9 +23,7 @@ typedef void* vx_buffer_h;
|
|||
#define VX_CAPS_ALLOC_BASE_ADDR 0x6
|
||||
#define VX_CAPS_KERNEL_BASE_ADDR 0x7
|
||||
|
||||
#define CACHE_BLOCK_SIZE 64
|
||||
#define ALLOC_BASE_ADDR 0x00000000
|
||||
#define LOCAL_MEM_SIZE 0xffffffff
|
||||
#define MAX_TIMEOUT (60*60*1000) // 1hr
|
||||
|
||||
// open the device and connect to it
|
||||
int vx_dev_open(vx_device_h* hdevice);
|
||||
|
@ -33,10 +32,10 @@ int vx_dev_open(vx_device_h* hdevice);
|
|||
int vx_dev_close(vx_device_h hdevice);
|
||||
|
||||
// return device configurations
|
||||
int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value);
|
||||
int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value);
|
||||
|
||||
// Allocate shared buffer with device
|
||||
int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer);
|
||||
int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer);
|
||||
|
||||
// Get host pointer address
|
||||
void* vx_host_ptr(vx_buffer_h hbuffer);
|
||||
|
@ -45,24 +44,24 @@ void* vx_host_ptr(vx_buffer_h hbuffer);
|
|||
int vx_buf_release(vx_buffer_h hbuffer);
|
||||
|
||||
// allocate device memory and return address
|
||||
int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr);
|
||||
int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr);
|
||||
|
||||
// Copy bytes from buffer to device local memory
|
||||
int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset);
|
||||
int vx_copy_to_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size, uint64_t src_offset);
|
||||
|
||||
// Copy bytes from device local memory to buffer
|
||||
int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dst_offset);
|
||||
int vx_copy_from_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size, uint64_t dst_offset);
|
||||
|
||||
// Start device execution
|
||||
int vx_start(vx_device_h hdevice);
|
||||
|
||||
// Wait for device ready with milliseconds timeout
|
||||
int vx_ready_wait(vx_device_h hdevice, long long timeout);
|
||||
int vx_ready_wait(vx_device_h hdevice, uint64_t timeout);
|
||||
|
||||
////////////////////////////// UTILITY FUNCTIONS //////////////////////////////
|
||||
|
||||
// upload kernel bytes to device
|
||||
int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size);
|
||||
int vx_upload_kernel_bytes(vx_device_h device, const void* content, uint64_t size);
|
||||
|
||||
// upload kernel file to device
|
||||
int vx_upload_kernel_file(vx_device_h device, const char* filename);
|
||||
|
|
|
@ -3,7 +3,7 @@ RTLSIM_DIR = ../../sim/rtlsim
|
|||
CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors
|
||||
#CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -I../include -I../../hw -I$(RTLSIM_DIR) -I$(RTLSIM_DIR)/../common
|
||||
CXXFLAGS += -I../include -I../common -I../../hw -I$(RTLSIM_DIR) -I$(RTLSIM_DIR)/../common
|
||||
|
||||
LDFLAGS += $(RTLSIM_DIR)/librtlsim.a
|
||||
|
||||
|
|
|
@ -7,11 +7,14 @@
|
|||
#include <chrono>
|
||||
|
||||
#include <vortex.h>
|
||||
#include <vx_utils.h>
|
||||
#include <VX_config.h>
|
||||
#include <mem.h>
|
||||
#include <util.h>
|
||||
#include <simulator.h>
|
||||
|
||||
#define RAM_PAGE_SIZE 4096
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -19,7 +22,7 @@ using namespace vortex;
|
|||
class vx_device;
|
||||
class vx_buffer {
|
||||
public:
|
||||
vx_buffer(size_t size, vx_device* device)
|
||||
vx_buffer(uint64_t size, vx_device* device)
|
||||
: size_(size)
|
||||
, device_(device) {
|
||||
auto aligned_asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
|
@ -36,7 +39,7 @@ public:
|
|||
return data_;
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
uint64_t size() const {
|
||||
return size_;
|
||||
}
|
||||
|
||||
|
@ -45,7 +48,7 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
size_t size_;
|
||||
uint64_t size_;
|
||||
vx_device* device_;
|
||||
void* data_;
|
||||
};
|
||||
|
@ -54,9 +57,10 @@ private:
|
|||
|
||||
class vx_device {
|
||||
public:
|
||||
vx_device() : ram_((1<<12), (1<<20)) {
|
||||
mem_allocation_ = ALLOC_BASE_ADDR;
|
||||
}
|
||||
vx_device()
|
||||
: ram_(RAM_PAGE_SIZE)
|
||||
, mem_allocation_(ALLOC_BASE_ADDR)
|
||||
{}
|
||||
|
||||
~vx_device() {
|
||||
if (future_.valid()) {
|
||||
|
@ -64,9 +68,9 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
int alloc_local_mem(size_t size, size_t* dev_maddr) {
|
||||
auto dev_mem_size = LOCAL_MEM_SIZE;
|
||||
size_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
int alloc_local_mem(uint64_t size, uint64_t* dev_maddr) {
|
||||
uint64_t dev_mem_size = LOCAL_MEM_SIZE;
|
||||
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
if (mem_allocation_ + asize > dev_mem_size)
|
||||
return -1;
|
||||
*dev_maddr = mem_allocation_;
|
||||
|
@ -74,9 +78,9 @@ public:
|
|||
return 0;
|
||||
}
|
||||
|
||||
int upload(const void* src, size_t dest_addr, size_t size, size_t src_offset) {
|
||||
size_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
if (dest_addr + asize > ram_.size())
|
||||
int upload(const void* src, uint64_t dest_addr, uint64_t size, uint64_t src_offset) {
|
||||
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
if (dest_addr + asize > LOCAL_MEM_SIZE)
|
||||
return -1;
|
||||
|
||||
/*printf("VXDRV: upload %ld bytes from 0x%lx:", size, uintptr_t((uint8_t*)src + src_offset));
|
||||
|
@ -92,9 +96,9 @@ public:
|
|||
return 0;
|
||||
}
|
||||
|
||||
int download(void* dest, size_t src_addr, size_t size, size_t dest_offset) {
|
||||
size_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
if (src_addr + asize > ram_.size())
|
||||
int download(void* dest, uint64_t src_addr, uint64_t size, uint64_t dest_offset) {
|
||||
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
if (src_addr + asize > LOCAL_MEM_SIZE)
|
||||
return -1;
|
||||
|
||||
ram_.read((uint8_t*)dest + dest_offset, src_addr, asize);
|
||||
|
@ -125,10 +129,10 @@ public:
|
|||
return 0;
|
||||
}
|
||||
|
||||
int wait(long long timeout) {
|
||||
int wait(uint64_t timeout) {
|
||||
if (!future_.valid())
|
||||
return 0;
|
||||
auto timeout_sec = (timeout < 0) ? timeout : (timeout / 1000);
|
||||
uint64_t timeout_sec = timeout / 1000;
|
||||
std::chrono::seconds wait_time(1);
|
||||
for (;;) {
|
||||
auto status = future_.wait_for(wait_time); // wait for 1 sec and check status
|
||||
|
@ -141,9 +145,9 @@ public:
|
|||
|
||||
private:
|
||||
|
||||
size_t mem_allocation_;
|
||||
RAM ram_;
|
||||
Simulator simulator_;
|
||||
uint64_t mem_allocation_;
|
||||
std::future<void> future_;
|
||||
};
|
||||
|
||||
|
@ -177,7 +181,7 @@ AutoPerfDump gAutoPerfDump;
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
|
||||
extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
|
@ -198,10 +202,10 @@ extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
|
|||
*value = CACHE_BLOCK_SIZE;
|
||||
break;
|
||||
case VX_CAPS_LOCAL_MEM_SIZE:
|
||||
*value = 0xffffffff;
|
||||
*value = LOCAL_MEM_SIZE;
|
||||
break;
|
||||
case VX_CAPS_ALLOC_BASE_ADDR:
|
||||
*value = 0x10000000;
|
||||
*value = ALLOC_BASE_ADDR;
|
||||
break;
|
||||
case VX_CAPS_KERNEL_BASE_ADDR:
|
||||
*value = STARTUP_ADDR;
|
||||
|
@ -244,7 +248,7 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
|
||||
extern int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) {
|
||||
if (nullptr == hdevice
|
||||
|| nullptr == dev_maddr
|
||||
|| 0 >= size)
|
||||
|
@ -255,7 +259,7 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr)
|
|||
}
|
||||
|
||||
|
||||
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
|
||||
extern int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) {
|
||||
if (nullptr == hdevice
|
||||
|| 0 >= size
|
||||
|| nullptr == hbuffer)
|
||||
|
@ -294,7 +298,7 @@ extern int vx_buf_release(vx_buffer_h hbuffer) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) {
|
||||
extern int vx_copy_to_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size, uint64_t src_offset) {
|
||||
if (nullptr == hbuffer
|
||||
|| 0 >= size)
|
||||
return -1;
|
||||
|
@ -307,7 +311,7 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, si
|
|||
return buffer->device()->upload(buffer->data(), dev_maddr, size, src_offset);
|
||||
}
|
||||
|
||||
extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) {
|
||||
extern int vx_copy_from_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size, uint64_t dest_offset) {
|
||||
if (nullptr == hbuffer
|
||||
|| 0 >= size)
|
||||
return -1;
|
||||
|
@ -329,7 +333,7 @@ extern int vx_start(vx_device_h hdevice) {
|
|||
return device->start();
|
||||
}
|
||||
|
||||
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
|
||||
extern int vx_ready_wait(vx_device_h hdevice, uint64_t timeout) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@ CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
|
|||
#CXXFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I../include -I../../hw -I$(SIMX_DIR) -I$(SIMX_DIR)/../common
|
||||
CXXFLAGS += -I../include -I../common -I../../hw -I$(SIMX_DIR) -I$(SIMX_DIR)/../common
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
CXXFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
|
|
|
@ -8,11 +8,12 @@
|
|||
#include <chrono>
|
||||
|
||||
#include <vortex.h>
|
||||
#include <core.h>
|
||||
#include <vx_utils.h>
|
||||
#include <processor.h>
|
||||
#include <VX_config.h>
|
||||
#include <util.h>
|
||||
|
||||
#define PAGE_SIZE 4096
|
||||
#define RAM_PAGE_SIZE 4096
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
|
@ -22,10 +23,10 @@ class vx_device;
|
|||
|
||||
class vx_buffer {
|
||||
public:
|
||||
vx_buffer(size_t size, vx_device* device)
|
||||
vx_buffer(uint64_t size, vx_device* device)
|
||||
: size_(size)
|
||||
, device_(device) {
|
||||
auto aligned_asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
uint64_t aligned_asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
data_ = malloc(aligned_asize);
|
||||
}
|
||||
|
||||
|
@ -39,7 +40,7 @@ public:
|
|||
return data_;
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
uint64_t size() const {
|
||||
return size_;
|
||||
}
|
||||
|
||||
|
@ -48,7 +49,7 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
size_t size_;
|
||||
uint64_t size_;
|
||||
vx_device* device_;
|
||||
void* data_;
|
||||
};
|
||||
|
@ -59,32 +60,23 @@ class vx_device {
|
|||
public:
|
||||
vx_device()
|
||||
: arch_("rv32i", NUM_CORES, NUM_WARPS, NUM_THREADS)
|
||||
, decoder_(arch_)
|
||||
, mmu_(PAGE_SIZE, arch_.wsize(), true)
|
||||
, cores_(arch_.num_cores())
|
||||
, is_done_(false)
|
||||
, is_running_(false)
|
||||
, mem_allocation_(ALLOC_BASE_ADDR)
|
||||
, thread_(__thread_proc__, this)
|
||||
, ram_((1<<12), (1<<20)) {
|
||||
|
||||
mem_allocation_ = ALLOC_BASE_ADDR;
|
||||
mmu_.attach(ram_, 0, 0xffffffff);
|
||||
for (int i = 0; i < arch_.num_cores(); ++i) {
|
||||
cores_.at(i) = std::make_shared<Core>(arch_, decoder_, mmu_, i);
|
||||
}
|
||||
}
|
||||
, ram_(RAM_PAGE_SIZE)
|
||||
{}
|
||||
|
||||
~vx_device() {
|
||||
mutex_.lock();
|
||||
is_done_ = true;
|
||||
mutex_.unlock();
|
||||
|
||||
mutex_.unlock();
|
||||
thread_.join();
|
||||
}
|
||||
|
||||
int alloc_local_mem(size_t size, size_t* dev_maddr) {
|
||||
auto dev_mem_size = LOCAL_MEM_SIZE;
|
||||
auto asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
int alloc_local_mem(uint64_t size, uint64_t* dev_maddr) {
|
||||
uint64_t dev_mem_size = LOCAL_MEM_SIZE;
|
||||
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
if (mem_allocation_ + asize > dev_mem_size)
|
||||
return -1;
|
||||
*dev_maddr = mem_allocation_;
|
||||
|
@ -92,9 +84,9 @@ public:
|
|||
return 0;
|
||||
}
|
||||
|
||||
int upload(const void* src, size_t dest_addr, size_t size, size_t src_offset) {
|
||||
auto asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
if (dest_addr + asize > ram_.size())
|
||||
int upload(const void* src, uint64_t dest_addr, uint64_t size, uint64_t src_offset) {
|
||||
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
if (dest_addr + asize > LOCAL_MEM_SIZE)
|
||||
return -1;
|
||||
|
||||
ram_.write((const uint8_t*)src + src_offset, dest_addr, asize);
|
||||
|
@ -107,9 +99,9 @@ public:
|
|||
return 0;
|
||||
}
|
||||
|
||||
int download(void* dest, size_t src_addr, size_t size, size_t dest_offset) {
|
||||
size_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
if (src_addr + asize > ram_.size())
|
||||
int download(void* dest, uint64_t src_addr, uint64_t size, uint64_t dest_offset) {
|
||||
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
if (src_addr + asize > LOCAL_MEM_SIZE)
|
||||
return -1;
|
||||
|
||||
ram_.read((uint8_t*)dest + dest_offset, src_addr, asize);
|
||||
|
@ -123,19 +115,17 @@ public:
|
|||
}
|
||||
|
||||
int start() {
|
||||
|
||||
mutex_.lock();
|
||||
for (int i = 0; i < arch_.num_cores(); ++i) {
|
||||
cores_.at(i)->clear();
|
||||
}
|
||||
SimPlatform::instance().flush();
|
||||
processor_ = std::make_shared<Processor>(arch_);
|
||||
processor_->attach_ram(&ram_);
|
||||
is_running_ = true;
|
||||
mutex_.unlock();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int wait(long long timeout) {
|
||||
auto timeout_sec = (timeout < 0) ? timeout : (timeout / 1000);
|
||||
int wait(uint64_t timeout) {
|
||||
uint64_t timeout_sec = timeout / 1000;
|
||||
for (;;) {
|
||||
mutex_.lock();
|
||||
bool is_running = is_running_;
|
||||
|
@ -147,32 +137,10 @@ public:
|
|||
std::this_thread::sleep_for(std::chrono::seconds(1));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int get_csr(int core_id, int addr, unsigned *value) {
|
||||
*value = cores_.at(core_id)->get_csr(addr, 0, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int set_csr(int core_id, int addr, unsigned value) {
|
||||
cores_.at(core_id)->set_csr(addr, value, 0, 0);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
void run() {
|
||||
bool running;
|
||||
do {
|
||||
running = false;
|
||||
for (auto& core : cores_) {
|
||||
core->step();
|
||||
if (core->running())
|
||||
running = true;
|
||||
}
|
||||
} while (running);
|
||||
}
|
||||
|
||||
void thread_proc() {
|
||||
std::cout << "Device ready..." << std::flush << std::endl;
|
||||
|
||||
|
@ -188,7 +156,7 @@ private:
|
|||
if (is_running) {
|
||||
std::cout << "Device running..." << std::flush << std::endl;
|
||||
|
||||
this->run();
|
||||
processor_->run();
|
||||
|
||||
mutex_.lock();
|
||||
is_running_ = false;
|
||||
|
@ -206,12 +174,10 @@ private:
|
|||
}
|
||||
|
||||
ArchDef arch_;
|
||||
Decoder decoder_;
|
||||
MemoryUnit mmu_;
|
||||
std::vector<std::shared_ptr<Core>> cores_;
|
||||
Processor::Ptr processor_;
|
||||
bool is_done_;
|
||||
bool is_running_;
|
||||
size_t mem_allocation_;
|
||||
uint64_t mem_allocation_;
|
||||
std::thread thread_;
|
||||
RAM ram_;
|
||||
std::mutex mutex_;
|
||||
|
@ -251,6 +217,9 @@ extern int vx_dev_open(vx_device_h* hdevice) {
|
|||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
if (!SimPlatform::instance().initialize())
|
||||
return -1;
|
||||
|
||||
*hdevice = new vx_device();
|
||||
|
||||
#ifdef DUMP_PERF_STATS
|
||||
|
@ -273,10 +242,12 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
|||
|
||||
delete device;
|
||||
|
||||
SimPlatform::instance().finalize();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
|
||||
extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
|
@ -314,7 +285,7 @@ extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
|
||||
extern int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) {
|
||||
if (nullptr == hdevice
|
||||
|| nullptr == dev_maddr
|
||||
|| 0 >= size)
|
||||
|
@ -324,7 +295,7 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr)
|
|||
return device->alloc_local_mem(size, dev_maddr);
|
||||
}
|
||||
|
||||
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
|
||||
extern int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) {
|
||||
if (nullptr == hdevice
|
||||
|| 0 >= size
|
||||
|| nullptr == hbuffer)
|
||||
|
@ -363,7 +334,7 @@ extern int vx_buf_release(vx_buffer_h hbuffer) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) {
|
||||
extern int vx_copy_to_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size, uint64_t src_offset) {
|
||||
if (nullptr == hbuffer
|
||||
|| 0 >= size)
|
||||
return -1;
|
||||
|
@ -376,7 +347,7 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, si
|
|||
return buffer->device()->upload(buffer->data(), dev_maddr, size, src_offset);
|
||||
}
|
||||
|
||||
extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) {
|
||||
extern int vx_copy_from_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size, uint64_t dest_offset) {
|
||||
if (nullptr == hbuffer
|
||||
|| 0 >= size)
|
||||
return -1;
|
||||
|
@ -398,7 +369,7 @@ extern int vx_start(vx_device_h hdevice) {
|
|||
return device->start();
|
||||
}
|
||||
|
||||
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
|
||||
extern int vx_ready_wait(vx_device_h hdevice, uint64_t timeout) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
|
|
|
@ -8,15 +8,15 @@ extern int vx_dev_close(vx_device_h /*hdevice*/) {
|
|||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_dev_caps(vx_device_h /*hdevice*/, unsigned /*caps_id*/, unsigned* /*value*/) {
|
||||
extern int vx_dev_caps(vx_device_h /*hdevice*/, uint32_t /*caps_id*/, uint64_t* /*value*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_alloc_dev_mem(vx_device_h /*hdevice*/, size_t /*size*/, size_t* /*dev_maddr*/) {
|
||||
extern int vx_alloc_dev_mem(vx_device_h /*hdevice*/, uint64_t /*size*/, uint64_t* /*dev_maddr*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_alloc_shared_mem(vx_device_h /*hdevice*/, size_t /*size*/, vx_buffer_h* /*hbuffer*/) {
|
||||
extern int vx_alloc_shared_mem(vx_device_h /*hdevice*/, uint64_t /*size*/, vx_buffer_h* /*hbuffer*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -28,11 +28,11 @@ extern int vx_buf_release(vx_buffer_h /*hbuffer*/) {
|
|||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_copy_to_dev(vx_buffer_h /*hbuffer*/, size_t /*dev_maddr*/, size_t /*size*/, size_t /*src_offset*/) {
|
||||
extern int vx_copy_to_dev(vx_buffer_h /*hbuffer*/, uint64_t /*dev_maddr*/, uint64_t /*size*/, uint64_t /*src_offset*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_copy_from_dev(vx_buffer_h /*hbuffer*/, size_t /*dev_maddr*/, size_t /*size*/, size_t /*dest_offset*/) {
|
||||
extern int vx_copy_from_dev(vx_buffer_h /*hbuffer*/, uint64_t /*dev_maddr*/, uint64_t /*size*/, uint64_t /*dest_offset*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -40,6 +40,6 @@ extern int vx_start(vx_device_h /*hdevice*/) {
|
|||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_ready_wait(vx_device_h /*hdevice*/, long long /*timeout*/) {
|
||||
extern int vx_ready_wait(vx_device_h /*hdevice*/, uint64_t /*timeout*/) {
|
||||
return -1;
|
||||
}
|
|
@ -168,11 +168,12 @@ void MemoryUnit::tlbRm(uint64_t va) {
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
RAM::RAM(uint32_t num_pages, uint32_t page_size)
|
||||
: page_bits_(log2ceil(page_size)) {
|
||||
assert(ispow2(page_size));
|
||||
mem_.resize(num_pages, NULL);
|
||||
size_ = uint64_t(mem_.size()) << page_bits_;
|
||||
RAM::RAM(uint32_t page_size)
|
||||
: size_(0)
|
||||
, page_bits_(log2ceil(page_size))
|
||||
, last_page_(nullptr)
|
||||
, last_page_index_(0) {
|
||||
assert(ispow2(page_size));
|
||||
}
|
||||
|
||||
RAM::~RAM() {
|
||||
|
@ -180,31 +181,41 @@ RAM::~RAM() {
|
|||
}
|
||||
|
||||
void RAM::clear() {
|
||||
for (auto& page : mem_) {
|
||||
delete[] page;
|
||||
page = NULL;
|
||||
for (auto& page : pages_) {
|
||||
delete[] page.second;
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t RAM::size() const {
|
||||
return size_;
|
||||
return uint64_t(pages_.size()) << page_bits_;
|
||||
}
|
||||
|
||||
uint8_t *RAM::get(uint32_t address) const {
|
||||
uint32_t page_size = 1 << page_bits_;
|
||||
uint32_t page_index = address >> page_bits_;
|
||||
uint32_t byte_offset = address & ((1 << page_bits_) - 1);
|
||||
uint8_t *RAM::get(uint64_t address) const {
|
||||
uint32_t page_size = 1 << page_bits_;
|
||||
uint32_t page_offset = address & (page_size - 1);
|
||||
uint64_t page_index = address >> page_bits_;
|
||||
|
||||
auto &page = mem_.at(page_index);
|
||||
if (page == NULL) {
|
||||
uint8_t *ptr = new uint8_t[page_size];
|
||||
// set uninitialized data to "baadf00d"
|
||||
for (uint32_t i = 0; i < page_size; ++i) {
|
||||
ptr[i] = (0xbaadf00d >> ((i & 0x3) * 8)) & 0xff;
|
||||
uint8_t* page;
|
||||
if (last_page_ && last_page_index_ == page_index) {
|
||||
page = last_page_;
|
||||
} else {
|
||||
auto it = pages_.find(page_index);
|
||||
if (it != pages_.end()) {
|
||||
page = it->second;
|
||||
} else {
|
||||
uint8_t *ptr = new uint8_t[page_size];
|
||||
// set uninitialized data to "baadf00d"
|
||||
for (uint32_t i = 0; i < page_size; ++i) {
|
||||
ptr[i] = (0xbaadf00d >> ((i & 0x3) * 8)) & 0xff;
|
||||
}
|
||||
pages_.emplace(page_index, ptr);
|
||||
page = ptr;
|
||||
}
|
||||
page = ptr;
|
||||
last_page_ = page;
|
||||
last_page_index_ = page_index;
|
||||
}
|
||||
return page + byte_offset;
|
||||
|
||||
return page + page_offset;
|
||||
}
|
||||
|
||||
void RAM::read(void *data, uint64_t addr, uint64_t size) {
|
||||
|
|
|
@ -130,13 +130,13 @@ private:
|
|||
class RAM : public MemDevice {
|
||||
public:
|
||||
|
||||
RAM(uint32_t num_pages, uint32_t page_size);
|
||||
|
||||
RAM(uint32_t page_size);
|
||||
~RAM();
|
||||
|
||||
void clear();
|
||||
|
||||
uint64_t size() const override;
|
||||
|
||||
void read(void *data, uint64_t addr, uint64_t size) override;
|
||||
void write(const void *data, uint64_t addr, uint64_t size) override;
|
||||
|
||||
|
@ -153,11 +153,13 @@ public:
|
|||
|
||||
private:
|
||||
|
||||
uint8_t *get(uint32_t address) const;
|
||||
uint8_t *get(uint64_t address) const;
|
||||
|
||||
mutable std::vector<uint8_t*> mem_;
|
||||
uint32_t page_bits_;
|
||||
uint64_t size_;
|
||||
uint32_t page_bits_;
|
||||
mutable std::unordered_map<uint64_t, uint8_t*> pages_;
|
||||
mutable uint8_t* last_page_;
|
||||
mutable uint64_t last_page_index_;
|
||||
};
|
||||
|
||||
} // namespace vortex
|
|
@ -282,6 +282,10 @@ public:
|
|||
return true;
|
||||
}
|
||||
|
||||
// Calls clear() on the object returned by instance().
// NOTE(review): what instance() returns and what clear() discards is not
// visible in this hunk; the body is identical to finalize() — confirm whether
// the two are intended to differ.
void flush() {
|
||||
instance().clear();
|
||||
}
|
||||
|
||||
// Calls clear() on the object returned by instance().
// NOTE(review): instance() and clear() are defined outside this hunk;
// presumably this releases state at shutdown — confirm against the class.
void finalize() {
|
||||
instance().clear();
|
||||
}
|
||||
|
|
|
@ -75,11 +75,6 @@ inline uint64_t bit_getw(uint64_t bits, uint32_t start, uint32_t end) {
|
|||
return (bits << shift) >> (shift + start);
|
||||
}
|
||||
|
||||
// Rounds `size` up to the next multiple of `alignment`.
//
// size:      value to round up (full 64-bit range is preserved).
// alignment: rounding granularity; must be a power of two (checked by the
//            assert in builds where assertions are enabled).
// returns:   the smallest multiple of `alignment` that is >= `size`
//            (modulo 2^64 wrap-around on overflow).
inline uint64_t aligned_size(uint64_t size, uint32_t alignment) {
  assert(0 == (alignment & (alignment - 1)));
  // Build the mask in 64 bits before inverting it. Inverting the 32-bit
  // expression (alignment - 1) and letting it zero-extend would clear the
  // upper 32 bits of the result for any size >= 4 GiB.
  const uint64_t mask = uint64_t(alignment) - 1;
  return (size + mask) & ~mask;
}
|
||||
|
||||
// Apply integer sign extension
|
||||
inline uint32_t sext32(uint32_t word, uint32_t width) {
|
||||
assert(width > 1);
|
||||
|
|
|
@ -7,6 +7,8 @@
|
|||
#include <mem.h>
|
||||
#include "simulator.h"
|
||||
|
||||
#define RAM_PAGE_SIZE 4096
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
static void show_usage() {
|
||||
|
@ -49,7 +51,7 @@ int main(int argc, char **argv) {
|
|||
for (auto program : programs) {
|
||||
std::cout << "Running " << program << "..." << std::endl;
|
||||
|
||||
vortex::RAM ram((1<<12), (1<<20));
|
||||
vortex::RAM ram(RAM_PAGE_SIZE);
|
||||
vortex::Simulator simulator;
|
||||
simulator.attach_ram(&ram);
|
||||
|
||||
|
|
|
@ -477,7 +477,7 @@ void Simulator::eval_mem_bus(bool clk) {
|
|||
uint8_t* data = (uint8_t*)(vl_obj_->device->mem_req_data);
|
||||
if (base_addr >= IO_COUT_ADDR
|
||||
&& base_addr <= (IO_COUT_ADDR + IO_COUT_SIZE - 1)) {
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
for (int i = 0; i < IO_COUT_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
auto& ss_buf = print_bufs_[i];
|
||||
char c = data[i];
|
||||
|
|
|
@ -169,7 +169,7 @@ int run_kernel_test(const kernel_arg_t& kernel_arg,
|
|||
std::cout << "start execution" << std::endl;
|
||||
auto t2 = std::chrono::high_resolution_clock::now();
|
||||
RT_CHECK(vx_start(device));
|
||||
RT_CHECK(vx_ready_wait(device, -1));
|
||||
RT_CHECK(vx_ready_wait(device, MAX_TIMEOUT));
|
||||
auto t3 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// read destination buffer from local memory
|
||||
|
@ -228,7 +228,7 @@ int main(int argc, char *argv[]) {
|
|||
std::cout << "open device connection" << std::endl;
|
||||
RT_CHECK(vx_dev_open(&device));
|
||||
|
||||
unsigned max_cores;
|
||||
uint64_t max_cores;
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
|
||||
uint32_t num_points = count;
|
||||
uint32_t num_blocks = (num_points * sizeof(int32_t) + 63) / 64;
|
||||
|
|
|
@ -67,7 +67,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
|||
|
||||
// wait for completion
|
||||
std::cout << "wait for completion" << std::endl;
|
||||
RT_CHECK(vx_ready_wait(device, -1));
|
||||
RT_CHECK(vx_ready_wait(device, MAX_TIMEOUT));
|
||||
|
||||
// download destination buffer
|
||||
std::cout << "download destination buffer" << std::endl;
|
||||
|
@ -112,7 +112,7 @@ int main(int argc, char *argv[]) {
|
|||
std::cout << "open device connection" << std::endl;
|
||||
RT_CHECK(vx_dev_open(&device));
|
||||
|
||||
unsigned max_cores, max_warps, max_threads;
|
||||
uint64_t max_cores, max_warps, max_threads;
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));
|
||||
|
|
|
@ -121,7 +121,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
|||
|
||||
// wait for completion
|
||||
std::cout << "wait for completion" << std::endl;
|
||||
RT_CHECK(vx_ready_wait(device, -1));
|
||||
RT_CHECK(vx_ready_wait(device, MAX_TIMEOUT));
|
||||
|
||||
// download destination buffer
|
||||
std::cout << "download destination buffer" << std::endl;
|
||||
|
|
|
@ -168,7 +168,7 @@ int main(int argc, char *argv[]) {
|
|||
std::cout << "open device connection" << std::endl;
|
||||
RT_CHECK(vx_dev_open(&device));
|
||||
|
||||
unsigned max_cores, max_warps, max_threads;
|
||||
uint64_t max_cores, max_warps, max_threads;
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));
|
||||
|
@ -245,7 +245,7 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
// wait for completion
|
||||
std::cout << "wait for completion" << std::endl;
|
||||
RT_CHECK(vx_ready_wait(device, -1));
|
||||
RT_CHECK(vx_ready_wait(device, MAX_TIMEOUT));
|
||||
|
||||
// download destination buffer
|
||||
std::cout << "download destination buffer" << std::endl;
|
||||
|
|
|
@ -67,7 +67,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
|||
|
||||
// wait for completion
|
||||
std::cout << "wait for completion" << std::endl;
|
||||
RT_CHECK(vx_ready_wait(device, -1));
|
||||
RT_CHECK(vx_ready_wait(device, MAX_TIMEOUT));
|
||||
|
||||
// download destination buffer
|
||||
std::cout << "download destination buffer" << std::endl;
|
||||
|
@ -112,7 +112,7 @@ int main(int argc, char *argv[]) {
|
|||
std::cout << "open device connection" << std::endl;
|
||||
RT_CHECK(vx_dev_open(&device));
|
||||
|
||||
unsigned max_cores, max_warps, max_threads;
|
||||
uint64_t max_cores, max_warps, max_threads;
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));
|
||||
|
|
|
@ -101,7 +101,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
|||
|
||||
// wait for completion
|
||||
std::cout << "wait for completion" << std::endl;
|
||||
RT_CHECK(vx_ready_wait(device, -1));
|
||||
RT_CHECK(vx_ready_wait(device, MAX_TIMEOUT));
|
||||
|
||||
// download destination buffer
|
||||
std::cout << "download destination buffer" << std::endl;
|
||||
|
|
|
@ -136,7 +136,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
|||
|
||||
// wait for completion
|
||||
std::cout << "wait for completion" << std::endl;
|
||||
RT_CHECK(vx_ready_wait(device, -1));
|
||||
RT_CHECK(vx_ready_wait(device, MAX_TIMEOUT));
|
||||
|
||||
// download destination buffer
|
||||
std::cout << "download destination buffer" << std::endl;
|
||||
|
@ -194,7 +194,7 @@ int main(int argc, char *argv[]) {
|
|||
std::cout << "open device connection" << std::endl;
|
||||
RT_CHECK(vx_dev_open(&device));
|
||||
|
||||
unsigned max_cores, max_warps, max_threads;
|
||||
uint64_t max_cores, max_warps, max_threads;
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));
|
||||
|
|
|
@ -67,7 +67,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
|||
|
||||
// wait for completion
|
||||
std::cout << "wait for completion" << std::endl;
|
||||
RT_CHECK(vx_ready_wait(device, -1));
|
||||
RT_CHECK(vx_ready_wait(device, MAX_TIMEOUT));
|
||||
|
||||
// download destination buffer
|
||||
std::cout << "download destination buffer" << std::endl;
|
||||
|
|
|
@ -67,7 +67,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
|||
|
||||
// wait for completion
|
||||
std::cout << "wait for completion" << std::endl;
|
||||
RT_CHECK(vx_ready_wait(device, -1));
|
||||
RT_CHECK(vx_ready_wait(device, MAX_TIMEOUT));
|
||||
|
||||
// download destination buffer
|
||||
std::cout << "download destination buffer" << std::endl;
|
||||
|
|
|
@ -65,7 +65,7 @@ int run_test() {
|
|||
|
||||
// wait for completion
|
||||
std::cout << "wait for completion" << std::endl;
|
||||
RT_CHECK(vx_ready_wait(device, -1));
|
||||
RT_CHECK(vx_ready_wait(device, MAX_TIMEOUT));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -85,7 +85,7 @@ int main(int argc, char *argv[]) {
|
|||
std::cout << "open device connection" << std::endl;
|
||||
RT_CHECK(vx_dev_open(&device));
|
||||
|
||||
unsigned max_cores, max_warps, max_threads;
|
||||
uint64_t max_cores, max_warps, max_threads;
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));
|
||||
|
|
|
@ -110,7 +110,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
|||
|
||||
// wait for completion
|
||||
std::cout << "wait for completion" << std::endl;
|
||||
RT_CHECK(vx_ready_wait(device, -1));
|
||||
RT_CHECK(vx_ready_wait(device, MAX_TIMEOUT));
|
||||
|
||||
auto time_end = std::chrono::high_resolution_clock::now();
|
||||
double elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(time_end - time_start).count();
|
||||
|
@ -166,7 +166,7 @@ int main(int argc, char *argv[]) {
|
|||
std::cout << "open device connection" << std::endl;
|
||||
RT_CHECK(vx_dev_open(&device));
|
||||
|
||||
unsigned max_cores, max_warps, max_threads;
|
||||
uint64_t max_cores, max_warps, max_threads;
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue