driver refactoring

This commit is contained in:
Blaise Tine 2021-11-14 09:05:15 -05:00
parent 808bddb586
commit 27a65fdee7
27 changed files with 200 additions and 198 deletions

View file

@ -17,6 +17,7 @@
#include <fpga.h>
#endif
#include "vx_utils.h"
#include <vortex.h>
#include <VX_config.h>
#include "vortex_afu.h"
@ -52,7 +53,7 @@
typedef struct vx_device_ {
fpga_handle fpga;
size_t mem_allocation;
uint64_t mem_allocation;
unsigned version;
unsigned num_cores;
unsigned num_warps;
@ -64,19 +65,9 @@ typedef struct vx_buffer_ {
void* host_ptr;
uint64_t io_addr;
vx_device_h hdevice;
size_t size;
uint64_t size;
} vx_buffer_t;
// Rounds `size` up to the nearest multiple of `alignment`.
// `alignment` is expected to be a power of two; this is only checked by the
// assert, so it is not enforced when NDEBUG is defined.
inline size_t aligned_size(size_t size, size_t alignment) {
    const size_t mask = alignment - 1;
    assert((alignment & mask) == 0);
    const size_t padded = size + mask;
    return padded & ~mask;
}
// Reports whether `addr` is a multiple of `alignment`.
// `alignment` is expected to be a power of two (checked by assert only).
inline bool is_aligned(size_t addr, size_t alignment) {
    const size_t mask = alignment - 1;
    assert((alignment & mask) == 0);
    const size_t remainder = addr & mask;
    return remainder == 0;
}
///////////////////////////////////////////////////////////////////////////////
#ifdef DUMP_PERF_STATS
@ -107,7 +98,7 @@ AutoPerfDump gAutoPerfDump;
///////////////////////////////////////////////////////////////////////////////
extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) {
if (nullptr == hdevice)
return -1;
@ -279,7 +270,7 @@ extern int vx_dev_close(vx_device_h hdevice) {
return 0;
}
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
extern int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) {
if (nullptr == hdevice
|| nullptr == dev_maddr
|| 0 >= size)
@ -299,7 +290,7 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr)
return 0;
}
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
extern int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) {
fpga_result res;
void* host_ptr;
uint64_t wsid;
@ -367,7 +358,7 @@ extern int vx_buf_release(vx_buffer_h hbuffer) {
return 0;
}
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
extern int vx_ready_wait(vx_device_h hdevice, uint64_t timeout) {
if (nullptr == hdevice)
return -1;
@ -386,7 +377,7 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
#endif
// to milliseconds
long long sleep_time_ms = (sleep_time.tv_sec * 1000) + (sleep_time.tv_nsec / 1000000);
uint64_t sleep_time_ms = (sleep_time.tv_sec * 1000) + (sleep_time.tv_nsec / 1000000);
for (;;) {
uint64_t status;
@ -430,7 +421,7 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
return 0;
}
extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) {
extern int vx_copy_to_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size, uint64_t src_offset) {
if (nullptr == hbuffer
|| 0 >= size)
return -1;
@ -438,8 +429,8 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, si
vx_buffer_t *buffer = ((vx_buffer_t*)hbuffer);
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
size_t dev_mem_size = LOCAL_MEM_SIZE;
size_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
uint64_t dev_mem_size = LOCAL_MEM_SIZE;
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
// check alignment
if (!is_aligned(dev_maddr, CACHE_BLOCK_SIZE))
@ -454,7 +445,7 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, si
return -1;
// Ensure ready for new command
if (vx_ready_wait(buffer->hdevice, -1) != 0)
if (vx_ready_wait(buffer->hdevice, MAX_TIMEOUT) != 0)
return -1;
auto ls_shift = (int)std::log2(CACHE_BLOCK_SIZE);
@ -465,13 +456,13 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, si
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_MEM_WRITE));
// Wait for the write operation to finish
if (vx_ready_wait(buffer->hdevice, -1) != 0)
if (vx_ready_wait(buffer->hdevice, MAX_TIMEOUT) != 0)
return -1;
return 0;
}
extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) {
extern int vx_copy_from_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size, uint64_t dest_offset) {
if (nullptr == hbuffer
|| 0 >= size)
return -1;
@ -479,8 +470,8 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size,
vx_buffer_t *buffer = ((vx_buffer_t*)hbuffer);
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
size_t dev_mem_size = LOCAL_MEM_SIZE;
size_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
uint64_t dev_mem_size = LOCAL_MEM_SIZE;
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
// check alignment
if (!is_aligned(dev_maddr, CACHE_BLOCK_SIZE))
@ -495,7 +486,7 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size,
return -1;
// Ensure ready for new command
if (vx_ready_wait(buffer->hdevice, -1) != 0)
if (vx_ready_wait(buffer->hdevice, MAX_TIMEOUT) != 0)
return -1;
auto ls_shift = (int)std::log2(CACHE_BLOCK_SIZE);
@ -506,7 +497,7 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size,
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_MEM_READ));
// Wait for the read operation to finish
if (vx_ready_wait(buffer->hdevice, -1) != 0)
if (vx_ready_wait(buffer->hdevice, MAX_TIMEOUT) != 0)
return -1;
return 0;
@ -519,7 +510,7 @@ extern int vx_start(vx_device_h hdevice) {
vx_device_t *device = ((vx_device_t*)hdevice);
// Ensure ready for new command
if (vx_ready_wait(hdevice, -1) != 0)
if (vx_ready_wait(hdevice, MAX_TIMEOUT) != 0)
return -1;
// start execution

View file

@ -1,6 +1,6 @@
#pragma once
#include <cstdint>
#include <stdint.h>
#ifdef USE_VLSIM
#include <fpga.h>

View file

@ -1,17 +1,29 @@
#include "vx_utils.h"
#include <iostream>
#include <fstream>
#include <cstring>
#include <vortex.h>
#include <VX_config.h>
#include <assert.h>
extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size) {
// Rounds `size` up to the nearest multiple of `alignment`.
// `alignment` is expected to be a power of two; this is only checked by the
// assert, so it is not enforced when NDEBUG is defined.
uint64_t aligned_size(uint64_t size, uint64_t alignment) {
    const uint64_t mask = alignment - 1;
    assert((alignment & mask) == 0);
    const uint64_t padded = size + mask;
    return padded & ~mask;
}
// Reports whether `addr` is a multiple of `alignment`.
// `alignment` is expected to be a power of two (checked by assert only).
bool is_aligned(uint64_t addr, uint64_t alignment) {
    const uint64_t mask = alignment - 1;
    assert((alignment & mask) == 0);
    const uint64_t remainder = addr & mask;
    return remainder == 0;
}
extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, uint64_t size) {
int err = 0;
if (NULL == content || 0 == size)
return -1;
uint32_t buffer_transfer_size = 65536;
unsigned kernel_base_addr;
uint64_t kernel_base_addr;
err = vx_dev_caps(device, VX_CAPS_KERNEL_BASE_ADDR, &kernel_base_addr);
if (err != 0)
return -1;
@ -29,9 +41,9 @@ extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_
// upload content
//
size_t offset = 0;
uint64_t offset = 0;
while (offset < size) {
auto chunk_size = std::min<size_t>(buffer_transfer_size, size - offset);
auto chunk_size = std::min<uint64_t>(buffer_transfer_size, size - offset);
std::memcpy(buf_ptr, (uint8_t*)content + offset, chunk_size);
/*printf("*** Upload Kernel to 0x%0x: data=", kernel_base_addr + offset);
@ -127,7 +139,7 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
uint64_t mem_lat = 0;
#endif
unsigned num_cores;
uint64_t num_cores;
ret = vx_dev_caps(device, VX_CAPS_MAX_CORES, &num_cores);
if (ret != 0)
return ret;

11
driver/common/vx_utils.h Normal file
View file

@ -0,0 +1,11 @@
#pragma once
#include <cstdint>
uint64_t aligned_size(uint64_t size, uint64_t alignment);
bool is_aligned(uint64_t addr, uint64_t alignment);
#define CACHE_BLOCK_SIZE 64
#define ALLOC_BASE_ADDR 0x00000000
#define LOCAL_MEM_SIZE 4294967296 // 4 GB

View file

@ -2,6 +2,7 @@
#define __VX_DRIVER_H__
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#ifdef __cplusplus
@ -22,9 +23,7 @@ typedef void* vx_buffer_h;
#define VX_CAPS_ALLOC_BASE_ADDR 0x6
#define VX_CAPS_KERNEL_BASE_ADDR 0x7
#define CACHE_BLOCK_SIZE 64
#define ALLOC_BASE_ADDR 0x00000000
#define LOCAL_MEM_SIZE 0xffffffff
#define MAX_TIMEOUT (60*60*1000) // 1hr
// open the device and connect to it
int vx_dev_open(vx_device_h* hdevice);
@ -33,10 +32,10 @@ int vx_dev_open(vx_device_h* hdevice);
int vx_dev_close(vx_device_h hdevice);
// return device configurations
int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value);
int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value);
// Allocate shared buffer with device
int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer);
int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer);
// Get host pointer address
void* vx_host_ptr(vx_buffer_h hbuffer);
@ -45,24 +44,24 @@ void* vx_host_ptr(vx_buffer_h hbuffer);
int vx_buf_release(vx_buffer_h hbuffer);
// allocate device memory and return address
int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr);
int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr);
// Copy bytes from buffer to device local memory
int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset);
int vx_copy_to_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size, uint64_t src_offset);
// Copy bytes from device local memory to buffer
int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dst_offset);
int vx_copy_from_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size, uint64_t dst_offset);
// Start device execution
int vx_start(vx_device_h hdevice);
// Wait for device ready with milliseconds timeout
int vx_ready_wait(vx_device_h hdevice, long long timeout);
int vx_ready_wait(vx_device_h hdevice, uint64_t timeout);
////////////////////////////// UTILITY FUNCTIONS //////////////////////////////
// upload kernel bytes to device
int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size);
int vx_upload_kernel_bytes(vx_device_h device, const void* content, uint64_t size);
// upload kernel file to device
int vx_upload_kernel_file(vx_device_h device, const char* filename);

View file

@ -3,7 +3,7 @@ RTLSIM_DIR = ../../sim/rtlsim
CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors
#CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I../include -I../../hw -I$(RTLSIM_DIR) -I$(RTLSIM_DIR)/../common
CXXFLAGS += -I../include -I../common -I../../hw -I$(RTLSIM_DIR) -I$(RTLSIM_DIR)/../common
LDFLAGS += $(RTLSIM_DIR)/librtlsim.a

View file

@ -7,11 +7,14 @@
#include <chrono>
#include <vortex.h>
#include <vx_utils.h>
#include <VX_config.h>
#include <mem.h>
#include <util.h>
#include <simulator.h>
#define RAM_PAGE_SIZE 4096
using namespace vortex;
///////////////////////////////////////////////////////////////////////////////
@ -19,7 +22,7 @@ using namespace vortex;
class vx_device;
class vx_buffer {
public:
vx_buffer(size_t size, vx_device* device)
vx_buffer(uint64_t size, vx_device* device)
: size_(size)
, device_(device) {
auto aligned_asize = aligned_size(size, CACHE_BLOCK_SIZE);
@ -36,7 +39,7 @@ public:
return data_;
}
size_t size() const {
uint64_t size() const {
return size_;
}
@ -45,7 +48,7 @@ public:
}
private:
size_t size_;
uint64_t size_;
vx_device* device_;
void* data_;
};
@ -54,9 +57,10 @@ private:
class vx_device {
public:
vx_device() : ram_((1<<12), (1<<20)) {
mem_allocation_ = ALLOC_BASE_ADDR;
}
vx_device()
: ram_(RAM_PAGE_SIZE)
, mem_allocation_(ALLOC_BASE_ADDR)
{}
~vx_device() {
if (future_.valid()) {
@ -64,9 +68,9 @@ public:
}
}
int alloc_local_mem(size_t size, size_t* dev_maddr) {
auto dev_mem_size = LOCAL_MEM_SIZE;
size_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
int alloc_local_mem(uint64_t size, uint64_t* dev_maddr) {
uint64_t dev_mem_size = LOCAL_MEM_SIZE;
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
if (mem_allocation_ + asize > dev_mem_size)
return -1;
*dev_maddr = mem_allocation_;
@ -74,9 +78,9 @@ public:
return 0;
}
int upload(const void* src, size_t dest_addr, size_t size, size_t src_offset) {
size_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
if (dest_addr + asize > ram_.size())
int upload(const void* src, uint64_t dest_addr, uint64_t size, uint64_t src_offset) {
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
if (dest_addr + asize > LOCAL_MEM_SIZE)
return -1;
/*printf("VXDRV: upload %ld bytes from 0x%lx:", size, uintptr_t((uint8_t*)src + src_offset));
@ -92,9 +96,9 @@ public:
return 0;
}
int download(void* dest, size_t src_addr, size_t size, size_t dest_offset) {
size_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
if (src_addr + asize > ram_.size())
int download(void* dest, uint64_t src_addr, uint64_t size, uint64_t dest_offset) {
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
if (src_addr + asize > LOCAL_MEM_SIZE)
return -1;
ram_.read((uint8_t*)dest + dest_offset, src_addr, asize);
@ -125,10 +129,10 @@ public:
return 0;
}
int wait(long long timeout) {
int wait(uint64_t timeout) {
if (!future_.valid())
return 0;
auto timeout_sec = (timeout < 0) ? timeout : (timeout / 1000);
uint64_t timeout_sec = timeout / 1000;
std::chrono::seconds wait_time(1);
for (;;) {
auto status = future_.wait_for(wait_time); // wait for 1 sec and check status
@ -141,9 +145,9 @@ public:
private:
size_t mem_allocation_;
RAM ram_;
Simulator simulator_;
uint64_t mem_allocation_;
std::future<void> future_;
};
@ -177,7 +181,7 @@ AutoPerfDump gAutoPerfDump;
///////////////////////////////////////////////////////////////////////////////
extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) {
if (nullptr == hdevice)
return -1;
@ -198,10 +202,10 @@ extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
*value = CACHE_BLOCK_SIZE;
break;
case VX_CAPS_LOCAL_MEM_SIZE:
*value = 0xffffffff;
*value = LOCAL_MEM_SIZE;
break;
case VX_CAPS_ALLOC_BASE_ADDR:
*value = 0x10000000;
*value = ALLOC_BASE_ADDR;
break;
case VX_CAPS_KERNEL_BASE_ADDR:
*value = STARTUP_ADDR;
@ -244,7 +248,7 @@ extern int vx_dev_close(vx_device_h hdevice) {
return 0;
}
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
extern int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) {
if (nullptr == hdevice
|| nullptr == dev_maddr
|| 0 >= size)
@ -255,7 +259,7 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr)
}
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
extern int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) {
if (nullptr == hdevice
|| 0 >= size
|| nullptr == hbuffer)
@ -294,7 +298,7 @@ extern int vx_buf_release(vx_buffer_h hbuffer) {
return 0;
}
extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) {
extern int vx_copy_to_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size, uint64_t src_offset) {
if (nullptr == hbuffer
|| 0 >= size)
return -1;
@ -307,7 +311,7 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, si
return buffer->device()->upload(buffer->data(), dev_maddr, size, src_offset);
}
extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) {
extern int vx_copy_from_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size, uint64_t dest_offset) {
if (nullptr == hbuffer
|| 0 >= size)
return -1;
@ -329,7 +333,7 @@ extern int vx_start(vx_device_h hdevice) {
return device->start();
}
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
extern int vx_ready_wait(vx_device_h hdevice, uint64_t timeout) {
if (nullptr == hdevice)
return -1;

View file

@ -4,7 +4,7 @@ CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
#CXXFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
CXXFLAGS += -I../include -I../../hw -I$(SIMX_DIR) -I$(SIMX_DIR)/../common
CXXFLAGS += -I../include -I../common -I../../hw -I$(SIMX_DIR) -I$(SIMX_DIR)/../common
CXXFLAGS += $(CONFIGS)
CXXFLAGS += -DDUMP_PERF_STATS

View file

@ -8,11 +8,12 @@
#include <chrono>
#include <vortex.h>
#include <core.h>
#include <vx_utils.h>
#include <processor.h>
#include <VX_config.h>
#include <util.h>
#define PAGE_SIZE 4096
#define RAM_PAGE_SIZE 4096
using namespace vortex;
@ -22,10 +23,10 @@ class vx_device;
class vx_buffer {
public:
vx_buffer(size_t size, vx_device* device)
vx_buffer(uint64_t size, vx_device* device)
: size_(size)
, device_(device) {
auto aligned_asize = aligned_size(size, CACHE_BLOCK_SIZE);
uint64_t aligned_asize = aligned_size(size, CACHE_BLOCK_SIZE);
data_ = malloc(aligned_asize);
}
@ -39,7 +40,7 @@ public:
return data_;
}
size_t size() const {
uint64_t size() const {
return size_;
}
@ -48,7 +49,7 @@ public:
}
private:
size_t size_;
uint64_t size_;
vx_device* device_;
void* data_;
};
@ -59,32 +60,23 @@ class vx_device {
public:
vx_device()
: arch_("rv32i", NUM_CORES, NUM_WARPS, NUM_THREADS)
, decoder_(arch_)
, mmu_(PAGE_SIZE, arch_.wsize(), true)
, cores_(arch_.num_cores())
, is_done_(false)
, is_running_(false)
, mem_allocation_(ALLOC_BASE_ADDR)
, thread_(__thread_proc__, this)
, ram_((1<<12), (1<<20)) {
mem_allocation_ = ALLOC_BASE_ADDR;
mmu_.attach(ram_, 0, 0xffffffff);
for (int i = 0; i < arch_.num_cores(); ++i) {
cores_.at(i) = std::make_shared<Core>(arch_, decoder_, mmu_, i);
}
}
, ram_(RAM_PAGE_SIZE)
{}
~vx_device() {
mutex_.lock();
is_done_ = true;
mutex_.unlock();
mutex_.unlock();
thread_.join();
}
int alloc_local_mem(size_t size, size_t* dev_maddr) {
auto dev_mem_size = LOCAL_MEM_SIZE;
auto asize = aligned_size(size, CACHE_BLOCK_SIZE);
int alloc_local_mem(uint64_t size, uint64_t* dev_maddr) {
uint64_t dev_mem_size = LOCAL_MEM_SIZE;
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
if (mem_allocation_ + asize > dev_mem_size)
return -1;
*dev_maddr = mem_allocation_;
@ -92,9 +84,9 @@ public:
return 0;
}
int upload(const void* src, size_t dest_addr, size_t size, size_t src_offset) {
auto asize = aligned_size(size, CACHE_BLOCK_SIZE);
if (dest_addr + asize > ram_.size())
int upload(const void* src, uint64_t dest_addr, uint64_t size, uint64_t src_offset) {
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
if (dest_addr + asize > LOCAL_MEM_SIZE)
return -1;
ram_.write((const uint8_t*)src + src_offset, dest_addr, asize);
@ -107,9 +99,9 @@ public:
return 0;
}
int download(void* dest, size_t src_addr, size_t size, size_t dest_offset) {
size_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
if (src_addr + asize > ram_.size())
int download(void* dest, uint64_t src_addr, uint64_t size, uint64_t dest_offset) {
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
if (src_addr + asize > LOCAL_MEM_SIZE)
return -1;
ram_.read((uint8_t*)dest + dest_offset, src_addr, asize);
@ -123,19 +115,17 @@ public:
}
int start() {
mutex_.lock();
for (int i = 0; i < arch_.num_cores(); ++i) {
cores_.at(i)->clear();
}
SimPlatform::instance().flush();
processor_ = std::make_shared<Processor>(arch_);
processor_->attach_ram(&ram_);
is_running_ = true;
mutex_.unlock();
return 0;
}
int wait(long long timeout) {
auto timeout_sec = (timeout < 0) ? timeout : (timeout / 1000);
int wait(uint64_t timeout) {
uint64_t timeout_sec = timeout / 1000;
for (;;) {
mutex_.lock();
bool is_running = is_running_;
@ -147,32 +137,10 @@ public:
std::this_thread::sleep_for(std::chrono::seconds(1));
}
return 0;
}
int get_csr(int core_id, int addr, unsigned *value) {
*value = cores_.at(core_id)->get_csr(addr, 0, 0);
return 0;
}
int set_csr(int core_id, int addr, unsigned value) {
cores_.at(core_id)->set_csr(addr, value, 0, 0);
return 0;
}
}
private:
// Steps every core in cores_ once per pass and keeps making passes until a
// full pass completes in which no core reports running() after its step.
void run() {
    for (;;) {
        bool any_active = false;
        for (auto& core : cores_) {
            core->step();
            // running() is queried for every core, after that core's step
            if (core->running()) {
                any_active = true;
            }
        }
        if (!any_active) {
            break;
        }
    }
}
void thread_proc() {
std::cout << "Device ready..." << std::flush << std::endl;
@ -188,7 +156,7 @@ private:
if (is_running) {
std::cout << "Device running..." << std::flush << std::endl;
this->run();
processor_->run();
mutex_.lock();
is_running_ = false;
@ -206,12 +174,10 @@ private:
}
ArchDef arch_;
Decoder decoder_;
MemoryUnit mmu_;
std::vector<std::shared_ptr<Core>> cores_;
Processor::Ptr processor_;
bool is_done_;
bool is_running_;
size_t mem_allocation_;
uint64_t mem_allocation_;
std::thread thread_;
RAM ram_;
std::mutex mutex_;
@ -251,6 +217,9 @@ extern int vx_dev_open(vx_device_h* hdevice) {
if (nullptr == hdevice)
return -1;
if (!SimPlatform::instance().initialize())
return -1;
*hdevice = new vx_device();
#ifdef DUMP_PERF_STATS
@ -273,10 +242,12 @@ extern int vx_dev_close(vx_device_h hdevice) {
delete device;
SimPlatform::instance().finalize();
return 0;
}
extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) {
if (nullptr == hdevice)
return -1;
@ -314,7 +285,7 @@ extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
return 0;
}
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
extern int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) {
if (nullptr == hdevice
|| nullptr == dev_maddr
|| 0 >= size)
@ -324,7 +295,7 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr)
return device->alloc_local_mem(size, dev_maddr);
}
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
extern int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) {
if (nullptr == hdevice
|| 0 >= size
|| nullptr == hbuffer)
@ -363,7 +334,7 @@ extern int vx_buf_release(vx_buffer_h hbuffer) {
return 0;
}
extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) {
extern int vx_copy_to_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size, uint64_t src_offset) {
if (nullptr == hbuffer
|| 0 >= size)
return -1;
@ -376,7 +347,7 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, si
return buffer->device()->upload(buffer->data(), dev_maddr, size, src_offset);
}
extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) {
extern int vx_copy_from_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size, uint64_t dest_offset) {
if (nullptr == hbuffer
|| 0 >= size)
return -1;
@ -398,7 +369,7 @@ extern int vx_start(vx_device_h hdevice) {
return device->start();
}
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
extern int vx_ready_wait(vx_device_h hdevice, uint64_t timeout) {
if (nullptr == hdevice)
return -1;

View file

@ -8,15 +8,15 @@ extern int vx_dev_close(vx_device_h /*hdevice*/) {
return -1;
}
extern int vx_dev_caps(vx_device_h /*hdevice*/, unsigned /*caps_id*/, unsigned* /*value*/) {
extern int vx_dev_caps(vx_device_h /*hdevice*/, uint32_t /*caps_id*/, uint64_t* /*value*/) {
return -1;
}
extern int vx_alloc_dev_mem(vx_device_h /*hdevice*/, size_t /*size*/, size_t* /*dev_maddr*/) {
extern int vx_alloc_dev_mem(vx_device_h /*hdevice*/, uint64_t /*size*/, uint64_t* /*dev_maddr*/) {
return -1;
}
extern int vx_alloc_shared_mem(vx_device_h /*hdevice*/, size_t /*size*/, vx_buffer_h* /*hbuffer*/) {
extern int vx_alloc_shared_mem(vx_device_h /*hdevice*/, uint64_t /*size*/, vx_buffer_h* /*hbuffer*/) {
return -1;
}
@ -28,11 +28,11 @@ extern int vx_buf_release(vx_buffer_h /*hbuffer*/) {
return -1;
}
extern int vx_copy_to_dev(vx_buffer_h /*hbuffer*/, size_t /*dev_maddr*/, size_t /*size*/, size_t /*src_offset*/) {
extern int vx_copy_to_dev(vx_buffer_h /*hbuffer*/, uint64_t /*dev_maddr*/, uint64_t /*size*/, uint64_t /*src_offset*/) {
return -1;
}
extern int vx_copy_from_dev(vx_buffer_h /*hbuffer*/, size_t /*dev_maddr*/, size_t /*size*/, size_t /*dest_offset*/) {
extern int vx_copy_from_dev(vx_buffer_h /*hbuffer*/, uint64_t /*dev_maddr*/, uint64_t /*size*/, uint64_t /*dest_offset*/) {
return -1;
}
@ -40,6 +40,6 @@ extern int vx_start(vx_device_h /*hdevice*/) {
return -1;
}
extern int vx_ready_wait(vx_device_h /*hdevice*/, long long /*timeout*/) {
extern int vx_ready_wait(vx_device_h /*hdevice*/, uint64_t /*timeout*/) {
return -1;
}

View file

@ -168,11 +168,12 @@ void MemoryUnit::tlbRm(uint64_t va) {
///////////////////////////////////////////////////////////////////////////////
RAM::RAM(uint32_t num_pages, uint32_t page_size)
: page_bits_(log2ceil(page_size)) {
assert(ispow2(page_size));
mem_.resize(num_pages, NULL);
size_ = uint64_t(mem_.size()) << page_bits_;
RAM::RAM(uint32_t page_size)
: size_(0)
, page_bits_(log2ceil(page_size))
, last_page_(nullptr)
, last_page_index_(0) {
assert(ispow2(page_size));
}
RAM::~RAM() {
@ -180,31 +181,41 @@ RAM::~RAM() {
}
void RAM::clear() {
for (auto& page : mem_) {
delete[] page;
page = NULL;
for (auto& page : pages_) {
delete[] page.second;
}
}
uint64_t RAM::size() const {
return size_;
return uint64_t(pages_.size()) << page_bits_;
}
uint8_t *RAM::get(uint32_t address) const {
uint32_t page_size = 1 << page_bits_;
uint32_t page_index = address >> page_bits_;
uint32_t byte_offset = address & ((1 << page_bits_) - 1);
uint8_t *RAM::get(uint64_t address) const {
uint32_t page_size = 1 << page_bits_;
uint32_t page_offset = address & (page_size - 1);
uint64_t page_index = address >> page_bits_;
auto &page = mem_.at(page_index);
if (page == NULL) {
uint8_t *ptr = new uint8_t[page_size];
// set uninitialized data to "baadf00d"
for (uint32_t i = 0; i < page_size; ++i) {
ptr[i] = (0xbaadf00d >> ((i & 0x3) * 8)) & 0xff;
uint8_t* page;
if (last_page_ && last_page_index_ == page_index) {
page = last_page_;
} else {
auto it = pages_.find(page_index);
if (it != pages_.end()) {
page = it->second;
} else {
uint8_t *ptr = new uint8_t[page_size];
// set uninitialized data to "baadf00d"
for (uint32_t i = 0; i < page_size; ++i) {
ptr[i] = (0xbaadf00d >> ((i & 0x3) * 8)) & 0xff;
}
pages_.emplace(page_index, ptr);
page = ptr;
}
page = ptr;
last_page_ = page;
last_page_index_ = page_index;
}
return page + byte_offset;
return page + page_offset;
}
void RAM::read(void *data, uint64_t addr, uint64_t size) {

View file

@ -130,13 +130,13 @@ private:
class RAM : public MemDevice {
public:
RAM(uint32_t num_pages, uint32_t page_size);
RAM(uint32_t page_size);
~RAM();
void clear();
uint64_t size() const override;
void read(void *data, uint64_t addr, uint64_t size) override;
void write(const void *data, uint64_t addr, uint64_t size) override;
@ -153,11 +153,13 @@ public:
private:
uint8_t *get(uint32_t address) const;
uint8_t *get(uint64_t address) const;
mutable std::vector<uint8_t*> mem_;
uint32_t page_bits_;
uint64_t size_;
uint32_t page_bits_;
mutable std::unordered_map<uint64_t, uint8_t*> pages_;
mutable uint8_t* last_page_;
mutable uint64_t last_page_index_;
};
} // namespace vortex

View file

@ -282,6 +282,10 @@ public:
return true;
}
// Forwards to clear() on the object returned by instance().
// NOTE(review): what clear() actually resets is not visible in this hunk -
// confirm before relying on flush() to leave the platform reusable.
void flush() {
instance().clear();
}
// Forwards to clear() on the object returned by instance().
// NOTE(review): no other teardown is performed here; whether clear() alone
// releases everything acquired during initialization is not visible - confirm.
void finalize() {
instance().clear();
}

View file

@ -75,11 +75,6 @@ inline uint64_t bit_getw(uint64_t bits, uint32_t start, uint32_t end) {
return (bits << shift) >> (shift + start);
}
// Rounds `size` up to the nearest multiple of `alignment`.
// `alignment` is expected to be a power of two; this is only checked by the
// assert, so it is not enforced when NDEBUG is defined.
//
// The mask is widened to 64 bits before it is inverted. Inverting the 32-bit
// `alignment - 1` directly yields a 32-bit value that is zero-extended when
// combined with `size`, which clears the upper 32 bits of the result and
// truncates any size of 4 GB or more.
inline uint64_t aligned_size(uint64_t size, uint32_t alignment) {
    assert(0 == (alignment & (alignment - 1)));
    const uint64_t mask = static_cast<uint64_t>(alignment) - 1;
    return (size + mask) & ~mask;
}
// Apply integer sign extension
inline uint32_t sext32(uint32_t word, uint32_t width) {
assert(width > 1);

View file

@ -7,6 +7,8 @@
#include <mem.h>
#include "simulator.h"
#define RAM_PAGE_SIZE 4096
using namespace vortex;
static void show_usage() {
@ -49,7 +51,7 @@ int main(int argc, char **argv) {
for (auto program : programs) {
std::cout << "Running " << program << "..." << std::endl;
vortex::RAM ram((1<<12), (1<<20));
vortex::RAM ram(RAM_PAGE_SIZE);
vortex::Simulator simulator;
simulator.attach_ram(&ram);

View file

@ -477,7 +477,7 @@ void Simulator::eval_mem_bus(bool clk) {
uint8_t* data = (uint8_t*)(vl_obj_->device->mem_req_data);
if (base_addr >= IO_COUT_ADDR
&& base_addr <= (IO_COUT_ADDR + IO_COUT_SIZE - 1)) {
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
for (int i = 0; i < IO_COUT_SIZE; i++) {
if ((byteen >> i) & 0x1) {
auto& ss_buf = print_bufs_[i];
char c = data[i];

View file

@ -169,7 +169,7 @@ int run_kernel_test(const kernel_arg_t& kernel_arg,
std::cout << "start execution" << std::endl;
auto t2 = std::chrono::high_resolution_clock::now();
RT_CHECK(vx_start(device));
RT_CHECK(vx_ready_wait(device, -1));
RT_CHECK(vx_ready_wait(device, MAX_TIMEOUT));
auto t3 = std::chrono::high_resolution_clock::now();
// read destination buffer from local memory
@ -228,7 +228,7 @@ int main(int argc, char *argv[]) {
std::cout << "open device connection" << std::endl;
RT_CHECK(vx_dev_open(&device));
unsigned max_cores;
uint64_t max_cores;
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
uint32_t num_points = count;
uint32_t num_blocks = (num_points * sizeof(int32_t) + 63) / 64;

View file

@ -67,7 +67,7 @@ int run_test(const kernel_arg_t& kernel_arg,
// wait for completion
std::cout << "wait for completion" << std::endl;
RT_CHECK(vx_ready_wait(device, -1));
RT_CHECK(vx_ready_wait(device, MAX_TIMEOUT));
// download destination buffer
std::cout << "download destination buffer" << std::endl;
@ -112,7 +112,7 @@ int main(int argc, char *argv[]) {
std::cout << "open device connection" << std::endl;
RT_CHECK(vx_dev_open(&device));
unsigned max_cores, max_warps, max_threads;
uint64_t max_cores, max_warps, max_threads;
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));

View file

@ -121,7 +121,7 @@ int run_test(const kernel_arg_t& kernel_arg,
// wait for completion
std::cout << "wait for completion" << std::endl;
RT_CHECK(vx_ready_wait(device, -1));
RT_CHECK(vx_ready_wait(device, MAX_TIMEOUT));
// download destination buffer
std::cout << "download destination buffer" << std::endl;

View file

@ -168,7 +168,7 @@ int main(int argc, char *argv[]) {
std::cout << "open device connection" << std::endl;
RT_CHECK(vx_dev_open(&device));
unsigned max_cores, max_warps, max_threads;
uint64_t max_cores, max_warps, max_threads;
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));
@ -245,7 +245,7 @@ int main(int argc, char *argv[]) {
// wait for completion
std::cout << "wait for completion" << std::endl;
RT_CHECK(vx_ready_wait(device, -1));
RT_CHECK(vx_ready_wait(device, MAX_TIMEOUT));
// download destination buffer
std::cout << "download destination buffer" << std::endl;

View file

@ -67,7 +67,7 @@ int run_test(const kernel_arg_t& kernel_arg,
// wait for completion
std::cout << "wait for completion" << std::endl;
RT_CHECK(vx_ready_wait(device, -1));
RT_CHECK(vx_ready_wait(device, MAX_TIMEOUT));
// download destination buffer
std::cout << "download destination buffer" << std::endl;
@ -112,7 +112,7 @@ int main(int argc, char *argv[]) {
std::cout << "open device connection" << std::endl;
RT_CHECK(vx_dev_open(&device));
unsigned max_cores, max_warps, max_threads;
uint64_t max_cores, max_warps, max_threads;
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));

View file

@ -101,7 +101,7 @@ int run_test(const kernel_arg_t& kernel_arg,
// wait for completion
std::cout << "wait for completion" << std::endl;
RT_CHECK(vx_ready_wait(device, -1));
RT_CHECK(vx_ready_wait(device, MAX_TIMEOUT));
// download destination buffer
std::cout << "download destination buffer" << std::endl;

View file

@ -136,7 +136,7 @@ int run_test(const kernel_arg_t& kernel_arg,
// wait for completion
std::cout << "wait for completion" << std::endl;
RT_CHECK(vx_ready_wait(device, -1));
RT_CHECK(vx_ready_wait(device, MAX_TIMEOUT));
// download destination buffer
std::cout << "download destination buffer" << std::endl;
@ -194,7 +194,7 @@ int main(int argc, char *argv[]) {
std::cout << "open device connection" << std::endl;
RT_CHECK(vx_dev_open(&device));
unsigned max_cores, max_warps, max_threads;
uint64_t max_cores, max_warps, max_threads;
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));

View file

@ -67,7 +67,7 @@ int run_test(const kernel_arg_t& kernel_arg,
// wait for completion
std::cout << "wait for completion" << std::endl;
RT_CHECK(vx_ready_wait(device, -1));
RT_CHECK(vx_ready_wait(device, MAX_TIMEOUT));
// download destination buffer
std::cout << "download destination buffer" << std::endl;

View file

@ -67,7 +67,7 @@ int run_test(const kernel_arg_t& kernel_arg,
// wait for completion
std::cout << "wait for completion" << std::endl;
RT_CHECK(vx_ready_wait(device, -1));
RT_CHECK(vx_ready_wait(device, MAX_TIMEOUT));
// download destination buffer
std::cout << "download destination buffer" << std::endl;

View file

@ -65,7 +65,7 @@ int run_test() {
// wait for completion
std::cout << "wait for completion" << std::endl;
RT_CHECK(vx_ready_wait(device, -1));
RT_CHECK(vx_ready_wait(device, MAX_TIMEOUT));
return 0;
}
@ -85,7 +85,7 @@ int main(int argc, char *argv[]) {
std::cout << "open device connection" << std::endl;
RT_CHECK(vx_dev_open(&device));
unsigned max_cores, max_warps, max_threads;
uint64_t max_cores, max_warps, max_threads;
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));

View file

@ -110,7 +110,7 @@ int run_test(const kernel_arg_t& kernel_arg,
// wait for completion
std::cout << "wait for completion" << std::endl;
RT_CHECK(vx_ready_wait(device, -1));
RT_CHECK(vx_ready_wait(device, MAX_TIMEOUT));
auto time_end = std::chrono::high_resolution_clock::now();
double elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(time_end - time_start).count();
@ -166,7 +166,7 @@ int main(int argc, char *argv[]) {
std::cout << "open device connection" << std::endl;
RT_CHECK(vx_dev_open(&device));
unsigned max_cores, max_warps, max_threads;
uint64_t max_cores, max_warps, max_threads;
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));