// Copyright © 2019-2023 // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include #ifdef SCOPE #include "scope.h" #endif // XRT includes #ifdef XRTSIM #include #else #include "experimental/xrt_bo.h" #include "experimental/xrt_device.h" #include "experimental/xrt_error.h" #include "experimental/xrt_ip.h" #include "experimental/xrt_kernel.h" #include "experimental/xrt_xclbin.h" #endif #include #include #include #include #include #include using namespace vortex; #ifndef XRTSIM #define CPP_API #endif // #define BANK_INTERLEAVE #define MMIO_CTL_ADDR 0x00 #define MMIO_DEV_ADDR 0x10 #define MMIO_ISA_ADDR 0x18 #define MMIO_DCR_ADDR 0x20 #define MMIO_SCP_ADDR 0x28 #define MMIO_MEM_ADDR 0x30 #define CTL_AP_START (1 << 0) #define CTL_AP_DONE (1 << 1) #define CTL_AP_IDLE (1 << 2) #define CTL_AP_READY (1 << 3) #define CTL_AP_RESET (1 << 4) #define CTL_AP_RESTART (1 << 7) #ifdef CPP_API typedef xrt::device xrt_device_t; typedef xrt::ip xrt_kernel_t; typedef xrt::bo xrt_buffer_t; #else typedef xrtDeviceHandle xrt_device_t; typedef xrtKernelHandle xrt_kernel_t; typedef xrtBufferHandle xrt_buffer_t; #endif #define DEFAULT_DEVICE_INDEX 0 #define DEFAULT_XCLBIN_PATH "vortex_afu.xclbin" #define KERNEL_NAME "vortex_afu" #define CHECK_HANDLE(handle, _expr, _cleanup) \ auto handle = _expr; \ if (handle == nullptr) { \ printf("[VXDRV] Error: '%s' returned NULL!\n", #_expr); \ _cleanup \ } #ifndef CPP_API static void dump_xrt_error(xrtDeviceHandle xrtDevice, xrtErrorCode err) { size_t len = 0; xrtErrorGetString(xrtDevice, err, nullptr, 0, &len); std::vector buf(len); xrtErrorGetString(xrtDevice, err, buf.data(), buf.size(), nullptr); printf("[VXDRV] detail: %s!\n", buf.data()); } #endif /////////////////////////////////////////////////////////////////////////////// class vx_device { public: vx_device() : global_mem_(ALLOC_BASE_ADDR, GLOBAL_MEM_SIZE - ALLOC_BASE_ADDR, RAM_PAGE_SIZE, CACHE_BLOCK_SIZE) #ifndef CPP_API , xrtDevice_(nullptr) , xrtKernel_(nullptr) #endif {} ~vx_device() { #ifdef SCOPE vx_scope_stop(this); #endif #ifndef CPP_API for (auto &entry : xrtBuffers_) { #ifdef BANK_INTERLEAVE xrtBOFree(entry); #else xrtBOFree(entry.second.xrtBuffer); #endif } if (xrtKernel_) { xrtKernelClose(xrtKernel_); } if (xrtDevice_) { xrtDeviceClose(xrtDevice_); } #endif } int init() { int device_index = DEFAULT_DEVICE_INDEX; const char *device_index_s = getenv("XRT_DEVICE_INDEX"); if (device_index_s != nullptr) { device_index = atoi(device_index_s); } const char *xlbin_path_s = getenv("XRT_XCLBIN_PATH"); if (xlbin_path_s == nullptr) { xlbin_path_s = DEFAULT_XCLBIN_PATH; } #ifdef CPP_API auto xrtDevice = xrt::device(device_index); auto uuid = xrtDevice.load_xclbin(xlbin_path_s); auto xrtKernel = xrt::ip(xrtDevice, uuid, KERNEL_NAME); auto xclbin = xrt::xclbin(xlbin_path_s); auto device_name = xrtDevice.get_info(); #else CHECK_HANDLE(xrtDevice, xrtDeviceOpen(device_index), { return -1; }); #ifndef XRTSIM CHECK_ERR(xrtDeviceLoadXclbinFile(xrtDevice, xlbin_path_s), { dump_xrt_error(xrtDevice, err); xrtDeviceClose(xrtDevice); return err; }); xuid_t uuid; CHECK_ERR(xrtDeviceGetXclbinUUID(xrtDevice, uuid), { dump_xrt_error(xrtDevice, err); xrtDeviceClose(xrtDevice); return err; }); CHECK_HANDLE(xrtKernel, xrtPLKernelOpenExclusive(xrtDevice, uuid, KERNEL_NAME), { xrtDeviceClose(xrtDevice); return -1; }); #else xrtKernelHandle xrtKernel = xrtDevice; #endif // get device name int device_name_size; xrtXclbinGetXSAName(xrtDevice, nullptr, 0, &device_name_size); std::vector sz_device_name(device_name_size); xrtXclbinGetXSAName(xrtDevice, sz_device_name.data(), device_name_size, nullptr); std::string device_name(sz_device_name.data(), device_name_size); #endif xrtDevice_ = xrtDevice; xrtKernel_ = xrtKernel; CHECK_ERR(this->write_register(MMIO_CTL_ADDR, CTL_AP_RESET), { return err; }); CHECK_ERR(this->read_register(MMIO_DEV_ADDR, (uint32_t *)&dev_caps_), { return err; }); CHECK_ERR(this->read_register(MMIO_DEV_ADDR + 4, (uint32_t *)&dev_caps_ + 1), { return err; }); CHECK_ERR(this->read_register(MMIO_ISA_ADDR, (uint32_t *)&isa_caps_), { return err; }); CHECK_ERR(this->read_register(MMIO_ISA_ADDR + 4, (uint32_t *)&isa_caps_ + 1), { return err; }); uint64_t num_banks; this->get_caps(VX_CAPS_NUM_MEM_BANKS, &num_banks); lg2_num_banks_ = log2ceil(num_banks); uint64_t bank_size; this->get_caps(VX_CAPS_MEM_BANK_SIZE, &bank_size); lg2_bank_size_ = log2ceil(bank_size); global_mem_size_ = num_banks * bank_size; printf("info: device name=%s, memory_capacity=0x%lx bytes, memory_banks=%ld.\n", device_name.c_str(), global_mem_size_, num_banks); #ifdef BANK_INTERLEAVE xrtBuffers_.reserve(num_banks); for (uint32_t i = 0; i < num_banks; ++i) { #ifdef CPP_API xrtBuffers_.emplace_back(xrtDevice_, bank_size, xrt::bo::flags::normal, i); #else CHECK_HANDLE(xrtBuffer, xrtBOAlloc(xrtDevice_, bank_size, XRT_BO_FLAGS_NONE, i), { return -1; }); xrtBuffers_.push_back(xrtBuffer); #endif printf("*** allocated bank%u/%u, size=%lu\n", i, num_banks, bank_size); } #endif #ifdef SCOPE { scope_callback_t callback; callback.registerWrite = [](vx_device_h hdevice, uint64_t value) -> int { auto device = (vx_device *)hdevice; uint32_t value_lo = (uint32_t)(value); uint32_t value_hi = (uint32_t)(value >> 32); CHECK_ERR(device->write_register(MMIO_SCP_ADDR, value_lo), { return err; }); CHECK_ERR(device->write_register(MMIO_SCP_ADDR + 4, value_hi), { return err; }); return 0; }; callback.registerRead = [](vx_device_h hdevice, uint64_t *value) -> int { auto device = (vx_device *)hdevice; uint32_t value_lo, value_hi; CHECK_ERR(device->read_register(MMIO_SCP_ADDR, &value_lo), { return err; }); CHECK_ERR(device->read_register(MMIO_SCP_ADDR + 4, &value_hi), { return err; }); *value = (((uint64_t)value_hi) << 32) | value_lo; return 0; }; CHECK_ERR(vx_scope_start(&callback, this, -1, -1), { return err; }); } #endif #ifdef CHIPSCOPE std::cout << "\nPress ENTER to continue after setting up ILA trigger..." << std::endl; std::cin.ignore(std::numeric_limits::max(), '\n'); #endif return 0; } int get_caps(uint32_t caps_id, uint64_t *value) { uint64_t _value; switch (caps_id) { case VX_CAPS_VERSION: _value = (dev_caps_ >> 0) & 0xff; break; case VX_CAPS_NUM_THREADS: _value = (dev_caps_ >> 8) & 0xff; break; case VX_CAPS_NUM_WARPS: _value = (dev_caps_ >> 16) & 0xff; break; case VX_CAPS_NUM_CORES: _value = (dev_caps_ >> 24) & 0xffff; break; case VX_CAPS_CACHE_LINE_SIZE: _value = CACHE_BLOCK_SIZE; break; case VX_CAPS_GLOBAL_MEM_SIZE: _value = global_mem_size_; break; case VX_CAPS_LOCAL_MEM_SIZE: _value = 1ull << ((dev_caps_ >> 40) & 0xff); break; case VX_CAPS_ISA_FLAGS: _value = isa_caps_; break; case VX_CAPS_NUM_MEM_BANKS: _value = 1 << ((dev_caps_ >> 48) & 0x7); break; case VX_CAPS_MEM_BANK_SIZE: _value = 1ull << (20 + ((dev_caps_ >> 51) & 0x1f)); break; default: fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id); std::abort(); return -1; } *value = _value; return 0; } int mem_alloc(uint64_t size, int flags, uint64_t *dev_addr) { uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); uint64_t addr; CHECK_ERR(global_mem_.allocate(asize, &addr), { return err; }); #ifndef BANK_INTERLEAVE uint32_t bank_id; CHECK_ERR(this->get_bank_info(addr, &bank_id, nullptr), { global_mem_.release(addr); return err; }); CHECK_ERR(get_buffer(bank_id, nullptr), { global_mem_.release(addr); return err; }); #endif CHECK_ERR(this->mem_access(addr, size, flags), { global_mem_.release(addr); return err; }); *dev_addr = addr; return 0; } int mem_reserve(uint64_t dev_addr, uint64_t size, int flags) { CHECK_ERR(global_mem_.reserve(dev_addr, size), { return err; }); #ifndef BANK_INTERLEAVE uint32_t bank_id; CHECK_ERR(this->get_bank_info(dev_addr, &bank_id, nullptr), { global_mem_.release(dev_addr); return err; }); CHECK_ERR(get_buffer(bank_id, nullptr), { global_mem_.release(dev_addr); return err; }); #endif CHECK_ERR(this->mem_access(dev_addr, size, flags), { global_mem_.release(dev_addr); return err; }); return 0; } int mem_free(uint64_t dev_addr) { CHECK_ERR(global_mem_.release(dev_addr), { return err; }); #ifdef BANK_INTERLEAVE if (0 == global_mem_.allocated()) { #ifndef CPP_API for (auto &entry : xrtBuffers_) { xrtBOFree(entry); } #endif xrtBuffers_.clear(); } #else uint32_t bank_id; CHECK_ERR(this->get_bank_info(dev_addr, &bank_id, nullptr), { return err; }); auto it = xrtBuffers_.find(bank_id); if (it != xrtBuffers_.end()) { auto count = --it->second.count; if (0 == count) { printf("freeing bank%d...\n", bank_id); #ifndef CPP_API xrtBOFree(it->second.xrtBuffer); #endif xrtBuffers_.erase(it); } } else { fprintf(stderr, "[VXDRV] Error: invalid device memory address: 0x%lx\n", dev_addr); return -1; } #endif return 0; } int mem_access(uint64_t /*dev_addr*/, uint64_t /*size*/, int /*flags*/) { return 0; } int mem_info(uint64_t *mem_free, uint64_t *mem_used) const { if (mem_free) *mem_free = global_mem_.free(); if (mem_used) *mem_used = global_mem_.allocated(); return 0; } int write_register(uint32_t addr, uint32_t value) { #ifdef CPP_API xrtKernel_.write_register(addr, value); #else CHECK_ERR(xrtKernelWriteRegister(xrtKernel_, addr, value), { dump_xrt_error(xrtDevice_, err); return err; }); #endif return 0; } int read_register(uint32_t addr, uint32_t *value) { #ifdef CPP_API *value = xrtKernel_.read_register(addr); #else CHECK_ERR(xrtKernelReadRegister(xrtKernel_, addr, value), { dump_xrt_error(xrtDevice_, err); return err; }); #endif return 0; } int upload(uint64_t dev_addr, const void *src, uint64_t size) { auto host_ptr = (const uint8_t *)src; // check alignment if (!is_aligned(dev_addr, CACHE_BLOCK_SIZE)) return -1; auto asize = aligned_size(size, CACHE_BLOCK_SIZE); // bound checking if (dev_addr + asize > global_mem_size_) return -1; for (uint64_t end = dev_addr + asize; dev_addr < end; dev_addr += CACHE_BLOCK_SIZE, host_ptr += CACHE_BLOCK_SIZE) { #ifdef BANK_INTERLEAVE asize = CACHE_BLOCK_SIZE; #else end = 0; #endif uint32_t bo_index; uint64_t bo_offset; xrt_buffer_t xrtBuffer; CHECK_ERR(this->get_bank_info(dev_addr, &bo_index, &bo_offset), { return err; }); CHECK_ERR(this->get_buffer(bo_index, &xrtBuffer), { return err; }); #ifdef CPP_API xrtBuffer.write(host_ptr, size, bo_offset); xrtBuffer.sync(XCL_BO_SYNC_BO_TO_DEVICE, size, bo_offset); #else CHECK_ERR(xrtBOWrite(xrtBuffer, host_ptr, size, bo_offset), { dump_xrt_error(xrtDevice_, err); return err; }); CHECK_ERR(xrtBOSync(xrtBuffer, XCL_BO_SYNC_BO_TO_DEVICE, size, bo_offset), { dump_xrt_error(xrtDevice_, err); return err; }); #endif } return 0; } int download(void *dest, uint64_t dev_addr, uint64_t size) { auto host_ptr = (uint8_t *)dest; // check alignment if (!is_aligned(dev_addr, CACHE_BLOCK_SIZE)) return -1; auto asize = aligned_size(size, CACHE_BLOCK_SIZE); // bound checking if (dev_addr + asize > global_mem_size_) return -1; for (uint64_t end = dev_addr + asize; dev_addr < end; dev_addr += CACHE_BLOCK_SIZE, host_ptr += CACHE_BLOCK_SIZE) { #ifdef BANK_INTERLEAVE asize = CACHE_BLOCK_SIZE; #else end = 0; #endif uint32_t bo_index; uint64_t bo_offset; xrt_buffer_t xrtBuffer; CHECK_ERR(this->get_bank_info(dev_addr, &bo_index, &bo_offset), { return err; }); CHECK_ERR(this->get_buffer(bo_index, &xrtBuffer), { return err; }); #ifdef CPP_API xrtBuffer.sync(XCL_BO_SYNC_BO_FROM_DEVICE, size, bo_offset); xrtBuffer.read(host_ptr, size, bo_offset); #else CHECK_ERR(xrtBOSync(xrtBuffer, XCL_BO_SYNC_BO_FROM_DEVICE, size, bo_offset), { dump_xrt_error(xrtDevice_, err); return err; }); CHECK_ERR(xrtBORead(xrtBuffer, host_ptr, size, bo_offset), { dump_xrt_error(xrtDevice_, err); return err; }); #endif } return 0; } int start(uint64_t krnl_addr, uint64_t args_addr) { // set kernel info CHECK_ERR(this->dcr_write(VX_DCR_BASE_STARTUP_ADDR0, krnl_addr & 0xffffffff), { return err; }); CHECK_ERR(this->dcr_write(VX_DCR_BASE_STARTUP_ADDR1, krnl_addr >> 32), { return err; }); CHECK_ERR(this->dcr_write(VX_DCR_BASE_STARTUP_ARG0, args_addr & 0xffffffff), { return err; }); CHECK_ERR(this->dcr_write(VX_DCR_BASE_STARTUP_ARG1, args_addr >> 32), { return err; }); // start execution CHECK_ERR(this->write_register(MMIO_CTL_ADDR, CTL_AP_START), { return err; }); // clear mpm cache mpm_cache_.clear(); return 0; } int ready_wait(uint64_t timeout) { struct timespec sleep_time; #ifndef NDEBUG sleep_time.tv_sec = 1; sleep_time.tv_nsec = 0; #else sleep_time.tv_sec = 0; sleep_time.tv_nsec = 1000000; #endif // to milliseconds uint64_t sleep_time_ms = (sleep_time.tv_sec * 1000) + (sleep_time.tv_nsec / 1000000); for (;;) { uint32_t status = 0; CHECK_ERR(this->read_register(MMIO_CTL_ADDR, &status), { return err; }); bool is_done = (status & CTL_AP_DONE) == CTL_AP_DONE; if (is_done) break; if (0 == timeout) { return -1; } nanosleep(&sleep_time, nullptr); timeout -= sleep_time_ms; }; return 0; } int dcr_write(uint32_t addr, uint32_t value) { CHECK_ERR(this->write_register(MMIO_DCR_ADDR, addr), { return err; }); CHECK_ERR(this->write_register(MMIO_DCR_ADDR + 4, value), { return err; }); dcrs_.write(addr, value); return 0; } int dcr_read(uint32_t addr, uint32_t *value) const { return dcrs_.read(addr, value); } int mpm_query(uint32_t addr, uint32_t core_id, uint64_t *value) { uint32_t offset = addr - VX_CSR_MPM_BASE; if (offset > 31) return -1; if (mpm_cache_.count(core_id) == 0) { uint64_t mpm_mem_addr = IO_MPM_ADDR + core_id * 32 * sizeof(uint64_t); CHECK_ERR(this->download(mpm_cache_[core_id].data(), mpm_mem_addr, 32 * sizeof(uint64_t)), { return err; }); } *value = mpm_cache_.at(core_id).at(offset); return 0; } private: MemoryAllocator global_mem_; xrt_device_t xrtDevice_; xrt_kernel_t xrtKernel_; uint64_t dev_caps_; uint64_t isa_caps_; uint64_t global_mem_size_; DeviceConfig dcrs_; std::unordered_map> mpm_cache_; uint32_t lg2_num_banks_; uint32_t lg2_bank_size_; #ifdef BANK_INTERLEAVE std::vector xrtBuffers_; int get_bank_info(uint64_t addr, uint32_t *pIdx, uint64_t *pOff) { uint32_t num_banks = 1 << lg2_num_banks_; uint64_t block_addr = addr / CACHE_BLOCK_SIZE; uint32_t index = block_addr & (num_banks - 1); uint64_t offset = (block_addr >> lg2_num_banks_) * CACHE_BLOCK_SIZE; if (pIdx) { *pIdx = index; } if (pOff) { *pOff = offset; } //printf("get_bank_info(addr=0x%lx, bank=%d, offset=0x%lx\n", addr, index, offset); return 0; } int get_buffer(uint32_t bank_id, xrt_buffer_t *pBuf) { if (pBuf) { *pBuf = xrtBuffers_.at(bank_id); } return 0; } #else struct buf_cnt_t { xrt_buffer_t xrtBuffer; uint32_t count; }; std::unordered_map xrtBuffers_; int get_bank_info(uint64_t addr, uint32_t *pIdx, uint64_t *pOff) { uint32_t num_banks = 1 << lg2_num_banks_; uint64_t bank_size = 1ull << lg2_bank_size_; uint32_t index = addr >> lg2_bank_size_; uint64_t offset = addr & (bank_size - 1); if (index > num_banks) { fprintf(stderr, "[VXDRV] Error: address out of range: 0x%lx\n", addr); return -1; } if (pIdx) { *pIdx = index; } if (pOff) { *pOff = offset; } //printf("get_bank_info(addr=0x%lx, bank=%d, offset=0x%lx\n", addr, index, offset); return 0; } int get_buffer(uint32_t bank_id, xrt_buffer_t *pBuf) { auto it = xrtBuffers_.find(bank_id); if (it != xrtBuffers_.end()) { if (pBuf) { *pBuf = it->second.xrtBuffer; } else { printf("reusing bank%d...\n", bank_id); ++it->second.count; } } else { printf("allocating bank%d...\n", bank_id); uint64_t bank_size = 1ull << lg2_bank_size_; #ifdef CPP_API xrt::bo xrtBuffer(xrtDevice_, bank_size, xrt::bo::flags::normal, bank_id); #else CHECK_HANDLE(xrtBuffer, xrtBOAlloc(xrtDevice_, bank_size, XRT_BO_FLAGS_NONE, bank_id), { return -1; }); #endif xrtBuffers_.insert({bank_id, {xrtBuffer, 1}}); if (pBuf) { *pBuf = xrtBuffer; } } return 0; } #endif }; #include