mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
get device caps from CSRs
This commit is contained in:
parent
f66c251309
commit
2de61b5982
13 changed files with 534 additions and 275 deletions
|
@ -4,31 +4,6 @@
|
|||
#include <vortex.h>
|
||||
#include <VX_config.h>
|
||||
|
||||
extern int vx_dev_caps(int caps_id) {
|
||||
switch (caps_id) {
|
||||
case VX_CAPS_VERSION:
|
||||
return 0;
|
||||
case VX_CAPS_MAX_CORES:
|
||||
return NUM_CORES;
|
||||
case VX_CAPS_MAX_WARPS:
|
||||
return NUM_WARPS;
|
||||
case VX_CAPS_MAX_THREADS:
|
||||
return NUM_THREADS;
|
||||
case VX_CAPS_CACHE_LINESIZE:
|
||||
return 64;
|
||||
case VX_CAPS_LOCAL_MEM_SIZE:
|
||||
return 0xffffffff;
|
||||
case VX_CAPS_ALLOC_BASE_ADDR:
|
||||
return 0x10000000;
|
||||
case VX_CAPS_KERNEL_BASE_ADDR:
|
||||
return 0x80000000;
|
||||
default:
|
||||
std::cout << "invalid caps id: " << caps_id << std::endl;
|
||||
std::abort();
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size) {
|
||||
int err = 0;
|
||||
|
||||
|
@ -36,7 +11,10 @@ extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_
|
|||
return -1;
|
||||
|
||||
uint32_t buffer_transfer_size = 65536;
|
||||
uint32_t kernel_base_addr = vx_dev_caps(VX_CAPS_KERNEL_BASE_ADDR);
|
||||
unsigned kernel_base_addr;
|
||||
err = vx_dev_caps(device, VX_CAPS_KERNEL_BASE_ADDR, &kernel_base_addr);
|
||||
if (err != 0)
|
||||
return -1;
|
||||
|
||||
// allocate device buffer
|
||||
vx_buffer_h buffer;
|
||||
|
@ -47,7 +25,7 @@ extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_
|
|||
// get buffer address
|
||||
auto buf_ptr = (uint8_t*)vx_host_ptr(buffer);
|
||||
|
||||
#if defined(USE_SIMX)
|
||||
#if defined(USE_SIMX)
|
||||
// default startup routine
|
||||
((uint32_t*)buf_ptr)[0] = 0xf1401073;
|
||||
((uint32_t*)buf_ptr)[1] = 0xf1401073;
|
||||
|
|
|
@ -21,15 +21,15 @@ typedef void* vx_buffer_h;
|
|||
#define VX_CAPS_ALLOC_BASE_ADDR 0x6
|
||||
#define VX_CAPS_KERNEL_BASE_ADDR 0x7
|
||||
|
||||
// return device configurations
|
||||
int vx_dev_caps(int caps_id);
|
||||
|
||||
// open the device and connect to it
|
||||
int vx_dev_open(vx_device_h* hdevice);
|
||||
|
||||
// Close the device when all the operations are done
|
||||
int vx_dev_close(vx_device_h hdevice);
|
||||
|
||||
// return device configurations
|
||||
int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value);
|
||||
|
||||
// Allocate shared buffer with device
|
||||
int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer);
|
||||
|
||||
|
@ -58,10 +58,10 @@ int vx_start(vx_device_h hdevice);
|
|||
int vx_ready_wait(vx_device_h hdevice, long long timeout);
|
||||
|
||||
// set device constant registers
|
||||
int vx_set_regiters(int state, int value);
|
||||
int vx_csr_set(vx_device_h hdevice, int address, int value);
|
||||
|
||||
// get device constant registers
|
||||
int vx_get_regiters(int state, int* value);
|
||||
int vx_csr_get(vx_device_h hdevice, int address, int* value);
|
||||
|
||||
///////////////////////////// UTILITY FUNCTIONS ///////////////////////////////
|
||||
|
||||
|
|
|
@ -18,8 +18,8 @@
|
|||
return -1; \
|
||||
} while (false)
|
||||
|
||||
#define MMIO_CSR_SCOPE_CMD (AFU_IMAGE_MMIO_CSR_SCOPE_CMD * 4)
|
||||
#define MMIO_CSR_SCOPE_DATA (AFU_IMAGE_MMIO_CSR_SCOPE_DATA * 4)
|
||||
#define MMIO_SCOPE_READ (AFU_IMAGE_MMIO_SCOPE_READ * 4)
|
||||
#define MMIO_SCOPE_WRITE (AFU_IMAGE_MMIO_SCOPE_WRITE * 4)
|
||||
|
||||
struct scope_signal_t {
|
||||
int width;
|
||||
|
@ -136,7 +136,7 @@ int vx_scope_start(fpga_handle hfpga, uint64_t delay) {
|
|||
if (delay != uint64_t(-1)) {
|
||||
// set start delay
|
||||
uint64_t cmd_delay = ((delay << 3) | 4);
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, cmd_delay));
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, cmd_delay));
|
||||
std::cout << "scope start delay: " << delay << std::endl;
|
||||
}
|
||||
|
||||
|
@ -150,7 +150,7 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
|
|||
if (delay != uint64_t(-1)) {
|
||||
// stop recording
|
||||
uint64_t cmd_stop = ((delay << 3) | 5);
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, cmd_stop));
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, cmd_stop));
|
||||
std::cout << "scope stop delay: " << delay << std::endl;
|
||||
}
|
||||
|
||||
|
@ -170,9 +170,9 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
|
|||
uint64_t frame_width, max_frames, data_valid;
|
||||
|
||||
// wait for recording to terminate
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, 0));
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, 0));
|
||||
do {
|
||||
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_CSR_SCOPE_DATA, &data_valid));
|
||||
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &data_valid));
|
||||
if (data_valid)
|
||||
break;
|
||||
std::this_thread::sleep_for(std::chrono::seconds(1));
|
||||
|
@ -180,15 +180,15 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
|
|||
|
||||
std::cout << "scope trace dump begin..." << std::endl;
|
||||
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, 2));
|
||||
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_CSR_SCOPE_DATA, &frame_width));
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, 2));
|
||||
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &frame_width));
|
||||
std::cout << "scope::frame_width=" << std::dec << frame_width << std::endl;
|
||||
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, 3));
|
||||
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_CSR_SCOPE_DATA, &max_frames));
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, 3));
|
||||
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &max_frames));
|
||||
std::cout << "scope::max_frames=" << std::dec << max_frames << std::endl;
|
||||
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, 1));
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, 1));
|
||||
|
||||
if (fwidth != (int)frame_width) {
|
||||
std::cerr << "invalid frame_width: expecting " << std::dec << fwidth << "!" << std::endl;
|
||||
|
@ -209,7 +209,7 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
|
|||
ofs << "b1 0" << std::endl;
|
||||
|
||||
uint64_t delta;
|
||||
fpga_result res = fpgaReadMMIO64(hfpga, 0, MMIO_CSR_SCOPE_DATA, &delta);
|
||||
fpga_result res = fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &delta);
|
||||
assert(res == FPGA_OK);
|
||||
|
||||
while (delta != 0) {
|
||||
|
@ -228,14 +228,14 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
|
|||
do {
|
||||
if (frame_no == (max_frames-1)) {
|
||||
// verify last frame is valid
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, 0));
|
||||
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_CSR_SCOPE_DATA, &data_valid));
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, 0));
|
||||
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &data_valid));
|
||||
assert(data_valid == 1);
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, 1));
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, 1));
|
||||
}
|
||||
|
||||
uint64_t word;
|
||||
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_CSR_SCOPE_DATA, &word));
|
||||
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &word));
|
||||
|
||||
do {
|
||||
int signal_width = scope_signals[signal_id-1].width;
|
||||
|
@ -267,8 +267,8 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
|
|||
std::cout << "scope trace dump done! - " << (timestamp/2) << " cycles" << std::endl;
|
||||
|
||||
// verify data not valid
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, 0));
|
||||
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_CSR_SCOPE_DATA, &data_valid));
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, 0));
|
||||
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &data_valid));
|
||||
assert(data_valid == 0);
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -1,17 +1,24 @@
|
|||
#include <stdint.h>
|
||||
#include <iostream>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <cstdlib>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <cmath>
|
||||
#include <uuid/uuid.h>
|
||||
#include <opae/fpga.h>
|
||||
#include <vortex.h>
|
||||
#include <VX_config.h>
|
||||
#include "vortex_afu.h"
|
||||
#ifdef SCOPE
|
||||
#include "scope.h"
|
||||
#endif
|
||||
|
||||
#define CACHE_LINESIZE 64
|
||||
#define ALLOC_BASE_ADDR 0x10000000
|
||||
#define LOCAL_MEM_SIZE 0xffffffff
|
||||
|
||||
#define CHECK_RES(_expr) \
|
||||
do { \
|
||||
fpga_result res = _expr; \
|
||||
|
@ -24,22 +31,31 @@
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define CMD_TYPE_READ AFU_IMAGE_CMD_TYPE_READ
|
||||
#define CMD_TYPE_WRITE AFU_IMAGE_CMD_TYPE_WRITE
|
||||
#define CMD_TYPE_RUN AFU_IMAGE_CMD_TYPE_RUN
|
||||
#define CMD_TYPE_CLFLUSH AFU_IMAGE_CMD_TYPE_CLFLUSH
|
||||
#define CMD_MEM_READ AFU_IMAGE_CMD_MEM_READ
|
||||
#define CMD_MEM_WRITE AFU_IMAGE_CMD_MEM_WRITE
|
||||
#define CMD_RUN AFU_IMAGE_CMD_RUN
|
||||
#define CMD_CLFLUSH AFU_IMAGE_CMD_CLFLUSH
|
||||
#define CMD_CSR_READ AFU_IMAGE_CMD_CSR_READ
|
||||
#define CMD_CSR_WRITE AFU_IMAGE_CMD_CSR_WRITE
|
||||
|
||||
#define MMIO_CSR_CMD (AFU_IMAGE_MMIO_CSR_CMD * 4)
|
||||
#define MMIO_CSR_IO_ADDR (AFU_IMAGE_MMIO_CSR_IO_ADDR * 4)
|
||||
#define MMIO_CSR_MEM_ADDR (AFU_IMAGE_MMIO_CSR_MEM_ADDR * 4)
|
||||
#define MMIO_CSR_DATA_SIZE (AFU_IMAGE_MMIO_CSR_DATA_SIZE * 4)
|
||||
#define MMIO_CSR_STATUS (AFU_IMAGE_MMIO_CSR_STATUS * 4)
|
||||
#define MMIO_CMD_TYPE (AFU_IMAGE_MMIO_CMD_TYPE * 4)
|
||||
#define MMIO_IO_ADDR (AFU_IMAGE_MMIO_IO_ADDR * 4)
|
||||
#define MMIO_MEM_ADDR (AFU_IMAGE_MMIO_MEM_ADDR * 4)
|
||||
#define MMIO_DATA_SIZE (AFU_IMAGE_MMIO_DATA_SIZE * 4)
|
||||
#define MMIO_STATUS (AFU_IMAGE_MMIO_STATUS * 4)
|
||||
#define MMIO_CSR_ADDR (AFU_IMAGE_MMIO_CSR_ADDR * 4)
|
||||
#define MMIO_CSR_DATA (AFU_IMAGE_MMIO_CSR_DATA * 4)
|
||||
#define MMIO_CSR_READ (AFU_IMAGE_MMIO_CSR_READ * 4)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
typedef struct vx_device_ {
|
||||
fpga_handle fpga;
|
||||
size_t mem_allocation;
|
||||
int implementation_id;
|
||||
int num_cores;
|
||||
int num_warps;
|
||||
int num_threads;
|
||||
} vx_device_t;
|
||||
|
||||
typedef struct vx_buffer_ {
|
||||
|
@ -62,21 +78,58 @@ inline bool is_aligned(size_t addr, size_t alignment) {
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/// Query a capability of an opened device.
///
/// @param hdevice  Device handle (as produced by vx_dev_open); must not be null.
/// @param caps_id  One of the VX_CAPS_* identifiers.
/// @param value    Output: receives the requested capability; must not be null.
/// @return 0 on success, -1 when hdevice or value is null.
///         An unrecognized caps_id is reported on stderr and aborts the process.
extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
  // Validate the output pointer too: every case below writes through it.
  if (nullptr == hdevice || nullptr == value)
    return -1;

  vx_device_t *device = ((vx_device_t*)hdevice);

  switch (caps_id) {
  // Values cached in the device structure.
  case VX_CAPS_VERSION:
    *value = device->implementation_id;
    break;
  case VX_CAPS_MAX_CORES:
    *value = device->num_cores;
    break;
  case VX_CAPS_MAX_WARPS:
    *value = device->num_warps;
    break;
  case VX_CAPS_MAX_THREADS:
    *value = device->num_threads;
    break;
  // Compile-time constants.
  case VX_CAPS_CACHE_LINESIZE:
    *value = CACHE_LINESIZE;
    break;
  case VX_CAPS_LOCAL_MEM_SIZE:
    *value = LOCAL_MEM_SIZE;
    break;
  case VX_CAPS_ALLOC_BASE_ADDR:
    *value = ALLOC_BASE_ADDR;
    break;
  case VX_CAPS_KERNEL_BASE_ADDR:
    *value = STARTUP_ADDR;
    break;
  default:
    // caps_id is unsigned, so %u is the matching conversion specifier.
    fprintf(stderr, "invalid caps id: %u\n", caps_id);
    std::abort();
    return -1;
  }

  return 0;
}
|
||||
|
||||
extern int vx_dev_open(vx_device_h* hdevice) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
fpga_properties filter = nullptr;
|
||||
fpga_result res;
|
||||
fpga_guid guid;
|
||||
fpga_token accel_token;
|
||||
uint32_t num_matches;
|
||||
fpga_handle accel_handle;
|
||||
vx_device_t* device;
|
||||
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
// ensure that the block size 64
|
||||
assert(64 == vx_dev_caps(VX_CAPS_CACHE_LINESIZE));
|
||||
|
||||
vx_device_t* device;
|
||||
|
||||
// Set up a filter that will search for an accelerator
|
||||
fpgaGetProperties(nullptr, &filter);
|
||||
fpgaPropertiesSetObjectType(filter, FPGA_ACCELERATOR);
|
||||
|
@ -114,17 +167,32 @@ extern int vx_dev_open(vx_device_h* hdevice) {
|
|||
}
|
||||
|
||||
device->fpga = accel_handle;
|
||||
device->mem_allocation = vx_dev_caps(VX_CAPS_ALLOC_BASE_ADDR);
|
||||
device->mem_allocation = ALLOC_BASE_ADDR;
|
||||
|
||||
*hdevice = device;
|
||||
{
|
||||
// Load device CAPS
|
||||
int ret = 0;
|
||||
ret |= vx_csr_get(device, CSR_IMPL_ID, &device->implementation_id);
|
||||
ret |= vx_csr_get(device, CSR_NC, &device->num_cores);
|
||||
ret |= vx_csr_get(device, CSR_NW, &device->num_warps);
|
||||
ret |= vx_csr_get(device, CSR_NT, &device->num_threads);
|
||||
if (ret != 0) {
|
||||
fpgaClose(accel_handle);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef SCOPE
|
||||
{
|
||||
int ret = vx_scope_start(device->fpga, 0);
|
||||
if (ret != 0)
|
||||
int ret = vx_scope_start(accel_handle, 0);
|
||||
if (ret != 0) {
|
||||
fpgaClose(accel_handle);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
*hdevice = device;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -154,10 +222,8 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr)
|
|||
|
||||
vx_device_t *device = ((vx_device_t*)hdevice);
|
||||
|
||||
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||
size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
|
||||
|
||||
size_t asize = align_size(size, line_size);
|
||||
size_t dev_mem_size = LOCAL_MEM_SIZE;
|
||||
size_t asize = align_size(size, CACHE_LINESIZE);
|
||||
|
||||
if (device->mem_allocation + asize > dev_mem_size)
|
||||
return -1;
|
||||
|
@ -182,9 +248,7 @@ extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hb
|
|||
|
||||
vx_device_t *device = ((vx_device_t*)hdevice);
|
||||
|
||||
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||
|
||||
size_t asize = align_size(size, line_size);
|
||||
size_t asize = align_size(size, CACHE_LINESIZE);
|
||||
|
||||
res = fpgaPrepareBuffer(device->fpga, asize, &host_ptr, &wsid, 0);
|
||||
if (FPGA_OK != res) {
|
||||
|
@ -260,7 +324,7 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
|
|||
long long sleep_time_ms = (sleep_time.tv_sec * 1000) + (sleep_time.tv_nsec / 1000000);
|
||||
|
||||
for (;;) {
|
||||
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_STATUS, &data));
|
||||
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_STATUS, &data));
|
||||
if (0 == data || 0 == timeout) {
|
||||
if (data != 0) {
|
||||
fprintf(stdout, "ready-wait timed out: status=%ld\n", data);
|
||||
|
@ -282,17 +346,15 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, si
|
|||
vx_buffer_t *buffer = ((vx_buffer_t*)hbuffer);
|
||||
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
|
||||
|
||||
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||
size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
|
||||
|
||||
size_t asize = align_size(size, line_size);
|
||||
size_t dev_mem_size = LOCAL_MEM_SIZE;
|
||||
size_t asize = align_size(size, CACHE_LINESIZE);
|
||||
|
||||
// check alignment
|
||||
if (!is_aligned(dev_maddr, line_size))
|
||||
if (!is_aligned(dev_maddr, CACHE_LINESIZE))
|
||||
return -1;
|
||||
if (!is_aligned(buffer->io_addr + src_offset, line_size))
|
||||
if (!is_aligned(buffer->io_addr + src_offset, CACHE_LINESIZE))
|
||||
return -1;
|
||||
|
||||
|
||||
// bound checking
|
||||
if (src_offset + asize > buffer->size)
|
||||
return -1;
|
||||
|
@ -303,12 +365,12 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, si
|
|||
if (vx_ready_wait(buffer->hdevice, -1) != 0)
|
||||
return -1;
|
||||
|
||||
auto ls_shift = (int)std::log2(line_size);
|
||||
auto ls_shift = (int)std::log2(CACHE_LINESIZE);
|
||||
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, (buffer->io_addr + src_offset) >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, asize >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_WRITE));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_IO_ADDR, (buffer->io_addr + src_offset) >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_MEM_ADDR, dev_maddr >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_DATA_SIZE, asize >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_MEM_WRITE));
|
||||
|
||||
// Wait for the write operation to finish
|
||||
if (vx_ready_wait(buffer->hdevice, -1) != 0)
|
||||
|
@ -325,15 +387,13 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size,
|
|||
vx_buffer_t *buffer = ((vx_buffer_t*)hbuffer);
|
||||
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
|
||||
|
||||
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||
size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
|
||||
|
||||
size_t asize = align_size(size, line_size);
|
||||
size_t dev_mem_size = LOCAL_MEM_SIZE;
|
||||
size_t asize = align_size(size, CACHE_LINESIZE);
|
||||
|
||||
// check alignment
|
||||
if (!is_aligned(dev_maddr, line_size))
|
||||
if (!is_aligned(dev_maddr, CACHE_LINESIZE))
|
||||
return -1;
|
||||
if (!is_aligned(buffer->io_addr + dest_offset, line_size))
|
||||
if (!is_aligned(buffer->io_addr + dest_offset, CACHE_LINESIZE))
|
||||
return -1;
|
||||
|
||||
// bound checking
|
||||
|
@ -346,12 +406,12 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size,
|
|||
if (vx_ready_wait(buffer->hdevice, -1) != 0)
|
||||
return -1;
|
||||
|
||||
auto ls_shift = (int)std::log2(line_size);
|
||||
auto ls_shift = (int)std::log2(CACHE_LINESIZE);
|
||||
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, (buffer->io_addr + dest_offset) >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, asize >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_READ));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_IO_ADDR, (buffer->io_addr + dest_offset) >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_MEM_ADDR, dev_maddr >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_DATA_SIZE, asize >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_MEM_READ));
|
||||
|
||||
// Wait for the read operation to finish
|
||||
if (vx_ready_wait(buffer->hdevice, -1) != 0)
|
||||
|
@ -367,23 +427,21 @@ extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
|
|||
|
||||
vx_device_t* device = ((vx_device_t*)hdevice);
|
||||
|
||||
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||
|
||||
size_t asize = align_size(size, line_size);
|
||||
size_t asize = align_size(size, CACHE_LINESIZE);
|
||||
|
||||
// check alignment
|
||||
if (!is_aligned(dev_maddr, line_size))
|
||||
if (!is_aligned(dev_maddr, CACHE_LINESIZE))
|
||||
return -1;
|
||||
|
||||
// Ensure ready for new command
|
||||
if (vx_ready_wait(hdevice, -1) != 0)
|
||||
return -1;
|
||||
|
||||
auto ls_shift = (int)std::log2(line_size);
|
||||
auto ls_shift = (int)std::log2(CACHE_LINESIZE);
|
||||
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, asize >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_CLFLUSH));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_MEM_ADDR, dev_maddr >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_DATA_SIZE, asize >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_CLFLUSH));
|
||||
|
||||
// Wait for the flush operation to finish
|
||||
if (vx_ready_wait(hdevice, -1) != 0)
|
||||
|
@ -396,13 +454,59 @@ extern int vx_start(vx_device_h hdevice) {
|
|||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
vx_device_t *device = ((vx_device_t*)hdevice);
|
||||
|
||||
// Ensure ready for new command
|
||||
if (vx_ready_wait(hdevice, -1) != 0)
|
||||
return -1;
|
||||
|
||||
// start execution
|
||||
// start execution
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_RUN));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// set device constant registers
|
||||
extern int vx_csr_set(vx_device_h hdevice, int address, int value) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
vx_device_t *device = ((vx_device_t*)hdevice);
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_RUN));
|
||||
|
||||
// Ensure ready for new command
|
||||
if (vx_ready_wait(hdevice, -1) != 0)
|
||||
return -1;
|
||||
|
||||
// write CSR value
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, address));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA, value));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_CSR_WRITE));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// get device constant registers
|
||||
extern int vx_csr_get(vx_device_h hdevice, int address, int* value) {
|
||||
if (nullptr == hdevice || nullptr == value)
|
||||
return -1;
|
||||
|
||||
vx_device_t *device = ((vx_device_t*)hdevice);
|
||||
|
||||
// Ensure ready for new command
|
||||
if (vx_ready_wait(hdevice, -1) != 0)
|
||||
return -1;
|
||||
|
||||
// write CSR value
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, address));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_CSR_READ));
|
||||
|
||||
// Ensure ready for new command
|
||||
if (vx_ready_wait(hdevice, -1) != 0)
|
||||
return -1;
|
||||
|
||||
uint64_t value64;
|
||||
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_READ, &value64));
|
||||
*value = (int)value64;
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -7,14 +7,19 @@
|
|||
#include <chrono>
|
||||
|
||||
#include <vortex.h>
|
||||
#include <VX_config.h>
|
||||
#include <ram.h>
|
||||
#include <simulator.h>
|
||||
|
||||
#define CACHE_LINESIZE 64
|
||||
#define ALLOC_BASE_ADDR 0x10000000
|
||||
#define LOCAL_MEM_SIZE 0xffffffff
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static size_t align_size(size_t size) {
|
||||
uint32_t cache_block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||
return cache_block_size * ((size + cache_block_size - 1) / cache_block_size);
|
||||
inline size_t align_size(size_t size, size_t alignment) {
|
||||
assert(0 == (alignment & (alignment - 1)));
|
||||
return (size + alignment - 1) & ~(alignment - 1);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -26,7 +31,7 @@ public:
|
|||
vx_buffer(size_t size, vx_device* device)
|
||||
: size_(size)
|
||||
, device_(device) {
|
||||
auto aligned_asize = align_size(size);
|
||||
auto aligned_asize = align_size(size, CACHE_LINESIZE);
|
||||
data_ = malloc(aligned_asize);
|
||||
}
|
||||
|
||||
|
@ -59,7 +64,7 @@ private:
|
|||
class vx_device {
|
||||
public:
|
||||
vx_device() {
|
||||
mem_allocation_ = vx_dev_caps(VX_CAPS_ALLOC_BASE_ADDR);
|
||||
mem_allocation_ = ALLOC_BASE_ADDR;
|
||||
simulator_.attach_ram(&ram_);
|
||||
}
|
||||
|
||||
|
@ -70,8 +75,8 @@ public:
|
|||
}
|
||||
|
||||
int alloc_local_mem(size_t size, size_t* dev_maddr) {
|
||||
size_t asize = align_size(size);
|
||||
auto dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
|
||||
auto dev_mem_size = LOCAL_MEM_SIZE;
|
||||
size_t asize = align_size(size, CACHE_LINESIZE);
|
||||
if (mem_allocation_ + asize > dev_mem_size)
|
||||
return -1;
|
||||
*dev_maddr = mem_allocation_;
|
||||
|
@ -80,7 +85,7 @@ public:
|
|||
}
|
||||
|
||||
int upload(void* src, size_t dest_addr, size_t size, size_t src_offset) {
|
||||
size_t asize = align_size(size);
|
||||
size_t asize = align_size(size, CACHE_LINESIZE);
|
||||
if (dest_addr + asize > ram_.size())
|
||||
return -1;
|
||||
|
||||
|
@ -94,7 +99,7 @@ public:
|
|||
}
|
||||
|
||||
int download(const void* dest, size_t src_addr, size_t size, size_t dest_offset) {
|
||||
size_t asize = align_size(size);
|
||||
size_t asize = align_size(size, CACHE_LINESIZE);
|
||||
if (src_addr + asize > ram_.size())
|
||||
return -1;
|
||||
|
||||
|
@ -156,6 +161,44 @@ private:
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/// Query a device capability from compile-time configuration constants.
///
/// @param hdevice  Device handle; only checked for null here.
/// @param caps_id  One of the VX_CAPS_* identifiers.
/// @param value    Output: receives the requested capability; must not be null.
/// @return 0 on success, -1 when hdevice or value is null.
///         An unrecognized caps_id is printed to stdout and aborts the process.
extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
  // Validate the output pointer too: every case below writes through it.
  if (nullptr == hdevice || nullptr == value)
    return -1;

  switch (caps_id) {
  case VX_CAPS_VERSION:
    *value = IMPLEMENTATION_ID;
    break;
  case VX_CAPS_MAX_CORES:
    *value = NUM_CORES;
    break;
  case VX_CAPS_MAX_WARPS:
    *value = NUM_WARPS;
    break;
  case VX_CAPS_MAX_THREADS:
    *value = NUM_THREADS;
    break;
  case VX_CAPS_CACHE_LINESIZE:
    *value = CACHE_LINESIZE;
    break;
  case VX_CAPS_LOCAL_MEM_SIZE:
    // Same value as the former literal 0xffffffff; use the file-level macro so
    // the allocator and the reported capability cannot drift apart.
    *value = LOCAL_MEM_SIZE;
    break;
  case VX_CAPS_ALLOC_BASE_ADDR:
    // Same value as the former literal 0x10000000.
    *value = ALLOC_BASE_ADDR;
    break;
  case VX_CAPS_KERNEL_BASE_ADDR:
    *value = STARTUP_ADDR;
    break;
  default:
    std::cout << "invalid caps id: " << caps_id << std::endl;
    std::abort();
    return -1;
  }

  return 0;
}
|
||||
|
||||
extern int vx_dev_open(vx_device_h* hdevice) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
|
|
@ -11,13 +11,16 @@
|
|||
#include <core.h>
|
||||
#include <VX_config.h>
|
||||
|
||||
#define PAGE_SIZE 4096
|
||||
#define CACHE_LINESIZE 64
|
||||
#define PAGE_SIZE 4096
|
||||
#define ALLOC_BASE_ADDR 0x10000000
|
||||
#define LOCAL_MEM_SIZE 0xffffffff
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static size_t align_size(size_t size) {
|
||||
uint32_t cache_block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||
return cache_block_size * ((size + cache_block_size - 1) / cache_block_size);
|
||||
inline size_t align_size(size_t size, size_t alignment) {
|
||||
assert(0 == (alignment & (alignment - 1)));
|
||||
return (size + alignment - 1) & ~(alignment - 1);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -29,7 +32,7 @@ public:
|
|||
vx_buffer(size_t size, vx_device* device)
|
||||
: size_(size)
|
||||
, device_(device) {
|
||||
auto aligned_asize = align_size(size);
|
||||
auto aligned_asize = align_size(size, CACHE_LINESIZE);
|
||||
data_ = malloc(aligned_asize);
|
||||
}
|
||||
|
||||
|
@ -65,7 +68,7 @@ public:
|
|||
: is_done_(false)
|
||||
, is_running_(false)
|
||||
, thread_(__thread_proc__, this) {
|
||||
mem_allocation_ = vx_dev_caps(VX_CAPS_ALLOC_BASE_ADDR);
|
||||
mem_allocation_ = ALLOC_BASE_ADDR;
|
||||
}
|
||||
|
||||
~vx_device() {
|
||||
|
@ -77,8 +80,8 @@ public:
|
|||
}
|
||||
|
||||
int alloc_local_mem(size_t size, size_t* dev_maddr) {
|
||||
auto asize = align_size(size);
|
||||
auto dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
|
||||
auto dev_mem_size = LOCAL_MEM_SIZE;
|
||||
auto asize = align_size(size, CACHE_LINESIZE);
|
||||
if (mem_allocation_ + asize > dev_mem_size)
|
||||
return -1;
|
||||
*dev_maddr = mem_allocation_;
|
||||
|
@ -87,7 +90,7 @@ public:
|
|||
}
|
||||
|
||||
int upload(void* src, size_t dest_addr, size_t size, size_t src_offset) {
|
||||
auto asize = align_size(size);
|
||||
auto asize = align_size(size, CACHE_LINESIZE);
|
||||
if (dest_addr + asize > ram_.size())
|
||||
return -1;
|
||||
|
||||
|
@ -101,7 +104,7 @@ public:
|
|||
}
|
||||
|
||||
int download(const void* dest, size_t src_addr, size_t size, size_t dest_offset) {
|
||||
size_t asize = align_size(size);
|
||||
size_t asize = align_size(size, CACHE_LINESIZE);
|
||||
if (src_addr + asize > ram_.size())
|
||||
return -1;
|
||||
|
||||
|
@ -216,6 +219,44 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
/// Query a device capability from compile-time configuration constants.
///
/// @param hdevice  Device handle; only checked for null here.
/// @param caps_id  One of the VX_CAPS_* identifiers.
/// @param value    Output: receives the requested capability; must not be null.
/// @return 0 on success, -1 when hdevice or value is null.
///         An unrecognized caps_id is printed to stdout and aborts the process.
extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
  // Validate the output pointer too: every case below writes through it.
  if (nullptr == hdevice || nullptr == value)
    return -1;

  switch (caps_id) {
  case VX_CAPS_VERSION:
    *value = IMPLEMENTATION_ID;
    break;
  case VX_CAPS_MAX_CORES:
    *value = NUM_CORES;
    break;
  case VX_CAPS_MAX_WARPS:
    *value = NUM_WARPS;
    break;
  case VX_CAPS_MAX_THREADS:
    *value = NUM_THREADS;
    break;
  case VX_CAPS_CACHE_LINESIZE:
    *value = CACHE_LINESIZE;
    break;
  case VX_CAPS_LOCAL_MEM_SIZE:
    *value = LOCAL_MEM_SIZE;
    break;
  case VX_CAPS_ALLOC_BASE_ADDR:
    *value = ALLOC_BASE_ADDR;
    break;
  case VX_CAPS_KERNEL_BASE_ADDR:
    *value = STARTUP_ADDR;
    break;
  default:
    std::cout << "invalid caps id: " << caps_id << std::endl;
    std::abort();
    return -1;
  }

  return 0;
}
|
||||
|
||||
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
|
||||
if (nullptr == hdevice
|
||||
|| nullptr == dev_maddr
|
||||
|
|
|
@ -8,6 +8,10 @@ extern int vx_dev_close(vx_device_h /*hdevice*/) {
|
|||
return -1;
|
||||
}
|
||||
|
||||
// Stub implementation: ignores all arguments, never writes to the output
// pointer, and always reports failure (-1).
extern int vx_dev_caps(vx_device_h /*hdevice*/,
                       unsigned /*caps_id*/,
                       unsigned* /*value*/) {
  return -1;
}
|
||||
|
||||
extern int vx_alloc_dev_mem(vx_device_h /*hdevice*/, size_t /*size*/, size_t* /*dev_maddr*/) {
|
||||
return -1;
|
||||
}
|
||||
|
|
|
@ -179,7 +179,12 @@ int main(int argc, char *argv[]) {
|
|||
count = 1;
|
||||
}
|
||||
|
||||
uint32_t max_cores = vx_dev_caps(VX_CAPS_MAX_CORES);
|
||||
// open device connection
|
||||
std::cout << "open device connection" << std::endl;
|
||||
RT_CHECK(vx_dev_open(&device));
|
||||
|
||||
unsigned max_cores;
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
|
||||
uint32_t num_points = max_cores * count;
|
||||
uint32_t num_blocks = (num_points * sizeof(uint32_t) + 63) / 64;
|
||||
uint32_t buf_size = num_blocks * 64;
|
||||
|
@ -187,10 +192,6 @@ int main(int argc, char *argv[]) {
|
|||
std::cout << "number of points: " << num_points << std::endl;
|
||||
std::cout << "buffer size: " << buf_size << " bytes" << std::endl;
|
||||
|
||||
// open device connection
|
||||
std::cout << "open device connection" << std::endl;
|
||||
RT_CHECK(vx_dev_open(&device));
|
||||
|
||||
// allocate device memory
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.src_ptr = value;
|
||||
|
|
|
@ -110,9 +110,14 @@ int main(int argc, char *argv[]) {
|
|||
count = 1;
|
||||
}
|
||||
|
||||
uint32_t max_cores = vx_dev_caps(VX_CAPS_MAX_CORES);
|
||||
uint32_t max_warps = vx_dev_caps(VX_CAPS_MAX_WARPS);
|
||||
uint32_t max_threads = vx_dev_caps(VX_CAPS_MAX_THREADS);
|
||||
// open device connection
|
||||
std::cout << "open device connection" << std::endl;
|
||||
RT_CHECK(vx_dev_open(&device));
|
||||
|
||||
unsigned max_cores, max_warps, max_threads;
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));
|
||||
|
||||
uint32_t num_points = count * max_cores * max_warps * max_threads;
|
||||
uint32_t buf_size = num_points * sizeof(uint32_t);
|
||||
|
@ -120,10 +125,6 @@ int main(int argc, char *argv[]) {
|
|||
std::cout << "number of points: " << num_points << std::endl;
|
||||
std::cout << "buffer size: " << buf_size << " bytes" << std::endl;
|
||||
|
||||
// open device connection
|
||||
std::cout << "open device connection" << std::endl;
|
||||
RT_CHECK(vx_dev_open(&device));
|
||||
|
||||
// upload program
|
||||
std::cout << "upload program" << std::endl;
|
||||
RT_CHECK(vx_upload_kernel_file(device, kernel_file));
|
||||
|
|
|
@ -5,18 +5,23 @@
|
|||
"clock-frequency-high": "auto",
|
||||
"clock-frequency-low": "auto",
|
||||
|
||||
"mmio-csr-cmd": 10,
|
||||
"mmio-csr-io-addr": 12,
|
||||
"mmio-csr-mem-addr": 14,
|
||||
"mmio-csr-data-size": 16,
|
||||
"mmio-csr-status": 18,
|
||||
"mmio-csr-scope-cmd": 20,
|
||||
"mmio-csr-scope-data": 22,
|
||||
"cmd-mem-read": 1,
|
||||
"cmd-mem-write": 2,
|
||||
"cmd-run": 3,
|
||||
"cmd-clflush": 4,
|
||||
"cmd-csr-read": 5,
|
||||
"cmd-csr-write": 6,
|
||||
|
||||
"cmd-type-read": 1,
|
||||
"cmd-type-write": 2,
|
||||
"cmd-type-run": 3,
|
||||
"cmd-type-clflush": 4,
|
||||
"mmio-cmd-type": 10,
|
||||
"mmio-io-addr": 12,
|
||||
"mmio-mem-addr": 14,
|
||||
"mmio-data-size": 16,
|
||||
"mmio-status": 18,
|
||||
"mmio-scope-read": 20,
|
||||
"mmio-scope-write": 22,
|
||||
"mmio-csr-addr": 24,
|
||||
"mmio-csr-data": 26,
|
||||
"mmio-csr-read": 28,
|
||||
|
||||
"afu-top-interface":
|
||||
{
|
||||
|
|
|
@ -53,19 +53,25 @@ localparam CCI_RW_QUEUE_SIZE = 1024;
|
|||
localparam AFU_ID_L = 16'h0002; // AFU ID Lower
|
||||
localparam AFU_ID_H = 16'h0004; // AFU ID Higher
|
||||
|
||||
localparam CMD_TYPE_READ = `AFU_IMAGE_CMD_TYPE_READ;
|
||||
localparam CMD_TYPE_WRITE = `AFU_IMAGE_CMD_TYPE_WRITE;
|
||||
localparam CMD_TYPE_RUN = `AFU_IMAGE_CMD_TYPE_RUN;
|
||||
localparam CMD_TYPE_CLFLUSH = `AFU_IMAGE_CMD_TYPE_CLFLUSH;
|
||||
localparam CMD_MEM_READ = `AFU_IMAGE_CMD_MEM_READ;
|
||||
localparam CMD_MEM_WRITE = `AFU_IMAGE_CMD_MEM_WRITE;
|
||||
localparam CMD_RUN = `AFU_IMAGE_CMD_RUN;
|
||||
localparam CMD_CLFLUSH = `AFU_IMAGE_CMD_CLFLUSH;
|
||||
localparam CMD_CSR_READ = `AFU_IMAGE_CMD_CSR_READ;
|
||||
localparam CMD_CSR_WRITE = `AFU_IMAGE_CMD_CSR_WRITE;
|
||||
|
||||
localparam MMIO_CSR_CMD = `AFU_IMAGE_MMIO_CSR_CMD;
|
||||
localparam MMIO_CSR_IO_ADDR = `AFU_IMAGE_MMIO_CSR_IO_ADDR;
|
||||
localparam MMIO_CSR_MEM_ADDR = `AFU_IMAGE_MMIO_CSR_MEM_ADDR;
|
||||
localparam MMIO_CSR_DATA_SIZE = `AFU_IMAGE_MMIO_CSR_DATA_SIZE;
|
||||
localparam MMIO_CSR_STATUS = `AFU_IMAGE_MMIO_CSR_STATUS;
|
||||
localparam MMIO_CMD_TYPE = `AFU_IMAGE_MMIO_CMD_TYPE;
|
||||
localparam MMIO_IO_ADDR = `AFU_IMAGE_MMIO_IO_ADDR;
|
||||
localparam MMIO_MEM_ADDR = `AFU_IMAGE_MMIO_MEM_ADDR;
|
||||
localparam MMIO_DATA_SIZE = `AFU_IMAGE_MMIO_DATA_SIZE;
|
||||
localparam MMIO_STATUS = `AFU_IMAGE_MMIO_STATUS;
|
||||
|
||||
localparam MMIO_CSR_SCOPE_CMD = `AFU_IMAGE_MMIO_CSR_SCOPE_CMD;
|
||||
localparam MMIO_CSR_SCOPE_DATA= `AFU_IMAGE_MMIO_CSR_SCOPE_DATA;
|
||||
localparam MMIO_SCOPE_READ = `AFU_IMAGE_MMIO_SCOPE_READ;
|
||||
localparam MMIO_SCOPE_WRITE = `AFU_IMAGE_MMIO_SCOPE_WRITE;
|
||||
|
||||
localparam MMIO_CSR_ADDR = `AFU_IMAGE_MMIO_CSR_ADDR;
|
||||
localparam MMIO_CSR_DATA = `AFU_IMAGE_MMIO_CSR_DATA;
|
||||
localparam MMIO_CSR_READ = `AFU_IMAGE_MMIO_CSR_READ;
|
||||
|
||||
logic [127:0] afu_id = `AFU_ACCEL_UUID;
|
||||
|
||||
|
@ -75,7 +81,9 @@ typedef enum logic[3:0] {
|
|||
STATE_WRITE,
|
||||
STATE_START,
|
||||
STATE_RUN,
|
||||
STATE_CLFLUSH
|
||||
STATE_CLFLUSH,
|
||||
STATE_CSR_READ,
|
||||
STATE_CSR_WRITE
|
||||
} state_t;
|
||||
|
||||
typedef logic [$clog2(CCI_RD_WINDOW_SIZE)-1:0] t_cci_rdq_tag;
|
||||
|
@ -134,20 +142,24 @@ logic avs_rdq_empty;
|
|||
logic avs_rdq_full;
|
||||
`DEBUG_END
|
||||
|
||||
// CSR variables //////////////////////////////////////////////////////////////
|
||||
// CMD variables //////////////////////////////////////////////////////////////
|
||||
|
||||
logic [2:0] csr_cmd;
|
||||
t_ccip_clAddr csr_io_addr;
|
||||
logic[DRAM_ADDR_WIDTH-1:0] csr_mem_addr;
|
||||
logic[DRAM_ADDR_WIDTH-1:0] csr_data_size;
|
||||
logic [2:0] cmd_type;
|
||||
t_ccip_clAddr cmd_io_addr;
|
||||
logic[DRAM_ADDR_WIDTH-1:0] cmd_mem_addr;
|
||||
logic[DRAM_ADDR_WIDTH-1:0] cmd_data_size;
|
||||
|
||||
`ifdef SCOPE
|
||||
logic [63:0] csr_scope_cmd;
|
||||
logic [63:0] csr_scope_data;
|
||||
logic csr_scope_read;
|
||||
logic csr_scope_write;
|
||||
logic [63:0] cmd_scope_rdata;
|
||||
logic [63:0] cmd_scope_wdata;
|
||||
logic cmd_scope_read;
|
||||
logic cmd_scope_write;
|
||||
`endif
|
||||
|
||||
logic [31:0] cmd_csr_addr;
|
||||
logic [31:0] cmd_csr_rdata;
|
||||
logic [31:0] cmd_csr_wdata;
|
||||
|
||||
// MMIO controller ////////////////////////////////////////////////////////////
|
||||
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
|
@ -159,9 +171,9 @@ t_if_ccip_c2_Tx mmio_tx;
|
|||
assign af2cp_sTxPort.c2 = mmio_tx;
|
||||
|
||||
`ifdef SCOPE
|
||||
assign csr_scope_cmd = 64'(cp2af_sRxPort.c0.data);
|
||||
assign csr_scope_write = cp2af_sRxPort.c0.mmioWrValid && (MMIO_CSR_SCOPE_CMD == mmio_hdr.address);
|
||||
assign csr_scope_read = cp2af_sRxPort.c0.mmioRdValid && (MMIO_CSR_SCOPE_DATA == mmio_hdr.address);
|
||||
assign cmd_scope_wdata = 64'(cp2af_sRxPort.c0.data);
|
||||
assign cmd_scope_read = cp2af_sRxPort.c0.mmioRdValid && (MMIO_SCOPE_READ == mmio_hdr.address);
|
||||
assign cmd_scope_write = cp2af_sRxPort.c0.mmioWrValid && (MMIO_SCOPE_WRITE == mmio_hdr.address);
|
||||
`endif
|
||||
|
||||
always_ff @(posedge clk)
|
||||
|
@ -170,57 +182,63 @@ begin
|
|||
mmio_tx.hdr <= 0;
|
||||
mmio_tx.data <= 0;
|
||||
mmio_tx.mmioRdValid <= 0;
|
||||
csr_cmd <= 0;
|
||||
csr_io_addr <= 0;
|
||||
csr_mem_addr <= 0;
|
||||
csr_data_size <= 0;
|
||||
cmd_type <= 0;
|
||||
cmd_io_addr <= 0;
|
||||
cmd_mem_addr <= 0;
|
||||
cmd_data_size <= 0;
|
||||
end
|
||||
else begin
|
||||
|
||||
csr_cmd <= 0;
|
||||
cmd_type <= 0;
|
||||
mmio_tx.mmioRdValid <= 0;
|
||||
|
||||
// serve MMIO write request
|
||||
if (cp2af_sRxPort.c0.mmioWrValid)
|
||||
begin
|
||||
case (mmio_hdr.address)
|
||||
MMIO_CSR_IO_ADDR: begin
|
||||
csr_io_addr <= t_ccip_clAddr'(cp2af_sRxPort.c0.data);
|
||||
MMIO_IO_ADDR: begin
|
||||
cmd_io_addr <= t_ccip_clAddr'(cp2af_sRxPort.c0.data);
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: CSR_IO_ADDR: 0x%0h", $time, t_ccip_clAddr'(cp2af_sRxPort.c0.data));
|
||||
$display("%t: MMIO_IO_ADDR: 0x%0h", $time, t_ccip_clAddr'(cp2af_sRxPort.c0.data));
|
||||
`endif
|
||||
end
|
||||
MMIO_CSR_MEM_ADDR: begin
|
||||
csr_mem_addr <= t_local_mem_addr'(cp2af_sRxPort.c0.data);
|
||||
MMIO_MEM_ADDR: begin
|
||||
cmd_mem_addr <= t_local_mem_addr'(cp2af_sRxPort.c0.data);
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: CSR_MEM_ADDR: 0x%0h", $time, t_local_mem_addr'(cp2af_sRxPort.c0.data));
|
||||
$display("%t: MMIO_MEM_ADDR: 0x%0h", $time, t_local_mem_addr'(cp2af_sRxPort.c0.data));
|
||||
`endif
|
||||
end
|
||||
MMIO_CSR_DATA_SIZE: begin
|
||||
csr_data_size <= $bits(csr_data_size)'(cp2af_sRxPort.c0.data);
|
||||
MMIO_DATA_SIZE: begin
|
||||
cmd_data_size <= $bits(cmd_data_size)'(cp2af_sRxPort.c0.data);
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: CSR_DATA_SIZE: %0d", $time, $bits(csr_data_size)'(cp2af_sRxPort.c0.data));
|
||||
$display("%t: MMIO_DATA_SIZE: %0d", $time, $bits(cmd_data_size)'(cp2af_sRxPort.c0.data));
|
||||
`endif
|
||||
end
|
||||
MMIO_CSR_CMD: begin
|
||||
csr_cmd <= $bits(csr_cmd)'(cp2af_sRxPort.c0.data);
|
||||
MMIO_CMD_TYPE: begin
|
||||
cmd_type <= $bits(cmd_type)'(cp2af_sRxPort.c0.data);
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: CSR_CMD: %0d", $time, $bits(csr_cmd)'(cp2af_sRxPort.c0.data));
|
||||
$display("%t: MMIO_CMD_TYPE: %0d", $time, $bits(cmd_type)'(cp2af_sRxPort.c0.data));
|
||||
`endif
|
||||
end
|
||||
`ifdef SCOPE
|
||||
MMIO_CSR_SCOPE_CMD: begin
|
||||
MMIO_SCOPE_WRITE: begin
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: CSR_SCOPE_CMD: %0h", $time, 64'(cp2af_sRxPort.c0.data));
|
||||
$display("%t: MMIO_SCOPE_WRITE: %0h", $time, 64'(cp2af_sRxPort.c0.data));
|
||||
`endif
|
||||
end
|
||||
`endif
|
||||
default: begin
|
||||
// user-defined CSRs
|
||||
//if (mmio_hdr.address >= MMIO_CSR_USER) begin
|
||||
// write Vortex CSR
|
||||
//end
|
||||
end
|
||||
MMIO_CSR_ADDR: begin
|
||||
cmd_csr_addr <= $bits(cmd_csr_addr)'(cp2af_sRxPort.c0.data);
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: MMIO_CSR_ADDR: %0h", $time, $bits(cmd_csr_addr)'(cp2af_sRxPort.c0.data));
|
||||
`endif
|
||||
end
|
||||
MMIO_CSR_DATA: begin
|
||||
cmd_csr_wdata <= $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data);
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: MMIO_CSR_DATA: %0h", $time, $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data));
|
||||
`endif
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
|
@ -243,22 +261,28 @@ begin
|
|||
AFU_ID_H: mmio_tx.data <= afu_id[127:64]; // afu id hi
|
||||
16'h0006: mmio_tx.data <= 64'h0; // next AFU
|
||||
16'h0008: mmio_tx.data <= 64'h0; // reserved
|
||||
MMIO_CSR_STATUS: begin
|
||||
MMIO_STATUS: begin
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
if (state != state_t'(mmio_tx.data)) begin
|
||||
$display("%t: STATUS: state=%0d", $time, state);
|
||||
$display("%t: MMIO_STATUS: state=%0d", $time, state);
|
||||
end
|
||||
`endif
|
||||
mmio_tx.data <= 64'(state);
|
||||
end
|
||||
`ifdef SCOPE
|
||||
MMIO_CSR_SCOPE_DATA: begin
|
||||
mmio_tx.data <= csr_scope_data;
|
||||
MMIO_SCOPE_READ: begin
|
||||
mmio_tx.data <= cmd_scope_rdata;
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: SCOPE: data=%0h", $time, csr_scope_data);
|
||||
$display("%t: MMIO_SCOPE_READ: data=%0h", $time, cmd_scope_rdata);
|
||||
`endif
|
||||
end
|
||||
`endif
|
||||
MMIO_CSR_READ: begin
|
||||
mmio_tx.data <= cmd_csr_rdata;
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: MMIO_CSR_READ: data=%0h", $time, cmd_csr_rdata);
|
||||
`endif
|
||||
end
|
||||
default: mmio_tx.data <= 64'h0;
|
||||
endcase
|
||||
mmio_tx.mmioRdValid <= 1; // post response
|
||||
|
@ -271,6 +295,8 @@ end
|
|||
logic cmd_read_done;
|
||||
logic cmd_write_done;
|
||||
logic cmd_clflush_done;
|
||||
logic cmd_csr_read_done;
|
||||
logic cmd_csr_write_done;
|
||||
logic cmd_run_done;
|
||||
|
||||
always_ff @(posedge clk)
|
||||
|
@ -285,32 +311,44 @@ begin
|
|||
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
case (csr_cmd)
|
||||
CMD_TYPE_READ: begin
|
||||
case (cmd_type)
|
||||
CMD_MEM_READ: begin
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: STATE READ: ia=%0h da=%0h sz=%0d", $time, csr_io_addr, csr_mem_addr, csr_data_size);
|
||||
$display("%t: STATE READ: ia=%0h addr=%0h size=%0d", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size);
|
||||
`endif
|
||||
state <= STATE_READ;
|
||||
end
|
||||
CMD_TYPE_WRITE: begin
|
||||
CMD_MEM_WRITE: begin
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: STATE WRITE: ia=%0h da=%0h sz=%0d", $time, csr_io_addr, csr_mem_addr, csr_data_size);
|
||||
$display("%t: STATE WRITE: ia=%0h addr=%0h size=%0d", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size);
|
||||
`endif
|
||||
state <= STATE_WRITE;
|
||||
end
|
||||
CMD_TYPE_RUN: begin
|
||||
CMD_RUN: begin
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: STATE START", $time);
|
||||
`endif
|
||||
vx_reset <= 1;
|
||||
state <= STATE_START;
|
||||
end
|
||||
CMD_TYPE_CLFLUSH: begin
|
||||
CMD_CLFLUSH: begin
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: STATE CFLUSH: da=%0h sz=%0d", $time, csr_mem_addr, csr_data_size);
|
||||
$display("%t: STATE CFLUSH: addr=%0h size=%0d", $time, cmd_mem_addr, cmd_data_size);
|
||||
`endif
|
||||
state <= STATE_CLFLUSH;
|
||||
end
|
||||
CMD_CSR_READ: begin
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: STATE CSR_READ: addr=%0h", $time, cmd_csr_addr);
|
||||
`endif
|
||||
state <= STATE_CSR_READ;
|
||||
end
|
||||
CMD_CSR_WRITE: begin
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: STATE CSR_WRITE: addr=%0h data=%0d", $time, cmd_csr_addr, cmd_csr_wdata);
|
||||
`endif
|
||||
state <= STATE_CSR_WRITE;
|
||||
end
|
||||
default: begin
|
||||
state <= state;
|
||||
end
|
||||
|
@ -345,6 +383,18 @@ begin
|
|||
end
|
||||
end
|
||||
|
||||
STATE_CSR_READ: begin
|
||||
if (cmd_csr_read_done) begin
|
||||
state <= STATE_IDLE;
|
||||
end
|
||||
end
|
||||
|
||||
STATE_CSR_WRITE: begin
|
||||
if (cmd_csr_write_done) begin
|
||||
state <= STATE_IDLE;
|
||||
end
|
||||
end
|
||||
|
||||
default: begin
|
||||
state <= state;
|
||||
end
|
||||
|
@ -385,7 +435,7 @@ assign cci_dram_rd_req_enable = (state == STATE_READ)
|
|||
|
||||
assign cci_dram_wr_req_enable = (state == STATE_WRITE)
|
||||
&& !cci_rdq_empty
|
||||
&& (cci_dram_wr_req_ctr < csr_data_size);
|
||||
&& (cci_dram_wr_req_ctr < cmd_data_size);
|
||||
|
||||
assign vx_dram_req_enable = vortex_enabled && (avs_pending_reads < AVS_RD_QUEUE_SIZE);
|
||||
assign vx_dram_rd_req_enable = vx_dram_req_enable && vx_dram_req_valid && !vx_dram_req_rw;
|
||||
|
@ -414,19 +464,19 @@ end
|
|||
// Combinational mux for the avs_* request signals. For each of avs_address,
// avs_byteenable and avs_writedata, the read/write labels select the
// CCI-side value (cci_dram_*_req_addr, all-ones byte enable, payload taken
// from cci_rdq_dout); every other value of `state` falls through to the
// Vortex DRAM request signals (vx_dram_req_*).
// NOTE(review): the selector is `state`, yet the labels are CMD_* command
// codes rather than STATE_* members of state_t. This appears to rely on the
// command codes and the state encoding sharing the same numeric values --
// confirm, or use the matching STATE_* labels instead.
always_comb
|
||||
begin
|
||||
// Address: CCI read address, CCI write base plus the tag carried in the
// low bits of cci_rdq_dout, or the upper DRAM_ADDR_WIDTH bits of the
// Vortex request address.
case (state)
|
||||
CMD_TYPE_READ: avs_address = cci_dram_rd_req_addr;
|
||||
CMD_TYPE_WRITE: avs_address = cci_dram_wr_req_addr + ((DRAM_ADDR_WIDTH)'(t_cci_rdq_tag'(cci_rdq_dout)));
|
||||
CMD_MEM_READ: avs_address = cci_dram_rd_req_addr;
|
||||
CMD_MEM_WRITE: avs_address = cci_dram_wr_req_addr + ((DRAM_ADDR_WIDTH)'(t_cci_rdq_tag'(cci_rdq_dout)));
|
||||
default: avs_address = vx_dram_req_addr[`VX_DRAM_ADDR_WIDTH-1:`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH];
|
||||
endcase
|
||||
|
||||
// Byte enable: all 64 bytes for the read/write labels, otherwise the
// Vortex-provided mask.
case (state)
|
||||
CMD_TYPE_READ: avs_byteenable = 64'hffffffffffffffff;
|
||||
CMD_TYPE_WRITE: avs_byteenable = 64'hffffffffffffffff;
|
||||
CMD_MEM_READ: avs_byteenable = 64'hffffffffffffffff;
|
||||
CMD_MEM_WRITE: avs_byteenable = 64'hffffffffffffffff;
|
||||
default: avs_byteenable = vx_dram_req_byteen_;
|
||||
endcase
|
||||
|
||||
// Write data: the data field of cci_rdq_dout (the bits above the tag), or
// the Vortex write data shifted left by vx_dram_req_offset.
case (state)
|
||||
CMD_TYPE_WRITE: avs_writedata = cci_rdq_dout[$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:$bits(t_cci_rdq_tag)];
|
||||
CMD_MEM_WRITE: avs_writedata = cci_rdq_dout[$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:$bits(t_cci_rdq_tag)];
|
||||
default: avs_writedata = (DRAM_LINE_WIDTH)'(vx_dram_req_data) << vx_dram_req_offset;
|
||||
endcase
|
||||
end
|
||||
|
@ -434,7 +484,7 @@ end
|
|||
assign avs_read = cci_dram_rd_req_enable || vx_dram_rd_req_enable;
|
||||
assign avs_write = cci_dram_wr_req_enable || vx_dram_wr_req_enable;
|
||||
|
||||
assign cmd_write_done = (cci_dram_wr_req_ctr >= csr_data_size);
|
||||
assign cmd_write_done = (cci_dram_wr_req_ctr >= cmd_data_size);
|
||||
|
||||
always_ff @(posedge clk)
|
||||
begin
|
||||
|
@ -451,12 +501,12 @@ begin
|
|||
else begin
|
||||
|
||||
if (state == STATE_IDLE) begin
|
||||
if (CMD_TYPE_READ == csr_cmd) begin
|
||||
cci_dram_rd_req_addr <= csr_mem_addr;
|
||||
cci_dram_rd_req_ctr <= csr_data_size;
|
||||
if (CMD_MEM_READ == cmd_type) begin
|
||||
cci_dram_rd_req_addr <= cmd_mem_addr;
|
||||
cci_dram_rd_req_ctr <= cmd_data_size;
|
||||
end
|
||||
else if (CMD_TYPE_WRITE == csr_cmd) begin
|
||||
cci_dram_wr_req_addr <= csr_mem_addr;
|
||||
else if (CMD_MEM_WRITE == cmd_type) begin
|
||||
cci_dram_wr_req_addr <= cmd_mem_addr;
|
||||
cci_dram_wr_req_ctr <= 0;
|
||||
end
|
||||
end
|
||||
|
@ -598,17 +648,17 @@ begin
|
|||
else begin
|
||||
|
||||
if ((STATE_IDLE == state)
|
||||
&& (CMD_TYPE_WRITE == csr_cmd)) begin
|
||||
cci_rd_req_addr <= csr_io_addr;
|
||||
&& (CMD_MEM_WRITE == cmd_type)) begin
|
||||
cci_rd_req_addr <= cmd_io_addr;
|
||||
cci_rd_req_ctr <= 0;
|
||||
cci_rd_rsp_ctr <= 0;
|
||||
cci_pending_reads <= 0;
|
||||
cci_rd_req_enable <= (csr_data_size != 0);
|
||||
cci_rd_req_enable <= (cmd_data_size != 0);
|
||||
cci_rd_req_wait <= 0;
|
||||
end
|
||||
|
||||
cci_rd_req_enable <= (STATE_WRITE == state)
|
||||
&& (cci_rd_req_ctr_next < csr_data_size)
|
||||
&& (cci_rd_req_ctr_next < cmd_data_size)
|
||||
&& (cci_pending_reads_next < CCI_RD_QUEUE_SIZE);
|
||||
|
||||
if (cci_rd_req_fire) begin
|
||||
|
@ -618,7 +668,7 @@ begin
|
|||
cci_rd_req_wait <= 1; // end current request batch
|
||||
end
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: CCI Rd Req: addr=%0h, rem=%0d, pending=%0d", $time, cci_rd_req_addr, (csr_data_size - cci_rd_req_ctr_next), cci_pending_reads_next);
|
||||
$display("%t: CCI Rd Req: addr=%0h, rem=%0d, pending=%0d", $time, cci_rd_req_addr, (cmd_data_size - cci_rd_req_ctr_next), cci_pending_reads_next);
|
||||
`endif
|
||||
end
|
||||
|
||||
|
@ -695,9 +745,9 @@ begin
|
|||
else begin
|
||||
|
||||
if ((STATE_IDLE == state)
|
||||
&& (CMD_TYPE_READ == csr_cmd)) begin
|
||||
cci_wr_req_addr <= csr_io_addr;
|
||||
cci_wr_req_ctr <= csr_data_size;
|
||||
&& (CMD_MEM_READ == cmd_type)) begin
|
||||
cci_wr_req_addr <= cmd_io_addr;
|
||||
cci_wr_req_ctr <= cmd_data_size;
|
||||
cci_pending_writes <= 0;
|
||||
end
|
||||
|
||||
|
@ -733,11 +783,11 @@ logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_rsp_ctr, snp_rsp_ctr_next;
|
|||
logic vx_snp_req_fire, vx_snp_rsp_fire;
|
||||
|
||||
// Derive the snoop request base address and size from the host-supplied
// memory address and data size. When the Vortex DRAM line width differs from
// the local DRAM line width, both values are extended by appending
// (`VX_DRAM_ADDR_WIDTH - DRAM_ADDR_WIDTH) zero bits on the low side;
// otherwise they are passed through unchanged.
// NOTE(review): the concatenation assumes `VX_DRAM_ADDR_WIDTH is larger than
// DRAM_ADDR_WIDTH whenever the line widths differ -- confirm.
if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin
|
||||
assign snp_req_baseaddr = {csr_mem_addr, (`VX_DRAM_ADDR_WIDTH - DRAM_ADDR_WIDTH)'(0)};
|
||||
assign snp_req_size = {csr_data_size, (`VX_DRAM_ADDR_WIDTH - DRAM_ADDR_WIDTH)'(0)};
|
||||
assign snp_req_baseaddr = {cmd_mem_addr, (`VX_DRAM_ADDR_WIDTH - DRAM_ADDR_WIDTH)'(0)};
|
||||
assign snp_req_size = {cmd_data_size, (`VX_DRAM_ADDR_WIDTH - DRAM_ADDR_WIDTH)'(0)};
|
||||
end else begin
|
||||
assign snp_req_baseaddr = csr_mem_addr;
|
||||
assign snp_req_size = csr_data_size;
|
||||
assign snp_req_baseaddr = cmd_mem_addr;
|
||||
assign snp_req_size = cmd_data_size;
|
||||
end
|
||||
|
||||
assign vx_snp_req_fire = vx_snp_req_valid && vx_snp_req_ready;
|
||||
|
@ -761,7 +811,7 @@ begin
|
|||
else begin
|
||||
|
||||
if ((STATE_IDLE == state)
|
||||
&& (CMD_TYPE_CLFLUSH == csr_cmd)) begin
|
||||
&& (CMD_CLFLUSH == cmd_type)) begin
|
||||
vx_snp_req_addr <= snp_req_baseaddr;
|
||||
vx_snp_req_tag <= 0;
|
||||
snp_req_ctr <= 0;
|
||||
|
@ -802,6 +852,23 @@ begin
|
|||
end
|
||||
end
|
||||
|
||||
// CSRs///////////////////////////////////////////////////////////////////////
|
||||
|
||||
assign cmd_csr_read_done = 1;
|
||||
assign cmd_csr_write_done = 1;
|
||||
|
||||
// Combinational read mux for host-side CSR reads: drives cmd_csr_rdata from
// the address currently held in cmd_csr_addr. Every listed entry returns a
// configuration macro value; any other address reads back as 0.
// NOTE(review): the `CSR_* labels are macros defined elsewhere; if they are
// narrower than cmd_csr_addr, a match would require the upper address bits to
// be zero -- confirm the widths.
always_comb begin
|
||||
case (cmd_csr_addr)
|
||||
`CSR_VEND_ID : cmd_csr_rdata = `VENDOR_ID;
|
||||
`CSR_ARCH_ID : cmd_csr_rdata = `ARCHITECTURE_ID;
|
||||
`CSR_IMPL_ID : cmd_csr_rdata = `IMPLEMENTATION_ID;
|
||||
`CSR_NT : cmd_csr_rdata = `NUM_THREADS;
|
||||
`CSR_NW : cmd_csr_rdata = `NUM_WARPS;
|
||||
// Core count is reported as cores-per-cluster times cluster count.
`CSR_NC : cmd_csr_rdata = `NUM_CORES * `NUM_CLUSTERS;
|
||||
default : cmd_csr_rdata = 0;
|
||||
endcase
|
||||
end
|
||||
|
||||
// Vortex /////////////////////////////////////////////////////////////////////
|
||||
|
||||
assign cmd_run_done = !vx_busy;
|
||||
|
@ -944,10 +1011,10 @@ VX_scope #(
|
|||
.stop (0),
|
||||
.changed (scope_data_in_ste[1]),
|
||||
.data_in (scope_data_in_ste[SCOPE_DATAW+1:2]),
|
||||
.bus_in (csr_scope_cmd),
|
||||
.bus_out (csr_scope_data),
|
||||
.bus_read (csr_scope_read),
|
||||
.bus_write(csr_scope_write)
|
||||
.bus_in (cmd_scope_wdata),
|
||||
.bus_out (cmd_scope_rdata),
|
||||
.bus_read (cmd_scope_read),
|
||||
.bus_write(cmd_scope_write)
|
||||
);
|
||||
|
||||
`endif
|
||||
|
|
|
@ -59,21 +59,33 @@
|
|||
`define L3_ENABLE (`NUM_CLUSTERS > 1)
|
||||
`endif
|
||||
|
||||
`define CSR_LTID 12'h020
|
||||
`define CSR_LWID 12'h021
|
||||
`define CSR_GTID 12'hF14 // reserved Hardware Thread ID (mhartid)
|
||||
`define CSR_GWID 12'h023
|
||||
`define CSR_GCID 12'h024
|
||||
`define CSR_NT 12'h025
|
||||
`define CSR_NW 12'h026
|
||||
`define CSR_NC 12'h027
|
||||
// Configuration Values =======================================================
|
||||
|
||||
`define CSR_CYCLL 12'hC00
|
||||
`define CSR_CYCLH 12'hC80
|
||||
`define CSR_INSTL 12'hC02
|
||||
`define CSR_INSTH 12'hC82
|
||||
`define VENDOR_ID 0
|
||||
`define ARCHITECTURE_ID 0
|
||||
`define IMPLEMENTATION_ID 0
|
||||
|
||||
// ========================= Dcache Configurable Knobs ========================
|
||||
// CSR Addresses ==============================================================
|
||||
|
||||
`define CSR_VEND_ID 12'hF11
|
||||
`define CSR_ARCH_ID 12'hF12
|
||||
`define CSR_IMPL_ID 12'hF13
|
||||
`define CSR_GTID 12'hF14
|
||||
|
||||
`define CSR_LTID 12'h020
|
||||
`define CSR_LWID 12'h021
|
||||
`define CSR_GWID 12'h023
|
||||
`define CSR_GCID 12'h024
|
||||
`define CSR_NT 12'h025
|
||||
`define CSR_NW 12'h026
|
||||
`define CSR_NC 12'h027
|
||||
|
||||
`define CSR_CYCLE_L 12'hC00
|
||||
`define CSR_CYCLE_H 12'hC80
|
||||
`define CSR_INSTR_L 12'hC02
|
||||
`define CSR_INSTR_H 12'hC82
|
||||
|
||||
// Dcache Configurable Knobs ==================================================
|
||||
|
||||
// Size of cache in bytes
|
||||
`ifndef DCACHE_SIZE
|
||||
|
@ -144,7 +156,7 @@
|
|||
`define DPRFQ_STRIDE 0
|
||||
`endif
|
||||
|
||||
// ========================== Icache Configurable Knobs =======================
|
||||
// Icache Configurable Knobs ==================================================
|
||||
|
||||
// Size of cache in bytes
|
||||
`ifndef ICACHE_SIZE
|
||||
|
@ -210,7 +222,7 @@
|
|||
`define IPRFQ_STRIDE 0
|
||||
`endif
|
||||
|
||||
// =========================== SM Configurable Knobs ==========================
|
||||
// SM Configurable Knobs ======================================================
|
||||
|
||||
// Size of cache in bytes
|
||||
`ifndef SCACHE_SIZE
|
||||
|
@ -247,7 +259,7 @@
|
|||
`define SCWBQ_SIZE `SCREQ_SIZE
|
||||
`endif
|
||||
|
||||
// ======================== L2cache Configurable Knobs ========================
|
||||
// L2cache Configurable Knobs =================================================
|
||||
|
||||
// Size of cache in bytes
|
||||
`ifndef L2CACHE_SIZE
|
||||
|
@ -318,7 +330,7 @@
|
|||
`define L2PRFQ_STRIDE 0
|
||||
`endif
|
||||
|
||||
// ======================== L3cache Configurable Knobs ========================
|
||||
// L3cache Configurable Knobs =================================================
|
||||
|
||||
// Size of cache in bytes
|
||||
`ifndef L3CACHE_SIZE
|
||||
|
|
|
@ -43,18 +43,21 @@ module VX_csr_data #(
|
|||
|
||||
// Combinational CSR read mux: selects read_data from read_addr.
//  - warp / core identification values are computed from warp_num and CORE_ID
//  - NT / NW / NC return configuration macros
//  - cycle and instruction counters are exposed as low/high 32-bit halves of
//    the 64-bit num_cycles / num_instrs values
//  - vendor / architecture / implementation IDs return configuration macros
//  - any other address falls back to the csr_table entry
// NOTE(review): the default arm indexes csr_table with `rd_addr`, not
// `read_addr`; rd_addr is declared outside this span -- presumably a narrowed
// form of the same address, confirm.
always @(*) begin
|
||||
case (read_addr)
|
||||
`CSR_LWID : read_data = 32'(warp_num);
|
||||
`CSR_GTID ,
|
||||
`CSR_GWID : read_data = CORE_ID * `NUM_WARPS + 32'(warp_num);
|
||||
`CSR_GCID : read_data = CORE_ID;
|
||||
`CSR_NT : read_data = `NUM_THREADS;
|
||||
`CSR_NW : read_data = `NUM_WARPS;
|
||||
`CSR_NC : read_data = `NUM_CORES * `NUM_CLUSTERS;
|
||||
`CSR_CYCLL : read_data = num_cycles[31:0];
|
||||
`CSR_CYCLH : read_data = num_cycles[63:32];
|
||||
`CSR_INSTL : read_data = num_instrs[31:0];
|
||||
`CSR_INSTH : read_data = num_instrs[63:32];
|
||||
default: read_data = 32'(csr_table[rd_addr]);
|
||||
`CSR_LWID : read_data = 32'(warp_num);
|
||||
// CSR_GTID and CSR_GWID share one arm and return the same value.
`CSR_GTID ,
|
||||
`CSR_GWID : read_data = CORE_ID * `NUM_WARPS + 32'(warp_num);
|
||||
`CSR_GCID : read_data = CORE_ID;
|
||||
`CSR_NT : read_data = `NUM_THREADS;
|
||||
`CSR_NW : read_data = `NUM_WARPS;
|
||||
`CSR_NC : read_data = `NUM_CORES * `NUM_CLUSTERS;
|
||||
`CSR_CYCLE_L : read_data = num_cycles[31:0];
|
||||
`CSR_CYCLE_H : read_data = num_cycles[63:32];
|
||||
`CSR_INSTR_L : read_data = num_instrs[31:0];
|
||||
`CSR_INSTR_H : read_data = num_instrs[63:32];
|
||||
`CSR_VEND_ID : read_data = `VENDOR_ID;
|
||||
`CSR_ARCH_ID : read_data = `ARCHITECTURE_ID;
|
||||
`CSR_IMPL_ID : read_data = `IMPLEMENTATION_ID;
|
||||
default : read_data = 32'(csr_table[rd_addr]);
|
||||
endcase
|
||||
end
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue