mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
OPAE rtl fixes
This commit is contained in:
parent
13dfd5c8c7
commit
c2e9240b7d
12 changed files with 208 additions and 243 deletions
|
@ -143,6 +143,7 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr)
|
|||
size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
|
||||
|
||||
size_t asize = align_size(size, line_size);
|
||||
|
||||
if (device->mem_allocation + asize > dev_mem_size)
|
||||
return -1;
|
||||
|
||||
|
@ -189,11 +190,11 @@ extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hb
|
|||
return -1;
|
||||
}
|
||||
|
||||
buffer->wsid = wsid;
|
||||
buffer->wsid = wsid;
|
||||
buffer->host_ptr = host_ptr;
|
||||
buffer->io_addr = io_addr;
|
||||
buffer->hdevice = hdevice;
|
||||
buffer->size = size;
|
||||
buffer->io_addr = io_addr;
|
||||
buffer->hdevice = hdevice;
|
||||
buffer->size = asize;
|
||||
|
||||
*hbuffer = buffer;
|
||||
|
||||
|
@ -265,18 +266,18 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, si
|
|||
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||
size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
|
||||
|
||||
size_t asize = align_size(size, line_size);
|
||||
|
||||
// check alignment
|
||||
if (!is_aligned(dev_maddr, line_size))
|
||||
return -1;
|
||||
if (!is_aligned(size, line_size))
|
||||
return -1;
|
||||
if (!is_aligned(buffer->io_addr + src_offset, line_size))
|
||||
return -1;
|
||||
|
||||
// bound checking
|
||||
if (size + src_offset > buffer->size)
|
||||
if (src_offset + asize > buffer->size)
|
||||
return -1;
|
||||
if (dev_maddr + size > dev_mem_size)
|
||||
if (dev_maddr + asize > dev_mem_size)
|
||||
return -1;
|
||||
|
||||
// Ensure ready for new command
|
||||
|
@ -287,7 +288,7 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, si
|
|||
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, (buffer->io_addr + src_offset) >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, (dev_maddr >> ls_shift) ));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, asize >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_WRITE));
|
||||
|
||||
// Wait for the write operation to finish
|
||||
|
@ -308,18 +309,18 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size,
|
|||
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||
size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
|
||||
|
||||
size_t asize = align_size(size, line_size);
|
||||
|
||||
// check alignment
|
||||
if (!is_aligned(dev_maddr, line_size))
|
||||
return -1;
|
||||
if (!is_aligned(size, line_size))
|
||||
return -1;
|
||||
if (!is_aligned(buffer->io_addr + dest_offset, line_size))
|
||||
return -1;
|
||||
|
||||
// bound checking
|
||||
if (size + dest_offset > buffer->size)
|
||||
if (dest_offset + asize > buffer->size)
|
||||
return -1;
|
||||
if (dev_maddr + size > dev_mem_size)
|
||||
if (dev_maddr + asize > dev_mem_size)
|
||||
return -1;
|
||||
|
||||
// Ensure ready for new command
|
||||
|
@ -330,7 +331,7 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size,
|
|||
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, (buffer->io_addr + dest_offset) >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, (dev_maddr) >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, asize >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_READ));
|
||||
|
||||
// Wait for the read operation to finish
|
||||
|
@ -347,13 +348,13 @@ extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
|
|||
|
||||
vx_device_t* device = ((vx_device_t*)hdevice);
|
||||
|
||||
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||
|
||||
size_t asize = align_size(size, line_size);
|
||||
|
||||
// check alignment
|
||||
if (!is_aligned(dev_maddr, line_size))
|
||||
return -1;
|
||||
if (!is_aligned(size, line_size))
|
||||
return -1;
|
||||
|
||||
// Ensure ready for new command
|
||||
if (vx_ready_wait(hdevice, -1) != 0)
|
||||
|
@ -362,7 +363,7 @@ extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
|
|||
auto ls_shift = (int)std::log2(line_size);
|
||||
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, asize >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_CLFLUSH));
|
||||
|
||||
// Wait for the cache flush operation to finish
|
||||
|
|
|
@ -4,6 +4,16 @@
|
|||
|
||||
int test = -1;
|
||||
|
||||
#define RT_CHECK(_expr) \
|
||||
do { \
|
||||
int _ret = _expr; \
|
||||
if (0 == _ret) \
|
||||
break; \
|
||||
printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \
|
||||
cleanup(); \
|
||||
exit(-1); \
|
||||
} while (false)
|
||||
|
||||
static void parse_args(int argc, char **argv) {
|
||||
int c;
|
||||
while ((c = getopt(argc, argv, "t:h?")) != -1) {
|
||||
|
@ -27,12 +37,27 @@ uint64_t shuffle(int i, uint64_t value) {
|
|||
return (value << i) | (value & ((1 << i)-1));;
|
||||
}
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h sbuf = nullptr;
|
||||
vx_buffer_h dbuf = nullptr;
|
||||
|
||||
void cleanup() {
|
||||
if (sbuf) {
|
||||
vx_buf_release(sbuf);
|
||||
}
|
||||
if (dbuf) {
|
||||
vx_buf_release(dbuf);
|
||||
}
|
||||
if (device) {
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
|
||||
int run_memcopy_test(vx_buffer_h sbuf,
|
||||
vx_buffer_h dbuf,
|
||||
uint32_t address,
|
||||
uint64_t value,
|
||||
int num_blocks) {
|
||||
int ret;
|
||||
int errors = 0;
|
||||
|
||||
// write sbuf data
|
||||
|
@ -42,15 +67,11 @@ int run_memcopy_test(vx_buffer_h sbuf,
|
|||
|
||||
// write buffer to local memory
|
||||
std::cout << "write buffer to local memory" << std::endl;
|
||||
ret = vx_copy_to_dev(sbuf, address, 64 * num_blocks, 0);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
RT_CHECK(vx_copy_to_dev(sbuf, address, 64 * num_blocks, 0));
|
||||
|
||||
// read buffer from local memory
|
||||
std::cout << "read buffer from local memory" << std::endl;
|
||||
ret = vx_copy_from_dev(dbuf, address, 64 * num_blocks, 0);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
RT_CHECK(vx_copy_from_dev(dbuf, address, 64 * num_blocks, 0));
|
||||
|
||||
// verify result
|
||||
std::cout << "verify result" << std::endl;
|
||||
|
@ -77,7 +98,6 @@ int run_kernel_test(vx_device_h device,
|
|||
vx_buffer_h sbuf,
|
||||
vx_buffer_h dbuf,
|
||||
const char* program) {
|
||||
int ret;
|
||||
int errors = 0;
|
||||
|
||||
uint64_t seed = 0x0badf00d40ff40ff;
|
||||
|
@ -93,43 +113,27 @@ int run_kernel_test(vx_device_h device,
|
|||
|
||||
// write buffer to local memory
|
||||
std::cout << "write buffer to local memory" << std::endl;
|
||||
ret = vx_copy_to_dev(sbuf, src_dev_addr, 64 * num_blocks, 0);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
RT_CHECK(vx_copy_to_dev(sbuf, src_dev_addr, 64 * num_blocks, 0));
|
||||
|
||||
// upload program
|
||||
std::cout << "upload program" << std::endl;
|
||||
ret = vx_upload_kernel_file(device, program);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
RT_CHECK(vx_upload_kernel_file(device, program));
|
||||
|
||||
// start device
|
||||
std::cout << "start device" << std::endl;
|
||||
ret = vx_start(device);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
RT_CHECK(vx_start(device));
|
||||
|
||||
// wait for completion
|
||||
std::cout << "wait for completion" << std::endl;
|
||||
ret = vx_ready_wait(device, -1);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
RT_CHECK(vx_ready_wait(device, -1));
|
||||
|
||||
// flush the caches
|
||||
std::cout << "flush the caches" << std::endl;
|
||||
ret = vx_flush_caches(device, dest_dev_addr, 64 * num_blocks);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
RT_CHECK(vx_flush_caches(device, dest_dev_addr, 64 * num_blocks));
|
||||
|
||||
// read buffer from local memory
|
||||
std::cout << "read buffer from local memory" << std::endl;
|
||||
ret = vx_copy_from_dev(dbuf, dest_dev_addr, 64 * num_blocks, 0);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
RT_CHECK(vx_copy_from_dev(dbuf, dest_dev_addr, 64 * num_blocks, 0));
|
||||
|
||||
// verify result
|
||||
std::cout << "verify result" << std::endl;
|
||||
|
@ -152,75 +156,33 @@ int run_kernel_test(vx_device_h device,
|
|||
return 0;
|
||||
}
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h sbuf = nullptr;
|
||||
vx_buffer_h dbuf = nullptr;
|
||||
|
||||
void cleanup() {
|
||||
if (sbuf) {
|
||||
vx_buf_release(sbuf);
|
||||
}
|
||||
if (dbuf) {
|
||||
vx_buf_release(dbuf);
|
||||
}
|
||||
if (device) {
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
int ret;
|
||||
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
|
||||
// open device connection
|
||||
std::cout << "open device connection" << std::endl;
|
||||
vx_device_h device;
|
||||
ret = vx_dev_open(&device);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
RT_CHECK(vx_dev_open(&device));
|
||||
|
||||
// create source buffer
|
||||
std::cout << "create source buffer" << std::endl;
|
||||
ret = vx_alloc_shared_mem(device, 4096, &sbuf);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
RT_CHECK(vx_alloc_shared_mem(device, 4096, &sbuf));
|
||||
|
||||
// create destination buffer
|
||||
std::cout << "create destination buffer" << std::endl;
|
||||
ret = vx_alloc_shared_mem(device, 4096, &dbuf);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
RT_CHECK(vx_alloc_shared_mem(device, 4096, &dbuf));
|
||||
|
||||
// run tests
|
||||
if (0 == test || -1 == test) {
|
||||
/*9if (0 == test || -1 == test) {
|
||||
std::cout << "run memcopy test" << std::endl;
|
||||
|
||||
ret = run_memcopy_test(sbuf, dbuf, 0x10000000, 0x0badf00d00ff00ff, 1);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = run_memcopy_test(sbuf, dbuf, 0x20000000, 0x0badf00d40ff40ff, 8);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
RT_CHECK(run_memcopy_test(sbuf, dbuf, 0x10000000, 0x0badf00d00ff00ff, 1));
|
||||
RT_CHECK(run_memcopy_test(sbuf, dbuf, 0x20000000, 0x0badf00d40ff40ff, 8));
|
||||
}*/
|
||||
|
||||
if (1 == test || -1 == test) {
|
||||
std::cout << "run kernel test" << std::endl;
|
||||
ret = run_kernel_test(device, sbuf, dbuf, "kernel.bin");
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
RT_CHECK(run_kernel_test(device, sbuf, dbuf, "kernel.bin"));
|
||||
}
|
||||
|
||||
// cleanup
|
||||
|
|
|
@ -4,6 +4,16 @@
|
|||
#include <vortex.h>
|
||||
#include "common.h"
|
||||
|
||||
#define RT_CHECK(_expr) \
|
||||
do { \
|
||||
int _ret = _expr; \
|
||||
if (0 == _ret) \
|
||||
break; \
|
||||
printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \
|
||||
cleanup(); \
|
||||
exit(-1); \
|
||||
} while (false)
|
||||
|
||||
const char* program_file = "kernel.bin";
|
||||
uint32_t data_stride = 0xffffffff;
|
||||
|
||||
|
@ -39,40 +49,38 @@ static void parse_args(int argc, char **argv) {
|
|||
}
|
||||
}
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h buffer = nullptr;
|
||||
|
||||
void cleanup() {
|
||||
if (buffer) {
|
||||
vx_buf_release(buffer);
|
||||
}
|
||||
if (device) {
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
|
||||
int run_test(vx_device_h device,
|
||||
vx_buffer_h buffer,
|
||||
const kernel_arg_t& kernel_arg,
|
||||
uint32_t buf_size,
|
||||
uint32_t num_points) {
|
||||
int ret;
|
||||
|
||||
// start device
|
||||
std::cout << "start device" << std::endl;
|
||||
ret = vx_start(device);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
RT_CHECK(vx_start(device));
|
||||
|
||||
// wait for completion
|
||||
std::cout << "wait for completion" << std::endl;
|
||||
ret = vx_ready_wait(device, -1);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
RT_CHECK(vx_ready_wait(device, -1));
|
||||
|
||||
// flush the destination buffer caches
|
||||
std::cout << "flush the destination buffer caches" << std::endl;
|
||||
ret = vx_flush_caches(device, kernel_arg.dst_ptr, buf_size);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
RT_CHECK(vx_flush_caches(device, kernel_arg.dst_ptr, buf_size));
|
||||
|
||||
// download destination buffer
|
||||
std::cout << "download destination buffer" << std::endl;
|
||||
ret = vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
RT_CHECK(vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0));
|
||||
|
||||
// verify result
|
||||
std::cout << "verify result" << std::endl;
|
||||
|
@ -96,20 +104,7 @@ int run_test(vx_device_h device,
|
|||
return 0;
|
||||
}
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h buffer = nullptr;
|
||||
|
||||
void cleanup() {
|
||||
if (buffer) {
|
||||
vx_buf_release(buffer);
|
||||
}
|
||||
if (device) {
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
int ret;
|
||||
size_t value;
|
||||
kernel_arg_t kernel_arg;
|
||||
|
||||
|
@ -132,50 +127,28 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
// open device connection
|
||||
std::cout << "open device connection" << std::endl;
|
||||
ret = vx_dev_open(&device);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
RT_CHECK(vx_dev_open(&device));
|
||||
|
||||
// upload program
|
||||
std::cout << "upload program" << std::endl;
|
||||
ret = vx_upload_kernel_file(device, program_file);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
RT_CHECK(vx_upload_kernel_file(device, program_file));
|
||||
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
|
||||
ret = vx_alloc_dev_mem(device, buf_size, &value);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.src0_ptr = value;
|
||||
|
||||
ret = vx_alloc_dev_mem(device, buf_size, &value);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.src1_ptr = value;
|
||||
|
||||
ret = vx_alloc_dev_mem(device, buf_size, &value);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.dst_ptr = value;
|
||||
|
||||
// allocate shared memory
|
||||
std::cout << "allocate shared memory" << std::endl;
|
||||
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
|
||||
ret = vx_alloc_shared_mem(device, alloc_size, &buffer);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &buffer));
|
||||
|
||||
// populate source buffer values
|
||||
std::cout << "populate source buffer values" << std::endl;
|
||||
|
@ -187,19 +160,9 @@ int main(int argc, char *argv[]) {
|
|||
}
|
||||
|
||||
// upload source buffers
|
||||
std::cout << "upload source buffers" << std::endl;
|
||||
|
||||
ret = vx_copy_to_dev(buffer, kernel_arg.src0_ptr, buf_size, 0);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = vx_copy_to_dev(buffer, kernel_arg.src1_ptr, buf_size, 0);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
std::cout << "upload source buffers" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src0_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src1_ptr, buf_size, 0));
|
||||
|
||||
// upload kernel argument
|
||||
std::cout << "upload kernel argument" << std::endl;
|
||||
|
@ -210,20 +173,12 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
auto buf_ptr = (int*)vx_host_ptr(buffer);
|
||||
memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
|
||||
ret = vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
|
||||
}
|
||||
|
||||
// run tests
|
||||
std::cout << "run tests" << std::endl;
|
||||
ret = run_test(device, buffer, kernel_arg, buf_size, num_points);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
RT_CHECK(run_test(device, buffer, kernel_arg, buf_size, num_points));
|
||||
|
||||
// cleanup
|
||||
std::cout << "cleanup" << std::endl;
|
||||
|
|
|
@ -42,6 +42,8 @@ make
|
|||
# ASE build instructions
|
||||
#
|
||||
|
||||
source /export/fpga/bin/setup-fpga-env fpga-pac-a10
|
||||
|
||||
# Acquire a server node for running ASE simulations
|
||||
qsub-sim
|
||||
|
||||
|
@ -51,7 +53,17 @@ vcd add -r /*/Vortex/hw/rtl/*
|
|||
run -all
|
||||
|
||||
# compress VCD trace
|
||||
tar -zcvf vortex.vcd.tar.gz work/vortex.vcd
|
||||
tar -zcvf vortex.vcd.tar.gz ./build_ase/work/vortex.vcd
|
||||
|
||||
# decompress VCD trace
|
||||
tar -zxvf vortex.vcd.tar.gz vortex.vcd
|
||||
tar -zxvf /mnt/c/Users/Blaise/Downloads/vortex.vcd.tar.gz
|
||||
|
||||
# launch Gtkwave
|
||||
gtkwave ./build_ase/work/vortex.vcd &
|
||||
|
||||
# test
|
||||
./run_ase.sh ../../driver/tests/basic/basic
|
||||
|
||||
# list a user's processes and kill one by PID
|
||||
ps -u tinebp
|
||||
kill -9 <pid>
|
|
@ -14,17 +14,19 @@ rm -rf $ASE_WORKDIR/.app_lock.pid $ASE_WORKDIR/.ase_ready.pid
|
|||
|
||||
# Start Simulator in background
|
||||
pushd $SCRIPT_DIR/build_ase
|
||||
make sim &
|
||||
echo " [DBG] starting ASE simnulator"
|
||||
nohup make sim &
|
||||
popd
|
||||
|
||||
# Wait for simulator readiness
|
||||
# When .ase_ready.pid is created in $ASE_WORKDIR, ASE is ready for simulation
|
||||
while [! -f $ASE_WORKDIR/.ase_ready.pid]
|
||||
while [ ! -f $ASE_WORKDIR/.ase_ready.pid ]
|
||||
do
|
||||
sleep 1
|
||||
done
|
||||
|
||||
# run application
|
||||
pushd $PROGRAM_DIR
|
||||
echo " [DBG] running ./$PROGRAM $*"
|
||||
ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$LD_LIBRARY_PATH ./$PROGRAM $*
|
||||
popd
|
|
@ -28,7 +28,12 @@ module vortex_afu #(
|
|||
output logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select
|
||||
);
|
||||
|
||||
localparam DRAM_ADDR_WIDTH = (32 - `CLOG2(`GLOBAL_BLOCK_SIZE));
|
||||
localparam DRAM_ADDR_WIDTH = $bits(t_local_mem_addr);
|
||||
localparam DRAM_LINE_WIDTH = $bits(t_local_mem_data);
|
||||
localparam DRAM_TAG_WIDTH = `L3DRAM_TAG_WIDTH;
|
||||
|
||||
`STATIC_ASSERT(DRAM_ADDR_WIDTH == `L3DRAM_ADDR_WIDTH, "invalid vortex dram bus!")
|
||||
`STATIC_ASSERT(DRAM_LINE_WIDTH == `L3DRAM_LINE_WIDTH, "invalid vortex dram bus!")
|
||||
|
||||
localparam AVS_RD_QUEUE_SIZE = 16;
|
||||
|
||||
|
@ -58,6 +63,7 @@ typedef enum logic[3:0] {
|
|||
STATE_IDLE,
|
||||
STATE_READ,
|
||||
STATE_WRITE,
|
||||
STATE_START,
|
||||
STATE_RUN,
|
||||
STATE_CLFLUSH
|
||||
} state_t;
|
||||
|
@ -72,13 +78,13 @@ state_t state;
|
|||
logic vx_dram_req_read;
|
||||
logic vx_dram_req_write;
|
||||
logic [DRAM_ADDR_WIDTH-1:0] vx_dram_req_addr;
|
||||
logic [`GLOBAL_BLOCK_SIZE-1:0] vx_dram_req_data;
|
||||
logic [`L3DRAM_TAG_WIDTH-1:0] vx_dram_req_tag;
|
||||
logic [DRAM_LINE_WIDTH-1:0] vx_dram_req_data;
|
||||
logic [DRAM_TAG_WIDTH-1:0] vx_dram_req_tag;
|
||||
logic vx_dram_req_ready;
|
||||
|
||||
logic vx_dram_rsp_valid;
|
||||
logic [`GLOBAL_BLOCK_SIZE-1:0] vx_dram_rsp_data;
|
||||
logic [`L3DRAM_TAG_WIDTH-1:0] vx_dram_rsp_tag;
|
||||
logic [DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data;
|
||||
logic [DRAM_TAG_WIDTH-1:0] vx_dram_rsp_tag;
|
||||
logic vx_dram_rsp_ready;
|
||||
|
||||
logic vx_snp_req_valid;
|
||||
|
@ -90,9 +96,9 @@ logic vx_busy;
|
|||
// AVS Queues /////////////////////////////////////////////////////////////////
|
||||
|
||||
logic avs_rtq_push;
|
||||
t_local_mem_addr avs_rtq_din;
|
||||
logic [DRAM_TAG_WIDTH-1:0] avs_rtq_din;
|
||||
logic avs_rtq_pop;
|
||||
t_local_mem_addr avs_rtq_dout;
|
||||
logic [DRAM_TAG_WIDTH-1:0] avs_rtq_dout;
|
||||
logic avs_rtq_empty;
|
||||
logic avs_rtq_full;
|
||||
|
||||
|
@ -229,7 +235,7 @@ begin
|
|||
CMD_TYPE_RUN: begin
|
||||
$display("%t: STATE START", $time);
|
||||
vx_reset <= 1;
|
||||
state <= STATE_RUN;
|
||||
state <= STATE_START;
|
||||
end
|
||||
CMD_TYPE_CLFLUSH: begin
|
||||
$display("%t: STATE CFLUSH: da=%h sz=%0d", $time, csr_mem_addr, csr_data_size);
|
||||
|
@ -250,6 +256,10 @@ begin
|
|||
end
|
||||
end
|
||||
|
||||
STATE_START: begin // vortex reset cycle
|
||||
state <= STATE_RUN;
|
||||
end
|
||||
|
||||
STATE_RUN: begin
|
||||
if (!vx_busy) begin
|
||||
state <= STATE_IDLE;
|
||||
|
@ -271,7 +281,7 @@ end
|
|||
logic cci_rdq_empty;
|
||||
t_cci_rdq_data cci_rdq_dout;
|
||||
logic cci_rdq_pop;
|
||||
logic [`L3DRAM_TAG_WIDTH-1:0] dram_req_tag;
|
||||
logic [DRAM_TAG_WIDTH-1:0] dram_req_tag;
|
||||
|
||||
t_ccip_clAddr next_avs_address;
|
||||
always_comb
|
||||
|
@ -372,7 +382,7 @@ end
|
|||
|
||||
always_comb
|
||||
begin
|
||||
vx_dram_rsp_valid = vortex_enabled && !avs_rdq_empty && vx_dram_rsp_ready;
|
||||
vx_dram_rsp_valid = vortex_enabled && !avs_rdq_empty;
|
||||
vx_dram_rsp_tag = avs_rtq_dout;
|
||||
vx_dram_rsp_data = avs_rdq_dout;
|
||||
end
|
||||
|
@ -389,7 +399,7 @@ begin
|
|||
end
|
||||
|
||||
VX_generic_queue #(
|
||||
.DATAW($bits(t_local_mem_addr)),
|
||||
.DATAW(DRAM_TAG_WIDTH),
|
||||
.SIZE(AVS_RD_QUEUE_SIZE)
|
||||
) avs_rd_req_queue (
|
||||
.clk (clk),
|
||||
|
@ -412,7 +422,7 @@ begin
|
|||
end
|
||||
|
||||
VX_generic_queue #(
|
||||
.DATAW($bits(t_local_mem_data)),
|
||||
.DATAW(DRAM_LINE_WIDTH),
|
||||
.SIZE(AVS_RD_QUEUE_SIZE)
|
||||
) avs_rd_rsp_queue (
|
||||
.clk (clk),
|
||||
|
@ -595,30 +605,46 @@ end
|
|||
// Vortex binding /////////////////////////////////////////////////////////////
|
||||
|
||||
Vortex_Socket #() vx_socket (
|
||||
.clk (clk),
|
||||
.reset (SoftReset || vx_reset),
|
||||
.clk (clk),
|
||||
.reset (vx_reset),
|
||||
|
||||
// DRAM Req
|
||||
.dram_req_write (vx_dram_req_write),
|
||||
.dram_req_read (vx_dram_req_read),
|
||||
.dram_req_addr (vx_dram_req_addr),
|
||||
.dram_req_data (vx_dram_req_data),
|
||||
.dram_req_tag (vx_dram_req_tag),
|
||||
.dram_req_ready (vx_dram_req_ready),
|
||||
// DRAM request
|
||||
.dram_req_write (vx_dram_req_write),
|
||||
.dram_req_read (vx_dram_req_read),
|
||||
.dram_req_addr (vx_dram_req_addr),
|
||||
.dram_req_data (vx_dram_req_data),
|
||||
.dram_req_tag (vx_dram_req_tag),
|
||||
.dram_req_ready (vx_dram_req_ready),
|
||||
|
||||
// DRAM Rsp
|
||||
.dram_rsp_valid (vx_dram_rsp_valid),
|
||||
.dram_rsp_data (vx_dram_rsp_data),
|
||||
.dram_rsp_tag (vx_dram_rsp_tag),
|
||||
.dram_rsp_ready (vx_dram_rsp_ready),
|
||||
// DRAM response
|
||||
.dram_rsp_valid (vx_dram_rsp_valid),
|
||||
.dram_rsp_data (vx_dram_rsp_data),
|
||||
.dram_rsp_tag (vx_dram_rsp_tag),
|
||||
.dram_rsp_ready (vx_dram_rsp_ready),
|
||||
|
||||
// Cache Snooping Req
|
||||
.snp_req_valid (vx_snp_req_valid),
|
||||
.snp_req_addr (vx_snp_req_addr),
|
||||
.snp_req_ready (vx_snp_req_ready),
|
||||
// Cache snooping
|
||||
.snp_req_valid (vx_snp_req_valid),
|
||||
.snp_req_addr (vx_snp_req_addr),
|
||||
.snp_req_ready (vx_snp_req_ready),
|
||||
|
||||
// I/O request
|
||||
.io_req_read (),
|
||||
.io_req_write (),
|
||||
.io_req_addr (),
|
||||
.io_req_data (),
|
||||
.io_req_byteen (),
|
||||
.io_req_tag (),
|
||||
.io_req_ready (0),
|
||||
|
||||
// I/O response
|
||||
.io_rsp_valid (0),
|
||||
.io_rsp_data (0),
|
||||
.io_rsp_tag (0),
|
||||
.io_rsp_ready (),
|
||||
|
||||
// status
|
||||
.busy (vx_busy)
|
||||
.busy (vx_busy),
|
||||
.ebreak ()
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -8,12 +8,8 @@ module VX_back_end #(
|
|||
|
||||
input wire schedule_delay,
|
||||
|
||||
VX_cache_core_rsp_if dcache_rsp_if,
|
||||
VX_cache_core_req_if dcache_req_if,
|
||||
|
||||
output wire mem_delay,
|
||||
output wire exec_delay,
|
||||
output wire gpr_stage_delay,
|
||||
VX_cache_core_rsp_if dcache_rsp_if,
|
||||
VX_jal_rsp_if jal_rsp_if,
|
||||
VX_branch_rsp_if branch_rsp_if,
|
||||
|
||||
|
@ -22,6 +18,10 @@ module VX_back_end #(
|
|||
|
||||
VX_warp_ctl_if warp_ctl_if,
|
||||
|
||||
output wire mem_delay,
|
||||
output wire exec_delay,
|
||||
output wire gpr_stage_delay,
|
||||
|
||||
output wire ebreak
|
||||
);
|
||||
|
||||
|
@ -78,8 +78,8 @@ module VX_back_end #(
|
|||
.reset (reset),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.mem_wb_if (mem_wb_if),
|
||||
.dcache_rsp_if (dcache_rsp_if),
|
||||
.dcache_req_if (dcache_req_if),
|
||||
.dcache_rsp_if (dcache_rsp_if),
|
||||
.delay (mem_delay),
|
||||
.no_slot_mem (no_slot_mem)
|
||||
);
|
||||
|
|
|
@ -35,8 +35,11 @@ module VX_dmem_ctrl (
|
|||
.CORE_TAG_ID_BITS(`CORE_TAG_ID_BITS)
|
||||
) dcache_core_rsp_qual_if(), smem_core_rsp_if();
|
||||
|
||||
// use "case equality" to handle uninitialized address value
|
||||
wire smem_select = ((dcache_core_req_if.core_req_addr[0][31:24] == `SHARED_MEM_TOP_ADDR) === 1'b1);
|
||||
|
||||
VX_dcache_io_arb dcache_io_arb (
|
||||
.io_select (dcache_core_req_if.core_req_addr[0][31:24] == `SHARED_MEM_TOP_ADDR),
|
||||
.io_select (smem_select),
|
||||
.core_req_if (dcache_core_req_if),
|
||||
.dcache_core_req_if (dcache_core_req_qual_if),
|
||||
.io_core_req_if (smem_core_req_if),
|
||||
|
|
|
@ -9,19 +9,24 @@ module VX_icache_stage (
|
|||
output wire[`NUM_THREADS-1:0] icache_stage_valids,
|
||||
VX_inst_meta_if fe_inst_meta_fi,
|
||||
VX_inst_meta_if fe_inst_meta_id,
|
||||
|
||||
VX_cache_core_rsp_if icache_rsp_if,
|
||||
VX_cache_core_req_if icache_req_if
|
||||
|
||||
VX_cache_core_req_if icache_req_if,
|
||||
VX_cache_core_rsp_if icache_rsp_if
|
||||
);
|
||||
|
||||
reg[`NUM_THREADS-1:0] pending_threads[`NUM_WARPS-1:0];
|
||||
reg [`NUM_THREADS-1:0] valid_threads [`NUM_WARPS-1:0];
|
||||
|
||||
wire valid_inst = (| fe_inst_meta_fi.valid);
|
||||
|
||||
`DEBUG_BEGIN
|
||||
wire [`CORE_REQ_TAG_WIDTH-1:0] core_req_tag = icache_req_if.core_req_tag;
|
||||
wire [`CORE_REQ_TAG_WIDTH-1:0] core_rsp_tag = icache_rsp_if.core_rsp_tag;
|
||||
`DEBUG_END
|
||||
|
||||
// Icache Request
|
||||
assign icache_req_if.core_req_valid = valid_inst && !total_freeze;
|
||||
assign icache_req_if.core_req_addr = fe_inst_meta_fi.inst_pc;
|
||||
assign icache_req_if.core_req_data = 32'b0;
|
||||
assign icache_req_if.core_req_data = 'z;
|
||||
assign icache_req_if.core_req_read = `BYTE_EN_LW;
|
||||
assign icache_req_if.core_req_write = `BYTE_EN_NO;
|
||||
assign icache_req_if.core_req_tag = {fe_inst_meta_fi.inst_pc, 2'b1, 5'b0, fe_inst_meta_fi.warp_num};
|
||||
|
@ -33,8 +38,8 @@ module VX_icache_stage (
|
|||
|
||||
assign {fe_inst_meta_id.inst_pc, rsp_wb, rsp_rd, fe_inst_meta_id.warp_num} = icache_rsp_if.core_rsp_tag;
|
||||
|
||||
assign fe_inst_meta_id.instruction = icache_rsp_if.core_rsp_data[0][31:0];
|
||||
assign fe_inst_meta_id.valid = icache_rsp_if.core_rsp_valid ? pending_threads[fe_inst_meta_id.warp_num] : 0;
|
||||
assign fe_inst_meta_id.instruction = icache_rsp_if.core_rsp_data[0];
|
||||
assign fe_inst_meta_id.valid = icache_rsp_if.core_rsp_valid ? valid_threads[fe_inst_meta_id.warp_num] : 0;
|
||||
|
||||
assign icache_stage_wid = fe_inst_meta_id.warp_num;
|
||||
assign icache_stage_valids = fe_inst_meta_id.valid & {`NUM_THREADS{!icache_stage_delay}};
|
||||
|
@ -49,12 +54,10 @@ module VX_icache_stage (
|
|||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
for (i = 0; i < `NUM_WARPS; i = i + 1) begin
|
||||
pending_threads[i] <= 0;
|
||||
end
|
||||
//--
|
||||
end else begin
|
||||
if (icache_req_if.core_req_valid && icache_req_if.core_req_ready) begin
|
||||
pending_threads[fe_inst_meta_fi.warp_num] <= fe_inst_meta_fi.valid;
|
||||
valid_threads[fe_inst_meta_fi.warp_num] <= fe_inst_meta_fi.valid;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -11,8 +11,8 @@ module VX_lsu_unit (
|
|||
VX_wb_if mem_wb_if,
|
||||
|
||||
// Dcache interface
|
||||
VX_cache_core_rsp_if dcache_rsp_if,
|
||||
VX_cache_core_req_if dcache_req_if,
|
||||
VX_cache_core_rsp_if dcache_rsp_if,
|
||||
|
||||
output wire delay
|
||||
);
|
||||
|
|
|
@ -244,8 +244,11 @@ module Vortex #(
|
|||
.icache_dram_rsp_if (icache_dram_rsp_if)
|
||||
);
|
||||
|
||||
// use "case equality" to handle uninitialized address value
|
||||
wire io_select = ((dcache_io_core_req_if.core_req_addr[0] >= `IO_BUS_BASE_ADDR) === 1'b1);
|
||||
|
||||
VX_dcache_io_arb dcache_io_arb (
|
||||
.io_select (dcache_io_core_req_if.core_req_addr[0] >= `IO_BUS_BASE_ADDR),
|
||||
.io_select (io_select),
|
||||
.core_req_if (dcache_io_core_req_if),
|
||||
.dcache_core_req_if (dcache_core_req_if),
|
||||
.io_core_req_if (io_core_req_if),
|
||||
|
|
14
hw/rtl/cache/VX_tag_data_access.v
vendored
14
hw/rtl/cache/VX_tag_data_access.v
vendored
|
@ -90,13 +90,11 @@ module VX_tag_data_access #(
|
|||
|
||||
wire fill_sent;
|
||||
wire invalidate_line;
|
||||
wire tags_match;
|
||||
|
||||
wire real_writefill = writefill_st1e
|
||||
&& ((valid_req_st1e
|
||||
&& !use_read_valid_st1e)
|
||||
|| (valid_req_st1e
|
||||
&& use_read_valid_st1e
|
||||
&& (writeaddr_st1e[`TAG_LINE_ADDR_RNG] != use_read_tag_st1e)));
|
||||
&& ((valid_req_st1e && !use_read_valid_st1e)
|
||||
|| (valid_req_st1e && use_read_valid_st1e && !tags_match));
|
||||
|
||||
VX_tag_data_structure #(
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
|
@ -256,14 +254,14 @@ module VX_tag_data_access #(
|
|||
|
||||
assign data_write[i * `WORD_WIDTH +: `WORD_WIDTH] = force_write ? writedata_st1e[i * `WORD_WIDTH +: `WORD_WIDTH] : use_write_dat;
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
assign use_write_enable = (writefill_st1e && !real_writefill) ? 0 : we;
|
||||
assign use_write_data = data_write;
|
||||
|
||||
wire[`TAG_SELECT_BITS-1:0] writeaddr_tag = writeaddr_st1e[`TAG_LINE_ADDR_RNG];
|
||||
|
||||
wire tags_match = writeaddr_tag == use_read_tag_st1e;
|
||||
// use "case equality" to handle uninitialized tag when block entry is not valid
|
||||
assign tags_match = ((writeaddr_st1e[`TAG_LINE_ADDR_RNG] == use_read_tag_st1e) === 1'b1);
|
||||
|
||||
wire snoop_hit = valid_req_st1e && is_snp_st1e && use_read_valid_st1e && tags_match && use_read_dirty_st1e;
|
||||
wire req_invalid = valid_req_st1e && !is_snp_st1e && !use_read_valid_st1e && !writefill_st1e;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue