scope refactoring

This commit is contained in:
Blaise Tine 2020-06-13 11:47:28 -07:00
parent 4fa540575c
commit 75af29febb
9 changed files with 268 additions and 230 deletions

View file

@ -17,6 +17,7 @@ CXXFLAGS +=-fstack-protector
# Position independent code
CXXFLAGS += -fPIC
# Enable scope analyzer
CXXFLAGS += -DSCOPE
LDFLAGS += -luuid
@ -37,7 +38,7 @@ PROJECT_ASE = $(ASE_DIR)/libvortex.so
AFU_JSON_INFO = vortex_afu.h
SRCS = vortex.cpp ../common/vx_utils.cpp
SRCS = vortex.cpp scope.cpp ../common/vx_utils.cpp
all: $(PROJECT) $(PROJECT_ASE)

223
driver/opae/scope.cpp Normal file
View file

@ -0,0 +1,223 @@
#include <iostream>
#include <fstream>
#include <thread>
#include <chrono>
#include <vector>
#include <assert.h>
#include "scope.h"
#include "vortex_afu.h"
// Evaluates an OPAE call once. If the result is not FPGA_OK, prints the
// failing expression with its numeric and textual error, then returns -1
// from the function in which the macro is expanded.
#define CHECK_RES(_expr)                                      \
    do {                                                      \
        fpga_result res = _expr;                              \
        if (res != FPGA_OK) {                                 \
            printf("OPAE Error: '%s' returned %d, %s!\n",     \
                   #_expr, (int)res, fpgaErrStr(res));        \
            return -1;                                        \
        }                                                     \
    } while (false)
#define MMIO_CSR_SCOPE_CMD (AFU_IMAGE_MMIO_CSR_SCOPE_CMD * 4)
#define MMIO_CSR_SCOPE_DATA (AFU_IMAGE_MMIO_CSR_SCOPE_DATA * 4)
// Describes one signal captured in a scope frame.
struct scope_signal_t {
// number of bits the signal occupies in a frame
int width;
// identifier written to the VCD "$var" declaration for this signal
const char* name;
};
// Table of the signals contained in one scope frame.
// Order matters: vx_scope_stop() declares entry i in the VCD with identifier
// i+1 and decodes each frame starting from the LAST entry (lowest frame bits)
// back to the first. The widths must add up to the frame width reported by
// the device.
static const scope_signal_t scope_signals[] = {
{ 32, "icache_req_addr" },
{ 2, "icache_req_warp_num" },
{ 2, "icache_req_tag" },
{ 32, "icache_rsp_data" },
{ 2, "icache_rsp_tag" },
{ 32, "dcache_req_addr" },
{ 2, "dcache_req_warp_num" },
{ 2, "dcache_req_tag" },
{ 32, "dcache_rsp_data" },
{ 2 , "dcache_rsp_tag" },
{ 32, "dram_req_addr" },
{ 29, "dram_req_tag" },
{ 29, "dram_rsp_tag" },
{ 32, "snp_req_addr" },
{ 1, "snp_req_invalidate" },
{ 16, "snp_req_tag" },
{ 16, "snp_rsp_tag" },
{ 2, "decode_warp_num" },
{ 32, "decode_curr_PC" },
{ 1, "decode_is_jal" },
{ 5, "decode_rs1" },
{ 5, "decode_rs2" },
{ 2, "execute_warp_num" },
{ 5, "execute_rd" },
{ 32, "execute_a" },
{ 32, "execute_b" },
{ 2, "writeback_warp_num" },
{ 2, "writeback_wb" },
{ 5, "writeback_rd" },
{ 32, "writeback_data" },
{ 1, "icache_req_valid" },
{ 1, "icache_req_ready" },
{ 1, "icache_rsp_valid" },
{ 1, "icache_rsp_ready" },
{ 4, "dcache_req_valid" },
{ 1, "dcache_req_ready" },
{ 4, "dcache_rsp_valid" },
{ 1, "dcache_rsp_ready" },
{ 1, "dram_req_valid" },
{ 1, "dram_req_ready" },
{ 1, "dram_rsp_valid" },
{ 1, "dram_rsp_ready" },
{ 1, "snp_req_valid" },
{ 1, "snp_req_ready" },
{ 1, "snp_rsp_valid" },
{ 1, "snp_rsp_ready" },
{ 4, "decode_valid" },
{ 4, "execute_valid" },
{ 4, "writeback_valid" },
{ 1, "schedule_delay" },
{ 1, "memory_delay" },
{ 1, "exec_delay" },
{ 1, "gpr_stage_delay" },
};
// number of entries in scope_signals
static const int num_signals = sizeof(scope_signals) / sizeof(scope_signal_t);
// Programs the scope start delay.
//   hfpga: OPAE handle used for the MMIO write; must not be null.
//   delay: value placed in the bits above the 3-bit command code.
// Returns 0 on success, -1 if the handle is null or the MMIO write fails.
int vx_scope_start(fpga_handle hfpga, uint64_t delay) {
    // command code carried in the low 3 bits of the command word
    const uint64_t set_delay_code = 4;

    if (hfpga == nullptr) {
        return -1;
    }

    CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, (delay << 3) | set_delay_code));
    return 0;
}
// Stops the scope recording, drains the captured frames from the device and
// writes them as a value-change dump to "vx_scope.vcd" in the current
// directory.
//   hfpga: OPAE handle used for all MMIO accesses; must not be null.
//   delay: value placed in the bits above the 3-bit code of the stop command.
// Returns 0 on success. Returns -1 if the handle is null, an MMIO access
// fails, the output file cannot be created, the device reports a frame width
// that differs from the scope_signals table, or it reports zero frames.
int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
    if (nullptr == hfpga)
        return -1;

    // Command codes written to MMIO_CSR_SCOPE_CMD. After one of the "get"
    // codes, reads of MMIO_CSR_SCOPE_DATA return the selected value.
    const uint64_t kCmdGetValid = 0;
    const uint64_t kCmdGetData  = 1;
    const uint64_t kCmdGetWidth = 2;
    const uint64_t kCmdGetCount = 3;
    const uint64_t kCmdSetStop  = 5;

    // stop recording
    CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, (delay << 3) | kCmdSetStop));

    std::ofstream ofs("vx_scope.vcd");
    if (!ofs) {
        std::cerr << "scope error: failed to create 'vx_scope.vcd'" << std::endl;
        return -1;
    }

    // VCD declarations: identifier 0 is a synthetic clock, identifiers
    // 1..num_signals follow the table order. '\n' is used instead of
    // std::endl so the stream is not flushed on every line.
    ofs << "$timescale 1 ns $end\n";
    ofs << "$var reg 1 0 clk $end\n";
    uint64_t expected_width = 0;
    for (int i = 0; i < num_signals; ++i) {
        ofs << "$var reg " << scope_signals[i].width << " " << (i+1) << " " << scope_signals[i].name << " $end\n";
        expected_width += scope_signals[i].width;
    }
    // the VCD keyword carries a leading '$'
    ofs << "$enddefinitions $end\n";

    // wait for recording to terminate
    uint64_t data_valid = 0;
    CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, kCmdGetValid));
    for (;;) {
        CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_CSR_SCOPE_DATA, &data_valid));
        if (data_valid)
            break;
        std::this_thread::sleep_for(std::chrono::seconds(1));
    }

    std::cout << "scope trace dump begin..." << std::endl;

    uint64_t frame_width = 0;
    CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, kCmdGetWidth));
    CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_CSR_SCOPE_DATA, &frame_width));
    std::cout << "scope::frame_width=" << frame_width << std::endl;
    // Checked at runtime (not only by assert): the decode loop below indexes
    // scope_signals by a counter that stays in range only if the widths match.
    if (frame_width != expected_width) {
        std::cerr << "scope error: device frame width (" << frame_width
                  << ") does not match the signal table (" << expected_width << ")" << std::endl;
        return -1;
    }

    uint64_t max_frames = 0;
    CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, kCmdGetCount));
    CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_CSR_SCOPE_DATA, &max_frames));
    std::cout << "scope::max_frames=" << max_frames << std::endl;
    // A zero count would underflow "max_frames-1" and never end the loop below.
    if (max_frames == 0) {
        std::cerr << "scope error: device reported zero frames" << std::endl;
        return -1;
    }

    CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, kCmdGetData));

    std::vector<char> signal_data(frame_width+1);
    uint64_t frame_offset = 0;  // bit position inside the current frame
    uint64_t frame_no = 0;      // frames fully decoded so far
    uint64_t timestamp = 0;     // running VCD time
    int signal_id = 0;          // VCD identifier of the signal being decoded
    int signal_offset = 0;      // bits of that signal decoded so far

    // writes one full period of the synthetic clock (identifier 0)
    auto emit_clock_cycle = [&] () {
        ofs << '#' << timestamp++ << '\n';
        ofs << "b0 0\n";
        ofs << '#' << timestamp++ << '\n';
        ofs << "b1 0\n";
    };

    // Starts a frame: emits one clock cycle, reads the frame's delta word and
    // emits that many additional cycles, then rewinds decoding to the last
    // table entry. Returns 0 on success, -1 if the MMIO read fails.
    auto print_header = [&] () -> int {
        emit_clock_cycle();
        uint64_t delta = 0;
        CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_CSR_SCOPE_DATA, &delta));
        for (; delta != 0; --delta) {
            emit_clock_cycle();
        }
        signal_id = num_signals;
        return 0;
    };

    if (print_header() != 0)
        return -1;

    do {
        if (frame_no == (max_frames-1)) {
            // verify last frame is valid
            CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, kCmdGetValid));
            CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_CSR_SCOPE_DATA, &data_valid));
            assert(data_valid == 1);
            CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, kCmdGetData));
        }

        uint64_t word = 0;
        CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_CSR_SCOPE_DATA, &word));

        // consume the 64-bit word one bit at a time; a frame always starts
        // on a fresh word because frame_offset is reset when a frame ends
        do {
            int signal_width = scope_signals[signal_id-1].width;
            int word_offset = frame_offset % 64;

            // bits arrive LSB first, the VCD value is written MSB first
            signal_data[signal_width - signal_offset - 1] = ((word >> word_offset) & 0x1) ? '1' : '0';

            ++signal_offset;
            ++frame_offset;

            if (signal_offset == signal_width) {
                signal_data[signal_width] = 0; // string null termination
                ofs << 'b' << signal_data.data() << ' ' << signal_id << '\n';
                signal_offset = 0;
                --signal_id;
            }

            if (frame_offset == frame_width) {
                assert(0 == signal_offset);
                frame_offset = 0;
                ++frame_no;
                if (frame_no != max_frames) {
                    if (print_header() != 0)
                        return -1;
                }
            }
        } while ((frame_offset % 64) != 0);
    } while (frame_no != max_frames);

    std::cout << "scope trace dump done!" << std::endl;

    // verify data not valid
    CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, kCmdGetValid));
    CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_CSR_SCOPE_DATA, &data_valid));
    assert(data_valid == 0);

    return 0;
}

7
driver/opae/scope.h Normal file
View file

@ -0,0 +1,7 @@
#pragma once
#include <opae/fpga.h>
// Writes the scope start-delay command through the given OPAE handle.
// Returns 0 on success, -1 if the handle is null or the MMIO write fails.
int vx_scope_start(fpga_handle hfpga, uint64_t delay);
// Sends the scope stop command, waits until the device reports valid data,
// then reads the recorded frames and writes them to "vx_scope.vcd".
// Returns 0 on success, -1 if the handle is null or an MMIO access fails.
int vx_scope_stop(fpga_handle hfpga, uint64_t delay);

View file

@ -4,16 +4,13 @@
#include <unistd.h>
#include <assert.h>
#include <cmath>
#include <thread>
#include <future>
#include <chrono>
#include <fstream>
#include <iostream>
#include <vector>
#include <uuid/uuid.h>
#include <opae/fpga.h>
#include <vortex.h>
#include "vortex_afu.h"
#ifdef SCOPE
#include "scope.h"
#endif
#define CHECK_RES(_expr) \
do { \
@ -37,8 +34,6 @@
#define MMIO_CSR_MEM_ADDR (AFU_IMAGE_MMIO_CSR_MEM_ADDR * 4)
#define MMIO_CSR_DATA_SIZE (AFU_IMAGE_MMIO_CSR_DATA_SIZE * 4)
#define MMIO_CSR_STATUS (AFU_IMAGE_MMIO_CSR_STATUS * 4)
#define MMIO_CSR_SCOPE_CMD (AFU_IMAGE_MMIO_CSR_SCOPE_CMD * 4)
#define MMIO_CSR_SCOPE_DATA (AFU_IMAGE_MMIO_CSR_SCOPE_DATA * 4)
///////////////////////////////////////////////////////////////////////////////
@ -67,203 +62,6 @@ inline bool is_aligned(size_t addr, size_t alignment) {
///////////////////////////////////////////////////////////////////////////////
struct scope_signal_t {
int width;
const char* name;
};
static const scope_signal_t scope_signals[] = {
{ 32, "icache_req_addr" },
{ 2, "icache_req_warp_num" },
{ 2, "icache_req_tag" },
{ 32, "icache_rsp_data" },
{ 2, "icache_rsp_tag" },
{ 32, "dcache_req_addr" },
{ 2, "dcache_req_warp_num" },
{ 2, "dcache_req_tag" },
{ 32, "dcache_rsp_data" },
{ 2 , "dcache_rsp_tag" },
{ 32, "dram_req_addr" },
{ 29, "dram_req_tag" },
{ 29, "dram_rsp_tag" },
{ 32, "snp_req_addr" },
{ 1, "snp_req_invalidate" },
{ 16, "snp_req_tag" },
{ 16, "snp_rsp_tag" },
{ 2, "decode_warp_num" },
{ 32, "decode_curr_PC" },
{ 1, "decode_is_jal" },
{ 5, "decode_rs1" },
{ 5, "decode_rs2" },
{ 2, "execute_warp_num" },
{ 5, "execute_rd" },
{ 32, "execute_a" },
{ 32, "execute_b" },
{ 2, "writeback_warp_num" },
{ 2, "writeback_wb" },
{ 5, "writeback_rd" },
{ 32, "writeback_data" },
{ 1, "icache_req_valid" },
{ 1, "icache_req_ready" },
{ 1, "icache_rsp_valid" },
{ 1, "icache_rsp_ready" },
{ 4, "dcache_req_valid" },
{ 1, "dcache_req_ready" },
{ 4, "dcache_rsp_valid" },
{ 1, "dcache_rsp_ready" },
{ 1, "dram_req_valid" },
{ 1, "dram_req_ready" },
{ 1, "dram_rsp_valid" },
{ 1, "dram_rsp_ready" },
{ 1, "snp_req_valid" },
{ 1, "snp_req_ready" },
{ 1, "snp_rsp_valid" },
{ 1, "snp_rsp_ready" },
{ 4, "decode_valid" },
{ 4, "execute_valid" },
{ 4, "writeback_valid" },
{ 1, "schedule_delay" },
{ 1, "memory_delay" },
{ 1, "exec_delay" },
{ 1, "gpr_stage_delay" },
};
static int vx_scope_start(vx_device_h hdevice) {
if (nullptr == hdevice)
return -1;
vx_device_t *device = ((vx_device_t*)hdevice);
// set start delay
uint64_t delay = 0;
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_CMD, ((delay << 3) | 4)));
// start execution
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_RUN));
const int num_signals = sizeof(scope_signals) / sizeof(scope_signal_t);
std::ofstream ofs("vx_scope.vcd");
ofs << "$timescale 1 ns $end" << std::endl;
ofs << "$var reg 1 0 clk $end" << std::endl;
int fwidth = 0;
for (int i = 0; i < num_signals; ++i) {
ofs << "$var reg " << scope_signals[i].width << " " << (i+1) << " " << scope_signals[i].name << " $end" << std::endl;
fwidth += scope_signals[i].width;
}
uint64_t frame_width, max_frames, data_valid;
ofs << "enddefinitions $end" << std::endl;
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_CMD, 0));
do {
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_DATA, &data_valid));
if (data_valid)
break;
std::this_thread::sleep_for(std::chrono::seconds(1));
} while (true);
std::cout << "scope trace dump begin..." << std::endl;
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_CMD, 2));
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_DATA, &frame_width));
std::cout << "scope::frame_width=" << frame_width << std::endl;
assert(fwidth == (int)frame_width);
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_CMD, 3));
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_DATA, &max_frames));
std::cout << "scope::max_frames=" << max_frames << std::endl;
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_CMD, 1));
std::vector<char> signal_data(frame_width+1);
uint64_t frame_offset = 0, frame_no = 0, timestamp = 0;
int signal_id = 0;
int signal_offset = 0;
auto print_header = [&] () {
ofs << '#' << timestamp++ << std::endl;
ofs << "b0 0" << std::endl;
ofs << '#' << timestamp++ << std::endl;
ofs << "b1 0" << std::endl;
uint64_t delta;
fpga_result res = fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_DATA, &delta);
assert(res == FPGA_OK);
while (delta != 0) {
ofs << '#' << timestamp++ << std::endl;
ofs << "b0 0" << std::endl;
ofs << '#' << timestamp++ << std::endl;
ofs << "b1 0" << std::endl;
--delta;
}
signal_id = num_signals;
};
print_header();
do {
if (frame_no == max_frames-1) {
// verify last frame is valid
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_CMD, 0));
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_DATA, &data_valid));
assert(data_valid == 1);
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_CMD, 1));
}
uint64_t word;
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_DATA, &word));
do {
int signal_width = scope_signals[signal_id-1].width;
int word_offset = frame_offset % 64;
signal_data[signal_width - signal_offset - 1] = ((word >> word_offset) & 0x1) ? '1' : '0';
++signal_offset;
++frame_offset;
if (signal_offset == signal_width) {
signal_data[signal_width] = 0; // string null termination
ofs << 'b' << signal_data.data() << ' ' << signal_id << std::endl;
signal_offset = 0;
--signal_id;
}
if (frame_offset == frame_width) {
assert(0 == signal_offset);
frame_offset = 0;
++frame_no;
if (frame_no != max_frames) {
print_header();
}
}
} while ((frame_offset % 64) != 0);
} while (frame_no != max_frames);
std::cout << "scope trace dump done!" << std::endl;
// verify data not valid
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_CMD, 0));
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_DATA, &data_valid));
assert(data_valid == 0);
return 0;
}
///////////////////////////////////////////////////////////////////////////////
extern int vx_dev_open(vx_device_h* hdevice) {
fpga_properties filter = nullptr;
fpga_result res;
@ -319,6 +117,14 @@ extern int vx_dev_open(vx_device_h* hdevice) {
device->mem_allocation = vx_dev_caps(VX_CAPS_ALLOC_BASE_ADDR);
*hdevice = device;
#ifdef SCOPE
{
int ret = vx_scope_start(device->fpga, 0);
if (ret != 0)
return ret;
}
#endif
return 0;
}
@ -329,6 +135,10 @@ extern int vx_dev_close(vx_device_h hdevice) {
vx_device_t *device = ((vx_device_t*)hdevice);
#ifdef SCOPE
vx_scope_stop(device->fpga, 0);
#endif
fpgaClose(device->fpga);
free(device);
@ -590,15 +400,9 @@ extern int vx_start(vx_device_h hdevice) {
if (vx_ready_wait(hdevice, -1) != 0)
return -1;
#ifdef SCOPE
int ret = vx_scope_start(hdevice);
if (ret != 0)
return ret;
#else
// start execution
vx_device_t *device = ((vx_device_t*)hdevice);
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_RUN));
#endif
return 0;
}

View file

@ -42,6 +42,8 @@ vx_device_h device = nullptr;
vx_buffer_h sbuf = nullptr;
vx_buffer_h dbuf = nullptr;
int total_blocks = NUM_BLOCKS;
void cleanup() {
if (sbuf) {
vx_buf_release(sbuf);
@ -103,15 +105,15 @@ int run_memcopy_test(vx_buffer_h sbuf,
int run_kernel_test(vx_device_h device,
vx_buffer_h sbuf,
vx_buffer_h dbuf,
const char* program) {
const char* program,
int num_blocks) {
int errors = 0;
uint64_t seed = 0x0badf00d40ff40ff;
int src_dev_addr = DEV_MEM_SRC_ADDR;
int dest_dev_addr = DEV_MEM_DST_ADDR;
int num_blocks = NUM_BLOCKS;
// write sbuf data
for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
((uint64_t*)vx_host_ptr(sbuf))[i] = shuffle(i, seed);
@ -171,29 +173,30 @@ int main(int argc, char *argv[]) {
// parse command arguments
parse_args(argc, argv);
std::cout << "total blocks: " << total_blocks << std::endl;
// open device connection
std::cout << "open device connection" << std::endl;
vx_device_h device;
RT_CHECK(vx_dev_open(&device));
// create source buffer
std::cout << "create source buffer" << std::endl;
RT_CHECK(vx_alloc_shared_mem(device, 4096, &sbuf));
RT_CHECK(vx_alloc_shared_mem(device, total_blocks * 64, &sbuf));
// create destination buffer
std::cout << "create destination buffer" << std::endl;
RT_CHECK(vx_alloc_shared_mem(device, 4096, &dbuf));
RT_CHECK(vx_alloc_shared_mem(device, total_blocks * 64, &dbuf));
// run tests
if (0 == test || -1 == test) {
std::cout << "run memcopy test" << std::endl;
RT_CHECK(run_memcopy_test(sbuf, dbuf, DEV_MEM_SRC_ADDR, 0x0badf00d00ff00ff, 1));
RT_CHECK(run_memcopy_test(sbuf, dbuf, DEV_MEM_SRC_ADDR, 0x0badf00d40ff40ff, 64));
RT_CHECK(run_memcopy_test(sbuf, dbuf, DEV_MEM_SRC_ADDR, 0x0badf00d40ff40ff, total_blocks));
}
if (1 == test || -1 == test) {
std::cout << "run kernel test" << std::endl;
RT_CHECK(run_kernel_test(device, sbuf, dbuf, "kernel.bin"));
RT_CHECK(run_kernel_test(device, sbuf, dbuf, "kernel.bin", total_blocks));
}
// cleanup

View file

@ -3,6 +3,6 @@
#define DEV_MEM_SRC_ADDR 0x10000040
#define DEV_MEM_DST_ADDR 0x20000080
#define NUM_BLOCKS 1
#define NUM_BLOCKS 2
#endif

Binary file not shown.

View file

@ -835,7 +835,7 @@ VX_scope #(
.clk (clk),
.reset (SoftReset),
.start (vx_reset),
.stop (cmd_run_done),
.stop (0),
.changed (force_changed),
.data_in ({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST}),
.bus_in (csr_scope_cmd),

View file

@ -19,7 +19,7 @@ module VX_scope #(
input wire bus_read
);
localparam DELTA_ENABLE = (UPDW != 0);
localparam MAX_DELTA = (1**DELTAW)-1;
// Largest value that fits in DELTAW bits. The base must be 2: 1**DELTAW is
// always 1, which made this constant 0 for every DELTAW.
localparam MAX_DELTA = (2**DELTAW)-1;
typedef enum logic[2:0] {
CMD_GET_VALID,
@ -27,9 +27,9 @@ module VX_scope #(
CMD_GET_WIDTH,
CMD_GET_COUNT,
CMD_SET_DELAY,
CMD_SET_DURATION,
CMD_SET_RESERVED1,
CMD_SET_RESERVED2
CMD_SET_STOP,
CMD_RESERVED1,
CMD_RESERVED2
} cmd_t;
typedef enum logic[1:0] {
@ -81,9 +81,9 @@ module VX_scope #(
CMD_GET_VALID,
CMD_GET_DATA,
CMD_GET_WIDTH,
CMD_GET_COUNT: out_cmd <= $bits(out_cmd)'(cmd_type);
CMD_SET_DELAY: delay_val <= $bits(delay_val)'(cmd_data);
CMD_SET_DURATION: waddr_end <= $bits(waddr)'(cmd_data);
CMD_GET_COUNT: out_cmd <= $bits(out_cmd)'(cmd_type);
CMD_SET_DELAY: delay_val <= $bits(delay_val)'(cmd_data);
CMD_SET_STOP: waddr_end <= $bits(waddr)'(cmd_data);
default:;
endcase
end
@ -130,7 +130,7 @@ module VX_scope #(
end
if (stop
|| (waddr == waddr_end)) begin
|| (waddr >= waddr_end)) begin
waddr <= waddr; // keep last written address
recording <= 0;
data_valid <= 1;