mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
extending tracing feature for advanced debugging
This commit is contained in:
parent
640c98a4e8
commit
a60bfc5e01
7 changed files with 100 additions and 36 deletions
|
@ -10,8 +10,12 @@
|
|||
|
||||
#define ENABLE_MEM_STALLS
|
||||
|
||||
#ifndef TRACE_DELAY
|
||||
#define TRACE_DELAY 0
|
||||
#ifndef TRACE_START_TIME
|
||||
#define TRACE_START_TIME 0ull
|
||||
#endif
|
||||
|
||||
#ifndef TRACE_STOP_TIME
|
||||
#define TRACE_STOP_TIME -1ull
|
||||
#endif
|
||||
|
||||
#ifndef MEM_LATENCY
|
||||
|
@ -30,8 +34,6 @@
|
|||
#define VERILATOR_RESET_VALUE 2
|
||||
#endif
|
||||
|
||||
uint64_t sim_trace_delay = TRACE_DELAY;
|
||||
|
||||
static uint64_t timestamp = 0;
|
||||
|
||||
double sc_time_stamp() {
|
||||
|
@ -55,6 +57,23 @@ static void __aligned_free(void *ptr) {
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static bool trace_enabled = false;
|
||||
static uint64_t trace_start_time = TRACE_START_TIME;
|
||||
static uint64_t trace_stop_time = TRACE_STOP_TIME;
|
||||
|
||||
bool sim_trace_enabled() {
|
||||
if (timestamp >= trace_start_time
|
||||
&& timestamp < trace_stop_time)
|
||||
return true;
|
||||
return trace_enabled;
|
||||
}
|
||||
|
||||
void sim_trace_enable(bool enable) {
|
||||
trace_enabled = enable;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
opae_sim::opae_sim()
|
||||
: stop_(false)
|
||||
, host_buffer_ids_(0)
|
||||
|
@ -205,7 +224,7 @@ void opae_sim::step() {
|
|||
void opae_sim::eval() {
|
||||
vortex_afu_->eval();
|
||||
#ifdef VCD_OUTPUT
|
||||
if (timestamp >= sim_trace_delay) {
|
||||
if (sim_trace_enabled()) {
|
||||
trace_->dump(timestamp);
|
||||
}
|
||||
#endif
|
||||
|
@ -349,7 +368,7 @@ void opae_sim::avs_bus() {
|
|||
}
|
||||
/*printf("%0ld: [sim] MEM Wr Req: bank=%d, addr=%x, data=", timestamp, b, base_addr);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
printf("%0x", data[(MEM_BLOCK_SIZE-1)-i]);
|
||||
printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]);
|
||||
}
|
||||
printf("\n");*/
|
||||
}
|
||||
|
@ -360,6 +379,7 @@ void opae_sim::avs_bus() {
|
|||
mem_req.cycles_left = MEM_LATENCY;
|
||||
for (auto& rsp : mem_reads_[b]) {
|
||||
if (mem_req.addr == rsp.addr) {
|
||||
// duplicate requests receive the same cycle delay
|
||||
mem_req.cycles_left = rsp.cycles_left;
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
#pragma once
|
||||
|
||||
#include "verilated.h"
|
||||
//#include "verilated_stub.h"
|
||||
#include <verilated.h>
|
||||
#include "Vvortex_afu_shim.h"
|
||||
#include "Vvortex_afu_shim__Syms.h"
|
||||
|
||||
|
|
|
@ -21,7 +21,6 @@ inline size_t align_size(size_t size, size_t alignment) {
|
|||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class vx_device;
|
||||
|
||||
class vx_buffer {
|
||||
public:
|
||||
vx_buffer(size_t size, vx_device* device)
|
||||
|
@ -84,11 +83,11 @@ public:
|
|||
if (dest_addr + asize > ram_.size())
|
||||
return -1;
|
||||
|
||||
/*printf("VXDRV: upload %d bytes from 0x%lx to 0x%lx", size, (uint8_t*)src + src_offset, dest_addr);
|
||||
if (size <= 1024) {
|
||||
printf(": ");
|
||||
for (int i = asize-1; i >= 0; --i) {
|
||||
printf("%x", *((uint8_t*)src + src_offset + i));
|
||||
/*printf("VXDRV: upload %ld bytes from 0x%lx:", size, uintptr_t((uint8_t*)src + src_offset));
|
||||
for (int i = 0; i < (asize / CACHE_BLOCK_SIZE); ++i) {
|
||||
printf("\n0x%08lx=", dest_addr + i * CACHE_BLOCK_SIZE);
|
||||
for (int j = 0; j < CACHE_BLOCK_SIZE; ++j) {
|
||||
printf("%02x", *((uint8_t*)src + src_offset + i * CACHE_BLOCK_SIZE + CACHE_BLOCK_SIZE - 1 - j));
|
||||
}
|
||||
}
|
||||
printf("\n");*/
|
||||
|
@ -104,11 +103,11 @@ public:
|
|||
|
||||
ram_.read(src_addr, asize, (uint8_t*)dest + dest_offset);
|
||||
|
||||
/*printf("VXDRV: download %d bytes from 0x%lx to 0x%lx", size, src_addr, (uint8_t*)dest + dest_offset);
|
||||
if (size <= 1024) {
|
||||
printf(": ");
|
||||
for (int i = asize-1; i >= 0; --i) {
|
||||
printf("%x", *((uint8_t*)dest + dest_offset + i));
|
||||
/*printf("VXDRV: download %ld bytes to 0x%lx:", size, uintptr_t((uint8_t*)dest + dest_offset));
|
||||
for (int i = 0; i < (asize / CACHE_BLOCK_SIZE); ++i) {
|
||||
printf("\n0x%08lx=", src_addr + i * CACHE_BLOCK_SIZE);
|
||||
for (int j = 0; j < CACHE_BLOCK_SIZE; ++j) {
|
||||
printf("%02x", *((uint8_t*)dest + dest_offset + i * CACHE_BLOCK_SIZE + CACHE_BLOCK_SIZE - 1 - j));
|
||||
}
|
||||
}
|
||||
printf("\n");*/
|
||||
|
|
|
@ -15,12 +15,13 @@ extern "C" {
|
|||
int dpi_register();
|
||||
void dpi_assert(int inst, bool cond, int delay);
|
||||
|
||||
void dpi_trace(const char* format, ...);
|
||||
void dpi_trace(const char* format, ...);
|
||||
void dpi_trace_start();
|
||||
void dpi_trace_stop();
|
||||
}
|
||||
|
||||
double sc_time_stamp();
|
||||
|
||||
extern uint64_t sim_trace_delay;
|
||||
bool sim_trace_enabled();
|
||||
void sim_trace_enable(bool enable);
|
||||
|
||||
class ShiftRegister {
|
||||
public:
|
||||
|
@ -141,12 +142,19 @@ void dpi_idiv(int a, int b, bool is_signed, int* quotient, int* remainder) {
|
|||
}
|
||||
}
|
||||
|
||||
void dpi_trace(const char* format, ...) {
|
||||
uint64_t timestamp = (uint64_t)sc_time_stamp();
|
||||
if (timestamp < sim_trace_delay)
|
||||
void dpi_trace(const char* format, ...) {
|
||||
if (!sim_trace_enabled())
|
||||
return;
|
||||
va_list va;
|
||||
va_start(va, format);
|
||||
vprintf(format, va);
|
||||
va_end(va);
|
||||
}
|
||||
|
||||
void dpi_trace_start() {
|
||||
sim_trace_enable(true);
|
||||
}
|
||||
|
||||
void dpi_trace_stop() {
|
||||
sim_trace_enable(false);
|
||||
}
|
|
@ -8,5 +8,7 @@ import "DPI-C" function int dpi_register();
|
|||
import "DPI-C" function void dpi_assert(int inst, input logic cond, input int delay);
|
||||
|
||||
import "DPI-C" function void dpi_trace(input string format /*verilator sformat*/);
|
||||
import "DPI-C" function void dpi_trace_start();
|
||||
import "DPI-C" function void dpi_trace_stop();
|
||||
|
||||
`endif
|
|
@ -5,8 +5,12 @@
|
|||
|
||||
#define ENABLE_MEM_STALLS
|
||||
|
||||
#ifndef TRACE_DELAY
|
||||
#define TRACE_DELAY 0
|
||||
#ifndef TRACE_START_TIME
|
||||
#define TRACE_START_TIME 0ull
|
||||
#endif
|
||||
|
||||
#ifndef TRACE_STOP_TIME
|
||||
#define TRACE_STOP_TIME -1ull
|
||||
#endif
|
||||
|
||||
#ifndef MEM_LATENCY
|
||||
|
@ -28,14 +32,31 @@
|
|||
#define VL_WDATA_GETW(lwp, i, n, w) \
|
||||
VL_SEL_IWII(0, n * w, 0, 0, lwp, i * w, w)
|
||||
|
||||
uint64_t sim_trace_delay = TRACE_DELAY;
|
||||
|
||||
static uint64_t timestamp = 0;
|
||||
|
||||
double sc_time_stamp() {
|
||||
return timestamp;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static bool trace_enabled = false;
|
||||
static uint64_t trace_start_time = TRACE_START_TIME;
|
||||
static uint64_t trace_stop_time = TRACE_STOP_TIME;
|
||||
|
||||
bool sim_trace_enabled() {
|
||||
if (timestamp >= trace_start_time
|
||||
&& timestamp < trace_stop_time)
|
||||
return true;
|
||||
return trace_enabled;
|
||||
}
|
||||
|
||||
void sim_trace_enable(bool enable) {
|
||||
trace_enabled = enable;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
Simulator::Simulator() {
|
||||
// force random values for unitialized signals
|
||||
Verilated::randReset(VERILATOR_RESET_VALUE);
|
||||
|
@ -127,7 +148,7 @@ void Simulator::step() {
|
|||
void Simulator::eval() {
|
||||
vortex_->eval();
|
||||
#ifdef VCD_OUTPUT
|
||||
if (timestamp >= sim_trace_delay) {
|
||||
if (sim_trace_enabled()) {
|
||||
trace_->dump(timestamp);
|
||||
}
|
||||
#endif
|
||||
|
@ -169,7 +190,14 @@ void Simulator::eval_mem_bus() {
|
|||
if (!mem_rsp_active_) {
|
||||
if (has_response) {
|
||||
vortex_->mem_rsp_valid = 1;
|
||||
std::list<mem_req_t>::iterator mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin();
|
||||
std::list<mem_req_t>::iterator mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin();
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Rd: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp_it->addr);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
printf("%02x", mem_rsp_it->block[(MEM_BLOCK_SIZE-1)-i]);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
memcpy((uint8_t*)vortex_->mem_rsp_data, mem_rsp_it->block.data(), MEM_BLOCK_SIZE);
|
||||
vortex_->mem_rsp_tag = mem_rsp_it->tag;
|
||||
mem_rsp_vec_[last_mem_rsp_bank_].erase(mem_rsp_it);
|
||||
|
@ -214,20 +242,28 @@ void Simulator::eval_mem_bus() {
|
|||
}
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Wr: addr=%0x, byteen=%0lx, data=", timestamp, base_addr, byteen);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
(*ram_)[base_addr + i] = data[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
} else {
|
||||
mem_req_t mem_req;
|
||||
mem_req.tag = vortex_->mem_req_tag;
|
||||
mem_req.addr = vortex_->mem_req_addr;
|
||||
mem_req.addr = (vortex_->mem_req_addr * MEM_BLOCK_SIZE);
|
||||
ram_->read(vortex_->mem_req_addr * MEM_BLOCK_SIZE, MEM_BLOCK_SIZE, mem_req.block.data());
|
||||
mem_req.cycles_left = MEM_LATENCY;
|
||||
for (auto& rsp : mem_rsp_vec_[req_bank]) {
|
||||
if (mem_req.addr == rsp.addr) {
|
||||
// duplicate requests receive the same cycle delay
|
||||
mem_req.cycles_left = rsp.cycles_left;
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
#pragma once
|
||||
|
||||
#include <verilated.h>
|
||||
#include "VVortex.h"
|
||||
#include "VVortex__Syms.h"
|
||||
#include "verilated.h"
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
#include <verilated_vcd_c.h>
|
||||
|
@ -51,7 +51,7 @@ private:
|
|||
typedef struct {
|
||||
int cycles_left;
|
||||
std::array<uint8_t, MEM_BLOCK_SIZE> block;
|
||||
uint32_t addr;
|
||||
uint64_t addr;
|
||||
uint64_t tag;
|
||||
} mem_req_t;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue