mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
adding CSR support to rtlsim driver
This commit is contained in:
parent
dccea80b68
commit
112d8ab815
10 changed files with 167 additions and 42 deletions
|
@ -91,22 +91,22 @@ extern int vx_upload_kernel_file(vx_device_h device, const char* filename) {
|
|||
return err;
|
||||
}
|
||||
|
||||
extern int vx_get_perf(vx_device_h device, size_t* cycles, size_t* instrs) {
|
||||
extern int vx_get_perf(vx_device_h device, int core_id, size_t* cycles, size_t* instrs) {
|
||||
int ret = 0;
|
||||
|
||||
unsigned value;
|
||||
|
||||
if (cycles) {
|
||||
ret |= vx_csr_get(device, 0, CSR_CYCLE_H, &value);
|
||||
ret |= vx_csr_get(device, core_id, CSR_CYCLE_H, &value);
|
||||
*cycles = value;
|
||||
ret |= vx_csr_get(device, 0, CSR_CYCLE, &value);
|
||||
ret |= vx_csr_get(device, core_id, CSR_CYCLE, &value);
|
||||
*cycles = (*cycles << 32) | value;
|
||||
}
|
||||
|
||||
if (instrs) {
|
||||
ret |= vx_csr_get(device, 0, CSR_INSTRET_H, &value);
|
||||
ret |= vx_csr_get(device, core_id, CSR_INSTRET_H, &value);
|
||||
*instrs = value;
|
||||
ret |= vx_csr_get(device, 0, CSR_INSTRET, &value);
|
||||
ret |= vx_csr_get(device, core_id, CSR_INSTRET, &value);
|
||||
*instrs = (*instrs << 32) | value;
|
||||
}
|
||||
|
||||
|
|
|
@ -58,10 +58,10 @@ int vx_start(vx_device_h hdevice);
|
|||
int vx_ready_wait(vx_device_h hdevice, long long timeout);
|
||||
|
||||
// set device constant registers
|
||||
int vx_csr_set(vx_device_h hdevice, int core, int address, unsigned value);
|
||||
int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value);
|
||||
|
||||
// get device constant registers
|
||||
int vx_csr_get(vx_device_h hdevice, int core, int address, unsigned* value);
|
||||
int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* value);
|
||||
|
||||
////////////////////////////// UTILITY FUNCTIONS //////////////////////////////
|
||||
|
||||
|
@ -72,7 +72,7 @@ int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size)
|
|||
int vx_upload_kernel_file(vx_device_h device, const char* filename);
|
||||
|
||||
// get performance counters
|
||||
int vx_get_perf(vx_device_h device, size_t* cycles, size_t* instrs);
|
||||
int vx_get_perf(vx_device_h device, int core_id, size_t* cycles, size_t* instrs);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -17,6 +17,9 @@ CXXFLAGS +=-fstack-protector
|
|||
# Position independent code
|
||||
CXXFLAGS += -fPIC
|
||||
|
||||
# Dump perf stats
|
||||
CXXFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
# Enable scope analyzer
|
||||
#CXXFLAGS += -DSCOPE
|
||||
|
||||
|
|
|
@ -211,14 +211,29 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
|||
vx_scope_stop(device->fpga, 0);
|
||||
#endif
|
||||
|
||||
#ifdef DUMP_PERF_STATS
    {
        // Dump perf stats: one line per core when the device has several
        // cores, followed by an aggregate line (instructions summed over
        // cores, cycles taken as the maximum over cores).
        //
        // vx_get_perf() returns its outputs in (cycles, instrs) order, so the
        // pointers must be passed in that order; passing them the other way
        // round swaps the two counters and turns the reported IPC into CPI.
        auto calc_ipc = [](uint64_t instrs, uint64_t cycles) -> float {
            // Avoid printing inf/nan when no cycle has been counted.
            return (cycles != 0) ? (float)(double(instrs) / double(cycles)) : 0.0f;
        };

        if (device->num_cores > 1) {
            uint64_t total_instrs = 0, total_cycles = 0;
            for (unsigned core_id = 0; core_id < device->num_cores; ++core_id) {
                // size_t matches the out-parameter type of vx_get_perf().
                size_t instrs = 0, cycles = 0;
                int ret = vx_get_perf(hdevice, core_id, &cycles, &instrs);
                assert(ret == 0);
                (void)ret; // silence unused warning when asserts are compiled out
                fprintf(stdout, "PERF: core%u: instrs=%llu, cycles=%llu, IPC=%f\n",
                        core_id, (unsigned long long)instrs, (unsigned long long)cycles,
                        calc_ipc(instrs, cycles));
                total_instrs += instrs;
                total_cycles = std::max<uint64_t>(total_cycles, cycles);
            }
            fprintf(stdout, "PERF: instrs=%llu, cycles=%llu, IPC=%f\n",
                    (unsigned long long)total_instrs, (unsigned long long)total_cycles,
                    calc_ipc(total_instrs, total_cycles));
        } else {
            size_t instrs = 0, cycles = 0;
            int ret = vx_get_perf(hdevice, 0, &cycles, &instrs);
            assert(ret == 0);
            (void)ret; // silence unused warning when asserts are compiled out
            fprintf(stdout, "PERF: instrs=%llu, cycles=%llu, IPC=%f\n",
                    (unsigned long long)instrs, (unsigned long long)cycles,
                    calc_ipc(instrs, cycles));
        }
    }
#endif
|
||||
|
||||
fpgaClose(device->fpga);
|
||||
|
||||
|
@ -480,7 +495,7 @@ extern int vx_start(vx_device_h hdevice) {
|
|||
}
|
||||
|
||||
// set device constant registers
|
||||
extern int vx_csr_set(vx_device_h hdevice, int core, int address, unsigned value) {
|
||||
extern int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
|
@ -491,8 +506,8 @@ extern int vx_csr_set(vx_device_h hdevice, int core, int address, unsigned value
|
|||
return -1;
|
||||
|
||||
// write CSR value
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, address));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core_id));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, addr));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA, value));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_CSR_WRITE));
|
||||
|
||||
|
@ -500,7 +515,7 @@ extern int vx_csr_set(vx_device_h hdevice, int core, int address, unsigned value
|
|||
}
|
||||
|
||||
// get device constant registers
|
||||
extern int vx_csr_get(vx_device_h hdevice, int core, int address, unsigned* value) {
|
||||
extern int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* value) {
|
||||
if (nullptr == hdevice || nullptr == value)
|
||||
return -1;
|
||||
|
||||
|
@ -512,8 +527,8 @@ extern int vx_csr_get(vx_device_h hdevice, int core, int address, unsigned* valu
|
|||
|
||||
|
||||
// write CSR value
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, address));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core_id));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, addr));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_CSR_READ));
|
||||
|
||||
// Ensure ready for new command
|
||||
|
|
|
@ -28,6 +28,8 @@ CFLAGS += -fPIC
|
|||
|
||||
CFLAGS += -DUSE_RTLSIM $(CONFIGS)
|
||||
|
||||
CFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
LDFLAGS += -shared -pthread
|
||||
# LDFLAGS += -dynamiclib -pthread
|
||||
|
||||
|
|
|
@ -69,7 +69,28 @@ public:
|
|||
}
|
||||
|
||||
~vx_device() {
|
||||
simulator_.print_stats(std::cout);
|
||||
#ifdef DUMP_PERF_STATS
    {
        // Dump perf stats: one line per core when the device reports several
        // cores (CSR_NC read from core 0), followed by an aggregate line
        // (instructions summed over cores, cycles taken as the maximum).
        //
        // vx_get_perf() returns its outputs in (cycles, instrs) order, so the
        // pointers must be passed in that order; passing them the other way
        // round swaps the two counters and turns the reported IPC into CPI.
        auto calc_ipc = [](uint64_t instrs, uint64_t cycles) -> float {
            // Avoid printing inf/nan when no cycle has been counted.
            return (cycles != 0) ? (float)(double(instrs) / double(cycles)) : 0.0f;
        };

        unsigned num_cores = 0;
        this->get_csr(0, CSR_NC, &num_cores);
        if (num_cores > 1) {
            uint64_t total_instrs = 0, total_cycles = 0;
            for (unsigned core_id = 0; core_id < num_cores; ++core_id) {
                // Zero-initialised because the result of vx_get_perf() is not
                // checked here; size_t matches its out-parameter type.
                size_t instrs = 0, cycles = 0;
                vx_get_perf(this, core_id, &cycles, &instrs);
                fprintf(stdout, "PERF: core%u: instrs=%llu, cycles=%llu, IPC=%f\n",
                        core_id, (unsigned long long)instrs, (unsigned long long)cycles,
                        calc_ipc(instrs, cycles));
                total_instrs += instrs;
                total_cycles = std::max<uint64_t>(total_cycles, cycles);
            }
            fprintf(stdout, "PERF: instrs=%llu, cycles=%llu, IPC=%f\n",
                    (unsigned long long)total_instrs, (unsigned long long)total_cycles,
                    calc_ipc(total_instrs, total_cycles));
        } else {
            size_t instrs = 0, cycles = 0;
            vx_get_perf(this, 0, &cycles, &instrs);
            fprintf(stdout, "PERF: instrs=%llu, cycles=%llu, IPC=%f\n",
                    (unsigned long long)instrs, (unsigned long long)cycles,
                    calc_ipc(instrs, cycles));
        }
    }
#endif
|
||||
if (future_.valid()) {
|
||||
future_.wait();
|
||||
}
|
||||
|
@ -152,6 +173,28 @@ public:
|
|||
return 0;
|
||||
}
|
||||
|
||||
int set_csr(int core_id, int addr, unsigned value) {
|
||||
if (future_.valid()) {
|
||||
future_.wait(); // ensure prior run completed
|
||||
}
|
||||
simulator_.set_csr(core_id, addr, value);
|
||||
while (simulator_.is_busy()) {
|
||||
simulator_.step();
|
||||
};
|
||||
return 0;
|
||||
}
|
||||
|
||||
int get_csr(int core_id, int addr, unsigned *value) {
|
||||
if (future_.valid()) {
|
||||
future_.wait(); // ensure prior run completed
|
||||
}
|
||||
simulator_.get_csr(core_id, addr, value);
|
||||
while (simulator_.is_busy()) {
|
||||
simulator_.step();
|
||||
};
|
||||
return 0;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
size_t mem_allocation_;
|
||||
|
@ -324,10 +367,20 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
|
|||
return device->wait(timeout);
|
||||
}
|
||||
|
||||
extern int vx_csr_set(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned /*value*/) {
|
||||
return -1;
|
||||
// Sets CSR `addr` on core `core_id` of the given device to `value`.
// Returns -1 when the device handle is null; otherwise forwards to
// vx_device::set_csr() and returns its result.
extern int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value) {
    if (hdevice == nullptr) {
        return -1;
    }

    // The opaque handle is the vx_device instance itself.
    return ((vx_device*)hdevice)->set_csr(core_id, addr, value);
}
|
||||
|
||||
extern int vx_csr_get(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned* /*value*/) {
|
||||
return -1;
|
||||
// Reads CSR `addr` from core `core_id` of the given device into `*value`.
// Returns -1 when the device handle or the output pointer is null;
// otherwise forwards to vx_device::get_csr() and returns its result.
extern int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* value) {
    // `value` is written through once the simulator delivers the CSR
    // response, so a null output pointer must be rejected up front.
    if (nullptr == hdevice || nullptr == value)
        return -1;

    vx_device *device = ((vx_device*)hdevice);

    return device->get_csr(core_id, addr, value);
}
|
|
@ -358,10 +358,10 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
|
|||
return device->wait(timeout);
|
||||
}
|
||||
|
||||
extern int vx_csr_set(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned /*value*/) {
|
||||
// Stub: ignores all of its arguments and always returns -1.
extern int vx_csr_set(vx_device_h /*hdevice*/, int /*core_id*/, int /*addr*/, unsigned /*value*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_csr_get(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned* /*value*/) {
|
||||
// Stub: ignores all of its arguments (the output pointer is never written)
// and always returns -1.
extern int vx_csr_get(vx_device_h /*hdevice*/, int /*core_id*/, int /*addr*/, unsigned* /*value*/) {
|
||||
return -1;
|
||||
}
|
|
@ -48,10 +48,10 @@ extern int vx_ready_wait(vx_device_h /*hdevice*/, long long /*timeout*/) {
|
|||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_csr_set(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned /*value*/) {
|
||||
// Stub: ignores all of its arguments and always returns -1.
extern int vx_csr_set(vx_device_h /*hdevice*/, int /*core_id*/, int /*addr*/, unsigned /*value*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_csr_get(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned* /*value*/) {
|
||||
// Stub: ignores all of its arguments (the output pointer is never written)
// and always returns -1.
extern int vx_csr_get(vx_device_h /*hdevice*/, int /*core_id*/, int /*addr*/, unsigned* /*value*/) {
|
||||
return -1;
|
||||
}
|
|
@ -22,6 +22,7 @@ Simulator::Simulator() {
|
|||
|
||||
dram_rsp_active_ = false;
|
||||
snp_req_active_ = false;
|
||||
csr_req_active_ = false;
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
Verilated::traceEverOn(true);
|
||||
|
@ -163,15 +164,6 @@ void Simulator::eval_io_bus() {
|
|||
vortex_->io_rsp_valid = 0;
|
||||
}
|
||||
|
||||
void Simulator::eval_csr_bus() {
|
||||
vortex_->csr_io_req_valid = 0;
|
||||
vortex_->csr_io_req_coreid = 0;
|
||||
vortex_->csr_io_req_addr = 0;
|
||||
vortex_->csr_io_req_rw = 0;
|
||||
vortex_->csr_io_req_data = 0;
|
||||
vortex_->csr_io_rsp_ready = 1;
|
||||
}
|
||||
|
||||
void Simulator::eval_snp_bus() {
|
||||
if (snp_req_active_) {
|
||||
if (vortex_->snp_rsp_valid) {
|
||||
|
@ -204,6 +196,27 @@ void Simulator::eval_snp_bus() {
|
|||
}
|
||||
}
|
||||
|
||||
void Simulator::eval_csr_bus() {
|
||||
if (csr_req_active_) {
|
||||
if (vortex_->csr_io_req_rw) {
|
||||
if (vortex_->csr_io_req_ready) {
|
||||
vortex_->snp_req_valid = 0;
|
||||
csr_req_active_ = false;
|
||||
}
|
||||
} else {
|
||||
if (vortex_->csr_io_rsp_valid) {
|
||||
*csr_rsp_value_ = vortex_->csr_io_rsp_data;
|
||||
vortex_->snp_req_valid = 0;
|
||||
vortex_->csr_io_rsp_ready = 0;
|
||||
csr_req_active_ = false;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
vortex_->csr_io_req_valid = 0;
|
||||
vortex_->csr_io_rsp_ready = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void Simulator::wait(uint32_t cycles) {
|
||||
for (int i = 0; i < cycles; ++i) {
|
||||
this->step();
|
||||
|
@ -211,7 +224,9 @@ void Simulator::wait(uint32_t cycles) {
|
|||
}
|
||||
|
||||
bool Simulator::is_busy() const {
|
||||
return vortex_->busy || snp_req_active_;
|
||||
return vortex_->busy
|
||||
|| snp_req_active_
|
||||
|| csr_req_active_;
|
||||
}
|
||||
|
||||
void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
|
||||
|
@ -221,22 +236,52 @@ void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
|
|||
if (0 == size)
|
||||
return;
|
||||
|
||||
snp_req_active_ = true;
|
||||
snp_req_size_ = (size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE;
|
||||
|
||||
vortex_->snp_req_addr = mem_addr / GLOBAL_BLOCK_SIZE;
|
||||
vortex_->snp_req_tag = 0;
|
||||
vortex_->snp_req_valid = 1;
|
||||
vortex_->snp_rsp_ready = 1;
|
||||
|
||||
snp_req_size_ = (size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE;
|
||||
--snp_req_size_;
|
||||
pending_snp_reqs_ = 1;
|
||||
|
||||
snp_req_active_ = true;
|
||||
|
||||
#ifdef DBG_PRINT_CACHE_SNP
|
||||
std::cout << timestamp << ": [sim] snp req: addr=" << std::hex << vortex_->snp_req_addr << std::dec << " tag=" << vortex_->snp_req_tag << " remain=" << snp_req_size_ << std::endl;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Starts a CSR write: drives the CSR request signals so that register `addr`
// on core `core_id` receives `value`, and marks a CSR request as in flight.
// Returns immediately after driving the signals; it does not wait for the
// request to complete.
void Simulator::set_csr(int core_id, int addr, unsigned value) {
#ifndef NDEBUG
  std::cout << timestamp << ": [sim] set_csr()" << std::endl;
#endif

  auto& top = *vortex_;

  csr_req_active_ = true;

  // A write produces no response, so the response channel is not armed.
  top.csr_io_rsp_ready  = 0;

  top.csr_io_req_rw     = 1;
  top.csr_io_req_coreid = core_id;
  top.csr_io_req_addr   = addr;
  top.csr_io_req_data   = value;
  top.csr_io_req_valid  = 1;
}
|
||||
|
||||
// Starts a CSR read of register `addr` on core `core_id` and marks a CSR
// request as in flight. Returns immediately after driving the signals.
// `value` is remembered in csr_rsp_value_ and written through when the
// response is consumed, so it must remain valid until then.
void Simulator::get_csr(int core_id, int addr, unsigned *value) {
#ifndef NDEBUG
  std::cout << timestamp << ": [sim] get_csr()" << std::endl;
#endif

  auto& top = *vortex_;

  // Record where the response data has to be delivered.
  csr_rsp_value_  = value;
  csr_req_active_ = true;

  // Arm the response channel and issue the read request.
  top.csr_io_rsp_ready  = 1;
  top.csr_io_req_rw     = 0;
  top.csr_io_req_coreid = core_id;
  top.csr_io_req_addr   = addr;
  top.csr_io_req_valid  = 1;
}
|
||||
|
||||
void Simulator::run() {
|
||||
#ifndef NDEBUG
|
||||
std::cout << timestamp << ": [sim] run()" << std::endl;
|
||||
|
|
|
@ -31,6 +31,8 @@ public:
|
|||
Simulator();
|
||||
virtual ~Simulator();
|
||||
|
||||
void attach_ram(RAM* ram);
|
||||
|
||||
void load_bin(const char* program_file);
|
||||
void load_ihex(const char* program_file);
|
||||
|
||||
|
@ -39,12 +41,14 @@ public:
|
|||
void reset();
|
||||
void step();
|
||||
void wait(uint32_t cycles);
|
||||
|
||||
void flush_caches(uint32_t mem_addr, uint32_t size);
|
||||
|
||||
void attach_ram(RAM* ram);
|
||||
void set_csr(int core_id, int addr, unsigned value);
|
||||
void get_csr(int core_id, int addr, unsigned *value);
|
||||
|
||||
void run();
|
||||
int get_last_wb_value(int reg) const;
|
||||
|
||||
void print_stats(std::ostream& out);
|
||||
|
||||
private:
|
||||
|
@ -60,8 +64,11 @@ private:
|
|||
int dram_rsp_active_;
|
||||
|
||||
bool snp_req_active_;
|
||||
bool csr_req_active_;
|
||||
|
||||
uint32_t snp_req_size_;
|
||||
uint32_t pending_snp_reqs_;
|
||||
uint32_t* csr_rsp_value_;
|
||||
|
||||
RAM *ram_;
|
||||
VVortex *vortex_;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue