mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 13:57:17 -04:00
bug fixes
This commit is contained in:
parent
704f525fd6
commit
53900bee4f
7 changed files with 24 additions and 37 deletions
|
@ -166,10 +166,8 @@
|
|||
`define VX_CSR_MPM_MEM_WRITES_H 12'hB99
|
||||
`define VX_CSR_MPM_MEM_LT 12'hB1A // memory latency
|
||||
`define VX_CSR_MPM_MEM_LT_H 12'hB9A
|
||||
`define VX_CSR_MPM_MEM_BANK_CNTR 12'hB1E // memory bank requests
|
||||
`define VX_CSR_MPM_MEM_BANK_CNTR_H 12'hB9E
|
||||
`define VX_CSR_MPM_MEM_BANK_TICK 12'hB1F // memory ticks
|
||||
`define VX_CSR_MPM_MEM_BANK_TICK_H 12'hB9F
|
||||
`define VX_CSR_MPM_MEM_BANK_ST 12'hB1E // bank conflicts
|
||||
`define VX_CSR_MPM_MEM_BANK_ST_H 12'hB9E
|
||||
// PERF: lmem
|
||||
`define VX_CSR_MPM_LMEM_READS 12'hB1B // memory reads
|
||||
`define VX_CSR_MPM_LMEM_READS_H 12'hB9B
|
||||
|
@ -178,8 +176,8 @@
|
|||
`define VX_CSR_MPM_LMEM_BANK_ST 12'hB1D // bank conflicts
|
||||
`define VX_CSR_MPM_LMEM_BANK_ST_H 12'hB9D
|
||||
// PERF: coalescer
|
||||
`define VX_CSR_MPM_COALESCE_MISS 12'hB1E // coalescer misses
|
||||
`define VX_CSR_MPM_COALESCE_MISS_H 12'hB9E
|
||||
`define VX_CSR_MPM_COALESCER_MISS 12'hB1F // coalescer misses
|
||||
`define VX_CSR_MPM_COALESCER_MISS_H 12'hB9F
|
||||
|
||||
// Machine Performance-monitoring memory counters (class 3) ///////////////////
|
||||
// <Add your own counters: use addresses hB03..B1F, hB83..hB9F>
|
||||
|
|
|
@ -272,7 +272,7 @@ import VX_fpu_pkg::*;
|
|||
`CSR_READ_64(`VX_CSR_MPM_MEM_WRITES, read_data_ro_w, sysmem_perf.mem.writes);
|
||||
`CSR_READ_64(`VX_CSR_MPM_MEM_LT, read_data_ro_w, sysmem_perf.mem.latency);
|
||||
// PERF: coalescer
|
||||
`CSR_READ_64(`VX_CSR_MPM_COALESCE_MISS, read_data_ro_w, sysmem_perf.coalescer.misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_COALESCER_MISS, read_data_ro_w, sysmem_perf.coalescer.misses);
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
|
|
@ -211,8 +211,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
|
|||
uint64_t mem_reads = 0;
|
||||
uint64_t mem_writes = 0;
|
||||
uint64_t mem_lat = 0;
|
||||
uint64_t mem_req_counter = 0;
|
||||
uint64_t mem_ticks = 0;
|
||||
uint64_t mem_bank_stalls = 0;
|
||||
|
||||
uint64_t num_cores;
|
||||
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_CORES, &num_cores), {
|
||||
|
@ -480,7 +479,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
|
|||
|
||||
// PERF: coalescer
|
||||
uint64_t coalescer_misses;
|
||||
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_COALESCER_ST, core_id, &coalescer_misses), {
|
||||
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_COALESCER_MISS, core_id, &coalescer_misses), {
|
||||
return err;
|
||||
});
|
||||
int coalescer_utilization = calcAvgPercent(dcache_requests_per_core - coalescer_misses, dcache_requests_per_core);
|
||||
|
@ -551,10 +550,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
|
|||
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_LT, core_id, &mem_lat), {
|
||||
return err;
|
||||
});
|
||||
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_BANK_CNTR, core_id, &mem_req_counter), {
|
||||
return err;
|
||||
});
|
||||
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_BANK_TICK, core_id, &mem_ticks), {
|
||||
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_BANK_ST, core_id, &mem_bank_stalls), {
|
||||
return err;
|
||||
});
|
||||
}
|
||||
|
@ -632,11 +628,14 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
|
|||
fprintf(stream, "PERF: l3cache mshr stalls=%ld (utilization=%d%%)\n", l3cache_mshr_stalls, mshr_utilization);
|
||||
}
|
||||
|
||||
int mem_avg_lat = caclAverage(mem_lat, mem_reads);
|
||||
int memory_bank_port_utilization = calcAvgPercent(mem_req_counter, (mem_ticks * num_mem_bank_ports));
|
||||
fprintf(stream, "PERF: memory requests=%ld (reads=%ld, writes=%ld)\n", (mem_reads + mem_writes), mem_reads, mem_writes);
|
||||
fprintf(stream, "PERF: memory latency=%d cycles\n", mem_avg_lat);
|
||||
fprintf(stream, "PERF: memory bank port utilization=%d%%\n", memory_bank_port_utilization);
|
||||
{
|
||||
uint64_t mem_requests = mem_reads + mem_writes;
|
||||
int mem_avg_lat = caclAverage(mem_lat, mem_reads);
|
||||
int mem_bank_utilization = calcAvgPercent(mem_requests, mem_requests + mem_bank_stalls);
|
||||
fprintf(stream, "PERF: memory requests=%ld (reads=%ld, writes=%ld)\n", mem_requests, mem_reads, mem_writes);
|
||||
fprintf(stream, "PERF: memory latency=%d cycles\n", mem_avg_lat);
|
||||
fprintf(stream, "PERF: memory bank stalls=%ld (utilization=%d%%)\n", mem_bank_stalls, mem_bank_utilization);
|
||||
}
|
||||
} break;
|
||||
default:
|
||||
break;
|
||||
|
|
|
@ -599,8 +599,9 @@ Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
|
|||
CSR_READ_64(VX_CSR_MPM_MEM_READS, proc_perf.mem_reads);
|
||||
CSR_READ_64(VX_CSR_MPM_MEM_WRITES, proc_perf.mem_writes);
|
||||
CSR_READ_64(VX_CSR_MPM_MEM_LT, proc_perf.mem_latency);
|
||||
CSR_READ_64(VX_CSR_MPM_MEM_BANK_CNTR, proc_perf.memsim.counter);
|
||||
CSR_READ_64(VX_CSR_MPM_MEM_BANK_TICK, proc_perf.memsim.ticks);
|
||||
CSR_READ_64(VX_CSR_MPM_MEM_BANK_ST, proc_perf.memsim.bank_stalls);
|
||||
|
||||
CSR_READ_64(VX_CSR_MPM_COALESCER_MISS, coalescer_misses);
|
||||
|
||||
CSR_READ_64(VX_CSR_MPM_LMEM_READS, lmem_perf.reads);
|
||||
CSR_READ_64(VX_CSR_MPM_LMEM_WRITES, lmem_perf.writes);
|
||||
|
|
|
@ -29,7 +29,7 @@ private:
|
|||
Config config_;
|
||||
MemCrossBar::Ptr mem_xbar_;
|
||||
DramSim dram_sim_;
|
||||
PerfStats perf_stats_;
|
||||
mutable PerfStats perf_stats_;
|
||||
|
||||
struct DramCallbackArgs {
|
||||
MemSim::Impl* memsim;
|
||||
|
@ -57,6 +57,7 @@ public:
|
|||
}
|
||||
|
||||
const PerfStats& perf_stats() const {
|
||||
perf_stats_.bank_stalls = mem_xbar_->req_collisions();
|
||||
return perf_stats_;
|
||||
}
|
||||
|
||||
|
@ -66,7 +67,6 @@ public:
|
|||
|
||||
void tick() {
|
||||
dram_sim_.tick();
|
||||
uint32_t counter = 0;
|
||||
|
||||
for (uint32_t i = 0; i < config_.num_banks; ++i) {
|
||||
if (mem_xbar_->ReqOut.at(i).empty())
|
||||
|
@ -102,12 +102,6 @@ public:
|
|||
DT(3, simobject_->name() << "-mem-req[" << i << "]: " << mem_req);
|
||||
|
||||
mem_xbar_->ReqOut.at(i).pop();
|
||||
counter++;
|
||||
}
|
||||
|
||||
perf_stats_.counter += counter;
|
||||
if (counter > 0) {
|
||||
++perf_stats_.ticks;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
|
|
@ -26,17 +26,14 @@ public:
|
|||
};
|
||||
|
||||
struct PerfStats {
|
||||
uint64_t counter;
|
||||
uint64_t ticks;
|
||||
uint64_t bank_stalls;
|
||||
|
||||
PerfStats()
|
||||
: counter(0)
|
||||
, ticks(0)
|
||||
: bank_stalls(0)
|
||||
{}
|
||||
|
||||
PerfStats& operator+=(const PerfStats& rhs) {
|
||||
this->counter += rhs.counter;
|
||||
this->ticks += rhs.ticks;
|
||||
this->bank_stalls += rhs.bank_stalls;
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
|
|
@ -850,9 +850,7 @@ public:
|
|||
if (output_idx != -1) {
|
||||
auto& rsp_out = RspOut.at(output_idx);
|
||||
auto& rsp = rsp_out.front();
|
||||
uint32_t input_idx = 0;
|
||||
if (lg2_inputs_ != 0) {
|
||||
input_idx = rsp.tag & (R-1);
|
||||
rsp.tag >>= lg2_inputs_;
|
||||
}
|
||||
DT(4, this->name() << "-rsp" << i << ": " << rsp);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue