Mirror of https://github.com/vortexgpgpu/vortex.git (synced 2025-04-24 13:57:17 -04:00)
simx perf counters updates
Commit 9143c19be3 (parent a711f0b6cd)
14 changed files with 117 additions and 27 deletions
@@ -147,18 +147,20 @@
`define CSR_MPM_TEX_READS_H 12'hB83
`define CSR_MPM_TEX_LAT 12'hB04 // texture latency
`define CSR_MPM_TEX_LAT_H 12'hB84
`define CSR_MPM_TEX_STALL 12'hB05 // texture stalls
`define CSR_MPM_TEX_STALL_H 12'hB85
// PERF: texture cache
`define CSR_MPM_TCACHE_READS 12'hB05 // total reads
`define CSR_MPM_TCACHE_READS_H 12'hB85
`define CSR_MPM_TCACHE_MISS_R 12'hB06 // read misses
`define CSR_MPM_TCACHE_MISS_R_H 12'hB86
`define CSR_MPM_TCACHE_BANK_ST 12'hB07 // bank stalls
`define CSR_MPM_TCACHE_BANK_ST_H 12'hB87
`define CSR_MPM_TCACHE_MSHR_ST 12'hB08 // MSHR stalls
`define CSR_MPM_TCACHE_MSHR_ST_H 12'hB88
`define CSR_MPM_TCACHE_READS 12'hB06 // total reads
`define CSR_MPM_TCACHE_READS_H 12'hB86
`define CSR_MPM_TCACHE_MISS_R 12'hB07 // read misses
`define CSR_MPM_TCACHE_MISS_R_H 12'hB87
`define CSR_MPM_TCACHE_BANK_ST 12'hB08 // bank stalls
`define CSR_MPM_TCACHE_BANK_ST_H 12'hB88
`define CSR_MPM_TCACHE_MSHR_ST 12'hB09 // MSHR stalls
`define CSR_MPM_TCACHE_MSHR_ST_H 12'hB89
// PERF: pipeline
`define CSR_MPM_TEX_ISSUE_ST 12'hB09 // issue stalls
`define CSR_MPM_TEX_ISSUE_ST_H 12'hB89
`define CSR_MPM_TEX_ISSUE_ST 12'hB0A // issue stalls
`define CSR_MPM_TEX_ISSUE_ST_H 12'hB8A

// Machine Performance-monitoring raster counters
// PERF: raster unit
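The new counters follow the existing MPM convention: each 64-bit counter is exposed as a low/high pair of 32-bit CSRs (the 12'hB0x address and its 12'hB8x counterpart). A minimal sketch of how software can combine the two halves; the csr_read() accessor and the retry loop are illustrative assumptions, not the Vortex API:

```cpp
#include <cstdint>

// Hypothetical 32-bit CSR accessor; substitute the real intrinsic or driver call.
extern uint32_t csr_read(uint32_t addr);

// Read a 64-bit MPM counter split across a low/high CSR pair.
// Re-read the high half until it is stable to guard against a carry
// between the two 32-bit reads.
uint64_t read_mpm64(uint32_t lo_addr, uint32_t hi_addr) {
  uint32_t hi = csr_read(hi_addr);
  for (;;) {
    uint32_t lo  = csr_read(lo_addr);
    uint32_t hi2 = csr_read(hi_addr);
    if (hi2 == hi)
      return (uint64_t(hi) << 32) | lo;
    hi = hi2;  // a carry landed between the reads; retry
  }
}
```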
@@ -1,4 +1,5 @@
RTL_DIR = ../../../../../rtl
AFU_DIR = ../../../../../afu/opae
THIRD_PARTY_DIR = ../../../../../../third_party

ifeq ($(DEVICE_FAMILY), stratix10)
@@ -32,6 +32,7 @@ set_global_assignment -name DEVICE $opts(device)
set_global_assignment -name TOP_LEVEL_ENTITY $opts(top)
set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin

#set_global_assignment -name OPTIMIZATION_TECHNIQUE AREA
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON
@@ -541,7 +541,7 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
int tex_stall_cycles_ratio = (int)(100 * double(tex_stall_cycles) / cycles);
fprintf(stream, "PERF: tex memory reads=%ld\n", tex_mem_reads);
fprintf(stream, "PERF: tex memory latency=%d cycles\n", tex_avg_lat);
fprintf(stream, "PERF: raster stall cycles=%ld cycles (%d%%)\n", tex_stall_cycles, tex_stall_cycles_ratio);
fprintf(stream, "PERF: tex stall cycles=%ld cycles (%d%%)\n", tex_stall_cycles, tex_stall_cycles_ratio);
fprintf(stream, "PERF: tex issue stalls=%ld\n", tex_issue_stalls);
int tcache_read_hit_ratio = (int)((1.0 - (double(tcache_read_misses) / double(tcache_reads))) * 100);
int tcache_bank_utilization = (int)((double(tcache_reads) / double(tcache_reads + tcache_bank_stalls)) * 100);
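The hit-ratio and bank-utilization expressions divide by counters that can legitimately be zero (for example when no texture work ran). A guarded variant of the same math, sketched with hypothetical helper and variable names:

```cpp
#include <cstdint>
#include <cstdio>

// Percentage helper that tolerates a zero denominator.
static int percent(uint64_t num, uint64_t den) {
  return den ? (int)(100.0 * (double)num / (double)den) : 0;
}

void dump_tcache_ratios(FILE* stream, uint64_t reads, uint64_t read_misses,
                        uint64_t bank_stalls) {
  int hit_ratio = reads ? 100 - percent(read_misses, reads) : 0;
  int bank_util = percent(reads, reads + bank_stalls);
  fprintf(stream, "PERF: tcache hit ratio=%d%%\n", hit_ratio);
  fprintf(stream, "PERF: tcache bank utilization=%d%%\n", bank_util);
}
```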
@@ -265,7 +265,7 @@ private:
PerfStats perf_stats_;
uint64_t pending_read_reqs_;
uint64_t pending_write_reqs_;
uint64_t pending_fill_reqs_;

public:
Impl(CacheSim* simobject, const Config& config)
@@ -280,6 +280,7 @@ Cluster::PerfStats Cluster::perf_stats() const {
perf.tcache = tcaches_->perf_stats();
perf.ocache = ocaches_->perf_stats();
perf.rcache = rcaches_->perf_stats();
perf.l2cache = l2cache_->perf_stats();

for (auto sharedmem : sharedmems_) {
perf.sharedmem += sharedmem->perf_stats();
@@ -39,6 +39,7 @@ public:
CacheSim::PerfStats icache;
CacheSim::PerfStats dcache;
SharedMem::PerfStats sharedmem;
CacheSim::PerfStats l2cache;
CacheSim::PerfStats tcache;
CacheSim::PerfStats ocache;
CacheSim::PerfStats rcache;
@@ -50,6 +51,7 @@ public:
this->icache += rhs.icache;
this->dcache += rhs.dcache;
this->sharedmem += rhs.sharedmem;
this->l2cache += rhs.l2cache;
this->tcache += rhs.tcache;
this->ocache += rhs.ocache;
this->rcache += rhs.rcache;
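Cache statistics roll up through operator+= at each level (shared memory into a cluster, clusters into the processor). A self-contained sketch of that aggregation pattern, with simplified struct and field names rather than the full simx types:

```cpp
#include <cstdint>

struct CachePerf {
  uint64_t reads = 0, read_misses = 0, bank_stalls = 0, mshr_stalls = 0;
  CachePerf& operator+=(const CachePerf& rhs) {
    reads       += rhs.reads;
    read_misses += rhs.read_misses;
    bank_stalls += rhs.bank_stalls;
    mshr_stalls += rhs.mshr_stalls;
    return *this;
  }
};

struct ClusterPerf {
  CachePerf icache, dcache, tcache, ocache, rcache, l2cache;
  ClusterPerf& operator+=(const ClusterPerf& rhs) {
    icache += rhs.icache;  dcache += rhs.dcache;
    tcache += rhs.tcache;  ocache += rhs.ocache;
    rcache += rhs.rcache;  l2cache += rhs.l2cache;
    return *this;
  }
};
```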
@@ -102,6 +102,7 @@ void Core::reset() {
ecall_ = false;
ebreak_ = false;
perf_stats_ = PerfStats();
pending_ifetches_ = 0;
}

void Core::attach_ram(RAM* ram) {
@@ -159,6 +160,8 @@ void Core::schedule() {
}

void Core::fetch() {
perf_stats_.ifetch_latency += pending_ifetches_;

// handle icache response
auto& icache_rsp_port = icache_rsp_ports.at(0);
if (!icache_rsp_port.empty()){
@@ -168,6 +171,7 @@
DT(3, "icache-rsp: addr=" << std::hex << trace->PC << ", tag=" << mem_rsp.tag << ", " << *trace);
pending_icache_.release(mem_rsp.tag);
icache_rsp_port.pop();
--pending_ifetches_;
}

// send icache request
@@ -180,9 +184,11 @@
mem_req.cid = trace->cid;
mem_req.uuid = trace->uuid;
icache_req_ports.at(0).send(mem_req, 1);
DT(3, "icache-req: addr=" << std::hex << mem_req.addr << ", tag=" << mem_req.tag << ", " << *trace);
fetch_latch_.pop();
++pending_ifetches_;
++perf_stats_.ifetches;
}
}

void Core::decode() {
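fetch() now increments pending_ifetches_ per icache request, decrements it per response, and adds the in-flight count to ifetch_latency every cycle, so the accumulated value is the total number of fetch-waiting cycles. A stripped-down sketch of that lifecycle (the struct and method names are hypothetical, not the simx classes):

```cpp
#include <cstdint>

// Minimal model of the pending-request accounting added to the fetch path.
struct FetchCounters {
  uint64_t pending  = 0;  // requests currently in flight
  uint64_t ifetches = 0;  // total requests issued
  uint64_t latency  = 0;  // sum over cycles of in-flight requests

  void on_cycle()    { latency += pending; }   // called once per simulated cycle
  void on_request()  { ++pending; ++ifetches; }
  void on_response() { --pending; }

  // Average fetch latency in cycles: total waiting divided by requests issued.
  uint64_t avg_latency() const { return ifetches ? latency / ifetches : 0; }
};
```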
@@ -214,8 +220,6 @@ void Core::decode() {
perf_stats_.loads += active_threads;
if (trace->exe_type == ExeType::LSU && trace->lsu_type == LsuType::STORE)
perf_stats_.stores += active_threads;
if (trace->exe_type == ExeType::ALU && trace->alu_type == AluType::BRANCH)
perf_stats_.branches += active_threads;

DT(3, "pipeline-decode: " << *trace);
@@ -483,13 +487,20 @@ uint32_t Core::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
case CSR_MPM_GPU_ST: return perf_stats_.gpu_stalls & 0xffffffff;
case CSR_MPM_GPU_ST_H: return perf_stats_.gpu_stalls >> 32;

case CSR_MPM_IFETCHES: return perf_stats_.ifetches & 0xffffffff;
case CSR_MPM_IFETCHES_H: return perf_stats_.ifetches >> 32;
case CSR_MPM_LOADS: return perf_stats_.loads & 0xffffffff;
case CSR_MPM_LOADS_H: return perf_stats_.loads >> 32;
case CSR_MPM_STORES: return perf_stats_.stores & 0xffffffff;
case CSR_MPM_STORES_H: return perf_stats_.stores >> 32;
case CSR_MPM_BRANCHES: return perf_stats_.branches & 0xffffffff;
case CSR_MPM_BRANCHES_H:return perf_stats_.branches >> 32;

case CSR_MPM_IFETCH_LAT: return perf_stats_.ifetch_latency & 0xffffffff;
case CSR_MPM_IFETCH_LAT_H: return perf_stats_.ifetch_latency >> 32;
case CSR_MPM_LOAD_LAT: return perf_stats_.load_latency & 0xffffffff;
case CSR_MPM_LOAD_LAT_H: return perf_stats_.load_latency >> 32;
}
} break;
case DCR_MPM_CLASS_MEM: {
switch (addr) {
case CSR_MPM_ICACHE_READS: return proc_perf.clusters.icache.reads & 0xffffffff;
case CSR_MPM_ICACHE_READS_H: return proc_perf.clusters.icache.reads >> 32;
case CSR_MPM_ICACHE_MISS_R: return proc_perf.clusters.icache.read_misses & 0xffffffff;
@@ -515,6 +526,32 @@ uint32_t Core::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
case CSR_MPM_SMEM_BANK_ST: return proc_perf.clusters.sharedmem.bank_stalls & 0xffffffff;
case CSR_MPM_SMEM_BANK_ST_H:return proc_perf.clusters.sharedmem.bank_stalls >> 32;

case CSR_MPM_L2CACHE_READS: return proc_perf.clusters.l2cache.reads & 0xffffffff;
case CSR_MPM_L2CACHE_READS_H: return proc_perf.clusters.l2cache.reads >> 32;
case CSR_MPM_L2CACHE_WRITES: return proc_perf.clusters.l2cache.writes & 0xffffffff;
case CSR_MPM_L2CACHE_WRITES_H: return proc_perf.clusters.l2cache.writes >> 32;
case CSR_MPM_L2CACHE_MISS_R: return proc_perf.clusters.l2cache.read_misses & 0xffffffff;
case CSR_MPM_L2CACHE_MISS_R_H: return proc_perf.clusters.l2cache.read_misses >> 32;
case CSR_MPM_L2CACHE_MISS_W: return proc_perf.clusters.l2cache.write_misses & 0xffffffff;
case CSR_MPM_L2CACHE_MISS_W_H: return proc_perf.clusters.l2cache.write_misses >> 32;
case CSR_MPM_L2CACHE_BANK_ST: return proc_perf.clusters.l2cache.bank_stalls & 0xffffffff;
case CSR_MPM_L2CACHE_BANK_ST_H:return proc_perf.clusters.l2cache.bank_stalls >> 32;
case CSR_MPM_L2CACHE_MSHR_ST: return proc_perf.clusters.l2cache.mshr_stalls & 0xffffffff;
case CSR_MPM_L2CACHE_MSHR_ST_H:return proc_perf.clusters.l2cache.mshr_stalls >> 32;

case CSR_MPM_L3CACHE_READS: return proc_perf.l3cache.reads & 0xffffffff;
case CSR_MPM_L3CACHE_READS_H: return proc_perf.l3cache.reads >> 32;
case CSR_MPM_L3CACHE_WRITES: return proc_perf.l3cache.writes & 0xffffffff;
case CSR_MPM_L3CACHE_WRITES_H: return proc_perf.l3cache.writes >> 32;
case CSR_MPM_L3CACHE_MISS_R: return proc_perf.l3cache.read_misses & 0xffffffff;
case CSR_MPM_L3CACHE_MISS_R_H: return proc_perf.l3cache.read_misses >> 32;
case CSR_MPM_L3CACHE_MISS_W: return proc_perf.l3cache.write_misses & 0xffffffff;
case CSR_MPM_L3CACHE_MISS_W_H: return proc_perf.l3cache.write_misses >> 32;
case CSR_MPM_L3CACHE_BANK_ST: return proc_perf.l3cache.bank_stalls & 0xffffffff;
case CSR_MPM_L3CACHE_BANK_ST_H:return proc_perf.l3cache.bank_stalls >> 32;
case CSR_MPM_L3CACHE_MSHR_ST: return proc_perf.l3cache.mshr_stalls & 0xffffffff;
case CSR_MPM_L3CACHE_MSHR_ST_H:return proc_perf.l3cache.mshr_stalls >> 32;

case CSR_MPM_MEM_READS: return proc_perf.mem_reads & 0xffffffff;
case CSR_MPM_MEM_READS_H: return proc_perf.mem_reads >> 32;
case CSR_MPM_MEM_WRITES: return proc_perf.mem_writes & 0xffffffff;
@@ -529,6 +566,8 @@ uint32_t Core::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
case CSR_MPM_TEX_READS_H: return proc_perf.clusters.tex_unit.reads >> 32;
case CSR_MPM_TEX_LAT: return proc_perf.clusters.tex_unit.latency & 0xffffffff;
case CSR_MPM_TEX_LAT_H: return proc_perf.clusters.tex_unit.latency >> 32;
case CSR_MPM_TEX_STALL: return proc_perf.clusters.tex_unit.stalls & 0xffffffff;
case CSR_MPM_TEX_STALL_H: return proc_perf.clusters.tex_unit.stalls >> 32;

case CSR_MPM_TCACHE_READS: return proc_perf.clusters.tcache.reads & 0xffffffff;
case CSR_MPM_TCACHE_READS_H: return proc_perf.clusters.tcache.reads >> 32;
@@ -538,6 +577,9 @@ uint32_t Core::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
case CSR_MPM_TCACHE_BANK_ST_H:return proc_perf.clusters.tcache.bank_stalls >> 32;
case CSR_MPM_TCACHE_MSHR_ST: return proc_perf.clusters.tcache.mshr_stalls & 0xffffffff;
case CSR_MPM_TCACHE_MSHR_ST_H:return proc_perf.clusters.tcache.mshr_stalls >> 32;

case CSR_MPM_TEX_ISSUE_ST: return perf_stats_.tex_issue_stalls & 0xffffffff;
case CSR_MPM_TEX_ISSUE_ST_H: return perf_stats_.tex_issue_stalls >> 32;
}
} break;
case DCR_MPM_CLASS_RASTER: {
@@ -557,6 +599,9 @@ uint32_t Core::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
case CSR_MPM_RCACHE_BANK_ST_H:return proc_perf.clusters.rcache.bank_stalls >> 32;
case CSR_MPM_RCACHE_MSHR_ST: return proc_perf.clusters.rcache.mshr_stalls & 0xffffffff;
case CSR_MPM_RCACHE_MSHR_ST_H:return proc_perf.clusters.rcache.mshr_stalls >> 32;

case CSR_MPM_RASTER_ISSUE_ST: return perf_stats_.raster_issue_stalls & 0xffffffff;
case CSR_MPM_RASTER_ISSUE_ST_H: return perf_stats_.raster_issue_stalls >> 32;
default:
return 0;
}
@@ -584,6 +629,9 @@ uint32_t Core::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
case CSR_MPM_OCACHE_BANK_ST_H:return proc_perf.clusters.ocache.bank_stalls >> 32;
case CSR_MPM_OCACHE_MSHR_ST: return proc_perf.clusters.ocache.mshr_stalls & 0xffffffff;
case CSR_MPM_OCACHE_MSHR_ST_H:return proc_perf.clusters.ocache.mshr_stalls >> 32;

case CSR_MPM_ROP_ISSUE_ST: return perf_stats_.rop_issue_stalls & 0xffffffff;
case CSR_MPM_ROP_ISSUE_ST_H: return perf_stats_.rop_issue_stalls >> 32;
default:
return 0;
}
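get_csr() first dispatches on the configured performance-monitoring class (core, memory, texture, raster, ROP) and only then decodes the MPM address, so the same CSR range can expose different counter banks. A condensed sketch of that two-level dispatch; the enum, counter fields, and addresses below are placeholders, not the simx definitions:

```cpp
#include <cstdint>

enum class PerfClass { Core, Mem, Tex, Raster, Rop };  // illustrative tags

struct Counters { uint64_t ifetches = 0, tcache_reads = 0; };

// Return one 32-bit half of a 64-bit counter, selected by perf class + CSR address.
uint32_t get_mpm_csr(PerfClass cls, uint32_t addr, const Counters& c) {
  auto lo = [](uint64_t v) { return uint32_t(v & 0xffffffff); };
  auto hi = [](uint64_t v) { return uint32_t(v >> 32); };
  switch (cls) {
  case PerfClass::Core:
    switch (addr) {
    case 0xB03: return lo(c.ifetches);   // placeholder addresses
    case 0xB83: return hi(c.ifetches);
    }
    break;
  case PerfClass::Tex:
    switch (addr) {
    case 0xB06: return lo(c.tcache_reads);
    case 0xB86: return hi(c.tcache_reads);
    }
    break;
  default:
    break;
  }
  return 0;  // unknown counter in this class
}
```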
@@ -41,9 +41,14 @@ public:
uint64_t csr_stalls;
uint64_t fpu_stalls;
uint64_t gpu_stalls;
uint64_t tex_issue_stalls;
uint64_t rop_issue_stalls;
uint64_t raster_issue_stalls;
uint64_t ifetches;
uint64_t loads;
uint64_t stores;
uint64_t branches;
uint64_t ifetch_latency;
uint64_t load_latency;

PerfStats()
: instrs(0)
@@ -54,9 +59,14 @@ public:
, csr_stalls(0)
, fpu_stalls(0)
, gpu_stalls(0)
, tex_issue_stalls(0)
, rop_issue_stalls(0)
, raster_issue_stalls(0)
, ifetches(0)
, loads(0)
, stores(0)
, branches(0)
, ifetch_latency(0)
, load_latency(0)
{}
};
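The zero-initializer list grows with every counter added to PerfStats. An equivalent sketch using default member initializers is shown as a possible alternative, not as what simx currently does:

```cpp
#include <cstdint>

struct PerfStats {
  uint64_t tex_issue_stalls    = 0;
  uint64_t rop_issue_stalls    = 0;
  uint64_t raster_issue_stalls = 0;
  uint64_t ifetches            = 0;
  uint64_t loads               = 0;
  uint64_t stores              = 0;
  uint64_t branches            = 0;
  uint64_t ifetch_latency      = 0;
  uint64_t load_latency        = 0;
  // No hand-written constructor needed: PerfStats{} zero-initializes every field.
};
```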
@@ -165,6 +175,8 @@ private:
bool ecall_;
bool ebreak_;

uint64_t pending_ifetches_;

std::unordered_map<int, std::stringstream> print_bufs_;

PerfStats perf_stats_;
@@ -27,15 +27,19 @@ LsuUnit::LsuUnit(const SimContext& ctx, Core* core)
: ExeUnit(ctx, core, "LSU")
, pending_rd_reqs_(LSUQ_SIZE)
, num_threads_(core->arch().num_threads())
, pending_loads_(0)
, fence_lock_(false)
{}

void LsuUnit::reset() {
pending_rd_reqs_.clear();
pending_loads_ = 0;
fence_lock_ = false;
}

void LsuUnit::tick() {
core_->perf_stats_.load_latency += pending_loads_;

// handle dcache response
for (uint32_t t = 0; t < num_threads_; ++t) {
auto& dcache_rsp_port = core_->dcache_rsp_ports.at(t);
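Outstanding loads are tracked twice: as a plain pending_loads_ count that feeds load_latency every tick, and in the tag-indexed pending_rd_reqs_ table that is released when a dcache or shared-memory response returns. A minimal sketch of such a tag table; this is an illustration, not the simx HashTable implementation:

```cpp
#include <cstdint>
#include <unordered_map>

// Tag-indexed table of in-flight requests: allocate on issue, release on response.
template <typename Req>
class PendingReqs {
public:
  explicit PendingReqs(uint32_t capacity) : capacity_(capacity) {}

  bool full() const { return entries_.size() >= capacity_; }

  // Returns the tag that travels with the memory request.
  uint32_t allocate(const Req& req) {
    uint32_t tag = next_tag_++;
    entries_.emplace(tag, req);
    return tag;
  }

  Req& at(uint32_t tag) { return entries_.at(tag); }

  void release(uint32_t tag) { entries_.erase(tag); }

private:
  uint32_t capacity_;
  uint32_t next_tag_ = 0;
  std::unordered_map<uint32_t, Req> entries_;
};
```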
@@ -52,7 +56,8 @@
Output.send(trace, 1);
pending_rd_reqs_.release(mem_rsp.tag);
}
dcache_rsp_port.pop();
--pending_loads_;
}

// handle shared memory response
@@ -72,6 +77,7 @@
pending_rd_reqs_.release(mem_rsp.tag);
}
smem_rsp_port.pop();
--pending_loads_;
}

if (fence_lock_) {
@@ -81,7 +87,7 @@
Output.send(fence_state_, 1);
fence_lock_ = false;
DT(3, "fence-unlock: " << fence_state_);
}
}

// check input queue
if (Input.empty())
@@ -156,6 +162,9 @@
dcache_req_port.send(mem_req, 2);
DT(3, "dcache-req: addr=" << std::hex << mem_req.addr << ", tag=" << tag
<< ", lsu_type=" << trace->lsu_type << ", tid=" << t << ", addr_type=" << mem_req.addr_type << ", " << *trace);

++pending_loads_;
++core_->perf_stats_.loads;

if (is_dup)
break;
@@ -165,6 +174,8 @@
if (is_write) {
pending_rd_reqs_.release(tag);
Output.send(trace, 1);

++core_->perf_stats_.stores;
}

// remove input
@@ -279,7 +290,9 @@ void GpuUnit::tick() {

auto trace = Input.front();

switch (trace->gpu_type) {
auto gpu_type = trace->gpu_type;

switch (gpu_type) {
case GpuType::TMC: {
Output.send(trace, 1);
auto trace_data = std::dynamic_pointer_cast<GPUTraceData>(trace->data);
@@ -325,6 +338,12 @@
if (trace->fetch_stall) {
core_->stalled_warps_.reset(trace->wid);
}

auto time = Input.pop();
core_->perf_stats_.gpu_stalls += (SimPlatform::instance().cycles() - time);
auto stalls = (SimPlatform::instance().cycles() - time);

if (gpu_type == GpuType::TEX) core_->perf_stats_.tex_issue_stalls += stalls;
if (gpu_type == GpuType::ROP) core_->perf_stats_.rop_issue_stalls += stalls;
if (gpu_type == GpuType::RASTER) core_->perf_stats_.raster_issue_stalls += stalls;
core_->perf_stats_.gpu_stalls += stalls;
}
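Input.pop() returns the cycle at which the trace entered the issue queue, so the wait can be measured at issue time and charged both to the aggregate gpu_stalls and to the specific unit (TEX, ROP, RASTER). A compact sketch of that attribution with hypothetical enum and counter names:

```cpp
#include <cstdint>

enum class GpuOp { TMC, TEX, ROP, RASTER, OTHER };  // illustrative subset

struct IssueStallCounters {
  uint64_t gpu_stalls = 0, tex = 0, rop = 0, raster = 0;

  // 'enqueue_cycle' is when the trace entered the issue queue;
  // 'now' is the cycle it is finally issued.
  void account(GpuOp op, uint64_t enqueue_cycle, uint64_t now) {
    uint64_t stalls = now - enqueue_cycle;
    switch (op) {
    case GpuOp::TEX:    tex    += stalls; break;
    case GpuOp::ROP:    rop    += stalls; break;
    case GpuOp::RASTER: raster += stalls; break;
    default: break;
    }
    gpu_stalls += stalls;  // aggregate stall time across all GPU ops
  }
};
```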
@@ -53,6 +53,7 @@ private:
HashTable<pending_req_t> pending_rd_reqs_;
uint32_t num_threads_;
pipeline_trace_t* fence_state_;
uint64_t pending_loads_;
bool fence_lock_;

public:
@@ -100,6 +100,7 @@ ProcessorImpl::PerfStats ProcessorImpl::perf_stats() const {
perf.mem_reads = perf_mem_reads_;
perf.mem_writes = perf_mem_writes_;
perf.mem_latency = perf_mem_pending_reads_;
perf.l3cache = l3cache_->perf_stats();
for (auto cluster : clusters_) {
perf.clusters += cluster->perf_stats();
}
@@ -29,6 +29,7 @@ public:
uint64_t mem_reads;
uint64_t mem_writes;
uint64_t mem_latency;
CacheSim::PerfStats l3cache;
Cluster::PerfStats clusters;

PerfStats()
@@ -566,7 +566,8 @@ public:
// check input trace
if (simobject_->Input.empty())
return;

perf_stats_.stalls += simobject_->Input.stalled();
auto trace = simobject_->Input.front();
auto data = std::dynamic_pointer_cast<RopUnit::TraceData>(trace->data);
data->cid = trace->cid;