mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
hbm for vortex 2.2
This commit is contained in:
parent
c94c3651ec
commit
de81baaabf
16 changed files with 180 additions and 78 deletions
|
@ -617,7 +617,7 @@
|
|||
|
||||
// Number of Banks
|
||||
`ifndef L3_NUM_BANKS
|
||||
`define L3_NUM_BANKS `MIN(4, `NUM_CLUSTERS)
|
||||
`define L3_NUM_BANKS `MIN(8, `NUM_CLUSTERS)
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
|
|
|
@ -173,6 +173,11 @@
|
|||
`define VX_CSR_MPM_LMEM_WRITES_H 12'hB9C
|
||||
`define VX_CSR_MPM_LMEM_BANK_ST 12'hB1D // bank conflicts
|
||||
`define VX_CSR_MPM_LMEM_BANK_ST_H 12'hB9D
|
||||
// PERF: hbm
|
||||
`define VX_CSR_HBM_BANK_CNTR 12'hB1E // hbm banks
|
||||
`define VX_CSR_HBM_BANK_CNTR_H 12'hB9E
|
||||
`define VX_CSR_HBM_BANK_TICK 12'hB1F // hbm ticks
|
||||
`define VX_CSR_HBM_BANK_TICK_H 12'hB9F
|
||||
|
||||
// Machine Performance-monitoring memory counters (class 3) ///////////////////
|
||||
// <Add your own counters: use addresses hB03..hB1F, hB83..hB9F>
|
||||
|
|
|
@ -34,6 +34,7 @@ typedef void* vx_buffer_h;
|
|||
#define VX_CAPS_GLOBAL_MEM_SIZE 0x5
|
||||
#define VX_CAPS_LOCAL_MEM_SIZE 0x6
|
||||
#define VX_CAPS_ISA_FLAGS 0x7
|
||||
#define VX_CAPS_L3CACHE_NUM_BANKS 0x8
|
||||
|
||||
// device isa flags
|
||||
#define VX_ISA_STD_A (1ull << ISA_STD_A)
|
||||
|
|
|
@ -81,6 +81,9 @@ public:
|
|||
case VX_CAPS_ISA_FLAGS:
|
||||
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
|
||||
break;
|
||||
case VX_CAPS_L3CACHE_NUM_BANKS:
|
||||
_value = L3_NUM_BANKS;
|
||||
break;
|
||||
default:
|
||||
std::cout << "invalid caps id: " << caps_id << std::endl;
|
||||
std::abort();
|
||||
|
|
|
@ -211,6 +211,10 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
|
|||
uint64_t mem_reads = 0;
|
||||
uint64_t mem_writes = 0;
|
||||
uint64_t mem_lat = 0;
|
||||
|
||||
// PERF: hbm
|
||||
uint64_t hbm_counter = 0;
|
||||
uint64_t hbm_ticks = 0;
|
||||
|
||||
uint64_t num_cores;
|
||||
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_CORES, &num_cores), {
|
||||
|
@ -222,6 +226,11 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
|
|||
return err;
|
||||
});
|
||||
|
||||
uint64_t l3cache_banks;
|
||||
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_L3CACHE_NUM_BANKS, &l3cache_banks), {
|
||||
return err;
|
||||
});
|
||||
|
||||
bool icache_enable = isa_flags & VX_ISA_EXT_ICACHE;
|
||||
bool dcache_enable = isa_flags & VX_ISA_EXT_DCACHE;
|
||||
bool l2cache_enable = isa_flags & VX_ISA_EXT_L2CACHE;
|
||||
|
@ -522,6 +531,14 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
|
|||
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_L3CACHE_MSHR_ST, core_id, &l3cache_mshr_stalls), {
|
||||
return err;
|
||||
});
|
||||
|
||||
// PERF: HBM
|
||||
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_HBM_BANK_CNTR, core_id, &hbm_counter), {
|
||||
return err;
|
||||
});
|
||||
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_HBM_BANK_TICK, core_id, &hbm_ticks), {
|
||||
return err;
|
||||
});
|
||||
}
|
||||
// PERF: memory
|
||||
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_READS, core_id, &mem_reads), {
|
||||
|
@ -606,6 +623,10 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
|
|||
fprintf(stream, "PERF: l3cache write misses=%ld (hit ratio=%d%%)\n", l3cache_write_misses, write_hit_ratio);
|
||||
fprintf(stream, "PERF: l3cache bank stalls=%ld (utilization=%d%%)\n", l3cache_bank_stalls, bank_utilization);
|
||||
fprintf(stream, "PERF: l3cache mshr stalls=%ld (utilization=%d%%)\n", l3cache_mshr_stalls, mshr_utilization);
|
||||
|
||||
// HBM
|
||||
float util = (float)hbm_counter / (hbm_ticks * l3cache_banks) * 100;
|
||||
fprintf(stream, "PERF: hbm bank utilization=%f\n", util);
|
||||
}
|
||||
|
||||
int mem_avg_lat = caclAverage(mem_lat, mem_reads);
|
||||
|
|
|
@ -41,6 +41,7 @@ public:
|
|||
dram_config["MemorySystem"]["DRAM"]["impl"] = "HBM2";
|
||||
dram_config["MemorySystem"]["DRAM"]["org"]["preset"] = "HBM2_8Gb";
|
||||
dram_config["MemorySystem"]["DRAM"]["org"]["density"] = 8192;
|
||||
dram_config["MemorySystem"]["DRAM"]["org"]["channel"] = 8;
|
||||
dram_config["MemorySystem"]["DRAM"]["timing"]["preset"] = "HBM2_2Gbps";
|
||||
dram_config["MemorySystem"]["Controller"]["impl"] = "Generic";
|
||||
dram_config["MemorySystem"]["Controller"]["Scheduler"]["impl"] = "FRFCFS";
|
||||
|
|
|
@ -77,8 +77,8 @@ public:
|
|||
caches_.at(i)->CoreRspPorts.at(j).bind(&mem_arbs.at(j)->RspOut.at(i));
|
||||
}
|
||||
|
||||
caches_.at(i)->MemReqPort.bind(&cache_arb->ReqIn.at(i));
|
||||
cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPort);
|
||||
caches_.at(i)->MemReqPorts.at(0).bind(&cache_arb->ReqIn.at(i));
|
||||
cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPorts.at(0));
|
||||
}
|
||||
|
||||
cache_arb->ReqOut.at(0).bind(&this->MemReqPort);
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include <vector>
|
||||
#include <list>
|
||||
#include <queue>
|
||||
#include <string.h>
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
|
@ -315,27 +316,74 @@ public:
|
|||
simobject->CoreReqPorts.at(i).bind(&bypass_switch_->ReqIn.at(i));
|
||||
bypass_switch_->RspIn.at(i).bind(&simobject->CoreRspPorts.at(i));
|
||||
}
|
||||
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPort);
|
||||
simobject->MemRspPort.bind(&bypass_switch_->RspOut.at(0));
|
||||
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
|
||||
simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0));
|
||||
return;
|
||||
}
|
||||
|
||||
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2);
|
||||
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPort);
|
||||
simobject->MemRspPort.bind(&bypass_switch_->RspOut.at(0));
|
||||
if (strcmp(simobject->name().c_str(), "l3cache")) {
|
||||
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2);
|
||||
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
|
||||
simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0));
|
||||
|
||||
if (config.B != 0) {
|
||||
snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str());
|
||||
bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B));
|
||||
for (uint32_t i = 0, n = (1 << config.B); i < n; ++i) {
|
||||
mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i));
|
||||
bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
|
||||
if (config.B != 0) {
|
||||
snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str());
|
||||
bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B));
|
||||
for (uint32_t i = 0, n = (1 << config.B); i < n; ++i) {
|
||||
mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i));
|
||||
bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
|
||||
}
|
||||
bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0));
|
||||
bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0));
|
||||
} else {
|
||||
mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0));
|
||||
bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
|
||||
}
|
||||
bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0));
|
||||
bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0));
|
||||
} else {
|
||||
mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0));
|
||||
bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
|
||||
uint32_t max = MAX(2, config_.num_inputs);
|
||||
//printf("%s connecting\n", simobject_->name().c_str());
|
||||
//3
|
||||
if (config.B != 0) {
|
||||
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, max, max);
|
||||
for (uint32_t i = 0; i < max; ++i) {
|
||||
//printf("%s connecting input=%d to MemPorts\n", simobject_->name().c_str(), i);
|
||||
bypass_switch_->ReqOut.at(i).bind(&simobject->MemReqPorts.at(i % (1 << config.B)));
|
||||
simobject->MemRspPorts.at(i % (1 << config.B)).bind(&bypass_switch_->RspOut.at(i));
|
||||
}
|
||||
} else {
|
||||
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2);
|
||||
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
|
||||
simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0));
|
||||
}
|
||||
|
||||
if (config.B != 0)
|
||||
{
|
||||
snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str());
|
||||
bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B), (1 << config.B));
|
||||
for (uint32_t i = 0, n = (1 << config.B); i < n; ++i)
|
||||
{
|
||||
//1
|
||||
//printf("%s Connecting memory ports to bank=%d\n", simobject_->name().c_str(), i);
|
||||
mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i));
|
||||
bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
|
||||
}
|
||||
//2
|
||||
if (config_.num_inputs > 1) {
|
||||
for (uint32_t i = 0; i < max; ++i) {
|
||||
//printf("%s connecting bank and bypass port=%d\n", simobject_->name().c_str(), i);
|
||||
bank_switch_->ReqOut.at(i % (1 << config.B)).bind(&bypass_switch_->ReqIn.at(i));
|
||||
bypass_switch_->RspIn.at(i).bind(&bank_switch_->RspOut.at(i % (1 << config.B)));
|
||||
}
|
||||
} else {
|
||||
bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0));
|
||||
bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0));
|
||||
bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
|
||||
}
|
||||
}
|
||||
|
||||
// calculate cache initialization cycles
|
||||
|
@ -673,8 +721,8 @@ CacheSim::CacheSim(const SimContext& ctx, const char* name, const Config& config
|
|||
: SimObject<CacheSim>(ctx, name)
|
||||
, CoreReqPorts(config.num_inputs, this)
|
||||
, CoreRspPorts(config.num_inputs, this)
|
||||
, MemReqPort(this)
|
||||
, MemRspPort(this)
|
||||
, MemReqPorts((1 << config.B), this)
|
||||
, MemRspPorts((1 << config.B), this)
|
||||
, impl_(new Impl(this, config))
|
||||
{}
|
||||
|
||||
|
|
|
@ -75,8 +75,8 @@ public:
|
|||
|
||||
std::vector<SimPort<MemReq>> CoreReqPorts;
|
||||
std::vector<SimPort<MemRsp>> CoreRspPorts;
|
||||
SimPort<MemReq> MemReqPort;
|
||||
SimPort<MemRsp> MemRspPort;
|
||||
std::vector<SimPort<MemReq>> MemReqPorts;
|
||||
std::vector<SimPort<MemRsp>> MemRspPorts;
|
||||
|
||||
CacheSim(const SimContext& ctx, const char* name, const Config& config);
|
||||
~CacheSim();
|
||||
|
|
|
@ -76,8 +76,8 @@ Cluster::Cluster(const SimContext& ctx,
|
|||
2, // pipeline latency
|
||||
});
|
||||
|
||||
l2cache_->MemReqPort.bind(&this->mem_req_port);
|
||||
this->mem_rsp_port.bind(&l2cache_->MemRspPort);
|
||||
l2cache_->MemReqPorts.at(0).bind(&this->mem_req_port);
|
||||
this->mem_rsp_port.bind(&l2cache_->MemRspPorts.at(0));
|
||||
|
||||
icache_switch->ReqOut.at(0).bind(&l2cache_->CoreReqPorts.at(0));
|
||||
l2cache_->CoreRspPorts.at(0).bind(&icache_switch->RspOut.at(0));
|
||||
|
|
|
@ -22,7 +22,7 @@
|
|||
#endif
|
||||
|
||||
#ifndef MEMORY_BANKS
|
||||
#define MEMORY_BANKS 2
|
||||
#define MEMORY_BANKS 8
|
||||
#endif
|
||||
|
||||
#define LSU_WORD_SIZE (XLEN / 8)
|
||||
|
|
|
@ -455,6 +455,9 @@ Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
|
|||
CSR_READ_64(VX_CSR_MPM_LMEM_READS, lmem_perf.reads);
|
||||
CSR_READ_64(VX_CSR_MPM_LMEM_WRITES, lmem_perf.writes);
|
||||
CSR_READ_64(VX_CSR_MPM_LMEM_BANK_ST, lmem_perf.bank_stalls);
|
||||
|
||||
CSR_READ_64(VX_CSR_HBM_BANK_CNTR, proc_perf.memsim.counter);
|
||||
CSR_READ_64(VX_CSR_HBM_BANK_TICK, proc_perf.memsim.ticks);
|
||||
}
|
||||
} break;
|
||||
default: {
|
||||
|
|
|
@ -33,6 +33,7 @@ private:
|
|||
// Per-request context handed to the DRAM simulator's completion callback.
// One instance is heap-allocated in tick() for each request offered to
// dram_sim_.send_request(); it is deleted by the callback once the request
// completes, or by tick() itself when the enqueue is rejected.
//   simobject - MemSim whose MemRspPorts receive the read response.
//   request   - copy of the request; the callback reads its write flag, tag,
//               cid and uuid to build the MemRsp.
//   i         - index of the MemReqPorts entry the request was taken from;
//               the response is pushed to MemRspPorts.at(i).
struct DramCallbackArgs {
|
||||
MemSim* simobject;
|
||||
MemReq request;
|
||||
uint32_t i;
|
||||
};
|
||||
|
||||
public:
|
||||
|
@ -56,46 +57,49 @@ public:
|
|||
|
||||
void tick() {
|
||||
dram_sim_.tick();
|
||||
uint32_t counter = 0;
|
||||
|
||||
if (simobject_->MemReqPort.empty())
|
||||
return;
|
||||
for (uint32_t i = 0; i < L3_NUM_BANKS; ++i) {
|
||||
if (simobject_->MemReqPorts.at(i).empty())
|
||||
continue;
|
||||
|
||||
auto& mem_req = simobject_->MemReqPort.front();
|
||||
auto& mem_req = simobject_->MemReqPorts.at(i).front();
|
||||
|
||||
// try to enqueue the request to the memory system
|
||||
auto req_args = new DramCallbackArgs{simobject_, mem_req};
|
||||
auto enqueue_success = dram_sim_.send_request(
|
||||
mem_req.write,
|
||||
mem_req.addr,
|
||||
0,
|
||||
[](void* arg) {
|
||||
auto rsp_args = reinterpret_cast<const DramCallbackArgs*>(arg);
|
||||
// only send a response for read requests
|
||||
if (!rsp_args->request.write) {
|
||||
MemRsp mem_rsp{rsp_args->request.tag, rsp_args->request.cid, rsp_args->request.uuid};
|
||||
rsp_args->simobject->MemRspPort.push(mem_rsp, 1);
|
||||
DT(3, rsp_args->simobject->name() << " mem-rsp: " << mem_rsp);
|
||||
}
|
||||
delete rsp_args;
|
||||
},
|
||||
req_args
|
||||
);
|
||||
// try to enqueue the request to the memory system
|
||||
auto req_args = new DramCallbackArgs{simobject_, mem_req, i};
|
||||
auto enqueue_success = dram_sim_.send_request(
|
||||
mem_req.write,
|
||||
mem_req.addr,
|
||||
i,
|
||||
[](void* arg) {
|
||||
auto rsp_args = reinterpret_cast<const DramCallbackArgs*>(arg);
|
||||
// only send a response for read requests
|
||||
if (!rsp_args->request.write) {
|
||||
MemRsp mem_rsp{rsp_args->request.tag, rsp_args->request.cid, rsp_args->request.uuid};
|
||||
rsp_args->simobject->MemRspPorts.at(rsp_args->i).push(mem_rsp, 1);
|
||||
DT(3, rsp_args->simobject->name() << " mem-rsp: " << mem_rsp);
|
||||
}
|
||||
delete rsp_args;
|
||||
},
|
||||
req_args
|
||||
);
|
||||
|
||||
// check if the request was enqueued successfully
|
||||
if (!enqueue_success) {
|
||||
delete req_args;
|
||||
return;
|
||||
// check if the request was enqueued successfully
|
||||
if (!enqueue_success) {
|
||||
delete req_args;
|
||||
continue;
|
||||
}
|
||||
|
||||
DT(3, simobject_->name() << " mem-req: " << mem_req << " bank: " << i);
|
||||
|
||||
simobject_->MemReqPorts.at(i).pop();
|
||||
counter++;
|
||||
}
|
||||
|
||||
if (mem_req.write) {
|
||||
++perf_stats_.writes;
|
||||
} else {
|
||||
++perf_stats_.reads;
|
||||
perf_stats_.counter += counter;
|
||||
if (counter > 0) {
|
||||
++perf_stats_.ticks;
|
||||
}
|
||||
|
||||
DT(3, simobject_->name() << " mem-req: " << mem_req);
|
||||
|
||||
simobject_->MemReqPort.pop();
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -103,8 +107,8 @@ public:
|
|||
|
||||
MemSim::MemSim(const SimContext& ctx, const char* name, const Config& config)
|
||||
: SimObject<MemSim>(ctx, name)
|
||||
, MemReqPort(this)
|
||||
, MemRspPort(this)
|
||||
, MemReqPorts(L3_NUM_BANKS, this)
|
||||
, MemRspPorts(L3_NUM_BANKS, this)
|
||||
, impl_(new Impl(this, config))
|
||||
{}
|
||||
|
||||
|
@ -118,4 +122,8 @@ void MemSim::reset() {
|
|||
|
||||
// Advances the memory simulator by one step; all work is delegated to the
// private implementation object (pimpl).
void MemSim::tick() {
|
||||
impl_->tick();
|
||||
}
|
||||
|
||||
// Returns a read-only reference to the performance counters maintained by
// the implementation object; forwards directly to impl_->perf_stats().
const MemSim::PerfStats &MemSim::perf_stats() const {
|
||||
return impl_->perf_stats();
|
||||
}
|
|
@ -26,17 +26,23 @@ public:
|
|||
};
|
||||
|
||||
struct PerfStats {
|
||||
uint64_t reads;
|
||||
uint64_t writes;
|
||||
uint64_t counter;
|
||||
uint64_t ticks;
|
||||
|
||||
PerfStats()
|
||||
: reads(0)
|
||||
, writes(0)
|
||||
: counter(0)
|
||||
, ticks(0)
|
||||
{}
|
||||
|
||||
PerfStats& operator+=(const PerfStats& rhs) {
|
||||
this->counter += rhs.counter;
|
||||
this->ticks += rhs.ticks;
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
SimPort<MemReq> MemReqPort;
|
||||
SimPort<MemRsp> MemRspPort;
|
||||
std::vector<SimPort<MemReq>> MemReqPorts;
|
||||
std::vector<SimPort<MemRsp>> MemRspPorts;
|
||||
|
||||
MemSim(const SimContext& ctx, const char* name, const Config& config);
|
||||
~MemSim();
|
||||
|
|
|
@ -47,8 +47,10 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
|
|||
);
|
||||
|
||||
// connect L3 memory ports
|
||||
l3cache_->MemReqPort.bind(&memsim_->MemReqPort);
|
||||
memsim_->MemRspPort.bind(&l3cache_->MemRspPort);
|
||||
for (uint32_t i = 0; i < L3_NUM_BANKS; ++i) {
|
||||
l3cache_->MemReqPorts.at(i).bind(&memsim_->MemReqPorts.at(i));
|
||||
memsim_->MemRspPorts.at(i).bind(&l3cache_->MemRspPorts.at(i));
|
||||
}
|
||||
|
||||
// create clusters
|
||||
for (uint32_t i = 0; i < arch.num_clusters(); ++i) {
|
||||
|
@ -59,16 +61,18 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
|
|||
}
|
||||
|
||||
// set up memory profiling
|
||||
memsim_->MemReqPort.tx_callback([&](const MemReq& req, uint64_t cycle){
|
||||
__unused (cycle);
|
||||
perf_mem_reads_ += !req.write;
|
||||
perf_mem_writes_ += req.write;
|
||||
perf_mem_pending_reads_ += !req.write;
|
||||
});
|
||||
memsim_->MemRspPort.tx_callback([&](const MemRsp&, uint64_t cycle){
|
||||
__unused (cycle);
|
||||
--perf_mem_pending_reads_;
|
||||
});
|
||||
for (uint32_t i = 0; i < L3_NUM_BANKS; ++i) {
|
||||
memsim_->MemReqPorts.at(i).tx_callback([&](const MemReq& req, uint64_t cycle){
|
||||
__unused (cycle);
|
||||
perf_mem_reads_ += !req.write;
|
||||
perf_mem_writes_ += req.write;
|
||||
perf_mem_pending_reads_ += !req.write;
|
||||
});
|
||||
memsim_->MemRspPorts.at(i).tx_callback([&](const MemRsp&, uint64_t cycle){
|
||||
__unused (cycle);
|
||||
--perf_mem_pending_reads_;
|
||||
});
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
// dump device configuration
|
||||
|
@ -131,6 +135,7 @@ ProcessorImpl::PerfStats ProcessorImpl::perf_stats() const {
|
|||
perf.mem_writes = perf_mem_writes_;
|
||||
perf.mem_latency = perf_mem_latency_;
|
||||
perf.l3cache = l3cache_->perf_stats();
|
||||
perf.memsim = memsim_->perf_stats();
|
||||
return perf;
|
||||
}
|
||||
|
||||
|
|
|
@ -25,6 +25,7 @@ class ProcessorImpl {
|
|||
public:
|
||||
struct PerfStats {
|
||||
CacheSim::PerfStats l3cache;
|
||||
MemSim::PerfStats memsim;
|
||||
uint64_t mem_reads;
|
||||
uint64_t mem_writes;
|
||||
uint64_t mem_latency;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue