Merge branch 'master' of https://github.com/vortexgpgpu/vortex into develop

This commit is contained in:
Blaise Tine 2024-09-02 04:13:35 -07:00
commit c4df7221c6
22 changed files with 207 additions and 99 deletions

View file

@ -617,7 +617,7 @@
// Number of Banks
`ifndef L3_NUM_BANKS
`define L3_NUM_BANKS `MIN(4, `NUM_CLUSTERS)
`define L3_NUM_BANKS `MIN(8, `NUM_CLUSTERS)
`endif
// Core Response Queue Size
@ -650,6 +650,15 @@
`define L3_WRITEBACK 0
`endif
`ifndef MEMORY_BANKS
`define MEMORY_BANKS 8
`endif
// Number of Memory Ports from LLC
`ifndef NUM_MEM_PORTS
`define NUM_MEM_PORTS `MIN(`MEMORY_BANKS, `L3_NUM_BANKS)
`endif
// ISA Extensions /////////////////////////////////////////////////////////////
`ifdef EXT_A_ENABLE

View file

@ -166,6 +166,10 @@
`define VX_CSR_MPM_MEM_WRITES_H 12'hB99
`define VX_CSR_MPM_MEM_LT 12'hB1A // memory latency
`define VX_CSR_MPM_MEM_LT_H 12'hB9A
`define VX_CSR_MPM_MEM_BANK_CNTR 12'hB1E // memory bank requests
`define VX_CSR_MPM_MEM_BANK_CNTR_H 12'hB9E
`define VX_CSR_MPM_MEM_BANK_TICK 12'hB1F // memory ticks
`define VX_CSR_MPM_MEM_BANK_TICK_H 12'hB9F
// PERF: lmem
`define VX_CSR_MPM_LMEM_READS 12'hB1B // memory reads
`define VX_CSR_MPM_LMEM_READS_H 12'hB9B

View file

@ -34,6 +34,7 @@ typedef void* vx_buffer_h;
#define VX_CAPS_GLOBAL_MEM_SIZE 0x5
#define VX_CAPS_LOCAL_MEM_SIZE 0x6
#define VX_CAPS_ISA_FLAGS 0x7
#define VX_CAPS_NUM_MEM_BANKS 0x8
// device isa flags
#define VX_ISA_STD_A (1ull << ISA_STD_A)

View file

@ -231,6 +231,9 @@ public:
case VX_CAPS_ISA_FLAGS:
_value = isa_caps_;
break;
case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS;
break;
default:
fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id);
std::abort();

View file

@ -77,6 +77,9 @@ public:
case VX_CAPS_ISA_FLAGS:
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
break;
case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS;
break;
default:
std::cout << "invalid caps id: " << caps_id << std::endl;
std::abort();

View file

@ -81,6 +81,9 @@ public:
case VX_CAPS_ISA_FLAGS:
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
break;
case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS;
break;
default:
std::cout << "invalid caps id: " << caps_id << std::endl;
std::abort();

View file

@ -211,6 +211,8 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
uint64_t mem_reads = 0;
uint64_t mem_writes = 0;
uint64_t mem_lat = 0;
uint64_t mem_req_counter = 0;
uint64_t mem_ticks = 0;
uint64_t num_cores;
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_CORES, &num_cores), {
@ -221,6 +223,11 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_ISA_FLAGS, &isa_flags), {
return err;
});
uint64_t num_mem_bank_ports;
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_MEM_BANKS, &num_mem_bank_ports), {
return err;
});
bool icache_enable = isa_flags & VX_ISA_EXT_ICACHE;
bool dcache_enable = isa_flags & VX_ISA_EXT_DCACHE;
@ -533,6 +540,12 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_LT, core_id, &mem_lat), {
return err;
});
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_BANK_CNTR, core_id, &mem_req_counter), {
return err;
});
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_BANK_TICK, core_id, &mem_ticks), {
return err;
});
}
} break;
default:
@ -599,7 +612,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
int read_hit_ratio = calcRatio(l3cache_read_misses, l3cache_reads);
int write_hit_ratio = calcRatio(l3cache_write_misses, l3cache_writes);
int bank_utilization = calcAvgPercent(l3cache_reads + l3cache_writes, l3cache_reads + l3cache_writes + l3cache_bank_stalls);
int mshr_utilization = calcAvgPercent(l3cache_read_misses + l3cache_write_misses, l3cache_read_misses + l3cache_write_misses + l3cache_mshr_stalls);
int mshr_utilization = calcAvgPercent(l3cache_read_misses + l3cache_write_misses, l3cache_read_misses + l3cache_write_misses + l3cache_mshr_stalls);
fprintf(stream, "PERF: l3cache reads=%ld\n", l3cache_reads);
fprintf(stream, "PERF: l3cache writes=%ld\n", l3cache_writes);
fprintf(stream, "PERF: l3cache read misses=%ld (hit ratio=%d%%)\n", l3cache_read_misses, read_hit_ratio);
@ -609,8 +622,10 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
}
int mem_avg_lat = caclAverage(mem_lat, mem_reads);
int memory_bank_port_utilization = calcAvgPercent(mem_req_counter, (mem_ticks * num_mem_bank_ports));
fprintf(stream, "PERF: memory requests=%ld (reads=%ld, writes=%ld)\n", (mem_reads + mem_writes), mem_reads, mem_writes);
fprintf(stream, "PERF: memory latency=%d cycles\n", mem_avg_lat);
fprintf(stream, "PERF: memory bank port utilization=%d%%\n", memory_bank_port_utilization);
} break;
default:
break;

View file

@ -421,6 +421,9 @@ public:
case VX_CAPS_ISA_FLAGS:
_value = isa_caps_;
break;
case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS;
break;
default:
fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id);
std::abort();

View file

@ -41,11 +41,11 @@ public:
dram_config["MemorySystem"]["DRAM"]["impl"] = "HBM2";
dram_config["MemorySystem"]["DRAM"]["org"]["preset"] = "HBM2_8Gb";
dram_config["MemorySystem"]["DRAM"]["org"]["density"] = 8192;
dram_config["MemorySystem"]["DRAM"]["org"]["channel"] = 8;
dram_config["MemorySystem"]["DRAM"]["timing"]["preset"] = "HBM2_2Gbps";
dram_config["MemorySystem"]["Controller"]["impl"] = "Generic";
dram_config["MemorySystem"]["Controller"]["Scheduler"]["impl"] = "FRFCFS";
dram_config["MemorySystem"]["Controller"]["RefreshManager"]["impl"] = "AllBank";
dram_config["MemorySystem"]["Controller"]["RefreshManager"]["impl"] = "AllBank";
dram_config["MemorySystem"]["Controller"]["RowPolicy"]["impl"] = "OpenRowPolicy";
{
YAML::Node draw_plugin;
@ -66,7 +66,7 @@ public:
auto original_buf = std::cout.rdbuf();
std::cout.rdbuf(nullstream.rdbuf());
ramulator_frontend_->finalize();
ramulator_memorysystem_->finalize();
ramulator_memorysystem_->finalize();
std::cout.rdbuf(original_buf);
}

View file

@ -168,23 +168,23 @@ public:
{}
void* operator new(size_t /*size*/) {
return allocator().allocate();
return allocator_.allocate();
}
void operator delete(void* ptr) {
allocator().deallocate(ptr);
allocator_.deallocate(ptr);
}
protected:
Func func_;
Pkt pkt_;
static MemoryPool<SimCallEvent<Pkt>>& allocator() {
static MemoryPool<SimCallEvent<Pkt>> instance(64);
return instance;
}
static MemoryPool<SimCallEvent<Pkt>> allocator_;
};
template <typename Pkt>
MemoryPool<SimCallEvent<Pkt>> SimCallEvent<Pkt>::allocator_(64);
///////////////////////////////////////////////////////////////////////////////
template <typename Pkt>
@ -201,23 +201,23 @@ public:
{}
void* operator new(size_t /*size*/) {
return allocator().allocate();
return allocator_.allocate();
}
void operator delete(void* ptr) {
allocator().deallocate(ptr);
allocator_.deallocate(ptr);
}
protected:
const SimPort<Pkt>* port_;
Pkt pkt_;
static MemoryPool<SimPortEvent<Pkt>>& allocator() {
static MemoryPool<SimPortEvent<Pkt>> instance(64);
return instance;
}
static MemoryPool<SimPortEvent<Pkt>> allocator_;
};
template <typename Pkt>
MemoryPool<SimPortEvent<Pkt>> SimPortEvent<Pkt>::allocator_(64);
///////////////////////////////////////////////////////////////////////////////
class SimContext;

View file

@ -35,13 +35,13 @@
#include <unordered_map>
#include <util.h>
#ifndef MEMORY_BANKS
//#ifndef MEMORY_BANKS
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
#define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS
#else
#define MEMORY_BANKS 2
#endif
#endif
//#endif
#ifndef MEM_CLOCK_RATIO
#define MEM_CLOCK_RATIO 1

View file

@ -77,8 +77,8 @@ public:
caches_.at(i)->CoreRspPorts.at(j).bind(&mem_arbs.at(j)->RspOut.at(i));
}
caches_.at(i)->MemReqPort.bind(&cache_arb->ReqIn.at(i));
cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPort);
caches_.at(i)->MemReqPorts.at(0).bind(&cache_arb->ReqIn.at(i));
cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPorts.at(0));
}
cache_arb->ReqOut.at(0).bind(&this->MemReqPort);

View file

@ -19,6 +19,7 @@
#include <vector>
#include <list>
#include <queue>
#include <string.h>
using namespace vortex;
@ -315,27 +316,75 @@ public:
simobject->CoreReqPorts.at(i).bind(&bypass_switch_->ReqIn.at(i));
bypass_switch_->RspIn.at(i).bind(&simobject->CoreRspPorts.at(i));
}
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPort);
simobject->MemRspPort.bind(&bypass_switch_->RspOut.at(0));
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0));
return;
}
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2);
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPort);
simobject->MemRspPort.bind(&bypass_switch_->RspOut.at(0));
if (strcmp(simobject->name().c_str(), "l3cache")) {
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2);
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0));
if (config.B != 0) {
snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str());
bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B));
for (uint32_t i = 0, n = (1 << config.B); i < n; ++i) {
mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i));
bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
if (config.B != 0) {
snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str());
bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B));
for (uint32_t i = 0, n = (1 << config.B); i < n; ++i) {
mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i));
bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
}
bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0));
bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0));
} else {
mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0));
bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
}
bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0));
bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0));
} else {
mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0));
bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
// TODO: Change this into a crossbar
uint32_t max = MAX(2, config_.num_inputs);
//printf("%s connecting\n", simobject_->name().c_str());
//3
if (config.B != 0) {
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, max, max);
for (uint32_t i = 0; i < max; ++i) {
//printf("%s connecting input=%d to MemPorts\n", simobject_->name().c_str(), i);
bypass_switch_->ReqOut.at(i).bind(&simobject->MemReqPorts.at(i % (1 << config.B)));
simobject->MemRspPorts.at(i % (1 << config.B)).bind(&bypass_switch_->RspOut.at(i));
}
} else {
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2);
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0));
}
if (config.B != 0)
{
snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str());
bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B), (1 << config.B));
for (uint32_t i = 0, n = (1 << config.B); i < n; ++i)
{
//1
//printf("%s Connecting memory ports to bank=%d\n", simobject_->name().c_str(), i);
mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i));
bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
}
//2
if (config_.num_inputs > 1) {
for (uint32_t i = 0; i < max; ++i) {
//printf("%s connecting bank and bypass port=%d\n", simobject_->name().c_str(), i);
bank_switch_->ReqOut.at(i % (1 << config.B)).bind(&bypass_switch_->ReqIn.at(i));
bypass_switch_->RspIn.at(i).bind(&bank_switch_->RspOut.at(i % (1 << config.B)));
}
} else {
bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0));
bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0));
}
}
else
{
mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0));
bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
}
}
// calculate cache initialization cycles
@ -673,8 +722,8 @@ CacheSim::CacheSim(const SimContext& ctx, const char* name, const Config& config
: SimObject<CacheSim>(ctx, name)
, CoreReqPorts(config.num_inputs, this)
, CoreRspPorts(config.num_inputs, this)
, MemReqPort(this)
, MemRspPort(this)
, MemReqPorts(NUM_MEM_PORTS, this)
, MemRspPorts(NUM_MEM_PORTS, this)
, impl_(new Impl(this, config))
{}

View file

@ -75,8 +75,8 @@ public:
std::vector<SimPort<MemReq>> CoreReqPorts;
std::vector<SimPort<MemRsp>> CoreRspPorts;
SimPort<MemReq> MemReqPort;
SimPort<MemRsp> MemRspPort;
std::vector<SimPort<MemReq>> MemReqPorts;
std::vector<SimPort<MemRsp>> MemRspPorts;
CacheSim(const SimContext& ctx, const char* name, const Config& config);
~CacheSim();

View file

@ -76,8 +76,8 @@ Cluster::Cluster(const SimContext& ctx,
2, // pipeline latency
});
l2cache_->MemReqPort.bind(&this->mem_req_port);
this->mem_rsp_port.bind(&l2cache_->MemRspPort);
l2cache_->MemReqPorts.at(0).bind(&this->mem_req_port);
this->mem_rsp_port.bind(&l2cache_->MemRspPorts.at(0));
icache_switch->ReqOut.at(0).bind(&l2cache_->CoreReqPorts.at(0));
l2cache_->CoreRspPorts.at(0).bind(&icache_switch->RspOut.at(0));

View file

@ -21,10 +21,6 @@
#define MEM_CLOCK_RATIO 1
#endif
#ifndef MEMORY_BANKS
#define MEMORY_BANKS 2
#endif
#define LSU_WORD_SIZE (XLEN / 8)
#define LSU_CHANNELS NUM_LSU_LANES
#define LSU_NUM_REQS (NUM_LSU_BLOCKS * LSU_CHANNELS)

View file

@ -438,6 +438,8 @@ Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
CSR_READ_64(VX_CSR_MPM_MEM_READS, proc_perf.mem_reads);
CSR_READ_64(VX_CSR_MPM_MEM_WRITES, proc_perf.mem_writes);
CSR_READ_64(VX_CSR_MPM_MEM_LT, proc_perf.mem_latency);
CSR_READ_64(VX_CSR_MPM_MEM_BANK_CNTR, proc_perf.memsim.counter);
CSR_READ_64(VX_CSR_MPM_MEM_BANK_TICK, proc_perf.memsim.ticks);
CSR_READ_64(VX_CSR_MPM_LMEM_READS, lmem_perf.reads);
CSR_READ_64(VX_CSR_MPM_LMEM_WRITES, lmem_perf.writes);

View file

@ -33,6 +33,7 @@ private:
struct DramCallbackArgs {
MemSim* simobject;
MemReq request;
uint32_t i;
};
public:
@ -56,46 +57,49 @@ public:
void tick() {
dram_sim_.tick();
uint32_t counter = 0;
if (simobject_->MemReqPort.empty())
return;
for (uint32_t i = 0; i < NUM_MEM_PORTS; ++i) {
if (simobject_->MemReqPorts.at(i).empty())
continue;
auto& mem_req = simobject_->MemReqPort.front();
auto& mem_req = simobject_->MemReqPorts.at(i).front();
// try to enqueue the request to the memory system
auto req_args = new DramCallbackArgs{simobject_, mem_req};
auto enqueue_success = dram_sim_.send_request(
mem_req.write,
mem_req.addr,
0,
[](void* arg) {
auto rsp_args = reinterpret_cast<const DramCallbackArgs*>(arg);
// only send a response for read requests
if (!rsp_args->request.write) {
MemRsp mem_rsp{rsp_args->request.tag, rsp_args->request.cid, rsp_args->request.uuid};
rsp_args->simobject->MemRspPort.push(mem_rsp, 1);
DT(3, rsp_args->simobject->name() << " mem-rsp: " << mem_rsp);
}
delete rsp_args;
},
req_args
);
// try to enqueue the request to the memory system
auto req_args = new DramCallbackArgs{simobject_, mem_req, i};
auto enqueue_success = dram_sim_.send_request(
mem_req.write,
mem_req.addr,
0,
[](void* arg) {
auto rsp_args = reinterpret_cast<const DramCallbackArgs*>(arg);
// only send a response for read requests
if (!rsp_args->request.write) {
MemRsp mem_rsp{rsp_args->request.tag, rsp_args->request.cid, rsp_args->request.uuid};
rsp_args->simobject->MemRspPorts.at(rsp_args->i).push(mem_rsp, 1);
DT(3, rsp_args->simobject->name() << " mem-rsp: " << mem_rsp << " bank: " << rsp_args->i);
}
delete rsp_args;
},
req_args
);
// check if the request was enqueued successfully
if (!enqueue_success) {
delete req_args;
return;
// check if the request was enqueued successfully
if (!enqueue_success) {
delete req_args;
continue;
}
DT(3, simobject_->name() << " mem-req: " << mem_req << " bank: " << i);
simobject_->MemReqPorts.at(i).pop();
counter++;
}
if (mem_req.write) {
++perf_stats_.writes;
} else {
++perf_stats_.reads;
perf_stats_.counter += counter;
if (counter > 0) {
++perf_stats_.ticks;
}
DT(3, simobject_->name() << " mem-req: " << mem_req);
simobject_->MemReqPort.pop();
}
};
@ -103,8 +107,8 @@ public:
MemSim::MemSim(const SimContext& ctx, const char* name, const Config& config)
: SimObject<MemSim>(ctx, name)
, MemReqPort(this)
, MemRspPort(this)
, MemReqPorts(NUM_MEM_PORTS, this)
, MemRspPorts(NUM_MEM_PORTS, this)
, impl_(new Impl(this, config))
{}
@ -118,4 +122,8 @@ void MemSim::reset() {
// Advance the memory simulator by one cycle; delegates to the pimpl,
// which drives the DRAM model and services the per-bank request ports.
void MemSim::tick() {
impl_->tick();
}
// Read-only accessor for the accumulated memory performance counters
// (bank request counter and tick count) owned by the pimpl.
const MemSim::PerfStats &MemSim::perf_stats() const {
return impl_->perf_stats();
}

View file

@ -26,17 +26,23 @@ public:
};
struct PerfStats {
uint64_t reads;
uint64_t writes;
uint64_t counter;
uint64_t ticks;
PerfStats()
: reads(0)
, writes(0)
: counter(0)
, ticks(0)
{}
PerfStats& operator+=(const PerfStats& rhs) {
this->counter += rhs.counter;
this->ticks += rhs.ticks;
return *this;
}
};
SimPort<MemReq> MemReqPort;
SimPort<MemRsp> MemRspPort;
std::vector<SimPort<MemReq>> MemReqPorts;
std::vector<SimPort<MemRsp>> MemRspPorts;
MemSim(const SimContext& ctx, const char* name, const Config& config);
~MemSim();

View file

@ -47,8 +47,10 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
);
// connect L3 memory ports
l3cache_->MemReqPort.bind(&memsim_->MemReqPort);
memsim_->MemRspPort.bind(&l3cache_->MemRspPort);
for (uint32_t i = 0; i < NUM_MEM_PORTS; ++i) {
l3cache_->MemReqPorts.at(i).bind(&memsim_->MemReqPorts.at(i));
memsim_->MemRspPorts.at(i).bind(&l3cache_->MemRspPorts.at(i));
}
// create clusters
for (uint32_t i = 0; i < arch.num_clusters(); ++i) {
@ -59,16 +61,18 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
}
// set up memory profiling
memsim_->MemReqPort.tx_callback([&](const MemReq& req, uint64_t cycle){
__unused (cycle);
perf_mem_reads_ += !req.write;
perf_mem_writes_ += req.write;
perf_mem_pending_reads_ += !req.write;
});
memsim_->MemRspPort.tx_callback([&](const MemRsp&, uint64_t cycle){
__unused (cycle);
--perf_mem_pending_reads_;
});
for (uint32_t i = 0; i < NUM_MEM_PORTS; ++i) {
memsim_->MemReqPorts.at(i).tx_callback([&](const MemReq& req, uint64_t cycle){
__unused (cycle);
perf_mem_reads_ += !req.write;
perf_mem_writes_ += req.write;
perf_mem_pending_reads_ += !req.write;
});
memsim_->MemRspPorts.at(i).tx_callback([&](const MemRsp&, uint64_t cycle){
__unused (cycle);
--perf_mem_pending_reads_;
});
}
#ifndef NDEBUG
// dump device configuration
@ -131,6 +135,7 @@ ProcessorImpl::PerfStats ProcessorImpl::perf_stats() const {
perf.mem_writes = perf_mem_writes_;
perf.mem_latency = perf_mem_latency_;
perf.l3cache = l3cache_->perf_stats();
perf.memsim = memsim_->perf_stats();
return perf;
}

View file

@ -25,6 +25,7 @@ class ProcessorImpl {
public:
struct PerfStats {
CacheSim::PerfStats l3cache;
MemSim::PerfStats memsim;
uint64_t mem_reads;
uint64_t mem_writes;
uint64_t mem_latency;

View file

@ -50,7 +50,7 @@ public:
static const char* type_str() {
return "float";
}
static int generate() {
static float generate() {
return static_cast<float>(rand()) / RAND_MAX;
}
static bool compare(float a, float b, int index, int errors) {