Merge branch 'master' of https://github.com/vortexgpgpu/vortex into develop

This commit is contained in:
Blaise Tine 2024-09-02 04:13:35 -07:00
commit c4df7221c6
22 changed files with 207 additions and 99 deletions

View file

@ -617,7 +617,7 @@
// Number of Banks // Number of Banks
`ifndef L3_NUM_BANKS `ifndef L3_NUM_BANKS
`define L3_NUM_BANKS `MIN(4, `NUM_CLUSTERS) `define L3_NUM_BANKS `MIN(8, `NUM_CLUSTERS)
`endif `endif
// Core Response Queue Size // Core Response Queue Size
@ -650,6 +650,15 @@
`define L3_WRITEBACK 0 `define L3_WRITEBACK 0
`endif `endif
`ifndef MEMORY_BANKS
`define MEMORY_BANKS 8
`endif
// Number of Memory Ports from LLC
`ifndef NUM_MEM_PORTS
`define NUM_MEM_PORTS `MIN(`MEMORY_BANKS, `L3_NUM_BANKS)
`endif
// ISA Extensions ///////////////////////////////////////////////////////////// // ISA Extensions /////////////////////////////////////////////////////////////
`ifdef EXT_A_ENABLE `ifdef EXT_A_ENABLE

View file

@ -166,6 +166,10 @@
`define VX_CSR_MPM_MEM_WRITES_H 12'hB99 `define VX_CSR_MPM_MEM_WRITES_H 12'hB99
`define VX_CSR_MPM_MEM_LT 12'hB1A // memory latency `define VX_CSR_MPM_MEM_LT 12'hB1A // memory latency
`define VX_CSR_MPM_MEM_LT_H 12'hB9A `define VX_CSR_MPM_MEM_LT_H 12'hB9A
`define VX_CSR_MPM_MEM_BANK_CNTR 12'hB1E // memory bank requests
`define VX_CSR_MPM_MEM_BANK_CNTR_H 12'hB9E
`define VX_CSR_MPM_MEM_BANK_TICK 12'hB1F // memory ticks
`define VX_CSR_MPM_MEM_BANK_TICK_H 12'hB9F
// PERF: lmem // PERF: lmem
`define VX_CSR_MPM_LMEM_READS 12'hB1B // memory reads `define VX_CSR_MPM_LMEM_READS 12'hB1B // memory reads
`define VX_CSR_MPM_LMEM_READS_H 12'hB9B `define VX_CSR_MPM_LMEM_READS_H 12'hB9B

View file

@ -34,6 +34,7 @@ typedef void* vx_buffer_h;
#define VX_CAPS_GLOBAL_MEM_SIZE 0x5 #define VX_CAPS_GLOBAL_MEM_SIZE 0x5
#define VX_CAPS_LOCAL_MEM_SIZE 0x6 #define VX_CAPS_LOCAL_MEM_SIZE 0x6
#define VX_CAPS_ISA_FLAGS 0x7 #define VX_CAPS_ISA_FLAGS 0x7
#define VX_CAPS_NUM_MEM_BANKS 0x8
// device isa flags // device isa flags
#define VX_ISA_STD_A (1ull << ISA_STD_A) #define VX_ISA_STD_A (1ull << ISA_STD_A)

View file

@ -231,6 +231,9 @@ public:
case VX_CAPS_ISA_FLAGS: case VX_CAPS_ISA_FLAGS:
_value = isa_caps_; _value = isa_caps_;
break; break;
case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS;
break;
default: default:
fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id); fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id);
std::abort(); std::abort();

View file

@ -77,6 +77,9 @@ public:
case VX_CAPS_ISA_FLAGS: case VX_CAPS_ISA_FLAGS:
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD; _value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
break; break;
case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS;
break;
default: default:
std::cout << "invalid caps id: " << caps_id << std::endl; std::cout << "invalid caps id: " << caps_id << std::endl;
std::abort(); std::abort();

View file

@ -81,6 +81,9 @@ public:
case VX_CAPS_ISA_FLAGS: case VX_CAPS_ISA_FLAGS:
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD; _value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
break; break;
case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS;
break;
default: default:
std::cout << "invalid caps id: " << caps_id << std::endl; std::cout << "invalid caps id: " << caps_id << std::endl;
std::abort(); std::abort();

View file

@ -211,6 +211,8 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
uint64_t mem_reads = 0; uint64_t mem_reads = 0;
uint64_t mem_writes = 0; uint64_t mem_writes = 0;
uint64_t mem_lat = 0; uint64_t mem_lat = 0;
uint64_t mem_req_counter = 0;
uint64_t mem_ticks = 0;
uint64_t num_cores; uint64_t num_cores;
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_CORES, &num_cores), { CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_CORES, &num_cores), {
@ -221,6 +223,11 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_ISA_FLAGS, &isa_flags), { CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_ISA_FLAGS, &isa_flags), {
return err; return err;
}); });
uint64_t num_mem_bank_ports;
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_MEM_BANKS, &num_mem_bank_ports), {
return err;
});
bool icache_enable = isa_flags & VX_ISA_EXT_ICACHE; bool icache_enable = isa_flags & VX_ISA_EXT_ICACHE;
bool dcache_enable = isa_flags & VX_ISA_EXT_DCACHE; bool dcache_enable = isa_flags & VX_ISA_EXT_DCACHE;
@ -533,6 +540,12 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_LT, core_id, &mem_lat), { CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_LT, core_id, &mem_lat), {
return err; return err;
}); });
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_BANK_CNTR, core_id, &mem_req_counter), {
return err;
});
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_BANK_TICK, core_id, &mem_ticks), {
return err;
});
} }
} break; } break;
default: default:
@ -599,7 +612,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
int read_hit_ratio = calcRatio(l3cache_read_misses, l3cache_reads); int read_hit_ratio = calcRatio(l3cache_read_misses, l3cache_reads);
int write_hit_ratio = calcRatio(l3cache_write_misses, l3cache_writes); int write_hit_ratio = calcRatio(l3cache_write_misses, l3cache_writes);
int bank_utilization = calcAvgPercent(l3cache_reads + l3cache_writes, l3cache_reads + l3cache_writes + l3cache_bank_stalls); int bank_utilization = calcAvgPercent(l3cache_reads + l3cache_writes, l3cache_reads + l3cache_writes + l3cache_bank_stalls);
int mshr_utilization = calcAvgPercent(l3cache_read_misses + l3cache_write_misses, l3cache_read_misses + l3cache_write_misses + l3cache_mshr_stalls); int mshr_utilization = calcAvgPercent(l3cache_read_misses + l3cache_write_misses, l3cache_read_misses + l3cache_write_misses + l3cache_mshr_stalls);
fprintf(stream, "PERF: l3cache reads=%ld\n", l3cache_reads); fprintf(stream, "PERF: l3cache reads=%ld\n", l3cache_reads);
fprintf(stream, "PERF: l3cache writes=%ld\n", l3cache_writes); fprintf(stream, "PERF: l3cache writes=%ld\n", l3cache_writes);
fprintf(stream, "PERF: l3cache read misses=%ld (hit ratio=%d%%)\n", l3cache_read_misses, read_hit_ratio); fprintf(stream, "PERF: l3cache read misses=%ld (hit ratio=%d%%)\n", l3cache_read_misses, read_hit_ratio);
@ -609,8 +622,10 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
} }
int mem_avg_lat = caclAverage(mem_lat, mem_reads); int mem_avg_lat = caclAverage(mem_lat, mem_reads);
int memory_bank_port_utilization = calcAvgPercent(mem_req_counter, (mem_ticks * num_mem_bank_ports));
fprintf(stream, "PERF: memory requests=%ld (reads=%ld, writes=%ld)\n", (mem_reads + mem_writes), mem_reads, mem_writes); fprintf(stream, "PERF: memory requests=%ld (reads=%ld, writes=%ld)\n", (mem_reads + mem_writes), mem_reads, mem_writes);
fprintf(stream, "PERF: memory latency=%d cycles\n", mem_avg_lat); fprintf(stream, "PERF: memory latency=%d cycles\n", mem_avg_lat);
fprintf(stream, "PERF: memory bank port utilization=%d%%\n", memory_bank_port_utilization);
} break; } break;
default: default:
break; break;

View file

@ -421,6 +421,9 @@ public:
case VX_CAPS_ISA_FLAGS: case VX_CAPS_ISA_FLAGS:
_value = isa_caps_; _value = isa_caps_;
break; break;
case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS;
break;
default: default:
fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id); fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id);
std::abort(); std::abort();

View file

@ -41,11 +41,11 @@ public:
dram_config["MemorySystem"]["DRAM"]["impl"] = "HBM2"; dram_config["MemorySystem"]["DRAM"]["impl"] = "HBM2";
dram_config["MemorySystem"]["DRAM"]["org"]["preset"] = "HBM2_8Gb"; dram_config["MemorySystem"]["DRAM"]["org"]["preset"] = "HBM2_8Gb";
dram_config["MemorySystem"]["DRAM"]["org"]["density"] = 8192; dram_config["MemorySystem"]["DRAM"]["org"]["density"] = 8192;
dram_config["MemorySystem"]["DRAM"]["org"]["channel"] = 8;
dram_config["MemorySystem"]["DRAM"]["timing"]["preset"] = "HBM2_2Gbps"; dram_config["MemorySystem"]["DRAM"]["timing"]["preset"] = "HBM2_2Gbps";
dram_config["MemorySystem"]["Controller"]["impl"] = "Generic"; dram_config["MemorySystem"]["Controller"]["impl"] = "Generic";
dram_config["MemorySystem"]["Controller"]["Scheduler"]["impl"] = "FRFCFS"; dram_config["MemorySystem"]["Controller"]["Scheduler"]["impl"] = "FRFCFS";
dram_config["MemorySystem"]["Controller"]["RefreshManager"]["impl"] = "AllBank"; dram_config["MemorySystem"]["Controller"]["RefreshManager"]["impl"] = "AllBank";
dram_config["MemorySystem"]["Controller"]["RefreshManager"]["impl"] = "AllBank";
dram_config["MemorySystem"]["Controller"]["RowPolicy"]["impl"] = "OpenRowPolicy"; dram_config["MemorySystem"]["Controller"]["RowPolicy"]["impl"] = "OpenRowPolicy";
{ {
YAML::Node draw_plugin; YAML::Node draw_plugin;
@ -66,7 +66,7 @@ public:
auto original_buf = std::cout.rdbuf(); auto original_buf = std::cout.rdbuf();
std::cout.rdbuf(nullstream.rdbuf()); std::cout.rdbuf(nullstream.rdbuf());
ramulator_frontend_->finalize(); ramulator_frontend_->finalize();
ramulator_memorysystem_->finalize(); ramulator_memorysystem_->finalize();
std::cout.rdbuf(original_buf); std::cout.rdbuf(original_buf);
} }

View file

@ -168,23 +168,23 @@ public:
{} {}
void* operator new(size_t /*size*/) { void* operator new(size_t /*size*/) {
return allocator().allocate(); return allocator_.allocate();
} }
void operator delete(void* ptr) { void operator delete(void* ptr) {
allocator().deallocate(ptr); allocator_.deallocate(ptr);
} }
protected: protected:
Func func_; Func func_;
Pkt pkt_; Pkt pkt_;
static MemoryPool<SimCallEvent<Pkt>>& allocator() { static MemoryPool<SimCallEvent<Pkt>> allocator_;
static MemoryPool<SimCallEvent<Pkt>> instance(64);
return instance;
}
}; };
template <typename Pkt>
MemoryPool<SimCallEvent<Pkt>> SimCallEvent<Pkt>::allocator_(64);
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
template <typename Pkt> template <typename Pkt>
@ -201,23 +201,23 @@ public:
{} {}
void* operator new(size_t /*size*/) { void* operator new(size_t /*size*/) {
return allocator().allocate(); return allocator_.allocate();
} }
void operator delete(void* ptr) { void operator delete(void* ptr) {
allocator().deallocate(ptr); allocator_.deallocate(ptr);
} }
protected: protected:
const SimPort<Pkt>* port_; const SimPort<Pkt>* port_;
Pkt pkt_; Pkt pkt_;
static MemoryPool<SimPortEvent<Pkt>>& allocator() { static MemoryPool<SimPortEvent<Pkt>> allocator_;
static MemoryPool<SimPortEvent<Pkt>> instance(64);
return instance;
}
}; };
template <typename Pkt>
MemoryPool<SimPortEvent<Pkt>> SimPortEvent<Pkt>::allocator_(64);
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
class SimContext; class SimContext;

View file

@ -35,13 +35,13 @@
#include <unordered_map> #include <unordered_map>
#include <util.h> #include <util.h>
#ifndef MEMORY_BANKS //#ifndef MEMORY_BANKS
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS #ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
#define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS #define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS
#else #else
#define MEMORY_BANKS 2 #define MEMORY_BANKS 2
#endif #endif
#endif //#endif
#ifndef MEM_CLOCK_RATIO #ifndef MEM_CLOCK_RATIO
#define MEM_CLOCK_RATIO 1 #define MEM_CLOCK_RATIO 1

View file

@ -77,8 +77,8 @@ public:
caches_.at(i)->CoreRspPorts.at(j).bind(&mem_arbs.at(j)->RspOut.at(i)); caches_.at(i)->CoreRspPorts.at(j).bind(&mem_arbs.at(j)->RspOut.at(i));
} }
caches_.at(i)->MemReqPort.bind(&cache_arb->ReqIn.at(i)); caches_.at(i)->MemReqPorts.at(0).bind(&cache_arb->ReqIn.at(i));
cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPort); cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPorts.at(0));
} }
cache_arb->ReqOut.at(0).bind(&this->MemReqPort); cache_arb->ReqOut.at(0).bind(&this->MemReqPort);

View file

@ -19,6 +19,7 @@
#include <vector> #include <vector>
#include <list> #include <list>
#include <queue> #include <queue>
#include <string.h>
using namespace vortex; using namespace vortex;
@ -315,27 +316,75 @@ public:
simobject->CoreReqPorts.at(i).bind(&bypass_switch_->ReqIn.at(i)); simobject->CoreReqPorts.at(i).bind(&bypass_switch_->ReqIn.at(i));
bypass_switch_->RspIn.at(i).bind(&simobject->CoreRspPorts.at(i)); bypass_switch_->RspIn.at(i).bind(&simobject->CoreRspPorts.at(i));
} }
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPort); bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
simobject->MemRspPort.bind(&bypass_switch_->RspOut.at(0)); simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0));
return; return;
} }
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2); if (strcmp(simobject->name().c_str(), "l3cache")) {
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPort); bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2);
simobject->MemRspPort.bind(&bypass_switch_->RspOut.at(0)); bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0));
if (config.B != 0) { if (config.B != 0) {
snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str()); snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str());
bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B)); bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B));
for (uint32_t i = 0, n = (1 << config.B); i < n; ++i) { for (uint32_t i = 0, n = (1 << config.B); i < n; ++i) {
mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i)); mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i));
bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i)); bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
}
bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0));
bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0));
} else {
mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0));
bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
} }
bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0));
bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0));
} else { } else {
mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0)); // TODO: Change this into a crossbar
bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0)); uint32_t max = MAX(2, config_.num_inputs);
//printf("%s connecting\n", simobject_->name().c_str());
//3
if (config.B != 0) {
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, max, max);
for (uint32_t i = 0; i < max; ++i) {
//printf("%s connecting input=%d to MemPorts\n", simobject_->name().c_str(), i);
bypass_switch_->ReqOut.at(i).bind(&simobject->MemReqPorts.at(i % (1 << config.B)));
simobject->MemRspPorts.at(i % (1 << config.B)).bind(&bypass_switch_->RspOut.at(i));
}
} else {
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2);
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0));
}
if (config.B != 0)
{
snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str());
bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B), (1 << config.B));
for (uint32_t i = 0, n = (1 << config.B); i < n; ++i)
{
//1
//printf("%s Connecting memory ports to bank=%d\n", simobject_->name().c_str(), i);
mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i));
bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
}
//2
if (config_.num_inputs > 1) {
for (uint32_t i = 0; i < max; ++i) {
//printf("%s connecting bank and bypass port=%d\n", simobject_->name().c_str(), i);
bank_switch_->ReqOut.at(i % (1 << config.B)).bind(&bypass_switch_->ReqIn.at(i));
bypass_switch_->RspIn.at(i).bind(&bank_switch_->RspOut.at(i % (1 << config.B)));
}
} else {
bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0));
bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0));
}
}
else
{
mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0));
bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
}
} }
// calculate cache initialization cycles // calculate cache initialization cycles
@ -673,8 +722,8 @@ CacheSim::CacheSim(const SimContext& ctx, const char* name, const Config& config
: SimObject<CacheSim>(ctx, name) : SimObject<CacheSim>(ctx, name)
, CoreReqPorts(config.num_inputs, this) , CoreReqPorts(config.num_inputs, this)
, CoreRspPorts(config.num_inputs, this) , CoreRspPorts(config.num_inputs, this)
, MemReqPort(this) , MemReqPorts(NUM_MEM_PORTS, this)
, MemRspPort(this) , MemRspPorts(NUM_MEM_PORTS, this)
, impl_(new Impl(this, config)) , impl_(new Impl(this, config))
{} {}

View file

@ -75,8 +75,8 @@ public:
std::vector<SimPort<MemReq>> CoreReqPorts; std::vector<SimPort<MemReq>> CoreReqPorts;
std::vector<SimPort<MemRsp>> CoreRspPorts; std::vector<SimPort<MemRsp>> CoreRspPorts;
SimPort<MemReq> MemReqPort; std::vector<SimPort<MemReq>> MemReqPorts;
SimPort<MemRsp> MemRspPort; std::vector<SimPort<MemRsp>> MemRspPorts;
CacheSim(const SimContext& ctx, const char* name, const Config& config); CacheSim(const SimContext& ctx, const char* name, const Config& config);
~CacheSim(); ~CacheSim();

View file

@ -76,8 +76,8 @@ Cluster::Cluster(const SimContext& ctx,
2, // pipeline latency 2, // pipeline latency
}); });
l2cache_->MemReqPort.bind(&this->mem_req_port); l2cache_->MemReqPorts.at(0).bind(&this->mem_req_port);
this->mem_rsp_port.bind(&l2cache_->MemRspPort); this->mem_rsp_port.bind(&l2cache_->MemRspPorts.at(0));
icache_switch->ReqOut.at(0).bind(&l2cache_->CoreReqPorts.at(0)); icache_switch->ReqOut.at(0).bind(&l2cache_->CoreReqPorts.at(0));
l2cache_->CoreRspPorts.at(0).bind(&icache_switch->RspOut.at(0)); l2cache_->CoreRspPorts.at(0).bind(&icache_switch->RspOut.at(0));

View file

@ -21,10 +21,6 @@
#define MEM_CLOCK_RATIO 1 #define MEM_CLOCK_RATIO 1
#endif #endif
#ifndef MEMORY_BANKS
#define MEMORY_BANKS 2
#endif
#define LSU_WORD_SIZE (XLEN / 8) #define LSU_WORD_SIZE (XLEN / 8)
#define LSU_CHANNELS NUM_LSU_LANES #define LSU_CHANNELS NUM_LSU_LANES
#define LSU_NUM_REQS (NUM_LSU_BLOCKS * LSU_CHANNELS) #define LSU_NUM_REQS (NUM_LSU_BLOCKS * LSU_CHANNELS)

View file

@ -438,6 +438,8 @@ Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
CSR_READ_64(VX_CSR_MPM_MEM_READS, proc_perf.mem_reads); CSR_READ_64(VX_CSR_MPM_MEM_READS, proc_perf.mem_reads);
CSR_READ_64(VX_CSR_MPM_MEM_WRITES, proc_perf.mem_writes); CSR_READ_64(VX_CSR_MPM_MEM_WRITES, proc_perf.mem_writes);
CSR_READ_64(VX_CSR_MPM_MEM_LT, proc_perf.mem_latency); CSR_READ_64(VX_CSR_MPM_MEM_LT, proc_perf.mem_latency);
CSR_READ_64(VX_CSR_MPM_MEM_BANK_CNTR, proc_perf.memsim.counter);
CSR_READ_64(VX_CSR_MPM_MEM_BANK_TICK, proc_perf.memsim.ticks);
CSR_READ_64(VX_CSR_MPM_LMEM_READS, lmem_perf.reads); CSR_READ_64(VX_CSR_MPM_LMEM_READS, lmem_perf.reads);
CSR_READ_64(VX_CSR_MPM_LMEM_WRITES, lmem_perf.writes); CSR_READ_64(VX_CSR_MPM_LMEM_WRITES, lmem_perf.writes);

View file

@ -33,6 +33,7 @@ private:
struct DramCallbackArgs { struct DramCallbackArgs {
MemSim* simobject; MemSim* simobject;
MemReq request; MemReq request;
uint32_t i;
}; };
public: public:
@ -56,46 +57,49 @@ public:
void tick() { void tick() {
dram_sim_.tick(); dram_sim_.tick();
uint32_t counter = 0;
if (simobject_->MemReqPort.empty()) for (uint32_t i = 0; i < NUM_MEM_PORTS; ++i) {
return; if (simobject_->MemReqPorts.at(i).empty())
continue;
auto& mem_req = simobject_->MemReqPort.front(); auto& mem_req = simobject_->MemReqPorts.at(i).front();
// try to enqueue the request to the memory system // try to enqueue the request to the memory system
auto req_args = new DramCallbackArgs{simobject_, mem_req}; auto req_args = new DramCallbackArgs{simobject_, mem_req, i};
auto enqueue_success = dram_sim_.send_request( auto enqueue_success = dram_sim_.send_request(
mem_req.write, mem_req.write,
mem_req.addr, mem_req.addr,
0, 0,
[](void* arg) { [](void* arg) {
auto rsp_args = reinterpret_cast<const DramCallbackArgs*>(arg); auto rsp_args = reinterpret_cast<const DramCallbackArgs*>(arg);
// only send a response for read requests // only send a response for read requests
if (!rsp_args->request.write) { if (!rsp_args->request.write) {
MemRsp mem_rsp{rsp_args->request.tag, rsp_args->request.cid, rsp_args->request.uuid}; MemRsp mem_rsp{rsp_args->request.tag, rsp_args->request.cid, rsp_args->request.uuid};
rsp_args->simobject->MemRspPort.push(mem_rsp, 1); rsp_args->simobject->MemRspPorts.at(rsp_args->i).push(mem_rsp, 1);
DT(3, rsp_args->simobject->name() << " mem-rsp: " << mem_rsp); DT(3, rsp_args->simobject->name() << " mem-rsp: " << mem_rsp << " bank: " << rsp_args->i);
} }
delete rsp_args; delete rsp_args;
}, },
req_args req_args
); );
// check if the request was enqueued successfully // check if the request was enqueued successfully
if (!enqueue_success) { if (!enqueue_success) {
delete req_args; delete req_args;
return; continue;
}
DT(3, simobject_->name() << " mem-req: " << mem_req << " bank: " << i);
simobject_->MemReqPorts.at(i).pop();
counter++;
} }
if (mem_req.write) { perf_stats_.counter += counter;
++perf_stats_.writes; if (counter > 0) {
} else { ++perf_stats_.ticks;
++perf_stats_.reads;
} }
DT(3, simobject_->name() << " mem-req: " << mem_req);
simobject_->MemReqPort.pop();
} }
}; };
@ -103,8 +107,8 @@ public:
MemSim::MemSim(const SimContext& ctx, const char* name, const Config& config) MemSim::MemSim(const SimContext& ctx, const char* name, const Config& config)
: SimObject<MemSim>(ctx, name) : SimObject<MemSim>(ctx, name)
, MemReqPort(this) , MemReqPorts(NUM_MEM_PORTS, this)
, MemRspPort(this) , MemRspPorts(NUM_MEM_PORTS, this)
, impl_(new Impl(this, config)) , impl_(new Impl(this, config))
{} {}
@ -118,4 +122,8 @@ void MemSim::reset() {
void MemSim::tick() { void MemSim::tick() {
impl_->tick(); impl_->tick();
}
const MemSim::PerfStats &MemSim::perf_stats() const {
return impl_->perf_stats();
} }

View file

@ -26,17 +26,23 @@ public:
}; };
struct PerfStats { struct PerfStats {
uint64_t reads; uint64_t counter;
uint64_t writes; uint64_t ticks;
PerfStats() PerfStats()
: reads(0) : counter(0)
, writes(0) , ticks(0)
{} {}
PerfStats& operator+=(const PerfStats& rhs) {
this->counter += rhs.counter;
this->ticks += rhs.ticks;
return *this;
}
}; };
SimPort<MemReq> MemReqPort; std::vector<SimPort<MemReq>> MemReqPorts;
SimPort<MemRsp> MemRspPort; std::vector<SimPort<MemRsp>> MemRspPorts;
MemSim(const SimContext& ctx, const char* name, const Config& config); MemSim(const SimContext& ctx, const char* name, const Config& config);
~MemSim(); ~MemSim();

View file

@ -47,8 +47,10 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
); );
// connect L3 memory ports // connect L3 memory ports
l3cache_->MemReqPort.bind(&memsim_->MemReqPort); for (uint32_t i = 0; i < NUM_MEM_PORTS; ++i) {
memsim_->MemRspPort.bind(&l3cache_->MemRspPort); l3cache_->MemReqPorts.at(i).bind(&memsim_->MemReqPorts.at(i));
memsim_->MemRspPorts.at(i).bind(&l3cache_->MemRspPorts.at(i));
}
// create clusters // create clusters
for (uint32_t i = 0; i < arch.num_clusters(); ++i) { for (uint32_t i = 0; i < arch.num_clusters(); ++i) {
@ -59,16 +61,18 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
} }
// set up memory profiling // set up memory profiling
memsim_->MemReqPort.tx_callback([&](const MemReq& req, uint64_t cycle){ for (uint32_t i = 0; i < NUM_MEM_PORTS; ++i) {
__unused (cycle); memsim_->MemReqPorts.at(i).tx_callback([&](const MemReq& req, uint64_t cycle){
perf_mem_reads_ += !req.write; __unused (cycle);
perf_mem_writes_ += req.write; perf_mem_reads_ += !req.write;
perf_mem_pending_reads_ += !req.write; perf_mem_writes_ += req.write;
}); perf_mem_pending_reads_ += !req.write;
memsim_->MemRspPort.tx_callback([&](const MemRsp&, uint64_t cycle){ });
__unused (cycle); memsim_->MemRspPorts.at(i).tx_callback([&](const MemRsp&, uint64_t cycle){
--perf_mem_pending_reads_; __unused (cycle);
}); --perf_mem_pending_reads_;
});
}
#ifndef NDEBUG #ifndef NDEBUG
// dump device configuration // dump device configuration
@ -131,6 +135,7 @@ ProcessorImpl::PerfStats ProcessorImpl::perf_stats() const {
perf.mem_writes = perf_mem_writes_; perf.mem_writes = perf_mem_writes_;
perf.mem_latency = perf_mem_latency_; perf.mem_latency = perf_mem_latency_;
perf.l3cache = l3cache_->perf_stats(); perf.l3cache = l3cache_->perf_stats();
perf.memsim = memsim_->perf_stats();
return perf; return perf;
} }

View file

@ -25,6 +25,7 @@ class ProcessorImpl {
public: public:
struct PerfStats { struct PerfStats {
CacheSim::PerfStats l3cache; CacheSim::PerfStats l3cache;
MemSim::PerfStats memsim;
uint64_t mem_reads; uint64_t mem_reads;
uint64_t mem_writes; uint64_t mem_writes;
uint64_t mem_latency; uint64_t mem_latency;

View file

@ -50,7 +50,7 @@ public:
static const char* type_str() { static const char* type_str() {
return "float"; return "float";
} }
static int generate() { static float generate() {
return static_cast<float>(rand()) / RAND_MAX; return static_cast<float>(rand()) / RAND_MAX;
} }
static bool compare(float a, float b, int index, int errors) { static bool compare(float a, float b, int index, int errors) {