changes made for initial feedback

This commit is contained in:
sij814 2024-08-13 16:52:27 -07:00
parent 47427ab22e
commit ea34239b43
10 changed files with 39 additions and 42 deletions

View file

@ -650,6 +650,15 @@
`define L3_WRITEBACK 0
`endif
// Number of memory banks; defaults to 8 unless MEMORY_BANKS is already defined
`ifndef MEMORY_BANKS
`define MEMORY_BANKS 8
`endif
// Number of Memory Ports from LLC
// Overridable; by default derived from MEMORY_BANKS and L3_NUM_BANKS through
// the MIN macro (defined elsewhere; presumably selects the smaller value, so
// the port count would not exceed either bank count -- confirm)
`ifndef NUM_MEM_PORTS
`define NUM_MEM_PORTS `MIN(`MEMORY_BANKS, `L3_NUM_BANKS)
`endif
// ISA Extensions /////////////////////////////////////////////////////////////
`ifdef EXT_A_ENABLE

View file

@ -166,6 +166,10 @@
`define VX_CSR_MPM_MEM_WRITES_H 12'hB99
`define VX_CSR_MPM_MEM_LT 12'hB1A // memory latency
`define VX_CSR_MPM_MEM_LT_H 12'hB9A
// Memory bank activity counters: a request count and a tick count.
// NOTE(review): the _H entries are presumably the upper halves of 64-bit
// counters, following the other MPM CSR pairs -- confirm.
`define VX_CSR_MPM_MEM_BANK_CNTR 12'hB1E // memory bank requests
`define VX_CSR_MPM_MEM_BANK_CNTR_H 12'hB9E
`define VX_CSR_MPM_MEM_BANK_TICK 12'hB1F // memory ticks
`define VX_CSR_MPM_MEM_BANK_TICK_H 12'hB9F
// PERF: lmem
`define VX_CSR_MPM_LMEM_READS 12'hB1B // memory reads
`define VX_CSR_MPM_LMEM_READS_H 12'hB9B
@ -173,11 +177,6 @@
`define VX_CSR_MPM_LMEM_WRITES_H 12'hB9C
`define VX_CSR_MPM_LMEM_BANK_ST 12'hB1D // bank conflicts
`define VX_CSR_MPM_LMEM_BANK_ST_H 12'hB9D
// PERF: hbm
`define VX_CSR_HBM_BANK_CNTR 12'hB1E // hbm banks
`define VX_CSR_HBM_BANK_CNTR_H 12'hB9E
`define VX_CSR_HBM_BANK_TICK 12'hB1F // hbm ticks
`define VX_CSR_HBM_BANK_TICK_H 12'hB9F
// Machine Performance-monitoring memory counters (class 3) ///////////////////
// <Add your own counters: use addresses hB03..B1F, hB83..hB9F>

View file

@ -34,7 +34,7 @@ typedef void* vx_buffer_h;
#define VX_CAPS_GLOBAL_MEM_SIZE 0x5
#define VX_CAPS_LOCAL_MEM_SIZE 0x6
#define VX_CAPS_ISA_FLAGS 0x7
#define VX_CAPS_L3CACHE_NUM_BANKS 0x8
#define VX_CAPS_NUM_MEM_BANKS 0x8
// device isa flags
#define VX_ISA_STD_A (1ull << ISA_STD_A)

View file

@ -81,8 +81,8 @@ public:
case VX_CAPS_ISA_FLAGS:
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
break;
case VX_CAPS_L3CACHE_NUM_BANKS:
_value = L3_NUM_BANKS;
case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS;
break;
default:
std::cout << "invalid caps id: " << caps_id << std::endl;

View file

@ -211,10 +211,8 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
uint64_t mem_reads = 0;
uint64_t mem_writes = 0;
uint64_t mem_lat = 0;
// PERF: hbm
uint64_t hbm_counter = 0;
uint64_t hbm_ticks = 0;
uint64_t mem_req_counter = 0;
uint64_t mem_ticks = 0;
uint64_t num_cores;
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_CORES, &num_cores), {
@ -225,9 +223,9 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_ISA_FLAGS, &isa_flags), {
return err;
});
uint64_t l3cache_banks;
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_L3CACHE_NUM_BANKS, &l3cache_banks), {
uint64_t num_mem_bank_ports;
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_MEM_BANKS, &num_mem_bank_ports), {
return err;
});
@ -531,14 +529,6 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_L3CACHE_MSHR_ST, core_id, &l3cache_mshr_stalls), {
return err;
});
// PERF: HBM
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_HBM_BANK_CNTR, core_id, &hbm_counter), {
return err;
});
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_HBM_BANK_TICK, core_id, &hbm_ticks), {
return err;
});
}
// PERF: memory
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_READS, core_id, &mem_reads), {
@ -550,6 +540,12 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_LT, core_id, &mem_lat), {
return err;
});
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_BANK_CNTR, core_id, &mem_req_counter), {
return err;
});
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_BANK_TICK, core_id, &mem_ticks), {
return err;
});
}
} break;
default:
@ -616,22 +612,20 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
int read_hit_ratio = calcRatio(l3cache_read_misses, l3cache_reads);
int write_hit_ratio = calcRatio(l3cache_write_misses, l3cache_writes);
int bank_utilization = calcAvgPercent(l3cache_reads + l3cache_writes, l3cache_reads + l3cache_writes + l3cache_bank_stalls);
int mshr_utilization = calcAvgPercent(l3cache_read_misses + l3cache_write_misses, l3cache_read_misses + l3cache_write_misses + l3cache_mshr_stalls);
int mshr_utilization = calcAvgPercent(l3cache_read_misses + l3cache_write_misses, l3cache_read_misses + l3cache_write_misses + l3cache_mshr_stalls);
fprintf(stream, "PERF: l3cache reads=%ld\n", l3cache_reads);
fprintf(stream, "PERF: l3cache writes=%ld\n", l3cache_writes);
fprintf(stream, "PERF: l3cache read misses=%ld (hit ratio=%d%%)\n", l3cache_read_misses, read_hit_ratio);
fprintf(stream, "PERF: l3cache write misses=%ld (hit ratio=%d%%)\n", l3cache_write_misses, write_hit_ratio);
fprintf(stream, "PERF: l3cache bank stalls=%ld (utilization=%d%%)\n", l3cache_bank_stalls, bank_utilization);
fprintf(stream, "PERF: l3cache mshr stalls=%ld (utilization=%d%%)\n", l3cache_mshr_stalls, mshr_utilization);
// HBM
float util = (float)hbm_counter / (hbm_ticks * l3cache_banks) * 100;
fprintf(stream, "PERF: hbm bank utilization=%f\n", util);
}
int mem_avg_lat = caclAverage(mem_lat, mem_reads);
int memory_bank_port_utilization = calcAvgPercent(mem_req_counter, (mem_ticks * num_mem_bank_ports));
fprintf(stream, "PERF: memory requests=%ld (reads=%ld, writes=%ld)\n", (mem_reads + mem_writes), mem_reads, mem_writes);
fprintf(stream, "PERF: memory latency=%d cycles\n", mem_avg_lat);
fprintf(stream, "PERF: memory bank port utilization=%d%%\n", memory_bank_port_utilization);
} break;
default:
break;

View file

@ -722,8 +722,8 @@ CacheSim::CacheSim(const SimContext& ctx, const char* name, const Config& config
: SimObject<CacheSim>(ctx, name)
, CoreReqPorts(config.num_inputs, this)
, CoreRspPorts(config.num_inputs, this)
, MemReqPorts((1 << config.B), this)
, MemRspPorts((1 << config.B), this)
, MemReqPorts(NUM_MEM_PORTS, this)
, MemRspPorts(NUM_MEM_PORTS, this)
, impl_(new Impl(this, config))
{}

View file

@ -21,10 +21,6 @@
#define MEM_CLOCK_RATIO 1
#endif
#ifndef MEMORY_BANKS
#define MEMORY_BANKS 8
#endif
#define LSU_WORD_SIZE (XLEN / 8)
#define LSU_CHANNELS NUM_LSU_LANES
#define LSU_NUM_REQS (NUM_LSU_BLOCKS * LSU_CHANNELS)

View file

@ -451,13 +451,12 @@ Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
CSR_READ_64(VX_CSR_MPM_MEM_READS, proc_perf.mem_reads);
CSR_READ_64(VX_CSR_MPM_MEM_WRITES, proc_perf.mem_writes);
CSR_READ_64(VX_CSR_MPM_MEM_LT, proc_perf.mem_latency);
CSR_READ_64(VX_CSR_MPM_MEM_BANK_CNTR, proc_perf.memsim.counter);
CSR_READ_64(VX_CSR_MPM_MEM_BANK_TICK, proc_perf.memsim.ticks);
CSR_READ_64(VX_CSR_MPM_LMEM_READS, lmem_perf.reads);
CSR_READ_64(VX_CSR_MPM_LMEM_WRITES, lmem_perf.writes);
CSR_READ_64(VX_CSR_MPM_LMEM_BANK_ST, lmem_perf.bank_stalls);
CSR_READ_64(VX_CSR_HBM_BANK_CNTR, proc_perf.memsim.counter);
CSR_READ_64(VX_CSR_HBM_BANK_TICK, proc_perf.memsim.ticks);
}
} break;
default: {

View file

@ -59,7 +59,7 @@ public:
dram_sim_.tick();
uint32_t counter = 0;
for (uint32_t i = 0; i < L3_NUM_BANKS; ++i) {
for (uint32_t i = 0; i < NUM_MEM_PORTS; ++i) {
if (simobject_->MemReqPorts.at(i).empty())
continue;
@ -107,8 +107,8 @@ public:
MemSim::MemSim(const SimContext& ctx, const char* name, const Config& config)
: SimObject<MemSim>(ctx, name)
, MemReqPorts(L3_NUM_BANKS, this)
, MemRspPorts(L3_NUM_BANKS, this)
, MemReqPorts(NUM_MEM_PORTS, this)
, MemRspPorts(NUM_MEM_PORTS, this)
, impl_(new Impl(this, config))
{}

View file

@ -47,7 +47,7 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
);
// connect L3 memory ports
for (uint32_t i = 0; i < L3_NUM_BANKS; ++i) {
for (uint32_t i = 0; i < NUM_MEM_PORTS; ++i) {
l3cache_->MemReqPorts.at(i).bind(&memsim_->MemReqPorts.at(i));
memsim_->MemRspPorts.at(i).bind(&l3cache_->MemRspPorts.at(i));
}
@ -61,7 +61,7 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
}
// set up memory profiling
for (uint32_t i = 0; i < L3_NUM_BANKS; ++i) {
for (uint32_t i = 0; i < NUM_MEM_PORTS; ++i) {
memsim_->MemReqPorts.at(i).tx_callback([&](const MemReq& req, uint64_t cycle){
__unused (cycle);
perf_mem_reads_ += !req.write;