mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
Add new performance counters (bank utilization and DRAM bus utilization)
This commit is contained in:
parent
4b7d871d62
commit
d956e268b9
14 changed files with 426 additions and 439 deletions
|
@ -108,37 +108,39 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||
|
||||
uint64_t instrs = 0;
|
||||
uint64_t cycles = 0;
|
||||
|
||||
#ifdef PERF_ENABLE
|
||||
// PERF: pipeline stalls
|
||||
uint64_t ibuffer_stalls = 0;
|
||||
uint64_t scoreboard_stalls = 0;
|
||||
uint64_t lsu_stalls = 0;
|
||||
uint64_t fpu_stalls = 0;
|
||||
uint64_t mul_stalls = 0;
|
||||
uint64_t csr_stalls = 0;
|
||||
uint64_t alu_stalls = 0;
|
||||
uint64_t gpu_stalls = 0;
|
||||
uint64_t ibuffer_stalls = 0;
|
||||
uint64_t scoreboard_stalls = 0;
|
||||
uint64_t icache_stalls = 0;
|
||||
// PERF: Icache
|
||||
uint64_t icache_reads = 0;
|
||||
uint64_t icache_read_misses = 0;
|
||||
uint64_t icache_pipe_stalls = 0;
|
||||
uint64_t icache_dram_stalls = 0;
|
||||
uint64_t icache_mshr_stalls = 0;
|
||||
uint64_t icache_rsp_stalls = 0;
|
||||
// PERF: Dcache
|
||||
uint64_t dcache_reads = 0;
|
||||
uint64_t dcache_writes = 0;
|
||||
uint64_t dcache_read_misses = 0;
|
||||
uint64_t dcache_write_misses = 0;
|
||||
uint64_t dcache_pipe_stalls = 0;
|
||||
uint64_t dcache_dram_stalls = 0;
|
||||
uint64_t dcache_bank_stalls = 0;
|
||||
uint64_t dcache_mshr_stalls = 0;
|
||||
uint64_t dcache_rsp_stalls = 0;
|
||||
uint64_t dcache_evictions = 0;
|
||||
uint64_t dcache_pipe_stalls = 0;
|
||||
uint64_t dcache_rsp_stalls = 0;
|
||||
// PERF: SMEM
|
||||
uint64_t smem_reads = 0;
|
||||
uint64_t smem_writes = 0;
|
||||
uint64_t smem_bank_stalls = 0;
|
||||
// PERF: memory
|
||||
uint64_t dram_req = 0;
|
||||
uint64_t dram_rsp = 0;
|
||||
uint64_t dram_reads = 0;
|
||||
uint64_t dram_writes = 0;
|
||||
uint64_t dram_stalls = 0;
|
||||
uint64_t dram_lat = 0;
|
||||
#endif
|
||||
|
||||
|
@ -154,11 +156,6 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||
|
||||
#ifdef PERF_ENABLE
|
||||
// PERF: pipeline
|
||||
// icache_stall
|
||||
uint64_t icache_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_ICACHE_ST, CSR_MPM_ICACHE_ST_H, &icache_stalls_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: icache stalls=%ld\n", core_id, icache_stalls_per_core);
|
||||
icache_stalls += icache_stalls_per_core;
|
||||
// ibuffer_stall
|
||||
uint64_t ibuffer_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_IBUF_ST, CSR_MPM_IBUF_ST_H, &ibuffer_stalls_per_core);
|
||||
|
@ -209,7 +206,8 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||
// read misses
|
||||
uint64_t icache_miss_r_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_ICACHE_MISS_R, CSR_MPM_ICACHE_MISS_R_H, &icache_miss_r_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: icache read misses=%ld\n", core_id, icache_miss_r_per_core);
|
||||
int icache_read_hit_ratio = (int)((1.0 - (double(icache_miss_r_per_core) / double(icache_reads_per_core))) * 100);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: icache read misses=%ld (hit ratio=%d%%)\n", core_id, icache_miss_r_per_core, icache_read_hit_ratio);
|
||||
icache_read_misses += icache_miss_r_per_core;
|
||||
// pipeline stalls
|
||||
uint64_t icache_pipe_st_per_core;
|
||||
|
@ -221,16 +219,6 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_ICACHE_CRSP_ST, CSR_MPM_ICACHE_CRSP_ST_H, &icache_crsp_st_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: icache reponse stalls=%ld\n", core_id, icache_crsp_st_per_core);
|
||||
icache_rsp_stalls += icache_crsp_st_per_core;
|
||||
// dram_stalls
|
||||
uint64_t icache_dram_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_ICACHE_DREQ_ST, CSR_MPM_ICACHE_DREQ_ST_H, &icache_dram_st_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: icache dram stalls=%ld\n", core_id, icache_dram_st_per_core);
|
||||
icache_dram_stalls += icache_dram_st_per_core;
|
||||
// mshr_stalls
|
||||
uint64_t icache_mshr_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_ICACHE_MSHR_ST, CSR_MPM_ICACHE_MSHR_ST_H, &icache_mshr_st_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: icache mshr stalls=%ld\n", core_id, icache_mshr_st_per_core);
|
||||
icache_mshr_stalls += icache_mshr_st_per_core;
|
||||
|
||||
// PERF: Dcache
|
||||
// total reads
|
||||
|
@ -246,50 +234,70 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||
// read misses
|
||||
uint64_t dcache_miss_r_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_MISS_R, CSR_MPM_DCACHE_MISS_R_H, &dcache_miss_r_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache read misses=%ld\n", core_id, dcache_miss_r_per_core);
|
||||
int dcache_read_hit_ratio = (int)((1.0 - (double(dcache_miss_r_per_core) / double(dcache_reads_per_core))) * 100);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache read misses=%ld (hit ratio=%d%%)\n", core_id, dcache_miss_r_per_core, dcache_read_hit_ratio);
|
||||
dcache_read_misses += dcache_miss_r_per_core;
|
||||
// write misses
|
||||
uint64_t dcache_miss_w_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_MISS_W, CSR_MPM_DCACHE_MISS_W_H, &dcache_miss_w_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache wrire misses=%ld\n", core_id, dcache_miss_w_per_core);
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_MISS_W, CSR_MPM_DCACHE_MISS_W_H, &dcache_miss_w_per_core);
|
||||
int dcache_write_hit_ratio = (int)((1.0 - (double(dcache_miss_w_per_core) / double(dcache_writes_per_core))) * 100);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache wrire misses=%ld (hit ratio=%d%%)\n", core_id, dcache_miss_w_per_core, dcache_write_hit_ratio);
|
||||
dcache_write_misses += dcache_miss_w_per_core;
|
||||
// total_evictions
|
||||
uint64_t dcache_evictions_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_EVICTS, CSR_MPM_DCACHE_EVICTS_H, &dcache_evictions_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache evictions_per_core=%ld\n", core_id, dcache_evictions_per_core);
|
||||
dcache_evictions += dcache_evictions_per_core;
|
||||
// pipeline stalls
|
||||
uint64_t dcache_pipe_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_PIPE_ST, CSR_MPM_DCACHE_PIPE_ST_H, &dcache_pipe_st_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache pipeline stalls=%ld\n", core_id, dcache_pipe_st_per_core);
|
||||
dcache_pipe_stalls += dcache_pipe_st_per_core;
|
||||
// response stalls
|
||||
uint64_t dcache_crsp_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_CRSP_ST, CSR_MPM_DCACHE_CRSP_ST_H, &dcache_crsp_st_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache reponse stalls=%ld\n", core_id, dcache_crsp_st_per_core);
|
||||
dcache_rsp_stalls += dcache_crsp_st_per_core;
|
||||
// dram_stalls
|
||||
uint64_t dcache_dram_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_DREQ_ST, CSR_MPM_DCACHE_DREQ_ST_H, &dcache_dram_st_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache dram stalls=%ld\n", core_id, dcache_dram_st_per_core);
|
||||
dcache_dram_stalls += dcache_dram_st_per_core;
|
||||
// bank_stalls
|
||||
uint64_t dcache_bank_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_BANK_ST, CSR_MPM_DCACHE_BANK_ST_H, &dcache_bank_st_per_core);
|
||||
int dcache_bank_utilization = (int)((1.0 - (double(dcache_reads_per_core + dcache_writes_per_core) / double(dcache_reads_per_core + dcache_writes_per_core + dcache_bank_st_per_core))) * 100);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache bank stalls=%ld (utilization=%d%%)\n", core_id, dcache_bank_st_per_core, dcache_bank_utilization);
|
||||
dcache_bank_stalls += dcache_bank_st_per_core;
|
||||
// mshr_stalls
|
||||
uint64_t dcache_mshr_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_MSHR_ST, CSR_MPM_DCACHE_MSHR_ST_H, &dcache_mshr_st_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache mshr stalls=%ld\n", core_id, dcache_mshr_st_per_core);
|
||||
dcache_mshr_stalls += dcache_mshr_st_per_core;
|
||||
// pipeline stalls
|
||||
uint64_t dcache_pipe_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_PIPE_ST, CSR_MPM_DCACHE_PIPE_ST_H, &dcache_pipe_st_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache pipeline stalls=%ld\n", core_id, dcache_pipe_st_per_core);
|
||||
dcache_pipe_stalls += dcache_pipe_st_per_core;
|
||||
// response stalls
|
||||
uint64_t dcache_crsp_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_CRSP_ST, CSR_MPM_DCACHE_CRSP_ST_H, &dcache_crsp_st_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache reponse stalls=%ld\n", core_id, dcache_crsp_st_per_core);
|
||||
dcache_rsp_stalls += dcache_crsp_st_per_core;
|
||||
|
||||
// PERF: dram_latency
|
||||
uint64_t dram_req_per_core, dram_rsp_per_core, dram_lat_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DRAM_REQ, CSR_MPM_DRAM_REQ_H, &dram_req_per_core);
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DRAM_RSP, CSR_MPM_DRAM_RSP_H, &dram_rsp_per_core);
|
||||
// PERF: SMEM
|
||||
// total reads
|
||||
uint64_t smem_reads_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_SMEM_READS, CSR_MPM_SMEM_READS_H, &smem_reads_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: smem reads=%ld\n", core_id, smem_reads_per_core);
|
||||
smem_reads += smem_reads_per_core;
|
||||
// total writes
|
||||
uint64_t smem_writes_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_SMEM_WRITES, CSR_MPM_SMEM_WRITES_H, &smem_writes_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: smem writes=%ld\n", core_id, smem_writes_per_core);
|
||||
smem_writes += smem_writes_per_core;
|
||||
// bank_stalls
|
||||
uint64_t smem_bank_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_SMEM_BANK_ST, CSR_MPM_SMEM_BANK_ST_H, &smem_bank_st_per_core);
|
||||
int smem_bank_utilization = (int)((1.0 - (double(smem_reads_per_core + smem_writes_per_core) / double(smem_reads_per_core + smem_writes_per_core + smem_bank_st_per_core))) * 100);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: smem bank stalls=%ld (utilization=%d%%)\n", core_id, smem_bank_st_per_core, smem_bank_utilization);
|
||||
smem_bank_stalls += smem_bank_st_per_core;
|
||||
|
||||
// PERF: DRAM
|
||||
uint64_t dram_reads_per_core, dram_writes_per_core, dram_stalls_per_core, dram_lat_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DRAM_READS, CSR_MPM_DRAM_READS_H, &dram_reads_per_core);
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DRAM_WRITES, CSR_MPM_DRAM_WRITES_H, &dram_writes_per_core);
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DRAM_ST, CSR_MPM_DRAM_ST_H, &dram_stalls_per_core);
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DRAM_LAT, CSR_MPM_DRAM_LAT_H, &dram_lat_per_core);
|
||||
int avg_dram_lat_per_core = (int)(double(dram_lat_per_core) / double(dram_rsp_per_core));
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dram requests=%ld (reads=%ld, writes=%ld)\n", core_id, dram_req_per_core, dram_rsp_per_core, dram_req_per_core - dram_rsp_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: average dram latency=%d cycles\n", core_id, avg_dram_lat_per_core);
|
||||
dram_req += dram_req_per_core;
|
||||
dram_rsp += dram_rsp_per_core;
|
||||
dram_lat += dram_lat_per_core;
|
||||
int avg_dram_lat = (int)(double(dram_lat_per_core) / double(dram_reads_per_core));
|
||||
int dram_utilization = (int)((1.0 - (double(dram_reads_per_core + dram_writes_per_core) / double(dram_reads_per_core + dram_writes_per_core + dram_stalls_per_core))) * 100);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dram requests=%ld (reads=%ld, writes=%ld)\n", core_id, (dram_reads_per_core + dram_writes_per_core), dram_reads_per_core, dram_writes_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dram stalls=%d (utilization=%d%%)\n", core_id, dram_stalls_per_core, dram_utilization);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: average dram latency=%d cycles\n", core_id, avg_dram_lat);
|
||||
dram_reads += dram_reads_per_core;
|
||||
dram_writes += dram_writes_per_core;
|
||||
dram_stalls += dram_stalls_per_core;
|
||||
dram_lat += dram_lat_per_core;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -297,7 +305,13 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||
fprintf(stream, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", instrs, cycles, IPC);
|
||||
|
||||
#ifdef PERF_ENABLE
|
||||
fprintf(stream, "PERF: icache stalls=%ld\n", icache_stalls);
|
||||
int icache_read_hit_ratio = (int)((1.0 - (double(icache_read_misses) / double(icache_reads))) * 100);
|
||||
int dcache_read_hit_ratio = (int)((1.0 - (double(dcache_read_misses) / double(dcache_reads))) * 100);
|
||||
int dcache_write_hit_ratio = (int)((1.0 - (double(dcache_write_misses) / double(dcache_writes))) * 100);
|
||||
int dcache_bank_utilization = (int)((1.0 - (double(dcache_reads + dcache_writes) / double(dcache_reads + dcache_writes + dcache_bank_stalls))) * 100);
|
||||
int smem_bank_utilization = (int)((1.0 - (double(smem_reads + smem_writes) / double(smem_reads + smem_writes + smem_bank_stalls))) * 100);
|
||||
int dram_utilization = (int)((1.0 - (double(dram_reads + dram_writes) / double(dram_reads + dram_writes + dram_stalls))) * 100);
|
||||
int avg_dram_lat = (int)(double(dram_lat) / double(dram_reads));
|
||||
fprintf(stream, "PERF: ibuffer stalls=%ld\n", ibuffer_stalls);
|
||||
fprintf(stream, "PERF: scoreboard stalls=%ld\n", scoreboard_stalls);
|
||||
fprintf(stream, "PERF: alu unit stalls=%ld\n", alu_stalls);
|
||||
|
@ -307,22 +321,22 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||
fprintf(stream, "PERF: fpu unit stalls=%ld\n", fpu_stalls);
|
||||
fprintf(stream, "PERF: gpu unit stalls=%ld\n", gpu_stalls);
|
||||
fprintf(stream, "PERF: icache reads=%ld\n", icache_reads);
|
||||
fprintf(stream, "PERF: icache read misses=%ld\n", icache_read_misses);
|
||||
fprintf(stream, "PERF: icache read misses=%ld (hit ratio=%d%%)\n", icache_read_misses, icache_read_hit_ratio);
|
||||
fprintf(stream, "PERF: icache pipeline stalls=%ld\n", icache_pipe_stalls);
|
||||
fprintf(stream, "PERF: icache reponse stalls=%ld\n", icache_rsp_stalls);
|
||||
fprintf(stream, "PERF: icache pipeline stalls=%ld\n", icache_pipe_stalls);
|
||||
fprintf(stream, "PERF: icache dram stalls=%ld\n", icache_dram_stalls);
|
||||
fprintf(stream, "PERF: icache mshr stalls=%ld\n", icache_mshr_stalls);
|
||||
fprintf(stream, "PERF: dcache reads=%ld\n", dcache_reads);
|
||||
fprintf(stream, "PERF: dcache writes=%ld\n", dcache_writes);
|
||||
fprintf(stream, "PERF: dcache read misses=%ld\n", dcache_read_misses);
|
||||
fprintf(stream, "PERF: dcache wrire misses=%ld\n", dcache_write_misses);
|
||||
fprintf(stream, "PERF: dcache evictions=%ld\n", dcache_evictions);
|
||||
fprintf(stream, "PERF: dcache read misses=%ld (hit ratio=%d%%)\n", dcache_read_misses, dcache_read_hit_ratio);
|
||||
fprintf(stream, "PERF: dcache write misses=%ld (hit ratio=%d%%)\n", dcache_write_misses, dcache_write_hit_ratio);
|
||||
fprintf(stream, "PERF: dcache bank stalls=%ld (utilization=%d%%)\n", dcache_bank_stalls, dcache_bank_utilization);
|
||||
fprintf(stream, "PERF: dcache mshr stalls=%ld\n", dcache_mshr_stalls);
|
||||
fprintf(stream, "PERF: dcache pipeline stalls=%ld\n", dcache_pipe_stalls);
|
||||
fprintf(stream, "PERF: dcache reponse stalls=%ld\n", dcache_rsp_stalls);
|
||||
fprintf(stream, "PERF: dcache dram stalls=%ld\n", dcache_dram_stalls);
|
||||
fprintf(stream, "PERF: dcache mshr stalls=%ld\n", dcache_mshr_stalls);
|
||||
fprintf(stream, "PERF: dram requests=%ld (reads=%ld, writes=%ld)\n", dram_req, dram_rsp, dram_req - dram_rsp);
|
||||
int avg_dram_lat = (int)(double(dram_lat) / double(dram_rsp));
|
||||
fprintf(stream, "PERF: smem reads=%ld\n", smem_reads);
|
||||
fprintf(stream, "PERF: smem writes=%ld\n", smem_writes);
|
||||
fprintf(stream, "PERF: smem bank stalls=%ld (utilization=%d%%)\n", smem_bank_stalls, smem_bank_utilization);
|
||||
fprintf(stream, "PERF: dram requests=%ld (reads=%ld, writes=%ld)\n", (dram_reads + dram_writes), dram_reads, dram_writes);
|
||||
fprintf(stream, "PERF: dram stalls=%ld (utilization=%d%%)\n", dram_stalls, dram_utilization);
|
||||
fprintf(stream, "PERF: average dram latency=%d cycles\n", avg_dram_lat);
|
||||
#endif
|
||||
|
||||
|
|
|
@ -167,63 +167,64 @@
|
|||
|
||||
// Machine Performance-monitoring counters
|
||||
// PERF: pipeline
|
||||
`define CSR_MPM_ICACHE_ST 12'hB03
|
||||
`define CSR_MPM_ICACHE_ST_H 12'hB83
|
||||
`define CSR_MPM_IBUF_ST 12'hB04
|
||||
`define CSR_MPM_IBUF_ST_H 12'hB84
|
||||
`define CSR_MPM_SCRB_ST 12'hB05
|
||||
`define CSR_MPM_SCRB_ST_H 12'hB85
|
||||
`define CSR_MPM_ALU_ST 12'hB06
|
||||
`define CSR_MPM_ALU_ST_H 12'hB86
|
||||
`define CSR_MPM_LSU_ST 12'hB07
|
||||
`define CSR_MPM_LSU_ST_H 12'hB87
|
||||
`define CSR_MPM_CSR_ST 12'hB08
|
||||
`define CSR_MPM_CSR_ST_H 12'hB88
|
||||
`define CSR_MPM_MUL_ST 12'hB09
|
||||
`define CSR_MPM_MUL_ST_H 12'hB89
|
||||
`define CSR_MPM_FPU_ST 12'hB0A
|
||||
`define CSR_MPM_FPU_ST_H 12'hB8A
|
||||
`define CSR_MPM_GPU_ST 12'hB0B
|
||||
`define CSR_MPM_GPU_ST_H 12'hB8B
|
||||
`define CSR_MPM_IBUF_ST 12'hB03
|
||||
`define CSR_MPM_IBUF_ST_H 12'hB83
|
||||
`define CSR_MPM_SCRB_ST 12'hB04
|
||||
`define CSR_MPM_SCRB_ST_H 12'hB84
|
||||
`define CSR_MPM_ALU_ST 12'hB05
|
||||
`define CSR_MPM_ALU_ST_H 12'hB85
|
||||
`define CSR_MPM_LSU_ST 12'hB06
|
||||
`define CSR_MPM_LSU_ST_H 12'hB86
|
||||
`define CSR_MPM_CSR_ST 12'hB07
|
||||
`define CSR_MPM_CSR_ST_H 12'hB87
|
||||
`define CSR_MPM_MUL_ST 12'hB08
|
||||
`define CSR_MPM_MUL_ST_H 12'hB88
|
||||
`define CSR_MPM_FPU_ST 12'hB09
|
||||
`define CSR_MPM_FPU_ST_H 12'hB89
|
||||
`define CSR_MPM_GPU_ST 12'hB0A
|
||||
`define CSR_MPM_GPU_ST_H 12'hB8A
|
||||
// PERF: icache
|
||||
`define CSR_MPM_ICACHE_MISS_R 12'hB0C // read misses
|
||||
`define CSR_MPM_ICACHE_READS 12'hB0B // total reads
|
||||
`define CSR_MPM_ICACHE_READS_H 12'hB8B
|
||||
`define CSR_MPM_ICACHE_MISS_R 12'hB0C // total misses
|
||||
`define CSR_MPM_ICACHE_MISS_R_H 12'hB8C
|
||||
`define CSR_MPM_ICACHE_DREQ_ST 12'hB0D // dram request stalls
|
||||
`define CSR_MPM_ICACHE_DREQ_ST_H 12'hB8D
|
||||
`define CSR_MPM_ICACHE_PIPE_ST 12'hB0D // pipeline stalls
|
||||
`define CSR_MPM_ICACHE_PIPE_ST_H 12'hB8D
|
||||
`define CSR_MPM_ICACHE_CRSP_ST 12'hB0E // core response stalls
|
||||
`define CSR_MPM_ICACHE_CRSP_ST_H 12'hB8E
|
||||
`define CSR_MPM_ICACHE_MSHR_ST 12'hB0F // MSHR stalls
|
||||
`define CSR_MPM_ICACHE_MSHR_ST_H 12'hB8F
|
||||
`define CSR_MPM_ICACHE_PIPE_ST 12'hB10 // pipeline stalls
|
||||
`define CSR_MPM_ICACHE_PIPE_ST_H 12'hB90
|
||||
`define CSR_MPM_ICACHE_READS 12'hB11 // total reads
|
||||
`define CSR_MPM_ICACHE_READS_H 12'hB91
|
||||
// PERF: dcache
|
||||
`define CSR_MPM_DCACHE_MISS_R 12'hB12 // read misses
|
||||
`define CSR_MPM_DCACHE_MISS_R_H 12'hB92
|
||||
`define CSR_MPM_DCACHE_MISS_W 12'hB13 // write misses
|
||||
`define CSR_MPM_DCACHE_MISS_W_H 12'hB93
|
||||
`define CSR_MPM_DCACHE_DREQ_ST 12'hB14 // dram request stalls
|
||||
`define CSR_MPM_DCACHE_DREQ_ST_H 12'hB94
|
||||
`define CSR_MPM_DCACHE_CRSP_ST 12'hB15 // core response stalls
|
||||
`define CSR_MPM_DCACHE_CRSP_ST_H 12'hB95
|
||||
`define CSR_MPM_DCACHE_MSHR_ST 12'hB16 // MSHR stalls
|
||||
`define CSR_MPM_DCACHE_MSHR_ST_H 12'hB96
|
||||
`define CSR_MPM_DCACHE_PIPE_ST 12'hB17 // pipeline stalls
|
||||
`define CSR_MPM_DCACHE_PIPE_ST_H 12'hB97
|
||||
`define CSR_MPM_DCACHE_READS 12'hB18 // total reads
|
||||
`define CSR_MPM_DCACHE_READS_H 12'hB98
|
||||
`define CSR_MPM_DCACHE_WRITES 12'hB19 // total writes
|
||||
`define CSR_MPM_DCACHE_WRITES_H 12'hB99
|
||||
`define CSR_MPM_DCACHE_EVICTS 12'hB1A // total evictions
|
||||
`define CSR_MPM_DCACHE_EVICTS_H 12'hB9A
|
||||
`define CSR_MPM_DCACHE_READS 12'hB0F // total reads
|
||||
`define CSR_MPM_DCACHE_READS_H 12'hB8F
|
||||
`define CSR_MPM_DCACHE_WRITES 12'hB10 // total writes
|
||||
`define CSR_MPM_DCACHE_WRITES_H 12'hB90
|
||||
`define CSR_MPM_DCACHE_MISS_R 12'hB11 // read misses
|
||||
`define CSR_MPM_DCACHE_MISS_R_H 12'hB91
|
||||
`define CSR_MPM_DCACHE_MISS_W 12'hB12 // write misses
|
||||
`define CSR_MPM_DCACHE_MISS_W_H 12'hB92
|
||||
`define CSR_MPM_DCACHE_BANK_ST 12'hB13 // bank conflicts stalls
|
||||
`define CSR_MPM_DCACHE_BANK_ST_H 12'hB93
|
||||
`define CSR_MPM_DCACHE_MSHR_ST 12'hB14 // MSHR stalls
|
||||
`define CSR_MPM_DCACHE_MSHR_ST_H 12'hB94
|
||||
`define CSR_MPM_DCACHE_PIPE_ST 12'hB15 // pipeline stalls
|
||||
`define CSR_MPM_DCACHE_PIPE_ST_H 12'hB95
|
||||
`define CSR_MPM_DCACHE_CRSP_ST 12'hB16 // core response stalls
|
||||
`define CSR_MPM_DCACHE_CRSP_ST_H 12'hB96
|
||||
// PERF: smem
|
||||
`define CSR_MPM_SMEM_READS 12'hB17 // total reads
|
||||
`define CSR_MPM_SMEM_READS_H 12'hB97
|
||||
`define CSR_MPM_SMEM_WRITES 12'hB18 // total writes
|
||||
`define CSR_MPM_SMEM_WRITES_H 12'hB98
|
||||
`define CSR_MPM_SMEM_BANK_ST 12'hB19 // bank conflicts stalls
|
||||
`define CSR_MPM_SMEM_BANK_ST_H 12'hB99
|
||||
// PERF: memory
|
||||
`define CSR_MPM_DRAM_LAT 12'hB1B // dram latency (total)
|
||||
`define CSR_MPM_DRAM_LAT_H 12'hB9B
|
||||
`define CSR_MPM_DRAM_REQ 12'hB1C // dram requests
|
||||
`define CSR_MPM_DRAM_REQ_H 12'hB9C
|
||||
`define CSR_MPM_DRAM_RSP 12'hB1D // dram responses
|
||||
`define CSR_MPM_DRAM_RSP_H 12'hB9D
|
||||
`define CSR_MPM_DRAM_READS 12'hB1A // dram reads
|
||||
`define CSR_MPM_DRAM_READS_H 12'hB9A
|
||||
`define CSR_MPM_DRAM_WRITES 12'hB1B // dram writes
|
||||
`define CSR_MPM_DRAM_WRITES_H 12'hB9B
|
||||
`define CSR_MPM_DRAM_ST 12'hB1C // dram request stalls
|
||||
`define CSR_MPM_DRAM_ST_H 12'hB9C
|
||||
`define CSR_MPM_DRAM_LAT 12'hB1D // dram latency (total)
|
||||
`define CSR_MPM_DRAM_LAT_H 12'hB9D
|
||||
|
||||
// Machine Information Registers
|
||||
`define CSR_MVENDORID 12'hF11
|
||||
|
|
|
@ -121,63 +121,64 @@ module VX_csr_data #(
|
|||
|
||||
`ifdef PERF_ENABLE
|
||||
// PERF: pipeline
|
||||
`CSR_MPM_ICACHE_ST : read_data_r = perf_pipeline_if.icache_stalls[31:0];
|
||||
`CSR_MPM_ICACHE_ST_H : read_data_r = perf_pipeline_if.icache_stalls[63:32];
|
||||
`CSR_MPM_IBUF_ST : read_data_r = perf_pipeline_if.ibuffer_stalls[31:0];
|
||||
`CSR_MPM_IBUF_ST_H : read_data_r = perf_pipeline_if.ibuffer_stalls[63:32];
|
||||
`CSR_MPM_SCRB_ST : read_data_r = perf_pipeline_if.scoreboard_stalls[31:0];
|
||||
`CSR_MPM_SCRB_ST_H : read_data_r = perf_pipeline_if.scoreboard_stalls[63:32];
|
||||
`CSR_MPM_ALU_ST : read_data_r = perf_pipeline_if.alu_stalls[31:0];
|
||||
`CSR_MPM_ALU_ST_H : read_data_r = perf_pipeline_if.alu_stalls[63:32];
|
||||
`CSR_MPM_LSU_ST : read_data_r = perf_pipeline_if.lsu_stalls[31:0];
|
||||
`CSR_MPM_LSU_ST_H : read_data_r = perf_pipeline_if.lsu_stalls[63:32];
|
||||
`CSR_MPM_CSR_ST : read_data_r = perf_pipeline_if.csr_stalls[31:0];
|
||||
`CSR_MPM_CSR_ST_H : read_data_r = perf_pipeline_if.csr_stalls[63:32];
|
||||
`CSR_MPM_MUL_ST : read_data_r = perf_pipeline_if.mul_stalls[31:0];
|
||||
`CSR_MPM_MUL_ST_H : read_data_r = perf_pipeline_if.mul_stalls[63:32];
|
||||
`CSR_MPM_FPU_ST : read_data_r = perf_pipeline_if.fpu_stalls[31:0];
|
||||
`CSR_MPM_FPU_ST_H : read_data_r = perf_pipeline_if.fpu_stalls[63:32];
|
||||
`CSR_MPM_GPU_ST : read_data_r = perf_pipeline_if.gpu_stalls[31:0];
|
||||
`CSR_MPM_GPU_ST_H : read_data_r = perf_pipeline_if.gpu_stalls[63:32];
|
||||
`CSR_MPM_IBUF_ST : read_data_r = perf_pipeline_if.ibf_stalls[31:0];
|
||||
`CSR_MPM_IBUF_ST_H : read_data_r = perf_pipeline_if.ibf_stalls[63:32];
|
||||
`CSR_MPM_SCRB_ST : read_data_r = perf_pipeline_if.scb_stalls[31:0];
|
||||
`CSR_MPM_SCRB_ST_H : read_data_r = perf_pipeline_if.scb_stalls[63:32];
|
||||
`CSR_MPM_ALU_ST : read_data_r = perf_pipeline_if.alu_stalls[31:0];
|
||||
`CSR_MPM_ALU_ST_H : read_data_r = perf_pipeline_if.alu_stalls[63:32];
|
||||
`CSR_MPM_LSU_ST : read_data_r = perf_pipeline_if.lsu_stalls[31:0];
|
||||
`CSR_MPM_LSU_ST_H : read_data_r = perf_pipeline_if.lsu_stalls[63:32];
|
||||
`CSR_MPM_CSR_ST : read_data_r = perf_pipeline_if.csr_stalls[31:0];
|
||||
`CSR_MPM_CSR_ST_H : read_data_r = perf_pipeline_if.csr_stalls[63:32];
|
||||
`CSR_MPM_MUL_ST : read_data_r = perf_pipeline_if.mul_stalls[31:0];
|
||||
`CSR_MPM_MUL_ST_H : read_data_r = perf_pipeline_if.mul_stalls[63:32];
|
||||
`CSR_MPM_FPU_ST : read_data_r = perf_pipeline_if.fpu_stalls[31:0];
|
||||
`CSR_MPM_FPU_ST_H : read_data_r = perf_pipeline_if.fpu_stalls[63:32];
|
||||
`CSR_MPM_GPU_ST : read_data_r = perf_pipeline_if.gpu_stalls[31:0];
|
||||
`CSR_MPM_GPU_ST_H : read_data_r = perf_pipeline_if.gpu_stalls[63:32];
|
||||
// PERF: icache
|
||||
`CSR_MPM_ICACHE_MISS_R : read_data_r = perf_memsys_if.icache_read_misses[31:0];
|
||||
`CSR_MPM_ICACHE_MISS_R_H : read_data_r = perf_memsys_if.icache_read_misses[63:32];
|
||||
`CSR_MPM_ICACHE_DREQ_ST : read_data_r = perf_memsys_if.icache_dreq_stalls[31:0];
|
||||
`CSR_MPM_ICACHE_DREQ_ST_H : read_data_r = perf_memsys_if.icache_dreq_stalls[63:32];
|
||||
`CSR_MPM_ICACHE_CRSP_ST : read_data_r = perf_memsys_if.icache_crsp_stalls[31:0];
|
||||
`CSR_MPM_ICACHE_CRSP_ST_H : read_data_r = perf_memsys_if.icache_crsp_stalls[63:32];
|
||||
`CSR_MPM_ICACHE_MSHR_ST : read_data_r = perf_memsys_if.icache_mshr_stalls[31:0];
|
||||
`CSR_MPM_ICACHE_MSHR_ST_H : read_data_r = perf_memsys_if.icache_mshr_stalls[63:32];
|
||||
`CSR_MPM_ICACHE_PIPE_ST : read_data_r = perf_memsys_if.icache_pipe_stalls[31:0];
|
||||
`CSR_MPM_ICACHE_PIPE_ST_H : read_data_r = perf_memsys_if.icache_pipe_stalls[63:32];
|
||||
`CSR_MPM_ICACHE_READS : read_data_r = perf_memsys_if.icache_reads[31:0];
|
||||
`CSR_MPM_ICACHE_READS_H : read_data_r = perf_memsys_if.icache_reads[63:32];
|
||||
// PERF: dcache
|
||||
`CSR_MPM_DCACHE_MISS_R : read_data_r = perf_memsys_if.dcache_read_misses[31:0];
|
||||
`CSR_MPM_DCACHE_MISS_R_H : read_data_r = perf_memsys_if.dcache_read_misses[63:32];
|
||||
`CSR_MPM_DCACHE_MISS_W : read_data_r = perf_memsys_if.dcache_write_misses[31:0];
|
||||
`CSR_MPM_DCACHE_MISS_W_H : read_data_r = perf_memsys_if.dcache_write_misses[63:32];
|
||||
`CSR_MPM_DCACHE_DREQ_ST : read_data_r = perf_memsys_if.dcache_dreq_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_DREQ_ST_H : read_data_r = perf_memsys_if.dcache_dreq_stalls[63:32];
|
||||
`CSR_MPM_DCACHE_CRSP_ST : read_data_r = perf_memsys_if.dcache_crsp_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_CRSP_ST_H : read_data_r = perf_memsys_if.dcache_crsp_stalls[63:32];
|
||||
`CSR_MPM_DCACHE_MSHR_ST : read_data_r = perf_memsys_if.dcache_mshr_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_MSHR_ST_H : read_data_r = perf_memsys_if.dcache_mshr_stalls[63:32];
|
||||
`CSR_MPM_DCACHE_PIPE_ST : read_data_r = perf_memsys_if.dcache_pipe_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_PIPE_ST_H : read_data_r = perf_memsys_if.dcache_pipe_stalls[63:32];
|
||||
`CSR_MPM_ICACHE_MISS_R : read_data_r = perf_memsys_if.icache_read_misses[31:0];
|
||||
`CSR_MPM_ICACHE_MISS_R_H : read_data_r = perf_memsys_if.icache_read_misses[63:32];
|
||||
`CSR_MPM_ICACHE_PIPE_ST : read_data_r = perf_memsys_if.icache_pipe_stalls[31:0];
|
||||
`CSR_MPM_ICACHE_PIPE_ST_H : read_data_r = perf_memsys_if.icache_pipe_stalls[63:32];
|
||||
`CSR_MPM_ICACHE_CRSP_ST : read_data_r = perf_memsys_if.icache_crsp_stalls[31:0];
|
||||
`CSR_MPM_ICACHE_CRSP_ST_H : read_data_r = perf_memsys_if.icache_crsp_stalls[63:32];
|
||||
// PERF: dcache
|
||||
`CSR_MPM_DCACHE_READS : read_data_r = perf_memsys_if.dcache_reads[31:0];
|
||||
`CSR_MPM_DCACHE_READS_H : read_data_r = perf_memsys_if.dcache_reads[63:32];
|
||||
`CSR_MPM_DCACHE_WRITES : read_data_r = perf_memsys_if.dcache_writes[31:0];
|
||||
`CSR_MPM_DCACHE_WRITES_H : read_data_r = perf_memsys_if.dcache_writes[63:32];
|
||||
`CSR_MPM_DCACHE_EVICTS : read_data_r = perf_memsys_if.dcache_evictions[31:0];
|
||||
`CSR_MPM_DCACHE_EVICTS_H : read_data_r = perf_memsys_if.dcache_evictions[63:32];
|
||||
// PERF: memory
|
||||
`CSR_MPM_DRAM_LAT : read_data_r = perf_memsys_if.dram_latency[31:0];
|
||||
`CSR_MPM_DRAM_LAT_H : read_data_r = perf_memsys_if.dram_latency[63:32];
|
||||
`CSR_MPM_DRAM_REQ : read_data_r = perf_memsys_if.dram_requests[31:0];
|
||||
`CSR_MPM_DRAM_REQ_H : read_data_r = perf_memsys_if.dram_requests[63:32];
|
||||
`CSR_MPM_DRAM_RSP : read_data_r = perf_memsys_if.dram_responses[31:0];
|
||||
`CSR_MPM_DRAM_RSP_H : read_data_r = perf_memsys_if.dram_responses[63:32];
|
||||
`CSR_MPM_DCACHE_MISS_R : read_data_r = perf_memsys_if.dcache_read_misses[31:0];
|
||||
`CSR_MPM_DCACHE_MISS_R_H : read_data_r = perf_memsys_if.dcache_read_misses[63:32];
|
||||
`CSR_MPM_DCACHE_MISS_W : read_data_r = perf_memsys_if.dcache_write_misses[31:0];
|
||||
`CSR_MPM_DCACHE_MISS_W_H : read_data_r = perf_memsys_if.dcache_write_misses[63:32];
|
||||
`CSR_MPM_DCACHE_BANK_ST : read_data_r = perf_memsys_if.dcache_bank_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_BANK_ST_H : read_data_r = perf_memsys_if.dcache_bank_stalls[63:32];
|
||||
`CSR_MPM_DCACHE_MSHR_ST : read_data_r = perf_memsys_if.dcache_mshr_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_MSHR_ST_H : read_data_r = perf_memsys_if.dcache_mshr_stalls[63:32];
|
||||
`CSR_MPM_DCACHE_PIPE_ST : read_data_r = perf_memsys_if.dcache_pipe_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_PIPE_ST_H : read_data_r = perf_memsys_if.dcache_pipe_stalls[63:32];
|
||||
`CSR_MPM_DCACHE_CRSP_ST : read_data_r = perf_memsys_if.dcache_crsp_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_CRSP_ST_H : read_data_r = perf_memsys_if.dcache_crsp_stalls[63:32];
|
||||
// PERF: smem
|
||||
`CSR_MPM_SMEM_READS : read_data_r = perf_memsys_if.smem_reads[31:0];
|
||||
`CSR_MPM_SMEM_READS_H : read_data_r = perf_memsys_if.smem_reads[63:32];
|
||||
`CSR_MPM_SMEM_WRITES : read_data_r = perf_memsys_if.smem_writes[31:0];
|
||||
`CSR_MPM_SMEM_WRITES_H : read_data_r = perf_memsys_if.smem_writes[63:32];
|
||||
`CSR_MPM_SMEM_BANK_ST : read_data_r = perf_memsys_if.smem_bank_stalls[31:0];
|
||||
`CSR_MPM_SMEM_BANK_ST_H : read_data_r = perf_memsys_if.smem_bank_stalls[63:32];
|
||||
// PERF: DRAM
|
||||
`CSR_MPM_DRAM_READS : read_data_r = perf_memsys_if.dram_reads[31:0];
|
||||
`CSR_MPM_DRAM_READS_H : read_data_r = perf_memsys_if.dram_reads[63:32];
|
||||
`CSR_MPM_DRAM_WRITES : read_data_r = perf_memsys_if.dram_writes[31:0];
|
||||
`CSR_MPM_DRAM_WRITES_H : read_data_r = perf_memsys_if.dram_writes[63:32];
|
||||
`CSR_MPM_DRAM_ST : read_data_r = perf_memsys_if.dram_stalls[31:0];
|
||||
`CSR_MPM_DRAM_ST_H : read_data_r = perf_memsys_if.dram_stalls[63:32];
|
||||
`CSR_MPM_DRAM_LAT : read_data_r = perf_memsys_if.dram_latency[31:0];
|
||||
`CSR_MPM_DRAM_LAT_H : read_data_r = perf_memsys_if.dram_latency[63:32];
|
||||
`endif
|
||||
|
||||
`CSR_SATP : read_data_r = 32'(csr_satp);
|
||||
|
|
|
@ -123,18 +123,77 @@ module VX_issue #(
|
|||
`SCOPE_ASSIGN (writeback_data, writeback_if.data);
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [63:0] perf_scoreboard_stalls;
|
||||
reg [63:0] perf_ibf_stalls ;
|
||||
reg [63:0] perf_scb_stalls ;
|
||||
reg [63:0] perf_alu_stalls;
|
||||
reg [63:0] perf_lsu_stalls;
|
||||
reg [63:0] perf_csr_stalls;
|
||||
reg [63:0] perf_gpu_stalls;
|
||||
`ifdef EXT_M_ENABLE
|
||||
reg [63:0] perf_mul_stalls;
|
||||
`endif
|
||||
`ifdef EXT_F_ENABLE
|
||||
reg [63:0] perf_fpu_stalls;
|
||||
`endif
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_scoreboard_stalls <= 0;
|
||||
perf_ibf_stalls <= 0;
|
||||
perf_scb_stalls <= 0;
|
||||
perf_alu_stalls <= 0;
|
||||
perf_lsu_stalls <= 0;
|
||||
perf_csr_stalls <= 0;
|
||||
perf_gpu_stalls <= 0;
|
||||
`ifdef EXT_M_ENABLE
|
||||
perf_mul_stalls <= 0;
|
||||
`endif
|
||||
`ifdef EXT_F_ENABLE
|
||||
perf_fpu_stalls <= 0;
|
||||
`endif
|
||||
end else begin
|
||||
// scoreboard_stall
|
||||
if (ibuf_deq_if.valid & scoreboard_delay) begin
|
||||
perf_scoreboard_stalls <= perf_scoreboard_stalls + 64'd1;
|
||||
if (decode_if.valid & !decode_if.ready) begin
|
||||
perf_ibf_stalls <= perf_ibf_stalls + 64'd1;
|
||||
end
|
||||
if (ibuf_deq_if.valid & scoreboard_delay) begin
|
||||
perf_scb_stalls <= perf_scb_stalls + 64'd1;
|
||||
end
|
||||
if (alu_req_if.valid & !alu_req_if.ready) begin
|
||||
perf_alu_stalls <= perf_alu_stalls + 64'd1;
|
||||
end
|
||||
if (lsu_req_if.valid & !lsu_req_if.ready) begin
|
||||
perf_lsu_stalls <= perf_lsu_stalls + 64'd1;
|
||||
end
|
||||
if (csr_req_if.valid & !csr_req_if.ready) begin
|
||||
perf_csr_stalls <= perf_csr_stalls + 64'd1;
|
||||
end
|
||||
if (gpu_req_if.valid & !gpu_req_if.ready) begin
|
||||
perf_gpu_stalls <= perf_gpu_stalls + 64'd1;
|
||||
end
|
||||
`ifdef EXT_M_ENABLE
|
||||
if (mul_req_if.valid & !mul_req_if.ready) begin
|
||||
perf_mul_stalls <= perf_mul_stalls + 64'd1;
|
||||
end
|
||||
`endif
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (fpu_req_if.valid & !fpu_req_if.ready) begin
|
||||
perf_fpu_stalls <= perf_fpu_stalls + 64'd1;
|
||||
end
|
||||
`endif
|
||||
end
|
||||
end
|
||||
assign perf_pipeline_if.scoreboard_stalls = perf_scoreboard_stalls;
|
||||
|
||||
assign perf_pipeline_if.ibf_stalls = perf_ibf_stalls;
|
||||
assign perf_pipeline_if.scb_stalls = perf_scb_stalls;
|
||||
assign perf_pipeline_if.alu_stalls = perf_alu_stalls;
|
||||
assign perf_pipeline_if.lsu_stalls = perf_lsu_stalls;
|
||||
assign perf_pipeline_if.csr_stalls = perf_csr_stalls;
|
||||
assign perf_pipeline_if.gpu_stalls = perf_gpu_stalls;
|
||||
`ifdef EXT_M_ENABLE
|
||||
assign perf_pipeline_if.mul_stalls = perf_mul_stalls;
|
||||
`endif
|
||||
`ifdef EXT_F_ENABLE
|
||||
assign perf_pipeline_if.fpu_stalls = perf_fpu_stalls;
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
|
|
|
@ -363,60 +363,72 @@ module VX_mem_unit # (
|
|||
|
||||
`ifdef PERF_ENABLE
|
||||
|
||||
assign perf_memsys_if.icache_reads = perf_icache_if.reads;
|
||||
assign perf_memsys_if.icache_reads = perf_icache_if.reads;
|
||||
assign perf_memsys_if.icache_read_misses = perf_icache_if.read_misses;
|
||||
assign perf_memsys_if.icache_mshr_stalls = perf_icache_if.mshr_stalls;
|
||||
assign perf_memsys_if.icache_crsp_stalls = perf_icache_if.crsp_stalls;
|
||||
assign perf_memsys_if.icache_dreq_stalls = perf_icache_if.dreq_stalls;
|
||||
assign perf_memsys_if.icache_pipe_stalls = perf_icache_if.pipe_stalls;
|
||||
assign perf_memsys_if.icache_crsp_stalls = perf_icache_if.crsp_stalls;
|
||||
|
||||
assign perf_memsys_if.dcache_reads = perf_dcache_if.reads;
|
||||
assign perf_memsys_if.dcache_writes = perf_dcache_if.writes;
|
||||
assign perf_memsys_if.dcache_reads = perf_dcache_if.reads;
|
||||
assign perf_memsys_if.dcache_writes = perf_dcache_if.writes;
|
||||
assign perf_memsys_if.dcache_read_misses = perf_dcache_if.read_misses;
|
||||
assign perf_memsys_if.dcache_write_misses = perf_dcache_if.write_misses;
|
||||
assign perf_memsys_if.dcache_evictions = perf_dcache_if.evictions;
|
||||
assign perf_memsys_if.dcache_mshr_stalls = perf_dcache_if.mshr_stalls;
|
||||
assign perf_memsys_if.dcache_crsp_stalls = perf_dcache_if.crsp_stalls;
|
||||
assign perf_memsys_if.dcache_dreq_stalls = perf_dcache_if.dreq_stalls;
|
||||
assign perf_memsys_if.dcache_write_misses= perf_dcache_if.write_misses;
|
||||
assign perf_memsys_if.dcache_bank_stalls = perf_dcache_if.bank_stalls;
|
||||
assign perf_memsys_if.dcache_mshr_stalls = perf_dcache_if.mshr_stalls;
|
||||
assign perf_memsys_if.dcache_pipe_stalls = perf_dcache_if.pipe_stalls;
|
||||
assign perf_memsys_if.dcache_crsp_stalls = perf_dcache_if.crsp_stalls;
|
||||
|
||||
if (`SM_ENABLE) begin
|
||||
assign perf_memsys_if.smem_reads = perf_smem_if.reads;
|
||||
assign perf_memsys_if.smem_writes = perf_smem_if.writes;
|
||||
assign perf_memsys_if.smem_bank_stalls = perf_smem_if.bank_stalls;
|
||||
end else begin
|
||||
assign perf_memsys_if.smem_reads = 0;
|
||||
assign perf_memsys_if.smem_writes = 0;
|
||||
assign perf_memsys_if.smem_bank_stalls = 0;
|
||||
end
|
||||
|
||||
reg [63:0] perf_dram_lat_per_cycle;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_dram_lat_per_cycle <= 0;
|
||||
end else begin
|
||||
if (dram_req_if.valid & (~dram_req_if.rw) & dram_req_if.ready & dram_rsp_if.valid & dram_rsp_if.ready) begin
|
||||
end else begin
|
||||
if (dram_req_if.valid && !dram_req_if.rw && dram_req_if.ready && dram_rsp_if.valid && dram_rsp_if.ready) begin
|
||||
perf_dram_lat_per_cycle <= perf_dram_lat_per_cycle;
|
||||
end else if (dram_req_if.valid & (~dram_req_if.rw) & dram_req_if.ready) begin
|
||||
end else if (dram_req_if.valid && !dram_req_if.rw && dram_req_if.ready) begin
|
||||
perf_dram_lat_per_cycle <= perf_dram_lat_per_cycle + 64'd1;
|
||||
end else if (dram_rsp_if.valid & dram_rsp_if.ready) begin
|
||||
end else if (dram_rsp_if.valid && dram_rsp_if.ready) begin
|
||||
perf_dram_lat_per_cycle <= perf_dram_lat_per_cycle - 64'd1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
reg [63:0] perf_dram_req, perf_dram_rsp, perf_dram_lat;
|
||||
reg [63:0] perf_dram_reads, perf_dram_writes, perf_dram_lat, perf_dram_stalls;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_dram_req <= 0;
|
||||
perf_dram_rsp <= 0;
|
||||
perf_dram_lat <= 0;
|
||||
end else begin
|
||||
if (dram_req_if.valid & dram_req_if.ready) begin
|
||||
perf_dram_req <= perf_dram_req + 64'd1;
|
||||
end
|
||||
if (dram_rsp_if.valid & dram_rsp_if.ready) begin
|
||||
perf_dram_rsp <= perf_dram_rsp + 64'd1;
|
||||
perf_dram_reads <= 0;
|
||||
perf_dram_writes <= 0;
|
||||
perf_dram_lat <= 0;
|
||||
perf_dram_stalls <= 0;
|
||||
end else begin
|
||||
if (dram_req_if.valid && dram_req_if.ready && !dram_req_if.rw) begin
|
||||
perf_dram_reads <= perf_dram_reads + 64'd1;
|
||||
end
|
||||
if (dram_req_if.valid && dram_req_if.ready && dram_req_if.rw) begin
|
||||
perf_dram_writes <= perf_dram_writes + 64'd1;
|
||||
end
|
||||
if (dram_req_if.valid && !dram_req_if.ready) begin
|
||||
perf_dram_stalls <= perf_dram_stalls + 64'd1;
|
||||
end
|
||||
perf_dram_lat <= perf_dram_lat + perf_dram_lat_per_cycle;
|
||||
end
|
||||
end
|
||||
|
||||
assign perf_memsys_if.dram_requests = perf_dram_req;
|
||||
assign perf_memsys_if.dram_responses = perf_dram_rsp;
|
||||
assign perf_memsys_if.dram_latency = perf_dram_lat;
|
||||
assign perf_memsys_if.dram_reads = perf_dram_reads;
|
||||
assign perf_memsys_if.dram_writes = perf_dram_writes;
|
||||
assign perf_memsys_if.dram_latency = perf_dram_lat;
|
||||
assign perf_memsys_if.dram_stalls = perf_dram_stalls;
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -288,79 +288,5 @@ module VX_pipeline #(
|
|||
.writeback_if (writeback_if),
|
||||
.cmt_to_csr_if (cmt_to_csr_if)
|
||||
);
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [63:0] perf_icache_stalls;
|
||||
reg [63:0] perf_ibuffer_stalls;
|
||||
reg [63:0] perf_alu_stalls;
|
||||
reg [63:0] perf_lsu_stalls;
|
||||
reg [63:0] perf_csr_stalls;
|
||||
reg [63:0] perf_gpu_stalls;
|
||||
`ifdef EXT_M_ENABLE
|
||||
reg [63:0] perf_mul_stalls;
|
||||
`endif
|
||||
`ifdef EXT_F_ENABLE
|
||||
reg [63:0] perf_fpu_stalls;
|
||||
`endif
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_icache_stalls <= 0;
|
||||
perf_ibuffer_stalls <= 0;
|
||||
perf_alu_stalls <= 0;
|
||||
perf_lsu_stalls <= 0;
|
||||
perf_csr_stalls <= 0;
|
||||
perf_gpu_stalls <= 0;
|
||||
`ifdef EXT_M_ENABLE
|
||||
perf_mul_stalls <= 0;
|
||||
`endif
|
||||
`ifdef EXT_F_ENABLE
|
||||
perf_fpu_stalls <= 0;
|
||||
`endif
|
||||
end else begin
|
||||
if (core_icache_req_if.valid & !core_icache_req_if.ready) begin
|
||||
perf_icache_stalls <= perf_icache_stalls + 64'd1;
|
||||
end
|
||||
if (decode_if.valid & !decode_if.ready) begin
|
||||
perf_ibuffer_stalls <= perf_ibuffer_stalls + 64'd1;
|
||||
end
|
||||
if (alu_req_if.valid & !alu_req_if.ready) begin
|
||||
perf_alu_stalls <= perf_alu_stalls + 64'd1;
|
||||
end
|
||||
if (lsu_req_if.valid & !lsu_req_if.ready) begin
|
||||
perf_lsu_stalls <= perf_lsu_stalls + 64'd1;
|
||||
end
|
||||
if (csr_req_if.valid & !csr_req_if.ready) begin
|
||||
perf_csr_stalls <= perf_csr_stalls + 64'd1;
|
||||
end
|
||||
if (gpu_req_if.valid & !gpu_req_if.ready) begin
|
||||
perf_gpu_stalls <= perf_gpu_stalls + 64'd1;
|
||||
end
|
||||
`ifdef EXT_M_ENABLE
|
||||
if (mul_req_if.valid & !mul_req_if.ready) begin
|
||||
perf_mul_stalls <= perf_mul_stalls + 64'd1;
|
||||
end
|
||||
`endif
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (fpu_req_if.valid & !fpu_req_if.ready) begin
|
||||
perf_fpu_stalls <= perf_fpu_stalls + 64'd1;
|
||||
end
|
||||
`endif
|
||||
end
|
||||
end
|
||||
|
||||
assign perf_pipeline_if.icache_stalls = perf_icache_stalls;
|
||||
assign perf_pipeline_if.ibuffer_stalls = perf_ibuffer_stalls;
|
||||
assign perf_pipeline_if.alu_stalls = perf_alu_stalls;
|
||||
assign perf_pipeline_if.lsu_stalls = perf_lsu_stalls;
|
||||
assign perf_pipeline_if.csr_stalls = perf_csr_stalls;
|
||||
assign perf_pipeline_if.gpu_stalls = perf_gpu_stalls;
|
||||
`ifdef EXT_M_ENABLE
|
||||
assign perf_pipeline_if.mul_stalls = perf_mul_stalls;
|
||||
`endif
|
||||
`ifdef EXT_F_ENABLE
|
||||
assign perf_pipeline_if.fpu_stalls = perf_fpu_stalls;
|
||||
`endif
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
24
hw/rtl/cache/VX_bank.v
vendored
24
hw/rtl/cache/VX_bank.v
vendored
|
@ -98,11 +98,10 @@ module VX_bank #(
|
|||
input wire snp_rsp_ready,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
output wire perf_mshr_stall,
|
||||
output wire perf_pipe_stall,
|
||||
output wire perf_evict,
|
||||
output wire perf_read_miss,
|
||||
output wire perf_write_miss,
|
||||
output wire perf_read_misses,
|
||||
output wire perf_write_misses,
|
||||
output wire perf_mshr_stalls,
|
||||
output wire perf_pipe_stalls,
|
||||
`endif
|
||||
|
||||
// Misses
|
||||
|
@ -335,7 +334,7 @@ module VX_bank #(
|
|||
wire dreq_push_stall;
|
||||
wire srsq_push_stall;
|
||||
wire pipeline_stall;
|
||||
|
||||
|
||||
wire is_mshr_miss_st2 = valid_st2 && is_mshr_st2 && (miss_st2 || force_miss_st2);
|
||||
wire is_mshr_miss_st3 = valid_st3 && is_mshr_st3 && (miss_st3 || force_miss_st3);
|
||||
|
||||
|
@ -938,15 +937,10 @@ end
|
|||
`SCOPE_ASSIGN (addr_st3, `LINE_TO_BYTE_ADDR(addr_st3, BANK_ID));
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
assign perf_pipe_stall = pipeline_stall;
|
||||
assign perf_mshr_stall = mshr_going_full;
|
||||
assign perf_read_miss = !pipeline_stall & miss_st2 & !is_mshr_st2 & !mem_rw_st2;
|
||||
assign perf_write_miss = !pipeline_stall & miss_st2 & !is_mshr_st2 & mem_rw_st2;
|
||||
if (DRAM_ENABLE) begin
|
||||
assign perf_evict = dreq_push & do_writeback_st3 & !is_snp_st3;
|
||||
end else begin
|
||||
assign perf_evict = 0;
|
||||
end
|
||||
assign perf_read_misses = !pipeline_stall && miss_st2 && !is_mshr_st2 && !mem_rw_st2;
|
||||
assign perf_write_misses = !pipeline_stall && miss_st2 && !is_mshr_st2 && mem_rw_st2;
|
||||
assign perf_mshr_stalls = mshr_going_full;
|
||||
assign perf_pipe_stalls = pipeline_stall || mshr_going_full;
|
||||
`endif
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_BANK
|
||||
|
|
197
hw/rtl/cache/VX_cache.v
vendored
197
hw/rtl/cache/VX_cache.v
vendored
|
@ -134,15 +134,13 @@ module VX_cache #(
|
|||
wire [NUM_BANKS-1:0] per_bank_snp_rsp_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_miss;
|
||||
assign miss_vec = per_bank_miss;
|
||||
|
||||
assign miss_vec = per_bank_miss;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
wire [NUM_BANKS-1:0] perf_mshr_stall_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_pipe_stall_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_evict_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_read_miss_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_write_miss_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_mshr_stall_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_pipe_stall_per_bank;
|
||||
`endif
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
|
@ -156,13 +154,20 @@ module VX_cache #(
|
|||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.NUM_REQS (NUM_REQS)
|
||||
) cache_core_req_bank_sel (
|
||||
) cache_core_req_bank_sel (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.bank_stalls (perf_cache_if.bank_stalls),
|
||||
`else
|
||||
`UNUSED_PIN (bank_stalls),
|
||||
`endif
|
||||
.core_req_valid (core_req_valid),
|
||||
.core_req_addr (core_req_addr),
|
||||
.core_req_ready (core_req_ready),
|
||||
.per_bank_valid (per_bank_core_req_valid),
|
||||
.per_bank_tid (per_bank_core_req_tid),
|
||||
.per_bank_ready (per_bank_core_req_ready)
|
||||
.per_bank_ready (per_bank_core_req_ready)
|
||||
);
|
||||
|
||||
assign dram_req_tag = dram_req_addr;
|
||||
|
@ -297,7 +302,8 @@ module VX_cache #(
|
|||
`SCOPE_BIND_VX_cache_bank(i)
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (reset),
|
||||
|
||||
// Core request
|
||||
.core_req_valid (curr_bank_core_req_valid),
|
||||
.core_req_tid (curr_bank_core_req_tid),
|
||||
|
@ -330,11 +336,10 @@ module VX_cache #(
|
|||
.dram_rsp_ready (curr_bank_dram_rsp_ready),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_mshr_stall (perf_mshr_stall_per_bank[i]),
|
||||
.perf_pipe_stall (perf_pipe_stall_per_bank[i]),
|
||||
.perf_evict (perf_evict_per_bank[i]),
|
||||
.perf_read_miss (perf_read_miss_per_bank[i]),
|
||||
.perf_write_miss (perf_write_miss_per_bank[i]),
|
||||
.perf_read_misses (perf_read_miss_per_bank[i]),
|
||||
.perf_write_misses (perf_write_miss_per_bank[i]),
|
||||
.perf_mshr_stalls (perf_mshr_stall_per_bank[i]),
|
||||
.perf_pipe_stalls (perf_pipe_stall_per_bank[i]),
|
||||
`endif
|
||||
|
||||
// Snoop request
|
||||
|
@ -434,47 +439,33 @@ module VX_cache #(
|
|||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
// per cycle: core_req_r, core_req_w
|
||||
reg [($clog2(NUM_REQS+1)-1):0] perf_core_req_r_per_cycle, perf_core_req_w_per_cycle;
|
||||
// per cycle: core_reads, core_writes
|
||||
reg [($clog2(NUM_REQS+1)-1):0] perf_core_reads_per_cycle, perf_core_writes_per_cycle;
|
||||
reg [($clog2(NUM_REQS+1)-1):0] perf_crsp_stall_per_cycle;
|
||||
|
||||
VX_countones #(
|
||||
.N(NUM_REQS)
|
||||
) perf_countones_core_reads_count (
|
||||
.valids (core_req_valid & core_req_ready & ~core_req_rw),
|
||||
.count (perf_core_reads_per_cycle)
|
||||
);
|
||||
|
||||
VX_countones #(
|
||||
.N(NUM_REQS)
|
||||
) perf_countones_core_writes_count (
|
||||
.valids (core_req_valid & core_req_ready & core_req_rw),
|
||||
.count (perf_core_writes_per_cycle)
|
||||
);
|
||||
|
||||
if (CORE_TAG_ID_BITS != 0) begin
|
||||
VX_countones #( // core_req_r
|
||||
.N(NUM_REQS)
|
||||
) perf_countones_core_req_r_count (
|
||||
.valids (core_req_valid & {NUM_REQS{core_req_ready & ~core_req_rw}}),
|
||||
.count (perf_core_req_r_per_cycle)
|
||||
);
|
||||
|
||||
VX_countones #( // core_req_w
|
||||
.N(NUM_REQS)
|
||||
) perf_countones_core_req_w_count (
|
||||
.valids (core_req_valid & {NUM_REQS{core_req_ready & core_req_rw}}),
|
||||
.count (perf_core_req_w_per_cycle)
|
||||
);
|
||||
|
||||
VX_countones #( // core_rsp
|
||||
VX_countones #(
|
||||
.N(NUM_REQS)
|
||||
) perf_countones_core_rsp_count (
|
||||
.valids (core_rsp_valid & {NUM_REQS{!core_rsp_ready}}),
|
||||
.count (perf_crsp_stall_per_cycle)
|
||||
);
|
||||
end else begin
|
||||
VX_countones #( // core_req_r
|
||||
.N(NUM_REQS)
|
||||
) perf_countones_core_req_r_count (
|
||||
.valids (core_req_valid & core_req_ready & ~core_req_rw),
|
||||
.count (perf_core_req_r_per_cycle)
|
||||
);
|
||||
|
||||
VX_countones #( // core_req_w
|
||||
.N(NUM_REQS)
|
||||
) perf_countones_core_req_w_count (
|
||||
.valids (core_req_valid & core_req_ready & core_req_rw),
|
||||
.count (perf_core_req_w_per_cycle)
|
||||
);
|
||||
|
||||
VX_countones #( // core_rsp
|
||||
VX_countones #(
|
||||
.N(NUM_REQS)
|
||||
) perf_countones_core_rsp_count (
|
||||
.valids (core_rsp_valid & ~core_rsp_ready),
|
||||
|
@ -482,33 +473,11 @@ module VX_cache #(
|
|||
);
|
||||
end
|
||||
|
||||
// per cycle: msrq stalls, pipeline stalls, evictions, read misses, write misses
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_mshr_stall_per_cycle;
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_pipe_stall_per_cycle;
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_evictions_per_cycle;
|
||||
// per cycle: read misses, write misses, msrq stalls, pipeline stalls
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_read_miss_per_cycle;
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_write_miss_per_cycle;
|
||||
|
||||
VX_countones #(
|
||||
.N(NUM_BANKS)
|
||||
) perf_countones_mshr_stall_count (
|
||||
.valids (perf_mshr_stall_per_bank),
|
||||
.count (perf_mshr_stall_per_cycle)
|
||||
);
|
||||
|
||||
VX_countones #(
|
||||
.N(NUM_BANKS)
|
||||
) perf_countones_total_stall_count (
|
||||
.valids (perf_pipe_stall_per_bank),
|
||||
.count (perf_pipe_stall_per_cycle)
|
||||
);
|
||||
|
||||
VX_countones #(
|
||||
.N(NUM_BANKS)
|
||||
) perf_countones_EVICTSict_count (
|
||||
.valids (perf_evict_per_bank),
|
||||
.count (perf_evictions_per_cycle)
|
||||
);
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_mshr_stall_per_cycle;
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_pipe_stall_per_cycle;
|
||||
|
||||
VX_countones #(
|
||||
.N(NUM_BANKS)
|
||||
|
@ -524,59 +493,55 @@ module VX_cache #(
|
|||
.count (perf_write_miss_per_cycle)
|
||||
);
|
||||
|
||||
reg [63:0] perf_core_req_r;
|
||||
reg [63:0] perf_core_req_w;
|
||||
reg [63:0] perf_mshr_stall;
|
||||
reg [63:0] perf_pipe_stall;
|
||||
reg [63:0] perf_evictions;
|
||||
reg [63:0] perf_read_miss;
|
||||
reg [63:0] perf_write_miss;
|
||||
reg [63:0] perf_crsp_stall;
|
||||
reg [63:0] perf_dreq_stall;
|
||||
VX_countones #(
|
||||
.N(NUM_BANKS)
|
||||
) perf_countones_mshr_stall_count (
|
||||
.valids (perf_mshr_stall_per_bank),
|
||||
.count (perf_mshr_stall_per_cycle)
|
||||
);
|
||||
|
||||
VX_countones #(
|
||||
.N(NUM_BANKS)
|
||||
) perf_countones_total_stall_count (
|
||||
.valids (perf_pipe_stall_per_bank),
|
||||
.count (perf_pipe_stall_per_cycle)
|
||||
);
|
||||
|
||||
reg [63:0] perf_core_reads;
|
||||
reg [63:0] perf_core_writes;
|
||||
reg [63:0] perf_read_misses;
|
||||
reg [63:0] perf_write_misses;
|
||||
reg [63:0] perf_mshr_stalls;
|
||||
reg [63:0] perf_pipe_stalls;
|
||||
reg [63:0] perf_crsp_stalls;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_core_req_r <= 0;
|
||||
perf_core_req_w <= 0;
|
||||
perf_crsp_stall <= 0;
|
||||
perf_mshr_stall <= 0;
|
||||
perf_pipe_stall <= 0;
|
||||
perf_evictions <= 0;
|
||||
perf_read_miss <= 0;
|
||||
perf_write_miss <= 0;
|
||||
perf_dreq_stall <= 0;
|
||||
perf_core_reads <= 0;
|
||||
perf_core_writes <= 0;
|
||||
perf_read_misses <= 0;
|
||||
perf_write_misses <= 0;
|
||||
perf_mshr_stalls <= 0;
|
||||
perf_pipe_stalls <= 0;
|
||||
perf_crsp_stalls <= 0;
|
||||
end else begin
|
||||
// core requests
|
||||
perf_core_req_r <= perf_core_req_r + $bits(perf_core_req_r)'(perf_core_req_r_per_cycle);
|
||||
perf_core_req_w <= perf_core_req_w + $bits(perf_core_req_w)'(perf_core_req_w_per_cycle);
|
||||
// core response stalls
|
||||
perf_crsp_stall <= perf_crsp_stall + $bits(perf_crsp_stall)'(perf_crsp_stall_per_cycle);
|
||||
// miss reserve queue stalls
|
||||
perf_mshr_stall <= perf_mshr_stall + $bits(perf_mshr_stall)'(perf_mshr_stall_per_cycle);
|
||||
// pipeline stalls
|
||||
perf_pipe_stall <= perf_pipe_stall + $bits(perf_pipe_stall)'(perf_pipe_stall_per_cycle);
|
||||
// total evictions
|
||||
perf_evictions <= perf_evictions + $bits(perf_evictions)'(perf_evictions_per_cycle);
|
||||
// read misses
|
||||
perf_read_miss <= perf_read_miss + $bits(perf_read_miss)'(perf_read_miss_per_cycle);
|
||||
// write misses
|
||||
perf_write_miss <= perf_write_miss + $bits(perf_write_miss)'(perf_write_miss_per_cycle);
|
||||
// dram request stalls
|
||||
if (dram_req_valid & !dram_req_ready) begin
|
||||
perf_dreq_stall <= perf_dreq_stall + 64'd1;
|
||||
end
|
||||
perf_core_reads <= perf_core_reads + 64'(perf_core_reads_per_cycle);
|
||||
perf_core_writes <= perf_core_writes + 64'(perf_core_writes_per_cycle);
|
||||
perf_read_misses <= perf_read_misses + 64'(perf_read_miss_per_cycle);
|
||||
perf_write_misses <= perf_write_misses + 64'(perf_write_miss_per_cycle);
|
||||
perf_mshr_stalls <= perf_mshr_stalls + 64'(perf_mshr_stall_per_cycle);
|
||||
perf_pipe_stalls <= perf_pipe_stalls + 64'(perf_pipe_stall_per_cycle);
|
||||
perf_crsp_stalls <= perf_crsp_stalls + 64'(perf_crsp_stall_per_cycle);
|
||||
end
|
||||
end
|
||||
|
||||
assign perf_cache_if.reads = perf_core_req_r;
|
||||
assign perf_cache_if.writes = perf_core_req_w;
|
||||
assign perf_cache_if.read_misses = perf_read_miss;
|
||||
assign perf_cache_if.write_misses = perf_write_miss;
|
||||
assign perf_cache_if.evictions = perf_evictions;
|
||||
assign perf_cache_if.mshr_stalls = perf_mshr_stall;
|
||||
assign perf_cache_if.pipe_stalls = perf_pipe_stall;
|
||||
assign perf_cache_if.crsp_stalls = perf_crsp_stall;
|
||||
assign perf_cache_if.dreq_stalls = perf_dreq_stall;
|
||||
assign perf_cache_if.reads = perf_core_reads;
|
||||
assign perf_cache_if.writes = perf_core_writes;
|
||||
assign perf_cache_if.read_misses = perf_read_misses;
|
||||
assign perf_cache_if.write_misses = perf_write_misses;
|
||||
assign perf_cache_if.mshr_stalls = perf_mshr_stalls;
|
||||
assign perf_cache_if.pipe_stalls = perf_pipe_stalls;
|
||||
assign perf_cache_if.crsp_stalls = perf_crsp_stalls;
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
25
hw/rtl/cache/VX_cache_core_req_bank_sel.v
vendored
25
hw/rtl/cache/VX_cache_core_req_bank_sel.v
vendored
|
@ -10,17 +10,21 @@ module VX_cache_core_req_bank_sel #(
|
|||
// Number of Word requests per cycle
|
||||
parameter NUM_REQS = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire [NUM_REQS-1:0] core_req_valid,
|
||||
input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
|
||||
output wire [NUM_REQS-1:0] core_req_ready,
|
||||
output wire [NUM_BANKS-1:0] per_bank_valid,
|
||||
output wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_tid,
|
||||
input wire [NUM_BANKS-1:0] per_bank_ready
|
||||
input wire [NUM_BANKS-1:0] per_bank_ready,
|
||||
output wire [63:0] bank_stalls
|
||||
);
|
||||
if (NUM_BANKS > 1) begin
|
||||
if (NUM_BANKS > 1) begin
|
||||
reg [NUM_BANKS-1:0] per_bank_valid_r;
|
||||
reg [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_tid_r;
|
||||
reg [NUM_REQS-1:0] core_req_ready_r;
|
||||
reg [NUM_BANKS-1:0] core_req_sel_r;
|
||||
wire [NUM_REQS-1:0][`BANK_BITS-1:0] core_req_bid;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
|
@ -40,28 +44,41 @@ module VX_cache_core_req_bank_sel #(
|
|||
|
||||
always @(*) begin
|
||||
core_req_ready_r = 0;
|
||||
core_req_sel_r = 0;
|
||||
for (integer j = 0; j < NUM_BANKS; ++j) begin
|
||||
for (integer i = 0; i < NUM_REQS; ++i) begin
|
||||
if (core_req_valid[i] && (core_req_bid[i] == `BANK_BITS'(j))) begin
|
||||
core_req_ready_r[i] = per_bank_ready[j];
|
||||
core_req_sel_r[i] = 1;
|
||||
break;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
reg [63:0] bank_stalls_r;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
bank_stalls_r <= 0;
|
||||
end else begin
|
||||
bank_stalls_r <= bank_stalls_r + 64'($countones(core_req_valid & ~core_req_sel_r));
|
||||
end
|
||||
end
|
||||
|
||||
assign per_bank_valid = per_bank_valid_r;
|
||||
assign per_bank_tid = per_bank_tid_r;
|
||||
assign core_req_ready = core_req_ready_r;
|
||||
assign bank_stalls = bank_stalls_r;
|
||||
|
||||
end else begin
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (core_req_valid)
|
||||
`UNUSED_VAR (core_req_addr)
|
||||
assign per_bank_valid = core_req_valid;
|
||||
assign per_bank_tid = 0;
|
||||
assign core_req_ready[0] = per_bank_ready;
|
||||
|
||||
assign bank_stalls = 0;
|
||||
end
|
||||
|
||||
endmodule
|
1
hw/rtl/cache/VX_tag_store.v
vendored
1
hw/rtl/cache/VX_tag_store.v
vendored
|
@ -48,6 +48,7 @@ module VX_tag_store #(
|
|||
VX_dp_ram #(
|
||||
.DATAW(`TAG_SELECT_BITS),
|
||||
.SIZE(`BANK_LINE_COUNT),
|
||||
.FASTRAM(1),
|
||||
.RWCHECK(1)
|
||||
) tags (
|
||||
.clk(clk),
|
||||
|
|
|
@ -6,14 +6,13 @@
|
|||
interface VX_perf_cache_if ();
|
||||
|
||||
wire [63:0] reads;
|
||||
wire [63:0] writes;
|
||||
wire [63:0] writes;
|
||||
wire [63:0] read_misses;
|
||||
wire [63:0] write_misses;
|
||||
wire [63:0] evictions;
|
||||
wire [63:0] bank_stalls;
|
||||
wire [63:0] mshr_stalls;
|
||||
wire [63:0] pipe_stalls;
|
||||
wire [63:0] crsp_stalls;
|
||||
wire [63:0] dreq_stalls;
|
||||
wire [63:0] pipe_stalls;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -7,24 +7,26 @@ interface VX_perf_memsys_if ();
|
|||
|
||||
wire [63:0] icache_reads;
|
||||
wire [63:0] icache_read_misses;
|
||||
wire [63:0] icache_mshr_stalls;
|
||||
wire [63:0] icache_crsp_stalls;
|
||||
wire [63:0] icache_dreq_stalls;
|
||||
wire [63:0] icache_pipe_stalls;
|
||||
wire [63:0] icache_crsp_stalls;
|
||||
|
||||
wire [63:0] dcache_reads;
|
||||
wire [63:0] dcache_writes;
|
||||
wire [63:0] dcache_writes;
|
||||
wire [63:0] dcache_read_misses;
|
||||
wire [63:0] dcache_write_misses;
|
||||
wire [63:0] dcache_evictions;
|
||||
wire [63:0] dcache_bank_stalls;
|
||||
wire [63:0] dcache_mshr_stalls;
|
||||
wire [63:0] dcache_crsp_stalls;
|
||||
wire [63:0] dcache_dreq_stalls;
|
||||
wire [63:0] dcache_pipe_stalls;
|
||||
wire [63:0] dcache_crsp_stalls;
|
||||
|
||||
wire [63:0] smem_reads;
|
||||
wire [63:0] smem_writes;
|
||||
wire [63:0] smem_bank_stalls;
|
||||
|
||||
wire [63:0] dram_reads;
|
||||
wire [63:0] dram_writes;
|
||||
wire [63:0] dram_stalls;
|
||||
wire [63:0] dram_latency;
|
||||
wire [63:0] dram_requests;
|
||||
wire [63:0] dram_responses;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -4,12 +4,8 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
interface VX_perf_pipeline_if ();
|
||||
// from pipeline
|
||||
wire [63:0] icache_stalls;
|
||||
wire [63:0] ibuffer_stalls;
|
||||
// from issue
|
||||
wire [63:0] scoreboard_stalls;
|
||||
// from execute
|
||||
wire [63:0] ibf_stalls;
|
||||
wire [63:0] scb_stalls;
|
||||
wire [63:0] lsu_stalls;
|
||||
wire [63:0] csr_stalls;
|
||||
wire [63:0] alu_stalls;
|
||||
|
|
|
@ -71,8 +71,8 @@ void Simulator::reset() {
|
|||
|
||||
vortex_->dram_rsp_valid = 0;
|
||||
vortex_->dram_req_ready = 0;
|
||||
vortex_->io_req_ready = 0;
|
||||
vortex_->io_rsp_valid = 0;
|
||||
//vortex_->io_req_ready = 0;
|
||||
//vortex_->io_rsp_valid = 0;
|
||||
vortex_->snp_req_valid = 0;
|
||||
vortex_->snp_rsp_ready = 0;
|
||||
vortex_->csr_io_req_valid = 0;
|
||||
|
@ -201,7 +201,7 @@ void Simulator::eval_dram_bus() {
|
|||
}
|
||||
|
||||
void Simulator::eval_io_bus() {
|
||||
for (int i = 0; i < NUM_THREADS; ++i) {
|
||||
/*for (int i = 0; i < NUM_THREADS; ++i) {
|
||||
if (((vortex_->io_req_valid >> i) & 0x1)
|
||||
&& ((VL_WDATA_GETW(vortex_->io_req_addr, i, NUM_THREADS, 30) << 2) == IO_BUS_ADDR_COUT)) {
|
||||
assert(vortex_->io_req_rw);
|
||||
|
@ -217,7 +217,7 @@ void Simulator::eval_io_bus() {
|
|||
}
|
||||
}
|
||||
vortex_->io_req_ready = 1;
|
||||
vortex_->io_rsp_valid = 0;
|
||||
vortex_->io_rsp_valid = 0;*/
|
||||
}
|
||||
|
||||
void Simulator::eval_snp_bus() {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue