mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 13:27:29 -04:00
merging perf counters
This commit is contained in:
commit
d5438fd591
27 changed files with 1047 additions and 230 deletions
5
Makefile
5
Makefile
|
@ -6,6 +6,11 @@ all:
|
|||
$(MAKE) -C simX
|
||||
$(MAKE) -C benchmarks/opencl
|
||||
|
||||
perf-demo:
|
||||
$(MAKE) -C hw
|
||||
$(MAKE) -C driver rtlsim
|
||||
$(MAKE) -C driver/tests/demo/ run-rtlsim
|
||||
|
||||
clean:
|
||||
$(MAKE) -C hw clean
|
||||
$(MAKE) -C driver clean
|
||||
|
|
|
@ -6,7 +6,7 @@ set -e
|
|||
show_usage()
|
||||
{
|
||||
echo "Vortex BlackBox Test Driver v1.0"
|
||||
echo "Usage: [[--clusters=#n] [--cores=#n] [--warps=#n] [--threads=#n] [--l2cache] [--l3cache] [[--driver=rtlsim|vlsim] [--debug] [--scope] [--app=vecadd|sgemm|basic|demo|dogfood] [--args=<args>] [--help]]"
|
||||
echo "Usage: [[--clusters=#n] [--cores=#n] [--warps=#n] [--threads=#n] [--l2cache] [--l3cache] [[--driver=rtlsim|vlsim] [--debug] [--scope] [--perf] [--app=vecadd|sgemm|basic|demo|dogfood] [--args=<args>] [--help]]"
|
||||
}
|
||||
|
||||
DRIVER=vlsim
|
||||
|
@ -64,6 +64,10 @@ case $i in
|
|||
SCOPE=1
|
||||
shift
|
||||
;;
|
||||
--perf)
|
||||
PERF=-DPERF_ENABLE
|
||||
shift
|
||||
;;
|
||||
--args=*)
|
||||
ARGS=${i#*=}
|
||||
HAS_ARGS=1
|
||||
|
@ -125,7 +129,7 @@ case $APP in
|
|||
;;
|
||||
esac
|
||||
|
||||
CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DL2_ENABLE=$L2 -DL3_ENABLE=$L3"
|
||||
CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DL2_ENABLE=$L2 -DL3_ENABLE=$L3 $PERF"
|
||||
|
||||
echo "CONFIGS=$CONFIGS"
|
||||
|
||||
|
|
|
@ -91,24 +91,240 @@ extern int vx_upload_kernel_file(vx_device_h device, const char* filename) {
|
|||
return err;
|
||||
}
|
||||
|
||||
extern int vx_get_perf(vx_device_h device, int core_id, size_t* instrs, size_t* cycles) {
|
||||
int vx_csr_get_l(vx_device_h device, int core_id, int addr, int addr_h, uint64_t* value) {
|
||||
int ret = 0;
|
||||
unsigned value_lo, value_hi;
|
||||
ret |= vx_csr_get(device, core_id, addr, &value_lo);
|
||||
ret |= vx_csr_get(device, core_id, addr_h, &value_hi);
|
||||
*value = (uint64_t(value_hi) << 32) | value_lo;
|
||||
return ret;
|
||||
}
|
||||
|
||||
extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
||||
int ret = 0;
|
||||
|
||||
unsigned value;
|
||||
|
||||
if (instrs) {
|
||||
ret |= vx_csr_get(device, core_id, CSR_INSTRET_H, &value);
|
||||
*instrs = value;
|
||||
ret |= vx_csr_get(device, core_id, CSR_INSTRET, &value);
|
||||
*instrs = (*instrs << 32) | value;
|
||||
}
|
||||
unsigned num_cores;
|
||||
vx_csr_get(device, 0, CSR_NC, &num_cores);
|
||||
|
||||
if (cycles) {
|
||||
ret |= vx_csr_get(device, core_id, CSR_CYCLE_H, &value);
|
||||
*cycles = value;
|
||||
ret |= vx_csr_get(device, core_id, CSR_CYCLE, &value);
|
||||
*cycles = (*cycles << 32) | value;
|
||||
}
|
||||
uint64_t instrs = 0;
|
||||
uint64_t cycles = 0;
|
||||
#ifdef PERF_ENABLE
|
||||
// PERF: pipeline stalls
|
||||
uint64_t lsu_stalls = 0;
|
||||
uint64_t fpu_stalls = 0;
|
||||
uint64_t mul_stalls = 0;
|
||||
uint64_t csr_stalls = 0;
|
||||
uint64_t alu_stalls = 0;
|
||||
uint64_t gpu_stalls = 0;
|
||||
uint64_t ibuffer_stalls = 0;
|
||||
uint64_t scoreboard_stalls = 0;
|
||||
uint64_t icache_stalls = 0;
|
||||
// PERF: Icache
|
||||
uint64_t icache_reads = 0;
|
||||
uint64_t icache_read_misses = 0;
|
||||
uint64_t icache_pipe_stalls = 0;
|
||||
uint64_t icache_dram_stalls = 0;
|
||||
uint64_t icache_mshr_stalls = 0;
|
||||
uint64_t icache_rsp_stalls = 0;
|
||||
// PERF: Dcache
|
||||
uint64_t dcache_reads = 0;
|
||||
uint64_t dcache_writes = 0;
|
||||
uint64_t dcache_read_misses = 0;
|
||||
uint64_t dcache_write_misses = 0;
|
||||
uint64_t dcache_pipe_stalls = 0;
|
||||
uint64_t dcache_dram_stalls = 0;
|
||||
uint64_t dcache_mshr_stalls = 0;
|
||||
uint64_t dcache_rsp_stalls = 0;
|
||||
uint64_t dcache_evictions = 0;
|
||||
// PERF: memory
|
||||
uint64_t dram_req = 0;
|
||||
uint64_t dram_rsp = 0;
|
||||
uint64_t dram_lat = 0;
|
||||
#endif
|
||||
|
||||
for (unsigned core_id = 0; core_id < num_cores; ++core_id) {
|
||||
|
||||
uint64_t instrs_per_core, cycles_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MINSTRET, CSR_MINSTRET_H, &instrs_per_core);
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MCYCLE, CSR_MCYCLE_H, &cycles_per_core);
|
||||
float IPC = (float)(double(instrs_per_core) / double(cycles_per_core));
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: instrs=%ld, cycles=%ld, IPC=%f\n", core_id, instrs_per_core, cycles_per_core, IPC);
|
||||
instrs += instrs_per_core;
|
||||
cycles = std::max<uint64_t>(cycles_per_core, cycles);
|
||||
|
||||
#ifdef PERF_ENABLE
|
||||
// PERF: pipeline
|
||||
// icache_stall
|
||||
uint64_t icache_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_ICACHE_ST, CSR_MPM_ICACHE_ST_H, &icache_stalls_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: icache stalls=%ld\n", core_id, icache_stalls_per_core);
|
||||
icache_stalls += icache_stalls_per_core;
|
||||
// ibuffer_stall
|
||||
uint64_t ibuffer_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_IBUF_ST, CSR_MPM_IBUF_ST_H, &ibuffer_stalls_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: ibuffer stalls=%ld\n", core_id, ibuffer_stalls_per_core);
|
||||
ibuffer_stalls += ibuffer_stalls_per_core;
|
||||
// scoreboard_stall
|
||||
uint64_t scoreboard_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_SCRB_ST, CSR_MPM_SCRB_ST_H, &scoreboard_stalls_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: scoreboard stalls=%ld\n", core_id, scoreboard_stalls_per_core);
|
||||
scoreboard_stalls += scoreboard_stalls_per_core;
|
||||
// alu_stall
|
||||
uint64_t alu_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_ALU_ST, CSR_MPM_ALU_ST_H, &alu_stalls_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: alu stalls=%ld\n", core_id, alu_stalls_per_core);
|
||||
alu_stalls += alu_stalls_per_core;
|
||||
// lsu_stall
|
||||
uint64_t lsu_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_LSU_ST, CSR_MPM_LSU_ST_H, &lsu_stalls_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: lsu stalls=%ld\n", core_id, lsu_stalls_per_core);
|
||||
lsu_stalls += lsu_stalls_per_core;
|
||||
// csr_stall
|
||||
uint64_t csr_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_CSR_ST, CSR_MPM_CSR_ST_H, &csr_stalls_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: csr stalls=%ld\n", core_id, csr_stalls_per_core);
|
||||
csr_stalls += csr_stalls_per_core;
|
||||
// mul_stall
|
||||
uint64_t mul_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_MUL_ST, CSR_MPM_MUL_ST_H, &mul_stalls_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: mul stalls=%ld\n", core_id, mul_stalls_per_core);
|
||||
mul_stalls += mul_stalls_per_core;
|
||||
// fpu_stall
|
||||
uint64_t fpu_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_FPU_ST, CSR_MPM_FPU_ST_H, &fpu_stalls_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: fpu stalls=%ld\n", core_id, fpu_stalls_per_core);
|
||||
fpu_stalls += fpu_stalls_per_core;
|
||||
// gpu_stall
|
||||
uint64_t gpu_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_GPU_ST, CSR_MPM_GPU_ST_H, &gpu_stalls_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: gpu stalls=%ld\n", core_id, gpu_stalls_per_core);
|
||||
gpu_stalls += gpu_stalls_per_core;
|
||||
|
||||
// PERF: Icache
|
||||
// total reads
|
||||
uint64_t icache_reads_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_ICACHE_READS, CSR_MPM_ICACHE_READS_H, &icache_reads_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: icache reads=%ld\n", core_id, icache_reads_per_core);
|
||||
icache_reads += icache_reads_per_core;
|
||||
// read misses
|
||||
uint64_t icache_miss_r_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_ICACHE_MISS_R, CSR_MPM_ICACHE_MISS_R_H, &icache_miss_r_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: icache read misses=%ld\n", core_id, icache_miss_r_per_core);
|
||||
icache_read_misses += icache_miss_r_per_core;
|
||||
// pipeline stalls
|
||||
uint64_t icache_pipe_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_ICACHE_PIPE_ST, CSR_MPM_ICACHE_PIPE_ST_H, &icache_pipe_st_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: icache pipeline stalls=%ld\n", core_id, icache_pipe_st_per_core);
|
||||
icache_pipe_stalls += icache_pipe_st_per_core;
|
||||
// response stalls
|
||||
uint64_t icache_crsp_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_ICACHE_CRSP_ST, CSR_MPM_ICACHE_CRSP_ST_H, &icache_crsp_st_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: icache reponse stalls=%ld\n", core_id, icache_crsp_st_per_core);
|
||||
icache_rsp_stalls += icache_crsp_st_per_core;
|
||||
// dram_stalls
|
||||
uint64_t icache_dram_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_ICACHE_DREQ_ST, CSR_MPM_ICACHE_DREQ_ST_H, &icache_dram_st_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: icache dram stalls=%ld\n", core_id, icache_dram_st_per_core);
|
||||
icache_dram_stalls += icache_dram_st_per_core;
|
||||
// mshr_stalls
|
||||
uint64_t icache_mshr_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_ICACHE_MSHR_ST, CSR_MPM_ICACHE_MSHR_ST_H, &icache_mshr_st_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: icache mshr stalls=%ld\n", core_id, icache_mshr_st_per_core);
|
||||
icache_mshr_stalls += icache_mshr_st_per_core;
|
||||
|
||||
// PERF: Dcache
|
||||
// total reads
|
||||
uint64_t dcache_reads_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_READS, CSR_MPM_DCACHE_READS_H, &dcache_reads_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache reads=%ld\n", core_id, dcache_reads_per_core);
|
||||
dcache_reads += dcache_reads_per_core;
|
||||
// total write
|
||||
uint64_t dcache_writes_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_WRITES, CSR_MPM_DCACHE_WRITES_H, &dcache_writes_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache writes=%ld\n", core_id, dcache_writes_per_core);
|
||||
dcache_writes += dcache_writes_per_core;
|
||||
// read misses
|
||||
uint64_t dcache_miss_r_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_MISS_R, CSR_MPM_DCACHE_MISS_R_H, &dcache_miss_r_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache read misses=%ld\n", core_id, dcache_miss_r_per_core);
|
||||
dcache_read_misses += dcache_miss_r_per_core;
|
||||
// read misses
|
||||
uint64_t dcache_miss_w_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_MISS_W, CSR_MPM_DCACHE_MISS_W_H, &dcache_miss_w_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache wrire misses=%ld\n", core_id, dcache_miss_w_per_core);
|
||||
dcache_write_misses += dcache_miss_w_per_core;
|
||||
// total_evictions
|
||||
uint64_t dcache_evictions_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_EVICTS, CSR_MPM_DCACHE_EVICTS_H, &dcache_evictions_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache evictions_per_core=%ld\n", core_id, dcache_evictions_per_core);
|
||||
dcache_evictions += dcache_evictions_per_core;
|
||||
// pipeline stalls
|
||||
uint64_t dcache_pipe_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_PIPE_ST, CSR_MPM_DCACHE_PIPE_ST_H, &dcache_pipe_st_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache pipeline stalls=%ld\n", core_id, dcache_pipe_st_per_core);
|
||||
dcache_pipe_stalls += dcache_pipe_st_per_core;
|
||||
// response stalls
|
||||
uint64_t dcache_crsp_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_CRSP_ST, CSR_MPM_DCACHE_CRSP_ST_H, &dcache_crsp_st_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache reponse stalls=%ld\n", core_id, dcache_crsp_st_per_core);
|
||||
dcache_rsp_stalls += dcache_crsp_st_per_core;
|
||||
// dram_stalls
|
||||
uint64_t dcache_dram_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_DREQ_ST, CSR_MPM_DCACHE_DREQ_ST_H, &dcache_dram_st_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache dram stalls=%ld\n", core_id, dcache_dram_st_per_core);
|
||||
dcache_dram_stalls += dcache_dram_st_per_core;
|
||||
// mshr_stalls
|
||||
uint64_t dcache_mshr_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_MSHR_ST, CSR_MPM_DCACHE_MSHR_ST_H, &dcache_mshr_st_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache mshr stalls=%ld\n", core_id, dcache_mshr_st_per_core);
|
||||
dcache_mshr_stalls += dcache_mshr_st_per_core;
|
||||
|
||||
// PERF: dram_latency
|
||||
uint64_t dram_req_per_core, dram_rsp_per_core, dram_lat_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DRAM_REQ, CSR_MPM_DRAM_REQ_H, &dram_req_per_core);
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DRAM_RSP, CSR_MPM_DRAM_RSP_H, &dram_rsp_per_core);
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DRAM_LAT, CSR_MPM_DRAM_LAT_H, &dram_lat_per_core);
|
||||
int avg_dram_lat_per_core = (int)(double(dram_lat_per_core) / double(dram_rsp_per_core));
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dram requests=%ld (reads=%ld, writes=%ld)\n", core_id, dram_req_per_core, dram_rsp_per_core, dram_req_per_core - dram_rsp_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: average dram latency=%d cycles\n", core_id, avg_dram_lat_per_core);
|
||||
dram_req += dram_req_per_core;
|
||||
dram_rsp += dram_rsp_per_core;
|
||||
dram_lat += dram_lat_per_core;
|
||||
#endif
|
||||
}
|
||||
|
||||
float IPC = (float)(double(instrs) / double(cycles));
|
||||
fprintf(stream, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", instrs, cycles, IPC);
|
||||
|
||||
#ifdef PERF_ENABLE
|
||||
fprintf(stream, "PERF: icache stalls=%ld\n", icache_stalls);
|
||||
fprintf(stream, "PERF: ibuffer stalls=%ld\n", ibuffer_stalls);
|
||||
fprintf(stream, "PERF: scoreboard stalls=%ld\n", scoreboard_stalls);
|
||||
fprintf(stream, "PERF: alu stalls=%ld\n", alu_stalls);
|
||||
fprintf(stream, "PERF: lsu stalls=%ld\n", lsu_stalls);
|
||||
fprintf(stream, "PERF: csr stalls=%ld\n", csr_stalls);
|
||||
fprintf(stream, "PERF: mul stalls=%ld\n", mul_stalls);
|
||||
fprintf(stream, "PERF: fpu stalls=%ld\n", fpu_stalls);
|
||||
fprintf(stream, "PERF: gpu stalls=%ld\n", gpu_stalls);
|
||||
fprintf(stream, "PERF: icache reads=%ld\n", icache_reads);
|
||||
fprintf(stream, "PERF: icache read misses=%ld\n", icache_read_misses);
|
||||
fprintf(stream, "PERF: icache reponse stalls=%ld\n", icache_rsp_stalls);
|
||||
fprintf(stream, "PERF: icache pipeline stalls=%ld\n", icache_pipe_stalls);
|
||||
fprintf(stream, "PERF: icache dram stalls=%ld\n", icache_dram_stalls);
|
||||
fprintf(stream, "PERF: icache mshr stalls=%ld\n", icache_mshr_stalls);
|
||||
fprintf(stream, "PERF: dcache reads=%ld\n", dcache_reads);
|
||||
fprintf(stream, "PERF: dcache writes=%ld\n", dcache_writes);
|
||||
fprintf(stream, "PERF: dcache read misses=%ld\n", dcache_read_misses);
|
||||
fprintf(stream, "PERF: dcache wrire misses=%ld\n", dcache_write_misses);
|
||||
fprintf(stream, "PERF: dcache evictions=%ld\n", dcache_evictions);
|
||||
fprintf(stream, "PERF: dcache pipeline stalls=%ld\n", dcache_pipe_stalls);
|
||||
fprintf(stream, "PERF: dcache reponse stalls=%ld\n", dcache_rsp_stalls);
|
||||
fprintf(stream, "PERF: dcache dram stalls=%ld\n", dcache_dram_stalls);
|
||||
fprintf(stream, "PERF: dcache mshr stalls=%ld\n", dcache_mshr_stalls);
|
||||
fprintf(stream, "PERF: dram requests=%ld (reads=%ld, writes=%ld)\n", dram_req, dram_rsp, dram_req - dram_rsp);
|
||||
int avg_dram_lat = (int)(double(dram_lat) / double(dram_rsp));
|
||||
fprintf(stream, "PERF: average dram latency=%d cycles\n", avg_dram_lat);
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
|
@ -2,6 +2,7 @@
|
|||
#define __VX_DRIVER_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
@ -71,8 +72,8 @@ int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size)
|
|||
// upload kernel file to device
|
||||
int vx_upload_kernel_file(vx_device_h device, const char* filename);
|
||||
|
||||
// get performance counters
|
||||
int vx_get_perf(vx_device_h device, int core_id, size_t* instrs, size_t* cycles);
|
||||
// dump performance counters
|
||||
int vx_dump_perf(vx_device_h device, FILE* stream);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -58,6 +58,12 @@ ifdef SCOPE
|
|||
SCOPE_H = scope-defs.h
|
||||
endif
|
||||
|
||||
# Enable perf counters
|
||||
ifdef PERF
|
||||
CXXFLAGS += -DPERF_ENABLE
|
||||
PERF_ENABLE = PERF=1
|
||||
endif
|
||||
|
||||
all: vlsim
|
||||
|
||||
# AFU info from JSON file, including AFU UUID
|
||||
|
@ -71,7 +77,7 @@ scope-defs.h: $(SCRIPT_DIR)/scope.json
|
|||
scope: scope-defs.h
|
||||
|
||||
vlsim-hw: $(SCOPE_H)
|
||||
$(SCOPE_ENABLE) $(MAKE) -C vlsim
|
||||
$(SCOPE_ENABLE) $(PERF_ENABLE) $(MAKE) -C vlsim
|
||||
|
||||
fpga: $(SRCS) $(SCOPE_H)
|
||||
$(CXX) $(CXXFLAGS) -DUSE_FPGA $^ $(LDFLAGS) $(FPGA_LIBS) -o $(PROJECT)
|
||||
|
@ -94,7 +100,6 @@ $(ASE_DIR):
|
|||
clean:
|
||||
rm -rf $(PROJECT) $(PROJECT_ASE) $(PROJECT_VLSIM) *.o .depend
|
||||
$(MAKE) -C vlsim clean
|
||||
$(MAKE) -C ase clean
|
||||
|
||||
ifneq ($(MAKECMDGOALS),clean)
|
||||
-include .depend
|
||||
|
|
|
@ -43,8 +43,9 @@ RTL_DIR=../../../hw/rtl
|
|||
SRCS = fpga.cpp opae_sim.cpp
|
||||
SRCS += $(RTL_DIR)/fp_cores/svdpi/float_dpi.cpp
|
||||
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/svdpi -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/svdpi -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE)
|
||||
RTL_INCLUDE += -I$(RTL_DIR)/afu -I$(RTL_DIR)/afu/ccip
|
||||
|
||||
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic $(CONFIGS)
|
||||
VL_FLAGS += -Wno-DECLFILENAME
|
||||
|
@ -70,6 +71,12 @@ ifdef SCOPE
|
|||
CFLAGS += -DSCOPE
|
||||
endif
|
||||
|
||||
# Enable perf counters
|
||||
ifdef PERF
|
||||
VL_FLAGS += -DPERF_ENABLE
|
||||
CFLAGS += -DPERF_ENABLE
|
||||
endif
|
||||
|
||||
# use our OPAE shim
|
||||
VL_FLAGS += -DNOPAE
|
||||
CFLAGS += -DNOPAE
|
||||
|
@ -77,8 +84,6 @@ CFLAGS += -DNOPAE
|
|||
# use DPI FPU
|
||||
VL_FLAGS += -DFPU_FAST
|
||||
|
||||
RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip
|
||||
|
||||
PROJECT = libopae-c-vlsim.so
|
||||
|
||||
all: $(PROJECT)
|
||||
|
|
|
@ -244,27 +244,7 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
|||
#endif
|
||||
|
||||
#ifdef DUMP_PERF_STATS
|
||||
// Dump perf stats
|
||||
if (device->num_cores > 1) {
|
||||
uint64_t total_instrs = 0, total_cycles = 0;
|
||||
for (unsigned core_id = 0; core_id < device->num_cores; ++core_id) {
|
||||
uint64_t instrs, cycles;
|
||||
int ret = vx_get_perf(hdevice, core_id, &instrs, &cycles);
|
||||
assert(ret == 0);
|
||||
float IPC = (float)(double(instrs) / double(cycles));
|
||||
fprintf(stdout, "[VXDRV] PERF: core%d: instrs=%ld, cycles=%ld, IPC=%f\n", core_id, instrs, cycles, IPC);
|
||||
total_instrs += instrs;
|
||||
total_cycles = std::max<uint64_t>(total_cycles, cycles);
|
||||
}
|
||||
float IPC = (float)(double(total_instrs) / double(total_cycles));
|
||||
fprintf(stdout, "[VXDRV] PERF: instrs=%ld, cycles=%ld, IPC=%f\n", total_instrs, total_cycles, IPC);
|
||||
} else {
|
||||
uint64_t instrs, cycles;
|
||||
int ret = vx_get_perf(hdevice, 0, &instrs, &cycles);
|
||||
float IPC = (float)(double(instrs) / double(cycles));
|
||||
assert(ret == 0);
|
||||
fprintf(stdout, "[VXDRV] PERF: instrs=%ld, cycles=%ld, IPC=%f\n", instrs, cycles, IPC);
|
||||
}
|
||||
vx_dump_perf(device, stdout);
|
||||
#endif
|
||||
|
||||
fpgaClose(device->fpga);
|
||||
|
|
|
@ -64,6 +64,12 @@ else
|
|||
CFLAGS += -DNDEBUG
|
||||
endif
|
||||
|
||||
# Enable perf counters
|
||||
ifdef PERF
|
||||
VL_FLAGS += -DPERF_ENABLE
|
||||
CFLAGS += -DPERF_ENABLE
|
||||
endif
|
||||
|
||||
# use DPI FPU
|
||||
VL_FLAGS += -DFPU_FAST
|
||||
|
||||
|
|
|
@ -239,26 +239,7 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
|||
vx_device *device = ((vx_device*)hdevice);
|
||||
|
||||
#ifdef DUMP_PERF_STATS
|
||||
unsigned num_cores;
|
||||
vx_csr_get(hdevice, 0, CSR_NC, &num_cores);
|
||||
if (num_cores > 1) {
|
||||
uint64_t total_instrs = 0, total_cycles = 0;
|
||||
for (unsigned core_id = 0; core_id < num_cores; ++core_id) {
|
||||
uint64_t instrs, cycles;
|
||||
vx_get_perf(hdevice, core_id, &instrs, &cycles);
|
||||
float IPC = (float)(double(instrs) / double(cycles));
|
||||
fprintf(stdout, "PERF: core%d: instrs=%ld, cycles=%ld, IPC=%f\n", core_id, instrs, cycles, IPC);
|
||||
total_instrs += instrs;
|
||||
total_cycles = std::max<uint64_t>(total_cycles, cycles);
|
||||
}
|
||||
float IPC = (float)(double(total_instrs) / double(total_cycles));
|
||||
fprintf(stdout, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", total_instrs, total_cycles, IPC);
|
||||
} else {
|
||||
uint64_t instrs, cycles;
|
||||
vx_get_perf(hdevice, 0, &instrs, &cycles);
|
||||
float IPC = (float)(double(instrs) / double(cycles));
|
||||
fprintf(stdout, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", instrs, cycles, IPC);
|
||||
}
|
||||
vx_dump_perf(device, stdout);
|
||||
#endif
|
||||
|
||||
delete device;
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
+define+QUARTUS
|
||||
+define+FPU_FAST
|
||||
#+define+SCOPE
|
||||
#+define+PERF_ENABLE
|
||||
|
||||
#+define+DBG_PRINT_CORE_ICACHE
|
||||
#+define+DBG_PRINT_CORE_DCACHE
|
||||
|
|
|
@ -297,6 +297,9 @@ module VX_cluster #(
|
|||
);
|
||||
|
||||
if (`L2_ENABLE) begin
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_cache_if perf_l2cache_if();
|
||||
`endif
|
||||
|
||||
wire [`NUM_CORES-1:0] per_core_dram_req_valid_qual;
|
||||
wire [`NUM_CORES-1:0] per_core_dram_req_rw_qual;
|
||||
|
@ -345,10 +348,14 @@ module VX_cluster #(
|
|||
.SNP_TAG_WIDTH (`L2SNP_TAG_WIDTH)
|
||||
) l2cache (
|
||||
`SCOPE_BIND_VX_cluster_l2cache
|
||||
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_cache_if (perf_l2cache_if),
|
||||
`endif
|
||||
|
||||
// Core request
|
||||
.core_req_valid (per_core_dram_req_valid_qual),
|
||||
.core_req_rw (per_core_dram_req_rw_qual),
|
||||
|
|
|
@ -126,10 +126,12 @@
|
|||
|
||||
// CSR Addresses //////////////////////////////////////////////////////////////
|
||||
|
||||
// User Floating-Point CSRs
|
||||
`define CSR_FFLAGS 12'h001
|
||||
`define CSR_FRM 12'h002
|
||||
`define CSR_FCSR 12'h003
|
||||
|
||||
// SIMT CSRs
|
||||
`define CSR_LTID 12'h020
|
||||
`define CSR_LWID 12'h021
|
||||
`define CSR_GTID 12'h022
|
||||
|
@ -153,11 +155,73 @@
|
|||
|
||||
`define CSR_MEPC 12'h341
|
||||
|
||||
`define CSR_CYCLE 12'hC00
|
||||
`define CSR_CYCLE_H 12'hC80
|
||||
`define CSR_INSTRET 12'hC02
|
||||
`define CSR_INSTRET_H 12'hC82
|
||||
// Machine Counter/Timers
|
||||
`define CSR_MCYCLE 12'hB00
|
||||
`define CSR_MCYCLE_H 12'hB80
|
||||
`define CSR_MINSTRET 12'hB02
|
||||
`define CSR_MINSTRET_H 12'hB82
|
||||
|
||||
// Machine Performance-monitoring counters
|
||||
// PERF: pipeline
|
||||
`define CSR_MPM_ICACHE_ST 12'hB03
|
||||
`define CSR_MPM_ICACHE_ST_H 12'hB83
|
||||
`define CSR_MPM_IBUF_ST 12'hB04
|
||||
`define CSR_MPM_IBUF_ST_H 12'hB84
|
||||
`define CSR_MPM_SCRB_ST 12'hB05
|
||||
`define CSR_MPM_SCRB_ST_H 12'hB85
|
||||
`define CSR_MPM_ALU_ST 12'hB06
|
||||
`define CSR_MPM_ALU_ST_H 12'hB86
|
||||
`define CSR_MPM_LSU_ST 12'hB07
|
||||
`define CSR_MPM_LSU_ST_H 12'hB87
|
||||
`define CSR_MPM_CSR_ST 12'hB08
|
||||
`define CSR_MPM_CSR_ST_H 12'hB88
|
||||
`define CSR_MPM_MUL_ST 12'hB09
|
||||
`define CSR_MPM_MUL_ST_H 12'hB89
|
||||
`define CSR_MPM_FPU_ST 12'hB0A
|
||||
`define CSR_MPM_FPU_ST_H 12'hB8A
|
||||
`define CSR_MPM_GPU_ST 12'hB0B
|
||||
`define CSR_MPM_GPU_ST_H 12'hB8B
|
||||
// PERF: icache
|
||||
`define CSR_MPM_ICACHE_MISS_R 12'hB0C // read misses
|
||||
`define CSR_MPM_ICACHE_MISS_R_H 12'hB8C
|
||||
`define CSR_MPM_ICACHE_DREQ_ST 12'hB0D // dram request stalls
|
||||
`define CSR_MPM_ICACHE_DREQ_ST_H 12'hB8D
|
||||
`define CSR_MPM_ICACHE_CRSP_ST 12'hB0E // core response stalls
|
||||
`define CSR_MPM_ICACHE_CRSP_ST_H 12'hB8E
|
||||
`define CSR_MPM_ICACHE_MSHR_ST 12'hB0F // MSHR stalls
|
||||
`define CSR_MPM_ICACHE_MSHR_ST_H 12'hB8F
|
||||
`define CSR_MPM_ICACHE_PIPE_ST 12'hB10 // pipeline stalls
|
||||
`define CSR_MPM_ICACHE_PIPE_ST_H 12'hB90
|
||||
`define CSR_MPM_ICACHE_READS 12'hB11 // total reads
|
||||
`define CSR_MPM_ICACHE_READS_H 12'hB91
|
||||
// PERF: dcache
|
||||
`define CSR_MPM_DCACHE_MISS_R 12'hB12 // read misses
|
||||
`define CSR_MPM_DCACHE_MISS_R_H 12'hB92
|
||||
`define CSR_MPM_DCACHE_MISS_W 12'hB13 // write misses
|
||||
`define CSR_MPM_DCACHE_MISS_W_H 12'hB93
|
||||
`define CSR_MPM_DCACHE_DREQ_ST 12'hB14 // dram request stalls
|
||||
`define CSR_MPM_DCACHE_DREQ_ST_H 12'hB94
|
||||
`define CSR_MPM_DCACHE_CRSP_ST 12'hB15 // core response stalls
|
||||
`define CSR_MPM_DCACHE_CRSP_ST_H 12'hB95
|
||||
`define CSR_MPM_DCACHE_MSHR_ST 12'hB16 // MSHR stalls
|
||||
`define CSR_MPM_DCACHE_MSHR_ST_H 12'hB96
|
||||
`define CSR_MPM_DCACHE_PIPE_ST 12'hB17 // pipeline stalls
|
||||
`define CSR_MPM_DCACHE_PIPE_ST_H 12'hB97
|
||||
`define CSR_MPM_DCACHE_READS 12'hB18 // total reads
|
||||
`define CSR_MPM_DCACHE_READS_H 12'hB98
|
||||
`define CSR_MPM_DCACHE_WRITES 12'hB19 // total writes
|
||||
`define CSR_MPM_DCACHE_WRITES_H 12'hB99
|
||||
`define CSR_MPM_DCACHE_EVICTS 12'hB1A // total evictions
|
||||
`define CSR_MPM_DCACHE_EVICTS_H 12'hB9A
|
||||
// PERF: memory
|
||||
`define CSR_MPM_DRAM_LAT 12'hB1B // dram latency (total)
|
||||
`define CSR_MPM_DRAM_LAT_H 12'hB9B
|
||||
`define CSR_MPM_DRAM_REQ 12'hB1C // dram requests
|
||||
`define CSR_MPM_DRAM_REQ_H 12'hB9C
|
||||
`define CSR_MPM_DRAM_RSP 12'hB1D // dram responses
|
||||
`define CSR_MPM_DRAM_RSP_H 12'hB9D
|
||||
|
||||
// Machine Information Registers
|
||||
`define CSR_MVENDORID 12'hF11
|
||||
`define CSR_MARCHID 12'hF12
|
||||
`define CSR_MIMPID 12'hF13
|
||||
|
@ -185,6 +249,38 @@
|
|||
`define FPUQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Icache Configurable Knobs //////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
`ifndef ICACHE_SIZE
|
||||
`define ICACHE_SIZE 4096
|
||||
`endif
|
||||
|
||||
// Core Request Queue Size
|
||||
`ifndef ICREQ_SIZE
|
||||
`define ICREQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
`ifndef ICRSQ_SIZE
|
||||
`define ICRSQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Miss Handling Register Size
|
||||
`ifndef IMSHR_SIZE
|
||||
`define IMSHR_SIZE `NUM_WARPS
|
||||
`endif
|
||||
|
||||
// DRAM Request Queue Size
|
||||
`ifndef IDREQ_SIZE
|
||||
`define IDREQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// DRAM Response Queue Size
|
||||
`ifndef IDRSQ_SIZE
|
||||
`define IDRSQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Dcache Configurable Knobs //////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
|
@ -232,38 +328,6 @@
|
|||
`define DSRSQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Icache Configurable Knobs //////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
`ifndef ICACHE_SIZE
|
||||
`define ICACHE_SIZE 4096
|
||||
`endif
|
||||
|
||||
// Core Request Queue Size
|
||||
`ifndef ICREQ_SIZE
|
||||
`define ICREQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
`ifndef ICRSQ_SIZE
|
||||
`define ICRSQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Miss Handling Register Size
|
||||
`ifndef IMSHR_SIZE
|
||||
`define IMSHR_SIZE `NUM_WARPS
|
||||
`endif
|
||||
|
||||
// DRAM Request Queue Size
|
||||
`ifndef IDREQ_SIZE
|
||||
`define IDREQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// DRAM Response Queue Size
|
||||
`ifndef IDRSQ_SIZE
|
||||
`define IDRSQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// SM Configurable Knobs //////////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
|
|
|
@ -66,6 +66,10 @@ module VX_core #(
|
|||
output wire busy,
|
||||
output wire ebreak
|
||||
);
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if perf_memsys_if();
|
||||
`endif
|
||||
|
||||
VX_cache_dram_req_if #(
|
||||
.DRAM_LINE_WIDTH(`DDRAM_LINE_WIDTH),
|
||||
.DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH),
|
||||
|
@ -174,6 +178,9 @@ module VX_core #(
|
|||
.CORE_ID(CORE_ID)
|
||||
) pipeline (
|
||||
`SCOPE_BIND_VX_core_pipeline
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_memsys_if (perf_memsys_if),
|
||||
`endif
|
||||
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
|
@ -231,6 +238,9 @@ module VX_core #(
|
|||
.CORE_ID(CORE_ID)
|
||||
) mem_unit (
|
||||
`SCOPE_BIND_VX_core_mem_unit
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_memsys_if (perf_memsys_if),
|
||||
`endif
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -238,7 +248,7 @@ module VX_core #(
|
|||
// Core <-> Dcache
|
||||
.core_dcache_req_if (core_dcache_req_if),
|
||||
.core_dcache_rsp_if (core_dcache_rsp_if),
|
||||
|
||||
|
||||
// Core <-> Icache
|
||||
.core_icache_req_if (core_icache_req_if),
|
||||
.core_icache_rsp_if (core_icache_rsp_if),
|
||||
|
|
|
@ -6,6 +6,11 @@ module VX_csr_data #(
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if perf_memsys_if,
|
||||
VX_perf_pipeline_if perf_pipeline_if,
|
||||
`endif
|
||||
|
||||
VX_cmt_to_csr_if cmt_to_csr_if,
|
||||
VX_fpu_to_csr_if fpu_to_csr_if,
|
||||
|
||||
|
@ -114,6 +119,67 @@ module VX_csr_data #(
|
|||
`CSR_NW : read_data_r = `NUM_WARPS;
|
||||
`CSR_NC : read_data_r = `NUM_CORES * `NUM_CLUSTERS;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
// PERF: pipeline
|
||||
`CSR_MPM_ICACHE_ST : read_data_r = perf_pipeline_if.icache_stalls[31:0];
|
||||
`CSR_MPM_ICACHE_ST_H : read_data_r = perf_pipeline_if.icache_stalls[63:32];
|
||||
`CSR_MPM_IBUF_ST : read_data_r = perf_pipeline_if.ibuffer_stalls[31:0];
|
||||
`CSR_MPM_IBUF_ST_H : read_data_r = perf_pipeline_if.ibuffer_stalls[63:32];
|
||||
`CSR_MPM_SCRB_ST : read_data_r = perf_pipeline_if.scoreboard_stalls[31:0];
|
||||
`CSR_MPM_SCRB_ST_H : read_data_r = perf_pipeline_if.scoreboard_stalls[63:32];
|
||||
`CSR_MPM_ALU_ST : read_data_r = perf_pipeline_if.alu_stalls[31:0];
|
||||
`CSR_MPM_ALU_ST_H : read_data_r = perf_pipeline_if.alu_stalls[63:32];
|
||||
`CSR_MPM_LSU_ST : read_data_r = perf_pipeline_if.lsu_stalls[31:0];
|
||||
`CSR_MPM_LSU_ST_H : read_data_r = perf_pipeline_if.lsu_stalls[63:32];
|
||||
`CSR_MPM_CSR_ST : read_data_r = perf_pipeline_if.csr_stalls[31:0];
|
||||
`CSR_MPM_CSR_ST_H : read_data_r = perf_pipeline_if.csr_stalls[63:32];
|
||||
`CSR_MPM_MUL_ST : read_data_r = perf_pipeline_if.mul_stalls[31:0];
|
||||
`CSR_MPM_MUL_ST_H : read_data_r = perf_pipeline_if.mul_stalls[63:32];
|
||||
`CSR_MPM_FPU_ST : read_data_r = perf_pipeline_if.fpu_stalls[31:0];
|
||||
`CSR_MPM_FPU_ST_H : read_data_r = perf_pipeline_if.fpu_stalls[63:32];
|
||||
`CSR_MPM_GPU_ST : read_data_r = perf_pipeline_if.gpu_stalls[31:0];
|
||||
`CSR_MPM_GPU_ST_H : read_data_r = perf_pipeline_if.gpu_stalls[63:32];
|
||||
// PERF: icache
|
||||
`CSR_MPM_ICACHE_MISS_R : read_data_r = perf_memsys_if.icache_if.read_misses[31:0];
|
||||
`CSR_MPM_ICACHE_MISS_R_H : read_data_r = perf_memsys_if.icache_if.read_misses[63:32];
|
||||
`CSR_MPM_ICACHE_DREQ_ST : read_data_r = perf_memsys_if.icache_if.dreq_stalls[31:0];
|
||||
`CSR_MPM_ICACHE_DREQ_ST_H : read_data_r = perf_memsys_if.icache_if.dreq_stalls[63:32];
|
||||
`CSR_MPM_ICACHE_CRSP_ST : read_data_r = perf_memsys_if.icache_if.crsp_stalls[31:0];
|
||||
`CSR_MPM_ICACHE_CRSP_ST_H : read_data_r = perf_memsys_if.icache_if.crsp_stalls[63:32];
|
||||
`CSR_MPM_ICACHE_MSHR_ST : read_data_r = perf_memsys_if.icache_if.mshr_stalls[31:0];
|
||||
`CSR_MPM_ICACHE_MSHR_ST_H : read_data_r = perf_memsys_if.icache_if.mshr_stalls[63:32];
|
||||
`CSR_MPM_ICACHE_PIPE_ST : read_data_r = perf_memsys_if.icache_if.pipe_stalls[31:0];
|
||||
`CSR_MPM_ICACHE_PIPE_ST_H : read_data_r = perf_memsys_if.icache_if.pipe_stalls[63:32];
|
||||
`CSR_MPM_ICACHE_READS : read_data_r = perf_memsys_if.icache_if.reads[31:0];
|
||||
`CSR_MPM_ICACHE_READS_H : read_data_r = perf_memsys_if.icache_if.reads[63:32];
|
||||
// PERF: dcache
|
||||
`CSR_MPM_DCACHE_MISS_R : read_data_r = perf_memsys_if.dcache_if.read_misses[31:0];
|
||||
`CSR_MPM_DCACHE_MISS_R_H : read_data_r = perf_memsys_if.dcache_if.read_misses[63:32];
|
||||
`CSR_MPM_DCACHE_MISS_W : read_data_r = perf_memsys_if.dcache_if.write_misses[31:0];
|
||||
`CSR_MPM_DCACHE_MISS_W_H : read_data_r = perf_memsys_if.dcache_if.write_misses[63:32];
|
||||
`CSR_MPM_DCACHE_DREQ_ST : read_data_r = perf_memsys_if.dcache_if.dreq_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_DREQ_ST_H : read_data_r = perf_memsys_if.dcache_if.dreq_stalls[63:32];
|
||||
`CSR_MPM_DCACHE_CRSP_ST : read_data_r = perf_memsys_if.dcache_if.crsp_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_CRSP_ST_H : read_data_r = perf_memsys_if.dcache_if.crsp_stalls[63:32];
|
||||
`CSR_MPM_DCACHE_MSHR_ST : read_data_r = perf_memsys_if.dcache_if.mshr_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_MSHR_ST_H : read_data_r = perf_memsys_if.dcache_if.mshr_stalls[63:32];
|
||||
`CSR_MPM_DCACHE_PIPE_ST : read_data_r = perf_memsys_if.dcache_if.pipe_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_PIPE_ST_H : read_data_r = perf_memsys_if.dcache_if.pipe_stalls[63:32];
|
||||
`CSR_MPM_DCACHE_READS : read_data_r = perf_memsys_if.dcache_if.reads[31:0];
|
||||
`CSR_MPM_DCACHE_READS_H : read_data_r = perf_memsys_if.dcache_if.reads[63:32];
|
||||
`CSR_MPM_DCACHE_WRITES : read_data_r = perf_memsys_if.dcache_if.writes[31:0];
|
||||
`CSR_MPM_DCACHE_WRITES_H : read_data_r = perf_memsys_if.dcache_if.writes[63:32];
|
||||
`CSR_MPM_DCACHE_EVICTS : read_data_r = perf_memsys_if.dcache_if.evictions[31:0];
|
||||
`CSR_MPM_DCACHE_EVICTS_H : read_data_r = perf_memsys_if.dcache_if.evictions[63:32];
|
||||
// PERF: memory
|
||||
`CSR_MPM_DRAM_LAT : read_data_r = perf_memsys_if.dram_latency[31:0];
|
||||
`CSR_MPM_DRAM_LAT_H : read_data_r = perf_memsys_if.dram_latency[63:32];
|
||||
`CSR_MPM_DRAM_REQ : read_data_r = perf_memsys_if.dram_requests[31:0];
|
||||
`CSR_MPM_DRAM_REQ_H : read_data_r = perf_memsys_if.dram_requests[63:32];
|
||||
`CSR_MPM_DRAM_RSP : read_data_r = perf_memsys_if.dram_responses[31:0];
|
||||
`CSR_MPM_DRAM_RSP_H : read_data_r = perf_memsys_if.dram_responses[63:32];
|
||||
`endif
|
||||
|
||||
`CSR_SATP : read_data_r = 32'(csr_satp);
|
||||
|
||||
`CSR_MSTATUS : read_data_r = 32'(csr_mstatus);
|
||||
|
@ -128,10 +194,10 @@ module VX_csr_data #(
|
|||
`CSR_PMPCFG0 : read_data_r = 32'(csr_pmpcfg[0]);
|
||||
`CSR_PMPADDR0 : read_data_r = 32'(csr_pmpaddr[0]);
|
||||
|
||||
`CSR_CYCLE : read_data_r = csr_cycle[31:0];
|
||||
`CSR_CYCLE_H : read_data_r = csr_cycle[63:32];
|
||||
`CSR_INSTRET : read_data_r = csr_instret[31:0];
|
||||
`CSR_INSTRET_H : read_data_r = csr_instret[63:32];
|
||||
`CSR_MCYCLE : read_data_r = csr_cycle[31:0];
|
||||
`CSR_MCYCLE_H : read_data_r = csr_cycle[63:32];
|
||||
`CSR_MINSTRET : read_data_r = csr_instret[31:0];
|
||||
`CSR_MINSTRET_H: read_data_r = csr_instret[63:32];
|
||||
|
||||
`CSR_MVENDORID : read_data_r = `VENDOR_ID;
|
||||
`CSR_MARCHID : read_data_r = `ARCHITECTURE_ID;
|
||||
|
|
|
@ -6,6 +6,11 @@ module VX_csr_unit #(
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if perf_memsys_if,
|
||||
VX_perf_pipeline_if perf_pipeline_if,
|
||||
`endif
|
||||
|
||||
VX_cmt_to_csr_if cmt_to_csr_if,
|
||||
VX_fpu_to_csr_if fpu_to_csr_if,
|
||||
|
||||
|
@ -51,6 +56,10 @@ module VX_csr_unit #(
|
|||
) csr_data (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_memsys_if (perf_memsys_if),
|
||||
.perf_pipeline_if (perf_pipeline_if),
|
||||
`endif
|
||||
.cmt_to_csr_if (cmt_to_csr_if),
|
||||
.fpu_to_csr_if (fpu_to_csr_if),
|
||||
.read_enable (csr_pipe_req_if.valid),
|
||||
|
|
|
@ -240,45 +240,10 @@
|
|||
`define DBG_CACHE_REQ_MDATAW 0
|
||||
`endif
|
||||
|
||||
////////////////////////// Dcache Configurable Knobs //////////////////////////
|
||||
|
||||
// Cache ID
|
||||
`define DCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 0)
|
||||
|
||||
// Block size in bytes
|
||||
`define DBANK_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `GLOBAL_BLOCK_SIZE)
|
||||
|
||||
// Word size in bytes
|
||||
`define DWORD_SIZE 4
|
||||
|
||||
// TAG sharing enable
|
||||
`define DCORE_TAG_ID_BITS `LOG2UP(`LSUQ_SIZE)
|
||||
|
||||
// Core request tag bits
|
||||
`define DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS)
|
||||
|
||||
// DRAM request data bits
|
||||
`define DDRAM_LINE_WIDTH (`DBANK_LINE_SIZE * 8)
|
||||
|
||||
// DRAM request address bits
|
||||
`define DDRAM_ADDR_WIDTH (32 - `CLOG2(`DBANK_LINE_SIZE))
|
||||
|
||||
// DRAM byte enable bits
|
||||
`define DDRAM_BYTEEN_WIDTH `DBANK_LINE_SIZE
|
||||
|
||||
// DRAM request tag bits
|
||||
`define DDRAM_TAG_WIDTH `DDRAM_ADDR_WIDTH
|
||||
|
||||
// Core request size
|
||||
`define DNUM_REQUESTS `NUM_THREADS
|
||||
|
||||
// Snoop request tag bits
|
||||
`define DSNP_TAG_WIDTH ((`NUM_CORES > 1) ? `LOG2UP(`L2SREQ_SIZE) : `L2SNP_TAG_WIDTH)
|
||||
|
||||
////////////////////////// Icache Configurable Knobs //////////////////////////
|
||||
|
||||
// Cache ID
|
||||
`define ICACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1)
|
||||
`define ICACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 0)
|
||||
|
||||
// Block size in bytes
|
||||
`define IBANK_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `GLOBAL_BLOCK_SIZE)
|
||||
|
@ -316,6 +281,41 @@
|
|||
// Core request size
|
||||
`define INUM_REQUESTS 1
|
||||
|
||||
////////////////////////// Dcache Configurable Knobs //////////////////////////
|
||||
|
||||
// Cache ID
|
||||
`define DCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1)
|
||||
|
||||
// Block size in bytes
|
||||
`define DBANK_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `GLOBAL_BLOCK_SIZE)
|
||||
|
||||
// Word size in bytes
|
||||
`define DWORD_SIZE 4
|
||||
|
||||
// TAG sharing enable
|
||||
`define DCORE_TAG_ID_BITS `LOG2UP(`LSUQ_SIZE)
|
||||
|
||||
// Core request tag bits
|
||||
`define DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS)
|
||||
|
||||
// DRAM request data bits
|
||||
`define DDRAM_LINE_WIDTH (`DBANK_LINE_SIZE * 8)
|
||||
|
||||
// DRAM request address bits
|
||||
`define DDRAM_ADDR_WIDTH (32 - `CLOG2(`DBANK_LINE_SIZE))
|
||||
|
||||
// DRAM byte enable bits
|
||||
`define DDRAM_BYTEEN_WIDTH `DBANK_LINE_SIZE
|
||||
|
||||
// DRAM request tag bits
|
||||
`define DDRAM_TAG_WIDTH `DDRAM_ADDR_WIDTH
|
||||
|
||||
// Core request size
|
||||
`define DNUM_REQUESTS `NUM_THREADS
|
||||
|
||||
// Snoop request tag bits
|
||||
`define DSNP_TAG_WIDTH ((`NUM_CORES > 1) ? `LOG2UP(`L2SREQ_SIZE) : `L2SNP_TAG_WIDTH)
|
||||
|
||||
////////////////////////// SM Configurable Knobs //////////////////////////////
|
||||
|
||||
// Cache ID
|
||||
|
|
|
@ -16,8 +16,13 @@ module VX_execute #(
|
|||
VX_cache_core_req_if dcache_req_if,
|
||||
VX_cache_core_rsp_if dcache_rsp_if,
|
||||
|
||||
// perf
|
||||
// commit status
|
||||
VX_cmt_to_csr_if cmt_to_csr_if,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if perf_memsys_if,
|
||||
VX_perf_pipeline_if perf_pipeline_if,
|
||||
`endif
|
||||
|
||||
// inputs
|
||||
VX_alu_req_if alu_req_if,
|
||||
|
@ -72,7 +77,11 @@ module VX_execute #(
|
|||
.CORE_ID(CORE_ID)
|
||||
) csr_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_memsys_if (perf_memsys_if),
|
||||
.perf_pipeline_if (perf_pipeline_if),
|
||||
`endif
|
||||
.cmt_to_csr_if (cmt_to_csr_if),
|
||||
.fpu_to_csr_if (fpu_to_csr_if),
|
||||
.csr_io_req_if (csr_io_req_if),
|
||||
|
|
|
@ -8,6 +8,10 @@ module VX_issue #(
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_pipeline_if perf_pipeline_if,
|
||||
`endif
|
||||
|
||||
VX_decode_if decode_if,
|
||||
VX_writeback_if writeback_if,
|
||||
|
||||
|
@ -120,6 +124,21 @@ module VX_issue #(
|
|||
`SCOPE_ASSIGN (writeback_rd, writeback_if.rd);
|
||||
`SCOPE_ASSIGN (writeback_data, writeback_if.data);
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [63:0] perf_scoreboard_stalls;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_scoreboard_stalls <= 0;
|
||||
end else begin
|
||||
// scoreboard_stall
|
||||
if (ibuf_deq_if.valid & scoreboard_delay) begin
|
||||
perf_scoreboard_stalls <= perf_scoreboard_stalls + 64'd1;
|
||||
end
|
||||
end
|
||||
end
|
||||
assign perf_pipeline_if.scoreboard_stalls = perf_scoreboard_stalls;
|
||||
`endif
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (alu_req_if.valid && alu_req_if.ready) begin
|
||||
|
|
|
@ -7,6 +7,10 @@ module VX_mem_unit # (
|
|||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if perf_memsys_if,
|
||||
`endif
|
||||
|
||||
// Core <-> Dcache
|
||||
VX_cache_core_req_if core_dcache_req_if,
|
||||
|
@ -28,6 +32,11 @@ module VX_mem_unit # (
|
|||
VX_cache_core_req_if io_req_if,
|
||||
VX_cache_core_rsp_if io_rsp_if
|
||||
);
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_cache_if perf_icache_if(), perf_dcache_if(), perf_smem_if();
|
||||
`endif
|
||||
|
||||
VX_cache_dram_req_if #(
|
||||
.DRAM_LINE_WIDTH (`DDRAM_LINE_WIDTH),
|
||||
.DRAM_ADDR_WIDTH (`DDRAM_ADDR_WIDTH),
|
||||
|
@ -80,6 +89,82 @@ module VX_mem_unit # (
|
|||
.smem_rsp_if (smem_rsp_if),
|
||||
.io_rsp_if (io_rsp_if),
|
||||
.core_rsp_if (core_dcache_rsp_if)
|
||||
);
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_ID (`ICACHE_ID),
|
||||
.CACHE_SIZE (`ICACHE_SIZE),
|
||||
.BANK_LINE_SIZE (`IBANK_LINE_SIZE),
|
||||
.NUM_BANKS (`INUM_BANKS),
|
||||
.WORD_SIZE (`IWORD_SIZE),
|
||||
.NUM_REQS (`INUM_REQUESTS),
|
||||
.CREQ_SIZE (`ICREQ_SIZE),
|
||||
.MSHR_SIZE (`IMSHR_SIZE),
|
||||
.DRSQ_SIZE (`IDRSQ_SIZE),
|
||||
.SREQ_SIZE (1),
|
||||
.CRSQ_SIZE (`ICRSQ_SIZE),
|
||||
.DREQ_SIZE (`IDREQ_SIZE),
|
||||
.SRSQ_SIZE (1),
|
||||
.DRAM_ENABLE (1),
|
||||
.FLUSH_ENABLE (0),
|
||||
.WRITE_ENABLE (0),
|
||||
.CORE_TAG_WIDTH (`ICORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS),
|
||||
.DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH)
|
||||
) icache (
|
||||
`SCOPE_BIND_VX_mem_unit_icache
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Core request
|
||||
.core_req_valid (core_icache_req_if.valid),
|
||||
.core_req_rw (core_icache_req_if.rw),
|
||||
.core_req_byteen (core_icache_req_if.byteen),
|
||||
.core_req_addr (core_icache_req_if.addr),
|
||||
.core_req_data (core_icache_req_if.data),
|
||||
.core_req_tag (core_icache_req_if.tag),
|
||||
.core_req_ready (core_icache_req_if.ready),
|
||||
|
||||
// Core response
|
||||
.core_rsp_valid (core_icache_rsp_if.valid),
|
||||
.core_rsp_data (core_icache_rsp_if.data),
|
||||
.core_rsp_tag (core_icache_rsp_if.tag),
|
||||
.core_rsp_ready (core_icache_rsp_if.ready),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_cache_if (perf_icache_if),
|
||||
`endif
|
||||
|
||||
// DRAM Req
|
||||
.dram_req_valid (icache_dram_req_if.valid),
|
||||
.dram_req_rw (icache_dram_req_if.rw),
|
||||
.dram_req_byteen (icache_dram_req_if.byteen),
|
||||
.dram_req_addr (icache_dram_req_if.addr),
|
||||
.dram_req_data (icache_dram_req_if.data),
|
||||
.dram_req_tag (icache_dram_req_if.tag),
|
||||
.dram_req_ready (icache_dram_req_if.ready),
|
||||
|
||||
// DRAM response
|
||||
.dram_rsp_valid (icache_dram_rsp_if.valid),
|
||||
.dram_rsp_data (icache_dram_rsp_if.data),
|
||||
.dram_rsp_tag (icache_dram_rsp_if.tag),
|
||||
.dram_rsp_ready (icache_dram_rsp_if.ready),
|
||||
|
||||
// Snoop request
|
||||
.snp_req_valid (1'b0),
|
||||
.snp_req_addr (0),
|
||||
.snp_req_inv (1'b0),
|
||||
.snp_req_tag (0),
|
||||
`UNUSED_PIN (snp_req_ready),
|
||||
|
||||
// Snoop response
|
||||
`UNUSED_PIN (snp_rsp_valid),
|
||||
`UNUSED_PIN (snp_rsp_tag),
|
||||
.snp_rsp_ready (1'b0),
|
||||
|
||||
// Miss status
|
||||
`UNUSED_PIN (miss_vec)
|
||||
);
|
||||
|
||||
VX_cache #(
|
||||
|
@ -124,6 +209,10 @@ module VX_mem_unit # (
|
|||
.core_rsp_tag (dcache_rsp_if.tag),
|
||||
.core_rsp_ready (dcache_rsp_if.ready),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_cache_if (perf_dcache_if),
|
||||
`endif
|
||||
|
||||
// DRAM request
|
||||
.dram_req_valid (dcache_dram_req_if.valid),
|
||||
.dram_req_rw (dcache_dram_req_if.rw),
|
||||
|
@ -151,78 +240,6 @@ module VX_mem_unit # (
|
|||
.snp_rsp_tag (dcache_snp_rsp_if.tag),
|
||||
.snp_rsp_ready (dcache_snp_rsp_if.ready),
|
||||
|
||||
// Miss status
|
||||
`UNUSED_PIN (miss_vec)
|
||||
);
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_ID (`ICACHE_ID),
|
||||
.CACHE_SIZE (`ICACHE_SIZE),
|
||||
.BANK_LINE_SIZE (`IBANK_LINE_SIZE),
|
||||
.NUM_BANKS (`INUM_BANKS),
|
||||
.WORD_SIZE (`IWORD_SIZE),
|
||||
.NUM_REQS (`INUM_REQUESTS),
|
||||
.CREQ_SIZE (`ICREQ_SIZE),
|
||||
.MSHR_SIZE (`IMSHR_SIZE),
|
||||
.DRSQ_SIZE (`IDRSQ_SIZE),
|
||||
.SREQ_SIZE (1),
|
||||
.CRSQ_SIZE (`ICRSQ_SIZE),
|
||||
.DREQ_SIZE (`IDREQ_SIZE),
|
||||
.SRSQ_SIZE (1),
|
||||
.DRAM_ENABLE (1),
|
||||
.FLUSH_ENABLE (0),
|
||||
.WRITE_ENABLE (0),
|
||||
.CORE_TAG_WIDTH (`ICORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS),
|
||||
.DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH)
|
||||
) icache (
|
||||
`SCOPE_BIND_VX_mem_unit_icache
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Core request
|
||||
.core_req_valid (core_icache_req_if.valid),
|
||||
.core_req_rw (core_icache_req_if.rw),
|
||||
.core_req_byteen (core_icache_req_if.byteen),
|
||||
.core_req_addr (core_icache_req_if.addr),
|
||||
.core_req_data (core_icache_req_if.data),
|
||||
.core_req_tag (core_icache_req_if.tag),
|
||||
.core_req_ready (core_icache_req_if.ready),
|
||||
|
||||
// Core response
|
||||
.core_rsp_valid (core_icache_rsp_if.valid),
|
||||
.core_rsp_data (core_icache_rsp_if.data),
|
||||
.core_rsp_tag (core_icache_rsp_if.tag),
|
||||
.core_rsp_ready (core_icache_rsp_if.ready),
|
||||
|
||||
// DRAM Req
|
||||
.dram_req_valid (icache_dram_req_if.valid),
|
||||
.dram_req_rw (icache_dram_req_if.rw),
|
||||
.dram_req_byteen (icache_dram_req_if.byteen),
|
||||
.dram_req_addr (icache_dram_req_if.addr),
|
||||
.dram_req_data (icache_dram_req_if.data),
|
||||
.dram_req_tag (icache_dram_req_if.tag),
|
||||
.dram_req_ready (icache_dram_req_if.ready),
|
||||
|
||||
// DRAM response
|
||||
.dram_rsp_valid (icache_dram_rsp_if.valid),
|
||||
.dram_rsp_data (icache_dram_rsp_if.data),
|
||||
.dram_rsp_tag (icache_dram_rsp_if.tag),
|
||||
.dram_rsp_ready (icache_dram_rsp_if.ready),
|
||||
|
||||
// Snoop request
|
||||
.snp_req_valid (1'b0),
|
||||
.snp_req_addr (0),
|
||||
.snp_req_inv (1'b0),
|
||||
.snp_req_tag (0),
|
||||
`UNUSED_PIN (snp_req_ready),
|
||||
|
||||
// Snoop response
|
||||
`UNUSED_PIN (snp_rsp_valid),
|
||||
`UNUSED_PIN (snp_rsp_tag),
|
||||
.snp_rsp_ready (1'b0),
|
||||
|
||||
// Miss status
|
||||
`UNUSED_PIN (miss_vec)
|
||||
);
|
||||
|
@ -268,6 +285,10 @@ module VX_mem_unit # (
|
|||
.core_rsp_tag (smem_rsp_if.tag),
|
||||
.core_rsp_ready (smem_rsp_if.ready),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_cache_if (perf_smem_if),
|
||||
`endif
|
||||
|
||||
// DRAM request
|
||||
`UNUSED_PIN (dram_req_valid),
|
||||
`UNUSED_PIN (dram_req_rw),
|
||||
|
@ -340,4 +361,65 @@ module VX_mem_unit # (
|
|||
.rsp_ready_in (dram_rsp_if.ready)
|
||||
);
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
|
||||
assign perf_memsys_if.icache_if.read_misses = perf_icache_if.read_misses;
|
||||
assign perf_memsys_if.icache_if.write_misses = perf_icache_if.write_misses;
|
||||
assign perf_memsys_if.icache_if.mshr_stalls = perf_icache_if.mshr_stalls;
|
||||
assign perf_memsys_if.icache_if.crsp_stalls = perf_icache_if.crsp_stalls;
|
||||
assign perf_memsys_if.icache_if.dreq_stalls = perf_icache_if.dreq_stalls;
|
||||
assign perf_memsys_if.icache_if.pipe_stalls = perf_icache_if.pipe_stalls;
|
||||
assign perf_memsys_if.icache_if.reads = perf_icache_if.reads;
|
||||
assign perf_memsys_if.icache_if.writes = perf_icache_if.writes;
|
||||
assign perf_memsys_if.icache_if.evictions = perf_icache_if.evictions;
|
||||
|
||||
assign perf_memsys_if.dcache_if.read_misses = perf_dcache_if.read_misses;
|
||||
assign perf_memsys_if.dcache_if.write_misses = perf_dcache_if.write_misses;
|
||||
assign perf_memsys_if.dcache_if.mshr_stalls = perf_dcache_if.mshr_stalls;
|
||||
assign perf_memsys_if.dcache_if.crsp_stalls = perf_dcache_if.crsp_stalls;
|
||||
assign perf_memsys_if.dcache_if.dreq_stalls = perf_dcache_if.dreq_stalls;
|
||||
assign perf_memsys_if.dcache_if.pipe_stalls = perf_dcache_if.pipe_stalls;
|
||||
assign perf_memsys_if.dcache_if.reads = perf_dcache_if.reads;
|
||||
assign perf_memsys_if.dcache_if.writes = perf_dcache_if.writes;
|
||||
assign perf_memsys_if.dcache_if.evictions = perf_dcache_if.evictions;
|
||||
|
||||
reg [63:0] perf_dram_lat_per_cycle;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_dram_lat_per_cycle <= 0;
|
||||
end else begin
|
||||
if (dram_req_if.valid & (~dram_req_if.rw) & dram_req_if.ready & dram_rsp_if.valid & dram_rsp_if.ready) begin
|
||||
perf_dram_lat_per_cycle <= perf_dram_lat_per_cycle;
|
||||
end else if (dram_req_if.valid & (~dram_req_if.rw) & dram_req_if.ready) begin
|
||||
perf_dram_lat_per_cycle <= perf_dram_lat_per_cycle + 64'd1;
|
||||
end else if (dram_rsp_if.valid & dram_rsp_if.ready) begin
|
||||
perf_dram_lat_per_cycle <= perf_dram_lat_per_cycle - 64'd1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
reg [63:0] perf_dram_req, perf_dram_rsp, perf_dram_lat;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_dram_req <= 0;
|
||||
perf_dram_rsp <= 0;
|
||||
perf_dram_lat <= 0;
|
||||
end else begin
|
||||
if (dram_req_if.valid & dram_req_if.ready) begin
|
||||
perf_dram_req <= perf_dram_req + 64'd1;
|
||||
end
|
||||
if (dram_rsp_if.valid & dram_rsp_if.ready) begin
|
||||
perf_dram_rsp <= perf_dram_rsp + 64'd1;
|
||||
end
|
||||
perf_dram_lat <= perf_dram_lat + perf_dram_lat_per_cycle;
|
||||
end
|
||||
end
|
||||
|
||||
assign perf_memsys_if.dram_requests = perf_dram_req;
|
||||
assign perf_memsys_if.dram_responses = perf_dram_rsp;
|
||||
assign perf_memsys_if.dram_latency = perf_dram_lat;
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -51,6 +51,10 @@ module VX_pipeline #(
|
|||
output wire[31:0] csr_io_rsp_data,
|
||||
input wire csr_io_rsp_ready,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if perf_memsys_if,
|
||||
`endif
|
||||
|
||||
// Status
|
||||
output wire busy,
|
||||
output wire ebreak
|
||||
|
@ -171,6 +175,10 @@ module VX_pipeline #(
|
|||
VX_commit_if fpu_commit_if();
|
||||
VX_commit_if gpu_commit_if();
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_pipeline_if perf_pipeline_if();
|
||||
`endif
|
||||
|
||||
VX_fetch #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) fetch (
|
||||
|
@ -206,6 +214,10 @@ module VX_pipeline #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_pipeline_if (perf_pipeline_if),
|
||||
`endif
|
||||
|
||||
.decode_if (decode_if),
|
||||
.writeback_if (writeback_if),
|
||||
|
||||
|
@ -224,7 +236,12 @@ module VX_pipeline #(
|
|||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_memsys_if (perf_memsys_if),
|
||||
.perf_pipeline_if (perf_pipeline_if),
|
||||
`endif
|
||||
|
||||
.dcache_req_if (core_dcache_req_if),
|
||||
.dcache_rsp_if (core_dcache_rsp_if),
|
||||
|
||||
|
@ -272,4 +289,78 @@ module VX_pipeline #(
|
|||
.cmt_to_csr_if (cmt_to_csr_if)
|
||||
);
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [63:0] perf_icache_stalls;
|
||||
reg [63:0] perf_ibuffer_stalls;
|
||||
reg [63:0] perf_alu_stalls;
|
||||
reg [63:0] perf_lsu_stalls;
|
||||
reg [63:0] perf_csr_stalls;
|
||||
reg [63:0] perf_gpu_stalls;
|
||||
`ifdef EXT_M_ENABLE
|
||||
reg [63:0] perf_mul_stalls;
|
||||
`endif
|
||||
`ifdef EXT_F_ENABLE
|
||||
reg [63:0] perf_fpu_stalls;
|
||||
`endif
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_icache_stalls <= 0;
|
||||
perf_ibuffer_stalls <= 0;
|
||||
perf_alu_stalls <= 0;
|
||||
perf_lsu_stalls <= 0;
|
||||
perf_csr_stalls <= 0;
|
||||
perf_gpu_stalls <= 0;
|
||||
`ifdef EXT_M_ENABLE
|
||||
perf_mul_stalls <= 0;
|
||||
`endif
|
||||
`ifdef EXT_F_ENABLE
|
||||
perf_fpu_stalls <= 0;
|
||||
`endif
|
||||
end else begin
|
||||
if (core_icache_req_if.valid & !core_icache_req_if.ready) begin
|
||||
perf_icache_stalls <= perf_icache_stalls + 64'd1;
|
||||
end
|
||||
if (decode_if.valid & !decode_if.ready) begin
|
||||
perf_ibuffer_stalls <= perf_ibuffer_stalls + 64'd1;
|
||||
end
|
||||
if (alu_req_if.valid & !alu_req_if.ready) begin
|
||||
perf_alu_stalls <= perf_alu_stalls + 64'd1;
|
||||
end
|
||||
if (lsu_req_if.valid & !lsu_req_if.ready) begin
|
||||
perf_lsu_stalls <= perf_lsu_stalls + 64'd1;
|
||||
end
|
||||
if (csr_req_if.valid & !csr_req_if.ready) begin
|
||||
perf_csr_stalls <= perf_csr_stalls + 64'd1;
|
||||
end
|
||||
if (gpu_req_if.valid & !gpu_req_if.ready) begin
|
||||
perf_gpu_stalls <= perf_gpu_stalls + 64'd1;
|
||||
end
|
||||
`ifdef EXT_M_ENABLE
|
||||
if (mul_req_if.valid & !mul_req_if.ready) begin
|
||||
perf_mul_stalls <= perf_mul_stalls + 64'd1;
|
||||
end
|
||||
`endif
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (fpu_req_if.valid & !fpu_req_if.ready) begin
|
||||
perf_fpu_stalls <= perf_fpu_stalls + 64'd1;
|
||||
end
|
||||
`endif
|
||||
end
|
||||
end
|
||||
|
||||
assign perf_pipeline_if.icache_stalls = perf_icache_stalls;
|
||||
assign perf_pipeline_if.ibuffer_stalls = perf_ibuffer_stalls;
|
||||
assign perf_pipeline_if.alu_stalls = perf_alu_stalls;
|
||||
assign perf_pipeline_if.lsu_stalls = perf_lsu_stalls;
|
||||
assign perf_pipeline_if.csr_stalls = perf_csr_stalls;
|
||||
assign perf_pipeline_if.gpu_stalls = perf_gpu_stalls;
|
||||
`ifdef EXT_M_ENABLE
|
||||
assign perf_pipeline_if.mul_stalls = perf_mul_stalls;
|
||||
`endif
|
||||
`ifdef EXT_F_ENABLE
|
||||
assign perf_pipeline_if.fpu_stalls = perf_fpu_stalls;
|
||||
`endif
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -299,6 +299,9 @@ module Vortex (
|
|||
);
|
||||
|
||||
if (`L3_ENABLE) begin
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_cache_if perf_l3cache_if();
|
||||
`endif
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_valid_qual;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_rw_qual;
|
||||
|
@ -347,10 +350,14 @@ module Vortex (
|
|||
.SNP_TAG_WIDTH (`L3SNP_TAG_WIDTH)
|
||||
) l3cache (
|
||||
`SCOPE_BIND_Vortex_l3cache
|
||||
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_cache_if (perf_l3cache_if),
|
||||
`endif
|
||||
|
||||
// Core request
|
||||
.core_req_valid (per_cluster_dram_req_valid_qual),
|
||||
.core_req_rw (per_cluster_dram_req_rw_qual),
|
||||
|
|
21
hw/rtl/cache/VX_bank.v
vendored
21
hw/rtl/cache/VX_bank.v
vendored
|
@ -96,6 +96,14 @@ module VX_bank #(
|
|||
output wire [SNP_TAG_WIDTH-1:0] snp_rsp_tag,
|
||||
input wire snp_rsp_ready,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
output wire perf_mshr_stall,
|
||||
output wire perf_pipe_stall,
|
||||
output wire perf_evict,
|
||||
output wire perf_read_miss,
|
||||
output wire perf_write_miss,
|
||||
`endif
|
||||
|
||||
// Misses
|
||||
output wire misses
|
||||
);
|
||||
|
@ -567,7 +575,6 @@ end else begin
|
|||
assign incoming_fill_st2 = 0;
|
||||
|
||||
assign misses = 0;
|
||||
|
||||
end
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
|
@ -951,6 +958,18 @@ end
|
|||
`SCOPE_ASSIGN (addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID));
|
||||
`SCOPE_ASSIGN (addr_st3, `LINE_TO_BYTE_ADDR(addr_st3, BANK_ID));
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
assign perf_pipe_stall = pipeline_stall;
|
||||
assign perf_mshr_stall = mshr_going_full;
|
||||
assign perf_read_miss = !pipeline_stall & miss_st1 & !is_mshr_st1 & !mem_rw_st1;
|
||||
assign perf_write_miss = !pipeline_stall & miss_st1 & !is_mshr_st1 & mem_rw_st1;
|
||||
if (DRAM_ENABLE) begin
|
||||
assign perf_evict = dreq_push & do_writeback_st3 & !is_snp_st3;
|
||||
end else begin
|
||||
assign perf_evict = 0;
|
||||
end
|
||||
`endif
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_BANK
|
||||
wire incoming_fill_dfp_st3 = drsq_push && (addr_st3 == dram_rsp_addr);
|
||||
always @(posedge clk) begin
|
||||
|
|
178
hw/rtl/cache/VX_cache.v
vendored
178
hw/rtl/cache/VX_cache.v
vendored
|
@ -70,7 +70,12 @@ module VX_cache #(
|
|||
output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
|
||||
output wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
|
||||
input wire [`CORE_REQ_TAG_COUNT-1:0] core_rsp_ready,
|
||||
|
||||
|
||||
// PERF
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_cache_if perf_cache_if,
|
||||
`endif
|
||||
|
||||
// DRAM request
|
||||
output wire dram_req_valid,
|
||||
output wire dram_req_rw,
|
||||
|
@ -130,7 +135,16 @@ module VX_cache #(
|
|||
|
||||
wire [NUM_BANKS-1:0] per_bank_miss;
|
||||
assign miss_vec = per_bank_miss;
|
||||
|
||||
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
wire [NUM_BANKS-1:0] perf_mshr_stall_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_pipe_stall_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_evict_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_read_miss_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_write_miss_per_bank;
|
||||
`endif
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign snp_req_ready = per_bank_snp_req_ready;
|
||||
end else begin
|
||||
|
@ -139,9 +153,9 @@ module VX_cache #(
|
|||
|
||||
VX_cache_core_req_bank_sel #(
|
||||
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
|
||||
) cache_core_req_bank_sel (
|
||||
.core_req_valid (core_req_valid),
|
||||
|
@ -312,6 +326,14 @@ module VX_cache #(
|
|||
.dram_rsp_addr (curr_bank_dram_rsp_addr),
|
||||
.dram_rsp_ready (curr_bank_dram_rsp_ready),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_mshr_stall (perf_mshr_stall_per_bank[i]),
|
||||
.perf_pipe_stall (perf_pipe_stall_per_bank[i]),
|
||||
.perf_evict (perf_evict_per_bank[i]),
|
||||
.perf_read_miss (perf_read_miss_per_bank[i]),
|
||||
.perf_write_miss (perf_write_miss_per_bank[i]),
|
||||
`endif
|
||||
|
||||
// Snoop request
|
||||
.snp_req_valid (curr_bank_snp_req_valid),
|
||||
.snp_req_addr (curr_bank_snp_req_addr),
|
||||
|
@ -408,4 +430,150 @@ module VX_cache #(
|
|||
`UNUSED_VAR (snp_rsp_ready)
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
// per cycle: core_req_r, core_req_w
|
||||
reg [($clog2(NUM_REQS+1)-1):0] perf_core_req_r_per_cycle, perf_core_req_w_per_cycle;
|
||||
reg [($clog2(NUM_REQS+1)-1):0] perf_crsp_stall_per_cycle;
|
||||
|
||||
if (CORE_TAG_ID_BITS != 0) begin
|
||||
VX_countones #( // core_req_r
|
||||
.N(NUM_REQS)
|
||||
) perf_countones_core_req_r_count (
|
||||
.valids (core_req_valid & {NUM_REQS{core_req_ready & ~core_req_rw}}),
|
||||
.count (perf_core_req_r_per_cycle)
|
||||
);
|
||||
|
||||
VX_countones #( // core_req_w
|
||||
.N(NUM_REQS)
|
||||
) perf_countones_core_req_w_count (
|
||||
.valids (core_req_valid & {NUM_REQS{core_req_ready & core_req_rw}}),
|
||||
.count (perf_core_req_w_per_cycle)
|
||||
);
|
||||
|
||||
VX_countones #( // core_rsp
|
||||
.N(NUM_REQS)
|
||||
) perf_countones_core_rsp_count (
|
||||
.valids (core_rsp_valid & {NUM_REQS{!core_rsp_ready}}),
|
||||
.count (perf_crsp_stall_per_cycle)
|
||||
);
|
||||
end else begin
|
||||
VX_countones #( // core_req_r
|
||||
.N(NUM_REQS)
|
||||
) perf_countones_core_req_r_count (
|
||||
.valids (core_req_valid & core_req_ready & ~core_req_rw),
|
||||
.count (perf_core_req_r_per_cycle)
|
||||
);
|
||||
|
||||
VX_countones #( // core_req_w
|
||||
.N(NUM_REQS)
|
||||
) perf_countones_core_req_w_count (
|
||||
.valids (core_req_valid & core_req_ready & core_req_rw),
|
||||
.count (perf_core_req_w_per_cycle)
|
||||
);
|
||||
|
||||
VX_countones #( // core_rsp
|
||||
.N(NUM_REQS)
|
||||
) perf_countones_core_rsp_count (
|
||||
.valids (core_rsp_valid & ~core_rsp_ready),
|
||||
.count (perf_crsp_stall_per_cycle)
|
||||
);
|
||||
end
|
||||
|
||||
// per cycle: msrq stalls, pipeline stalls, evictions, read misses, write misses
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_mshr_stall_per_cycle;
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_pipe_stall_per_cycle;
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_evictions_per_cycle;
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_read_miss_per_cycle;
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_write_miss_per_cycle;
|
||||
|
||||
VX_countones #(
|
||||
.N(NUM_BANKS)
|
||||
) perf_countones_mshr_stall_count (
|
||||
.valids (perf_mshr_stall_per_bank),
|
||||
.count (perf_mshr_stall_per_cycle)
|
||||
);
|
||||
|
||||
VX_countones #(
|
||||
.N(NUM_BANKS)
|
||||
) perf_countones_total_stall_count (
|
||||
.valids (perf_pipe_stall_per_bank),
|
||||
.count (perf_pipe_stall_per_cycle)
|
||||
);
|
||||
|
||||
VX_countones #(
|
||||
.N(NUM_BANKS)
|
||||
) perf_countones_EVICTSict_count (
|
||||
.valids (perf_evict_per_bank),
|
||||
.count (perf_evictions_per_cycle)
|
||||
);
|
||||
|
||||
VX_countones #(
|
||||
.N(NUM_BANKS)
|
||||
) perf_countones_read_miss_count (
|
||||
.valids (perf_read_miss_per_bank),
|
||||
.count (perf_read_miss_per_cycle)
|
||||
);
|
||||
|
||||
VX_countones #(
|
||||
.N(NUM_BANKS)
|
||||
) perf_countones_write_miss_count (
|
||||
.valids (perf_write_miss_per_bank),
|
||||
.count (perf_write_miss_per_cycle)
|
||||
);
|
||||
|
||||
reg [63:0] perf_core_req_r;
|
||||
reg [63:0] perf_core_req_w;
|
||||
reg [63:0] perf_mshr_stall;
|
||||
reg [63:0] perf_pipe_stall;
|
||||
reg [63:0] perf_evictions;
|
||||
reg [63:0] perf_read_miss;
|
||||
reg [63:0] perf_write_miss;
|
||||
reg [63:0] perf_crsp_stall;
|
||||
reg [63:0] perf_dreq_stall;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_core_req_r <= 0;
|
||||
perf_core_req_w <= 0;
|
||||
perf_crsp_stall <= 0;
|
||||
perf_mshr_stall <= 0;
|
||||
perf_pipe_stall <= 0;
|
||||
perf_evictions <= 0;
|
||||
perf_read_miss <= 0;
|
||||
perf_write_miss <= 0;
|
||||
perf_dreq_stall <= 0;
|
||||
end else begin
|
||||
// core requests
|
||||
perf_core_req_r <= perf_core_req_r + $bits(perf_core_req_r)'(perf_core_req_r_per_cycle);
|
||||
perf_core_req_w <= perf_core_req_w + $bits(perf_core_req_w)'(perf_core_req_w_per_cycle);
|
||||
// core response stalls
|
||||
perf_crsp_stall <= perf_crsp_stall + $bits(perf_crsp_stall)'(perf_crsp_stall_per_cycle);
|
||||
// miss reserve queue stalls
|
||||
perf_mshr_stall <= perf_mshr_stall + $bits(perf_mshr_stall)'(perf_mshr_stall_per_cycle);
|
||||
// pipeline stalls
|
||||
perf_pipe_stall <= perf_pipe_stall + $bits(perf_pipe_stall)'(perf_pipe_stall_per_cycle);
|
||||
// total evictions
|
||||
perf_evictions <= perf_evictions + $bits(perf_evictions)'(perf_evictions_per_cycle);
|
||||
// read misses
|
||||
perf_read_miss <= perf_read_miss + $bits(perf_read_miss)'(perf_read_miss_per_cycle);
|
||||
// write misses
|
||||
perf_write_miss <= perf_write_miss + $bits(perf_write_miss)'(perf_write_miss_per_cycle);
|
||||
// dram request stalls
|
||||
if (dram_req_valid & !dram_req_ready) begin
|
||||
perf_dreq_stall <= perf_dreq_stall + 64'd1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign perf_cache_if.reads = perf_core_req_r;
|
||||
assign perf_cache_if.writes = perf_core_req_w;
|
||||
assign perf_cache_if.read_misses = perf_read_miss;
|
||||
assign perf_cache_if.write_misses = perf_write_miss;
|
||||
assign perf_cache_if.evictions = perf_evictions;
|
||||
assign perf_cache_if.mshr_stalls = perf_mshr_stall;
|
||||
assign perf_cache_if.pipe_stalls = perf_pipe_stall;
|
||||
assign perf_cache_if.crsp_stalls = perf_crsp_stall;
|
||||
assign perf_cache_if.dreq_stalls = perf_dreq_stall;
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -159,7 +159,7 @@ module VX_fpnew
|
|||
.tag_o ({fpu_tag_out, fpu_has_fflags_out}),
|
||||
.out_valid_o (fpu_valid_out),
|
||||
.out_ready_i (fpu_ready_out),
|
||||
`UNUSED_PIN (busy_o)
|
||||
`UNUSED_PIN (busy_o)
|
||||
);
|
||||
end else begin
|
||||
fpnew_top #(
|
||||
|
@ -179,14 +179,14 @@ module VX_fpnew
|
|||
.vectorial_op_i (1'b0),
|
||||
.tag_i (1'b0),
|
||||
.in_valid_i (fpu_valid_in),
|
||||
`UNUSED_PIN (in_ready_o),
|
||||
`UNUSED_PIN (in_ready_o),
|
||||
.flush_i (reset),
|
||||
.result_o (fpu_result[i]),
|
||||
.status_o (fpu_status[i]),
|
||||
`UNUSED_PIN (tag_o),
|
||||
`UNUSED_PIN (out_valid_o),
|
||||
`UNUSED_PIN (tag_o),
|
||||
`UNUSED_PIN (out_valid_o),
|
||||
.out_ready_i (fpu_ready_out),
|
||||
`UNUSED_PIN (busy_o)
|
||||
`UNUSED_PIN (busy_o)
|
||||
);
|
||||
end
|
||||
end
|
||||
|
|
20
hw/rtl/interfaces/VX_perf_cache_if.v
Normal file
20
hw/rtl/interfaces/VX_perf_cache_if.v
Normal file
|
@ -0,0 +1,20 @@
|
|||
`ifndef VX_PERF_CACHE_IF
|
||||
`define VX_PERF_CACHE_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_perf_cache_if ();
|
||||
|
||||
wire [63:0] reads;
|
||||
wire [63:0] writes;
|
||||
wire [63:0] read_misses;
|
||||
wire [63:0] write_misses;
|
||||
wire [63:0] evictions;
|
||||
wire [63:0] mshr_stalls;
|
||||
wire [63:0] crsp_stalls;
|
||||
wire [63:0] dreq_stalls;
|
||||
wire [63:0] pipe_stalls;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
17
hw/rtl/interfaces/VX_perf_memsys_if.v
Normal file
17
hw/rtl/interfaces/VX_perf_memsys_if.v
Normal file
|
@ -0,0 +1,17 @@
|
|||
`ifndef VX_PERF_MEMSYS_IF
|
||||
`define VX_PERF_MEMSYS_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_perf_memsys_if ();
|
||||
|
||||
VX_perf_cache_if dcache_if;
|
||||
VX_perf_cache_if icache_if;
|
||||
|
||||
wire [63:0] dram_latency;
|
||||
wire [63:0] dram_requests;
|
||||
wire [63:0] dram_responses;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
25
hw/rtl/interfaces/VX_perf_pipeline_if.v
Normal file
25
hw/rtl/interfaces/VX_perf_pipeline_if.v
Normal file
|
@ -0,0 +1,25 @@
|
|||
`ifndef VX_PERF_PIPELINE_IF
|
||||
`define VX_PERF_PIPELINE_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_perf_pipeline_if ();
|
||||
// from pipeline
|
||||
wire [63:0] icache_stalls;
|
||||
wire [63:0] ibuffer_stalls;
|
||||
// from issue
|
||||
wire [63:0] scoreboard_stalls;
|
||||
// from execute
|
||||
wire [63:0] lsu_stalls;
|
||||
wire [63:0] csr_stalls;
|
||||
wire [63:0] alu_stalls;
|
||||
wire [63:0] gpu_stalls;
|
||||
`ifdef EXT_M_ENABLE
|
||||
wire [63:0] mul_stalls;
|
||||
`endif
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire [63:0] fpu_stalls;
|
||||
`endif
|
||||
endinterface
|
||||
|
||||
`endif
|
Loading…
Add table
Add a link
Reference in a new issue