mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 13:27:29 -04:00
runtime perf counter
This commit is contained in:
parent
6c1b08f45d
commit
500afe661e
10 changed files with 95 additions and 32 deletions
|
@ -326,7 +326,7 @@
|
|||
|
||||
// Number of raster units
|
||||
`ifndef NUM_RASTER_UNITS
|
||||
`define NUM_RASTER_UNITS 1
|
||||
`define NUM_RASTER_UNITS `UP(`NUM_CORES / 16)
|
||||
`endif
|
||||
|
||||
// RASTER memory pending size
|
||||
|
@ -363,7 +363,7 @@
|
|||
|
||||
// Number of rop units
|
||||
`ifndef NUM_ROP_UNITS
|
||||
`define NUM_ROP_UNITS 1
|
||||
`define NUM_ROP_UNITS `UP(`NUM_CORES / 16)
|
||||
`endif
|
||||
|
||||
// ROP memory pending size
|
||||
|
@ -712,7 +712,7 @@
|
|||
|
||||
// Size of cache in bytes
|
||||
`ifndef L2_CACHE_SIZE
|
||||
`define L2_CACHE_SIZE 1048576
|
||||
`define L2_CACHE_SIZE 2097152
|
||||
`endif
|
||||
|
||||
// Number of banks
|
||||
|
|
|
@ -27,7 +27,7 @@ DBG_TRACE_FLAGS += -DDBG_TRACE_ROP
|
|||
|
||||
DBG_FLAGS += $(DBG_TRACE_FLAGS)
|
||||
|
||||
#CONFIGS += -DEXT_GFX_ENABLE
|
||||
CONFIGS += -DEXT_GFX_ENABLE
|
||||
|
||||
#CONFIGS += -DL1_DISABLE
|
||||
#CONFIGS += -DSM_DISABLE
|
||||
|
|
12
perf/cache/perf.sh
vendored
12
perf/cache/perf.sh
vendored
|
@ -10,17 +10,17 @@ sgemm()
|
|||
{
|
||||
echo "begin cache tests"
|
||||
|
||||
CONFIGS="-DICACHE_NUM_WAYS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' > ./perf/cache/cache_perf.log
|
||||
CONFIGS="-DICACHE_NUM_WAYS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' > ./perf/cache/cache_perf.log
|
||||
echo -e "\n**************************************\n" >> ./perf/cache/cache_perf.log
|
||||
CONFIGS="-DDCACHE_NUM_WAYS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> ./perf/cache/cache_perf.log
|
||||
CONFIGS="-DDCACHE_NUM_WAYS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> ./perf/cache/cache_perf.log
|
||||
echo -e "\n**************************************\n" >> ./perf/cache/cache_perf.log
|
||||
CONFIGS="-DICACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> ./perf/cache/cache_perf.log
|
||||
CONFIGS="-DICACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> ./perf/cache/cache_perf.log
|
||||
echo -e "\n**************************************\n" >> ./perf/cache/cache_perf.log
|
||||
CONFIGS="-DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> ./perf/cache/cache_perf.log
|
||||
CONFIGS="-DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> ./perf/cache/cache_perf.log
|
||||
echo -e "\n**************************************\n" >> ./perf/cache/cache_perf.log
|
||||
CONFIGS="-DICACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> ./perf/cache/cache_perf.log
|
||||
CONFIGS="-DICACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> ./perf/cache/cache_perf.log
|
||||
echo -e "\n**************************************\n" >> ./perf/cache/cache_perf.log
|
||||
CONFIGS="-DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> ./perf/cache/cache_perf.log
|
||||
CONFIGS="-DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> ./perf/cache/cache_perf.log
|
||||
|
||||
echo "cache tests done!"
|
||||
}
|
||||
|
|
|
@ -1,27 +1,23 @@
|
|||
#!/bin/bash
|
||||
|
||||
LOG=./perf/draw3d/draw3d_perf.log
|
||||
declare -a traces=(vase filmtv skybox coverflow evilskull polybump tekkaman carnival)
|
||||
|
||||
# exit when any command fails
|
||||
set -e
|
||||
|
||||
# ensure build
|
||||
make -s
|
||||
WIDTH=1920
|
||||
HEIGHT=1080
|
||||
|
||||
LOG_FILE=./perf/draw3d/perf_${DEVICE_FAMILY}_${WIDTH}_${HEIGHT}.log
|
||||
|
||||
declare -a traces=(vase filmtv skybox coverflow evilskull polybump tekkaman carnival)
|
||||
|
||||
# draw3d benchmarks
|
||||
draw3d(){
|
||||
echo > $LOG # clear log
|
||||
for TRACE in "${traces[@]}"
|
||||
echo > $LOG_FILE # clear log
|
||||
for trace in "${traces[@]}"
|
||||
do
|
||||
echo -e "\n**************************************\n" >> $LOG
|
||||
echo -e "draw3d $TRACE benchmark\n" >> $LOG
|
||||
if [ $ALL = true ]
|
||||
then
|
||||
CONFIGS="-DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-t$TRACE.cgltrace -w512 -h512" >> $LOG
|
||||
else
|
||||
CONFIGS="-DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-t$TRACE.cgltrace -w512 -h512" | grep 'PERF' >> $LOG
|
||||
fi
|
||||
echo -e "\n**************************************\n" >> $LOG_FILE
|
||||
echo -e "draw3d $trace benchmark\n" >> $LOG_FILE
|
||||
CONFIGS="-DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=fpga --app=draw3d --args="-t$trace.cgltrace -w${WIDTH} -h${HEIGHT}" | grep 'Total elapsed time:' >> $LOG_FILE
|
||||
done
|
||||
echo "draw3d tests done!"
|
||||
}
|
||||
|
|
|
@ -18,7 +18,7 @@ rtlsim()
|
|||
for i in 1 4 16
|
||||
do
|
||||
echo "NUM_CORES = " $i >> $LOG
|
||||
CONFIGS="-DEXT_ROP_ENABLE" ./ci/blackbox.sh --driver=rtlsim --cores=$i --warps=4 --threads=4 --app=rop --args="-w128 -h128 -b -d" --perf=4 | grep 'PERF' >> $LOG
|
||||
CONFIGS="-DEXT_ROP_ENABLE" ./ci/blackbox.sh --driver=rtlsim --cores=$i --warps=4 --threads=4 --app=rop --args="-w128 -h128 -b -d" --perf=4 | grep 'PERF\|Total elapsed time' >> $LOG
|
||||
echo "**************************************" >> $LOG
|
||||
done
|
||||
|
||||
|
|
|
@ -535,7 +535,62 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||
// release allocated resources
|
||||
vx_buf_free(staging_buf);
|
||||
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Read one performance counter from the device.
//
// device  : device handle.
// counter : CSR index of the counter to read (e.g. CSR_MCYCLE).
// core_id : index of the core to query, or -1 to aggregate over all cores.
// value   : receives the result on success.
//
// When several cores are read, CSR_MCYCLE is aggregated by taking the
// maximum across cores; every other counter is summed.
//
// Returns 0 on success, -1 if core_id is out of range, or the error code
// of the failing runtime call.
extern int vx_perf_counter(vx_device_h device, int counter, int core_id, uint64_t* value) {
  int ret = 0;

  uint64_t num_cores;
  ret = vx_dev_caps(device, VX_CAPS_MAX_CORES, &num_cores);
  if (ret != 0)
    return ret;

  // -1 is the only valid negative value (meaning "all cores"); anything
  // lower would otherwise wrap around when converted to an unsigned bound.
  if (core_id < -1 || core_id >= (int)num_cores) {
    std::cout << "error: core_id out of range" << std::endl;
    return -1;
  }

  // size of the per-core counter region copied back from the device
  uint64_t mpm_mem_size = 64 * sizeof(uint32_t);

  vx_buffer_h staging_buf;
  ret = vx_buf_alloc(device, mpm_mem_size, &staging_buf);
  if (ret != 0)
    return ret;

  auto staging_ptr = (uint32_t*)vx_host_ptr(staging_buf);

  uint64_t _value = 0;

  // Select the range of cores to visit: [first_core, end_core).
  uint64_t first_core = 0;
  uint64_t end_core = num_cores;
  if (core_id != -1) {
    first_core = (uint64_t)core_id;
    end_core = first_core + 1;
  }

  // Start at first_core (not 0) so a single-core query reads only that core.
  for (uint64_t i = first_core; i < end_core; ++i) {
    uint64_t mpm_mem_addr = IO_CSR_ADDR + i * mpm_mem_size;
    ret = vx_copy_from_dev(staging_buf, mpm_mem_addr, mpm_mem_size, 0);
    if (ret != 0) {
      vx_buf_free(staging_buf);
      return ret;
    }

    auto per_core_value = get_csr_64(staging_ptr, counter);
    if (counter == CSR_MCYCLE) {
      _value = std::max<uint64_t>(per_core_value, _value);
    } else {
      _value += per_core_value;
    }
  }

  // release allocated resources
  vx_buf_free(staging_buf);

  // output
  *value = _value;

  return 0;
}
|
||||
|
||||
// Deprecated API functions
|
||||
|
|
|
@ -91,8 +91,9 @@ int vx_upload_kernel_bytes(vx_device_h device, const void* content, uint64_t siz
|
|||
// upload kernel file to device
|
||||
int vx_upload_kernel_file(vx_device_h device, const char* filename);
|
||||
|
||||
// dump performance counters
|
||||
// performance counters
|
||||
int vx_dump_perf(vx_device_h device, FILE* stream);
|
||||
int vx_perf_counter(vx_device_h device, int counter, int core_id, uint64_t* value);
|
||||
|
||||
//////////////////////////// DEPRECATED FUNCTIONS /////////////////////////////
|
||||
int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr);
|
||||
|
|
|
@ -137,6 +137,9 @@ int render(const CGLTrace& trace) {
|
|||
|
||||
uint32_t draw_idx = 0;
|
||||
|
||||
uint64_t instrs = 0;
|
||||
uint64_t cycles = 0;
|
||||
|
||||
// render each draw call
|
||||
for (auto& drawcall : trace.drawcalls) {
|
||||
auto& states = drawcall.states;
|
||||
|
@ -344,9 +347,16 @@ int render(const CGLTrace& trace) {
|
|||
printf("Elapsed time: %lg ms\n", elapsed);
|
||||
|
||||
if (draw_idx < trace.drawcalls.size()-1) {
|
||||
vx_dump_perf(device, stdout);
|
||||
vx_dump_perf(device, stdout);
|
||||
}
|
||||
|
||||
uint64_t instrs_;
|
||||
uint64_t cycles_;
|
||||
RT_CHECK(vx_perf_counter(device, CSR_MCYCLE, -1, &cycles_));
|
||||
RT_CHECK(vx_perf_counter(device, CSR_MINSTRET, -1, &instrs_));
|
||||
cycles += cycles_;
|
||||
instrs += instrs_;
|
||||
|
||||
++draw_idx;
|
||||
}
|
||||
|
||||
|
@ -364,7 +374,8 @@ int render(const CGLTrace& trace) {
|
|||
|
||||
auto time_end = std::chrono::high_resolution_clock::now();
|
||||
double elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(time_end - time_begin).count();
|
||||
printf("Total elapsed time: %lg ms\n", elapsed);
|
||||
float IPC = (float)(double(instrs) / double(cycles));
|
||||
printf("Total elapsed time: %lg ms, instrs=%ld, cycles=%ld, IPC=%f\n", elapsed, instrs, cycles, IPC);
|
||||
|
||||
// save output image
|
||||
std::cout << "save output image" << std::endl;
|
||||
|
|
|
@ -33,7 +33,7 @@ CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors
|
|||
|
||||
CXXFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_KN_PATH)/../hw -I$(VORTEX_KN_PATH)/../sim/common -I$(VORTEX_KN_PATH)/../third_party
|
||||
|
||||
LDFLAGS += -L$(VORTEX_RT_PATH)/stub -lvortex $(VORTEX_KN_PATH)/../third_party/cocogfx/libcocogfx.a -lpng -lz -lboost_serialization
|
||||
LDFLAGS += -L/homes/tinebp/tools/boost/lib -L$(VORTEX_RT_PATH)/stub -lvortex $(VORTEX_KN_PATH)/../third_party/cocogfx/libcocogfx.a -lpng -lz -lboost_serialization
|
||||
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
|
|
|
@ -22,7 +22,7 @@ CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors
|
|||
|
||||
CXXFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_KN_PATH)/../hw -I$(VORTEX_KN_PATH)/../sim/common -I$(VORTEX_KN_PATH)/../third_party
|
||||
|
||||
LDFLAGS += -L$(VORTEX_RT_PATH)/stub -lvortex $(VORTEX_KN_PATH)/../third_party/cocogfx/libcocogfx.a -lpng -lz
|
||||
LDFLAGS += -L/homes/tinebp/tools/boost/lib -L$(VORTEX_RT_PATH)/stub -lvortex $(VORTEX_KN_PATH)/../third_party/cocogfx/libcocogfx.a -lpng -lz
|
||||
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue