/*
 * Mirror of https://github.com/vortexgpgpu/vortex.git
 * (synced 2025-04-24 05:47:35 -04:00; 281 lines, 6.4 KiB, C).
 */
#define __STDC_FORMAT_MACROS
#include <inttypes.h>

#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <dirent.h>
#include <err.h>
#include <sys/prctl.h>
#include <sys/types.h>
#include <unistd.h>

#include "perfmon.h"

/* printf-style template for the per-thread log file name; %d receives the
 * thread slot index used by perf_end(). */
static const char *const mxpa_profile_log = "mxpa_profile_%d.log";

/* Profiling switch: perf_init() turns it on only when MXPA_PROFILE is set;
 * perf_start()/perf_end() do nothing while it is 0. */
static int enabled = 0;

// Event names that the CPU of the machine "quartet" supports (per the
// original author). Use libpfm4's check_events tool to validate or adapt
// the list for another machine.
// The list is NULL-terminated. Nothing in this file reads it: perf_start()
// takes its event list from the MXPA_PROFILE environment variable instead.
static const char *const gen_events_all[] = {
    /* last-level cache */
    "snb_ep::L3_LAT_CACHE:MISS",
    "snb_ep::L3_LAT_CACHE:REFERENCE",

    /* L2 */
    "snb_ep::L2_RQSTS:ALL_DEMAND_DATA_RD",
    "snb_ep::L2_RQSTS:ALL_DEMAND_RD_HIT",

    /* L1 data cache */
    "perf::PERF_COUNT_HW_CACHE_L1D:ACCESS",
    "perf::PERF_COUNT_HW_CACHE_L1D:MISS",

    "perf::PERF_COUNT_HW_CACHE_L1D:PREFETCH",
    "perf::L1-DCACHE-PREFETCH-MISSES",

    /* L1 instruction cache */
    "perf::PERF_COUNT_HW_CACHE_L1I:READ",
    "perf::PERF_COUNT_HW_CACHE_L1I:MISS",

    /* TLBs, scheduling, cycles */
    "perf::ITLB-LOADS",
    "perf::ITLB-LOAD-MISSES",
    "perf::DTLB-LOADS",
    "perf::DTLB-LOAD-MISSES",
    "perf::CONTEXT-SWITCHES",
    "perf::CPU-MIGRATIONS",
    "perf::CYCLES",
    "snb_ep::RESOURCE_STALLS:ANY",

    /* instructions and branches */
    "perf::INSTRUCTIONS",
    "perf::BRANCH-INSTRUCTIONS",
    "perf::BRANCHES",
    "perf::BRANCH-MISSES",
    NULL
};

#define NUM_MAX_THREAD 256
|
|
|
|
static perf_event_desc_t *g_fds[NUM_MAX_THREAD];
|
|
static int g_nthreads;
|
|
static int num_fds = 0;
|
|
|
|
/* note: unsafe for multithreading */
|
|
static uint64_t* begins;
|
|
|
|
static void
|
|
fetch_counts(perf_event_desc_t *fds, int num_fds)
|
|
{
|
|
if (begins == 0) {
|
|
begins = (uint64_t*) malloc(num_fds * sizeof(uint64_t));
|
|
memset(begins, 0, num_fds * sizeof(uint64_t));
|
|
}
|
|
|
|
uint64_t val;
|
|
uint64_t values[3];
|
|
double ratio;
|
|
int i;
|
|
ssize_t ret;
|
|
|
|
/*
|
|
* now read the results. We use pfp_event_count because
|
|
* libpfm guarantees that counters for the events always
|
|
* come first.
|
|
*/
|
|
memset(values, 0, sizeof(values));
|
|
|
|
for (i = 0; i < num_fds; i++) {
|
|
ret = read(fds[i].fd, values, sizeof(values));
|
|
if (ret < (ssize_t)sizeof(values)) {
|
|
if (ret == -1)
|
|
fprintf(stderr, "cannot read results: %s", strerror(errno));
|
|
else
|
|
warnx("could not read event%d", i);
|
|
}
|
|
/*
|
|
* scaling is systematic because we may be sharing the PMU and
|
|
* thus may be multiplexed
|
|
*/
|
|
int valid = 0;
|
|
val = perf_scale_valid(values, &valid);
|
|
if (valid == 0) printf ("@i=%d, v0=%llu, v1=%llu, v2=%llu, val=%llu\n", i, values[0], values[1], values[2], val);
|
|
ratio = perf_scale_ratio(values);
|
|
|
|
begins[i] = val;
|
|
}
|
|
}
|
|
|
|
static void
|
|
print_counts(perf_event_desc_t *fds, int num_fds, const char *msg, FILE* fp)
|
|
{
|
|
uint64_t val;
|
|
uint64_t values[3];
|
|
double ratio;
|
|
int i;
|
|
ssize_t ret;
|
|
|
|
#if 0
|
|
fprintf(fp, "%s ------------------------------------\n", msg);
|
|
#else
|
|
fprintf(fp, "method=%s", msg);
|
|
#endif
|
|
|
|
/*
|
|
* now read the results. We use pfp_event_count because
|
|
* libpfm guarantees that counters for the events always
|
|
* come first.
|
|
*/
|
|
memset(values, 0, sizeof(values));
|
|
|
|
for (i = 0; i < num_fds; i++) {
|
|
|
|
ret = read(fds[i].fd, values, sizeof(values));
|
|
if (ret < (ssize_t)sizeof(values)) {
|
|
if (ret == -1)
|
|
fprintf(stderr, "cannot read results: %s", strerror(errno));
|
|
else
|
|
warnx("could not read event%d", i);
|
|
}
|
|
/*
|
|
* scaling is systematic because we may be sharing the PMU and
|
|
* thus may be multiplexed
|
|
*/
|
|
int valid;
|
|
val = perf_scale_valid(values, &valid);
|
|
if (valid == 0) printf ("!i=%d, v0=%llu, v1=%llu, v2=%llu, val=%llu\n", i, values[0], values[1], values[2], val);
|
|
ratio = perf_scale_ratio(values);
|
|
|
|
#if 0
|
|
fprintf(fp, "%s %'20"PRIu64" %s (%.2f%% scaling, raw=%'"PRIu64", ena=%'"PRIu64", run=%'"PRIu64")\n",
|
|
"-", // msg,
|
|
val,
|
|
fds[i].name,
|
|
(1.0-ratio)*100.0,
|
|
values[0],
|
|
values[1],
|
|
values[2]);
|
|
#else
|
|
fprintf (fp, " %s=%llu", fds[i].name, val); // valid ? val : 0);
|
|
#endif
|
|
}
|
|
fprintf (fp, "\n");
|
|
}
|
|
|
|
/*
 * Open the log file fname for appending, creating it if it does not exist.
 *
 * Returns the stream, or NULL if fopen() fails (errno is set by fopen).
 * The caller owns the stream and must fclose() it.
 *
 * The earlier version first probed the file with mode "r", but both outcomes
 * of that probe ended in fopen(fname, "a"), so the probe is gone.
 */
FILE* open_log_file(char* fname) {
    return fopen(fname, "a");
}

void perf_init() {
|
|
static int init = 0;
|
|
|
|
if (init) return;
|
|
init = 1;
|
|
|
|
char* prof_envvar = getenv("MXPA_PROFILE");
|
|
if (prof_envvar) {
|
|
enabled = 1;
|
|
} else {
|
|
return;
|
|
}
|
|
|
|
pfm_initialize();
|
|
}
|
|
|
|
static void get_tids(int* tids, int* number) {
|
|
char path[32];
|
|
int pid = getpid();
|
|
sprintf (path, "/proc/%d/task", pid);
|
|
struct dirent *de=NULL;
|
|
DIR *d=NULL;
|
|
d=opendir(path);
|
|
assert(d != NULL && "Null for opendir");
|
|
// Loop while not NULL
|
|
char pid_str[8];
|
|
char last[8];
|
|
sprintf (pid_str, "%d", pid);
|
|
int n = 0;
|
|
while(de = readdir(d)) {
|
|
if (!strcmp(de->d_name, ".")) continue;
|
|
if (!strcmp(de->d_name, "..")) continue;
|
|
if (!strcmp(de->d_name, pid_str)) continue;
|
|
*tids++ = atoi(de->d_name);
|
|
n++;
|
|
}
|
|
*number = n;
|
|
// printf ("Sampling thread %d\n", tid);
|
|
closedir(d);
|
|
}
|
|
|
|
void perf_start(const char* kname) {
|
|
if (!enabled) return;
|
|
|
|
char* prof_envvar = getenv("MXPA_PROFILE");
|
|
|
|
int tids[32];
|
|
int ntid;
|
|
get_tids(tids, &ntid);
|
|
g_nthreads = ntid;
|
|
|
|
int n;
|
|
for (n = 0; n < ntid; n++) {
|
|
int ret;
|
|
ret = perf_setup_list_events(prof_envvar, &(g_fds[n]), &num_fds);
|
|
perf_event_desc_t *fds = g_fds[n];
|
|
int cpu = -1;
|
|
int group_fd = -1;
|
|
int pid = tids[n];
|
|
fds[0].fd = -1;
|
|
int i;
|
|
for(i=0; i < num_fds; i++) {
|
|
fds[i].hw.read_format = PERF_FORMAT_SCALE;
|
|
fds[i].hw.disabled = 1; /* do not start now */
|
|
fds[i].hw.inherit = 1; /* XXX child process will inherit, when forked only? */
|
|
|
|
/* each event is in an independent group (multiplexing likely) */
|
|
fds[i].fd = perf_event_open(&fds[i].hw, pid, cpu, group_fd, 0);
|
|
if (fds[i].fd == -1) {
|
|
fprintf(stderr, "cannot open event %d\n", i);
|
|
exit(2);
|
|
}
|
|
}
|
|
}
|
|
prctl(PR_TASK_PERF_EVENTS_ENABLE);
|
|
}
|
|
|
|
void perf_end(const char* kname) {
|
|
if (!enabled) return;
|
|
int i, n;
|
|
prctl(PR_TASK_PERF_EVENTS_DISABLE);
|
|
|
|
static int first_time = 1;
|
|
if (first_time) {
|
|
first_time = 0;
|
|
char name[128];
|
|
for (n = 0; n < g_nthreads; n++) {
|
|
sprintf (name, mxpa_profile_log, n);
|
|
FILE* fp = fopen(name, "w");
|
|
fclose(fp);
|
|
}
|
|
}
|
|
|
|
char name[128];
|
|
for (n = 0; n < g_nthreads; n++) {
|
|
sprintf (name, mxpa_profile_log, n);
|
|
FILE* fp = open_log_file(name);
|
|
perf_event_desc_t *fds = g_fds[n];
|
|
print_counts(fds, num_fds, kname, fp);
|
|
for (i = 0; i < num_fds; i++) close(fds[i].fd);
|
|
perf_free_fds(fds, num_fds);
|
|
g_fds[n] = fds = NULL;
|
|
fclose(fp);
|
|
}
|
|
}
|
|
|
|
/*
 * Alternative entry point that forwards n unchanged to perf_start().
 * (char * converts to const char * implicitly, so no cast is needed.)
 */
void pin_trace_enable(char* n) {
    perf_start(n);
}

/*
 * Alternative entry point that forwards n unchanged to perf_end().
 * (char * converts to const char * implicitly, so no cast is needed.)
 */
void pin_trace_disable(char* n) {
    perf_end(n);
}