vortex/tests/opencl/cutcp/parboil_opencl.c
2021-06-13 17:42:04 -07:00

1394 lines
40 KiB
C

/*
* (c) 2007 The Board of Trustees of the University of Illinois.
*/
#include <parboil.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include <CL/cl.h>
#if _POSIX_VERSION >= 200112L
# include <sys/time.h>
#endif
//#include "perfmon.h"
cl_context *clContextPtr;
cl_command_queue *clCommandQueuePtr;
// #define DISABLE_PARBOIL_TIMER
/*****************************************************************************/
/* Timer routines */
static int is_async(enum pb_TimerID timer)
{
return (timer == pb_TimerID_KERNEL) ||
(timer == pb_TimerID_COPY_ASYNC);
}
static int is_blocking(enum pb_TimerID timer)
{
return (timer == pb_TimerID_COPY) || (timer == pb_TimerID_NONE);
}
#define INVALID_TIMERID pb_TimerID_LAST
static int asyncs_outstanding(struct pb_TimerSet* timers)
{
return (timers->async_markers != NULL) &&
(timers->async_markers->timerID != INVALID_TIMERID);
}
static struct pb_async_time_marker_list *
get_last_async(struct pb_TimerSet* timers)
{
/* Find the last event recorded thus far */
struct pb_async_time_marker_list * last_event = timers->async_markers;
if(last_event != NULL && last_event->timerID != INVALID_TIMERID) {
while(last_event->next != NULL &&
last_event->next->timerID != INVALID_TIMERID)
last_event = last_event->next;
return last_event;
} else
return NULL;
}
static void insert_marker(struct pb_TimerSet* tset, enum pb_TimerID timer)
{
cl_int ciErrNum = CL_SUCCESS;
struct pb_async_time_marker_list ** new_event = &(tset->async_markers);
while(*new_event != NULL && (*new_event)->timerID != INVALID_TIMERID) {
new_event = &((*new_event)->next);
}
if(*new_event == NULL) {
*new_event = (struct pb_async_time_marker_list *)
malloc(sizeof(struct pb_async_time_marker_list));
(*new_event)->marker = calloc(1, sizeof(cl_event));
/*
// I don't think this is needed at all. I believe clEnqueueMarker 'creates' the event
#if ( __OPENCL_VERSION__ >= CL_VERSION_1_1 )
fprintf(stderr, "Creating Marker [%d]\n", timer);
*((cl_event *)((*new_event)->marker)) = clCreateUserEvent(*clContextPtr, &ciErrNum);
if (ciErrNum != CL_SUCCESS) {
fprintf(stderr, "Error Creating User Event Object!\n");
}
ciErrNum = clSetUserEventStatus(*((cl_event *)((*new_event)->marker)), CL_QUEUED);
if (ciErrNum != CL_SUCCESS) {
fprintf(stderr, "Error Setting User Event Status!\n");
}
#endif
*/
(*new_event)->next = NULL;
}
/* valid event handle now aquired: insert the event record */
(*new_event)->label = NULL;
(*new_event)->timerID = timer;
ciErrNum = clEnqueueMarker(*clCommandQueuePtr, (cl_event *)(*new_event)->marker);
if (ciErrNum != CL_SUCCESS) {
fprintf(stderr, "Error Enqueueing Marker!\n");
}
}
static void insert_submarker(struct pb_TimerSet* tset, char *label, enum pb_TimerID timer)
{
cl_int ciErrNum = CL_SUCCESS;
struct pb_async_time_marker_list ** new_event = &(tset->async_markers);
while(*new_event != NULL && (*new_event)->timerID != INVALID_TIMERID) {
new_event = &((*new_event)->next);
}
if(*new_event == NULL) {
*new_event = (struct pb_async_time_marker_list *)
malloc(sizeof(struct pb_async_time_marker_list));
(*new_event)->marker = calloc(1, sizeof(cl_event));
/*
#if ( __OPENCL_VERSION__ >= CL_VERSION_1_1 )
fprintf(stderr, "Creating SubMarker %s[%d]\n", label, timer);
*((cl_event *)((*new_event)->marker)) = clCreateUserEvent(*clContextPtr, &ciErrNum);
if (ciErrNum != CL_SUCCESS) {
fprintf(stderr, "Error Creating User Event Object!\n");
}
ciErrNum = clSetUserEventStatus(*((cl_event *)((*new_event)->marker)), CL_QUEUED);
if (ciErrNum != CL_SUCCESS) {
fprintf(stderr, "Error Setting User Event Status!\n");
}
#endif
*/
(*new_event)->next = NULL;
}
/* valid event handle now aquired: insert the event record */
(*new_event)->label = label;
(*new_event)->timerID = timer;
ciErrNum = clEnqueueMarker(*clCommandQueuePtr, (cl_event *)(*new_event)->marker);
if (ciErrNum != CL_SUCCESS) {
fprintf(stderr, "Error Enqueueing Marker!\n");
}
}
/* Assumes that all recorded events have completed */
static pb_Timestamp record_async_times(struct pb_TimerSet* tset)
{
struct pb_async_time_marker_list * next_interval = NULL;
struct pb_async_time_marker_list * last_marker = get_last_async(tset);
pb_Timestamp total_async_time = 0;
enum pb_TimerID timer;
for(next_interval = tset->async_markers; next_interval != last_marker;
next_interval = next_interval->next) {
cl_ulong command_start=0, command_end=0;
cl_int ciErrNum = CL_SUCCESS;
ciErrNum = clGetEventProfilingInfo(*((cl_event *)next_interval->marker), CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &command_start, NULL);
if (ciErrNum != CL_SUCCESS) {
fprintf(stderr, "Error getting first EventProfilingInfo: %d\n", ciErrNum);
}
ciErrNum = clGetEventProfilingInfo(*((cl_event *)next_interval->next->marker), CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &command_end, NULL);
if (ciErrNum != CL_SUCCESS) {
fprintf(stderr, "Error getting second EventProfilingInfo: %d\n", ciErrNum);
}
pb_Timestamp interval = (pb_Timestamp) (((double)(command_end - command_start)) / 1e3);
tset->timers[next_interval->timerID].elapsed += interval;
if (next_interval->label != NULL) {
struct pb_SubTimer *subtimer = tset->sub_timer_list[next_interval->timerID]->subtimer_list;
while (subtimer != NULL) {
if ( strcmp(subtimer->label, next_interval->label) == 0) {
subtimer->timer.elapsed += interval;
break;
}
subtimer = subtimer->next;
}
}
total_async_time += interval;
next_interval->timerID = INVALID_TIMERID;
}
if(next_interval != NULL)
next_interval->timerID = INVALID_TIMERID;
return total_async_time;
}
static void
accumulate_time(pb_Timestamp *accum,
pb_Timestamp start,
pb_Timestamp end)
{
//#if _POSIX_VERSION >= 200112L
*accum += end - start;
//#else
//# error "Timestamps not implemented for this system"
//#endif
}
//#if _POSIX_VERSION >= 200112L
static pb_Timestamp get_time()
{
//struct timeval tv;
//gettimeofday(&tv, NULL);
//return (pb_Timestamp) (tv.tv_sec * 1000000LL + tv.tv_usec);
return 0;
}
//#else
//# error "no supported time libraries are available on this platform"
//#endif
void
pb_ResetTimer(struct pb_Timer *timer)
{
//#ifndef DISABLE_PARBOIL_TIMER
timer->state = pb_Timer_STOPPED;
//#if _POSIX_VERSION >= 200112L
timer->elapsed = 0;
//#else
//# error "pb_ResetTimer: not implemented for this system"
//#endif
//#endif
}
void
pb_StartTimer(struct pb_Timer *timer)
{
/*#ifndef DISABLE_PARBOIL_TIMER
if (timer->state != pb_Timer_STOPPED) {
fputs("Ignoring attempt to start a running timer\n", stderr);
return;
}
timer->state = pb_Timer_RUNNING;
#if _POSIX_VERSION >= 200112L
{
struct timeval tv;
gettimeofday(&tv, NULL);
timer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
}
#else
# error "pb_StartTimer: not implemented for this system"
#endif
#endif*/
}
void
pb_StartTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer)
{
/*#ifndef DISABLE_PARBOIL_TIMER
unsigned int numNotStopped = 0x3; // 11
if (timer->state != pb_Timer_STOPPED) {
fputs("Warning: Timer was not stopped\n", stderr);
numNotStopped &= 0x1; // Zero out 2^1
}
if (subtimer->state != pb_Timer_STOPPED) {
fputs("Warning: Subtimer was not stopped\n", stderr);
numNotStopped &= 0x2; // Zero out 2^0
}
if (numNotStopped == 0x0) {
fputs("Ignoring attempt to start running timer and subtimer\n", stderr);
return;
}
timer->state = pb_Timer_RUNNING;
subtimer->state = pb_Timer_RUNNING;
#if _POSIX_VERSION >= 200112L
{
struct timeval tv;
gettimeofday(&tv, NULL);
if (numNotStopped & 0x2) {
timer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
}
if (numNotStopped & 0x1) {
subtimer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
}
}
#else
# error "pb_StartTimer: not implemented for this system"
#endif
#endif*/
}
void
pb_StopTimer(struct pb_Timer *timer)
{
/*#ifndef DISABLE_PARBOIL_TIMER
pb_Timestamp fini;
if (timer->state != pb_Timer_RUNNING) {
fputs("Ignoring attempt to stop a stopped timer\n", stderr);
return;
}
timer->state = pb_Timer_STOPPED;
#if _POSIX_VERSION >= 200112L
{
struct timeval tv;
gettimeofday(&tv, NULL);
fini = tv.tv_sec * 1000000LL + tv.tv_usec;
}
#else
# error "pb_StopTimer: not implemented for this system"
#endif
accumulate_time(&timer->elapsed, timer->init, fini);
timer->init = fini;
#endif*/
}
void pb_StopTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer) {
/*#ifndef DISABLE_PARBOIL_TIMER
pb_Timestamp fini;
unsigned int numNotRunning = 0x3; // 11
if (timer->state != pb_Timer_RUNNING) {
fputs("Warning: Timer was not running\n", stderr);
numNotRunning &= 0x1; // Zero out 2^1
}
if (subtimer->state != pb_Timer_RUNNING) {
fputs("Warning: Subtimer was not running\n", stderr);
numNotRunning &= 0x2; // Zero out 2^0
}
if (numNotRunning == 0x0) {
fputs("Ignoring attempt to stop stopped timer and subtimer\n", stderr);
return;
}
timer->state = pb_Timer_STOPPED;
subtimer->state = pb_Timer_STOPPED;
#if _POSIX_VERSION >= 200112L
{
struct timeval tv;
gettimeofday(&tv, NULL);
fini = tv.tv_sec * 1000000LL + tv.tv_usec;
}
#else
# error "pb_StopTimer: not implemented for this system"
#endif
if (numNotRunning & 0x2) {
accumulate_time(&timer->elapsed, timer->init, fini);
timer->init = fini;
}
if (numNotRunning & 0x1) {
accumulate_time(&subtimer->elapsed, subtimer->init, fini);
subtimer->init = fini;
}
#endif*/
}
/* Get the elapsed time in seconds. */
double
pb_GetElapsedTime(struct pb_Timer *timer)
{
/*double ret;
#ifndef DISABLE_PARBOIL_TIMER
if (timer->state != pb_Timer_STOPPED) {
fputs("Elapsed time from a running timer is inaccurate\n", stderr);
}
#if _POSIX_VERSION >= 200112L
ret = timer->elapsed / 1e6;
#else
# error "pb_GetElapsedTime: not implemented for this system"
#endif
#endif
return ret;*/
return 0;
}
void
pb_InitializeTimerSet(struct pb_TimerSet *timers)
{
/*#ifndef DISABLE_PARBOIL_TIMER
int n;
timers->wall_begin = 0; //get_time();
timers->current = pb_TimerID_NONE;
timers->async_markers = NULL;
for (n = 0; n < pb_TimerID_LAST; n++) {
pb_ResetTimer(&timers->timers[n]);
timers->sub_timer_list[n] = NULL;
}
#endif*/
}
void pb_SetOpenCL(void *p_clContextPtr, void *p_clCommandQueuePtr) {
clContextPtr = ((cl_context *)p_clContextPtr);
clCommandQueuePtr = ((cl_command_queue *)p_clCommandQueuePtr);
}
void
pb_AddSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID pb_Category) {
/*#ifndef DISABLE_PARBOIL_TIMER
struct pb_SubTimer *subtimer = (struct pb_SubTimer *) malloc
(sizeof(struct pb_SubTimer));
int len = strlen(label);
subtimer->label = (char *) malloc (sizeof(char)*(len+1));
sprintf(subtimer->label, "%s\0", label);
pb_ResetTimer(&subtimer->timer);
subtimer->next = NULL;
struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[pb_Category];
if (subtimerlist == NULL) {
subtimerlist = (struct pb_SubTimerList *) calloc
(1, sizeof(struct pb_SubTimerList));
subtimerlist->subtimer_list = subtimer;
timers->sub_timer_list[pb_Category] = subtimerlist;
} else {
// Append to list
struct pb_SubTimer *element = subtimerlist->subtimer_list;
while (element->next != NULL) {
element = element->next;
}
element->next = subtimer;
}
#endif*/
}
void
pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer)
{
#if 0
#ifndef DISABLE_PARBOIL_TIMER
/* Stop the currently running timer */
if (timers->current != pb_TimerID_NONE) {
struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current];
struct pb_SubTimer *currSubTimer = (subtimerlist != NULL) ? subtimerlist->current : NULL;
if (!is_async(timers->current) ) {
if (timers->current != timer) {
if (currSubTimer != NULL) {
pb_StopTimerAndSubTimer(&timers->timers[timers->current], &currSubTimer->timer);
} else {
pb_StopTimer(&timers->timers[timers->current]);
}
} else {
if (currSubTimer != NULL) {
pb_StopTimer(&currSubTimer->timer);
}
}
} else {
insert_marker(timers, timer);
if (!is_async(timer)) { // if switching to async too, keep driver going
pb_StopTimer(&timers->timers[pb_TimerID_DRIVER]);
}
}
}
pb_Timestamp currentTime = 0; //get_time();
/* The only cases we check for asynchronous task completion is
* when an overlapping CPU operation completes, or the next
* segment blocks on completion of previous async operations */
if( asyncs_outstanding(timers) &&
(!is_async(timers->current) || is_blocking(timer) ) ) {
struct pb_async_time_marker_list * last_event = get_last_async(timers);
/* CL_COMPLETE if completed */
cl_int ciErrNum = CL_SUCCESS;
cl_int async_done = CL_COMPLETE;
ciErrNum = clGetEventInfo(*((cl_event *)last_event->marker), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &async_done, NULL);
if (ciErrNum != CL_SUCCESS) {
fprintf(stderr, "Error Querying EventInfo!\n");
}
if(is_blocking(timer)) {
/* Async operations completed after previous CPU operations:
* overlapped time is the total CPU time since this set of async
* operations were first issued */
// timer to switch to is COPY or NONE
if(async_done != CL_COMPLETE) {
accumulate_time(&(timers->timers[pb_TimerID_OVERLAP].elapsed),
timers->async_begin,currentTime);
}
/* Wait on async operation completion */
ciErrNum = clWaitForEvents(1, (cl_event *)last_event->marker);
if (ciErrNum != CL_SUCCESS) {
fprintf(stderr, "Error Waiting for Events!\n");
}
pb_Timestamp total_async_time = record_async_times(timers);
/* Async operations completed before previous CPU operations:
* overlapped time is the total async time */
if(async_done == CL_COMPLETE) {
//fprintf(stderr, "Async_done: total_async_type = %lld\n", total_async_time);
timers->timers[pb_TimerID_OVERLAP].elapsed += total_async_time;
}
} else
/* implies (!is_async(timers->current) && asyncs_outstanding(timers)) */
// i.e. Current Not Async (not KERNEL/COPY_ASYNC) but there are outstanding
// so something is deeper in stack
if(async_done == CL_COMPLETE ) {
/* Async operations completed before previous CPU operations:
* overlapped time is the total async time */
timers->timers[pb_TimerID_OVERLAP].elapsed += record_async_times(timers);
}
}
/* Start the new timer */
if (timer != pb_TimerID_NONE) {
if(!is_async(timer)) {
pb_StartTimer(&timers->timers[timer]);
} else {
// toSwitchTo Is Async (KERNEL/COPY_ASYNC)
if (!asyncs_outstanding(timers)) {
/* No asyncs outstanding, insert a fresh async marker */
insert_marker(timers, timer);
timers->async_begin = currentTime;
} else if(!is_async(timers->current)) {
/* Previous asyncs still in flight, but a previous SwitchTo
* already marked the end of the most recent async operation,
* so we can rename that marker as the beginning of this async
* operation */
struct pb_async_time_marker_list * last_event = get_last_async(timers);
last_event->label = NULL;
last_event->timerID = timer;
}
if (!is_async(timers->current)) {
pb_StartTimer(&timers->timers[pb_TimerID_DRIVER]);
}
}
}
timers->current = timer;
#endif
#endif
}
void
pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID category)
{
#if 0
#ifndef DISABLE_PARBOIL_TIMER
struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current];
struct pb_SubTimer *curr = (subtimerlist != NULL) ? subtimerlist->current : NULL;
if (timers->current != pb_TimerID_NONE) {
if (!is_async(timers->current) ) {
if (timers->current != category) {
if (curr != NULL) {
pb_StopTimerAndSubTimer(&timers->timers[timers->current], &curr->timer);
} else {
pb_StopTimer(&timers->timers[timers->current]);
}
} else {
if (curr != NULL) {
pb_StopTimer(&curr->timer);
}
}
} else {
insert_submarker(timers, label, category);
if (!is_async(category)) { // if switching to async too, keep driver going
pb_StopTimer(&timers->timers[pb_TimerID_DRIVER]);
}
}
}
pb_Timestamp currentTime = 0; //get_time();
/* The only cases we check for asynchronous task completion is
* when an overlapping CPU operation completes, or the next
* segment blocks on completion of previous async operations */
if( asyncs_outstanding(timers) &&
(!is_async(timers->current) || is_blocking(category) ) ) {
struct pb_async_time_marker_list * last_event = get_last_async(timers);
/* CL_COMPLETE if completed */
cl_int ciErrNum = CL_SUCCESS;
cl_int async_done = CL_COMPLETE;
ciErrNum = clGetEventInfo(*((cl_event *)last_event->marker), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &async_done, NULL);
if (ciErrNum != CL_SUCCESS) {
fprintf(stderr, "Error Querying EventInfo!\n");
}
if(is_blocking(category)) {
/* Async operations completed after previous CPU operations:
* overlapped time is the total CPU time since this set of async
* operations were first issued */
// timer to switch to is COPY or NONE
// if it hasn't already finished, then just take now and use that as the elapsed time in OVERLAP
// anything happening after now isn't OVERLAP because everything is being stopped to wait for synchronization
// it seems that the extra sync wall time isn't being recorded anywhere
if(async_done != CL_COMPLETE)
accumulate_time(&(timers->timers[pb_TimerID_OVERLAP].elapsed),
timers->async_begin,currentTime);
/* Wait on async operation completion */
ciErrNum = clWaitForEvents(1, (cl_event *)last_event->marker);
if (ciErrNum != CL_SUCCESS) {
fprintf(stderr, "Error Waiting for Events!\n");
}
pb_Timestamp total_async_time = record_async_times(timers);
/* Async operations completed before previous CPU operations:
* overlapped time is the total async time */
// If it did finish, then accumulate all the async time that did happen into OVERLAP
// the immediately preceding EventSynchronize theoretically didn't have any effect since it was already completed.
if(async_done == CL_COMPLETE /*cudaSuccess*/)
timers->timers[pb_TimerID_OVERLAP].elapsed += total_async_time;
} else
/* implies (!is_async(timers->current) && asyncs_outstanding(timers)) */
// i.e. Current Not Async (not KERNEL/COPY_ASYNC) but there are outstanding
// so something is deeper in stack
if(async_done == CL_COMPLETE /*cudaSuccess*/) {
/* Async operations completed before previous CPU operations:
* overlapped time is the total async time */
timers->timers[pb_TimerID_OVERLAP].elapsed += record_async_times(timers);
}
// else, this isn't blocking, so just check the next time around
}
subtimerlist = timers->sub_timer_list[category];
struct pb_SubTimer *subtimer = NULL;
if (label != NULL) {
subtimer = subtimerlist->subtimer_list;
while (subtimer != NULL) {
if (strcmp(subtimer->label, label) == 0) {
break;
} else {
subtimer = subtimer->next;
}
}
}
/* Start the new timer */
if (category != pb_TimerID_NONE) {
if(!is_async(category)) {
if (subtimerlist != NULL) {
subtimerlist->current = subtimer;
}
if (category != timers->current && subtimer != NULL) {
pb_StartTimerAndSubTimer(&timers->timers[category], &subtimer->timer);
} else if (subtimer != NULL) {
pb_StartTimer(&subtimer->timer);
} else {
pb_StartTimer(&timers->timers[category]);
}
} else {
if (subtimerlist != NULL) {
subtimerlist->current = subtimer;
}
// toSwitchTo Is Async (KERNEL/COPY_ASYNC)
if (!asyncs_outstanding(timers)) {
/* No asyncs outstanding, insert a fresh async marker */
insert_submarker(timers, label, category);
timers->async_begin = currentTime;
} else if(!is_async(timers->current)) {
/* Previous asyncs still in flight, but a previous SwitchTo
* already marked the end of the most recent async operation,
* so we can rename that marker as the beginning of this async
* operation */
struct pb_async_time_marker_list * last_event = get_last_async(timers);
last_event->timerID = category;
last_event->label = label;
} // else, marker for switchToThis was already inserted
//toSwitchto is already asynchronous, but if current/prev state is async too, then DRIVER is already running
if (!is_async(timers->current)) {
pb_StartTimer(&timers->timers[pb_TimerID_DRIVER]);
}
}
}
timers->current = category;
#endif
#endif
}
void
pb_PrintTimerSet(struct pb_TimerSet *timers)
{
#if 0
#ifndef DISABLE_PARBOIL_TIMER
pb_Timestamp wall_end = 0; //get_time();
struct pb_Timer *t = timers->timers;
struct pb_SubTimer* sub = NULL;
int maxSubLength;
const char *categories[] = {
"IO", "Kernel", "Copy", "Driver", "Copy Async", "Compute"
};
const int maxCategoryLength = 10;
int i;
for(i = 1; i < pb_TimerID_LAST-1; ++i) { // exclude NONE and OVRELAP from this format
if(pb_GetElapsedTime(&t[i]) != 0) {
// Print Category Timer
printf("%-*s: %f\n", maxCategoryLength, categories[i-1], pb_GetElapsedTime(&t[i]));
if (timers->sub_timer_list[i] != NULL) {
sub = timers->sub_timer_list[i]->subtimer_list;
maxSubLength = 0;
while (sub != NULL) {
// Find longest SubTimer label
if (strlen(sub->label) > maxSubLength) {
maxSubLength = strlen(sub->label);
}
sub = sub->next;
}
// Fit to Categories
if (maxSubLength <= maxCategoryLength) {
maxSubLength = maxCategoryLength;
}
sub = timers->sub_timer_list[i]->subtimer_list;
// Print SubTimers
while (sub != NULL) {
printf(" -%-*s: %f\n", maxSubLength, sub->label, pb_GetElapsedTime(&sub->timer));
sub = sub->next;
}
}
}
}
if(pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]) != 0)
printf("CPU/Kernel Overlap: %f\n", pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]));
float walltime = (wall_end - timers->wall_begin)/ 1e6;
printf("Timer Wall Time: %f\n", walltime);
#endif
#endif
}
void pb_DestroyTimerSet(struct pb_TimerSet * timers)
{
#ifndef DISABLE_PARBOIL_TIMER
/* clean up all of the async event markers */
struct pb_async_time_marker_list* event = timers->async_markers;
while(event != NULL) {
cl_int ciErrNum = CL_SUCCESS;
ciErrNum = clWaitForEvents(1, (cl_event *)(event)->marker);
if (ciErrNum != CL_SUCCESS) {
//fprintf(stderr, "Error Waiting for Events!\n");
}
ciErrNum = clReleaseEvent( *((cl_event *)(event)->marker) );
if (ciErrNum != CL_SUCCESS) {
fprintf(stderr, "Error Release Events!\n");
}
free((event)->marker);
struct pb_async_time_marker_list* next = ((event)->next);
free(event);
// (*event) = NULL;
event = next;
}
int i = 0;
for(i = 0; i < pb_TimerID_LAST; ++i) {
if (timers->sub_timer_list[i] != NULL) {
struct pb_SubTimer *subtimer = timers->sub_timer_list[i]->subtimer_list;
struct pb_SubTimer *prev = NULL;
while (subtimer != NULL) {
free(subtimer->label);
prev = subtimer;
subtimer = subtimer->next;
free(prev);
}
free(timers->sub_timer_list[i]);
}
}
#endif
}
static pb_Platform** ptr = NULL;
// verbosely print out list of platforms and their devices to the console.
pb_Platform**
pb_GetPlatforms() {
if (ptr == NULL) {
cl_uint num_platforms;
clGetPlatformIDs(0, NULL, &num_platforms);
if (num_platforms == 0) return NULL;
ptr = (pb_Platform **) malloc(sizeof(pb_Platform *) * (num_platforms + 1));
cl_platform_id* ids = (cl_platform_id *) malloc(num_platforms * sizeof(cl_platform_id));
clGetPlatformIDs(num_platforms, ids, NULL);
unsigned int i;
for (i = 0; i < num_platforms; i++) {
ptr[i] = (pb_Platform *) malloc(sizeof(pb_Platform));
ptr[i]->clPlatform = ids[i];
ptr[i]->contexts = NULL;
ptr[i]->in_use = 0;
ptr[i]->devices = NULL;
size_t sz;
clGetPlatformInfo(ids[i], CL_PLATFORM_NAME, 0, NULL, &sz);
char* name = (char *) malloc(sz + 1);
clGetPlatformInfo(ids[i], CL_PLATFORM_NAME, sz, name, NULL);
name[sz] = '\0';
ptr[i]->name = name;
clGetPlatformInfo(ids[i], CL_PLATFORM_VERSION, 0, NULL, &sz);
char* version = (char *) malloc(sz + 1);
clGetPlatformInfo(ids[i], CL_PLATFORM_VERSION, sz, version, NULL);
version[sz] = '\0';
ptr[i]->version = version;
}
ptr[i] = NULL;
free(ids);
}
return (pb_Platform**) ptr;
}
pb_Context*
createContext(pb_Platform* pb_platform, pb_Device* pb_device) {
pb_Context* c = (pb_Context*) malloc(sizeof(pb_Context));
cl_int clStatus;
cl_context_properties clCps[3] = {
CL_CONTEXT_PLATFORM, (cl_context_properties)(pb_platform->clPlatform), 0
};
c->clContext =
clCreateContext(clCps, 1, (cl_device_id*)&pb_device->clDevice, NULL, NULL, &clStatus);
c->clPlatformId = pb_platform->clPlatform;
c->clDeviceId = pb_device->clDevice;
c->pb_platform = pb_platform;
c->pb_device = pb_device;
pb_platform->in_use = 1;
pb_device->in_use = 1;
unsigned int i = 0;
if (pb_platform->contexts == NULL) {
pb_platform->contexts = (pb_Context**) malloc(2*sizeof(pb_Context*));
} else {
for (i = 0; pb_platform->contexts[i] != NULL; i++) {};
pb_platform->contexts = (pb_Context**) realloc(pb_platform->contexts,
(i+1)*sizeof(pb_Context*));
}
pb_platform->contexts[i+1] = NULL;
pb_platform->contexts[i] = c;
return c;
}
// choose a platform by name.
pb_Platform*
pb_GetPlatformByName(const char* name) {
pb_Platform** ps = (pb_Platform **) pb_GetPlatforms();
if (ps == NULL) return NULL;
if (name == NULL) {
return *ps;
}
while (*ps) {
if (strstr((*ps)->name, name)) break;
ps++;
}
return (pb_Platform*) *ps;
}
pb_Device**
pb_GetDevices(pb_Platform* pb_platform) {
if (pb_platform->devices == NULL) {
cl_uint num_devs;
cl_device_id* dev_ids;
clGetDeviceIDs((cl_platform_id) pb_platform->clPlatform,
CL_DEVICE_TYPE_ALL, 0, NULL, &num_devs);
if (num_devs == 0) return NULL;
pb_platform->devices =
(pb_Device **) malloc((num_devs + 1) * sizeof(pb_Device *));
dev_ids = (cl_device_id *) malloc(sizeof(cl_device_id) * num_devs);
clGetDeviceIDs((cl_platform_id) pb_platform->clPlatform,
CL_DEVICE_TYPE_ALL, num_devs, dev_ids, NULL);
unsigned int i;
for (i = 0; i < num_devs; i++) {
pb_platform->devices[i] = (pb_Device *) malloc(sizeof(pb_Device));
pb_platform->devices[i]->clDevice = dev_ids[i];
pb_platform->devices[i]->id = i;
size_t sz;
clGetDeviceInfo(dev_ids[i], CL_DEVICE_NAME, 0, NULL, &sz);
char* name = (char *) malloc(sz + 1);
clGetDeviceInfo(dev_ids[i], CL_DEVICE_NAME, sz, name, NULL);
name[sz] = '\0';
pb_platform->devices[i]->name = (char *) name;
cl_bool available;
clGetDeviceInfo(dev_ids[i], CL_DEVICE_AVAILABLE, sizeof(cl_bool), &available, NULL);
pb_platform->devices[i]->available = (int) available;
pb_platform->devices[i]->in_use = 0;
}
pb_platform->devices[i] = NULL;
}
return (pb_Device **) pb_platform->devices;
}
// choose a device by name.
static pb_Device*
pb_SelectDeviceByName(pb_Device **ds, const char* name) {
if (ds == NULL) return NULL;
if (name == NULL) return *ds;
while (*ds) {
if (strstr((*ds)->name, name)) break;
ds++;
}
return *ds;
}
// choose a device by name and set the device's 'in_use' flag.
pb_Device*
pb_GetDeviceByName(pb_Platform* pb_platform, const char* name) {
pb_Device** ds = (pb_Device **) pb_GetDevices(pb_platform);
pb_Device *d = pb_SelectDeviceByName(ds, name);
if (d) d->in_use = 1;
return d;
}
void
pb_ReleasePlatforms() {
if (!ptr) return;
pb_Platform** cur_ptr = ptr;
while (*cur_ptr) {
pb_Platform* pfptr = *cur_ptr++;
if (pfptr->devices) {
pb_Device** dvptr = pfptr->devices;
while (*dvptr) {
pb_Device* d = *dvptr++;
free(d->name);
free(d);
}
free(pfptr->devices);
}
if (pfptr->contexts) {
pb_Context** cptr = pfptr->contexts;
while (*cptr) {
free(*cptr++);
}
free(pfptr->contexts);
}
free(pfptr->name);
free(pfptr);
}
free(ptr);
ptr = NULL;
}
pb_Platform*
pb_GetPlatformByNameAndVersion(const char* name, const char* version) {
pb_Platform** ps = (pb_Platform **) pb_GetPlatforms();
if (ps == NULL) return NULL;
if (name == NULL) return *ps;
while (*ps) {
if (strstr((*ps)->name, name) && strstr((*ps)->version, version)) break;
ps++;
}
return (pb_Platform*) *ps;
}
/* Return a pointer to the device at the specified index, or NULL.
* Used by pb_GetDevice. */
static pb_Device *
select_device_by_index(pb_Device** ds, int id)
{
int i = 0;
pb_Device** p = ds;
while (*p && (i < id)) { p++; i++; }
return *p;
}
/* Return a pointer to the device with the specified type, or NULL.
* Used by pb_GetDevice. */
static pb_Device *
select_device_by_type(pb_Device** ds,
enum pb_DeviceSelectionCriterion criterion)
{
cl_device_type sought_type;
/* Determine the OpenCL device type to search for */
switch(criterion) {
case pb_Device_CPU:
sought_type = CL_DEVICE_TYPE_CPU;
break;
case pb_Device_GPU:
sought_type = CL_DEVICE_TYPE_GPU;
break;
case pb_Device_ACCELERATOR:
sought_type = CL_DEVICE_TYPE_ACCELERATOR;
break;
default:
fprintf(stderr, "pb_GetDevice: Invalid device type");
exit(-1);
}
/* Find the device */
{
pb_Device** p = ds;
cl_device_type type;
while (*p) {
clGetDeviceInfo(((cl_device_id) ((*p)->clDevice)), CL_DEVICE_TYPE,
sizeof(cl_device_type), &type, NULL);
if (type == sought_type) break;
}
return *p;
}
}
pb_Device*
pb_GetDevice(pb_Platform* pb_platform, struct pb_DeviceParam *device)
{
pb_Device** ds = (pb_Device **) pb_GetDevices(pb_platform);
// The list of devices must be nonempty
if (ds == NULL || *ds == NULL) {
fprintf(stderr, "Error: No device is found in platform: name = %s, version = %s\n.", pb_platform->name, pb_platform->version);
exit(-1);
}
pb_Device *selected_device = NULL;
if (device != NULL) {
/* Use 'device' to select and return a device.
* If unable to select a device, fall
* back on the default selection mechanism. */
switch(device->criterion) {
case pb_Device_INDEX:
selected_device = select_device_by_index(ds, device->index);
break;
case pb_Device_GPU:
case pb_Device_CPU:
case pb_Device_ACCELERATOR:
selected_device = select_device_by_type(ds, device->criterion);
break;
case pb_Device_NAME:
selected_device = pb_SelectDeviceByName(ds, device->name);
break;
default:
fprintf(stderr, "pb_GetDevice: Invalid argument");
exit(-1);
}
}
/* By default or if user-specified selection failed,
* select the first device */
if (selected_device == NULL)
selected_device = *ds;
/* Set the in_use flag */
selected_device->in_use = 1;
return selected_device;
}
pb_Device*
pb_GetDeviceByEnvVars(pb_Platform* pb_platform) {
/* Convert environment variables to a 'pb_DeviceParam' */
struct pb_DeviceParam *param = NULL;
char* device_num = getenv("PARBOIL_DEVICE_NUMBER");
if (device_num && strcmp(device_num, "")) {
int id = atoi(device_num);
param = pb_DeviceParam_index(id);
}
else {
char* device_name = getenv("PARBOIL_DEVICE_NAME");
if (device_name && strcmp(device_name, "")) {
param = pb_DeviceParam_name(strdup(device_name));
}
else {
char* device_type = getenv("PARBOIL_DEVICE_TYPE");
if (device_type && strcmp(device_type, "")) {
if (strcmp(device_type, "CPU") == 0)
param = pb_DeviceParam_cpu();
else if (strcmp(device_type, "GPU") == 0)
param = pb_DeviceParam_gpu();
else if (strcmp(device_type, "ACCELERATOR") == 0)
param = pb_DeviceParam_accelerator();
}
}
}
/* Get a device */
pb_Device *d = pb_GetDevice(pb_platform, param);
pb_FreeDeviceParam(param);
return d;
}
pb_Platform*
pb_GetPlatformByEnvVars() {
char* name = getenv("PARBOIL_PLATFORM_NAME");
char* version = getenv("PARBOIL_PLATFORM_VERSION");
/* Create a pb_PlatformParam object (or NULL) representing the data from the
* environment variables */
struct pb_PlatformParam *platform;
if (name) {
if (version) {
platform = pb_PlatformParam(strdup(name), strdup(version));
}
else {
platform = pb_PlatformParam(strdup(name), NULL);
}
}
else {
platform = NULL;
}
/* Convert to a platform */
pb_Platform *p = pb_GetPlatform(platform);
pb_FreePlatformParam(platform);
return p;
}
/* Choose an OpenCL platform based on the given command-line parameters.
* If NULL, use the default OpenCL platform. */
pb_Platform*
pb_GetPlatform(struct pb_PlatformParam *platform) {
if (platform != NULL) {
/* Try to use command-line parameters to choose platform */
char *name = platform->name;
char *version = platform->version;
if (!name) {
fprintf(stderr, "Internal error: NULL pointer");
exit(-1);
}
if (version) {
pb_Platform* p = pb_GetPlatformByNameAndVersion(name, version);
if (p) return p;
}
pb_Platform* p = pb_GetPlatformByName(name);
if (p) return p;
}
pb_Platform* p = pb_GetPlatformByName(NULL);
if (p == NULL) {
fprintf(stderr, "Error: No OpenCL platform in this system. Exiting.");
exit(-1);
}
return p;
}
//extern void perf_init();
//extern void mxpa_scheduler_init();
pb_Context*
pb_InitOpenCLContext(struct pb_Parameters* parameters) {
#if 0
pb_Platform* ps = pb_GetPlatform(parameters->platform);
if (!ps) return NULL;
pb_Device* ds = pb_GetDevice(ps, parameters->device);
if (!ds) return NULL;
/* HERE INITIALIZE TIMER */
//perf_init();
//mxpa_scheduler_init();
pb_Context* c = createContext(ps, ds);
pb_PrintPlatformInfo(c);
return c;
#endif
cl_int _err;
cl_platform_id platform_id;
cl_device_id device_id;
cl_context context;
clGetPlatformIDs(1, &platform_id, NULL);
clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL);
context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &_err);
pb_Context* c = (pb_Context*)malloc(sizeof(pb_Context));
c->clContext = context;
c->clDeviceId = device_id;
c->clPlatformId = platform_id;
c->pb_platform = (pb_Platform*)malloc(sizeof(pb_Platform));
c->pb_device = (pb_Device*)malloc(sizeof(pb_Device));
c->pb_platform->devices = (pb_Device**)malloc(sizeof(pb_Device*) * 2);
c->pb_platform->devices[0] = c->pb_device;
c->pb_platform->devices[1] = NULL;
c->pb_platform->contexts = (pb_Context**)malloc(sizeof(pb_Context*) * 2);
c->pb_platform->contexts[0] = c;
c->pb_platform->contexts[1] = NULL;
c->pb_platform->in_use = 1;
c->pb_device->in_use = 1;
return c;
}
void
pb_ReleaseOpenCLContext(pb_Context* c) {
pb_ReleasePlatforms();
}
void
pb_PrintPlatformInfo(pb_Context* c) {
/*pb_Platform** ps = pb_GetPlatforms();
if (!ps) {
fprintf (stderr, "No platform found");
return;
}
printf ("********************************************************\n");
printf ("DETECTED OPENCL PLATFORMS AND DEVICES:\n");
printf ("--------------------------------------------------------\n");
while (*ps) {
printf ("PLATFORM = %s, %s", (*ps)->name, (*ps)->version);
if (c->pb_platform == *ps) printf (" (SELECTED)");
printf ("\n");
pb_Device** ds = (pb_Device **) pb_GetDevices((*ps));
if (ds == NULL) {
printf (" + (No devices)\n");
} else {
while (*ds) {
printf (" + %d: %s", (*ds)->id, (*ds)->name);
if (c->pb_device == *ds) printf (" (SELECTED)");
printf ("\n");
ds++;
}
}
ps++;
}
printf ("********************************************************\n");*/
}
#ifdef MEASURE_KERNEL_TIME
#undef clEnqueueNDRangeKernel
//extern void pin_trace_enable(char*);
//extern void pin_trace_disable(char*);
cl_int
pb_clEnqueueNDRangeKernel(cl_command_queue q/* command_queue */,
cl_kernel k/* kernel */,
cl_uint d/* work_dim */,
const size_t * o/* global_work_offset */,
const size_t * gws/* global_work_size */,
const size_t * lws/* local_work_size */,
cl_uint n/* num_events_in_wait_list */,
const cl_event * w/* event_wait_list */,
cl_event * e/* event */) {
char buf[128];
struct timeval begin, end;
clGetKernelInfo(k, CL_KERNEL_FUNCTION_NAME, 128, buf, NULL);
#if 0
int i;
for (i = 0; i < d; i++) {
printf ("%s: %d: %d / %d\n", buf, i, gws[i], (lws == NULL ? 0 : lws[i]));
}
#endif
clFinish(q); clFlush(q);
//pin_trace_enable(buf);
//gettimeofday(&begin, NULL);
cl_int result = clEnqueueNDRangeKernel(q, k, d, o, gws, lws, n, w, e);
clFinish(q); clFlush(q);
//gettimeofday(&end, NULL);
//pin_trace_disable(buf);
//float t = (float)(end.tv_sec - begin.tv_sec) + (end.tv_usec - begin.tv_usec) / 1000000.0f;
fflush(stdout);
fflush(stderr);
//printf ("PBTIMER: %s: %f\n", buf, t);
return result;
}
#endif
void
pb_sig_float(char* c, float* p, int sz) {
int i;
double s = 0.0;
for (i = 0; i < sz; i++) s += p[i] * (float)(i+1);
printf ("[Signature] %s = %lf\n", c, s);
}
void
pb_sig_double(char* c, double* p, int sz) {
int i;
double s = 0.0;
for (i = 0; i < sz; i++) s += p[i];
printf ("[Signature] %s = %lf\n", c, s);
}
void
pb_sig_short(char* c, short* p, int sz) {
int i;
long long int s = 0;
for (i = 0; i < sz; i++) s += p[i];
printf ("[Signature] %s = %lld\n", c, s);
}
void
pb_sig_int(char* c, int* p, int sz) {
int i;
long long int s = 0;
for (i = 0; i < sz; i++) s += p[i];
printf ("[Signature] %s = %lld\n", c, s);
}
void
pb_sig_uchar(char* c, unsigned char* p, unsigned int sz) {
int i;
unsigned long long int s = 0;
for (i = 0; i < sz; i++) s += p[i];
printf ("[Signature] %s = %lld\n", c, s);
}
void pb_sig_clmem(char* s, cl_command_queue command_queue, cl_mem memobj, int ty) {
size_t sz;
if (clGetMemObjectInfo(memobj, CL_MEM_SIZE, sizeof(size_t), &sz, NULL) != CL_SUCCESS) {
printf ("Something wrong.\n");
assert(0);
} else {
printf ("size = %d\n", sz);
}
char* hp; // = (char*) malloc(sz);
//posix_memalign((void**)&hp, 64, sz);
hp = (char*)malloc(sz);
clEnqueueReadBuffer (command_queue,
memobj,
CL_TRUE,
0,
sz,
hp,
0,
NULL,
NULL);
if (ty == T_FLOAT) pb_sig_float(s, (float*)hp, sz/sizeof(float));
if (ty == T_DOUBLE) pb_sig_double(s, (double*)hp, sz/sizeof(double));
if (ty == T_INT) pb_sig_int(s, (int*)hp, sz/sizeof(int));
if (ty == T_SHORT) pb_sig_short(s, (short*)hp, sz/sizeof(short));
if (ty == T_UCHAR) pb_sig_uchar(s, (unsigned char*)hp, sz/sizeof(char));
free(hp);
}