/*
 * (c) 2007 The Board of Trustees of the University of Illinois.
 */

#include <parboil.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include <CL/cl.h>

#if _POSIX_VERSION >= 200112L
# include <sys/time.h>
#endif

//#include "perfmon.h"

cl_context *clContextPtr;
cl_command_queue *clCommandQueuePtr;

// #define DISABLE_PARBOIL_TIMER

/*****************************************************************************/
/* Timer routines */

static int is_async(enum pb_TimerID timer)
{
  return (timer == pb_TimerID_KERNEL) ||
         (timer == pb_TimerID_COPY_ASYNC);
}

static int is_blocking(enum pb_TimerID timer)
{
  return (timer == pb_TimerID_COPY) || (timer == pb_TimerID_NONE);
}

#define INVALID_TIMERID pb_TimerID_LAST

static int asyncs_outstanding(struct pb_TimerSet* timers)
{
  return (timers->async_markers != NULL) &&
         (timers->async_markers->timerID != INVALID_TIMERID);
}

static struct pb_async_time_marker_list *
get_last_async(struct pb_TimerSet* timers)
{
  /* Find the last event recorded thus far */
  struct pb_async_time_marker_list * last_event = timers->async_markers;
  if(last_event != NULL && last_event->timerID != INVALID_TIMERID) {
    while(last_event->next != NULL &&
          last_event->next->timerID != INVALID_TIMERID)
      last_event = last_event->next;
    return last_event;
  } else
    return NULL;
}

static void insert_marker(struct pb_TimerSet* tset, enum pb_TimerID timer)
{
  cl_int ciErrNum = CL_SUCCESS;
  struct pb_async_time_marker_list ** new_event = &(tset->async_markers);

  while(*new_event != NULL && (*new_event)->timerID != INVALID_TIMERID) {
    new_event = &((*new_event)->next);
  }

  if(*new_event == NULL) {
    *new_event = (struct pb_async_time_marker_list *)
      malloc(sizeof(struct pb_async_time_marker_list));
    (*new_event)->marker = calloc(1, sizeof(cl_event));
    /*
    // I don't think this is needed at all. I believe clEnqueueMarker 'creates' the event
#if ( __OPENCL_VERSION__ >= CL_VERSION_1_1 )
    fprintf(stderr, "Creating Marker [%d]\n", timer);
    *((cl_event *)((*new_event)->marker)) = clCreateUserEvent(*clContextPtr, &ciErrNum);
    if (ciErrNum != CL_SUCCESS) {
      fprintf(stderr, "Error Creating User Event Object!\n");
    }
    ciErrNum = clSetUserEventStatus(*((cl_event *)((*new_event)->marker)), CL_QUEUED);
    if (ciErrNum != CL_SUCCESS) {
      fprintf(stderr, "Error Setting User Event Status!\n");
    }
#endif
    */
    (*new_event)->next = NULL;
  }

  /* valid event handle now acquired: insert the event record */
  (*new_event)->label = NULL;
  (*new_event)->timerID = timer;
  ciErrNum = clEnqueueMarker(*clCommandQueuePtr, (cl_event *)(*new_event)->marker);
  if (ciErrNum != CL_SUCCESS) {
    fprintf(stderr, "Error Enqueueing Marker!\n");
  }

}

static void insert_submarker(struct pb_TimerSet* tset, char *label, enum pb_TimerID timer)
{
  cl_int ciErrNum = CL_SUCCESS;
  struct pb_async_time_marker_list ** new_event = &(tset->async_markers);

  while(*new_event != NULL && (*new_event)->timerID != INVALID_TIMERID) {
    new_event = &((*new_event)->next);
  }

  if(*new_event == NULL) {
    *new_event = (struct pb_async_time_marker_list *)
      malloc(sizeof(struct pb_async_time_marker_list));
    (*new_event)->marker = calloc(1, sizeof(cl_event));
    /*
#if ( __OPENCL_VERSION__ >= CL_VERSION_1_1 )
    fprintf(stderr, "Creating SubMarker %s[%d]\n", label, timer);
    *((cl_event *)((*new_event)->marker)) = clCreateUserEvent(*clContextPtr, &ciErrNum);
    if (ciErrNum != CL_SUCCESS) {
      fprintf(stderr, "Error Creating User Event Object!\n");
    }
    ciErrNum = clSetUserEventStatus(*((cl_event *)((*new_event)->marker)), CL_QUEUED);
    if (ciErrNum != CL_SUCCESS) {
      fprintf(stderr, "Error Setting User Event Status!\n");
    }
#endif
    */
    (*new_event)->next = NULL;
  }

  /* valid event handle now acquired: insert the event record */
  (*new_event)->label = label;
  (*new_event)->timerID = timer;
  ciErrNum = clEnqueueMarker(*clCommandQueuePtr, (cl_event *)(*new_event)->marker);
  if (ciErrNum != CL_SUCCESS) {
    fprintf(stderr, "Error Enqueueing Marker!\n");
  }

}

/* Assumes that all recorded events have completed */
static pb_Timestamp record_async_times(struct pb_TimerSet* tset)
{
  struct pb_async_time_marker_list * next_interval = NULL;
  struct pb_async_time_marker_list * last_marker = get_last_async(tset);
  pb_Timestamp total_async_time = 0;
  enum pb_TimerID timer;

  for(next_interval = tset->async_markers; next_interval != last_marker;
      next_interval = next_interval->next) {
    cl_ulong command_start=0, command_end=0;
    cl_int ciErrNum = CL_SUCCESS;

    ciErrNum = clGetEventProfilingInfo(*((cl_event *)next_interval->marker), CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &command_start, NULL);
    if (ciErrNum != CL_SUCCESS) {
      fprintf(stderr, "Error getting first EventProfilingInfo: %d\n", ciErrNum);
    }

    ciErrNum = clGetEventProfilingInfo(*((cl_event *)next_interval->next->marker), CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &command_end, NULL);
    if (ciErrNum != CL_SUCCESS) {
      fprintf(stderr, "Error getting second EventProfilingInfo: %d\n", ciErrNum);
    }

    pb_Timestamp interval = (pb_Timestamp) (((double)(command_end - command_start)) / 1e3);
    tset->timers[next_interval->timerID].elapsed += interval;
    if (next_interval->label != NULL) {
      struct pb_SubTimer *subtimer = tset->sub_timer_list[next_interval->timerID]->subtimer_list;
      while (subtimer != NULL) {
        if ( strcmp(subtimer->label, next_interval->label) == 0) {
          subtimer->timer.elapsed += interval;
          break;
        }
        subtimer = subtimer->next;
      }
    }
    total_async_time += interval;
    next_interval->timerID = INVALID_TIMERID;
  }

  if(next_interval != NULL)
    next_interval->timerID = INVALID_TIMERID;

  return total_async_time;
}

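/*
 * Note on the accounting above (explanatory sketch, not part of the original
 * Parboil sources): each marker records the CL_PROFILING_COMMAND_END
 * timestamp of a clEnqueueMarker event, so the time charged to marker i's
 * timerID is
 *
 *   interval_i = end(marker i+1) - end(marker i)   [nanoseconds, / 1e3 -> microseconds]
 *
 * This assumes the command queue was created with CL_QUEUE_PROFILING_ENABLE;
 * otherwise clGetEventProfilingInfo reports CL_PROFILING_INFO_NOT_AVAILABLE
 * and the intervals stay zero.
 */
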
static void
accumulate_time(pb_Timestamp *accum,
                pb_Timestamp start,
                pb_Timestamp end)
{
//#if _POSIX_VERSION >= 200112L
  *accum += end - start;
//#else
//# error "Timestamps not implemented for this system"
//#endif
}

//#if _POSIX_VERSION >= 200112L
static pb_Timestamp get_time()
{
  //struct timeval tv;
  //gettimeofday(&tv, NULL);
  //return (pb_Timestamp) (tv.tv_sec * 1000000LL + tv.tv_usec);
  return 0;
}
//#else
//# error "no supported time libraries are available on this platform"
//#endif

void
pb_ResetTimer(struct pb_Timer *timer)
{
//#ifndef DISABLE_PARBOIL_TIMER
  timer->state = pb_Timer_STOPPED;

//#if _POSIX_VERSION >= 200112L
  timer->elapsed = 0;
//#else
//# error "pb_ResetTimer: not implemented for this system"
//#endif
//#endif
}

void
pb_StartTimer(struct pb_Timer *timer)
{
  /*#ifndef DISABLE_PARBOIL_TIMER
  if (timer->state != pb_Timer_STOPPED) {
    fputs("Ignoring attempt to start a running timer\n", stderr);
    return;
  }

  timer->state = pb_Timer_RUNNING;

#if _POSIX_VERSION >= 200112L
  {
    struct timeval tv;
    gettimeofday(&tv, NULL);
    timer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
  }
#else
# error "pb_StartTimer: not implemented for this system"
#endif
#endif*/
}

void
pb_StartTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer)
{
  /*#ifndef DISABLE_PARBOIL_TIMER

  unsigned int numNotStopped = 0x3; // 11
  if (timer->state != pb_Timer_STOPPED) {
    fputs("Warning: Timer was not stopped\n", stderr);
    numNotStopped &= 0x1; // Zero out 2^1
  }
  if (subtimer->state != pb_Timer_STOPPED) {
    fputs("Warning: Subtimer was not stopped\n", stderr);
    numNotStopped &= 0x2; // Zero out 2^0
  }
  if (numNotStopped == 0x0) {
    fputs("Ignoring attempt to start running timer and subtimer\n", stderr);
    return;
  }

  timer->state = pb_Timer_RUNNING;
  subtimer->state = pb_Timer_RUNNING;

#if _POSIX_VERSION >= 200112L
  {
    struct timeval tv;
    gettimeofday(&tv, NULL);

    if (numNotStopped & 0x2) {
      timer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
    }

    if (numNotStopped & 0x1) {
      subtimer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
    }
  }
#else
# error "pb_StartTimer: not implemented for this system"
#endif

#endif*/
}

void
pb_StopTimer(struct pb_Timer *timer)
{
  /*#ifndef DISABLE_PARBOIL_TIMER

  pb_Timestamp fini;

  if (timer->state != pb_Timer_RUNNING) {
    fputs("Ignoring attempt to stop a stopped timer\n", stderr);
    return;
  }

  timer->state = pb_Timer_STOPPED;

#if _POSIX_VERSION >= 200112L
  {
    struct timeval tv;
    gettimeofday(&tv, NULL);
    fini = tv.tv_sec * 1000000LL + tv.tv_usec;
  }
#else
# error "pb_StopTimer: not implemented for this system"
#endif

  accumulate_time(&timer->elapsed, timer->init, fini);
  timer->init = fini;

#endif*/
}

void pb_StopTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer) {
  /*#ifndef DISABLE_PARBOIL_TIMER

  pb_Timestamp fini;

  unsigned int numNotRunning = 0x3; // 11
  if (timer->state != pb_Timer_RUNNING) {
    fputs("Warning: Timer was not running\n", stderr);
    numNotRunning &= 0x1; // Zero out 2^1
  }
  if (subtimer->state != pb_Timer_RUNNING) {
    fputs("Warning: Subtimer was not running\n", stderr);
    numNotRunning &= 0x2; // Zero out 2^0
  }
  if (numNotRunning == 0x0) {
    fputs("Ignoring attempt to stop stopped timer and subtimer\n", stderr);
    return;
  }

  timer->state = pb_Timer_STOPPED;
  subtimer->state = pb_Timer_STOPPED;

#if _POSIX_VERSION >= 200112L
  {
    struct timeval tv;
    gettimeofday(&tv, NULL);
    fini = tv.tv_sec * 1000000LL + tv.tv_usec;
  }
#else
# error "pb_StopTimer: not implemented for this system"
#endif

  if (numNotRunning & 0x2) {
    accumulate_time(&timer->elapsed, timer->init, fini);
    timer->init = fini;
  }

  if (numNotRunning & 0x1) {
    accumulate_time(&subtimer->elapsed, subtimer->init, fini);
    subtimer->init = fini;
  }

#endif*/
}

/* Get the elapsed time in seconds. */
double
pb_GetElapsedTime(struct pb_Timer *timer)
{
  /*double ret;
#ifndef DISABLE_PARBOIL_TIMER

  if (timer->state != pb_Timer_STOPPED) {
    fputs("Elapsed time from a running timer is inaccurate\n", stderr);
  }

#if _POSIX_VERSION >= 200112L
  ret = timer->elapsed / 1e6;
#else
# error "pb_GetElapsedTime: not implemented for this system"
#endif
#endif
  return ret;*/
  return 0;
}

void
pb_InitializeTimerSet(struct pb_TimerSet *timers)
{
  /*#ifndef DISABLE_PARBOIL_TIMER
  int n;

  timers->wall_begin = 0; //get_time();
  timers->current = pb_TimerID_NONE;

  timers->async_markers = NULL;

  for (n = 0; n < pb_TimerID_LAST; n++) {
    pb_ResetTimer(&timers->timers[n]);
    timers->sub_timer_list[n] = NULL;
  }
#endif*/
}

void pb_SetOpenCL(void *p_clContextPtr, void *p_clCommandQueuePtr) {
  clContextPtr = ((cl_context *)p_clContextPtr);
  clCommandQueuePtr = ((cl_command_queue *)p_clCommandQueuePtr);
}

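/*
 * Typical Parboil usage sketch (illustrative only, not part of the original
 * sources): the context and command queue pointers must be registered before
 * any timer switch that enqueues an async marker, e.g.
 *
 *   struct pb_TimerSet timers;
 *   pb_InitializeTimerSet(&timers);
 *   pb_SetOpenCL(&context, &queue);            // pointers must remain valid
 *   pb_SwitchToTimer(&timers, pb_TimerID_IO);
 *   ... load inputs ...
 *   pb_SwitchToTimer(&timers, pb_TimerID_KERNEL);
 *   ... enqueue kernels ...
 *   pb_SwitchToTimer(&timers, pb_TimerID_NONE);
 *   pb_PrintTimerSet(&timers);
 *   pb_DestroyTimerSet(&timers);
 */
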
void
pb_AddSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID pb_Category) {
  /*#ifndef DISABLE_PARBOIL_TIMER

  struct pb_SubTimer *subtimer = (struct pb_SubTimer *) malloc
    (sizeof(struct pb_SubTimer));

  int len = strlen(label);

  subtimer->label = (char *) malloc (sizeof(char)*(len+1));
  sprintf(subtimer->label, "%s\0", label);

  pb_ResetTimer(&subtimer->timer);
  subtimer->next = NULL;

  struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[pb_Category];
  if (subtimerlist == NULL) {
    subtimerlist = (struct pb_SubTimerList *) calloc
      (1, sizeof(struct pb_SubTimerList));
    subtimerlist->subtimer_list = subtimer;
    timers->sub_timer_list[pb_Category] = subtimerlist;
  } else {
    // Append to list
    struct pb_SubTimer *element = subtimerlist->subtimer_list;
    while (element->next != NULL) {
      element = element->next;
    }
    element->next = subtimer;
  }

#endif*/
}

void
pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer)
{
#if 0
#ifndef DISABLE_PARBOIL_TIMER

  /* Stop the currently running timer */
  if (timers->current != pb_TimerID_NONE) {
    struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current];
    struct pb_SubTimer *currSubTimer = (subtimerlist != NULL) ? subtimerlist->current : NULL;

    if (!is_async(timers->current) ) {
      if (timers->current != timer) {
        if (currSubTimer != NULL) {
          pb_StopTimerAndSubTimer(&timers->timers[timers->current], &currSubTimer->timer);
        } else {
          pb_StopTimer(&timers->timers[timers->current]);
        }
      } else {
        if (currSubTimer != NULL) {
          pb_StopTimer(&currSubTimer->timer);
        }
      }
    } else {
      insert_marker(timers, timer);
      if (!is_async(timer)) { // if switching to async too, keep driver going
        pb_StopTimer(&timers->timers[pb_TimerID_DRIVER]);
      }
    }
  }

  pb_Timestamp currentTime = 0; //get_time();

  /* The only cases we check for asynchronous task completion is
   * when an overlapping CPU operation completes, or the next
   * segment blocks on completion of previous async operations */
  if( asyncs_outstanding(timers) &&
      (!is_async(timers->current) || is_blocking(timer) ) ) {

    struct pb_async_time_marker_list * last_event = get_last_async(timers);
    /* CL_COMPLETE if completed */

    cl_int ciErrNum = CL_SUCCESS;
    cl_int async_done = CL_COMPLETE;

    ciErrNum = clGetEventInfo(*((cl_event *)last_event->marker), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &async_done, NULL);
    if (ciErrNum != CL_SUCCESS) {
      fprintf(stderr, "Error Querying EventInfo!\n");
    }

    if(is_blocking(timer)) {
      /* Async operations completed after previous CPU operations:
       * overlapped time is the total CPU time since this set of async
       * operations were first issued */

      // timer to switch to is COPY or NONE
      if(async_done != CL_COMPLETE) {
        accumulate_time(&(timers->timers[pb_TimerID_OVERLAP].elapsed),
                        timers->async_begin,currentTime);
      }

      /* Wait on async operation completion */
      ciErrNum = clWaitForEvents(1, (cl_event *)last_event->marker);
      if (ciErrNum != CL_SUCCESS) {
        fprintf(stderr, "Error Waiting for Events!\n");
      }

      pb_Timestamp total_async_time = record_async_times(timers);

      /* Async operations completed before previous CPU operations:
       * overlapped time is the total async time */
      if(async_done == CL_COMPLETE) {
        //fprintf(stderr, "Async_done: total_async_type = %lld\n", total_async_time);
        timers->timers[pb_TimerID_OVERLAP].elapsed += total_async_time;
      }

    } else
    /* implies (!is_async(timers->current) && asyncs_outstanding(timers)) */
    // i.e. Current Not Async (not KERNEL/COPY_ASYNC) but there are outstanding
    // so something is deeper in stack
    if(async_done == CL_COMPLETE ) {
      /* Async operations completed before previous CPU operations:
       * overlapped time is the total async time */
      timers->timers[pb_TimerID_OVERLAP].elapsed += record_async_times(timers);
    }
  }

  /* Start the new timer */
  if (timer != pb_TimerID_NONE) {
    if(!is_async(timer)) {
      pb_StartTimer(&timers->timers[timer]);
    } else {
      // toSwitchTo Is Async (KERNEL/COPY_ASYNC)
      if (!asyncs_outstanding(timers)) {
        /* No asyncs outstanding, insert a fresh async marker */

        insert_marker(timers, timer);
        timers->async_begin = currentTime;
      } else if(!is_async(timers->current)) {
        /* Previous asyncs still in flight, but a previous SwitchTo
         * already marked the end of the most recent async operation,
         * so we can rename that marker as the beginning of this async
         * operation */

        struct pb_async_time_marker_list * last_event = get_last_async(timers);
        last_event->label = NULL;
        last_event->timerID = timer;
      }
      if (!is_async(timers->current)) {
        pb_StartTimer(&timers->timers[pb_TimerID_DRIVER]);
      }
    }
  }
  timers->current = timer;

#endif
#endif
}

void
pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID category)
{
#if 0
#ifndef DISABLE_PARBOIL_TIMER
  struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current];
  struct pb_SubTimer *curr = (subtimerlist != NULL) ? subtimerlist->current : NULL;

  if (timers->current != pb_TimerID_NONE) {
    if (!is_async(timers->current) ) {
      if (timers->current != category) {
        if (curr != NULL) {
          pb_StopTimerAndSubTimer(&timers->timers[timers->current], &curr->timer);
        } else {
          pb_StopTimer(&timers->timers[timers->current]);
        }
      } else {
        if (curr != NULL) {
          pb_StopTimer(&curr->timer);
        }
      }
    } else {
      insert_submarker(timers, label, category);
      if (!is_async(category)) { // if switching to async too, keep driver going
        pb_StopTimer(&timers->timers[pb_TimerID_DRIVER]);
      }
    }
  }

  pb_Timestamp currentTime = 0; //get_time();

  /* The only cases we check for asynchronous task completion is
   * when an overlapping CPU operation completes, or the next
   * segment blocks on completion of previous async operations */
  if( asyncs_outstanding(timers) &&
      (!is_async(timers->current) || is_blocking(category) ) ) {

    struct pb_async_time_marker_list * last_event = get_last_async(timers);
    /* CL_COMPLETE if completed */

    cl_int ciErrNum = CL_SUCCESS;
    cl_int async_done = CL_COMPLETE;

    ciErrNum = clGetEventInfo(*((cl_event *)last_event->marker), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &async_done, NULL);
    if (ciErrNum != CL_SUCCESS) {
      fprintf(stderr, "Error Querying EventInfo!\n");
    }

    if(is_blocking(category)) {
      /* Async operations completed after previous CPU operations:
       * overlapped time is the total CPU time since this set of async
       * operations were first issued */

      // timer to switch to is COPY or NONE
      // if it hasn't already finished, then just take now and use that as the elapsed time in OVERLAP
      // anything happening after now isn't OVERLAP because everything is being stopped to wait for synchronization
      // it seems that the extra sync wall time isn't being recorded anywhere
      if(async_done != CL_COMPLETE)
        accumulate_time(&(timers->timers[pb_TimerID_OVERLAP].elapsed),
                        timers->async_begin,currentTime);

      /* Wait on async operation completion */
      ciErrNum = clWaitForEvents(1, (cl_event *)last_event->marker);
      if (ciErrNum != CL_SUCCESS) {
        fprintf(stderr, "Error Waiting for Events!\n");
      }
      pb_Timestamp total_async_time = record_async_times(timers);

      /* Async operations completed before previous CPU operations:
       * overlapped time is the total async time */
      // If it did finish, then accumulate all the async time that did happen into OVERLAP
      // the immediately preceding EventSynchronize theoretically didn't have any effect since it was already completed.
      if(async_done == CL_COMPLETE /*cudaSuccess*/)
        timers->timers[pb_TimerID_OVERLAP].elapsed += total_async_time;

    } else
    /* implies (!is_async(timers->current) && asyncs_outstanding(timers)) */
    // i.e. Current Not Async (not KERNEL/COPY_ASYNC) but there are outstanding
    // so something is deeper in stack
    if(async_done == CL_COMPLETE /*cudaSuccess*/) {
      /* Async operations completed before previous CPU operations:
       * overlapped time is the total async time */
      timers->timers[pb_TimerID_OVERLAP].elapsed += record_async_times(timers);
    }
    // else, this isn't blocking, so just check the next time around
  }

  subtimerlist = timers->sub_timer_list[category];
  struct pb_SubTimer *subtimer = NULL;

  if (label != NULL) {
    subtimer = subtimerlist->subtimer_list;
    while (subtimer != NULL) {
      if (strcmp(subtimer->label, label) == 0) {
        break;
      } else {
        subtimer = subtimer->next;
      }
    }
  }

  /* Start the new timer */
  if (category != pb_TimerID_NONE) {
    if(!is_async(category)) {
      if (subtimerlist != NULL) {
        subtimerlist->current = subtimer;
      }

      if (category != timers->current && subtimer != NULL) {
        pb_StartTimerAndSubTimer(&timers->timers[category], &subtimer->timer);
      } else if (subtimer != NULL) {
        pb_StartTimer(&subtimer->timer);
      } else {
        pb_StartTimer(&timers->timers[category]);
      }
    } else {
      if (subtimerlist != NULL) {
        subtimerlist->current = subtimer;
      }

      // toSwitchTo Is Async (KERNEL/COPY_ASYNC)
      if (!asyncs_outstanding(timers)) {
        /* No asyncs outstanding, insert a fresh async marker */
        insert_submarker(timers, label, category);
        timers->async_begin = currentTime;
      } else if(!is_async(timers->current)) {
        /* Previous asyncs still in flight, but a previous SwitchTo
         * already marked the end of the most recent async operation,
         * so we can rename that marker as the beginning of this async
         * operation */

        struct pb_async_time_marker_list * last_event = get_last_async(timers);
        last_event->timerID = category;
        last_event->label = label;
      } // else, marker for switchToThis was already inserted

      //toSwitchto is already asynchronous, but if current/prev state is async too, then DRIVER is already running
      if (!is_async(timers->current)) {
        pb_StartTimer(&timers->timers[pb_TimerID_DRIVER]);
      }
    }
  }

  timers->current = category;
#endif
#endif
}

void
pb_PrintTimerSet(struct pb_TimerSet *timers)
{
#if 0
#ifndef DISABLE_PARBOIL_TIMER
  pb_Timestamp wall_end = 0; //get_time();

  struct pb_Timer *t = timers->timers;
  struct pb_SubTimer* sub = NULL;

  int maxSubLength;

  const char *categories[] = {
    "IO", "Kernel", "Copy", "Driver", "Copy Async", "Compute"
  };

  const int maxCategoryLength = 10;

  int i;
  for(i = 1; i < pb_TimerID_LAST-1; ++i) { // exclude NONE and OVERLAP from this format
    if(pb_GetElapsedTime(&t[i]) != 0) {

      // Print Category Timer
      printf("%-*s: %f\n", maxCategoryLength, categories[i-1], pb_GetElapsedTime(&t[i]));

      if (timers->sub_timer_list[i] != NULL) {
        sub = timers->sub_timer_list[i]->subtimer_list;
        maxSubLength = 0;
        while (sub != NULL) {
          // Find longest SubTimer label
          if (strlen(sub->label) > maxSubLength) {
            maxSubLength = strlen(sub->label);
          }
          sub = sub->next;
        }

        // Fit to Categories
        if (maxSubLength <= maxCategoryLength) {
          maxSubLength = maxCategoryLength;
        }

        sub = timers->sub_timer_list[i]->subtimer_list;

        // Print SubTimers
        while (sub != NULL) {
          printf(" -%-*s: %f\n", maxSubLength, sub->label, pb_GetElapsedTime(&sub->timer));
          sub = sub->next;
        }
      }
    }
  }

  if(pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]) != 0)
    printf("CPU/Kernel Overlap: %f\n", pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]));

  float walltime = (wall_end - timers->wall_begin)/ 1e6;
  printf("Timer Wall Time: %f\n", walltime);

#endif
#endif
}

void pb_DestroyTimerSet(struct pb_TimerSet * timers)
{
#ifndef DISABLE_PARBOIL_TIMER
  /* clean up all of the async event markers */
  struct pb_async_time_marker_list* event = timers->async_markers;
  while(event != NULL) {

    cl_int ciErrNum = CL_SUCCESS;
    ciErrNum = clWaitForEvents(1, (cl_event *)(event)->marker);
    if (ciErrNum != CL_SUCCESS) {
      //fprintf(stderr, "Error Waiting for Events!\n");
    }

    ciErrNum = clReleaseEvent( *((cl_event *)(event)->marker) );
    if (ciErrNum != CL_SUCCESS) {
      fprintf(stderr, "Error Releasing Events!\n");
    }

    free((event)->marker);
    struct pb_async_time_marker_list* next = ((event)->next);

    free(event);

    // (*event) = NULL;
    event = next;
  }

  int i = 0;
  for(i = 0; i < pb_TimerID_LAST; ++i) {
    if (timers->sub_timer_list[i] != NULL) {
      struct pb_SubTimer *subtimer = timers->sub_timer_list[i]->subtimer_list;
      struct pb_SubTimer *prev = NULL;
      while (subtimer != NULL) {
        free(subtimer->label);
        prev = subtimer;
        subtimer = subtimer->next;
        free(prev);
      }
      free(timers->sub_timer_list[i]);
    }
  }
#endif
}

static pb_Platform** ptr = NULL;

// Enumerate the available OpenCL platforms and their properties.
// The NULL-terminated list is built once and cached in 'ptr'.
pb_Platform**
pb_GetPlatforms() {
  if (ptr == NULL) {
    cl_uint num_platforms;
    clGetPlatformIDs(0, NULL, &num_platforms);
    if (num_platforms == 0) return NULL;

    ptr = (pb_Platform **) malloc(sizeof(pb_Platform *) * (num_platforms + 1));
    cl_platform_id* ids = (cl_platform_id *) malloc(num_platforms * sizeof(cl_platform_id));
    clGetPlatformIDs(num_platforms, ids, NULL);

    unsigned int i;
    for (i = 0; i < num_platforms; i++) {
      ptr[i] = (pb_Platform *) malloc(sizeof(pb_Platform));
      ptr[i]->clPlatform = ids[i];
      ptr[i]->contexts = NULL;
      ptr[i]->in_use = 0;
      ptr[i]->devices = NULL;

      size_t sz;
      clGetPlatformInfo(ids[i], CL_PLATFORM_NAME, 0, NULL, &sz);
      char* name = (char *) malloc(sz + 1);
      clGetPlatformInfo(ids[i], CL_PLATFORM_NAME, sz, name, NULL);
      name[sz] = '\0';
      ptr[i]->name = name;

      clGetPlatformInfo(ids[i], CL_PLATFORM_VERSION, 0, NULL, &sz);
      char* version = (char *) malloc(sz + 1);
      clGetPlatformInfo(ids[i], CL_PLATFORM_VERSION, sz, version, NULL);
      version[sz] = '\0';
      ptr[i]->version = version;
    }
    ptr[i] = NULL;

    free(ids);
  }

  return (pb_Platform**) ptr;
}

pb_Context*
createContext(pb_Platform* pb_platform, pb_Device* pb_device) {
  pb_Context* c = (pb_Context*) malloc(sizeof(pb_Context));
  cl_int clStatus;
  cl_context_properties clCps[3] = {
    CL_CONTEXT_PLATFORM, (cl_context_properties)(pb_platform->clPlatform), 0
  };
  c->clContext =
    clCreateContext(clCps, 1, (cl_device_id*)&pb_device->clDevice, NULL, NULL, &clStatus);
  c->clPlatformId = pb_platform->clPlatform;
  c->clDeviceId = pb_device->clDevice;
  c->pb_platform = pb_platform;
  c->pb_device = pb_device;
  pb_platform->in_use = 1;
  pb_device->in_use = 1;
  unsigned int i = 0;
  if (pb_platform->contexts == NULL) {
    pb_platform->contexts = (pb_Context**) malloc(2*sizeof(pb_Context*));
  } else {
    for (i = 0; pb_platform->contexts[i] != NULL; i++) {};
    /* grow the array to hold the new context plus the NULL terminator */
    pb_platform->contexts = (pb_Context**) realloc(pb_platform->contexts,
                                                   (i+2)*sizeof(pb_Context*));
  }
  pb_platform->contexts[i+1] = NULL;
  pb_platform->contexts[i] = c;
  return c;
}

// choose a platform by name.
pb_Platform*
pb_GetPlatformByName(const char* name) {
  pb_Platform** ps = (pb_Platform **) pb_GetPlatforms();
  if (ps == NULL) return NULL;
  if (name == NULL) {
    return *ps;
  }

  while (*ps) {
    if (strstr((*ps)->name, name)) break;
    ps++;
  }
  return (pb_Platform*) *ps;
}

pb_Device**
pb_GetDevices(pb_Platform* pb_platform) {
  if (pb_platform->devices == NULL) {
    cl_uint num_devs;
    cl_device_id* dev_ids;
    clGetDeviceIDs((cl_platform_id) pb_platform->clPlatform,
                   CL_DEVICE_TYPE_ALL, 0, NULL, &num_devs);
    if (num_devs == 0) return NULL;

    pb_platform->devices =
      (pb_Device **) malloc((num_devs + 1) * sizeof(pb_Device *));
    dev_ids = (cl_device_id *) malloc(sizeof(cl_device_id) * num_devs);
    clGetDeviceIDs((cl_platform_id) pb_platform->clPlatform,
                   CL_DEVICE_TYPE_ALL, num_devs, dev_ids, NULL);

    unsigned int i;
    for (i = 0; i < num_devs; i++) {
      pb_platform->devices[i] = (pb_Device *) malloc(sizeof(pb_Device));

      pb_platform->devices[i]->clDevice = dev_ids[i];
      pb_platform->devices[i]->id = i;

      size_t sz;
      clGetDeviceInfo(dev_ids[i], CL_DEVICE_NAME, 0, NULL, &sz);
      char* name = (char *) malloc(sz + 1);
      clGetDeviceInfo(dev_ids[i], CL_DEVICE_NAME, sz, name, NULL);
      name[sz] = '\0';
      pb_platform->devices[i]->name = (char *) name;

      cl_bool available;
      clGetDeviceInfo(dev_ids[i], CL_DEVICE_AVAILABLE, sizeof(cl_bool), &available, NULL);
      pb_platform->devices[i]->available = (int) available;

      pb_platform->devices[i]->in_use = 0;
    }
    pb_platform->devices[i] = NULL;
  }
  return (pb_Device **) pb_platform->devices;
}

// choose a device by name.
static pb_Device*
pb_SelectDeviceByName(pb_Device **ds, const char* name) {
  if (ds == NULL) return NULL;
  if (name == NULL) return *ds;
  while (*ds) {
    if (strstr((*ds)->name, name)) break;
    ds++;
  }

  return *ds;
}

// choose a device by name and set the device's 'in_use' flag.
pb_Device*
pb_GetDeviceByName(pb_Platform* pb_platform, const char* name) {
  pb_Device** ds = (pb_Device **) pb_GetDevices(pb_platform);
  pb_Device *d = pb_SelectDeviceByName(ds, name);

  if (d) d->in_use = 1;

  return d;
}

void
pb_ReleasePlatforms() {
  if (!ptr) return;
  pb_Platform** cur_ptr = ptr;
  while (*cur_ptr) {
    pb_Platform* pfptr = *cur_ptr++;
    if (pfptr->devices) {
      pb_Device** dvptr = pfptr->devices;
      while (*dvptr) {
        pb_Device* d = *dvptr++;
        free(d->name);
        free(d);
      }
      free(pfptr->devices);
    }
    if (pfptr->contexts) {
      pb_Context** cptr = pfptr->contexts;
      while (*cptr) {
        free(*cptr++);
      }
      free(pfptr->contexts);
    }
    free(pfptr->name);
    free(pfptr->version);  /* version is allocated alongside name in pb_GetPlatforms */
    free(pfptr);
  }
  free(ptr);
  ptr = NULL;
}

pb_Platform*
pb_GetPlatformByNameAndVersion(const char* name, const char* version) {
  pb_Platform** ps = (pb_Platform **) pb_GetPlatforms();
  if (ps == NULL) return NULL;
  if (name == NULL) return *ps;
  while (*ps) {
    if (strstr((*ps)->name, name) && strstr((*ps)->version, version)) break;
    ps++;
  }
  return (pb_Platform*) *ps;
}

/* Return a pointer to the device at the specified index, or NULL.
 * Used by pb_GetDevice. */
static pb_Device *
select_device_by_index(pb_Device** ds, int id)
{
  int i = 0;
  pb_Device** p = ds;
  while (*p && (i < id)) { p++; i++; }
  return *p;
}

/* Return a pointer to the device with the specified type, or NULL.
 * Used by pb_GetDevice. */
static pb_Device *
select_device_by_type(pb_Device** ds,
                      enum pb_DeviceSelectionCriterion criterion)
{
  cl_device_type sought_type;

  /* Determine the OpenCL device type to search for */
  switch(criterion) {
  case pb_Device_CPU:
    sought_type = CL_DEVICE_TYPE_CPU;
    break;
  case pb_Device_GPU:
    sought_type = CL_DEVICE_TYPE_GPU;
    break;
  case pb_Device_ACCELERATOR:
    sought_type = CL_DEVICE_TYPE_ACCELERATOR;
    break;
  default:
    fprintf(stderr, "pb_GetDevice: Invalid device type\n");
    exit(-1);
  }

  /* Find the device */
  {
    pb_Device** p = ds;
    cl_device_type type;
    while (*p) {
      clGetDeviceInfo(((cl_device_id) ((*p)->clDevice)), CL_DEVICE_TYPE,
                      sizeof(cl_device_type), &type, NULL);
      if (type == sought_type) break;
      p++;  /* advance to the next device so the loop terminates when no match */
    }

    return *p;
  }
}

pb_Device*
pb_GetDevice(pb_Platform* pb_platform, struct pb_DeviceParam *device)
{
  pb_Device** ds = (pb_Device **) pb_GetDevices(pb_platform);

  // The list of devices must be nonempty
  if (ds == NULL || *ds == NULL) {
    fprintf(stderr, "Error: No device is found in platform: name = %s, version = %s.\n", pb_platform->name, pb_platform->version);
    exit(-1);
  }

  pb_Device *selected_device = NULL;

  if (device != NULL) {
    /* Use 'device' to select and return a device.
     * If unable to select a device, fall
     * back on the default selection mechanism. */
    switch(device->criterion) {
    case pb_Device_INDEX:
      selected_device = select_device_by_index(ds, device->index);
      break;
    case pb_Device_GPU:
    case pb_Device_CPU:
    case pb_Device_ACCELERATOR:
      selected_device = select_device_by_type(ds, device->criterion);
      break;
    case pb_Device_NAME:
      selected_device = pb_SelectDeviceByName(ds, device->name);
      break;
    default:
      fprintf(stderr, "pb_GetDevice: Invalid argument\n");
      exit(-1);
    }
  }

  /* By default or if user-specified selection failed,
   * select the first device */
  if (selected_device == NULL)
    selected_device = *ds;

  /* Set the in_use flag */
  selected_device->in_use = 1;

  return selected_device;
}

pb_Device*
pb_GetDeviceByEnvVars(pb_Platform* pb_platform) {

  /* Convert environment variables to a 'pb_DeviceParam' */
  struct pb_DeviceParam *param = NULL;

  char* device_num = getenv("PARBOIL_DEVICE_NUMBER");
  if (device_num && strcmp(device_num, "")) {
    int id = atoi(device_num);
    param = pb_DeviceParam_index(id);
  }
  else {
    char* device_name = getenv("PARBOIL_DEVICE_NAME");
    if (device_name && strcmp(device_name, "")) {
      param = pb_DeviceParam_name(strdup(device_name));
    }
    else {
      char* device_type = getenv("PARBOIL_DEVICE_TYPE");
      if (device_type && strcmp(device_type, "")) {
        if (strcmp(device_type, "CPU") == 0)
          param = pb_DeviceParam_cpu();
        else if (strcmp(device_type, "GPU") == 0)
          param = pb_DeviceParam_gpu();
        else if (strcmp(device_type, "ACCELERATOR") == 0)
          param = pb_DeviceParam_accelerator();
      }
    }
  }

  /* Get a device */
  pb_Device *d = pb_GetDevice(pb_platform, param);
  pb_FreeDeviceParam(param);

  return d;
}

pb_Platform*
pb_GetPlatformByEnvVars() {
  char* name = getenv("PARBOIL_PLATFORM_NAME");
  char* version = getenv("PARBOIL_PLATFORM_VERSION");

  /* Create a pb_PlatformParam object (or NULL) representing the data from the
   * environment variables */
  struct pb_PlatformParam *platform;

  if (name) {
    if (version) {
      platform = pb_PlatformParam(strdup(name), strdup(version));
    }
    else {
      platform = pb_PlatformParam(strdup(name), NULL);
    }
  }
  else {
    platform = NULL;
  }

  /* Convert to a platform */
  pb_Platform *p = pb_GetPlatform(platform);
  pb_FreePlatformParam(platform);

  return p;
}

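/*
 * Illustrative usage (a sketch, not part of the original sources): the
 * environment variables read above let a benchmark pick a platform and
 * device without code changes, e.g.
 *
 *   PARBOIL_PLATFORM_NAME=Intel PARBOIL_DEVICE_TYPE=CPU ./benchmark ...
 *
 * and in the host code:
 *
 *   pb_Platform *plat = pb_GetPlatformByEnvVars();
 *   pb_Device   *dev  = pb_GetDeviceByEnvVars(plat);
 *
 * (The platform name "Intel" is only an example value.)
 */
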
/* Choose an OpenCL platform based on the given command-line parameters.
 * If NULL, use the default OpenCL platform. */
pb_Platform*
pb_GetPlatform(struct pb_PlatformParam *platform) {
  if (platform != NULL) {
    /* Try to use command-line parameters to choose platform */
    char *name = platform->name;
    char *version = platform->version;

    if (!name) {
      fprintf(stderr, "Internal error: NULL pointer\n");
      exit(-1);
    }

    if (version) {
      pb_Platform* p = pb_GetPlatformByNameAndVersion(name, version);
      if (p) return p;
    }

    pb_Platform* p = pb_GetPlatformByName(name);
    if (p) return p;
  }

  pb_Platform* p = pb_GetPlatformByName(NULL);
  if (p == NULL) {
    fprintf(stderr, "Error: No OpenCL platform in this system. Exiting.\n");
    exit(-1);
  }
  return p;
}

//extern void perf_init();
//extern void mxpa_scheduler_init();

pb_Context*
pb_InitOpenCLContext(struct pb_Parameters* parameters) {
#if 0
  pb_Platform* ps = pb_GetPlatform(parameters->platform);
  if (!ps) return NULL;
  pb_Device* ds = pb_GetDevice(ps, parameters->device);
  if (!ds) return NULL;

  /* HERE INITIALIZE TIMER */
  //perf_init();
  //mxpa_scheduler_init();

  pb_Context* c = createContext(ps, ds);
  pb_PrintPlatformInfo(c);
  return c;
#endif
  cl_int _err;
  cl_platform_id platform_id;
  cl_device_id device_id;
  cl_context context;
  clGetPlatformIDs(1, &platform_id, NULL);
  clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL);
  context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &_err);

  pb_Context* c = (pb_Context*)malloc(sizeof(pb_Context));
  c->clContext = context;
  c->clDeviceId = device_id;
  c->clPlatformId = platform_id;
  c->pb_platform = (pb_Platform*)malloc(sizeof(pb_Platform));
  c->pb_device = (pb_Device*)malloc(sizeof(pb_Device));
  c->pb_platform->devices = (pb_Device**)malloc(sizeof(pb_Device*) * 2);
  c->pb_platform->devices[0] = c->pb_device;
  c->pb_platform->devices[1] = NULL;
  c->pb_platform->contexts = (pb_Context**)malloc(sizeof(pb_Context*) * 2);
  c->pb_platform->contexts[0] = c;
  c->pb_platform->contexts[1] = NULL;
  c->pb_platform->in_use = 1;
  c->pb_device->in_use = 1;
  return c;
}

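/*
 * Sketch of how a benchmark might pair this context with the timer hooks
 * above (illustrative only; queue creation is left to the caller here, and
 * 'params' / 'queue' are placeholder names):
 *
 *   pb_Context *ctx = pb_InitOpenCLContext(params);
 *   cl_int err;
 *   // CL_QUEUE_PROFILING_ENABLE is required by the clGetEventProfilingInfo
 *   // calls in record_async_times().
 *   cl_command_queue queue = clCreateCommandQueue(ctx->clContext,
 *                                                 ctx->clDeviceId,
 *                                                 CL_QUEUE_PROFILING_ENABLE,
 *                                                 &err);
 *   pb_SetOpenCL(&ctx->clContext, &queue);
 */
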
void
pb_ReleaseOpenCLContext(pb_Context* c) {
  pb_ReleasePlatforms();
}

void
pb_PrintPlatformInfo(pb_Context* c) {
  /*pb_Platform** ps = pb_GetPlatforms();
  if (!ps) {
    fprintf (stderr, "No platform found");
    return;
  }

  printf ("********************************************************\n");
  printf ("DETECTED OPENCL PLATFORMS AND DEVICES:\n");
  printf ("--------------------------------------------------------\n");

  while (*ps) {
    printf ("PLATFORM = %s, %s", (*ps)->name, (*ps)->version);
    if (c->pb_platform == *ps) printf (" (SELECTED)");
    printf ("\n");

    pb_Device** ds = (pb_Device **) pb_GetDevices((*ps));
    if (ds == NULL) {
      printf (" + (No devices)\n");
    } else {
      while (*ds) {
        printf (" + %d: %s", (*ds)->id, (*ds)->name);
        if (c->pb_device == *ds) printf (" (SELECTED)");
        printf ("\n");
        ds++;
      }
    }

    ps++;
  }
  printf ("********************************************************\n");*/
}

#ifdef MEASURE_KERNEL_TIME

#undef clEnqueueNDRangeKernel

//extern void pin_trace_enable(char*);
//extern void pin_trace_disable(char*);

cl_int
pb_clEnqueueNDRangeKernel(cl_command_queue q/* command_queue */,
                          cl_kernel k/* kernel */,
                          cl_uint d/* work_dim */,
                          const size_t * o/* global_work_offset */,
                          const size_t * gws/* global_work_size */,
                          const size_t * lws/* local_work_size */,
                          cl_uint n/* num_events_in_wait_list */,
                          const cl_event * w/* event_wait_list */,
                          cl_event * e/* event */) {

  char buf[128];
  struct timeval begin, end;
  clGetKernelInfo(k, CL_KERNEL_FUNCTION_NAME, 128, buf, NULL);

#if 0
  int i;
  for (i = 0; i < d; i++) {
    printf ("%s: %d: %d / %d\n", buf, i, gws[i], (lws == NULL ? 0 : lws[i]));
  }
#endif

  clFinish(q); clFlush(q);
  //pin_trace_enable(buf);
  //gettimeofday(&begin, NULL);
  cl_int result = clEnqueueNDRangeKernel(q, k, d, o, gws, lws, n, w, e);
  clFinish(q); clFlush(q);
  //gettimeofday(&end, NULL);
  //pin_trace_disable(buf);
  //float t = (float)(end.tv_sec - begin.tv_sec) + (end.tv_usec - begin.tv_usec) / 1000000.0f;
  fflush(stdout);
  fflush(stderr);
  //printf ("PBTIMER: %s: %f\n", buf, t);
  return result;
}

#endif

void
pb_sig_float(char* c, float* p, int sz) {
  int i;
  double s = 0.0;
  for (i = 0; i < sz; i++) s += p[i] * (float)(i+1);
  printf ("[Signature] %s = %lf\n", c, s);
}

void
pb_sig_double(char* c, double* p, int sz) {
  int i;
  double s = 0.0;
  for (i = 0; i < sz; i++) s += p[i];
  printf ("[Signature] %s = %lf\n", c, s);
}

void
pb_sig_short(char* c, short* p, int sz) {
  int i;
  long long int s = 0;
  for (i = 0; i < sz; i++) s += p[i];
  printf ("[Signature] %s = %lld\n", c, s);
}

void
pb_sig_int(char* c, int* p, int sz) {
  int i;
  long long int s = 0;
  for (i = 0; i < sz; i++) s += p[i];
  printf ("[Signature] %s = %lld\n", c, s);
}

void
pb_sig_uchar(char* c, unsigned char* p, unsigned int sz) {
  unsigned int i;
  unsigned long long int s = 0;
  for (i = 0; i < sz; i++) s += p[i];
  printf ("[Signature] %s = %llu\n", c, s);
}

void pb_sig_clmem(char* s, cl_command_queue command_queue, cl_mem memobj, int ty) {
  size_t sz;
  if (clGetMemObjectInfo(memobj, CL_MEM_SIZE, sizeof(size_t), &sz, NULL) != CL_SUCCESS) {
    printf ("pb_sig_clmem: clGetMemObjectInfo failed.\n");
    assert(0);
  } else {
    printf ("size = %zu\n", sz);
  }
  char* hp; // = (char*) malloc(sz);
  //posix_memalign((void**)&hp, 64, sz);
  hp = (char*)malloc(sz);

  clEnqueueReadBuffer (command_queue,
                       memobj,
                       CL_TRUE,
                       0,
                       sz,
                       hp,
                       0,
                       NULL,
                       NULL);

  if (ty == T_FLOAT) pb_sig_float(s, (float*)hp, sz/sizeof(float));
  if (ty == T_DOUBLE) pb_sig_double(s, (double*)hp, sz/sizeof(double));
  if (ty == T_INT) pb_sig_int(s, (int*)hp, sz/sizeof(int));
  if (ty == T_SHORT) pb_sig_short(s, (short*)hp, sz/sizeof(short));
  if (ty == T_UCHAR) pb_sig_uchar(s, (unsigned char*)hp, sz/sizeof(char));

  free(hp);
}
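
/*
 * Illustrative use of the signature helpers above (a sketch, not part of the
 * original sources): after a kernel finishes, checksum a device buffer so two
 * runs can be compared, e.g.
 *
 *   pb_sig_clmem("out", queue, d_output, T_FLOAT);
 *
 * where 'queue' and 'd_output' are the application's command queue and cl_mem
 * buffer (placeholder names), and T_FLOAT is one of the type tags expected by
 * pb_sig_clmem.
 */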