mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-06-28 01:28:42 -04:00
minor update
This commit is contained in:
parent
9dbff0e77c
commit
0426856ab4
9 changed files with 124 additions and 129 deletions
|
@ -19,13 +19,11 @@ LLVM_CFLAGS += -Xclang -target-feature -Xclang +vortex -mllvm -vortex-branch-div
|
||||||
#LLVM_CFLAGS += --rtlib=libgcc
|
#LLVM_CFLAGS += --rtlib=libgcc
|
||||||
|
|
||||||
#CC = $(LLVM_VORTEX)/bin/clang $(LLVM_CFLAGS)
|
#CC = $(LLVM_VORTEX)/bin/clang $(LLVM_CFLAGS)
|
||||||
#CXX = $(LLVM_VORTEX)/bin/clang++ $(LLVM_CFLAGS)
|
|
||||||
#AR = $(LLVM_VORTEX)/bin/llvm-ar
|
#AR = $(LLVM_VORTEX)/bin/llvm-ar
|
||||||
#DP = $(LLVM_VORTEX)/bin/llvm-objdump
|
#DP = $(LLVM_VORTEX)/bin/llvm-objdump
|
||||||
#CP = $(LLVM_VORTEX)/bin/llvm-objcopy
|
#CP = $(LLVM_VORTEX)/bin/llvm-objcopy
|
||||||
|
|
||||||
CC = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-gcc
|
CC = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-gcc
|
||||||
CXX = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-g++
|
|
||||||
AR = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-gcc-ar
|
AR = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-gcc-ar
|
||||||
DP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objdump
|
DP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objdump
|
||||||
CP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objcopy
|
CP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objcopy
|
||||||
|
@ -48,9 +46,6 @@ $(PROJECT).dump: $(PROJECT).a
|
||||||
%.S.o: $(SRC_DIR)/%.S
|
%.S.o: $(SRC_DIR)/%.S
|
||||||
$(CC) $(CFLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) -c $< -o $@
|
||||||
|
|
||||||
%.cpp.o: $(SRC_DIR)/%.cpp
|
|
||||||
$(CXX) $(CFLAGS) -c $< -o $@
|
|
||||||
|
|
||||||
%.c.o: $(SRC_DIR)/%.c
|
%.c.o: $(SRC_DIR)/%.c
|
||||||
$(CC) $(CFLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) -c $< -o $@
|
||||||
|
|
||||||
|
|
|
@ -21,15 +21,7 @@ extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int vx_vprintf(const char* format, va_list va);
|
int vx_vprintf(const char* format, va_list va);
|
||||||
|
int vx_printf(const char * format, ...);
|
||||||
inline int vx_printf(const char * format, ...) {
|
|
||||||
int ret;
|
|
||||||
va_list va;
|
|
||||||
va_start(va, format);
|
|
||||||
ret = vx_vprintf(format, va);
|
|
||||||
va_end(va);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
void vx_putchar(int c);
|
void vx_putchar(int c);
|
||||||
void vx_putint(int value, int base);
|
void vx_putint(int value, int base);
|
||||||
|
|
|
@ -27,8 +27,8 @@ extern "C" {
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
const char* format;
|
const char* format;
|
||||||
va_list* va;
|
va_list* va;
|
||||||
int ret;
|
int ret;
|
||||||
} printf_arg_t;
|
} printf_arg_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
@ -93,6 +93,15 @@ int vx_vprintf(const char* format, va_list va) {
|
||||||
return arg.ret;
|
return arg.ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int vx_printf(const char * format, ...) {
|
||||||
|
int ret;
|
||||||
|
va_list va;
|
||||||
|
va_start(va, format);
|
||||||
|
ret = vx_vprintf(format, va);
|
||||||
|
va_end(va);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
// Copyright © 2019-2023
|
// Copyright © 2019-2023
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// you may not use this file except in compliance with the License.
|
// you may not use this file except in compliance with the License.
|
||||||
// You may obtain a copy of the License at
|
// You may obtain a copy of the License at
|
||||||
// http://www.apache.org/licenses/LICENSE-2.0
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
//
|
//
|
||||||
// Unless required by applicable law or agreed to in writing, software
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
@ -18,7 +18,7 @@
|
||||||
|
|
||||||
.type vx_serial, @function
|
.type vx_serial, @function
|
||||||
.global vx_serial
|
.global vx_serial
|
||||||
vx_serial:
|
vx_serial:
|
||||||
#if (XLEN == 64)
|
#if (XLEN == 64)
|
||||||
addi sp, sp, -56
|
addi sp, sp, -56
|
||||||
sd ra, 48(sp)
|
sd ra, 48(sp)
|
||||||
|
@ -41,7 +41,7 @@ vx_serial:
|
||||||
mv s4, a0 # s4 <- callback
|
mv s4, a0 # s4 <- callback
|
||||||
mv s3, a1 # s3 <- arg
|
mv s3, a1 # s3 <- arg
|
||||||
csrr s2, VX_CSR_NUM_THREADS # s2 <- NT
|
csrr s2, VX_CSR_NUM_THREADS # s2 <- NT
|
||||||
csrr s1, VX_CSR_THREAD_ID # s1 <- tid
|
csrr s1, VX_CSR_THREAD_ID # s1 <- tid
|
||||||
li s0, 0 # s0 <- index
|
li s0, 0 # s0 <- index
|
||||||
label_loop:
|
label_loop:
|
||||||
sub t0, s0, s1
|
sub t0, s0, s1
|
||||||
|
@ -72,6 +72,5 @@ label_join:
|
||||||
lw s1, 4(sp)
|
lw s1, 4(sp)
|
||||||
lw s0, 0(sp)
|
lw s0, 0(sp)
|
||||||
addi sp, sp, 28
|
addi sp, sp, 28
|
||||||
#endif
|
#endif
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
|
|
@ -25,7 +25,7 @@ cl_command_queue *clCommandQueuePtr;
|
||||||
|
|
||||||
static int is_async(enum pb_TimerID timer)
|
static int is_async(enum pb_TimerID timer)
|
||||||
{
|
{
|
||||||
return (timer == pb_TimerID_KERNEL) ||
|
return (timer == pb_TimerID_KERNEL) ||
|
||||||
(timer == pb_TimerID_COPY_ASYNC);
|
(timer == pb_TimerID_COPY_ASYNC);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -38,17 +38,17 @@ static int is_blocking(enum pb_TimerID timer)
|
||||||
|
|
||||||
static int asyncs_outstanding(struct pb_TimerSet* timers)
|
static int asyncs_outstanding(struct pb_TimerSet* timers)
|
||||||
{
|
{
|
||||||
return (timers->async_markers != NULL) &&
|
return (timers->async_markers != NULL) &&
|
||||||
(timers->async_markers->timerID != INVALID_TIMERID);
|
(timers->async_markers->timerID != INVALID_TIMERID);
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct pb_async_time_marker_list *
|
static struct pb_async_time_marker_list *
|
||||||
get_last_async(struct pb_TimerSet* timers)
|
get_last_async(struct pb_TimerSet* timers)
|
||||||
{
|
{
|
||||||
/* Find the last event recorded thus far */
|
/* Find the last event recorded thus far */
|
||||||
struct pb_async_time_marker_list * last_event = timers->async_markers;
|
struct pb_async_time_marker_list * last_event = timers->async_markers;
|
||||||
if(last_event != NULL && last_event->timerID != INVALID_TIMERID) {
|
if(last_event != NULL && last_event->timerID != INVALID_TIMERID) {
|
||||||
while(last_event->next != NULL &&
|
while(last_event->next != NULL &&
|
||||||
last_event->next->timerID != INVALID_TIMERID)
|
last_event->next->timerID != INVALID_TIMERID)
|
||||||
last_event = last_event->next;
|
last_event = last_event->next;
|
||||||
return last_event;
|
return last_event;
|
||||||
|
@ -66,7 +66,7 @@ static void insert_marker(struct pb_TimerSet* tset, enum pb_TimerID timer)
|
||||||
}
|
}
|
||||||
|
|
||||||
if(*new_event == NULL) {
|
if(*new_event == NULL) {
|
||||||
*new_event = (struct pb_async_time_marker_list *)
|
*new_event = (struct pb_async_time_marker_list *)
|
||||||
malloc(sizeof(struct pb_async_time_marker_list));
|
malloc(sizeof(struct pb_async_time_marker_list));
|
||||||
(*new_event)->marker = calloc(1, sizeof(cl_event));
|
(*new_event)->marker = calloc(1, sizeof(cl_event));
|
||||||
/*
|
/*
|
||||||
|
@ -106,7 +106,7 @@ static void insert_submarker(struct pb_TimerSet* tset, char *label, enum pb_Time
|
||||||
}
|
}
|
||||||
|
|
||||||
if(*new_event == NULL) {
|
if(*new_event == NULL) {
|
||||||
*new_event = (struct pb_async_time_marker_list *)
|
*new_event = (struct pb_async_time_marker_list *)
|
||||||
malloc(sizeof(struct pb_async_time_marker_list));
|
malloc(sizeof(struct pb_async_time_marker_list));
|
||||||
(*new_event)->marker = calloc(1, sizeof(cl_event));
|
(*new_event)->marker = calloc(1, sizeof(cl_event));
|
||||||
/*
|
/*
|
||||||
|
@ -143,12 +143,12 @@ static pb_Timestamp record_async_times(struct pb_TimerSet* tset)
|
||||||
struct pb_async_time_marker_list * last_marker = get_last_async(tset);
|
struct pb_async_time_marker_list * last_marker = get_last_async(tset);
|
||||||
pb_Timestamp total_async_time = 0;
|
pb_Timestamp total_async_time = 0;
|
||||||
enum pb_TimerID timer;
|
enum pb_TimerID timer;
|
||||||
|
|
||||||
for(next_interval = tset->async_markers; next_interval != last_marker;
|
for(next_interval = tset->async_markers; next_interval != last_marker;
|
||||||
next_interval = next_interval->next) {
|
next_interval = next_interval->next) {
|
||||||
cl_ulong command_start=0, command_end=0;
|
cl_ulong command_start=0, command_end=0;
|
||||||
cl_int ciErrNum = CL_SUCCESS;
|
cl_int ciErrNum = CL_SUCCESS;
|
||||||
|
|
||||||
ciErrNum = clGetEventProfilingInfo(*((cl_event *)next_interval->marker), CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &command_start, NULL);
|
ciErrNum = clGetEventProfilingInfo(*((cl_event *)next_interval->marker), CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &command_start, NULL);
|
||||||
if (ciErrNum != CL_SUCCESS) {
|
if (ciErrNum != CL_SUCCESS) {
|
||||||
fprintf(stderr, "Error getting first EventProfilingInfo: %d\n", ciErrNum);
|
fprintf(stderr, "Error getting first EventProfilingInfo: %d\n", ciErrNum);
|
||||||
|
@ -157,8 +157,8 @@ static pb_Timestamp record_async_times(struct pb_TimerSet* tset)
|
||||||
ciErrNum = clGetEventProfilingInfo(*((cl_event *)next_interval->next->marker), CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &command_end, NULL);
|
ciErrNum = clGetEventProfilingInfo(*((cl_event *)next_interval->next->marker), CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &command_end, NULL);
|
||||||
if (ciErrNum != CL_SUCCESS) {
|
if (ciErrNum != CL_SUCCESS) {
|
||||||
fprintf(stderr, "Error getting second EventProfilingInfo: %d\n", ciErrNum);
|
fprintf(stderr, "Error getting second EventProfilingInfo: %d\n", ciErrNum);
|
||||||
}
|
}
|
||||||
|
|
||||||
pb_Timestamp interval = (pb_Timestamp) (((double)(command_end - command_start)) / 1e3);
|
pb_Timestamp interval = (pb_Timestamp) (((double)(command_end - command_start)) / 1e3);
|
||||||
tset->timers[next_interval->timerID].elapsed += interval;
|
tset->timers[next_interval->timerID].elapsed += interval;
|
||||||
if (next_interval->label != NULL) {
|
if (next_interval->label != NULL) {
|
||||||
|
@ -169,15 +169,15 @@ static pb_Timestamp record_async_times(struct pb_TimerSet* tset)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
subtimer = subtimer->next;
|
subtimer = subtimer->next;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
total_async_time += interval;
|
total_async_time += interval;
|
||||||
next_interval->timerID = INVALID_TIMERID;
|
next_interval->timerID = INVALID_TIMERID;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(next_interval != NULL)
|
if(next_interval != NULL)
|
||||||
next_interval->timerID = INVALID_TIMERID;
|
next_interval->timerID = INVALID_TIMERID;
|
||||||
|
|
||||||
return total_async_time;
|
return total_async_time;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -268,11 +268,11 @@ pb_StartTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer)
|
||||||
{
|
{
|
||||||
struct timeval tv;
|
struct timeval tv;
|
||||||
gettimeofday(&tv, NULL);
|
gettimeofday(&tv, NULL);
|
||||||
|
|
||||||
if (numNotStopped & 0x2) {
|
if (numNotStopped & 0x2) {
|
||||||
timer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
|
timer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (numNotStopped & 0x1) {
|
if (numNotStopped & 0x1) {
|
||||||
subtimer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
|
subtimer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
|
||||||
}
|
}
|
||||||
|
@ -351,7 +351,7 @@ void pb_StopTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer)
|
||||||
accumulate_time(&timer->elapsed, timer->init, fini);
|
accumulate_time(&timer->elapsed, timer->init, fini);
|
||||||
timer->init = fini;
|
timer->init = fini;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (numNotRunning & 0x1) {
|
if (numNotRunning & 0x1) {
|
||||||
accumulate_time(&subtimer->elapsed, subtimer->init, fini);
|
accumulate_time(&subtimer->elapsed, subtimer->init, fini);
|
||||||
subtimer->init = fini;
|
subtimer->init = fini;
|
||||||
|
@ -391,7 +391,7 @@ pb_InitializeTimerSet(struct pb_TimerSet *timers)
|
||||||
timers->current = pb_TimerID_NONE;
|
timers->current = pb_TimerID_NONE;
|
||||||
|
|
||||||
timers->async_markers = NULL;
|
timers->async_markers = NULL;
|
||||||
|
|
||||||
for (n = 0; n < pb_TimerID_LAST; n++) {
|
for (n = 0; n < pb_TimerID_LAST; n++) {
|
||||||
pb_ResetTimer(&timers->timers[n]);
|
pb_ResetTimer(&timers->timers[n]);
|
||||||
timers->sub_timer_list[n] = NULL;
|
timers->sub_timer_list[n] = NULL;
|
||||||
|
@ -405,20 +405,20 @@ void pb_SetOpenCL(void *p_clContextPtr, void *p_clCommandQueuePtr) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
pb_AddSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID pb_Category) {
|
pb_AddSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID pb_Category) {
|
||||||
/*#ifndef DISABLE_PARBOIL_TIMER
|
/*#ifndef DISABLE_PARBOIL_TIMER
|
||||||
|
|
||||||
struct pb_SubTimer *subtimer = (struct pb_SubTimer *) malloc
|
struct pb_SubTimer *subtimer = (struct pb_SubTimer *) malloc
|
||||||
(sizeof(struct pb_SubTimer));
|
(sizeof(struct pb_SubTimer));
|
||||||
|
|
||||||
int len = strlen(label);
|
int len = strlen(label);
|
||||||
|
|
||||||
subtimer->label = (char *) malloc (sizeof(char)*(len+1));
|
subtimer->label = (char *) malloc (sizeof(char)*(len+1));
|
||||||
sprintf(subtimer->label, "%s\0", label);
|
sprintf(subtimer->label, "%s\0", label);
|
||||||
|
|
||||||
pb_ResetTimer(&subtimer->timer);
|
pb_ResetTimer(&subtimer->timer);
|
||||||
subtimer->next = NULL;
|
subtimer->next = NULL;
|
||||||
|
|
||||||
struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[pb_Category];
|
struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[pb_Category];
|
||||||
if (subtimerlist == NULL) {
|
if (subtimerlist == NULL) {
|
||||||
subtimerlist = (struct pb_SubTimerList *) calloc
|
subtimerlist = (struct pb_SubTimerList *) calloc
|
||||||
|
@ -433,7 +433,7 @@ pb_AddSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID pb_Categ
|
||||||
}
|
}
|
||||||
element->next = subtimer;
|
element->next = subtimer;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif*/
|
#endif*/
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -447,7 +447,7 @@ pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer)
|
||||||
if (timers->current != pb_TimerID_NONE) {
|
if (timers->current != pb_TimerID_NONE) {
|
||||||
struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current];
|
struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current];
|
||||||
struct pb_SubTimer *currSubTimer = (subtimerlist != NULL) ? subtimerlist->current : NULL;
|
struct pb_SubTimer *currSubTimer = (subtimerlist != NULL) ? subtimerlist->current : NULL;
|
||||||
|
|
||||||
if (!is_async(timers->current) ) {
|
if (!is_async(timers->current) ) {
|
||||||
if (timers->current != timer) {
|
if (timers->current != timer) {
|
||||||
if (currSubTimer != NULL) {
|
if (currSubTimer != NULL) {
|
||||||
|
@ -467,62 +467,62 @@ pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pb_Timestamp currentTime = 0; //get_time();
|
pb_Timestamp currentTime = 0; //get_time();
|
||||||
|
|
||||||
/* The only cases we check for asynchronous task completion is
|
/* The only cases we check for asynchronous task completion is
|
||||||
* when an overlapping CPU operation completes, or the next
|
* when an overlapping CPU operation completes, or the next
|
||||||
* segment blocks on completion of previous async operations */
|
* segment blocks on completion of previous async operations */
|
||||||
if( asyncs_outstanding(timers) &&
|
if( asyncs_outstanding(timers) &&
|
||||||
(!is_async(timers->current) || is_blocking(timer) ) ) {
|
(!is_async(timers->current) || is_blocking(timer) ) ) {
|
||||||
|
|
||||||
struct pb_async_time_marker_list * last_event = get_last_async(timers);
|
struct pb_async_time_marker_list * last_event = get_last_async(timers);
|
||||||
/* CL_COMPLETE if completed */
|
/* CL_COMPLETE if completed */
|
||||||
|
|
||||||
cl_int ciErrNum = CL_SUCCESS;
|
cl_int ciErrNum = CL_SUCCESS;
|
||||||
cl_int async_done = CL_COMPLETE;
|
cl_int async_done = CL_COMPLETE;
|
||||||
|
|
||||||
ciErrNum = clGetEventInfo(*((cl_event *)last_event->marker), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &async_done, NULL);
|
ciErrNum = clGetEventInfo(*((cl_event *)last_event->marker), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &async_done, NULL);
|
||||||
if (ciErrNum != CL_SUCCESS) {
|
if (ciErrNum != CL_SUCCESS) {
|
||||||
fprintf(stderr, "Error Querying EventInfo!\n");
|
fprintf(stderr, "Error Querying EventInfo!\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if(is_blocking(timer)) {
|
if(is_blocking(timer)) {
|
||||||
/* Async operations completed after previous CPU operations:
|
/* Async operations completed after previous CPU operations:
|
||||||
* overlapped time is the total CPU time since this set of async
|
* overlapped time is the total CPU time since this set of async
|
||||||
* operations were first issued */
|
* operations were first issued */
|
||||||
|
|
||||||
// timer to switch to is COPY or NONE
|
// timer to switch to is COPY or NONE
|
||||||
if(async_done != CL_COMPLETE) {
|
if(async_done != CL_COMPLETE) {
|
||||||
accumulate_time(&(timers->timers[pb_TimerID_OVERLAP].elapsed),
|
accumulate_time(&(timers->timers[pb_TimerID_OVERLAP].elapsed),
|
||||||
timers->async_begin,currentTime);
|
timers->async_begin,currentTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Wait on async operation completion */
|
/* Wait on async operation completion */
|
||||||
ciErrNum = clWaitForEvents(1, (cl_event *)last_event->marker);
|
ciErrNum = clWaitForEvents(1, (cl_event *)last_event->marker);
|
||||||
if (ciErrNum != CL_SUCCESS) {
|
if (ciErrNum != CL_SUCCESS) {
|
||||||
fprintf(stderr, "Error Waiting for Events!\n");
|
fprintf(stderr, "Error Waiting for Events!\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
pb_Timestamp total_async_time = record_async_times(timers);
|
pb_Timestamp total_async_time = record_async_times(timers);
|
||||||
|
|
||||||
/* Async operations completed before previous CPU operations:
|
/* Async operations completed before previous CPU operations:
|
||||||
* overlapped time is the total async time */
|
* overlapped time is the total async time */
|
||||||
if(async_done == CL_COMPLETE) {
|
if(async_done == CL_COMPLETE) {
|
||||||
//fprintf(stderr, "Async_done: total_async_type = %lld\n", total_async_time);
|
//fprintf(stderr, "Async_done: total_async_type = %lld\n", total_async_time);
|
||||||
timers->timers[pb_TimerID_OVERLAP].elapsed += total_async_time;
|
timers->timers[pb_TimerID_OVERLAP].elapsed += total_async_time;
|
||||||
}
|
}
|
||||||
|
|
||||||
} else
|
} else
|
||||||
/* implies (!is_async(timers->current) && asyncs_outstanding(timers)) */
|
/* implies (!is_async(timers->current) && asyncs_outstanding(timers)) */
|
||||||
// i.e. Current Not Async (not KERNEL/COPY_ASYNC) but there are outstanding
|
// i.e. Current Not Async (not KERNEL/COPY_ASYNC) but there are outstanding
|
||||||
// so something is deeper in stack
|
// so something is deeper in stack
|
||||||
if(async_done == CL_COMPLETE ) {
|
if(async_done == CL_COMPLETE ) {
|
||||||
/* Async operations completed before previous CPU operations:
|
/* Async operations completed before previous CPU operations:
|
||||||
* overlapped time is the total async time */
|
* overlapped time is the total async time */
|
||||||
timers->timers[pb_TimerID_OVERLAP].elapsed += record_async_times(timers);
|
timers->timers[pb_TimerID_OVERLAP].elapsed += record_async_times(timers);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Start the new timer */
|
/* Start the new timer */
|
||||||
|
@ -533,15 +533,15 @@ pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer)
|
||||||
// toSwitchTo Is Async (KERNEL/COPY_ASYNC)
|
// toSwitchTo Is Async (KERNEL/COPY_ASYNC)
|
||||||
if (!asyncs_outstanding(timers)) {
|
if (!asyncs_outstanding(timers)) {
|
||||||
/* No asyncs outstanding, insert a fresh async marker */
|
/* No asyncs outstanding, insert a fresh async marker */
|
||||||
|
|
||||||
insert_marker(timers, timer);
|
insert_marker(timers, timer);
|
||||||
timers->async_begin = currentTime;
|
timers->async_begin = currentTime;
|
||||||
} else if(!is_async(timers->current)) {
|
} else if(!is_async(timers->current)) {
|
||||||
/* Previous asyncs still in flight, but a previous SwitchTo
|
/* Previous asyncs still in flight, but a previous SwitchTo
|
||||||
* already marked the end of the most recent async operation,
|
* already marked the end of the most recent async operation,
|
||||||
* so we can rename that marker as the beginning of this async
|
* so we can rename that marker as the beginning of this async
|
||||||
* operation */
|
* operation */
|
||||||
|
|
||||||
struct pb_async_time_marker_list * last_event = get_last_async(timers);
|
struct pb_async_time_marker_list * last_event = get_last_async(timers);
|
||||||
last_event->label = NULL;
|
last_event->label = NULL;
|
||||||
last_event->timerID = timer;
|
last_event->timerID = timer;
|
||||||
|
@ -558,13 +558,13 @@ pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer)
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID category)
|
pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID category)
|
||||||
{
|
{
|
||||||
#if 0
|
#if 0
|
||||||
#ifndef DISABLE_PARBOIL_TIMER
|
#ifndef DISABLE_PARBOIL_TIMER
|
||||||
struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current];
|
struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current];
|
||||||
struct pb_SubTimer *curr = (subtimerlist != NULL) ? subtimerlist->current : NULL;
|
struct pb_SubTimer *curr = (subtimerlist != NULL) ? subtimerlist->current : NULL;
|
||||||
|
|
||||||
if (timers->current != pb_TimerID_NONE) {
|
if (timers->current != pb_TimerID_NONE) {
|
||||||
if (!is_async(timers->current) ) {
|
if (!is_async(timers->current) ) {
|
||||||
if (timers->current != category) {
|
if (timers->current != category) {
|
||||||
|
@ -588,10 +588,10 @@ pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID cat
|
||||||
|
|
||||||
pb_Timestamp currentTime = 0; //get_time();
|
pb_Timestamp currentTime = 0; //get_time();
|
||||||
|
|
||||||
/* The only cases we check for asynchronous task completion is
|
/* The only cases we check for asynchronous task completion is
|
||||||
* when an overlapping CPU operation completes, or the next
|
* when an overlapping CPU operation completes, or the next
|
||||||
* segment blocks on completion of previous async operations */
|
* segment blocks on completion of previous async operations */
|
||||||
if( asyncs_outstanding(timers) &&
|
if( asyncs_outstanding(timers) &&
|
||||||
(!is_async(timers->current) || is_blocking(category) ) ) {
|
(!is_async(timers->current) || is_blocking(category) ) ) {
|
||||||
|
|
||||||
struct pb_async_time_marker_list * last_event = get_last_async(timers);
|
struct pb_async_time_marker_list * last_event = get_last_async(timers);
|
||||||
|
@ -599,23 +599,23 @@ pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID cat
|
||||||
|
|
||||||
cl_int ciErrNum = CL_SUCCESS;
|
cl_int ciErrNum = CL_SUCCESS;
|
||||||
cl_int async_done = CL_COMPLETE;
|
cl_int async_done = CL_COMPLETE;
|
||||||
|
|
||||||
ciErrNum = clGetEventInfo(*((cl_event *)last_event->marker), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &async_done, NULL);
|
ciErrNum = clGetEventInfo(*((cl_event *)last_event->marker), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &async_done, NULL);
|
||||||
if (ciErrNum != CL_SUCCESS) {
|
if (ciErrNum != CL_SUCCESS) {
|
||||||
fprintf(stderr, "Error Querying EventInfo!\n");
|
fprintf(stderr, "Error Querying EventInfo!\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
if(is_blocking(category)) {
|
if(is_blocking(category)) {
|
||||||
/* Async operations completed after previous CPU operations:
|
/* Async operations completed after previous CPU operations:
|
||||||
* overlapped time is the total CPU time since this set of async
|
* overlapped time is the total CPU time since this set of async
|
||||||
* operations were first issued */
|
* operations were first issued */
|
||||||
|
|
||||||
// timer to switch to is COPY or NONE
|
// timer to switch to is COPY or NONE
|
||||||
// if it hasn't already finished, then just take now and use that as the elapsed time in OVERLAP
|
// if it hasn't already finished, then just take now and use that as the elapsed time in OVERLAP
|
||||||
// anything happening after now isn't OVERLAP because everything is being stopped to wait for synchronization
|
// anything happening after now isn't OVERLAP because everything is being stopped to wait for synchronization
|
||||||
// it seems that the extra sync wall time isn't being recorded anywhere
|
// it seems that the extra sync wall time isn't being recorded anywhere
|
||||||
if(async_done != CL_COMPLETE)
|
if(async_done != CL_COMPLETE)
|
||||||
accumulate_time(&(timers->timers[pb_TimerID_OVERLAP].elapsed),
|
accumulate_time(&(timers->timers[pb_TimerID_OVERLAP].elapsed),
|
||||||
timers->async_begin,currentTime);
|
timers->async_begin,currentTime);
|
||||||
|
|
||||||
/* Wait on async operation completion */
|
/* Wait on async operation completion */
|
||||||
|
@ -625,28 +625,28 @@ pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID cat
|
||||||
}
|
}
|
||||||
pb_Timestamp total_async_time = record_async_times(timers);
|
pb_Timestamp total_async_time = record_async_times(timers);
|
||||||
|
|
||||||
/* Async operations completed before previous CPU operations:
|
/* Async operations completed before previous CPU operations:
|
||||||
* overlapped time is the total async time */
|
* overlapped time is the total async time */
|
||||||
// If it did finish, then accumulate all the async time that did happen into OVERLAP
|
// If it did finish, then accumulate all the async time that did happen into OVERLAP
|
||||||
// the immediately preceding EventSynchronize theoretically didn't have any effect since it was already completed.
|
// the immediately preceding EventSynchronize theoretically didn't have any effect since it was already completed.
|
||||||
if(async_done == CL_COMPLETE /*cudaSuccess*/)
|
if(async_done == CL_COMPLETE /*cudaSuccess*/)
|
||||||
timers->timers[pb_TimerID_OVERLAP].elapsed += total_async_time;
|
timers->timers[pb_TimerID_OVERLAP].elapsed += total_async_time;
|
||||||
|
|
||||||
} else
|
} else
|
||||||
/* implies (!is_async(timers->current) && asyncs_outstanding(timers)) */
|
/* implies (!is_async(timers->current) && asyncs_outstanding(timers)) */
|
||||||
// i.e. Current Not Async (not KERNEL/COPY_ASYNC) but there are outstanding
|
// i.e. Current Not Async (not KERNEL/COPY_ASYNC) but there are outstanding
|
||||||
// so something is deeper in stack
|
// so something is deeper in stack
|
||||||
if(async_done == CL_COMPLETE /*cudaSuccess*/) {
|
if(async_done == CL_COMPLETE /*cudaSuccess*/) {
|
||||||
/* Async operations completed before previous CPU operations:
|
/* Async operations completed before previous CPU operations:
|
||||||
* overlapped time is the total async time */
|
* overlapped time is the total async time */
|
||||||
timers->timers[pb_TimerID_OVERLAP].elapsed += record_async_times(timers);
|
timers->timers[pb_TimerID_OVERLAP].elapsed += record_async_times(timers);
|
||||||
}
|
}
|
||||||
// else, this isn't blocking, so just check the next time around
|
// else, this isn't blocking, so just check the next time around
|
||||||
}
|
}
|
||||||
|
|
||||||
subtimerlist = timers->sub_timer_list[category];
|
subtimerlist = timers->sub_timer_list[category];
|
||||||
struct pb_SubTimer *subtimer = NULL;
|
struct pb_SubTimer *subtimer = NULL;
|
||||||
|
|
||||||
if (label != NULL) {
|
if (label != NULL) {
|
||||||
subtimer = subtimerlist->subtimer_list;
|
subtimer = subtimerlist->subtimer_list;
|
||||||
while (subtimer != NULL) {
|
while (subtimer != NULL) {
|
||||||
|
@ -660,11 +660,11 @@ pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID cat
|
||||||
|
|
||||||
/* Start the new timer */
|
/* Start the new timer */
|
||||||
if (category != pb_TimerID_NONE) {
|
if (category != pb_TimerID_NONE) {
|
||||||
if(!is_async(category)) {
|
if(!is_async(category)) {
|
||||||
if (subtimerlist != NULL) {
|
if (subtimerlist != NULL) {
|
||||||
subtimerlist->current = subtimer;
|
subtimerlist->current = subtimer;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (category != timers->current && subtimer != NULL) {
|
if (category != timers->current && subtimer != NULL) {
|
||||||
pb_StartTimerAndSubTimer(&timers->timers[category], &subtimer->timer);
|
pb_StartTimerAndSubTimer(&timers->timers[category], &subtimer->timer);
|
||||||
} else if (subtimer != NULL) {
|
} else if (subtimer != NULL) {
|
||||||
|
@ -676,7 +676,7 @@ pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID cat
|
||||||
if (subtimerlist != NULL) {
|
if (subtimerlist != NULL) {
|
||||||
subtimerlist->current = subtimer;
|
subtimerlist->current = subtimer;
|
||||||
}
|
}
|
||||||
|
|
||||||
// toSwitchTo Is Async (KERNEL/COPY_ASYNC)
|
// toSwitchTo Is Async (KERNEL/COPY_ASYNC)
|
||||||
if (!asyncs_outstanding(timers)) {
|
if (!asyncs_outstanding(timers)) {
|
||||||
/* No asyncs outstanding, insert a fresh async marker */
|
/* No asyncs outstanding, insert a fresh async marker */
|
||||||
|
@ -684,22 +684,22 @@ pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID cat
|
||||||
timers->async_begin = currentTime;
|
timers->async_begin = currentTime;
|
||||||
} else if(!is_async(timers->current)) {
|
} else if(!is_async(timers->current)) {
|
||||||
/* Previous asyncs still in flight, but a previous SwitchTo
|
/* Previous asyncs still in flight, but a previous SwitchTo
|
||||||
* already marked the end of the most recent async operation,
|
* already marked the end of the most recent async operation,
|
||||||
* so we can rename that marker as the beginning of this async
|
* so we can rename that marker as the beginning of this async
|
||||||
* operation */
|
* operation */
|
||||||
|
|
||||||
struct pb_async_time_marker_list * last_event = get_last_async(timers);
|
struct pb_async_time_marker_list * last_event = get_last_async(timers);
|
||||||
last_event->timerID = category;
|
last_event->timerID = category;
|
||||||
last_event->label = label;
|
last_event->label = label;
|
||||||
} // else, marker for switchToThis was already inserted
|
} // else, marker for switchToThis was already inserted
|
||||||
|
|
||||||
//toSwitchto is already asynchronous, but if current/prev state is async too, then DRIVER is already running
|
//toSwitchto is already asynchronous, but if current/prev state is async too, then DRIVER is already running
|
||||||
if (!is_async(timers->current)) {
|
if (!is_async(timers->current)) {
|
||||||
pb_StartTimer(&timers->timers[pb_TimerID_DRIVER]);
|
pb_StartTimer(&timers->timers[pb_TimerID_DRIVER]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
timers->current = category;
|
timers->current = category;
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
@ -714,22 +714,22 @@ pb_PrintTimerSet(struct pb_TimerSet *timers)
|
||||||
|
|
||||||
struct pb_Timer *t = timers->timers;
|
struct pb_Timer *t = timers->timers;
|
||||||
struct pb_SubTimer* sub = NULL;
|
struct pb_SubTimer* sub = NULL;
|
||||||
|
|
||||||
int maxSubLength;
|
int maxSubLength;
|
||||||
|
|
||||||
const char *categories[] = {
|
const char *categories[] = {
|
||||||
"IO", "Kernel", "Copy", "Driver", "Copy Async", "Compute"
|
"IO", "Kernel", "Copy", "Driver", "Copy Async", "Compute"
|
||||||
};
|
};
|
||||||
|
|
||||||
const int maxCategoryLength = 10;
|
const int maxCategoryLength = 10;
|
||||||
|
|
||||||
int i;
|
int i;
|
||||||
for(i = 1; i < pb_TimerID_LAST-1; ++i) { // exclude NONE and OVRELAP from this format
|
for(i = 1; i < pb_TimerID_LAST-1; ++i) { // exclude NONE and OVRELAP from this format
|
||||||
if(pb_GetElapsedTime(&t[i]) != 0) {
|
if(pb_GetElapsedTime(&t[i]) != 0) {
|
||||||
|
|
||||||
// Print Category Timer
|
// Print Category Timer
|
||||||
printf("%-*s: %f\n", maxCategoryLength, categories[i-1], pb_GetElapsedTime(&t[i]));
|
printf("%-*s: %f\n", maxCategoryLength, categories[i-1], pb_GetElapsedTime(&t[i]));
|
||||||
|
|
||||||
if (timers->sub_timer_list[i] != NULL) {
|
if (timers->sub_timer_list[i] != NULL) {
|
||||||
sub = timers->sub_timer_list[i]->subtimer_list;
|
sub = timers->sub_timer_list[i]->subtimer_list;
|
||||||
maxSubLength = 0;
|
maxSubLength = 0;
|
||||||
|
@ -740,14 +740,14 @@ pb_PrintTimerSet(struct pb_TimerSet *timers)
|
||||||
}
|
}
|
||||||
sub = sub->next;
|
sub = sub->next;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fit to Categories
|
// Fit to Categories
|
||||||
if (maxSubLength <= maxCategoryLength) {
|
if (maxSubLength <= maxCategoryLength) {
|
||||||
maxSubLength = maxCategoryLength;
|
maxSubLength = maxCategoryLength;
|
||||||
}
|
}
|
||||||
|
|
||||||
sub = timers->sub_timer_list[i]->subtimer_list;
|
sub = timers->sub_timer_list[i]->subtimer_list;
|
||||||
|
|
||||||
// Print SubTimers
|
// Print SubTimers
|
||||||
while (sub != NULL) {
|
while (sub != NULL) {
|
||||||
printf(" -%-*s: %f\n", maxSubLength, sub->label, pb_GetElapsedTime(&sub->timer));
|
printf(" -%-*s: %f\n", maxSubLength, sub->label, pb_GetElapsedTime(&sub->timer));
|
||||||
|
@ -756,13 +756,13 @@ pb_PrintTimerSet(struct pb_TimerSet *timers)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]) != 0)
|
if(pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]) != 0)
|
||||||
printf("CPU/Kernel Overlap: %f\n", pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]));
|
printf("CPU/Kernel Overlap: %f\n", pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]));
|
||||||
|
|
||||||
float walltime = (wall_end - timers->wall_begin)/ 1e6;
|
float walltime = (wall_end - timers->wall_begin)/ 1e6;
|
||||||
printf("Timer Wall Time: %f\n", walltime);
|
printf("Timer Wall Time: %f\n", walltime);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -779,12 +779,12 @@ void pb_DestroyTimerSet(struct pb_TimerSet * timers)
|
||||||
if (ciErrNum != CL_SUCCESS) {
|
if (ciErrNum != CL_SUCCESS) {
|
||||||
//fprintf(stderr, "Error Waiting for Events!\n");
|
//fprintf(stderr, "Error Waiting for Events!\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
ciErrNum = clReleaseEvent( *((cl_event *)(event)->marker) );
|
ciErrNum = clReleaseEvent( *((cl_event *)(event)->marker) );
|
||||||
if (ciErrNum != CL_SUCCESS) {
|
if (ciErrNum != CL_SUCCESS) {
|
||||||
fprintf(stderr, "Error Release Events!\n");
|
fprintf(stderr, "Error Release Events!\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
free((event)->marker);
|
free((event)->marker);
|
||||||
struct pb_async_time_marker_list* next = ((event)->next);
|
struct pb_async_time_marker_list* next = ((event)->next);
|
||||||
|
|
||||||
|
@ -795,7 +795,7 @@ void pb_DestroyTimerSet(struct pb_TimerSet * timers)
|
||||||
}
|
}
|
||||||
|
|
||||||
int i = 0;
|
int i = 0;
|
||||||
for(i = 0; i < pb_TimerID_LAST; ++i) {
|
for(i = 0; i < pb_TimerID_LAST; ++i) {
|
||||||
if (timers->sub_timer_list[i] != NULL) {
|
if (timers->sub_timer_list[i] != NULL) {
|
||||||
struct pb_SubTimer *subtimer = timers->sub_timer_list[i]->subtimer_list;
|
struct pb_SubTimer *subtimer = timers->sub_timer_list[i]->subtimer_list;
|
||||||
struct pb_SubTimer *prev = NULL;
|
struct pb_SubTimer *prev = NULL;
|
||||||
|
@ -854,7 +854,7 @@ pb_GetPlatforms() {
|
||||||
return (pb_Platform**) ptr;
|
return (pb_Platform**) ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
pb_Context*
|
pb_Context*
|
||||||
createContext(pb_Platform* pb_platform, pb_Device* pb_device) {
|
createContext(pb_Platform* pb_platform, pb_Device* pb_device) {
|
||||||
pb_Context* c = (pb_Context*) malloc(sizeof(pb_Context));
|
pb_Context* c = (pb_Context*) malloc(sizeof(pb_Context));
|
||||||
cl_int clStatus;
|
cl_int clStatus;
|
||||||
|
@ -928,7 +928,7 @@ pb_GetDevices(pb_Platform* pb_platform) {
|
||||||
pb_platform->devices[i]->name = (char *) name;
|
pb_platform->devices[i]->name = (char *) name;
|
||||||
|
|
||||||
cl_bool available;
|
cl_bool available;
|
||||||
clGetDeviceInfo(dev_ids[i], CL_DEVICE_AVAILABLE, sizeof(cl_bool), &available, NULL);
|
clGetDeviceInfo(dev_ids[i], CL_DEVICE_AVAILABLE, sizeof(cl_bool), &available, NULL);
|
||||||
pb_platform->devices[i]->available = (int) available;
|
pb_platform->devices[i]->available = (int) available;
|
||||||
|
|
||||||
pb_platform->devices[i]->in_use = 0;
|
pb_platform->devices[i]->in_use = 0;
|
||||||
|
@ -1219,16 +1219,16 @@ pb_InitOpenCLContext(struct pb_Parameters* parameters) {
|
||||||
|
|
||||||
_err = clGetPlatformIDs(1, &platform_id, NULL);
|
_err = clGetPlatformIDs(1, &platform_id, NULL);
|
||||||
if (_err != CL_SUCCESS) {
|
if (_err != CL_SUCCESS) {
|
||||||
fprintf(stderr, "Error querying platform!\n");
|
fprintf(stderr, "Error querying platform!\n");
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
_err = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL);
|
_err = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL);
|
||||||
if (_err != CL_SUCCESS) {
|
if (_err != CL_SUCCESS) {
|
||||||
fprintf(stderr, "Error querying device IDs!\n");
|
fprintf(stderr, "Error querying device IDs!\n");
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &_err);
|
context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &_err);
|
||||||
if (_err != CL_SUCCESS) {
|
if (_err != CL_SUCCESS) {
|
||||||
fprintf(stderr, "Error Creating device context!\n");
|
fprintf(stderr, "Error Creating device context!\n");
|
||||||
|
@ -1240,15 +1240,15 @@ pb_InitOpenCLContext(struct pb_Parameters* parameters) {
|
||||||
c->clDeviceId = device_id;
|
c->clDeviceId = device_id;
|
||||||
c->clPlatformId = platform_id;
|
c->clPlatformId = platform_id;
|
||||||
c->pb_platform = (pb_Platform*)malloc(sizeof(pb_Platform));
|
c->pb_platform = (pb_Platform*)malloc(sizeof(pb_Platform));
|
||||||
c->pb_device = (pb_Device*)malloc(sizeof(pb_Device));
|
c->pb_device = (pb_Device*)malloc(sizeof(pb_Device));
|
||||||
c->pb_platform->devices = (pb_Device**)malloc(sizeof(pb_Device*) * 2);
|
c->pb_platform->devices = (pb_Device**)malloc(sizeof(pb_Device*) * 2);
|
||||||
c->pb_platform->devices[0] = c->pb_device;
|
c->pb_platform->devices[0] = c->pb_device;
|
||||||
c->pb_platform->devices[1] = NULL;
|
c->pb_platform->devices[1] = NULL;
|
||||||
c->pb_platform->contexts = (pb_Context**)malloc(sizeof(pb_Context*) * 2);
|
c->pb_platform->contexts = (pb_Context**)malloc(sizeof(pb_Context*) * 2);
|
||||||
c->pb_platform->contexts[0] = c;
|
c->pb_platform->contexts[0] = c;
|
||||||
c->pb_platform->contexts[1] = NULL;
|
c->pb_platform->contexts[1] = NULL;
|
||||||
c->pb_platform->in_use = 1;
|
c->pb_platform->in_use = 1;
|
||||||
c->pb_device->in_use = 1;
|
c->pb_device->in_use = 1;
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1270,7 +1270,7 @@ pb_PrintPlatformInfo(pb_Context* c) {
|
||||||
printf ("--------------------------------------------------------\n");
|
printf ("--------------------------------------------------------\n");
|
||||||
|
|
||||||
while (*ps) {
|
while (*ps) {
|
||||||
printf ("PLATFORM = %s, %s", (*ps)->name, (*ps)->version);
|
printf ("PLATFORM = %s, %s", (*ps)->name, (*ps)->version);
|
||||||
if (c->pb_platform == *ps) printf (" (SELECTED)");
|
if (c->pb_platform == *ps) printf (" (SELECTED)");
|
||||||
printf ("\n");
|
printf ("\n");
|
||||||
|
|
||||||
|
@ -1382,7 +1382,7 @@ void pb_sig_clmem(char* s, cl_command_queue command_queue, cl_mem memobj, int ty
|
||||||
printf ("Something wrong.\n");
|
printf ("Something wrong.\n");
|
||||||
assert(0);
|
assert(0);
|
||||||
} else {
|
} else {
|
||||||
printf ("size = %d\n", sz);
|
printf ("size = %ld\n", sz);
|
||||||
}
|
}
|
||||||
char* hp; // = (char*) malloc(sz);
|
char* hp; // = (char*) malloc(sz);
|
||||||
//posix_memalign((void**)&hp, 64, sz);
|
//posix_memalign((void**)&hp, 64, sz);
|
||||||
|
|
|
@ -139,7 +139,7 @@ static void cleanup() {
|
||||||
if (kernel_bin) free(kernel_bin);
|
if (kernel_bin) free(kernel_bin);
|
||||||
}
|
}
|
||||||
|
|
||||||
int size = 32;
|
uint32_t size = 32;
|
||||||
|
|
||||||
static void show_usage() {
|
static void show_usage() {
|
||||||
printf("Usage: [-n size] [-h: help]\n");
|
printf("Usage: [-n size] [-h: help]\n");
|
||||||
|
|
|
@ -139,7 +139,7 @@ static void cleanup() {
|
||||||
if (kernel_bin) free(kernel_bin);
|
if (kernel_bin) free(kernel_bin);
|
||||||
}
|
}
|
||||||
|
|
||||||
int size = 16;
|
uint32_t size = 16;
|
||||||
|
|
||||||
static void show_usage() {
|
static void show_usage() {
|
||||||
printf("Usage: [-n size] [-h: help]\n");
|
printf("Usage: [-n size] [-h: help]\n");
|
||||||
|
|
|
@ -139,8 +139,8 @@ static void cleanup() {
|
||||||
if (kernel_bin) free(kernel_bin);
|
if (kernel_bin) free(kernel_bin);
|
||||||
}
|
}
|
||||||
|
|
||||||
int size = 16;
|
uint32_t size = 16;
|
||||||
int tile_size = 8;
|
uint32_t tile_size = 8;
|
||||||
|
|
||||||
static void show_usage() {
|
static void show_usage() {
|
||||||
printf("Usage: [-n size] [-t tile size] [-h: help]\n");
|
printf("Usage: [-n size] [-t tile size] [-h: help]\n");
|
||||||
|
|
|
@ -133,7 +133,7 @@ static void cleanup() {
|
||||||
if (kernel_bin) free(kernel_bin);
|
if (kernel_bin) free(kernel_bin);
|
||||||
}
|
}
|
||||||
|
|
||||||
int size = 64;
|
uint32_t size = 64;
|
||||||
|
|
||||||
static void show_usage() {
|
static void show_usage() {
|
||||||
printf("Usage: [-n size] [-h: help]\n");
|
printf("Usage: [-n size] [-h: help]\n");
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue