diff --git a/benchmarks/opencl/bfs/CLHelper.h b/benchmarks/opencl/bfs/CLHelper.h index 94536d7c8..6ac9c6091 100755 --- a/benchmarks/opencl/bfs/CLHelper.h +++ b/benchmarks/opencl/bfs/CLHelper.h @@ -233,7 +233,7 @@ free(allPlatforms);*/ //--cambine-4: Create an OpenCL command queue oclHandles.queue = clCreateCommandQueue( oclHandles.context, oclHandles.devices[DEVICE_ID_INUSED], 0, &resultCL); - printf("resultCL=%d, queue=0x%x\n", resultCL, oclHandles.queue); + //printf("resultCL=%d, queue=0x%x\n", resultCL, oclHandles.queue); if ((resultCL != CL_SUCCESS) || (oclHandles.queue == NULL)) throw(string("InitCL()::Creating Command Queue. (clCreateCommandQueue)")); @@ -383,8 +383,8 @@ void _clRelease() { errorFlag = true; } oclHandles.kernel[nKernel] = NULL; + printf("clReleaseKernel()\n"); } - oclHandles.kernel.clear(); } if (oclHandles.program != NULL) { @@ -394,6 +394,7 @@ void _clRelease() { errorFlag = true; } oclHandles.program = NULL; + printf("clReleaseProgram()\n"); } if (oclHandles.queue != NULL) { @@ -403,10 +404,9 @@ void _clRelease() { errorFlag = true; } oclHandles.queue = NULL; + printf("clReleaseCommandQueue()\n"); } - free(oclHandles.devices); - if (oclHandles.context != NULL) { cl_int resultCL = clReleaseContext(oclHandles.context); if (resultCL != CL_SUCCESS) { @@ -414,6 +414,17 @@ void _clRelease() { errorFlag = true; } oclHandles.context = NULL; + printf("clReleaseContext()\n"); + } + + if (oclHandles.devices != NULL) { + cl_int resultCL = clReleaseDevice(oclHandles.devices[0]); + if (resultCL != CL_SUCCESS) { + cerr << "ReleaseCL()::Error: In clReleaseDevice" << endl; + errorFlag = true; + } + free(oclHandles.devices); + printf("clReleaseDevice()\n"); } if (errorFlag) @@ -675,7 +686,7 @@ void _clFinish() throw(string) { void _clInvokeKernel(int kernel_id, int work_items, int work_group_size) throw(string) { cl_uint work_dim = WORK_DIM; - cl_event e[1]; + //cl_event e[1]; if (work_items % work_group_size != 0) // process situations that work_items // cannot be divided by work_group_size work_items = @@ -684,7 +695,7 @@ void _clInvokeKernel(int kernel_id, int work_items, size_t global_work_size[] = {work_items, 1}; oclHandles.cl_status = clEnqueueNDRangeKernel( oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0, - global_work_size, local_work_size, 0, 0, &(e[0])); + global_work_size, local_work_size, 0, 0, NULL); #ifdef ERRMSG oclHandles.error_str = "excpetion in _clInvokeKernel() -> "; switch (oclHandles.cl_status) { @@ -749,13 +760,13 @@ void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, cl_uint work_dim = WORK_DIM; size_t local_work_size[] = {group_x, group_y}; size_t global_work_size[] = {range_x, range_y}; - cl_event e[1]; + //cl_event e[1]; /*if(work_items%work_group_size != 0) //process situations that work_items cannot be divided by work_group_size work_items = work_items + (work_group_size-(work_items%work_group_size));*/ oclHandles.cl_status = clEnqueueNDRangeKernel( oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0, - global_work_size, local_work_size, 0, 0, &(e[0])); + global_work_size, local_work_size, 0, 0, NULL); #ifdef ERRMSG oclHandles.error_str = "excpetion in _clInvokeKernel() -> "; switch (oclHandles.cl_status) { diff --git a/benchmarks/opencl/bfs/main.cc b/benchmarks/opencl/bfs/main.cc index 701209d4e..1a1cf1d2b 100755 --- a/benchmarks/opencl/bfs/main.cc +++ b/benchmarks/opencl/bfs/main.cc @@ -78,14 +78,15 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, char h_over; cl_mem d_graph_nodes, d_graph_edges, d_graph_mask, d_updating_graph_mask, d_graph_visited, d_cost, d_over; + try { //--1 transfer data from host to device _clInit(); + d_graph_nodes = _clMalloc(no_of_nodes * sizeof(Node), h_graph_nodes); d_graph_edges = _clMalloc(edge_list_size * sizeof(int), h_graph_edges); d_graph_mask = _clMallocRW(no_of_nodes * sizeof(char), h_graph_mask); - d_updating_graph_mask = - _clMallocRW(no_of_nodes * sizeof(char), h_updating_graph_mask); + d_updating_graph_mask = _clMallocRW(no_of_nodes * sizeof(char), h_updating_graph_mask); d_graph_visited = _clMallocRW(no_of_nodes * sizeof(char), h_graph_visited); d_cost = _clMallocRW(no_of_nodes * sizeof(int), h_cost); @@ -94,8 +95,7 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, _clMemcpyH2D(d_graph_nodes, no_of_nodes * sizeof(Node), h_graph_nodes); _clMemcpyH2D(d_graph_edges, edge_list_size * sizeof(int), h_graph_edges); _clMemcpyH2D(d_graph_mask, no_of_nodes * sizeof(char), h_graph_mask); - _clMemcpyH2D(d_updating_graph_mask, no_of_nodes * sizeof(char), - h_updating_graph_mask); + _clMemcpyH2D(d_updating_graph_mask, no_of_nodes * sizeof(char), h_updating_graph_mask); _clMemcpyH2D(d_graph_visited, no_of_nodes * sizeof(char), h_graph_visited); _clMemcpyH2D(d_cost, no_of_nodes * sizeof(int), h_cost); @@ -106,6 +106,7 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, kernel_timer.reset(); kernel_timer.start(); #endif + do { h_over = false; _clMemcpyH2D(d_over, sizeof(char), &h_over); @@ -136,9 +137,8 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, _clInvokeKernel(kernel_id, no_of_nodes, work_group_size); _clMemcpyD2H(d_over, sizeof(char), &h_over); - } while (h_over); + } while (h_over); - _clFinish(); #ifdef PROFILING kernel_timer.stop(); kernel_time = kernel_timer.getTimeInSeconds(); diff --git a/benchmarks/opencl/bfs/util.h b/benchmarks/opencl/bfs/util.h index 425edfba5..b67abc0d7 100755 --- a/benchmarks/opencl/bfs/util.h +++ b/benchmarks/opencl/bfs/util.h @@ -60,10 +60,10 @@ void compare_results(const datatype *cpu_results, const datatype *gpu_results, c } } if (passed){ - std::cout << "--cambine:passed:-)" << endl; + std::cout << "--cambine: passed: -)" << endl; } else{ - std::cout << "--cambine: failed:-(" << endl; + std::cout << "--cambine: failed :-(" << endl; } return ; } diff --git a/benchmarks/opencl/guassian/clutils.cpp b/benchmarks/opencl/guassian/clutils.cpp index 37cabb7da..c977477a0 100755 --- a/benchmarks/opencl/guassian/clutils.cpp +++ b/benchmarks/opencl/guassian/clutils.cpp @@ -69,7 +69,7 @@ static cl_uint numPlatforms; //! All discoverable OpenCL devices (one pointer per platform) static cl_device_id* devices = NULL; -static cl_uint* numDevices; +static cl_uint* numDevices = NULL; //! The chosen OpenCL platform static cl_platform_id platform = NULL; @@ -88,7 +88,6 @@ static cl_command_queue commandQueueNoProf = NULL; //! Global status of events static bool eventsEnabled = false; - //------------------------------------------------------- // Initialization and Cleanup //------------------------------------------------------- @@ -239,14 +238,34 @@ static bool eventsEnabled = false; return context; }*/ +static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) { + if (nullptr == filename || nullptr == data || 0 == size) + return -1; + + FILE* fp = fopen(filename, "r"); + if (NULL == fp) { + fprintf(stderr, "Failed to load kernel."); + return -1; + } + fseek(fp , 0 , SEEK_END); + long fsize = ftell(fp); + rewind(fp); + + *data = (uint8_t*)malloc(fsize); + *size = fread(*data, 1, fsize, fp); + + fclose(fp); + + return 0; +} + + cl_context cl_init_context(int platform, int dev,int quiet) { int printInfo=1; if (platform >= 0 && dev >= 0) printInfo = 0; cl_int status; // Used to iterate through the platforms and devices, respectively - cl_uint numPlatforms; - cl_uint numDevices; - + // These will hold the platform and device we select (can potentially be // multiple, but we're just doing one for now) // cl_platform_id platform = NULL; @@ -376,23 +395,24 @@ cl_context cl_init_context(int platform, int dev,int quiet) { // Getting platform and device information numPlatforms = 1; - numDevices = 1; - int platform_touse = 0; - int device_touse = 0; platforms = (cl_platform_id*)malloc(numPlatforms * sizeof(cl_platform_id)); - devices = (cl_device_id*)malloc(sizeof(cl_device_id)*numDevices); - status = clGetPlatformIDs(1, platforms, NULL); + numDevices = (cl_uint*)malloc(sizeof(cl_uint)*numPlatforms); + numDevices[0] = 1; + devices = (cl_device_id*)malloc(sizeof(cl_device_id)*numDevices[0]); + + int platform_touse = 0; + int device_touse = 0; + + status = clGetPlatformIDs(numPlatforms, platforms, NULL); cl_errChk(status, "Oops!", true); - status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_DEFAULT, 1, devices, NULL); + status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_DEFAULT, numDevices[0], devices, NULL); cl_errChk(status, "Oops!", true); - context = clCreateContext(NULL, 1, devices, NULL, NULL, &status); + context = clCreateContext(NULL, numDevices[0], devices, NULL, NULL, &status); cl_errChk(status, "Oops!", true); device=devices[device_touse]; -#define PROFILING - #ifdef PROFILING commandQueue = clCreateCommandQueue(context, @@ -400,7 +420,7 @@ cl_context cl_init_context(int platform, int dev,int quiet) { #else - clCommandQueue = clCreateCommandQueue(clGPUContext, + commandQueue = clCreateCommandQueue(context, devices[device_touse], NULL, &status); #endif // PROFILING @@ -413,22 +433,34 @@ cl_context cl_init_context(int platform, int dev,int quiet) { /*! Release all resources that the user doesn't have access to. */ -void cl_cleanup() +void cl_cleanup() { + cl_int status; + // Free the command queue - if(commandQueue) { - clReleaseCommandQueue(commandQueue); + if (commandQueue) { + status = clReleaseCommandQueue(commandQueue); + cl_errChk(status, "Oops!", true); + printf("clReleaseCommandQueue()\n"); } // Free the context - if(context) { - clReleaseContext(context); + if (context) { + status = clReleaseContext(context); + cl_errChk(status, "Oops!", true); + printf("clReleaseContext()\n"); + } + + for (int p = 0; p < numPlatforms; ++p) { + for (int d = 0; d < numDevices[p]; ++d) { + status = clReleaseDevice(devices[d]); + cl_errChk(status, "Oops!", true); + printf("clReleaseDevice()\n"); + } } free(devices); free(numDevices); - - // Free the platforms free(platforms); } @@ -443,6 +475,7 @@ void cl_freeKernel(cl_kernel kernel) if(kernel != NULL) { status = clReleaseKernel(kernel); cl_errChk(status, "Releasing kernel object", true); + printf("clReleaseKernel()\n"); } } @@ -457,6 +490,7 @@ void cl_freeMem(cl_mem mem) if(mem != NULL) { status = clReleaseMemObject(mem); cl_errChk(status, "Releasing mem object", true); + printf("clReleaseMemObject()\n"); } } @@ -471,6 +505,7 @@ void cl_freeProgram(cl_program program) if(program != NULL) { status = clReleaseProgram(program); cl_errChk(status, "Releasing program object", true); + printf("clReleaseProgram()\n"); } } @@ -782,27 +817,6 @@ void cl_writeToZCBuffer(cl_mem mem, void* data, size_t size) cl_unmapBuffer(mem, ptr); } -static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) { - if (nullptr == filename || nullptr == data || 0 == size) - return -1; - - FILE* fp = fopen(filename, "r"); - if (NULL == fp) { - fprintf(stderr, "Failed to load kernel."); - return -1; - } - fseek(fp , 0 , SEEK_END); - long fsize = ftell(fp); - rewind(fp); - - *data = (uint8_t*)malloc(fsize); - *size = fread(*data, 1, fsize, fp); - - fclose(fp); - - return 0; -} - //------------------------------------------------------- // Program and kernels //------------------------------------------------------- @@ -858,17 +872,17 @@ cl_program cl_compileProgram(char* kernelPath, char* compileoptions, bool verbos fread(source, 1, size, fp); source[size] = '\0';*/ - // Create the program object - //cl_program clProgramReturn = clCreateProgramWithSource(context, 1, (const char **)&source, NULL, &status); - //cl_program clProgramReturn = clCreateProgramWithBuiltInKernels(context, 1, &device, "Fan1;Fan2", &status); - // read kernel binary from file + // read kernel binary from file uint8_t *kernel_bin = NULL; size_t kernel_size; - cl_int binary_status = 0; - status = read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size); - cl_errChk(status, "read_kernel_file", true); + cl_int binary_status = 0; + int err = read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size); + cl_errChk(err, "read_kernel_file", true); + + // Create the program object + //cl_program clProgramReturn = clCreateProgramWithSource(context, 1, (const char **)&source, NULL, &status); cl_program clProgramReturn = clCreateProgramWithBinary( - context, 1, &device, &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &status); + context, 1, devices, &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &status); free(kernel_bin); cl_errChk(status, "Creating program", true); @@ -1440,4 +1454,4 @@ char* itoa_portable(int value, char* result, int base) { } return result; -} +} \ No newline at end of file diff --git a/benchmarks/opencl/guassian/main.cc b/benchmarks/opencl/guassian/main.cc index 4972a253c..d7e235eb2 100755 --- a/benchmarks/opencl/guassian/main.cc +++ b/benchmarks/opencl/guassian/main.cc @@ -76,6 +76,9 @@ int main(int argc, char *argv[]) { free(b); free(finalVec); // OpenClGaussianElimination(context,timing); + + cl_cleanup(); + printf("Passed!\n"); return 0; } @@ -142,7 +145,8 @@ void ForwardSub(cl_context context, float *a, float *b, float *m, int size, writeTime += eventTime(writeEvent, command_queue); clReleaseEvent(writeEvent); - error = clEnqueueWriteBuffer(command_queue, m_dev, + error = clEnqueueWriteBuffer(command_queue, + m_dev, 1, // change to 0 for nonblocking write 0, // offset sizeof(float) * size * size, m, 0, NULL, @@ -258,6 +262,13 @@ void ForwardSub(cl_context context, float *a, float *b, float *m, int size, printf("%f\n\n", writeTime + kernelTime + readTime); } + + cl_freeMem(a_dev); + cl_freeMem(b_dev); + cl_freeMem(m_dev); + cl_freeKernel(fan1_kernel); + cl_freeKernel(fan2_kernel); + cl_freeProgram(gaussianElim_program); } float eventTime(cl_event event, cl_command_queue command_queue) { diff --git a/benchmarks/opencl/nearn/clutils.cpp b/benchmarks/opencl/nearn/clutils.cpp index 3c433c0cf..c977477a0 100755 --- a/benchmarks/opencl/nearn/clutils.cpp +++ b/benchmarks/opencl/nearn/clutils.cpp @@ -69,7 +69,7 @@ static cl_uint numPlatforms; //! All discoverable OpenCL devices (one pointer per platform) static cl_device_id* devices = NULL; -static cl_uint* numDevices; +static cl_uint* numDevices = NULL; //! The chosen OpenCL platform static cl_platform_id platform = NULL; @@ -265,9 +265,7 @@ cl_context cl_init_context(int platform, int dev,int quiet) { if (platform >= 0 && dev >= 0) printInfo = 0; cl_int status; // Used to iterate through the platforms and devices, respectively - cl_uint numPlatforms; - cl_uint numDevices; - + // These will hold the platform and device we select (can potentially be // multiple, but we're just doing one for now) // cl_platform_id platform = NULL; @@ -397,23 +395,24 @@ cl_context cl_init_context(int platform, int dev,int quiet) { // Getting platform and device information numPlatforms = 1; - numDevices = 1; - int platform_touse = 0; - int device_touse = 0; platforms = (cl_platform_id*)malloc(numPlatforms * sizeof(cl_platform_id)); - devices = (cl_device_id*)malloc(sizeof(cl_device_id)*numDevices); - status = clGetPlatformIDs(1, platforms, NULL); + numDevices = (cl_uint*)malloc(sizeof(cl_uint)*numPlatforms); + numDevices[0] = 1; + devices = (cl_device_id*)malloc(sizeof(cl_device_id)*numDevices[0]); + + int platform_touse = 0; + int device_touse = 0; + + status = clGetPlatformIDs(numPlatforms, platforms, NULL); cl_errChk(status, "Oops!", true); - status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_DEFAULT, 1, devices, NULL); + status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_DEFAULT, numDevices[0], devices, NULL); cl_errChk(status, "Oops!", true); - context = clCreateContext(NULL, 1, devices, NULL, NULL, &status); + context = clCreateContext(NULL, numDevices[0], devices, NULL, NULL, &status); cl_errChk(status, "Oops!", true); device=devices[device_touse]; -#define PROFILING - #ifdef PROFILING commandQueue = clCreateCommandQueue(context, @@ -421,7 +420,7 @@ cl_context cl_init_context(int platform, int dev,int quiet) { #else - clCommandQueue = clCreateCommandQueue(clGPUContext, + commandQueue = clCreateCommandQueue(context, devices[device_touse], NULL, &status); #endif // PROFILING @@ -434,22 +433,34 @@ cl_context cl_init_context(int platform, int dev,int quiet) { /*! Release all resources that the user doesn't have access to. */ -void cl_cleanup() +void cl_cleanup() { + cl_int status; + // Free the command queue - if(commandQueue) { - clReleaseCommandQueue(commandQueue); + if (commandQueue) { + status = clReleaseCommandQueue(commandQueue); + cl_errChk(status, "Oops!", true); + printf("clReleaseCommandQueue()\n"); } // Free the context - if(context) { - clReleaseContext(context); + if (context) { + status = clReleaseContext(context); + cl_errChk(status, "Oops!", true); + printf("clReleaseContext()\n"); + } + + for (int p = 0; p < numPlatforms; ++p) { + for (int d = 0; d < numDevices[p]; ++d) { + status = clReleaseDevice(devices[d]); + cl_errChk(status, "Oops!", true); + printf("clReleaseDevice()\n"); + } } free(devices); free(numDevices); - - // Free the platforms free(platforms); } @@ -464,6 +475,7 @@ void cl_freeKernel(cl_kernel kernel) if(kernel != NULL) { status = clReleaseKernel(kernel); cl_errChk(status, "Releasing kernel object", true); + printf("clReleaseKernel()\n"); } } @@ -478,6 +490,7 @@ void cl_freeMem(cl_mem mem) if(mem != NULL) { status = clReleaseMemObject(mem); cl_errChk(status, "Releasing mem object", true); + printf("clReleaseMemObject()\n"); } } @@ -492,6 +505,7 @@ void cl_freeProgram(cl_program program) if(program != NULL) { status = clReleaseProgram(program); cl_errChk(status, "Releasing program object", true); + printf("clReleaseProgram()\n"); } } diff --git a/benchmarks/opencl/nearn/main.cc b/benchmarks/opencl/nearn/main.cc index 62d08c582..43ce16343 100755 --- a/benchmarks/opencl/nearn/main.cc +++ b/benchmarks/opencl/nearn/main.cc @@ -49,25 +49,27 @@ int main(int argc, char *argv[]) { printf("%s --> Distance=%f\n", records[i].recString, records[i].distance); } free(recordDistances); + + cl_cleanup(); + printf("Passed!\n"); + return 0; } float *OpenClFindNearestNeighbors(cl_context context, int numRecords, std::vector &locations, float lat, float lng, int timing) { - - // 1. set up kernel - cl_kernel NN_kernel; cl_int status; + + // 1. set up kernel + cl_kernel NN_kernel; cl_program cl_NN_program; cl_NN_program = cl_compileProgram((char *)"nearestNeighbor_kernel.cl", NULL); NN_kernel = clCreateKernel(cl_NN_program, "NearestNeighbor", &status); - status = - cl_errChk(status, (char *)"Error Creating Nearest Neighbor kernel", true); - if (status) - exit(1); + cl_errChk(status, (char *)"Error Creating Nearest Neighbor kernel", true); + // 2. set up memory on device and send ipts data to device // copy ipts(1,2) to device // also need to alloate memory for the distancePoints @@ -78,9 +80,11 @@ float *OpenClFindNearestNeighbors(cl_context context, int numRecords, d_locations = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(LatLong) * numRecords, NULL, &error); + cl_errChk(error, "ERROR: clCreateBuffer() failed", true); d_distances = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float) * numRecords, NULL, &error); + cl_errChk(error, "ERROR: clCreateBuffer() failed", true); cl_command_queue command_queue = cl_getCommandQueue(); cl_event writeEvent, kernelEvent, readEvent; @@ -89,6 +93,7 @@ float *OpenClFindNearestNeighbors(cl_context context, int numRecords, 0, // offset sizeof(LatLong) * numRecords, &locations[0], 0, NULL, &writeEvent); + cl_errChk(error, "ERROR: clEnqueueWriteBuffer() failed", true); // 3. send arguments to device cl_int argchk; @@ -124,8 +129,10 @@ float *OpenClFindNearestNeighbors(cl_context context, int numRecords, &readEvent); cl_errChk(error, "ERROR with clEnqueueReadBuffer", true); - if (timing) { - clFinish(command_queue); + + clFinish(command_queue); + + if (timing) { cl_ulong eventStart, eventEnd, totalTime = 0; printf("# Records\tWrite(s) [size]\t\tKernel(s)\tRead(s) " "[size]\t\tTotal(s)\n"); @@ -166,8 +173,14 @@ float *OpenClFindNearestNeighbors(cl_context context, int numRecords, printf("%f\n\n", (float)(totalTime / 1e9)); } // 6. return finalized data and release buffers - clReleaseMemObject(d_locations); - clReleaseMemObject(d_distances); + clReleaseEvent(writeEvent); + clReleaseEvent(kernelEvent); + clReleaseEvent(readEvent); + cl_freeMem(d_locations); + cl_freeMem(d_distances); + cl_freeKernel(NN_kernel); + cl_freeProgram(cl_NN_program); + return distances; } diff --git a/benchmarks/opencl/saxpy/main.cc b/benchmarks/opencl/saxpy/main.cc index d4076d70c..cf090486f 100644 --- a/benchmarks/opencl/saxpy/main.cc +++ b/benchmarks/opencl/saxpy/main.cc @@ -157,7 +157,7 @@ int main(int argc, char **argv) { context = CL_CHECK_ERR(clCreateContext(NULL, 1, &device_id, &pfn_notify, NULL, &_err)); cl_command_queue queue; - queue = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &_err)); + queue = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, NULL, &_err)); cl_kernel kernel = 0; cl_mem memObjects[2] = {0, 0};