mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 13:57:17 -04:00
opencl benchmark fixes
This commit is contained in:
parent
8940183c10
commit
6ef0c99389
8 changed files with 165 additions and 102 deletions
|
@ -233,7 +233,7 @@ free(allPlatforms);*/
|
|||
//--cambine-4: Create an OpenCL command queue
|
||||
oclHandles.queue = clCreateCommandQueue(
|
||||
oclHandles.context, oclHandles.devices[DEVICE_ID_INUSED], 0, &resultCL);
|
||||
printf("resultCL=%d, queue=0x%x\n", resultCL, oclHandles.queue);
|
||||
//printf("resultCL=%d, queue=0x%x\n", resultCL, oclHandles.queue);
|
||||
|
||||
if ((resultCL != CL_SUCCESS) || (oclHandles.queue == NULL))
|
||||
throw(string("InitCL()::Creating Command Queue. (clCreateCommandQueue)"));
|
||||
|
@ -383,8 +383,8 @@ void _clRelease() {
|
|||
errorFlag = true;
|
||||
}
|
||||
oclHandles.kernel[nKernel] = NULL;
|
||||
printf("clReleaseKernel()\n");
|
||||
}
|
||||
oclHandles.kernel.clear();
|
||||
}
|
||||
|
||||
if (oclHandles.program != NULL) {
|
||||
|
@ -394,6 +394,7 @@ void _clRelease() {
|
|||
errorFlag = true;
|
||||
}
|
||||
oclHandles.program = NULL;
|
||||
printf("clReleaseProgram()\n");
|
||||
}
|
||||
|
||||
if (oclHandles.queue != NULL) {
|
||||
|
@ -403,10 +404,9 @@ void _clRelease() {
|
|||
errorFlag = true;
|
||||
}
|
||||
oclHandles.queue = NULL;
|
||||
printf("clReleaseCommandQueue()\n");
|
||||
}
|
||||
|
||||
free(oclHandles.devices);
|
||||
|
||||
if (oclHandles.context != NULL) {
|
||||
cl_int resultCL = clReleaseContext(oclHandles.context);
|
||||
if (resultCL != CL_SUCCESS) {
|
||||
|
@ -414,6 +414,17 @@ void _clRelease() {
|
|||
errorFlag = true;
|
||||
}
|
||||
oclHandles.context = NULL;
|
||||
printf("clReleaseContext()\n");
|
||||
}
|
||||
|
||||
if (oclHandles.devices != NULL) {
|
||||
cl_int resultCL = clReleaseDevice(oclHandles.devices[0]);
|
||||
if (resultCL != CL_SUCCESS) {
|
||||
cerr << "ReleaseCL()::Error: In clReleaseDevice" << endl;
|
||||
errorFlag = true;
|
||||
}
|
||||
free(oclHandles.devices);
|
||||
printf("clReleaseDevice()\n");
|
||||
}
|
||||
|
||||
if (errorFlag)
|
||||
|
@ -675,7 +686,7 @@ void _clFinish() throw(string) {
|
|||
void _clInvokeKernel(int kernel_id, int work_items,
|
||||
int work_group_size) throw(string) {
|
||||
cl_uint work_dim = WORK_DIM;
|
||||
cl_event e[1];
|
||||
//cl_event e[1];
|
||||
if (work_items % work_group_size != 0) // process situations that work_items
|
||||
// cannot be divided by work_group_size
|
||||
work_items =
|
||||
|
@ -684,7 +695,7 @@ void _clInvokeKernel(int kernel_id, int work_items,
|
|||
size_t global_work_size[] = {work_items, 1};
|
||||
oclHandles.cl_status = clEnqueueNDRangeKernel(
|
||||
oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0,
|
||||
global_work_size, local_work_size, 0, 0, &(e[0]));
|
||||
global_work_size, local_work_size, 0, 0, NULL);
|
||||
#ifdef ERRMSG
|
||||
oclHandles.error_str = "excpetion in _clInvokeKernel() -> ";
|
||||
switch (oclHandles.cl_status) {
|
||||
|
@ -749,13 +760,13 @@ void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x,
|
|||
cl_uint work_dim = WORK_DIM;
|
||||
size_t local_work_size[] = {group_x, group_y};
|
||||
size_t global_work_size[] = {range_x, range_y};
|
||||
cl_event e[1];
|
||||
//cl_event e[1];
|
||||
/*if(work_items%work_group_size != 0) //process situations that work_items
|
||||
cannot be divided by work_group_size
|
||||
work_items = work_items + (work_group_size-(work_items%work_group_size));*/
|
||||
oclHandles.cl_status = clEnqueueNDRangeKernel(
|
||||
oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0,
|
||||
global_work_size, local_work_size, 0, 0, &(e[0]));
|
||||
global_work_size, local_work_size, 0, 0, NULL);
|
||||
#ifdef ERRMSG
|
||||
oclHandles.error_str = "excpetion in _clInvokeKernel() -> ";
|
||||
switch (oclHandles.cl_status) {
|
||||
|
|
|
@ -78,14 +78,15 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size,
|
|||
char h_over;
|
||||
cl_mem d_graph_nodes, d_graph_edges, d_graph_mask, d_updating_graph_mask,
|
||||
d_graph_visited, d_cost, d_over;
|
||||
|
||||
try {
|
||||
//--1 transfer data from host to device
|
||||
_clInit();
|
||||
|
||||
d_graph_nodes = _clMalloc(no_of_nodes * sizeof(Node), h_graph_nodes);
|
||||
d_graph_edges = _clMalloc(edge_list_size * sizeof(int), h_graph_edges);
|
||||
d_graph_mask = _clMallocRW(no_of_nodes * sizeof(char), h_graph_mask);
|
||||
d_updating_graph_mask =
|
||||
_clMallocRW(no_of_nodes * sizeof(char), h_updating_graph_mask);
|
||||
d_updating_graph_mask = _clMallocRW(no_of_nodes * sizeof(char), h_updating_graph_mask);
|
||||
d_graph_visited = _clMallocRW(no_of_nodes * sizeof(char), h_graph_visited);
|
||||
|
||||
d_cost = _clMallocRW(no_of_nodes * sizeof(int), h_cost);
|
||||
|
@ -94,8 +95,7 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size,
|
|||
_clMemcpyH2D(d_graph_nodes, no_of_nodes * sizeof(Node), h_graph_nodes);
|
||||
_clMemcpyH2D(d_graph_edges, edge_list_size * sizeof(int), h_graph_edges);
|
||||
_clMemcpyH2D(d_graph_mask, no_of_nodes * sizeof(char), h_graph_mask);
|
||||
_clMemcpyH2D(d_updating_graph_mask, no_of_nodes * sizeof(char),
|
||||
h_updating_graph_mask);
|
||||
_clMemcpyH2D(d_updating_graph_mask, no_of_nodes * sizeof(char), h_updating_graph_mask);
|
||||
_clMemcpyH2D(d_graph_visited, no_of_nodes * sizeof(char), h_graph_visited);
|
||||
_clMemcpyH2D(d_cost, no_of_nodes * sizeof(int), h_cost);
|
||||
|
||||
|
@ -106,6 +106,7 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size,
|
|||
kernel_timer.reset();
|
||||
kernel_timer.start();
|
||||
#endif
|
||||
|
||||
do {
|
||||
h_over = false;
|
||||
_clMemcpyH2D(d_over, sizeof(char), &h_over);
|
||||
|
@ -136,9 +137,8 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size,
|
|||
_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);
|
||||
|
||||
_clMemcpyD2H(d_over, sizeof(char), &h_over);
|
||||
} while (h_over);
|
||||
} while (h_over);
|
||||
|
||||
_clFinish();
|
||||
#ifdef PROFILING
|
||||
kernel_timer.stop();
|
||||
kernel_time = kernel_timer.getTimeInSeconds();
|
||||
|
|
|
@ -60,10 +60,10 @@ void compare_results(const datatype *cpu_results, const datatype *gpu_results, c
|
|||
}
|
||||
}
|
||||
if (passed){
|
||||
std::cout << "--cambine:passed:-)" << endl;
|
||||
std::cout << "--cambine: passed: -)" << endl;
|
||||
}
|
||||
else{
|
||||
std::cout << "--cambine: failed:-(" << endl;
|
||||
std::cout << "--cambine: failed :-(" << endl;
|
||||
}
|
||||
return ;
|
||||
}
|
||||
|
|
|
@ -69,7 +69,7 @@ static cl_uint numPlatforms;
|
|||
|
||||
//! All discoverable OpenCL devices (one pointer per platform)
|
||||
static cl_device_id* devices = NULL;
|
||||
static cl_uint* numDevices;
|
||||
static cl_uint* numDevices = NULL;
|
||||
|
||||
//! The chosen OpenCL platform
|
||||
static cl_platform_id platform = NULL;
|
||||
|
@ -88,7 +88,6 @@ static cl_command_queue commandQueueNoProf = NULL;
|
|||
//! Global status of events
|
||||
static bool eventsEnabled = false;
|
||||
|
||||
|
||||
//-------------------------------------------------------
|
||||
// Initialization and Cleanup
|
||||
//-------------------------------------------------------
|
||||
|
@ -239,14 +238,34 @@ static bool eventsEnabled = false;
|
|||
return context;
|
||||
}*/
|
||||
|
||||
static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
|
||||
if (nullptr == filename || nullptr == data || 0 == size)
|
||||
return -1;
|
||||
|
||||
FILE* fp = fopen(filename, "r");
|
||||
if (NULL == fp) {
|
||||
fprintf(stderr, "Failed to load kernel.");
|
||||
return -1;
|
||||
}
|
||||
fseek(fp , 0 , SEEK_END);
|
||||
long fsize = ftell(fp);
|
||||
rewind(fp);
|
||||
|
||||
*data = (uint8_t*)malloc(fsize);
|
||||
*size = fread(*data, 1, fsize, fp);
|
||||
|
||||
fclose(fp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
cl_context cl_init_context(int platform, int dev,int quiet) {
|
||||
int printInfo=1;
|
||||
if (platform >= 0 && dev >= 0) printInfo = 0;
|
||||
cl_int status;
|
||||
// Used to iterate through the platforms and devices, respectively
|
||||
cl_uint numPlatforms;
|
||||
cl_uint numDevices;
|
||||
|
||||
|
||||
// These will hold the platform and device we select (can potentially be
|
||||
// multiple, but we're just doing one for now)
|
||||
// cl_platform_id platform = NULL;
|
||||
|
@ -376,23 +395,24 @@ cl_context cl_init_context(int platform, int dev,int quiet) {
|
|||
// Getting platform and device information
|
||||
|
||||
numPlatforms = 1;
|
||||
numDevices = 1;
|
||||
int platform_touse = 0;
|
||||
int device_touse = 0;
|
||||
platforms = (cl_platform_id*)malloc(numPlatforms * sizeof(cl_platform_id));
|
||||
devices = (cl_device_id*)malloc(sizeof(cl_device_id)*numDevices);
|
||||
|
||||
status = clGetPlatformIDs(1, platforms, NULL);
|
||||
numDevices = (cl_uint*)malloc(sizeof(cl_uint)*numPlatforms);
|
||||
numDevices[0] = 1;
|
||||
devices = (cl_device_id*)malloc(sizeof(cl_device_id)*numDevices[0]);
|
||||
|
||||
int platform_touse = 0;
|
||||
int device_touse = 0;
|
||||
|
||||
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
|
||||
cl_errChk(status, "Oops!", true);
|
||||
status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_DEFAULT, 1, devices, NULL);
|
||||
status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_DEFAULT, numDevices[0], devices, NULL);
|
||||
cl_errChk(status, "Oops!", true);
|
||||
context = clCreateContext(NULL, 1, devices, NULL, NULL, &status);
|
||||
context = clCreateContext(NULL, numDevices[0], devices, NULL, NULL, &status);
|
||||
cl_errChk(status, "Oops!", true);
|
||||
|
||||
device=devices[device_touse];
|
||||
|
||||
#define PROFILING
|
||||
|
||||
#ifdef PROFILING
|
||||
|
||||
commandQueue = clCreateCommandQueue(context,
|
||||
|
@ -400,7 +420,7 @@ cl_context cl_init_context(int platform, int dev,int quiet) {
|
|||
|
||||
#else
|
||||
|
||||
clCommandQueue = clCreateCommandQueue(clGPUContext,
|
||||
commandQueue = clCreateCommandQueue(context,
|
||||
devices[device_touse], NULL, &status);
|
||||
|
||||
#endif // PROFILING
|
||||
|
@ -413,22 +433,34 @@ cl_context cl_init_context(int platform, int dev,int quiet) {
|
|||
/*!
|
||||
Release all resources that the user doesn't have access to.
|
||||
*/
|
||||
void cl_cleanup()
|
||||
void cl_cleanup()
|
||||
{
|
||||
cl_int status;
|
||||
|
||||
// Free the command queue
|
||||
if(commandQueue) {
|
||||
clReleaseCommandQueue(commandQueue);
|
||||
if (commandQueue) {
|
||||
status = clReleaseCommandQueue(commandQueue);
|
||||
cl_errChk(status, "Oops!", true);
|
||||
printf("clReleaseCommandQueue()\n");
|
||||
}
|
||||
|
||||
// Free the context
|
||||
if(context) {
|
||||
clReleaseContext(context);
|
||||
if (context) {
|
||||
status = clReleaseContext(context);
|
||||
cl_errChk(status, "Oops!", true);
|
||||
printf("clReleaseContext()\n");
|
||||
}
|
||||
|
||||
for (int p = 0; p < numPlatforms; ++p) {
|
||||
for (int d = 0; d < numDevices[p]; ++d) {
|
||||
status = clReleaseDevice(devices[d]);
|
||||
cl_errChk(status, "Oops!", true);
|
||||
printf("clReleaseDevice()\n");
|
||||
}
|
||||
}
|
||||
|
||||
free(devices);
|
||||
free(numDevices);
|
||||
|
||||
// Free the platforms
|
||||
free(platforms);
|
||||
}
|
||||
|
||||
|
@ -443,6 +475,7 @@ void cl_freeKernel(cl_kernel kernel)
|
|||
if(kernel != NULL) {
|
||||
status = clReleaseKernel(kernel);
|
||||
cl_errChk(status, "Releasing kernel object", true);
|
||||
printf("clReleaseKernel()\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -457,6 +490,7 @@ void cl_freeMem(cl_mem mem)
|
|||
if(mem != NULL) {
|
||||
status = clReleaseMemObject(mem);
|
||||
cl_errChk(status, "Releasing mem object", true);
|
||||
printf("clReleaseMemObject()\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -471,6 +505,7 @@ void cl_freeProgram(cl_program program)
|
|||
if(program != NULL) {
|
||||
status = clReleaseProgram(program);
|
||||
cl_errChk(status, "Releasing program object", true);
|
||||
printf("clReleaseProgram()\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -782,27 +817,6 @@ void cl_writeToZCBuffer(cl_mem mem, void* data, size_t size)
|
|||
cl_unmapBuffer(mem, ptr);
|
||||
}
|
||||
|
||||
static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
|
||||
if (nullptr == filename || nullptr == data || 0 == size)
|
||||
return -1;
|
||||
|
||||
FILE* fp = fopen(filename, "r");
|
||||
if (NULL == fp) {
|
||||
fprintf(stderr, "Failed to load kernel.");
|
||||
return -1;
|
||||
}
|
||||
fseek(fp , 0 , SEEK_END);
|
||||
long fsize = ftell(fp);
|
||||
rewind(fp);
|
||||
|
||||
*data = (uint8_t*)malloc(fsize);
|
||||
*size = fread(*data, 1, fsize, fp);
|
||||
|
||||
fclose(fp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
//-------------------------------------------------------
|
||||
// Program and kernels
|
||||
//-------------------------------------------------------
|
||||
|
@ -858,17 +872,17 @@ cl_program cl_compileProgram(char* kernelPath, char* compileoptions, bool verbos
|
|||
fread(source, 1, size, fp);
|
||||
source[size] = '\0';*/
|
||||
|
||||
// Create the program object
|
||||
//cl_program clProgramReturn = clCreateProgramWithSource(context, 1, (const char **)&source, NULL, &status);
|
||||
//cl_program clProgramReturn = clCreateProgramWithBuiltInKernels(context, 1, &device, "Fan1;Fan2", &status);
|
||||
// read kernel binary from file
|
||||
// read kernel binary from file
|
||||
uint8_t *kernel_bin = NULL;
|
||||
size_t kernel_size;
|
||||
cl_int binary_status = 0;
|
||||
status = read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size);
|
||||
cl_errChk(status, "read_kernel_file", true);
|
||||
cl_int binary_status = 0;
|
||||
int err = read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size);
|
||||
cl_errChk(err, "read_kernel_file", true);
|
||||
|
||||
// Create the program object
|
||||
//cl_program clProgramReturn = clCreateProgramWithSource(context, 1, (const char **)&source, NULL, &status);
|
||||
cl_program clProgramReturn = clCreateProgramWithBinary(
|
||||
context, 1, &device, &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &status);
|
||||
context, 1, devices, &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &status);
|
||||
free(kernel_bin);
|
||||
cl_errChk(status, "Creating program", true);
|
||||
|
||||
|
@ -1440,4 +1454,4 @@ char* itoa_portable(int value, char* result, int base) {
|
|||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
|
@ -76,6 +76,9 @@ int main(int argc, char *argv[]) {
|
|||
free(b);
|
||||
free(finalVec);
|
||||
// OpenClGaussianElimination(context,timing);
|
||||
|
||||
cl_cleanup();
|
||||
|
||||
printf("Passed!\n");
|
||||
return 0;
|
||||
}
|
||||
|
@ -142,7 +145,8 @@ void ForwardSub(cl_context context, float *a, float *b, float *m, int size,
|
|||
writeTime += eventTime(writeEvent, command_queue);
|
||||
clReleaseEvent(writeEvent);
|
||||
|
||||
error = clEnqueueWriteBuffer(command_queue, m_dev,
|
||||
error = clEnqueueWriteBuffer(command_queue,
|
||||
m_dev,
|
||||
1, // change to 0 for nonblocking write
|
||||
0, // offset
|
||||
sizeof(float) * size * size, m, 0, NULL,
|
||||
|
@ -258,6 +262,13 @@ void ForwardSub(cl_context context, float *a, float *b, float *m, int size,
|
|||
|
||||
printf("%f\n\n", writeTime + kernelTime + readTime);
|
||||
}
|
||||
|
||||
cl_freeMem(a_dev);
|
||||
cl_freeMem(b_dev);
|
||||
cl_freeMem(m_dev);
|
||||
cl_freeKernel(fan1_kernel);
|
||||
cl_freeKernel(fan2_kernel);
|
||||
cl_freeProgram(gaussianElim_program);
|
||||
}
|
||||
|
||||
float eventTime(cl_event event, cl_command_queue command_queue) {
|
||||
|
|
|
@ -69,7 +69,7 @@ static cl_uint numPlatforms;
|
|||
|
||||
//! All discoverable OpenCL devices (one pointer per platform)
|
||||
static cl_device_id* devices = NULL;
|
||||
static cl_uint* numDevices;
|
||||
static cl_uint* numDevices = NULL;
|
||||
|
||||
//! The chosen OpenCL platform
|
||||
static cl_platform_id platform = NULL;
|
||||
|
@ -265,9 +265,7 @@ cl_context cl_init_context(int platform, int dev,int quiet) {
|
|||
if (platform >= 0 && dev >= 0) printInfo = 0;
|
||||
cl_int status;
|
||||
// Used to iterate through the platforms and devices, respectively
|
||||
cl_uint numPlatforms;
|
||||
cl_uint numDevices;
|
||||
|
||||
|
||||
// These will hold the platform and device we select (can potentially be
|
||||
// multiple, but we're just doing one for now)
|
||||
// cl_platform_id platform = NULL;
|
||||
|
@ -397,23 +395,24 @@ cl_context cl_init_context(int platform, int dev,int quiet) {
|
|||
// Getting platform and device information
|
||||
|
||||
numPlatforms = 1;
|
||||
numDevices = 1;
|
||||
int platform_touse = 0;
|
||||
int device_touse = 0;
|
||||
platforms = (cl_platform_id*)malloc(numPlatforms * sizeof(cl_platform_id));
|
||||
devices = (cl_device_id*)malloc(sizeof(cl_device_id)*numDevices);
|
||||
|
||||
status = clGetPlatformIDs(1, platforms, NULL);
|
||||
numDevices = (cl_uint*)malloc(sizeof(cl_uint)*numPlatforms);
|
||||
numDevices[0] = 1;
|
||||
devices = (cl_device_id*)malloc(sizeof(cl_device_id)*numDevices[0]);
|
||||
|
||||
int platform_touse = 0;
|
||||
int device_touse = 0;
|
||||
|
||||
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
|
||||
cl_errChk(status, "Oops!", true);
|
||||
status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_DEFAULT, 1, devices, NULL);
|
||||
status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_DEFAULT, numDevices[0], devices, NULL);
|
||||
cl_errChk(status, "Oops!", true);
|
||||
context = clCreateContext(NULL, 1, devices, NULL, NULL, &status);
|
||||
context = clCreateContext(NULL, numDevices[0], devices, NULL, NULL, &status);
|
||||
cl_errChk(status, "Oops!", true);
|
||||
|
||||
device=devices[device_touse];
|
||||
|
||||
#define PROFILING
|
||||
|
||||
#ifdef PROFILING
|
||||
|
||||
commandQueue = clCreateCommandQueue(context,
|
||||
|
@ -421,7 +420,7 @@ cl_context cl_init_context(int platform, int dev,int quiet) {
|
|||
|
||||
#else
|
||||
|
||||
clCommandQueue = clCreateCommandQueue(clGPUContext,
|
||||
commandQueue = clCreateCommandQueue(context,
|
||||
devices[device_touse], NULL, &status);
|
||||
|
||||
#endif // PROFILING
|
||||
|
@ -434,22 +433,34 @@ cl_context cl_init_context(int platform, int dev,int quiet) {
|
|||
/*!
|
||||
Release all resources that the user doesn't have access to.
|
||||
*/
|
||||
void cl_cleanup()
|
||||
void cl_cleanup()
|
||||
{
|
||||
cl_int status;
|
||||
|
||||
// Free the command queue
|
||||
if(commandQueue) {
|
||||
clReleaseCommandQueue(commandQueue);
|
||||
if (commandQueue) {
|
||||
status = clReleaseCommandQueue(commandQueue);
|
||||
cl_errChk(status, "Oops!", true);
|
||||
printf("clReleaseCommandQueue()\n");
|
||||
}
|
||||
|
||||
// Free the context
|
||||
if(context) {
|
||||
clReleaseContext(context);
|
||||
if (context) {
|
||||
status = clReleaseContext(context);
|
||||
cl_errChk(status, "Oops!", true);
|
||||
printf("clReleaseContext()\n");
|
||||
}
|
||||
|
||||
for (int p = 0; p < numPlatforms; ++p) {
|
||||
for (int d = 0; d < numDevices[p]; ++d) {
|
||||
status = clReleaseDevice(devices[d]);
|
||||
cl_errChk(status, "Oops!", true);
|
||||
printf("clReleaseDevice()\n");
|
||||
}
|
||||
}
|
||||
|
||||
free(devices);
|
||||
free(numDevices);
|
||||
|
||||
// Free the platforms
|
||||
free(platforms);
|
||||
}
|
||||
|
||||
|
@ -464,6 +475,7 @@ void cl_freeKernel(cl_kernel kernel)
|
|||
if(kernel != NULL) {
|
||||
status = clReleaseKernel(kernel);
|
||||
cl_errChk(status, "Releasing kernel object", true);
|
||||
printf("clReleaseKernel()\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -478,6 +490,7 @@ void cl_freeMem(cl_mem mem)
|
|||
if(mem != NULL) {
|
||||
status = clReleaseMemObject(mem);
|
||||
cl_errChk(status, "Releasing mem object", true);
|
||||
printf("clReleaseMemObject()\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -492,6 +505,7 @@ void cl_freeProgram(cl_program program)
|
|||
if(program != NULL) {
|
||||
status = clReleaseProgram(program);
|
||||
cl_errChk(status, "Releasing program object", true);
|
||||
printf("clReleaseProgram()\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -49,25 +49,27 @@ int main(int argc, char *argv[]) {
|
|||
printf("%s --> Distance=%f\n", records[i].recString, records[i].distance);
|
||||
}
|
||||
free(recordDistances);
|
||||
|
||||
cl_cleanup();
|
||||
|
||||
printf("Passed!\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
float *OpenClFindNearestNeighbors(cl_context context, int numRecords,
|
||||
std::vector<LatLong> &locations, float lat,
|
||||
float lng, int timing) {
|
||||
|
||||
// 1. set up kernel
|
||||
cl_kernel NN_kernel;
|
||||
cl_int status;
|
||||
|
||||
// 1. set up kernel
|
||||
cl_kernel NN_kernel;
|
||||
cl_program cl_NN_program;
|
||||
cl_NN_program = cl_compileProgram((char *)"nearestNeighbor_kernel.cl", NULL);
|
||||
|
||||
NN_kernel = clCreateKernel(cl_NN_program, "NearestNeighbor", &status);
|
||||
status =
|
||||
cl_errChk(status, (char *)"Error Creating Nearest Neighbor kernel", true);
|
||||
if (status)
|
||||
exit(1);
|
||||
cl_errChk(status, (char *)"Error Creating Nearest Neighbor kernel", true);
|
||||
|
||||
// 2. set up memory on device and send ipts data to device
|
||||
// copy ipts(1,2) to device
|
||||
// also need to alloate memory for the distancePoints
|
||||
|
@ -78,9 +80,11 @@ float *OpenClFindNearestNeighbors(cl_context context, int numRecords,
|
|||
|
||||
d_locations = clCreateBuffer(context, CL_MEM_READ_ONLY,
|
||||
sizeof(LatLong) * numRecords, NULL, &error);
|
||||
cl_errChk(error, "ERROR: clCreateBuffer() failed", true);
|
||||
|
||||
d_distances = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(float) * numRecords, NULL, &error);
|
||||
cl_errChk(error, "ERROR: clCreateBuffer() failed", true);
|
||||
|
||||
cl_command_queue command_queue = cl_getCommandQueue();
|
||||
cl_event writeEvent, kernelEvent, readEvent;
|
||||
|
@ -89,6 +93,7 @@ float *OpenClFindNearestNeighbors(cl_context context, int numRecords,
|
|||
0, // offset
|
||||
sizeof(LatLong) * numRecords, &locations[0], 0,
|
||||
NULL, &writeEvent);
|
||||
cl_errChk(error, "ERROR: clEnqueueWriteBuffer() failed", true);
|
||||
|
||||
// 3. send arguments to device
|
||||
cl_int argchk;
|
||||
|
@ -124,8 +129,10 @@ float *OpenClFindNearestNeighbors(cl_context context, int numRecords,
|
|||
&readEvent);
|
||||
|
||||
cl_errChk(error, "ERROR with clEnqueueReadBuffer", true);
|
||||
if (timing) {
|
||||
clFinish(command_queue);
|
||||
|
||||
clFinish(command_queue);
|
||||
|
||||
if (timing) {
|
||||
cl_ulong eventStart, eventEnd, totalTime = 0;
|
||||
printf("# Records\tWrite(s) [size]\t\tKernel(s)\tRead(s) "
|
||||
"[size]\t\tTotal(s)\n");
|
||||
|
@ -166,8 +173,14 @@ float *OpenClFindNearestNeighbors(cl_context context, int numRecords,
|
|||
printf("%f\n\n", (float)(totalTime / 1e9));
|
||||
}
|
||||
// 6. return finalized data and release buffers
|
||||
clReleaseMemObject(d_locations);
|
||||
clReleaseMemObject(d_distances);
|
||||
clReleaseEvent(writeEvent);
|
||||
clReleaseEvent(kernelEvent);
|
||||
clReleaseEvent(readEvent);
|
||||
cl_freeMem(d_locations);
|
||||
cl_freeMem(d_distances);
|
||||
cl_freeKernel(NN_kernel);
|
||||
cl_freeProgram(cl_NN_program);
|
||||
|
||||
return distances;
|
||||
}
|
||||
|
||||
|
|
|
@ -157,7 +157,7 @@ int main(int argc, char **argv) {
|
|||
context = CL_CHECK_ERR(clCreateContext(NULL, 1, &device_id, &pfn_notify, NULL, &_err));
|
||||
|
||||
cl_command_queue queue;
|
||||
queue = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &_err));
|
||||
queue = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, NULL, &_err));
|
||||
|
||||
cl_kernel kernel = 0;
|
||||
cl_mem memObjects[2] = {0, 0};
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue