From 80c81fc77dba8d06dc5dc243cbe392ea85bfc42f Mon Sep 17 00:00:00 2001 From: shin0403 Date: Wed, 24 Apr 2024 11:11:50 +0900 Subject: [PATCH] change d2d to d2h & h2d --- tests/opencl/cfd/CLHelper.h | 2221 +++++++++++++++++------------------ tests/opencl/cfd/euler3d.cc | 654 ++++++----- 2 files changed, 1437 insertions(+), 1438 deletions(-) diff --git a/tests/opencl/cfd/CLHelper.h b/tests/opencl/cfd/CLHelper.h index 41e2802af..dc382eedb 100644 --- a/tests/opencl/cfd/CLHelper.h +++ b/tests/opencl/cfd/CLHelper.h @@ -6,34 +6,31 @@ #ifndef _CL_HELPER_ #define _CL_HELPER_ -#include -#include -#include -#include -#include -#include "util.h" - -#ifdef TIMING #include "timing.h" -#endif +#include "util.h" +#include +#include +#include +#include +#include -using std::string; -using std::ifstream; using std::cerr; +using std::cout; using std::endl; -//using std::cout; +using std::ifstream; +using std::string; //#define PROFILE_ #ifdef PROFILE_ -double TE; //: total execution time; -double CC; //: Context creation time; -double CR; //: Context release time; -double MA; //: GPU memory allocation time; -double MF; //: GPU memory free time; -double H2D; //: the time to transfer data from host to device; -double D2H; //: the time to transfer data from device to host; +double TE; //: total execution time; +double CC; //: Context creation time; +double CR; //: Context release time; +double MA; //: GPU memory allocation time; +double MF; //: GPU memory free time; +double H2D; //: the time to transfer data from host to device; +double D2H; //: the time to transfer data from device to host; double D2D; //: the time to transfer data from device to device; -double KE; //: the kernel execution time +double KE; //: the kernel execution time double KC; //: the kernel compilation time #endif @@ -47,18 +44,18 @@ struct timeval tv_d2h_start, tv_d2h_end; struct timeval tv_kernel_start, tv_kernel_end; struct timeval tv_mem_alloc_start, tv_mem_alloc_end; struct timeval tv_close_start, tv_close_end; -float init_time = 0, mem_alloc_time = 0, h2d_time = 0, kernel_time= 0, +float init_time = 0, mem_alloc_time = 0, h2d_time = 0, kernel_time = 0, d2h_time = 0, d2d_time = 0, close_time = 0, total_time = 0; #endif //#pragma OPENCL EXTENSION cl_nv_compiler_options:enable -#define WORK_DIM 2 //work-items dimensions +#define WORK_DIM 2 //work-items dimensions /*------------------------------------------------------------ @struct: the structure of device properties @date: 24/03/2011 ------------------------------------------------------------*/ -struct _clDeviceProp{ -/*CL_DEVICE_ADDRESS_BITS +struct _clDeviceProp { + /*CL_DEVICE_ADDRESS_BITS CL_DEVICE_AVAILABLE CL_DEVICE_COMPILER_AVAILABLE CL_DEVICE_ENDIAN_LITTLE @@ -108,53 +105,30 @@ CL_DEVICE_VENDOR_ID CL_DEVICE_VENDOR CL_DEVICE_VERSION CL_DRIVER_VERSION*/ -char device_name[100]; + char device_name[100]; }; -struct oclHandleStruct{ - cl_context context; - cl_device_id *devices; - cl_command_queue queue; - cl_program program; - cl_int cl_status; - std::string error_str; - std::vector kernel; - cl_mem pinned_mem_out; - cl_mem pinned_mem_in; +struct oclHandleStruct { + cl_context context; + cl_device_id* devices; + cl_command_queue queue; + cl_program program; + cl_int cl_status; + std::string error_str; + std::vector kernel; + cl_mem pinned_mem_out; + cl_mem pinned_mem_in; }; struct oclHandleStruct oclHandles; -char kernel_file[100] = "Kernels.cl"; +char kernel_file[100] = "Kernels.cl"; int total_kernels = 5; //string kernel_names[9] = {"memset_kernel", "initialize_variables", "compute_step_factor", "compute_flux", "time_step", "compute_speed_sqd", "compute_velocity", "compute_pressure", "compute_speed_of_sound"}; -string kernel_names[5] = {"memset_kernel", "initialize_variables", "compute_step_factor", "compute_flux", "time_step"}; +string kernel_names[5] = { "memset_kernel", "initialize_variables", "compute_step_factor", "compute_flux", "time_step" }; int work_group_size = BLOCK_SIZE_0; int device_id_inused = 0; //deviced id used (default : 0) int number_devices = 0; -int read_kernel_file(const char* filename, uint8_t** data, size_t* size) { - if (nullptr == filename || nullptr == data || 0 == size) - return -1; - - FILE* fp = fopen(filename, "r"); - if (NULL == fp) { - fprintf(stderr, "Failed to load kernel."); - return -1; - } - fseek(fp , 0 , SEEK_END); - long fsize = ftell(fp); - rewind(fp); - - *data = (uint8_t*)malloc(fsize); - *size = fread(*data, 1, fsize, fp); - - fclose(fp); - - return 0; -} - - - /*------------------------------------------------------------ @function: select device to use @params: @@ -162,45 +136,45 @@ int read_kernel_file(const char* filename, uint8_t** data, size_t* size) { @return: NULL @date: 24/03/2011 ------------------------------------------------------------*/ -void _clSetDevice(int idx) throw(string){ +void _clSetDevice(int idx) throw(string) +{ - cl_int resultCL; - oclHandles.context = NULL; - oclHandles.devices = NULL; - oclHandles.queue = NULL; - oclHandles.program = NULL; - cl_uint deviceListSize; - cl_uint numPlatforms; - cl_platform_id targetPlatform = NULL; + cl_int resultCL; + oclHandles.context = NULL; + oclHandles.devices = NULL; + oclHandles.queue = NULL; + oclHandles.program = NULL; + cl_uint deviceListSize; + cl_uint numPlatforms; + cl_platform_id targetPlatform = NULL; - resultCL = clGetPlatformIDs(0, NULL, &numPlatforms); - if (resultCL != CL_SUCCESS) - throw (string("InitCL()::Error: Getting number of platforms (clGetPlatformIDs)")); + resultCL = clGetPlatformIDs(0, NULL, &numPlatforms); + if (resultCL != CL_SUCCESS) + throw(string("InitCL()::Error: Getting number of platforms (clGetPlatformIDs)")); - if (!(numPlatforms > 0)) - throw (string("InitCL()::Error: No platforms found (clGetPlatformIDs)")); + if (!(numPlatforms > 0)) + throw(string("InitCL()::Error: No platforms found (clGetPlatformIDs)")); - cl_platform_id* allPlatforms = (cl_platform_id*) malloc(numPlatforms * sizeof(cl_platform_id)); + cl_platform_id* allPlatforms = (cl_platform_id*)malloc(numPlatforms * sizeof(cl_platform_id)); - resultCL = clGetPlatformIDs(numPlatforms, allPlatforms, NULL); - if (resultCL != CL_SUCCESS) - throw (string("InitCL()::Error: Getting platform ids (clGetPlatformIDs)")); + resultCL = clGetPlatformIDs(numPlatforms, allPlatforms, NULL); + if (resultCL != CL_SUCCESS) + throw(string("InitCL()::Error: Getting platform ids (clGetPlatformIDs)")); - /* Select the target platform. Default: first platform */ - targetPlatform = allPlatforms[0]; - free(allPlatforms); + /* Select the target platform. Default: first platform */ + targetPlatform = allPlatforms[0]; + free(allPlatforms); - oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ALL, 0, NULL, &deviceListSize); - if(oclHandles.cl_status!=CL_SUCCESS){ - throw(string("exception in _clInit -> clGetDeviceIDs")); - } - if (deviceListSize == 0) - throw(string("InitCL()::Error: No devices found.")); - - if(idx>(deviceListSize-1)) - throw(string(":invalid device ID:")); - device_id_inused = idx; - + oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ALL, 0, NULL, &deviceListSize); + if (oclHandles.cl_status != CL_SUCCESS) { + throw(string("exception in _clInit -> clGetDeviceIDs")); + } + if (deviceListSize == 0) + throw(string("InitCL()::Error: No devices found.")); + + if (idx > (deviceListSize - 1)) + throw(string(":invalid device ID:")); + device_id_inused = idx; } /*------------------------------------------------------------ @@ -211,71 +185,94 @@ void _clSetDevice(int idx) throw(string){ @return: prop @date: 24/03/2011 ------------------------------------------------------------*/ -void _clGetDeviceProperties(int idx, _clDeviceProp *prop) throw(string){ - - oclHandles.cl_status= clGetDeviceInfo(oclHandles.devices[idx], CL_DEVICE_NAME, 100, prop->device_name, NULL); - +void _clGetDeviceProperties(int idx, _clDeviceProp* prop) throw(string) +{ + + oclHandles.cl_status = clGetDeviceInfo(oclHandles.devices[idx], CL_DEVICE_NAME, 100, prop->device_name, NULL); + #ifdef ERRMSG - if(oclHandles.cl_status != CL_SUCCESS){ - oclHandles.error_str = "exception in _clGetDeviceProperties-> "; - switch(oclHandles.cl_status){ - case CL_INVALID_DEVICE: - oclHandles.error_str += "CL_INVALID_DEVICE"; - break; - case CL_INVALID_VALUE: - oclHandles.error_str += "CL_INVALID_VALUE"; - break; - default: - oclHandles.error_str += "unknown reasons"; - break; - } - throw(oclHandles.error_str); - } + if (oclHandles.cl_status != CL_SUCCESS) { + oclHandles.error_str = "exception in _clGetDeviceProperties-> "; + switch (oclHandles.cl_status) { + case CL_INVALID_DEVICE: + oclHandles.error_str += "CL_INVALID_DEVICE"; + break; + case CL_INVALID_VALUE: + oclHandles.error_str += "CL_INVALID_VALUE"; + break; + default: + oclHandles.error_str += "unknown reasons"; + break; + } + throw(oclHandles.error_str); + } #endif } +int read_kernel_file(const char* filename, uint8_t** data, size_t* size) +{ + if (nullptr == filename || nullptr == data || 0 == size) + return -1; + + FILE* fp = fopen(filename, "r"); + if (NULL == fp) { + fprintf(stderr, "Failed to load kernel."); + return -1; + } + fseek(fp, 0, SEEK_END); + long fsize = ftell(fp); + rewind(fp); + + *data = (uint8_t*)malloc(fsize); + *size = fread(*data, 1, fsize, fp); + + fclose(fp); + + return 0; +} + /* * Converts the contents of a file into a string */ -string FileToString(const string fileName){ - ifstream f(fileName.c_str(), ifstream::in | ifstream::binary); +string FileToString(const string fileName) +{ + ifstream f(fileName.c_str(), ifstream::in | ifstream::binary); - try{ - size_t size; - char* str; - string s; + try { + size_t size; + char* str; + string s; - if(f.is_open()){ - size_t fileSize; - f.seekg(0, ifstream::end); - size = fileSize = f.tellg(); - f.seekg(0, ifstream::beg); + if (f.is_open()) { + size_t fileSize; + f.seekg(0, ifstream::end); + size = fileSize = f.tellg(); + f.seekg(0, ifstream::beg); - str = new char[size+1]; - if (!str) throw(string("Could not allocate memory")); + str = new char[size + 1]; + if (!str) + throw(string("Could not allocate memory")); - f.read(str, fileSize); - f.close(); - str[size] = '\0'; - - s = str; - delete [] str; - return s; - } + f.read(str, fileSize); + f.close(); + str[size] = '\0'; + + s = str; + delete[] str; + return s; } - catch(std::string msg){ - cerr << "Exception caught in FileToString(): " << msg << endl; - if(f.is_open()) - f.close(); - } - catch(...){ - cerr << "Exception caught in FileToString()" << endl; - if(f.is_open()) - f.close(); - } - string errorMsg = "FileToString()::Error: Unable to open file " - + fileName; - throw(errorMsg); + } catch (std::string msg) { + cerr << "Exception caught in FileToString(): " << msg << endl; + if (f.is_open()) + f.close(); + } catch (...) { + cerr << "Exception caught in FileToString()" << endl; + if (f.is_open()) + f.close(); + } + string errorMsg = "FileToString()::Error: Unable to open file " + + fileName; + throw(errorMsg); } /*------------------------------------------------------------ @@ -287,40 +284,37 @@ string FileToString(const string fileName){ char device_type[3]; int device_id = 0; int platform_id = 0; -void _clCmdParams(int argc, char* argv[]){ - for (int i = 0; i < argc; ++i){ - switch (argv[i][1]){ - case 't': //--t stands for device type - if (++i < argc){ - sscanf(argv[i], "%s", device_type); - } - else{ - std::cerr << "Could not read argument after option " << argv[i-1] << std::endl; - throw; - } - break; - case 'd': //--d stands for device id - if (++i < argc){ - sscanf(argv[i], "%d", &device_id); - } - else{ - std::cerr << "Could not read argument after option " << argv[i-1] << std::endl; - throw; - } - break; - case 'p': //--p stands for platform id - if (++i < argc){ - sscanf(argv[i], "%d", &platform_id); - } - else{ - std::cerr << "Could not read argument after option " << argv[i-1] << std::endl; - throw; - } - break; - default: - ; - } - } +void _clCmdParams(int argc, char* argv[]) +{ + for (int i = 0; i < argc; ++i) { + switch (argv[i][1]) { + case 't': //--t stands for device type + if (++i < argc) { + sscanf(argv[i], "%s", device_type); + } else { + std::cerr << "Could not read argument after option " << argv[i - 1] << std::endl; + throw; + } + break; + case 'd': //--d stands for device id + if (++i < argc) { + sscanf(argv[i], "%d", &device_id); + } else { + std::cerr << "Could not read argument after option " << argv[i - 1] << std::endl; + throw; + } + break; + case 'p': //--p stands for platform id + if (++i < argc) { + sscanf(argv[i], "%d", &platform_id); + } else { + std::cerr << "Could not read argument after option " << argv[i - 1] << std::endl; + throw; + } + break; + default:; + } + } } /*------------------------------------------------------------ @@ -340,213 +334,203 @@ void _clCmdParams(int argc, char* argv[]){ get the number of devices and devices have no relationship with context @date: 24/03/2011 ------------------------------------------------------------*/ -void _clInit(string device_type, int device_id)throw(string){ +void _clInit(string device_type, int device_id) throw(string) +{ #ifdef TIMING - gettimeofday(&tv_total_start, NULL); + gettimeofday(&tv_total_start, NULL); #endif #ifdef PROFILE_ - TE = 0; - CC = 0; - CR = 0; - MA = 0; - MF = 0; - H2D = 0; - D2H = 0; - D2D = 0; - KE = 0; - KC = 0; + TE = 0; + CC = 0; + CR = 0; + MA = 0; + MF = 0; + H2D = 0; + D2H = 0; + D2D = 0; + KE = 0; + KC = 0; #endif - int DEVICE_ID_INUSED = 0; - _clDeviceProp prop; + int DEVICE_ID_INUSED = 0; + _clDeviceProp prop; #ifdef PROFILE_ - double t1 = gettime(); + double t1 = gettime(); #endif - cl_int resultCL; - oclHandles.context = NULL; - oclHandles.devices = NULL; - oclHandles.queue = NULL; - oclHandles.program = NULL; + cl_int resultCL; + oclHandles.context = NULL; + oclHandles.devices = NULL; + oclHandles.queue = NULL; + oclHandles.program = NULL; - cl_uint deviceListSize; - //----------------------------------------------- - //--cambine-1: find the available platforms and select one + cl_uint deviceListSize; + //----------------------------------------------- + //--cambine-1: find the available platforms and select one - cl_uint numPlatforms; - cl_platform_id targetPlatform = NULL; + cl_uint numPlatforms; + cl_platform_id targetPlatform = NULL; - resultCL = clGetPlatformIDs(0, NULL, &numPlatforms); - if (resultCL != CL_SUCCESS) - throw (string("InitCL()::Error: Getting number of platforms (clGetPlatformIDs)")); + resultCL = clGetPlatformIDs(0, NULL, &numPlatforms); + if (resultCL != CL_SUCCESS) + throw(string("InitCL()::Error: Getting number of platforms (clGetPlatformIDs)")); //printf("number of platforms:%d\n",numPlatforms); //by cambine - printf("--cambine: number of platforms: %d\n",numPlatforms); +#ifdef DEV_INFO + //std::cout << "--cambine: number of platforms: " << numPlatforms << std::endl; +#endif - if (!(numPlatforms > 0)) - throw (string("InitCL()::Error: No platforms found (clGetPlatformIDs)")); + if (!(numPlatforms > 0)) + throw(string("InitCL()::Error: No platforms found (clGetPlatformIDs)")); - cl_platform_id* allPlatforms = (cl_platform_id*) malloc(numPlatforms * sizeof(cl_platform_id)); + cl_platform_id* allPlatforms = (cl_platform_id*)malloc(numPlatforms * sizeof(cl_platform_id)); - resultCL = clGetPlatformIDs(numPlatforms, allPlatforms, NULL); + resultCL = clGetPlatformIDs(numPlatforms, allPlatforms, NULL); + if (resultCL != CL_SUCCESS) + throw(string("InitCL()::Error: Getting platform ids (clGetPlatformIDs)")); + + // Select the target platform. Default: first platform + targetPlatform = allPlatforms[platform_id]; + for (int i = 0; i < numPlatforms; i++) { + char pbuff[128]; + resultCL = clGetPlatformInfo(allPlatforms[i], + CL_PLATFORM_VENDOR, + sizeof(pbuff), + pbuff, + NULL); if (resultCL != CL_SUCCESS) - throw (string("InitCL()::Error: Getting platform ids (clGetPlatformIDs)")); + throw(string("InitCL()::Error: Getting platform info (clGetPlatformInfo)")); - // Select the target platform. Default: first platform - targetPlatform = allPlatforms[platform_id]; - for (int i = 0; i < numPlatforms; i++) - { - char pbuff[128]; - resultCL = clGetPlatformInfo( allPlatforms[i], - CL_PLATFORM_VENDOR, - sizeof(pbuff), - pbuff, - NULL); - if (resultCL != CL_SUCCESS) - throw (string("InitCL()::Error: Getting platform info (clGetPlatformInfo)")); + //printf("vedor is %s\n",pbuff); +#ifdef DEV_INFO + //std::cout << "--cambine: vedor is: " << pbuff << std::endl; +#endif + } + free(allPlatforms); + //----------------------------------------------- + //--cambine-2: detect OpenCL devices + // First, get the size of device list + if (device_type.compare("") != 0) { + if (device_type.compare("cpu") == 0) { + oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_CPU, 0, NULL, &deviceListSize); + if (oclHandles.cl_status != CL_SUCCESS) { + throw(string("exception in _clInit -> clGetDeviceIDs -> CPU")); + } + } + if (device_type.compare("gpu") == 0) { + oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_GPU, 0, NULL, &deviceListSize); + if (oclHandles.cl_status != CL_SUCCESS) { + throw(string("exception in _clInit -> clGetDeviceIDs -> GPU")); + } + } + if (device_type.compare("acc") == 0) { + oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ACCELERATOR, 0, NULL, &deviceListSize); + if (oclHandles.cl_status != CL_SUCCESS) { + throw(string("exception in _clInit -> clGetDeviceIDs -> ACCELERATOR")); + } + } + } else { + oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ALL, 0, NULL, &deviceListSize); + if (oclHandles.cl_status != CL_SUCCESS) { + throw(string("exception in _clInit -> clGetDeviceIDs -> ALL")); + } + } - //printf("vedor is %s\n",pbuff); -#ifdef DEV_INFO - //std::cout<<"--cambine: vedor is: "< clGetDeviceIDs -> CPU ->2")); + } + } + if (device_type.compare("gpu") == 0) { + oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_GPU, deviceListSize, oclHandles.devices, NULL); + if (oclHandles.cl_status != CL_SUCCESS) { + throw(string("exception in _clInit -> clGetDeviceIDs -> GPU -> 2")); + } + } + if (device_type.compare("acc") == 0) { + oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ACCELERATOR, deviceListSize, oclHandles.devices, NULL); + if (oclHandles.cl_status != CL_SUCCESS) { + throw(string("exception in _clInit -> clGetDeviceIDs -> ACCELERATOR -> 2")); + } + } + } else { + oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ALL, deviceListSize, oclHandles.devices, NULL); + if (oclHandles.cl_status != CL_SUCCESS) { + throw(string("exception in _clInit -> clGetDeviceIDs -> ALL -> 2")); + } + } + if (device_id != 0) { + if (device_id > (deviceListSize - 1)) + throw(string("Invalidate device id")); + DEVICE_ID_INUSED = device_id; + } + + _clGetDeviceProperties(DEVICE_ID_INUSED, &prop); + //std::cout << "--cambine: device name=" << prop.device_name << std::endl; + +#ifdef DEV_INFO + //std::cout << "--cambine: return device list successfully!" << std::endl; #endif - } - free(allPlatforms); - //----------------------------------------------- - //--cambine-2: detect OpenCL devices - // First, get the size of device list - if(device_type.compare("")!=0){ - if(device_type.compare("cpu")==0){ - oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_CPU, 0, NULL, &deviceListSize); - if(oclHandles.cl_status!=CL_SUCCESS){ - throw(string("exception in _clInit -> clGetDeviceIDs -> CPU")); - } - } - if(device_type.compare("gpu")==0){ - oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_GPU, 0, NULL, &deviceListSize); - if(oclHandles.cl_status!=CL_SUCCESS){ - throw(string("exception in _clInit -> clGetDeviceIDs -> GPU")); - } - } - if(device_type.compare("acc")==0){ - oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ACCELERATOR, 0, NULL, &deviceListSize); - if(oclHandles.cl_status!=CL_SUCCESS){ - throw(string("exception in _clInit -> clGetDeviceIDs -> ACCELERATOR")); - } - } - } - else{ - oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ALL, 0, NULL, &deviceListSize); - if(oclHandles.cl_status!=CL_SUCCESS){ - throw(string("exception in _clInit -> clGetDeviceIDs -> ALL")); - } - } - - if (deviceListSize == 0) - throw(string("InitCL()::Error: No devices found.")); - -//#ifdef DEV_INFO - printf("--cambine: number of device\n"); //< clGetDeviceIDs -> CPU ->2")); - } - } - if(device_type.compare("gpu")==0){ - oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_GPU, deviceListSize, oclHandles.devices, NULL); - if(oclHandles.cl_status!=CL_SUCCESS){ - throw(string("exception in _clInit -> clGetDeviceIDs -> GPU -> 2")); - } - } - if(device_type.compare("acc")==0){ - oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ACCELERATOR, deviceListSize, oclHandles.devices, NULL); - if(oclHandles.cl_status!=CL_SUCCESS){ - throw(string("exception in _clInit -> clGetDeviceIDs -> ACCELERATOR -> 2")); - } - } - } - else{ - oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ALL, deviceListSize, oclHandles.devices, NULL); - if(oclHandles.cl_status!=CL_SUCCESS){ - throw(string("exception in _clInit -> clGetDeviceIDs -> ALL -> 2")); - } - } - if(device_id!=0){ - if(device_id>(deviceListSize-1)) - throw(string("Invalidate device id")); - DEVICE_ID_INUSED = device_id; - } - - _clGetDeviceProperties(DEVICE_ID_INUSED, &prop); - //std::cout<<"--cambine: device name="< clGetProgramInfo-2")); - } + //get program information in intermediate representation +#ifdef PTX_MSG + size_t binary_sizes[deviceListSize]; + char* binaries[deviceListSize]; + //figure out number of devices and the sizes of the binary for each device. + oclHandles.cl_status = clGetProgramInfo(oclHandles.program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t) * deviceListSize, &binary_sizes, NULL); + if (oclHandles.cl_status != CL_SUCCESS) { + throw(string("--cambine:exception in _InitCL -> clGetProgramInfo-2")); + } - //std::cout<<"--cambine:"< clGetProgramInfo-3")); - } - for(int i=0;i clGetProgramInfo-3")); + } + for (int i = 0; i < deviceListSize; i++) + binaries[i][binary_sizes[i]] = '\0'; + //std::cout << "--cambine:writing ptd information..." << std::endl; + FILE* ptx_file = fopen("cl.ptx", "w"); + if (ptx_file == NULL) { + throw(string("exceptions in allocate ptx file.")); + } + fprintf(ptx_file, "%s", binaries[DEVICE_ID_INUSED]); + fclose(ptx_file); + //std::cout << "--cambine:writing ptd information done." << std::endl; + for (int i = 0; i < deviceListSize; i++) + free(binaries[i]); #endif - for (int nKernel = 0; nKernel < total_kernels; nKernel++) - { - // get a kernel object handle for a kernel with the given name - cl_kernel kernel = clCreateKernel(oclHandles.program, - (kernel_names[nKernel]).c_str(), - &resultCL); + for (int nKernel = 0; nKernel < total_kernels; nKernel++) { + // get a kernel object handle for a kernel with the given name + cl_kernel kernel = clCreateKernel(oclHandles.program, + (kernel_names[nKernel]).c_str(), + &resultCL); - if ((resultCL != CL_SUCCESS) || (kernel == NULL)) - { - string errorMsg = "InitCL()::Error: Creating Kernel (clCreateKernel) \"" + kernel_names[nKernel] + "\""; - throw(errorMsg); - } - - oclHandles.kernel.push_back(kernel); + if ((resultCL != CL_SUCCESS) || (kernel == NULL)) { + string errorMsg = "InitCL()::Error: Creating Kernel (clCreateKernel) \"" + kernel_names[nKernel] + "\""; + throw(errorMsg); } + + oclHandles.kernel.push_back(kernel); + } //get resource alocation information #ifdef RES_MSG - char * build_log; - size_t ret_val_size; - oclHandles.cl_status = clGetProgramBuildInfo(oclHandles.program, oclHandles.devices[DEVICE_ID_INUSED], CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size); - if(oclHandles.cl_status!=CL_SUCCESS){ - throw(string("exceptions in _InitCL -> getting resource information")); - } + char* build_log; + size_t ret_val_size; + oclHandles.cl_status = clGetProgramBuildInfo(oclHandles.program, oclHandles.devices[DEVICE_ID_INUSED], CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size); + if (oclHandles.cl_status != CL_SUCCESS) { + throw(string("exceptions in _InitCL -> getting resource information")); + } - build_log = (char *)malloc(ret_val_size+1); - oclHandles.cl_status = clGetProgramBuildInfo(oclHandles.program, oclHandles.devices[DEVICE_ID_INUSED], CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL); - if(oclHandles.cl_status!=CL_SUCCESS){ - throw(string("exceptions in _InitCL -> getting resources allocation information-2")); - } - build_log[ret_val_size] = '\0'; - //std::cout<<"--cambine:"< getting resources allocation information-2")); + } + build_log[ret_val_size] = '\0'; + //std::cout << "--cambine:" << build_log << std::endl; + free(build_log); #endif #ifdef PROFILE_ - double t4 = gettime(); - CC += t4 - t3; + double t4 = gettime(); + CC += t4 - t3; #endif -#ifdef TIMING - gettimeofday(&tv_init_end, NULL); - tvsub(&tv_init_end, &tv_total_start, &tv); - init_time = tv.tv_sec * 1000.0 + (float) tv.tv_usec / 1000.0; +#ifdef TIMING + gettimeofday(&tv_init_end, NULL); + tvsub(&tv_init_end, &tv_total_start, &tv); + init_time = tv.tv_sec * 1000.0 + (float)tv.tv_usec / 1000.0; #endif } @@ -698,65 +677,65 @@ void _clInit(string device_type, int device_id)throw(string){ void _clRelease() { #ifdef TIMING - gettimeofday(&tv_close_start, NULL); + gettimeofday(&tv_close_start, NULL); #endif #ifdef PROFILE_ - double t1 = gettime(); + double t1 = gettime(); #endif - bool errorFlag = false; + bool errorFlag = false; - for (int nKernel = 0; nKernel < oclHandles.kernel.size(); nKernel++){ - if (oclHandles.kernel[nKernel] != NULL){ - cl_int resultCL = clReleaseKernel(oclHandles.kernel[nKernel]); - if (resultCL != CL_SUCCESS){ - cerr << "ReleaseCL()::Error: In clReleaseKernel" << endl; - errorFlag = true; - } - oclHandles.kernel[nKernel] = NULL; - } - oclHandles.kernel.clear(); + for (int nKernel = 0; nKernel < oclHandles.kernel.size(); nKernel++) { + if (oclHandles.kernel[nKernel] != NULL) { + cl_int resultCL = clReleaseKernel(oclHandles.kernel[nKernel]); + if (resultCL != CL_SUCCESS) { + cerr << "ReleaseCL()::Error: In clReleaseKernel" << endl; + errorFlag = true; + } + oclHandles.kernel[nKernel] = NULL; } + oclHandles.kernel.clear(); + } - if (oclHandles.program != NULL){ - cl_int resultCL = clReleaseProgram(oclHandles.program); - if (resultCL != CL_SUCCESS){ - cerr << "ReleaseCL()::Error: In clReleaseProgram" << endl; - errorFlag = true; - } - oclHandles.program = NULL; + if (oclHandles.program != NULL) { + cl_int resultCL = clReleaseProgram(oclHandles.program); + if (resultCL != CL_SUCCESS) { + cerr << "ReleaseCL()::Error: In clReleaseProgram" << endl; + errorFlag = true; } + oclHandles.program = NULL; + } - if (oclHandles.queue != NULL){ - cl_int resultCL = clReleaseCommandQueue(oclHandles.queue); - if (resultCL != CL_SUCCESS) - { - cerr << "ReleaseCL()::Error: In clReleaseCommandQueue" << endl; - errorFlag = true; - } - oclHandles.queue = NULL; + if (oclHandles.queue != NULL) { + cl_int resultCL = clReleaseCommandQueue(oclHandles.queue); + if (resultCL != CL_SUCCESS) { + cerr << "ReleaseCL()::Error: In clReleaseCommandQueue" << endl; + errorFlag = true; } + oclHandles.queue = NULL; + } - free(oclHandles.devices); + free(oclHandles.devices); - if (oclHandles.context != NULL){ - cl_int resultCL = clReleaseContext(oclHandles.context); - if (resultCL != CL_SUCCESS){ - cerr << "ReleaseCL()::Error: In clReleaseContext" << endl; - errorFlag = true; - } - oclHandles.context = NULL; + if (oclHandles.context != NULL) { + cl_int resultCL = clReleaseContext(oclHandles.context); + if (resultCL != CL_SUCCESS) { + cerr << "ReleaseCL()::Error: In clReleaseContext" << endl; + errorFlag = true; } + oclHandles.context = NULL; + } - if (errorFlag) throw(string("ReleaseCL()::Error encountered.")); + if (errorFlag) + throw(string("ReleaseCL()::Error encountered.")); #ifdef PROFILE_ - double t2 = gettime(); - CR += t2 - t1; + double t2 = gettime(); + CR += t2 - t1; #endif #ifdef TIMING - gettimeofday(&tv_close_end, NULL); - tvsub(&tv_close_end, &tv_close_start, &tv); - close_time += tv.tv_sec * 1000.0 + (float) tv.tv_usec / 1000.0; + gettimeofday(&tv_close_end, NULL); + tvsub(&tv_close_end, &tv_close_start, &tv); + close_time += tv.tv_sec * 1000.0 + (float)tv.tv_usec / 1000.0; #endif } @@ -767,57 +746,58 @@ void _clRelease() @return: mem_d @date: 24/03/2011 ------------------------------------------------------------*/ -cl_mem _clMalloc(int size) throw(string){ -#ifdef TIMING - gettimeofday(&tv_mem_alloc_start, NULL); +cl_mem _clMalloc(int size) throw(string) +{ +#ifdef TIMING + gettimeofday(&tv_mem_alloc_start, NULL); #endif #ifdef PROFILE_ - double t1 = gettime(); + double t1 = gettime(); #endif - cl_mem d_mem; - d_mem = clCreateBuffer(oclHandles.context, CL_MEM_READ_WRITE, size, NULL, &oclHandles.cl_status); + cl_mem d_mem; + d_mem = clCreateBuffer(oclHandles.context, CL_MEM_READ_WRITE, size, NULL, &oclHandles.cl_status); #ifdef ERRMSG - if(oclHandles.cl_status != CL_SUCCESS){ - oclHandles.error_str = "excpetion in _clMalloc -> "; - switch(oclHandles.cl_status){ - case CL_INVALID_CONTEXT: - oclHandles.error_str += "CL_INVALID_CONTEXT"; - break; - case CL_INVALID_VALUE: - oclHandles.error_str += "CL_INVALID_VALUE"; - break; - case CL_INVALID_BUFFER_SIZE: - oclHandles.error_str += "CL_INVALID_BUFFER_SIZE"; - break; - case CL_INVALID_HOST_PTR: - oclHandles.error_str += "CL_INVALID_HOST_PTR"; - break; - case CL_MEM_OBJECT_ALLOCATION_FAILURE: - oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE"; - break; - case CL_OUT_OF_HOST_MEMORY: - oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; - break; - default: - oclHandles.error_str += "unknown reasons"; - break; - } - throw(oclHandles.error_str); - } + if (oclHandles.cl_status != CL_SUCCESS) { + oclHandles.error_str = "excpetion in _clMalloc -> "; + switch (oclHandles.cl_status) { + case CL_INVALID_CONTEXT: + oclHandles.error_str += "CL_INVALID_CONTEXT"; + break; + case CL_INVALID_VALUE: + oclHandles.error_str += "CL_INVALID_VALUE"; + break; + case CL_INVALID_BUFFER_SIZE: + oclHandles.error_str += "CL_INVALID_BUFFER_SIZE"; + break; + case CL_INVALID_HOST_PTR: + oclHandles.error_str += "CL_INVALID_HOST_PTR"; + break; + case CL_MEM_OBJECT_ALLOCATION_FAILURE: + oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE"; + break; + case CL_OUT_OF_HOST_MEMORY: + oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; + break; + default: + oclHandles.error_str += "unknown reasons"; + break; + } + throw(oclHandles.error_str); + } #endif #ifdef PROFILE_ - double t2 = gettime(); - MA += t2 - t1; + double t2 = gettime(); + MA += t2 - t1; #endif -#ifdef TIMING - gettimeofday(&tv_mem_alloc_end, NULL); - tvsub(&tv_mem_alloc_end, &tv_mem_alloc_start, &tv); - mem_alloc_time += tv.tv_sec * 1000.0 + (float) tv.tv_usec / 1000.0; +#ifdef TIMING + gettimeofday(&tv_mem_alloc_end, NULL); + tvsub(&tv_mem_alloc_end, &tv_mem_alloc_start, &tv); + mem_alloc_time += tv.tv_sec * 1000.0 + (float)tv.tv_usec / 1000.0; #endif - return d_mem; + return d_mem; } /*------------------------------------------------------------ @function: malloc pinned memoty @@ -827,77 +807,78 @@ cl_mem _clMalloc(int size) throw(string){ @date: 06/04/2011 ------------------------------------------------------------*/ -void* _clMallocHost(int size)throw(string){ - void * mem_h; - oclHandles.pinned_mem_out = clCreateBuffer(oclHandles.context, CL_MEM_READ_WRITE|CL_MEM_ALLOC_HOST_PTR, size, NULL, &oclHandles.cl_status); +void* _clMallocHost(int size) throw(string) +{ + void* mem_h; + oclHandles.pinned_mem_out = clCreateBuffer(oclHandles.context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, size, NULL, &oclHandles.cl_status); #ifdef ERRMSG - if(oclHandles.cl_status != CL_SUCCESS){ - oclHandles.error_str = "excpetion in _clMallocHost -> clCreateBuffer"; - switch(oclHandles.cl_status){ - case CL_INVALID_CONTEXT: - oclHandles.error_str += "CL_INVALID_CONTEXT"; - break; - case CL_INVALID_VALUE: - oclHandles.error_str += "CL_INVALID_VALUE"; - break; - case CL_INVALID_BUFFER_SIZE: - oclHandles.error_str += "CL_INVALID_BUFFER_SIZE"; - break; - case CL_INVALID_HOST_PTR: - oclHandles.error_str += "CL_INVALID_HOST_PTR"; - break; - case CL_MEM_OBJECT_ALLOCATION_FAILURE: - oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE"; - break; - case CL_OUT_OF_HOST_MEMORY: - oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; - break; - default: - oclHandles.error_str += "unknown reasons"; - break; - } - throw(oclHandles.error_str); - } + if (oclHandles.cl_status != CL_SUCCESS) { + oclHandles.error_str = "excpetion in _clMallocHost -> clCreateBuffer"; + switch (oclHandles.cl_status) { + case CL_INVALID_CONTEXT: + oclHandles.error_str += "CL_INVALID_CONTEXT"; + break; + case CL_INVALID_VALUE: + oclHandles.error_str += "CL_INVALID_VALUE"; + break; + case CL_INVALID_BUFFER_SIZE: + oclHandles.error_str += "CL_INVALID_BUFFER_SIZE"; + break; + case CL_INVALID_HOST_PTR: + oclHandles.error_str += "CL_INVALID_HOST_PTR"; + break; + case CL_MEM_OBJECT_ALLOCATION_FAILURE: + oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE"; + break; + case CL_OUT_OF_HOST_MEMORY: + oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; + break; + default: + oclHandles.error_str += "unknown reasons"; + break; + } + throw(oclHandles.error_str); + } #endif - mem_h = clEnqueueMapBuffer(oclHandles.queue, oclHandles.pinned_mem_out, CL_TRUE, CL_MAP_READ, 0, size, 0, NULL, NULL, &oclHandles.cl_status); - + mem_h = clEnqueueMapBuffer(oclHandles.queue, oclHandles.pinned_mem_out, CL_TRUE, CL_MAP_READ, 0, size, 0, NULL, NULL, &oclHandles.cl_status); + #ifdef ERRMSG - if(oclHandles.cl_status != CL_SUCCESS||mem_h==NULL){ - oclHandles.error_str = "excpetion in _clMallocHost -> clEnqueueMapBuffer"; - switch(oclHandles.cl_status){ - case CL_INVALID_COMMAND_QUEUE: - oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE"; - break; - case CL_INVALID_CONTEXT: - oclHandles.error_str += "CL_INVALID_CONTEXT"; - break; - case CL_INVALID_MEM_OBJECT: - oclHandles.error_str += "CL_INVALID_MEM_OBJECT"; - break; - case CL_INVALID_VALUE: - oclHandles.error_str += "CL_INVALID_VALUE"; - break; - case CL_INVALID_EVENT_WAIT_LIST: - oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST"; - break; - case CL_MAP_FAILURE: - oclHandles.error_str += "CL_MAP_FAILURE"; - break; - case CL_MEM_OBJECT_ALLOCATION_FAILURE: - oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE"; - break; - case CL_OUT_OF_HOST_MEMORY: - oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; - break; - default: - oclHandles.error_str += "unknown reasons"; - break; - } - throw(oclHandles.error_str); - } + if (oclHandles.cl_status != CL_SUCCESS || mem_h == NULL) { + oclHandles.error_str = "excpetion in _clMallocHost -> clEnqueueMapBuffer"; + switch (oclHandles.cl_status) { + case CL_INVALID_COMMAND_QUEUE: + oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE"; + break; + case CL_INVALID_CONTEXT: + oclHandles.error_str += "CL_INVALID_CONTEXT"; + break; + case CL_INVALID_MEM_OBJECT: + oclHandles.error_str += "CL_INVALID_MEM_OBJECT"; + break; + case CL_INVALID_VALUE: + oclHandles.error_str += "CL_INVALID_VALUE"; + break; + case CL_INVALID_EVENT_WAIT_LIST: + oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST"; + break; + case CL_MAP_FAILURE: + oclHandles.error_str += "CL_MAP_FAILURE"; + break; + case CL_MEM_OBJECT_ALLOCATION_FAILURE: + oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE"; + break; + case CL_OUT_OF_HOST_MEMORY: + oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; + break; + default: + oclHandles.error_str += "unknown reasons"; + break; + } + throw(oclHandles.error_str); + } #endif - return mem_h; + return mem_h; } /*------------------------------------------------------------ @function: free pinned memory @@ -907,77 +888,76 @@ void* _clMallocHost(int size)throw(string){ @return: NULL @date: 06/04/2011 ------------------------------------------------------------*/ -void _clFreeHost(int io, void * mem_h){ - if(io==0){ //in - if(mem_h){ - oclHandles.cl_status = clEnqueueUnmapMemObject(oclHandles.queue, oclHandles.pinned_mem_in, (void*)mem_h, 0, NULL, NULL); +void _clFreeHost(int io, void* mem_h) +{ + if (io == 0) { //in + if (mem_h) { + oclHandles.cl_status = clEnqueueUnmapMemObject(oclHandles.queue, oclHandles.pinned_mem_in, (void*)mem_h, 0, NULL, NULL); #ifdef ERRMSG - if(oclHandles.cl_status != CL_SUCCESS){ - oclHandles.error_str = "excpetion in _clFreeHost -> clEnqueueUnmapMemObject(in)"; - switch(oclHandles.cl_status){ - case CL_INVALID_COMMAND_QUEUE: - oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE"; - break; - case CL_INVALID_MEM_OBJECT: - oclHandles.error_str += "CL_INVALID_MEM_OBJECT"; - break; - case CL_INVALID_VALUE: - oclHandles.error_str += "CL_INVALID_VALUE"; - break; - case CL_OUT_OF_RESOURCES: - oclHandles.error_str += "CL_OUT_OF_RESOURCES"; - break; - case CL_OUT_OF_HOST_MEMORY: - oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; - break; - case CL_INVALID_CONTEXT: - oclHandles.error_str += "CL_INVALID_CONTEXT"; - break; - default: - oclHandles.error_str += "unknown reasons"; - break; - } - throw(oclHandles.error_str); - } + if (oclHandles.cl_status != CL_SUCCESS) { + oclHandles.error_str = "excpetion in _clFreeHost -> clEnqueueUnmapMemObject(in)"; + switch (oclHandles.cl_status) { + case CL_INVALID_COMMAND_QUEUE: + oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE"; + break; + case CL_INVALID_MEM_OBJECT: + oclHandles.error_str += "CL_INVALID_MEM_OBJECT"; + break; + case CL_INVALID_VALUE: + oclHandles.error_str += "CL_INVALID_VALUE"; + break; + case CL_OUT_OF_RESOURCES: + oclHandles.error_str += "CL_OUT_OF_RESOURCES"; + break; + case CL_OUT_OF_HOST_MEMORY: + oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; + break; + case CL_INVALID_CONTEXT: + oclHandles.error_str += "CL_INVALID_CONTEXT"; + break; + default: + oclHandles.error_str += "unknown reasons"; + break; + } + throw(oclHandles.error_str); + } #endif - } - } - else if(io==1){ //out - if(mem_h){ - oclHandles.cl_status = clEnqueueUnmapMemObject(oclHandles.queue, oclHandles.pinned_mem_out, (void*)mem_h, 0, NULL, NULL); - #ifdef ERRMSG - if(oclHandles.cl_status != CL_SUCCESS){ - oclHandles.error_str = "excpetion in _clFreeHost -> clEnqueueUnmapMemObject(in)"; - switch(oclHandles.cl_status){ - case CL_INVALID_COMMAND_QUEUE: - oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE"; - break; - case CL_INVALID_MEM_OBJECT: - oclHandles.error_str += "CL_INVALID_MEM_OBJECT"; - break; - case CL_INVALID_VALUE: - oclHandles.error_str += "CL_INVALID_VALUE"; - break; - case CL_OUT_OF_RESOURCES: - oclHandles.error_str += "CL_OUT_OF_RESOURCES"; - break; - case CL_OUT_OF_HOST_MEMORY: - oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; - break; - case CL_INVALID_CONTEXT: - oclHandles.error_str += "CL_INVALID_CONTEXT"; - break; - default: - oclHandles.error_str += "unknown reasons"; - break; - } - throw(oclHandles.error_str); - } + } + } else if (io == 1) { //out + if (mem_h) { + oclHandles.cl_status = clEnqueueUnmapMemObject(oclHandles.queue, oclHandles.pinned_mem_out, (void*)mem_h, 0, NULL, NULL); +#ifdef ERRMSG + if (oclHandles.cl_status != CL_SUCCESS) { + oclHandles.error_str = "excpetion in _clFreeHost -> clEnqueueUnmapMemObject(in)"; + switch (oclHandles.cl_status) { + case CL_INVALID_COMMAND_QUEUE: + oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE"; + break; + case CL_INVALID_MEM_OBJECT: + oclHandles.error_str += "CL_INVALID_MEM_OBJECT"; + break; + case CL_INVALID_VALUE: + oclHandles.error_str += "CL_INVALID_VALUE"; + break; + case CL_OUT_OF_RESOURCES: + oclHandles.error_str += "CL_OUT_OF_RESOURCES"; + break; + case CL_OUT_OF_HOST_MEMORY: + oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; + break; + case CL_INVALID_CONTEXT: + oclHandles.error_str += "CL_INVALID_CONTEXT"; + break; + default: + oclHandles.error_str += "unknown reasons"; + break; + } + throw(oclHandles.error_str); + } #endif - } - } - else - throw(string("encounter invalid choice when freeing pinned memmory")); + } + } else + throw(string("encounter invalid choice when freeing pinned memmory")); } /*------------------------------------------------------------ @function: transfer data from host to device @@ -988,50 +968,51 @@ void _clFreeHost(int io, void * mem_h){ @return: NULL @date: 17/01/2011 ------------------------------------------------------------*/ -void _clMemcpyH2D(cl_mem dst, const void *src, int size) throw(string){ +void _clMemcpyH2D(cl_mem dst, const void* src, int size) throw(string) +{ #ifdef PROFILE_ - double t1 = gettime(); + double t1 = gettime(); #endif - cl_event event; - oclHandles.cl_status = clEnqueueWriteBuffer(oclHandles.queue, dst, CL_TRUE, 0, size, src, 0, NULL, &event); + cl_event event; + oclHandles.cl_status = clEnqueueWriteBuffer(oclHandles.queue, dst, CL_TRUE, 0, size, src, 0, NULL, &event); #ifdef ERRMSG - if(oclHandles.cl_status != CL_SUCCESS){ - oclHandles.error_str = "excpetion in _clMemcpyH2D -> "; - switch(oclHandles.cl_status){ - case CL_INVALID_COMMAND_QUEUE: - oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE"; - break; - case CL_INVALID_CONTEXT: - oclHandles.error_str += "CL_INVALID_CONTEXT"; - break; - case CL_INVALID_MEM_OBJECT: - oclHandles.error_str += "CL_INVALID_MEM_OBJECT"; - break; - case CL_INVALID_VALUE: - oclHandles.error_str += "CL_INVALID_VALUE"; - break; - case CL_INVALID_EVENT_WAIT_LIST: - oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST"; - break; - case CL_MEM_OBJECT_ALLOCATION_FAILURE: - oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE"; - break; - case CL_OUT_OF_HOST_MEMORY: - oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; - break; - default: - oclHandles.error_str += "Unknown reason"; - break; - } - throw(oclHandles.error_str); - } + if (oclHandles.cl_status != CL_SUCCESS) { + oclHandles.error_str = "excpetion in _clMemcpyH2D -> "; + switch (oclHandles.cl_status) { + case CL_INVALID_COMMAND_QUEUE: + oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE"; + break; + case CL_INVALID_CONTEXT: + oclHandles.error_str += "CL_INVALID_CONTEXT"; + break; + case CL_INVALID_MEM_OBJECT: + oclHandles.error_str += "CL_INVALID_MEM_OBJECT"; + break; + case CL_INVALID_VALUE: + oclHandles.error_str += "CL_INVALID_VALUE"; + break; + case CL_INVALID_EVENT_WAIT_LIST: + oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST"; + break; + case CL_MEM_OBJECT_ALLOCATION_FAILURE: + oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE"; + break; + case CL_OUT_OF_HOST_MEMORY: + oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; + break; + default: + oclHandles.error_str += "Unknown reason"; + break; + } + throw(oclHandles.error_str); + } #endif #ifdef PROFILE_ - double t2 = gettime(); - H2D += t2 - t1; + double t2 = gettime(); + H2D += t2 - t1; #endif #ifdef TIMING - h2d_time += probe_event_time(event, oclHandles.queue); + h2d_time += probe_event_time(event, oclHandles.queue); #endif } @@ -1044,50 +1025,51 @@ void _clMemcpyH2D(cl_mem dst, const void *src, int size) throw(string){ @return: NULL @date: 17/01/2011 ------------------------------------------------------------*/ -void _clMemcpyD2H(void * dst, cl_mem src, int size) throw(string){ +void _clMemcpyD2H(void* dst, cl_mem src, int size) throw(string) +{ #ifdef PROFILE_ - double t1 = gettime(); + double t1 = gettime(); #endif - cl_event event; - oclHandles.cl_status = clEnqueueReadBuffer(oclHandles.queue, src, CL_TRUE, 0, size, dst, 0,0,&event); + cl_event event; + oclHandles.cl_status = clEnqueueReadBuffer(oclHandles.queue, src, CL_TRUE, 0, size, dst, 0, 0, &event); #ifdef ERRMSG - if(oclHandles.cl_status != CL_SUCCESS){ - oclHandles.error_str = "excpetion in _clMemCpyD2H -> "; - switch(oclHandles.cl_status){ - case CL_INVALID_COMMAND_QUEUE: - oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE"; - break; - case CL_INVALID_CONTEXT: - oclHandles.error_str += "CL_INVALID_CONTEXT"; - break; - case CL_INVALID_MEM_OBJECT: - oclHandles.error_str += "CL_INVALID_MEM_OBJECT"; - break; - case CL_INVALID_VALUE: - oclHandles.error_str += "CL_INVALID_VALUE"; - break; - case CL_INVALID_EVENT_WAIT_LIST: - oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST"; - break; - case CL_MEM_OBJECT_ALLOCATION_FAILURE: - oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE"; - break; - case CL_OUT_OF_HOST_MEMORY: - oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; - break; - default: - oclHandles.error_str += "Unknown reason"; - break; - } - throw(oclHandles.error_str); - } + if (oclHandles.cl_status != CL_SUCCESS) { + oclHandles.error_str = "excpetion in _clMemCpyD2H -> "; + switch (oclHandles.cl_status) { + case CL_INVALID_COMMAND_QUEUE: + oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE"; + break; + case CL_INVALID_CONTEXT: + oclHandles.error_str += "CL_INVALID_CONTEXT"; + break; + case CL_INVALID_MEM_OBJECT: + oclHandles.error_str += "CL_INVALID_MEM_OBJECT"; + break; + case CL_INVALID_VALUE: + oclHandles.error_str += "CL_INVALID_VALUE"; + break; + case CL_INVALID_EVENT_WAIT_LIST: + oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST"; + break; + case CL_MEM_OBJECT_ALLOCATION_FAILURE: + oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE"; + break; + case CL_OUT_OF_HOST_MEMORY: + oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; + break; + default: + oclHandles.error_str += "Unknown reason"; + break; + } + throw(oclHandles.error_str); + } #endif #ifdef PROFILE_ - double t2 = gettime(); - D2H += t2 - t1; + double t2 = gettime(); + D2H += t2 - t1; #endif #ifdef TIMING - d2h_time += probe_event_time(event, oclHandles.queue); + d2h_time += probe_event_time(event, oclHandles.queue); #endif } /*------------------------------------------------------------ @@ -1099,59 +1081,60 @@ void _clMemcpyD2H(void * dst, cl_mem src, int size) throw(string){ @return: NULL @date: 27/03/2011 ------------------------------------------------------------*/ -void _clMemcpyD2D(cl_mem dst, cl_mem src, int size) throw(string){ +void _clMemcpyD2D(cl_mem dst, cl_mem src, int size) throw(string) +{ #ifdef PROFILE_ - double t1 = gettime(); + double t1 = gettime(); #endif - cl_event event; - oclHandles.cl_status = clEnqueueCopyBuffer(oclHandles.queue, src, dst, 0, 0, size, 0, NULL, &event); + cl_event event; + oclHandles.cl_status = clEnqueueCopyBuffer(oclHandles.queue, src, dst, 0, 0, size, 0, NULL, &event); #ifdef ERRMSG - if(oclHandles.cl_status != CL_SUCCESS){ - oclHandles.error_str = "excpetion in _clCpyMemD2D -> "; - switch(oclHandles.cl_status){ - case CL_INVALID_COMMAND_QUEUE: - oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE"; - break; - case CL_INVALID_CONTEXT: - oclHandles.error_str += "CL_INVALID_CONTEXT"; - break; - case CL_INVALID_MEM_OBJECT: - oclHandles.error_str += "CL_INVALID_MEM_OBJECT"; - break; - case CL_INVALID_VALUE: - oclHandles.error_str += "CL_INVALID_VALUE"; - break; - case CL_INVALID_EVENT_WAIT_LIST: - oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST"; - break; - case CL_MISALIGNED_SUB_BUFFER_OFFSET: - oclHandles.error_str += "CL_MISALIGNED_SUB_BUFFER_OFFSET"; - break; - case CL_MEM_COPY_OVERLAP: - oclHandles.error_str += "CL_MEM_COPY_OVERLAP"; - break; - case CL_MEM_OBJECT_ALLOCATION_FAILURE: - oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE"; - break; - case CL_OUT_OF_RESOURCES: - oclHandles.error_str += "CL_OUT_OF_RESOURCES"; - break; - case CL_OUT_OF_HOST_MEMORY: - oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; - break; - default: - oclHandles.error_str += "Unknown reason"; - break; - } - throw(oclHandles.error_str); - } + if (oclHandles.cl_status != CL_SUCCESS) { + oclHandles.error_str = "excpetion in _clCpyMemD2D -> "; + switch (oclHandles.cl_status) { + case CL_INVALID_COMMAND_QUEUE: + oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE"; + break; + case CL_INVALID_CONTEXT: + oclHandles.error_str += "CL_INVALID_CONTEXT"; + break; + case CL_INVALID_MEM_OBJECT: + oclHandles.error_str += "CL_INVALID_MEM_OBJECT"; + break; + case CL_INVALID_VALUE: + oclHandles.error_str += "CL_INVALID_VALUE"; + break; + case CL_INVALID_EVENT_WAIT_LIST: + oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST"; + break; + case CL_MISALIGNED_SUB_BUFFER_OFFSET: + oclHandles.error_str += "CL_MISALIGNED_SUB_BUFFER_OFFSET"; + break; + case CL_MEM_COPY_OVERLAP: + oclHandles.error_str += "CL_MEM_COPY_OVERLAP"; + break; + case CL_MEM_OBJECT_ALLOCATION_FAILURE: + oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE"; + break; + case CL_OUT_OF_RESOURCES: + oclHandles.error_str += "CL_OUT_OF_RESOURCES"; + break; + case CL_OUT_OF_HOST_MEMORY: + oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; + break; + default: + oclHandles.error_str += "Unknown reason"; + break; + } + throw(oclHandles.error_str); + } #endif #ifdef PROFILE_ - double t2 = gettime(); - D2D += t2 - t1; + double t2 = gettime(); + D2D += t2 - t1; #endif #ifdef TIMING - d2d_time += probe_event_time(event, oclHandles.queue); + d2d_time += probe_event_time(event, oclHandles.queue); #endif } @@ -1165,103 +1148,104 @@ void _clMemcpyD2D(cl_mem dst, cl_mem src, int size) throw(string){ @return: NULL @date: 03/04/2011 ------------------------------------------------------------*/ -void _clSetArgs(int kernel_id, int arg_idx, void * d_mem, int size = 0) throw(string){ - if(!size){ - oclHandles.cl_status = clSetKernelArg(oclHandles.kernel[kernel_id], arg_idx, sizeof(d_mem), &d_mem); - #ifdef ERRMSG - oclHandles.error_str = "excpetion in _clSetKernelArg()-1 "; - switch(oclHandles.cl_status){ - case CL_INVALID_KERNEL: - oclHandles.error_str += "CL_INVALID_KERNEL"; - break; - case CL_INVALID_ARG_INDEX: - oclHandles.error_str += "CL_INVALID_ARG_INDEX"; - break; - case CL_INVALID_ARG_VALUE: - oclHandles.error_str += "CL_INVALID_ARG_VALUE"; - break; - case CL_INVALID_MEM_OBJECT: - oclHandles.error_str += "CL_INVALID_MEM_OBJECT"; - break; - case CL_INVALID_SAMPLER: - oclHandles.error_str += "CL_INVALID_SAMPLER"; - break; - case CL_INVALID_ARG_SIZE: - oclHandles.error_str += "CL_INVALID_ARG_SIZE"; - break; - case CL_OUT_OF_RESOURCES: - oclHandles.error_str += "CL_OUT_OF_RESOURCES"; - break; - case CL_OUT_OF_HOST_MEMORY: - oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; - break; - default: - oclHandles.error_str += "Unknown reason"; - break; - } - if(oclHandles.cl_status != CL_SUCCESS) - throw(oclHandles.error_str); - #endif - } - else{ - oclHandles.cl_status = clSetKernelArg(oclHandles.kernel[kernel_id], arg_idx, size, d_mem); - #ifdef ERRMSG - oclHandles.error_str = "excpetion in _clSetKernelArg()-2 "; - switch(oclHandles.cl_status){ - case CL_INVALID_KERNEL: - oclHandles.error_str += "CL_INVALID_KERNEL"; - break; - case CL_INVALID_ARG_INDEX: - oclHandles.error_str += "CL_INVALID_ARG_INDEX"; - break; - case CL_INVALID_ARG_VALUE: - oclHandles.error_str += "CL_INVALID_ARG_VALUE"; - break; - case CL_INVALID_MEM_OBJECT: - oclHandles.error_str += "CL_INVALID_MEM_OBJECT"; - break; - case CL_INVALID_SAMPLER: - oclHandles.error_str += "CL_INVALID_SAMPLER"; - break; - case CL_INVALID_ARG_SIZE: - oclHandles.error_str += "CL_INVALID_ARG_SIZE"; - break; - case CL_OUT_OF_RESOURCES: - oclHandles.error_str += "CL_OUT_OF_RESOURCES"; - break; - case CL_OUT_OF_HOST_MEMORY: - oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; - break; - default: - oclHandles.error_str += "Unknown reason"; - break; - } - if(oclHandles.cl_status != CL_SUCCESS) - throw(oclHandles.error_str); - #endif - } -} -void _clFinish() throw(string){ - oclHandles.cl_status = clFinish(oclHandles.queue); +void _clSetArgs(int kernel_id, int arg_idx, void* d_mem, int size = 0) throw(string) +{ + if (!size) { + oclHandles.cl_status = clSetKernelArg(oclHandles.kernel[kernel_id], arg_idx, sizeof(d_mem), &d_mem); #ifdef ERRMSG - if(oclHandles.cl_status!=CL_SUCCESS){ - oclHandles.error_str = "excpetion in _clFinish"; - switch(oclHandles.cl_status){ - case CL_INVALID_COMMAND_QUEUE: - oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE"; - break; - case CL_OUT_OF_RESOURCES: - oclHandles.error_str += "CL_OUT_OF_RESOURCES"; - break; - case CL_OUT_OF_HOST_MEMORY: - oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; - break; - default: - oclHandles.error_str += "Unknown reasons"; - break; - } - throw(oclHandles.error_str); - } + oclHandles.error_str = "excpetion in _clSetKernelArg()-1 "; + switch (oclHandles.cl_status) { + case CL_INVALID_KERNEL: + oclHandles.error_str += "CL_INVALID_KERNEL"; + break; + case CL_INVALID_ARG_INDEX: + oclHandles.error_str += "CL_INVALID_ARG_INDEX"; + break; + case CL_INVALID_ARG_VALUE: + oclHandles.error_str += "CL_INVALID_ARG_VALUE"; + break; + case CL_INVALID_MEM_OBJECT: + oclHandles.error_str += "CL_INVALID_MEM_OBJECT"; + break; + case CL_INVALID_SAMPLER: + oclHandles.error_str += "CL_INVALID_SAMPLER"; + break; + case CL_INVALID_ARG_SIZE: + oclHandles.error_str += "CL_INVALID_ARG_SIZE"; + break; + case CL_OUT_OF_RESOURCES: + oclHandles.error_str += "CL_OUT_OF_RESOURCES"; + break; + case CL_OUT_OF_HOST_MEMORY: + oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; + break; + default: + oclHandles.error_str += "Unknown reason"; + break; + } + if (oclHandles.cl_status != CL_SUCCESS) + throw(oclHandles.error_str); +#endif + } else { + oclHandles.cl_status = clSetKernelArg(oclHandles.kernel[kernel_id], arg_idx, size, d_mem); +#ifdef ERRMSG + oclHandles.error_str = "excpetion in _clSetKernelArg()-2 "; + switch (oclHandles.cl_status) { + case CL_INVALID_KERNEL: + oclHandles.error_str += "CL_INVALID_KERNEL"; + break; + case CL_INVALID_ARG_INDEX: + oclHandles.error_str += "CL_INVALID_ARG_INDEX"; + break; + case CL_INVALID_ARG_VALUE: + oclHandles.error_str += "CL_INVALID_ARG_VALUE"; + break; + case CL_INVALID_MEM_OBJECT: + oclHandles.error_str += "CL_INVALID_MEM_OBJECT"; + break; + case CL_INVALID_SAMPLER: + oclHandles.error_str += "CL_INVALID_SAMPLER"; + break; + case CL_INVALID_ARG_SIZE: + oclHandles.error_str += "CL_INVALID_ARG_SIZE"; + break; + case CL_OUT_OF_RESOURCES: + oclHandles.error_str += "CL_OUT_OF_RESOURCES"; + break; + case CL_OUT_OF_HOST_MEMORY: + oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; + break; + default: + oclHandles.error_str += "Unknown reason"; + break; + } + if (oclHandles.cl_status != CL_SUCCESS) + throw(oclHandles.error_str); +#endif + } +} +void _clFinish() throw(string) +{ + oclHandles.cl_status = clFinish(oclHandles.queue); +#ifdef ERRMSG + if (oclHandles.cl_status != CL_SUCCESS) { + oclHandles.error_str = "excpetion in _clFinish"; + switch (oclHandles.cl_status) { + case CL_INVALID_COMMAND_QUEUE: + oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE"; + break; + case CL_OUT_OF_RESOURCES: + oclHandles.error_str += "CL_OUT_OF_RESOURCES"; + break; + case CL_OUT_OF_HOST_MEMORY: + oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; + break; + default: + oclHandles.error_str += "Unknown reasons"; + break; + } + throw(oclHandles.error_str); + } #endif } /*------------------------------------------------------------ @@ -1273,109 +1257,110 @@ void _clFinish() throw(string){ @return: NULL @date: 03/04/2011 ------------------------------------------------------------*/ -void _clInvokeKernel(int kernel_id, int work_items, int work_group_size) throw(string){ +void _clInvokeKernel(int kernel_id, int work_items, int work_group_size) throw(string) +{ #ifdef PROFILE_ - double t1 = gettime(); + double t1 = gettime(); #endif - cl_uint work_dim = WORK_DIM; - cl_event e[1]; - if(work_items%work_group_size != 0) //process situations that work_items cannot be divided by work_group_size - work_items = work_items + (work_group_size-(work_items%work_group_size)); - size_t local_work_size[] = {work_group_size, 1}; - size_t global_work_size[] = {work_items, 1}; - oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0, \ - global_work_size, local_work_size, 0 , 0, &(e[0]) ); - #ifdef ERRMSG - if(oclHandles.cl_status != CL_SUCCESS){ - oclHandles.error_str = "excpetion in _clInvokeKernel() -> "; - switch(oclHandles.cl_status){ - case CL_INVALID_PROGRAM_EXECUTABLE: - oclHandles.error_str += "CL_INVALID_PROGRAM_EXECUTABLE"; - break; - case CL_INVALID_COMMAND_QUEUE: - oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE"; - break; - case CL_INVALID_KERNEL: - oclHandles.error_str += "CL_INVALID_KERNEL"; - break; - case CL_INVALID_CONTEXT: - oclHandles.error_str += "CL_INVALID_CONTEXT"; - break; - case CL_INVALID_KERNEL_ARGS: - oclHandles.error_str += "CL_INVALID_KERNEL_ARGS"; - break; - case CL_INVALID_WORK_DIMENSION: - oclHandles.error_str += "CL_INVALID_WORK_DIMENSION"; - break; - case CL_INVALID_GLOBAL_WORK_SIZE: - oclHandles.error_str += "CL_INVALID_GLOBAL_WORK_SIZE"; - break; - case CL_INVALID_WORK_GROUP_SIZE: - oclHandles.error_str += "CL_INVALID_WORK_GROUP_SIZE"; - break; - case CL_INVALID_WORK_ITEM_SIZE: - oclHandles.error_str += "CL_INVALID_WORK_ITEM_SIZE"; - break; - case CL_INVALID_GLOBAL_OFFSET: - oclHandles.error_str += "CL_INVALID_GLOBAL_OFFSET"; - break; - case CL_OUT_OF_RESOURCES: - oclHandles.error_str += "CL_OUT_OF_RESOURCES"; - break; - case CL_MEM_OBJECT_ALLOCATION_FAILURE: - oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE"; - break; - case CL_INVALID_EVENT_WAIT_LIST: - oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST"; - break; - case CL_OUT_OF_HOST_MEMORY: - oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; - break; - default: - oclHandles.error_str += "Unkown reseason"; - break; - } - - throw(oclHandles.error_str); - } - #endif - //_clFinish(); - // oclHandles.cl_status = clWaitForEvents(1, &e[0]); - #ifdef ERRMSG - if (oclHandles.cl_status!= CL_SUCCESS){ - oclHandles.error_str = "excpetion in _clEnqueueNDRange() -> clWaitForEvents ->"; - switch(oclHandles.cl_status){ - case CL_INVALID_VALUE: - oclHandles.error_str += "CL_INVALID_VALUE"; - break; - case CL_INVALID_CONTEXT: - oclHandles.error_str += "CL_INVALID_CONTEXT"; - break; - case CL_INVALID_EVENT: - oclHandles.error_str += "CL_INVALID_EVENT"; - break; - case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: - oclHandles.error_str += "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"; - break; - case CL_OUT_OF_RESOURCES: - oclHandles.error_str += "CL_OUT_OF_RESOURCES"; - break; - case CL_OUT_OF_HOST_MEMORY: - oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; - break; - default: - oclHandles.error_str += "Unkown Reason"; - break; - } - throw(oclHandles.error_str); + cl_uint work_dim = WORK_DIM; + cl_event e[1]; + if (work_items % work_group_size != 0) //process situations that work_items cannot be divided by work_group_size + work_items = work_items + (work_group_size - (work_items % work_group_size)); + size_t local_work_size[] = { work_group_size, 1 }; + size_t global_work_size[] = { work_items, 1 }; + oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0, + global_work_size, local_work_size, 0, 0, &(e[0])); +#ifdef ERRMSG + if (oclHandles.cl_status != CL_SUCCESS) { + oclHandles.error_str = "excpetion in _clInvokeKernel() -> "; + switch (oclHandles.cl_status) { + case CL_INVALID_PROGRAM_EXECUTABLE: + oclHandles.error_str += "CL_INVALID_PROGRAM_EXECUTABLE"; + break; + case CL_INVALID_COMMAND_QUEUE: + oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE"; + break; + case CL_INVALID_KERNEL: + oclHandles.error_str += "CL_INVALID_KERNEL"; + break; + case CL_INVALID_CONTEXT: + oclHandles.error_str += "CL_INVALID_CONTEXT"; + break; + case CL_INVALID_KERNEL_ARGS: + oclHandles.error_str += "CL_INVALID_KERNEL_ARGS"; + break; + case CL_INVALID_WORK_DIMENSION: + oclHandles.error_str += "CL_INVALID_WORK_DIMENSION"; + break; + case CL_INVALID_GLOBAL_WORK_SIZE: + oclHandles.error_str += "CL_INVALID_GLOBAL_WORK_SIZE"; + break; + case CL_INVALID_WORK_GROUP_SIZE: + oclHandles.error_str += "CL_INVALID_WORK_GROUP_SIZE"; + break; + case CL_INVALID_WORK_ITEM_SIZE: + oclHandles.error_str += "CL_INVALID_WORK_ITEM_SIZE"; + break; + case CL_INVALID_GLOBAL_OFFSET: + oclHandles.error_str += "CL_INVALID_GLOBAL_OFFSET"; + break; + case CL_OUT_OF_RESOURCES: + oclHandles.error_str += "CL_OUT_OF_RESOURCES"; + break; + case CL_MEM_OBJECT_ALLOCATION_FAILURE: + oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE"; + break; + case CL_INVALID_EVENT_WAIT_LIST: + oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST"; + break; + case CL_OUT_OF_HOST_MEMORY: + oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; + break; + default: + oclHandles.error_str += "Unkown reseason"; + break; } - #endif + + throw(oclHandles.error_str); + } +#endif +//_clFinish(); +// oclHandles.cl_status = clWaitForEvents(1, &e[0]); +#ifdef ERRMSG + if (oclHandles.cl_status != CL_SUCCESS) { + oclHandles.error_str = "excpetion in _clEnqueueNDRange() -> clWaitForEvents ->"; + switch (oclHandles.cl_status) { + case CL_INVALID_VALUE: + oclHandles.error_str += "CL_INVALID_VALUE"; + break; + case CL_INVALID_CONTEXT: + oclHandles.error_str += "CL_INVALID_CONTEXT"; + break; + case CL_INVALID_EVENT: + oclHandles.error_str += "CL_INVALID_EVENT"; + break; + case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: + oclHandles.error_str += "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"; + break; + case CL_OUT_OF_RESOURCES: + oclHandles.error_str += "CL_OUT_OF_RESOURCES"; + break; + case CL_OUT_OF_HOST_MEMORY: + oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; + break; + default: + oclHandles.error_str += "Unkown Reason"; + break; + } + throw(oclHandles.error_str); + } +#endif #ifdef PROFILE_ - double t2 = gettime(); - KE += t2 - t1; + double t2 = gettime(); + KE += t2 - t1; #endif #ifdef TIMING - kernel_time += probe_event_time(e[0], oclHandles.queue); + kernel_time += probe_event_time(e[0], oclHandles.queue); #endif } @@ -1389,13 +1374,14 @@ void _clInvokeKernel(int kernel_id, int work_items, int work_group_size) throw(s @date: 03/04/2011 ------------------------------------------------------------*/ -void _clMemset(cl_mem mem_d, short val, int number_bytes)throw(string){ - int kernel_id = 0; - int arg_idx = 0; - _clSetArgs(kernel_id, arg_idx++, mem_d); - _clSetArgs(kernel_id, arg_idx++, &val, sizeof(short)); - _clSetArgs(kernel_id, arg_idx++, &number_bytes, sizeof(int)); - _clInvokeKernel(kernel_id, number_bytes, work_group_size); +void _clMemset(cl_mem mem_d, short val, int number_bytes) throw(string) +{ + int kernel_id = 0; + int arg_idx = 0; + _clSetArgs(kernel_id, arg_idx++, mem_d); + _clSetArgs(kernel_id, arg_idx++, &val, sizeof(short)); + _clSetArgs(kernel_id, arg_idx++, &number_bytes, sizeof(int)); + _clInvokeKernel(kernel_id, number_bytes, work_group_size); } /*------------------------------------------------------------ @function: entry of invoke the kernel function using 2d working items @@ -1408,83 +1394,84 @@ void _clMemset(cl_mem mem_d, short val, int number_bytes)throw(string){ @return: NULL @date: 03/04/2011 ------------------------------------------------------------*/ -void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, int group_y) throw(string){ +void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, int group_y) throw(string) +{ #ifdef PROFILE_ - double t1 = gettime(); + double t1 = gettime(); #endif - cl_uint work_dim = WORK_DIM; - size_t local_work_size[] = {group_x, group_y}; - size_t global_work_size[] = {range_x, range_y}; - cl_event e[1]; - /*if(work_items%work_group_size != 0) //process situations that work_items cannot be divided by work_group_size + cl_uint work_dim = WORK_DIM; + size_t local_work_size[] = { group_x, group_y }; + size_t global_work_size[] = { range_x, range_y }; + cl_event e[1]; + /*if(work_items%work_group_size != 0) //process situations that work_items cannot be divided by work_group_size work_items = work_items + (work_group_size-(work_items%work_group_size));*/ - oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0, \ - global_work_size, local_work_size, 0 , 0, &(e[0]) ); - #ifdef ERRMSG - if(oclHandles.cl_status != CL_SUCCESS){ - oclHandles.error_str = "excpetion in _clInvokeKernel() -> "; - switch(oclHandles.cl_status){ - case CL_INVALID_PROGRAM_EXECUTABLE: - oclHandles.error_str += "CL_INVALID_PROGRAM_EXECUTABLE"; - break; - case CL_INVALID_COMMAND_QUEUE: - oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE"; - break; - case CL_INVALID_KERNEL: - oclHandles.error_str += "CL_INVALID_KERNEL"; - break; - case CL_INVALID_CONTEXT: - oclHandles.error_str += "CL_INVALID_CONTEXT"; - break; - case CL_INVALID_KERNEL_ARGS: - oclHandles.error_str += "CL_INVALID_KERNEL_ARGS"; - break; - case CL_INVALID_WORK_DIMENSION: - oclHandles.error_str += "CL_INVALID_WORK_DIMENSION"; - break; - case CL_INVALID_GLOBAL_WORK_SIZE: - oclHandles.error_str += "CL_INVALID_GLOBAL_WORK_SIZE"; - break; - case CL_INVALID_WORK_GROUP_SIZE: - oclHandles.error_str += "CL_INVALID_WORK_GROUP_SIZE"; - break; - case CL_INVALID_WORK_ITEM_SIZE: - oclHandles.error_str += "CL_INVALID_WORK_ITEM_SIZE"; - break; - case CL_INVALID_GLOBAL_OFFSET: - oclHandles.error_str += "CL_INVALID_GLOBAL_OFFSET"; - break; - case CL_OUT_OF_RESOURCES: - oclHandles.error_str += "CL_OUT_OF_RESOURCES"; - break; - case CL_MEM_OBJECT_ALLOCATION_FAILURE: - oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE"; - break; - case CL_INVALID_EVENT_WAIT_LIST: - oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST"; - break; - case CL_OUT_OF_HOST_MEMORY: - oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; - break; - default: - oclHandles.error_str += "Unkown reseason"; - break; - } - throw(oclHandles.error_str); - } - #endif - - // oclHandles.cl_status = clWaitForEvents(1, &e[0]); + oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0, + global_work_size, local_work_size, 0, 0, &(e[0])); +#ifdef ERRMSG + if (oclHandles.cl_status != CL_SUCCESS) { + oclHandles.error_str = "excpetion in _clInvokeKernel() -> "; + switch (oclHandles.cl_status) { + case CL_INVALID_PROGRAM_EXECUTABLE: + oclHandles.error_str += "CL_INVALID_PROGRAM_EXECUTABLE"; + break; + case CL_INVALID_COMMAND_QUEUE: + oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE"; + break; + case CL_INVALID_KERNEL: + oclHandles.error_str += "CL_INVALID_KERNEL"; + break; + case CL_INVALID_CONTEXT: + oclHandles.error_str += "CL_INVALID_CONTEXT"; + break; + case CL_INVALID_KERNEL_ARGS: + oclHandles.error_str += "CL_INVALID_KERNEL_ARGS"; + break; + case CL_INVALID_WORK_DIMENSION: + oclHandles.error_str += "CL_INVALID_WORK_DIMENSION"; + break; + case CL_INVALID_GLOBAL_WORK_SIZE: + oclHandles.error_str += "CL_INVALID_GLOBAL_WORK_SIZE"; + break; + case CL_INVALID_WORK_GROUP_SIZE: + oclHandles.error_str += "CL_INVALID_WORK_GROUP_SIZE"; + break; + case CL_INVALID_WORK_ITEM_SIZE: + oclHandles.error_str += "CL_INVALID_WORK_ITEM_SIZE"; + break; + case CL_INVALID_GLOBAL_OFFSET: + oclHandles.error_str += "CL_INVALID_GLOBAL_OFFSET"; + break; + case CL_OUT_OF_RESOURCES: + oclHandles.error_str += "CL_OUT_OF_RESOURCES"; + break; + case CL_MEM_OBJECT_ALLOCATION_FAILURE: + oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE"; + break; + case CL_INVALID_EVENT_WAIT_LIST: + oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST"; + break; + case CL_OUT_OF_HOST_MEMORY: + oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; + break; + default: + oclHandles.error_str += "Unkown reseason"; + break; + } + throw(oclHandles.error_str); + } +#endif + + // oclHandles.cl_status = clWaitForEvents(1, &e[0]); #ifdef ERRMSG - if (oclHandles.cl_status!= CL_SUCCESS) + if (oclHandles.cl_status != CL_SUCCESS) - throw(string("excpetion in _clEnqueueNDRange() -> clWaitForEvents")); + throw(string("excpetion in _clEnqueueNDRange() -> clWaitForEvents")); #endif #ifdef PROFILE_ - double t2 = gettime(); - KE += t2 - t1; + double t2 = gettime(); + KE += t2 - t1; #endif } @@ -1496,44 +1483,45 @@ void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, int @date: 03/04/2011 ------------------------------------------------------------*/ -void _clFree(cl_mem ob) throw(string){ +void _clFree(cl_mem ob) throw(string) +{ #ifdef TIMING - gettimeofday(&tv_close_start, NULL); + gettimeofday(&tv_close_start, NULL); #endif #ifdef PROFILE_ - double t1 = gettime(); + double t1 = gettime(); #endif - if(ob!=NULL) - oclHandles.cl_status = clReleaseMemObject(ob); + if (ob != NULL) + oclHandles.cl_status = clReleaseMemObject(ob); #ifdef ERRMSG - if (oclHandles.cl_status!= CL_SUCCESS){ - oclHandles.error_str = "excpetion in _clFree() ->"; - switch(oclHandles.cl_status){ - case CL_INVALID_MEM_OBJECT: - oclHandles.error_str += "CL_INVALID_MEM_OBJECT"; - break; - case CL_OUT_OF_RESOURCES: - oclHandles.error_str += "CL_OUT_OF_RESOURCES"; - break; - case CL_OUT_OF_HOST_MEMORY: - oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; - break; - default: - oclHandles.error_str += "Unkown reseason"; - break; - } - throw(oclHandles.error_str); - } + if (oclHandles.cl_status != CL_SUCCESS) { + oclHandles.error_str = "excpetion in _clFree() ->"; + switch (oclHandles.cl_status) { + case CL_INVALID_MEM_OBJECT: + oclHandles.error_str += "CL_INVALID_MEM_OBJECT"; + break; + case CL_OUT_OF_RESOURCES: + oclHandles.error_str += "CL_OUT_OF_RESOURCES"; + break; + case CL_OUT_OF_HOST_MEMORY: + oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY"; + break; + default: + oclHandles.error_str += "Unkown reseason"; + break; + } + throw(oclHandles.error_str); + } #endif #ifdef PROFILE_ - double t2 = gettime(); - MF += t2 - t1; + double t2 = gettime(); + MF += t2 - t1; #endif #ifdef TIMING - gettimeofday(&tv_close_end, NULL); - tvsub(&tv_close_end, &tv_close_start, &tv); - close_time += tv.tv_sec * 1000.0 + (float) tv.tv_usec / 1000.0; + gettimeofday(&tv_close_end, NULL); + tvsub(&tv_close_end, &tv_close_start, &tv); + close_time += tv.tv_sec * 1000.0 + (float)tv.tv_usec / 1000.0; #endif } @@ -1543,30 +1531,31 @@ void _clFree(cl_mem ob) throw(string){ @return: NULL @date: 03/04/2011 ------------------------------------------------------------*/ -void _clStatistics(){ +void _clStatistics() +{ #ifdef PROFILE_ - FILE *fp_pd = fopen("PD_OCL.txt", "a"); - fprintf(fp_pd, "%lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf\n", CC, CR, MA, MF, H2D, D2H, D2D, KE, KC); - fclose(fp_pd); -#endif - return ; + FILE* fp_pd = fopen("PD_OCL.txt", "a"); + fprintf(fp_pd, "%lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf\n", CC, CR, MA, MF, H2D, D2H, D2D, KE, KC); + fclose(fp_pd); +#endif + return; } -void _clPrintTiming(){ +void _clPrintTiming() +{ #ifdef TIMING - gettimeofday(&tv_total_end, NULL); - tvsub(&tv_total_end, &tv_total_start, &tv); - total_time = tv.tv_sec * 1000.0 + (float) tv.tv_usec / 1000.0; + gettimeofday(&tv_total_end, NULL); + tvsub(&tv_total_end, &tv_total_start, &tv); + total_time = tv.tv_sec * 1000.0 + (float)tv.tv_usec / 1000.0; - printf("Init: %f\n", init_time); - printf("MemAlloc: %f\n", mem_alloc_time); - printf("HtoD: %f\n", h2d_time); - printf("DtoD: %f\n", d2d_time); - printf("Exec: %f\n", kernel_time); - printf("DtoH: %f\n", d2h_time); - printf("Close: %f\n", close_time); - printf("Total: %f\n", total_time); + printf("Init: %f\n", init_time); + printf("MemAlloc: %f\n", mem_alloc_time); + printf("HtoD: %f\n", h2d_time); + printf("DtoD: %f\n", d2d_time); + printf("Exec: %f\n", kernel_time); + printf("DtoH: %f\n", d2h_time); + printf("Close: %f\n", close_time); + printf("Total: %f\n", total_time); #endif - } #endif //_CL_HELPER_ diff --git a/tests/opencl/cfd/euler3d.cc b/tests/opencl/cfd/euler3d.cc index 9ed9f7418..0f589c9a5 100644 --- a/tests/opencl/cfd/euler3d.cc +++ b/tests/opencl/cfd/euler3d.cc @@ -12,25 +12,25 @@ on 24/03/2011 ********************************************************************/ -#include +#include "CLHelper.h" #include +#include #include -#include "CLHelper.h" - + /* * Options * - */ + */ #define GAMMA 1.4f #define iterations 2000 #ifndef block_length - #define block_length 192 +#define block_length 192 #endif #define NDIM 3 #define NNB 4 -#define RK 3 // 3rd order RK +#define RK 3 // 3rd order RK #define ff_mach 1.2f #define deg_angle_of_attack 0.0f @@ -38,381 +38,391 @@ * not options */ - #if block_length > 128 #warning "the kernels may fail too launch on some systems if the block length is too large" #endif - #define VAR_DENSITY 0 -#define VAR_MOMENTUM 1 -#define VAR_DENSITY_ENERGY (VAR_MOMENTUM+NDIM) -#define NVAR (VAR_DENSITY_ENERGY+1) +#define VAR_MOMENTUM 1 +#define VAR_DENSITY_ENERGY (VAR_MOMENTUM + NDIM) +#define NVAR (VAR_DENSITY_ENERGY + 1) //self-defined user type -typedef struct{ - float x; - float y; - float z; +typedef struct { + float x; + float y; + float z; } float3; /* * Generic functions */ template -cl_mem alloc(int N){ - cl_mem mem_d = _clMalloc(sizeof(T)*N); - return mem_d; +cl_mem alloc(int N) +{ + cl_mem mem_d = _clMalloc(sizeof(T) * N); + return mem_d; } template -void dealloc(cl_mem array){ - _clFree(array); +void dealloc(cl_mem array) +{ + _clFree(array); } template -void copy(cl_mem dst, cl_mem src, int N){ - _clMemcpyD2D(dst, src, N*sizeof(T)); +void copy(cl_mem dst, cl_mem src, int N) +{ + _clMemcpyD2D(dst, src, N * sizeof(T)); } template -void upload(cl_mem dst, T* src, int N){ - _clMemcpyH2D(dst, src, N*sizeof(T)); +void upload(cl_mem dst, T* src, int N) +{ + _clMemcpyH2D(dst, src, N * sizeof(T)); } template -void download(T* dst, cl_mem src, int N){ - _clMemcpyD2H(dst, src, N*sizeof(T)); +void download(T* dst, cl_mem src, int N) +{ + _clMemcpyD2H(dst, src, N * sizeof(T)); } -void dump(cl_mem variables, int nel, int nelr){ - float* h_variables = new float[nelr*NVAR]; - download(h_variables, variables, nelr*NVAR); +void dump(cl_mem variables, int nel, int nelr) +{ + float* h_variables = new float[nelr * NVAR]; + download(h_variables, variables, nelr * NVAR); - { - std::ofstream file("density.txt"); - file << nel << " " << nelr << std::endl; - for(int i = 0; i < nel; i++) file << h_variables[i + VAR_DENSITY*nelr] << std::endl; - } + { + std::ofstream file("density.txt"); + file << nel << " " << nelr << std::endl; + for (int i = 0; i < nel; i++) + file << h_variables[i + VAR_DENSITY * nelr] << std::endl; + } + { + std::ofstream file("momentum.txt"); + file << nel << " " << nelr << std::endl; + for (int i = 0; i < nel; i++) { + for (int j = 0; j != NDIM; j++) + file << h_variables[i + (VAR_MOMENTUM + j) * nelr] << " "; + file << std::endl; + } + } - { - std::ofstream file("momentum.txt"); - file << nel << " " << nelr << std::endl; - for(int i = 0; i < nel; i++) - { - for(int j = 0; j != NDIM; j++) - file << h_variables[i + (VAR_MOMENTUM+j)*nelr] << " "; - file << std::endl; - } - } - - { - std::ofstream file("density_energy.txt"); - file << nel << " " << nelr << std::endl; - for(int i = 0; i < nel; i++) file << h_variables[i + VAR_DENSITY_ENERGY*nelr] << std::endl; - } - delete[] h_variables; + { + std::ofstream file("density_energy.txt"); + file << nel << " " << nelr << std::endl; + for (int i = 0; i < nel; i++) + file << h_variables[i + VAR_DENSITY_ENERGY * nelr] << std::endl; + } + delete[] h_variables; } -void initialize_variables(int nelr, cl_mem variables, cl_mem ff_variable) throw(string){ +void initialize_variables(int nelr, cl_mem variables, cl_mem ff_variable) throw(string) +{ - int work_items = nelr; - int work_group_size = BLOCK_SIZE_1; - int kernel_id = 1; - int arg_idx = 0; - _clSetArgs(kernel_id, arg_idx++, variables); - _clSetArgs(kernel_id, arg_idx++, ff_variable); - _clSetArgs(kernel_id, arg_idx++, &nelr, sizeof(int)); - _clInvokeKernel(kernel_id, work_items, work_group_size); + int work_items = nelr; + int work_group_size = BLOCK_SIZE_1; + int kernel_id = 1; + int arg_idx = 0; + _clSetArgs(kernel_id, arg_idx++, variables); + _clSetArgs(kernel_id, arg_idx++, ff_variable); + _clSetArgs(kernel_id, arg_idx++, &nelr, sizeof(int)); + _clInvokeKernel(kernel_id, work_items, work_group_size); } -void compute_step_factor(int nelr, cl_mem variables, cl_mem areas, cl_mem step_factors){ +void compute_step_factor(int nelr, cl_mem variables, cl_mem areas, cl_mem step_factors) +{ - int work_items = nelr; - int work_group_size = BLOCK_SIZE_2; - int kernel_id = 2; - int arg_idx = 0; - _clSetArgs(kernel_id, arg_idx++, variables); - _clSetArgs(kernel_id, arg_idx++, areas); - _clSetArgs(kernel_id, arg_idx++, step_factors); - _clSetArgs(kernel_id, arg_idx++, &nelr, sizeof(int)); - _clInvokeKernel(kernel_id, work_items, work_group_size); + int work_items = nelr; + int work_group_size = BLOCK_SIZE_2; + int kernel_id = 2; + int arg_idx = 0; + _clSetArgs(kernel_id, arg_idx++, variables); + _clSetArgs(kernel_id, arg_idx++, areas); + _clSetArgs(kernel_id, arg_idx++, step_factors); + _clSetArgs(kernel_id, arg_idx++, &nelr, sizeof(int)); + _clInvokeKernel(kernel_id, work_items, work_group_size); } -void compute_flux(int nelr, cl_mem elements_surrounding_elements, cl_mem normals, cl_mem variables, cl_mem ff_variable, \ - cl_mem fluxes, cl_mem ff_flux_contribution_density_energy, - cl_mem ff_flux_contribution_momentum_x, - cl_mem ff_flux_contribution_momentum_y, - cl_mem ff_flux_contribution_momentum_z){ +void compute_flux(int nelr, cl_mem elements_surrounding_elements, cl_mem normals, cl_mem variables, cl_mem ff_variable, + cl_mem fluxes, cl_mem ff_flux_contribution_density_energy, + cl_mem ff_flux_contribution_momentum_x, + cl_mem ff_flux_contribution_momentum_y, + cl_mem ff_flux_contribution_momentum_z) +{ - int work_items = nelr; - int work_group_size = BLOCK_SIZE_3; - int kernel_id = 3; - int arg_idx = 0; - _clSetArgs(kernel_id, arg_idx++, elements_surrounding_elements); - _clSetArgs(kernel_id, arg_idx++, normals); - _clSetArgs(kernel_id, arg_idx++, variables); - _clSetArgs(kernel_id, arg_idx++, ff_variable); - _clSetArgs(kernel_id, arg_idx++, fluxes); - _clSetArgs(kernel_id, arg_idx++, ff_flux_contribution_density_energy); - _clSetArgs(kernel_id, arg_idx++, ff_flux_contribution_momentum_x); - _clSetArgs(kernel_id, arg_idx++, ff_flux_contribution_momentum_y); - _clSetArgs(kernel_id, arg_idx++, ff_flux_contribution_momentum_z); - _clSetArgs(kernel_id, arg_idx++, &nelr, sizeof(int)); - _clInvokeKernel(kernel_id, work_items, work_group_size); + int work_items = nelr; + int work_group_size = BLOCK_SIZE_3; + int kernel_id = 3; + int arg_idx = 0; + _clSetArgs(kernel_id, arg_idx++, elements_surrounding_elements); + _clSetArgs(kernel_id, arg_idx++, normals); + _clSetArgs(kernel_id, arg_idx++, variables); + _clSetArgs(kernel_id, arg_idx++, ff_variable); + _clSetArgs(kernel_id, arg_idx++, fluxes); + _clSetArgs(kernel_id, arg_idx++, ff_flux_contribution_density_energy); + _clSetArgs(kernel_id, arg_idx++, ff_flux_contribution_momentum_x); + _clSetArgs(kernel_id, arg_idx++, ff_flux_contribution_momentum_y); + _clSetArgs(kernel_id, arg_idx++, ff_flux_contribution_momentum_z); + _clSetArgs(kernel_id, arg_idx++, &nelr, sizeof(int)); + _clInvokeKernel(kernel_id, work_items, work_group_size); } -void time_step(int j, int nelr, cl_mem old_variables, cl_mem variables, cl_mem step_factors, cl_mem fluxes){ +void time_step(int j, int nelr, cl_mem old_variables, cl_mem variables, cl_mem step_factors, cl_mem fluxes) +{ - int work_items = nelr; - int work_group_size = BLOCK_SIZE_4; - int kernel_id = 4; - int arg_idx = 0; - _clSetArgs(kernel_id, arg_idx++, &j, sizeof(int)); - _clSetArgs(kernel_id, arg_idx++, &nelr, sizeof(int)); - _clSetArgs(kernel_id, arg_idx++, old_variables); - _clSetArgs(kernel_id, arg_idx++, variables); - _clSetArgs(kernel_id, arg_idx++, step_factors); - _clSetArgs(kernel_id, arg_idx++, fluxes); - - _clInvokeKernel(kernel_id, work_items, work_group_size); + int work_items = nelr; + int work_group_size = BLOCK_SIZE_4; + int kernel_id = 4; + int arg_idx = 0; + _clSetArgs(kernel_id, arg_idx++, &j, sizeof(int)); + _clSetArgs(kernel_id, arg_idx++, &nelr, sizeof(int)); + _clSetArgs(kernel_id, arg_idx++, old_variables); + _clSetArgs(kernel_id, arg_idx++, variables); + _clSetArgs(kernel_id, arg_idx++, step_factors); + _clSetArgs(kernel_id, arg_idx++, fluxes); + + _clInvokeKernel(kernel_id, work_items, work_group_size); } inline void compute_flux_contribution(float& density, float3& momentum, float& density_energy, float& pressure, float3& velocity, float3& fc_momentum_x, float3& fc_momentum_y, float3& fc_momentum_z, float3& fc_density_energy) { - fc_momentum_x.x = velocity.x*momentum.x + pressure; - fc_momentum_x.y = velocity.x*momentum.y; - fc_momentum_x.z = velocity.x*momentum.z; - - - fc_momentum_y.x = fc_momentum_x.y; - fc_momentum_y.y = velocity.y*momentum.y + pressure; - fc_momentum_y.z = velocity.y*momentum.z; + fc_momentum_x.x = velocity.x * momentum.x + pressure; + fc_momentum_x.y = velocity.x * momentum.y; + fc_momentum_x.z = velocity.x * momentum.z; - fc_momentum_z.x = fc_momentum_x.z; - fc_momentum_z.y = fc_momentum_y.z; - fc_momentum_z.z = velocity.z*momentum.z + pressure; + fc_momentum_y.x = fc_momentum_x.y; + fc_momentum_y.y = velocity.y * momentum.y + pressure; + fc_momentum_y.z = velocity.y * momentum.z; - float de_p = density_energy+pressure; - fc_density_energy.x = velocity.x*de_p; - fc_density_energy.y = velocity.y*de_p; - fc_density_energy.z = velocity.z*de_p; + fc_momentum_z.x = fc_momentum_x.z; + fc_momentum_z.y = fc_momentum_y.z; + fc_momentum_z.z = velocity.z * momentum.z + pressure; + + float de_p = density_energy + pressure; + fc_density_energy.x = velocity.x * de_p; + fc_density_energy.y = velocity.y * de_p; + fc_density_energy.z = velocity.z * de_p; } /* * Main function */ -int main(int argc, char** argv){ +int main(int argc, char** argv) +{ printf("WG size of kernel:initialize = %d, WG size of kernel:compute_step_factor = %d, WG size of kernel:compute_flux = %d, WG size of kernel:time_step = %d\n", BLOCK_SIZE_1, BLOCK_SIZE_2, BLOCK_SIZE_3, BLOCK_SIZE_4); - if (argc < 2){ - //std::cout << "specify data file name and [device type] [device id]" << std::endl; - return 0; - } - const char* data_file_name = argv[1]; - _clCmdParams(argc, argv); - cl_mem ff_variable, ff_flux_contribution_momentum_x, ff_flux_contribution_momentum_y,ff_flux_contribution_momentum_z, ff_flux_contribution_density_energy; - cl_mem areas, elements_surrounding_elements, normals; - cl_mem variables, old_variables, fluxes, step_factors; - float h_ff_variable[NVAR]; + if (argc < 2) { + //std::cout << "specify data file name and [device type] [device id]" << std::endl; + printf("specify data file name and [device type] [device id]\n"); + return 0; + } + const char* data_file_name = argv[1]; + _clCmdParams(argc, argv); + cl_mem ff_variable, ff_flux_contribution_momentum_x, ff_flux_contribution_momentum_y, ff_flux_contribution_momentum_z, ff_flux_contribution_density_energy; + cl_mem areas, elements_surrounding_elements, normals; + cl_mem variables, old_variables, fluxes, step_factors; + float h_ff_variable[NVAR]; - try{ - _clInit(device_type, device_id); - // set far field conditions and load them into constant memory on the gpu - { - //float h_ff_variable[NVAR]; - const float angle_of_attack = float(3.1415926535897931 / 180.0f) * float(deg_angle_of_attack); - - h_ff_variable[VAR_DENSITY] = float(1.4); - - float ff_pressure = float(1.0f); - float ff_speed_of_sound = sqrt(GAMMA*ff_pressure / h_ff_variable[VAR_DENSITY]); - float ff_speed = float(ff_mach)*ff_speed_of_sound; - - float3 ff_velocity; - ff_velocity.x = ff_speed*float(cos((float)angle_of_attack)); - ff_velocity.y = ff_speed*float(sin((float)angle_of_attack)); - ff_velocity.z = 0.0f; - - h_ff_variable[VAR_MOMENTUM+0] = h_ff_variable[VAR_DENSITY] * ff_velocity.x; - h_ff_variable[VAR_MOMENTUM+1] = h_ff_variable[VAR_DENSITY] * ff_velocity.y; - h_ff_variable[VAR_MOMENTUM+2] = h_ff_variable[VAR_DENSITY] * ff_velocity.z; - - h_ff_variable[VAR_DENSITY_ENERGY] = h_ff_variable[VAR_DENSITY]*(float(0.5f)*(ff_speed*ff_speed)) + (ff_pressure / float(GAMMA-1.0f)); + try { + _clInit(device_type, device_id); + // set far field conditions and load them into constant memory on the gpu + { + //float h_ff_variable[NVAR]; + const float angle_of_attack = float(3.1415926535897931 / 180.0f) * float(deg_angle_of_attack); - float3 h_ff_momentum; - h_ff_momentum.x = *(h_ff_variable+VAR_MOMENTUM+0); - h_ff_momentum.y = *(h_ff_variable+VAR_MOMENTUM+1); - h_ff_momentum.z = *(h_ff_variable+VAR_MOMENTUM+2); - float3 h_ff_flux_contribution_momentum_x; - float3 h_ff_flux_contribution_momentum_y; - float3 h_ff_flux_contribution_momentum_z; - float3 h_ff_flux_contribution_density_energy; - compute_flux_contribution(h_ff_variable[VAR_DENSITY], h_ff_momentum, h_ff_variable[VAR_DENSITY_ENERGY], ff_pressure, ff_velocity, h_ff_flux_contribution_momentum_x, h_ff_flux_contribution_momentum_y, h_ff_flux_contribution_momentum_z, h_ff_flux_contribution_density_energy); + h_ff_variable[VAR_DENSITY] = float(1.4); - // copy far field conditions to the gpu - //cl_mem ff_variable, ff_flux_contribution_momentum_x, ff_flux_contribution_momentum_y,ff_flux_contribution_momentum_z, ff_flux_contribution_density_energy; - ff_variable = _clMalloc(NVAR*sizeof(float)); - ff_flux_contribution_momentum_x = _clMalloc(sizeof(float3)); - ff_flux_contribution_momentum_y = _clMalloc(sizeof(float3)); - ff_flux_contribution_momentum_z = _clMalloc(sizeof(float3)); - ff_flux_contribution_density_energy = _clMalloc(sizeof(float3)); - _clMemcpyH2D(ff_variable, h_ff_variable, NVAR*sizeof(float)); - _clMemcpyH2D(ff_flux_contribution_momentum_x, &h_ff_flux_contribution_momentum_x, sizeof(float3)); - _clMemcpyH2D(ff_flux_contribution_momentum_y, &h_ff_flux_contribution_momentum_y, sizeof(float3)); - _clMemcpyH2D(ff_flux_contribution_momentum_z, &h_ff_flux_contribution_momentum_z, sizeof(float3)); - _clMemcpyH2D(ff_flux_contribution_density_energy, &h_ff_flux_contribution_density_energy, sizeof(float3)); - _clFinish(); - - } - int nel; - int nelr; - // read in domain geometry - //float* areas; - //int* elements_surrounding_elements; - //float* normals; - { - std::ifstream file(data_file_name); - if(!file.good()){ - throw(string("can not find/open file!")); - } - file >> nel; - nelr = block_length*((nel / block_length )+ std::min(1, nel % block_length)); - ////std::cout<<"--cambine: nel="<(old_variables, variables, nelr*NVAR); - // for the first iteration we compute the time step - compute_step_factor(nelr, variables, areas, step_factors); - for(int j = 0; j < RK; j++){ - compute_flux(nelr, elements_surrounding_elements, normals, variables, ff_variable, fluxes, ff_flux_contribution_density_energy, \ - ff_flux_contribution_momentum_x, ff_flux_contribution_momentum_y, ff_flux_contribution_momentum_z); - - time_step(j, nelr, old_variables, variables, step_factors, fluxes); - } - } - _clFinish(); - //std::cout << "Saving solution..." << std::endl; - dump(variables, nel, nelr); - //std::cout << "Saved solution..." << std::endl; - _clStatistics(); - //std::cout << "Cleaning up..." << std::endl; - - //--release resources - _clFree(ff_variable); - _clFree(ff_flux_contribution_momentum_x); - _clFree(ff_flux_contribution_momentum_y); - _clFree(ff_flux_contribution_momentum_z); - _clFree(ff_flux_contribution_density_energy); - _clFree(areas); - _clFree(elements_surrounding_elements); - _clFree(normals); - _clFree(variables); - _clFree(old_variables); - _clFree(fluxes); - _clFree(step_factors); - _clRelease(); - printf("Done...\n"); - _clPrintTiming(); - } - catch(string msg){ - printf("--cambine:( an exception catched in main body ->%s\n", msg.c_str()); - _clFree(ff_variable); - _clFree(ff_flux_contribution_momentum_x); - _clFree(ff_flux_contribution_momentum_y); - _clFree(ff_flux_contribution_momentum_z); - _clFree(ff_flux_contribution_density_energy); - _clFree(areas); - _clFree(elements_surrounding_elements); - _clFree(normals); - _clFree(variables); - _clFree(old_variables); - _clFree(fluxes); - _clFree(step_factors); - _clRelease(); - } - catch(...){ - //std::cout<<"--cambine:( unknow exceptions in main body..."<> nel; + nelr = block_length * ((nel / block_length) + std::min(1, nel % block_length)); + //std::cout << "--cambine: nel=" << nel << ", nelr=" << nelr << std::endl; + float* h_areas = new float[nelr]; + int* h_elements_surrounding_elements = new int[nelr * NNB]; + float* h_normals = new float[nelr * NDIM * NNB]; + + // read in data + for (int i = 0; i < nel; i++) { + file >> h_areas[i]; + for (int j = 0; j < NNB; j++) { + file >> h_elements_surrounding_elements[i + j * nelr]; + if (h_elements_surrounding_elements[i + j * nelr] < 0) + h_elements_surrounding_elements[i + j * nelr] = -1; + h_elements_surrounding_elements[i + j * nelr]--; //it's coming in with Fortran numbering + + for (int k = 0; k < NDIM; k++) { + file >> h_normals[i + (j + k * NNB) * nelr]; + h_normals[i + (j + k * NNB) * nelr] = -h_normals[i + (j + k * NNB) * nelr]; + } + } + } + + // fill in remaining data + int last = nel - 1; + for (int i = nel; i < nelr; i++) { + h_areas[i] = h_areas[last]; + for (int j = 0; j < NNB; j++) { + // duplicate the last element + h_elements_surrounding_elements[i + j * nelr] = h_elements_surrounding_elements[last + j * nelr]; + for (int k = 0; k < NDIM; k++) + h_normals[last + (j + k * NNB) * nelr] = h_normals[last + (j + k * NNB) * nelr]; + } + } + + areas = alloc(nelr); + upload(areas, h_areas, nelr); + + elements_surrounding_elements = alloc(nelr * NNB); + upload(elements_surrounding_elements, h_elements_surrounding_elements, nelr * NNB); + + normals = alloc(nelr * NDIM * NNB); + upload(normals, h_normals, nelr * NDIM * NNB); + + delete[] h_areas; + delete[] h_elements_surrounding_elements; + delete[] h_normals; + } + + // Create arrays and set initial conditions + variables = alloc(nelr * NVAR); + int tp = 0; + initialize_variables(nelr, variables, ff_variable); + old_variables = alloc(nelr * NVAR); + fluxes = alloc(nelr * NVAR); + step_factors = alloc(nelr); + // make sure all memory is floatly allocated before we start timing + initialize_variables(nelr, old_variables, ff_variable); + initialize_variables(nelr, fluxes, ff_variable); + _clMemset(step_factors, 0, sizeof(float) * nelr); + // make sure CUDA isn't still doing something before we start timing + _clFinish(); + // these need to be computed the first time in order to compute time step + //std::cout << "Starting..." << std::endl; + printf("starting .. \n"); + + // Begin iterations + for (int i = 0; i < iterations; i++) { + //copy(old_variables, variables, nelr * NVAR); + //download(ff_variable, old_variables, nelr * NVAR); + // upload(ff_variable, old_variables, nelr * NVAR); + _clMemcpyH2D(ff_variable, h_ff_variable, NVAR * sizeof(float)); + _clMemcpyD2H(h_ff_variable, variables, NVAR * sizeof(float)); + // for the first iteration we compute the time step + compute_step_factor(nelr, variables, areas, step_factors); + for (int j = 0; j < RK; j++) { + compute_flux(nelr, elements_surrounding_elements, normals, variables, ff_variable, fluxes, ff_flux_contribution_density_energy, + ff_flux_contribution_momentum_x, ff_flux_contribution_momentum_y, ff_flux_contribution_momentum_z); + + time_step(j, nelr, old_variables, variables, step_factors, fluxes); + } + } + _clFinish(); + //std::cout << "Saving solution..." << std::endl; + dump(variables, nel, nelr); + //std::cout << "Saved solution..." << std::endl; + _clStatistics(); + //std::cout << "Cleaning up..." << std::endl; + printf("Cleaning up ...\n"); + + //--release resources + _clFree(ff_variable); + _clFree(ff_flux_contribution_momentum_x); + _clFree(ff_flux_contribution_momentum_y); + _clFree(ff_flux_contribution_momentum_z); + _clFree(ff_flux_contribution_density_energy); + _clFree(areas); + _clFree(elements_surrounding_elements); + _clFree(normals); + _clFree(variables); + _clFree(old_variables); + _clFree(fluxes); + _clFree(step_factors); + _clRelease(); + //std::cout << "Done..." << std::endl; + _clPrintTiming(); + } catch (string msg) { + //std::cout << "--cambine:( an exception catched in main body ->" << msg << std::endl; + printf("--cambine:( an exception catched in main body\n"); + _clFree(ff_variable); + _clFree(ff_flux_contribution_momentum_x); + _clFree(ff_flux_contribution_momentum_y); + _clFree(ff_flux_contribution_momentum_z); + _clFree(ff_flux_contribution_density_energy); + _clFree(areas); + _clFree(elements_surrounding_elements); + _clFree(normals); + _clFree(variables); + _clFree(old_variables); + _clFree(fluxes); + _clFree(step_factors); + _clRelease(); + } catch (...) { + printf("--cambine:( unknow exceptions in main body...\n"); + //std::cout << "--cambine:( unknow exceptions in main body..." << std::endl; + _clFree(ff_variable); + _clFree(ff_flux_contribution_momentum_x); + _clFree(ff_flux_contribution_momentum_y); + _clFree(ff_flux_contribution_momentum_z); + _clFree(ff_flux_contribution_density_energy); + _clFree(areas); + _clFree(elements_surrounding_elements); + _clFree(normals); + _clFree(variables); + _clFree(old_variables); + _clFree(fluxes); + _clFree(step_factors); + _clRelease(); + } + + return 0; }