/***************************************************************************
 *cr
 *cr            (C) Copyright 2010 The Board of Trustees of the
 *cr                        University of Illinois
 *cr                         All Rights Reserved
 *cr
 ***************************************************************************/

/*
 * Host-side driver for an OpenCL stencil run.
 *
 * What the visible code does: obtains an OpenCL platform/context/device
 * through the pb_* runtime helpers, creates a profiling-enabled command
 * queue, creates and builds a program exposing the kernel "naive_kernel",
 * allocates two host and two device float grids of nx*ny*nz elements,
 * uploads them, binds the seven kernel arguments, and finally frees the host
 * buffers and prints the timer set.
 *
 * NOTE(review): this copy of the file is damaged. Several spans of text are
 * missing (each gap appears to begin at a '<' and end at the following '>'):
 * the header names of six #include lines, most of read_data() together with
 * the start of main(), and the time-step loop together with whatever
 * followed it up to the output-file check. The damaged spots are kept
 * verbatim below and marked; restore them from a pristine copy before
 * building.
 */

/* NOTE(review): the header names of these six directives are missing. */
#include
#include
#include
#include
#include
#include

//#include "file.h"
#include "ocl.h"
//#include "common.h"

/*
 * Abort on OpenCL failure.
 *
 * Tests a variable named clStatus that must be in scope at the expansion
 * site. When it is not CL_SUCCESS, prints errorMessage and the current source
 * line to stdout and terminates the process with exit status 1.
 *
 * NOTE(review): the expansion is a bare if-block, not do { } while (0), so it
 * is invoked without a trailing semicolon throughout this file and would
 * capture a following `else`.
 */
#define CHECK_ERROR(errorMessage)           \
if(clStatus != CL_SUCCESS)                  \
{                                           \
    printf("Error: %s!\n",errorMessage);    \
    printf("Line: %d\n",__LINE__);          \
    exit(1);                                \
}

/*
 * read_data: takes a destination float array A0, the three grid extents and
 * a FILE pointer.
 *
 * NOTE(review): only the local declarations and the opening of the first
 * loop survive; the loop body and the return value cannot be determined from
 * this copy.
 */
static int read_data(float *A0, int nx,int ny,int nz,FILE *fp)
{
  int s=0;
  int i,j,k;
  /*
   * NOTE(review): text is missing after `for(i=0;i`. The remainder of
   * read_data(), the signature of main(), the declarations of `timers` and
   * `parameters`, and the beginning of a printf call are absent. What follows
   * the `\n");` fragment belongs to the body of main().
   */
  for(i=0;i\n");

  parameters = pb_ReadParameters(&argc, argv);

  /*parameters->inpFiles = (char **)malloc(sizeof(char *) * 2);
  parameters->inpFiles[0] = (char *)malloc(100);
  parameters->inpFiles[1] = NULL;
  strncpy(parameters->inpFiles[0], "128x128x32.bin", 100);*/

  pb_InitializeTimerSet(&timers);
  pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);

  //declaration
  int nx,ny,nz;     /* grid extents in x, y and z */
  int size;         /* total element count, nx*ny*nz */
  int iteration;    /* number of time steps */
  float c0=1.0f/6.0f;         /* kernel argument 0 */
  float c1=1.0f/6.0f/6.0f;    /* kernel argument 1 (1/36) */

  /*if (argc<5)
  {
    printf("Usage: probe nx ny nz t\n"
           "nx: the grid size x\n"
           "ny: the grid size y\n"
           "nz: the grid size z\n"
           "t: the iteration time\n");
    return -1;
  }
  nx = atoi(argv[1]);
  if (nx<1) return -1;
  ny = atoi(argv[2]);
  if (ny<1) return -1;
  nz = atoi(argv[3]);
  if (nz<1) return -1;
  iteration = atoi(argv[4]);
  if(iteration<1) return -1;*/

  /* Command-line parsing above is disabled; the problem size is fixed. */
  nx = 64;
  ny = 64;
  nz = 8;
  iteration = 1;

  cl_int clStatus;    /* status slot inspected by CHECK_ERROR */
  cl_context clContext;
  cl_device_id clDevice;
  cl_platform_id clPlatform;

  // Below is the new interface, coupled with Parboil runtime.
  pb_Context* pb_context;
  pb_context = pb_InitOpenCLContext(parameters);
  if (pb_context == NULL) {
    fprintf (stderr, "Error: No OpenCL platform/device can be found.");
    return -1;
  }
  printf("OK\n");

  // okay, let's deliver actual variables
  clPlatform = (cl_platform_id) pb_context->clPlatformId;
  clContext = (cl_context) pb_context->clContext;
  clDevice = (cl_device_id) pb_context->clDeviceId;

  /* Queue is created with profiling enabled. */
  cl_command_queue clCommandQueue = clCreateCommandQueue(clContext,clDevice,CL_QUEUE_PROFILING_ENABLE,&clStatus);
  CHECK_ERROR("clCreateCommandQueue")
  printf("OK\n");

  pb_SetOpenCL(&clContext, &clCommandQueue);

  //const char* clSource[] = {readFile("src/opencl_base/kernel.cl")};
  //cl_program clProgram = clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus);

  /*
   * The program is requested as a built-in kernel named "naive_kernel"
   * instead of being compiled from kernel.cl (source path commented out above).
   * NOTE(review): the label passed to CHECK_ERROR still says
   * "clCreateProgramWithSource", so a failure here is reported under the
   * wrong API name.
   */
  cl_program clProgram = clCreateProgramWithBuiltInKernels( clContext, 1, &clDevice, "naive_kernel", &clStatus);
  CHECK_ERROR("clCreateProgramWithSource")

  char clOptions[50];
  sprintf(clOptions,"-I src/opencl_base");    /* fixed 18-character string; fits the 50-byte buffer */

  clStatus = clBuildProgram(clProgram,1,&clDevice,clOptions,NULL,NULL);
  CHECK_ERROR("clBuildProgram")

  cl_kernel clKernel = clCreateKernel(clProgram,"naive_kernel",&clStatus);
  CHECK_ERROR("clCreateKernel")
  printf("OK+\n");

  //host data
  float *h_A0;
  float *h_Anext;
  //device
  cl_mem d_A0;
  cl_mem d_Anext;

  //load data from files
  size=nx*ny*nz;

  /* NOTE(review): neither allocation is checked for NULL before use. */
  h_A0=(float*)malloc(sizeof(float)*size);
  h_Anext=(float*)malloc(sizeof(float)*size);
  pb_SwitchToTimer(&timers, pb_TimerID_IO);
  //FILE *fp = fopen(parameters->inpFiles[0], "rb");
  printf("OK+\n");
  /*
   * NOTE(review): read_data() receives NULL as its FILE pointer because the
   * fopen above is commented out. Whether that is safe depends on the part
   * of read_data() that is missing from this copy -- confirm.
   */
  read_data(h_A0, nx,ny,nz,NULL);
  printf("OK+\n");
  //fclose(fp);
  memcpy (h_Anext,h_A0,sizeof(float)*size);    /* both host grids start out identical */

  pb_SwitchToTimer(&timers, pb_TimerID_COPY);
  printf("OK+\n");

  //memory allocation
  d_A0 = clCreateBuffer(clContext,CL_MEM_READ_WRITE,size*sizeof(float),NULL,&clStatus);
  CHECK_ERROR("clCreateBuffer")
  d_Anext = clCreateBuffer(clContext,CL_MEM_READ_WRITE,size*sizeof(float),NULL,&clStatus);
  CHECK_ERROR("clCreateBuffer")

  //memory copy
  /* First upload is enqueued non-blocking (CL_FALSE), the second blocking (CL_TRUE). */
  clStatus = clEnqueueWriteBuffer(clCommandQueue,d_A0,CL_FALSE,0,size*sizeof(float),h_A0,0,NULL,NULL);
  CHECK_ERROR("clEnqueueWriteBuffer")
  clStatus = clEnqueueWriteBuffer(clCommandQueue,d_Anext,CL_TRUE,0,size*sizeof(float),h_Anext,0,NULL,NULL);
  CHECK_ERROR("clEnqueueWriteBuffer")

  pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
  printf("OK+\n");

  //only use 1D thread block
  int tx = 128;
  size_t block[3] = {tx,1,1};
  /* grid[0] is (nx-2) rounded up to a multiple of tx; y and z skip the two boundary planes. */
  size_t grid[3] = {(nx-2+tx-1)/tx*tx,ny-2,nz-2};
  //size_t grid[3] = {nx-2,ny-2,nz-2};
  size_t offset[3] = {1,1,1};    /* NOTE(review): not referenced anywhere in the visible text */

  /* NOTE(review): the three printf calls below pass size_t values to %d; %zu is the matching conversion. */
  printf("grid size in x/y/z = %d %d %d\n",grid[0],grid[1],grid[2]);
  printf("block size in x/y/z = %d %d %d\n",block[0],block[1],block[2]);
  /*
   * NOTE(review): the z term multiplies (grid[2]*block[2]) while the x and y
   * terms divide; possibly a typo for '/'. block[2] is 1 here, so the printed
   * value is the same either way.
   */
  printf ("blocks = %d\n", (grid[0]/block[0])*(grid[1]/block[1])*(grid[2]*block[2]));

  /*
   * Kernel arguments: 0-1 are the coefficients c0 and c1, 2-3 the two device
   * buffers, 4-6 the grid extents.
   * NOTE(review): clStatus is overwritten by each call, so CHECK_ERROR only
   * sees the result of the last one (argument 6).
   */
  clStatus = clSetKernelArg(clKernel,0,sizeof(float),(void*)&c0);
  clStatus = clSetKernelArg(clKernel,1,sizeof(float),(void*)&c1);
  clStatus = clSetKernelArg(clKernel,2,sizeof(cl_mem),(void*)&d_A0);
  clStatus = clSetKernelArg(clKernel,3,sizeof(cl_mem),(void*)&d_Anext);
  clStatus = clSetKernelArg(clKernel,4,sizeof(int),(void*)&nx);
  clStatus = clSetKernelArg(clKernel,5,sizeof(int),(void*)&ny);
  clStatus = clSetKernelArg(clKernel,6,sizeof(int),(void*)&nz);
  CHECK_ERROR("clSetKernelArg")

  //main execution
  pb_SwitchToTimer(&timers, pb_TimerID_KERNEL);
  printf("OK+0\n");
  int t;
  /*
   * NOTE(review): text is missing after `for(t=0;t`. The loop condition and
   * body, and everything up to the test of an `outFile` member, are absent
   * from this copy; no kernel launch, device-to-host read-back or OpenCL
   * object release is visible. The braces that follow appear to belong to
   * that output-file check.
   */
  for(t=0;toutFile) {
    pb_SwitchToTimer(&timers, pb_TimerID_IO);
    //outputData(parameters->outFile,h_Anext,nx,ny,nz);
  }
  pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
  //free((void*)clSource[0]);

  /* Release the host grids, stop timing, report the timers and exit successfully. */
  free(h_A0);
  free(h_Anext);
  pb_SwitchToTimer(&timers, pb_TimerID_NONE);

  pb_PrintTimerSet(&timers);
  pb_FreeParameters(parameters);

  return 0;
}