vortex/tests/opencl/mri-q/computeQ.c
2021-06-13 17:42:04 -07:00

118 lines
4.2 KiB
C

/***************************************************************************
*cr
*cr (C) Copyright 2007 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#include <stdio.h>
#include <malloc.h>
#include <CL/cl.h>
#include "ocl.h"
#include "macros.h"
#include "computeQ.h"
#include "parboil.h"
#define NC 1
void computePhiMag_GPU(int numK,cl_mem phiR_d,cl_mem phiI_d,cl_mem phiMag_d,clPrmtr* clPrm)
{
int phiMagBlocks = numK / KERNEL_PHI_MAG_THREADS_PER_BLOCK;
if (numK % KERNEL_PHI_MAG_THREADS_PER_BLOCK)
phiMagBlocks++;
size_t DimPhiMagBlock = KERNEL_PHI_MAG_THREADS_PER_BLOCK;
size_t DimPhiMagGrid = phiMagBlocks*KERNEL_PHI_MAG_THREADS_PER_BLOCK;
cl_int clStatus;
clStatus = clSetKernelArg(clPrm->clKernel,0,sizeof(cl_mem),&phiR_d);
clStatus = clSetKernelArg(clPrm->clKernel,1,sizeof(cl_mem),&phiI_d);
clStatus = clSetKernelArg(clPrm->clKernel,2,sizeof(cl_mem),&phiMag_d);
clStatus = clSetKernelArg(clPrm->clKernel,3,sizeof(int),&numK);
CHECK_ERROR("clSetKernelArg")
clStatus = clEnqueueNDRangeKernel(clPrm->clCommandQueue,clPrm->clKernel,1,NULL,&DimPhiMagGrid,&DimPhiMagBlock,0,NULL,NULL);
CHECK_ERROR("clEnqueueNDRangeKernel")
}
static
unsigned long long int
readElapsedTime(cl_event internal)
{
cl_int status;
cl_ulong t_begin, t_end;
status = clGetEventProfilingInfo(internal, CL_PROFILING_COMMAND_START,
sizeof(cl_ulong), &t_begin, NULL);
if (status != CL_SUCCESS) return 0;
status = clGetEventProfilingInfo(internal, CL_PROFILING_COMMAND_END,
sizeof(cl_ulong), &t_end, NULL);
if (status != CL_SUCCESS) return 0;
return (unsigned long long int)(t_end - t_begin);
}
void computeQ_GPU (int numK,int numX,
cl_mem x_d, cl_mem y_d, cl_mem z_d,
struct kValues* kVals,
cl_mem Qr_d, cl_mem Qi_d,
clPrmtr* clPrm)
{
int QGrids = numK / KERNEL_Q_K_ELEMS_PER_GRID;
if (numK % KERNEL_Q_K_ELEMS_PER_GRID)
QGrids++;
int QBlocks = numX / KERNEL_Q_THREADS_PER_BLOCK;
if (numX % KERNEL_Q_THREADS_PER_BLOCK)
QBlocks++;
size_t DimQBlock = KERNEL_Q_THREADS_PER_BLOCK/NC;
size_t DimQGrid = QBlocks*KERNEL_Q_THREADS_PER_BLOCK/NC;
cl_int clStatus;
cl_mem ck;
ck = clCreateBuffer(clPrm->clContext,CL_MEM_READ_WRITE,KERNEL_Q_K_ELEMS_PER_GRID*sizeof(struct kValues),NULL,&clStatus);
int QGrid;
for (QGrid = 0; QGrid < QGrids; QGrid++) {
// Put the tile of K values into constant mem
int QGridBase = QGrid * KERNEL_Q_K_ELEMS_PER_GRID;
struct kValues* kValsTile = kVals + QGridBase;
int numElems = MIN(KERNEL_Q_K_ELEMS_PER_GRID, numK - QGridBase);
clStatus = clEnqueueWriteBuffer(clPrm->clCommandQueue,ck,CL_TRUE,0,numElems*sizeof(struct kValues),kValsTile,0,NULL,NULL);
CHECK_ERROR("clEnqueueWriteBuffer")
clStatus = clSetKernelArg(clPrm->clKernel,0,sizeof(int),&numK);
clStatus = clSetKernelArg(clPrm->clKernel,1,sizeof(int),&QGridBase);
clStatus = clSetKernelArg(clPrm->clKernel,2,sizeof(cl_mem),&x_d);
clStatus = clSetKernelArg(clPrm->clKernel,3,sizeof(cl_mem),&y_d);
clStatus = clSetKernelArg(clPrm->clKernel,4,sizeof(cl_mem),&z_d);
clStatus = clSetKernelArg(clPrm->clKernel,5,sizeof(cl_mem),&Qr_d);
clStatus = clSetKernelArg(clPrm->clKernel,6,sizeof(cl_mem),&Qi_d);
clStatus = clSetKernelArg(clPrm->clKernel,7,sizeof(cl_mem),&ck);
CHECK_ERROR("clSetKernelArg")
printf ("Grid: %d, Block: %d\n", DimQGrid, DimQBlock);
#define TIMED_EXECUTION
#ifdef TIMED_EXECUTION
cl_event e;
clStatus = clEnqueueNDRangeKernel(clPrm->clCommandQueue,clPrm->clKernel,1,NULL,&DimQGrid,&DimQBlock,0,NULL,&e);
CHECK_ERROR("clEnqueueNDRangeKernel")
clWaitForEvents(1, &e);
printf ("%llu\n", readElapsedTime(e));
#else
clStatus = clEnqueueNDRangeKernel(clPrm->clCommandQueue,clPrm->clKernel,1,NULL,&DimQGrid,&DimQBlock,0,NULL,NULL);
CHECK_ERROR("clEnqueueNDRangeKernel")
#endif
}
}
void createDataStructsCPU(int numK, int numX, float** phiMag,
float** Qr, float** Qi)
{
*phiMag = (float* ) memalign(16, numK * sizeof(float));
*Qr = (float*) memalign(16, numX * sizeof (float));
*Qi = (float*) memalign(16, numX * sizeof (float));
}