mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 13:27:29 -04:00
merging changes from OPAE branch making this branch
This commit is contained in:
parent
39516a6f98
commit
9b1b8789ac
267 changed files with 498191 additions and 166 deletions
848
benchmarks/new_opencl/bfs/CLHelper.h
Executable file
848
benchmarks/new_opencl/bfs/CLHelper.h
Executable file
|
@ -0,0 +1,848 @@
|
|||
//------------------------------------------
|
||||
//--cambine:helper function for OpenCL
|
||||
//--programmer: Jianbin Fang
|
||||
//--date: 27/12/2010
|
||||
//------------------------------------------
|
||||
#ifndef _CL_HELPER_
|
||||
#define _CL_HELPER_
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
|
||||
using std::string;
|
||||
using std::ifstream;
|
||||
using std::cerr;
|
||||
using std::endl;
|
||||
using std::cout;
|
||||
//#pragma OPENCL EXTENSION cl_nv_compiler_options:enable
|
||||
#define WORK_DIM 2 // work-items dimensions
|
||||
|
||||
struct oclHandleStruct {
|
||||
cl_context context;
|
||||
cl_device_id *devices;
|
||||
cl_command_queue queue;
|
||||
cl_program program;
|
||||
cl_int cl_status;
|
||||
std::string error_str;
|
||||
std::vector<cl_kernel> kernel;
|
||||
};
|
||||
|
||||
struct oclHandleStruct oclHandles;
|
||||
|
||||
char kernel_file[100] = "Kernels.cl";
|
||||
int total_kernels = 2;
|
||||
string kernel_names[2] = {"BFS_1", "BFS_2"};
|
||||
int work_group_size = 512;
|
||||
int device_id_inused = 0; // deviced id used (default : 0)
|
||||
|
||||
// Reads the whole file `filename` into a freshly malloc'ed buffer.
//
// On success returns 0, stores the buffer in *data and its length in *size;
// the caller owns the buffer and must free() it.
// On failure returns -1 and leaves *data == nullptr and *size == 0 (when the
// output pointers themselves are valid).
int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
  if (nullptr == filename || nullptr == data || nullptr == size)
    return -1;

  // Put the outputs in a defined state for every failure path below.
  *data = nullptr;
  *size = 0;

  // "rb": the kernel is a binary image, so no newline translation may occur.
  FILE* fp = fopen(filename, "rb");
  if (NULL == fp) {
    fprintf(stderr, "Failed to load kernel.");
    return -1;
  }

  long fsize = -1;
  if (0 == fseek(fp, 0, SEEK_END))
    fsize = ftell(fp);
  if (fsize < 0) {
    fprintf(stderr, "Failed to determine kernel file size.");
    fclose(fp);
    return -1;
  }
  rewind(fp);

  // Allocate at least one byte so an empty file still yields a valid pointer.
  const size_t expected = (size_t)fsize;
  uint8_t* buffer = (uint8_t*)malloc(expected > 0 ? expected : 1);
  if (nullptr == buffer) {
    fprintf(stderr, "Failed to allocate memory for kernel.");
    fclose(fp);
    return -1;
  }

  const size_t nread = fread(buffer, 1, expected, fp);
  fclose(fp);

  // A short read would hand a truncated binary to the OpenCL runtime.
  if (nread != expected) {
    fprintf(stderr, "Failed to read kernel.");
    free(buffer);
    return -1;
  }

  *data = buffer;
  *size = nread;
  return 0;
}
|
||||
|
||||
/*
 * Converts the contents of a file into a string.
 *
 * The file is read in binary mode and returned byte-for-byte (embedded NUL
 * bytes included). An empty file yields an empty string.
 *
 * Throws std::string("FileToString()::Error: Unable to open file <name>")
 * when the file cannot be opened or cannot be read completely; read problems
 * are additionally reported on stderr.
 */
std::string FileToString(const std::string fileName) {
  std::ifstream f(fileName.c_str(), std::ifstream::in | std::ifstream::binary);

  if (f.is_open()) {
    try {
      f.seekg(0, std::ifstream::end);
      const std::streamoff length = f.tellg();
      f.seekg(0, std::ifstream::beg);
      // tellg() reports -1 on failure; never turn that into an allocation size.
      if (length < 0 || !f)
        throw(std::string("Could not determine file size"));

      std::string contents(static_cast<size_t>(length), '\0');
      if (length > 0) {
        f.read(&contents[0], length);
        if (f.gcount() != length)
          throw(std::string("Could not read the whole file"));
      }
      return contents;  // the ifstream destructor closes the file
    } catch (const std::string &msg) {
      std::cerr << "Exception caught in FileToString(): " << msg << std::endl;
    } catch (...) {
      // e.g. std::bad_alloc while sizing the string
      std::cerr << "Exception caught in FileToString()" << std::endl;
    }
  }

  std::string errorMsg =
      "FileToString()::Error: Unable to open file " + fileName;
  throw(errorMsg);
}
|
||||
//---------------------------------------
|
||||
// Read command line parameters
|
||||
//
|
||||
void _clCmdParams(int argc, char *argv[]) {
|
||||
for (int i = 0; i < argc; ++i) {
|
||||
switch (argv[i][1]) {
|
||||
case 'g': //--g stands for size of work group
|
||||
if (++i < argc) {
|
||||
sscanf(argv[i], "%u", &work_group_size);
|
||||
} else {
|
||||
std::cerr << "Could not read argument after option " << argv[i - 1]
|
||||
<< std::endl;
|
||||
throw;
|
||||
}
|
||||
break;
|
||||
case 'd': //--d stands for device id used in computaion
|
||||
if (++i < argc) {
|
||||
sscanf(argv[i], "%u", &device_id_inused);
|
||||
} else {
|
||||
std::cerr << "Could not read argument after option " << argv[i - 1]
|
||||
<< std::endl;
|
||||
throw;
|
||||
}
|
||||
break;
|
||||
default:;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//---------------------------------------
|
||||
// Initlize CL objects
|
||||
//--description: there are 5 steps to initialize all the OpenCL objects needed
|
||||
//--revised on 04/01/2011: get the number of devices and
|
||||
// devices have no relationship with context
|
||||
void _clInit() {
|
||||
printf("_clInit()\n");
|
||||
|
||||
int DEVICE_ID_INUSED = device_id_inused;
|
||||
cl_int resultCL;
|
||||
|
||||
oclHandles.context = NULL;
|
||||
oclHandles.devices = NULL;
|
||||
oclHandles.queue = NULL;
|
||||
oclHandles.program = NULL;
|
||||
|
||||
cl_uint deviceListSize;
|
||||
|
||||
//-----------------------------------------------
|
||||
//--cambine-1: find the available platforms and select one
|
||||
|
||||
cl_uint numPlatforms = 1;
|
||||
cl_platform_id targetPlatform = NULL;
|
||||
|
||||
cl_platform_id *allPlatforms =
|
||||
(cl_platform_id *)malloc(numPlatforms * sizeof(cl_platform_id));
|
||||
|
||||
resultCL = clGetPlatformIDs(numPlatforms, allPlatforms, NULL);
|
||||
if (resultCL != CL_SUCCESS)
|
||||
throw(string("InitCL()::Error: Getting platform ids (clGetPlatformIDs)"));
|
||||
|
||||
// Select the target platform. Default: first platform
|
||||
targetPlatform = allPlatforms[0];
|
||||
|
||||
/*for (int i = 0; i < numPlatforms; i++)
|
||||
{
|
||||
char pbuff[128];
|
||||
resultCL = clGetPlatformInfo( allPlatforms[i],
|
||||
CL_PLATFORM_VENDOR,
|
||||
sizeof(pbuff),
|
||||
pbuff,
|
||||
NULL);
|
||||
if (resultCL != CL_SUCCESS)
|
||||
throw (string("InitCL()::Error: Getting platform info (clGetPlatformInfo)"));
|
||||
|
||||
//printf("vedor is %s\n",pbuff);
|
||||
|
||||
}
|
||||
free(allPlatforms);*/
|
||||
|
||||
//-----------------------------------------------
|
||||
//--cambine-2: create an OpenCL context
|
||||
/*cl_context_properties cprops[3] = { CL_CONTEXT_PLATFORM,
|
||||
(cl_context_properties)targetPlatform, 0 };
|
||||
oclHandles.context = clCreateContextFromType(cprops,
|
||||
CL_DEVICE_TYPE_GPU,
|
||||
NULL,
|
||||
NULL,
|
||||
&resultCL);
|
||||
|
||||
if ((resultCL != CL_SUCCESS) || (oclHandles.context == NULL))
|
||||
throw (string("InitCL()::Error: Creating Context
|
||||
(clCreateContextFromType)"));
|
||||
|
||||
//-----------------------------------------------
|
||||
//--cambine-3: detect OpenCL devices
|
||||
// First, get the size of device list
|
||||
oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_GPU, 0,
|
||||
NULL, &deviceListSize);
|
||||
if(oclHandles.cl_status!=CL_SUCCESS){
|
||||
throw(string("exception in _clInit -> clGetDeviceIDs"));
|
||||
}
|
||||
if (deviceListSize == 0)
|
||||
throw(string("InitCL()::Error: No devices found."));
|
||||
|
||||
printf("OK1()\n");
|
||||
|
||||
//std::cout<<"device number:"<<deviceListSize<<std::endl;*/
|
||||
|
||||
// Now, allocate the device list
|
||||
deviceListSize = 1;
|
||||
oclHandles.devices =
|
||||
(cl_device_id *)malloc(deviceListSize * sizeof(cl_device_id));
|
||||
if (oclHandles.devices == 0)
|
||||
throw(string("InitCL()::Error: Could not allocate memory."));
|
||||
|
||||
//* Next, get the device list data
|
||||
oclHandles.cl_status =
|
||||
clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_DEFAULT, deviceListSize,
|
||||
oclHandles.devices, NULL);
|
||||
if (oclHandles.cl_status != CL_SUCCESS) {
|
||||
throw(string("exception in _clInit -> clGetDeviceIDs-2"));
|
||||
}
|
||||
|
||||
oclHandles.context = clCreateContext(NULL, deviceListSize, oclHandles.devices,
|
||||
NULL, NULL, &resultCL);
|
||||
if ((resultCL != CL_SUCCESS) || (oclHandles.context == NULL))
|
||||
throw(string("InitCL()::Error: Creating Context (clCreateContext)"));
|
||||
|
||||
//-----------------------------------------------
|
||||
//--cambine-4: Create an OpenCL command queue
|
||||
oclHandles.queue = clCreateCommandQueue(
|
||||
oclHandles.context, oclHandles.devices[DEVICE_ID_INUSED], 0, &resultCL);
|
||||
printf("resultCL=%d, queue=0x%x\n", resultCL, oclHandles.queue);
|
||||
|
||||
if ((resultCL != CL_SUCCESS) || (oclHandles.queue == NULL))
|
||||
throw(string("InitCL()::Creating Command Queue. (clCreateCommandQueue)"));
|
||||
//-----------------------------------------------
|
||||
//--cambine-5: Load CL file, build CL program object, create CL kernel object
|
||||
/*std::string source_str = FileToString(kernel_file);
|
||||
const char * source = source_str.c_str();
|
||||
size_t sourceSize[] = { source_str.length() };*/
|
||||
|
||||
//oclHandles.program = clCreateProgramWithBuiltInKernels(
|
||||
// oclHandles.context, 1, &oclHandles.devices[DEVICE_ID_INUSED],
|
||||
// "BFS_1;BFS_2", &resultCL);
|
||||
/*oclHandles.program = clCreateProgramWithSource(oclHandles.context,
|
||||
1,
|
||||
&source,
|
||||
sourceSize,
|
||||
&resultCL);*/
|
||||
// read kernel binary from file
|
||||
uint8_t *kernel_bin = NULL;
|
||||
size_t kernel_size;
|
||||
cl_int binary_status = 0;
|
||||
if (0 != read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size))
|
||||
std::abort();
|
||||
|
||||
oclHandles.program = clCreateProgramWithBinary(
|
||||
oclHandles.context, 1, &oclHandles.devices[DEVICE_ID_INUSED], &kernel_size, &kernel_bin, &binary_status, &resultCL);
|
||||
free(kernel_bin);
|
||||
|
||||
if ((resultCL != CL_SUCCESS) || (oclHandles.program == NULL))
|
||||
throw(string("InitCL()::Error: Loading Binary into cl_program. "
|
||||
"(clCreateProgramWithBinary)"));
|
||||
|
||||
// insert debug information
|
||||
// std::string options= "-cl-nv-verbose"; //Doesn't work on AMD machines
|
||||
// options += " -cl-nv-opt-level=3";
|
||||
resultCL = clBuildProgram(oclHandles.program, deviceListSize,
|
||||
oclHandles.devices, NULL, NULL, NULL);
|
||||
if ((resultCL != CL_SUCCESS) || (oclHandles.program == NULL)) {
|
||||
cerr << "InitCL()::Error: In clBuildProgram" << endl;
|
||||
|
||||
size_t length;
|
||||
resultCL = clGetProgramBuildInfo(oclHandles.program,
|
||||
oclHandles.devices[DEVICE_ID_INUSED],
|
||||
CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
|
||||
if (resultCL != CL_SUCCESS)
|
||||
throw(string("InitCL()::Error: Getting Program build "
|
||||
"info(clGetProgramBuildInfo)"));
|
||||
|
||||
char *buffer = (char *)malloc(length);
|
||||
resultCL = clGetProgramBuildInfo(
|
||||
oclHandles.program, oclHandles.devices[DEVICE_ID_INUSED],
|
||||
CL_PROGRAM_BUILD_LOG, length, buffer, NULL);
|
||||
if (resultCL != CL_SUCCESS)
|
||||
throw(string("InitCL()::Error: Getting Program build "
|
||||
"info(clGetProgramBuildInfo)"));
|
||||
|
||||
cerr << buffer << endl;
|
||||
free(buffer);
|
||||
|
||||
throw(string("InitCL()::Error: Building Program (clBuildProgram)"));
|
||||
}
|
||||
|
||||
// get program information in intermediate representation
|
||||
#ifdef PTX_MSG
|
||||
size_t binary_sizes[deviceListSize];
|
||||
char *binaries[deviceListSize];
|
||||
// figure out number of devices and the sizes of the binary for each device.
|
||||
oclHandles.cl_status =
|
||||
clGetProgramInfo(oclHandles.program, CL_PROGRAM_BINARY_SIZES,
|
||||
sizeof(size_t) * deviceListSize, &binary_sizes, NULL);
|
||||
if (oclHandles.cl_status != CL_SUCCESS) {
|
||||
throw(string("--cambine:exception in _InitCL -> clGetProgramInfo-2"));
|
||||
}
|
||||
|
||||
std::cout << "--cambine:" << binary_sizes << std::endl;
|
||||
// copy over all of the generated binaries.
|
||||
for (int i = 0; i < deviceListSize; i++)
|
||||
binaries[i] = (char *)malloc(sizeof(char) * (binary_sizes[i] + 1));
|
||||
oclHandles.cl_status =
|
||||
clGetProgramInfo(oclHandles.program, CL_PROGRAM_BINARIES,
|
||||
sizeof(char *) * deviceListSize, binaries, NULL);
|
||||
if (oclHandles.cl_status != CL_SUCCESS) {
|
||||
throw(string("--cambine:exception in _InitCL -> clGetProgramInfo-3"));
|
||||
}
|
||||
for (int i = 0; i < deviceListSize; i++)
|
||||
binaries[i][binary_sizes[i]] = '\0';
|
||||
std::cout << "--cambine:writing ptd information..." << std::endl;
|
||||
FILE *ptx_file = fopen("cl.ptx", "w");
|
||||
if (ptx_file == NULL) {
|
||||
throw(string("exceptions in allocate ptx file."));
|
||||
}
|
||||
fprintf(ptx_file, "%s", binaries[DEVICE_ID_INUSED]);
|
||||
fclose(ptx_file);
|
||||
std::cout << "--cambine:writing ptd information done." << std::endl;
|
||||
for (int i = 0; i < deviceListSize; i++)
|
||||
free(binaries[i]);
|
||||
#endif
|
||||
|
||||
for (int nKernel = 0; nKernel < total_kernels; nKernel++) {
|
||||
/* get a kernel object handle for a kernel with the given name */
|
||||
cl_kernel kernel = clCreateKernel(
|
||||
oclHandles.program, (kernel_names[nKernel]).c_str(), &resultCL);
|
||||
|
||||
if ((resultCL != CL_SUCCESS) || (kernel == NULL)) {
|
||||
string errorMsg = "InitCL()::Error: Creating Kernel (clCreateKernel) \"" +
|
||||
kernel_names[nKernel] + "\"";
|
||||
throw(errorMsg);
|
||||
}
|
||||
|
||||
oclHandles.kernel.push_back(kernel);
|
||||
}
|
||||
// get resource alocation information
|
||||
#ifdef RES_MSG
|
||||
char *build_log;
|
||||
size_t ret_val_size;
|
||||
oclHandles.cl_status = clGetProgramBuildInfo(
|
||||
oclHandles.program, oclHandles.devices[DEVICE_ID_INUSED],
|
||||
CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
|
||||
if (oclHandles.cl_status != CL_SUCCESS) {
|
||||
throw(string("exceptions in _InitCL -> getting resource information"));
|
||||
}
|
||||
|
||||
build_log = (char *)malloc(ret_val_size + 1);
|
||||
oclHandles.cl_status = clGetProgramBuildInfo(
|
||||
oclHandles.program, oclHandles.devices[DEVICE_ID_INUSED],
|
||||
CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
|
||||
if (oclHandles.cl_status != CL_SUCCESS) {
|
||||
throw(string(
|
||||
"exceptions in _InitCL -> getting resources allocation information-2"));
|
||||
}
|
||||
build_log[ret_val_size] = '\0';
|
||||
std::cout << "--cambine:" << build_log << std::endl;
|
||||
free(build_log);
|
||||
#endif
|
||||
}
|
||||
|
||||
//---------------------------------------
|
||||
// release CL objects
|
||||
void _clRelease() {
|
||||
char errorFlag = false;
|
||||
|
||||
for (int nKernel = 0; nKernel < oclHandles.kernel.size(); nKernel++) {
|
||||
if (oclHandles.kernel[nKernel] != NULL) {
|
||||
cl_int resultCL = clReleaseKernel(oclHandles.kernel[nKernel]);
|
||||
if (resultCL != CL_SUCCESS) {
|
||||
cerr << "ReleaseCL()::Error: In clReleaseKernel" << endl;
|
||||
errorFlag = true;
|
||||
}
|
||||
oclHandles.kernel[nKernel] = NULL;
|
||||
}
|
||||
oclHandles.kernel.clear();
|
||||
}
|
||||
|
||||
if (oclHandles.program != NULL) {
|
||||
cl_int resultCL = clReleaseProgram(oclHandles.program);
|
||||
if (resultCL != CL_SUCCESS) {
|
||||
cerr << "ReleaseCL()::Error: In clReleaseProgram" << endl;
|
||||
errorFlag = true;
|
||||
}
|
||||
oclHandles.program = NULL;
|
||||
}
|
||||
|
||||
if (oclHandles.queue != NULL) {
|
||||
cl_int resultCL = clReleaseCommandQueue(oclHandles.queue);
|
||||
if (resultCL != CL_SUCCESS) {
|
||||
cerr << "ReleaseCL()::Error: In clReleaseCommandQueue" << endl;
|
||||
errorFlag = true;
|
||||
}
|
||||
oclHandles.queue = NULL;
|
||||
}
|
||||
|
||||
free(oclHandles.devices);
|
||||
|
||||
if (oclHandles.context != NULL) {
|
||||
cl_int resultCL = clReleaseContext(oclHandles.context);
|
||||
if (resultCL != CL_SUCCESS) {
|
||||
cerr << "ReleaseCL()::Error: In clReleaseContext" << endl;
|
||||
errorFlag = true;
|
||||
}
|
||||
oclHandles.context = NULL;
|
||||
}
|
||||
|
||||
if (errorFlag)
|
||||
throw(string("ReleaseCL()::Error encountered."));
|
||||
}
|
||||
//--------------------------------------------------------
|
||||
//--cambine:create buffer and then copy data from host to device
|
||||
cl_mem _clCreateAndCpyMem(int size, void *h_mem_source) throw(string) {
|
||||
cl_mem d_mem;
|
||||
d_mem = clCreateBuffer(oclHandles.context,
|
||||
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, size,
|
||||
h_mem_source, &oclHandles.cl_status);
|
||||
#ifdef ERRMSG
|
||||
if (oclHandles.cl_status != CL_SUCCESS)
|
||||
throw(string("excpetion in _clCreateAndCpyMem()"));
|
||||
#endif
|
||||
return d_mem;
|
||||
}
|
||||
//-------------------------------------------------------
|
||||
//--cambine: create read only buffer for devices
|
||||
//--date: 17/01/2011
|
||||
cl_mem _clMallocRW(int size, void *h_mem_ptr) throw(string) {
|
||||
cl_mem d_mem;
|
||||
d_mem = clCreateBuffer(oclHandles.context,
|
||||
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, size,
|
||||
h_mem_ptr, &oclHandles.cl_status);
|
||||
#ifdef ERRMSG
|
||||
if (oclHandles.cl_status != CL_SUCCESS)
|
||||
throw(string("excpetion in _clMallocRW"));
|
||||
#endif
|
||||
return d_mem;
|
||||
}
|
||||
//-------------------------------------------------------
|
||||
//--cambine: create read and write buffer for devices
|
||||
//--date: 17/01/2011
|
||||
cl_mem _clMalloc(int size, void *h_mem_ptr) throw(string) {
|
||||
cl_mem d_mem;
|
||||
d_mem = clCreateBuffer(oclHandles.context,
|
||||
CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, size,
|
||||
h_mem_ptr, &oclHandles.cl_status);
|
||||
#ifdef ERRMSG
|
||||
if (oclHandles.cl_status != CL_SUCCESS)
|
||||
throw(string("excpetion in _clMalloc"));
|
||||
#endif
|
||||
return d_mem;
|
||||
}
|
||||
|
||||
//-------------------------------------------------------
|
||||
//--cambine: transfer data from host to device
|
||||
//--date: 17/01/2011
|
||||
void _clMemcpyH2D(cl_mem d_mem, int size, const void *h_mem_ptr) throw(string) {
|
||||
oclHandles.cl_status = clEnqueueWriteBuffer(
|
||||
oclHandles.queue, d_mem, CL_TRUE, 0, size, h_mem_ptr, 0, NULL, NULL);
|
||||
#ifdef ERRMSG
|
||||
if (oclHandles.cl_status != CL_SUCCESS)
|
||||
throw(string("excpetion in _clMemcpyH2D"));
|
||||
#endif
|
||||
}
|
||||
//--------------------------------------------------------
|
||||
//--cambine:create buffer and then copy data from host to device with pinned
|
||||
// memory
|
||||
cl_mem _clCreateAndCpyPinnedMem(int size, float *h_mem_source) throw(string) {
|
||||
cl_mem d_mem, d_mem_pinned;
|
||||
float *h_mem_pinned = NULL;
|
||||
d_mem_pinned = clCreateBuffer(oclHandles.context,
|
||||
CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, size,
|
||||
NULL, &oclHandles.cl_status);
|
||||
#ifdef ERRMSG
|
||||
if (oclHandles.cl_status != CL_SUCCESS)
|
||||
throw(string("excpetion in _clCreateAndCpyMem()->d_mem_pinned"));
|
||||
#endif
|
||||
//------------
|
||||
d_mem = clCreateBuffer(oclHandles.context, CL_MEM_READ_ONLY, size, NULL,
|
||||
&oclHandles.cl_status);
|
||||
#ifdef ERRMSG
|
||||
if (oclHandles.cl_status != CL_SUCCESS)
|
||||
throw(string("excpetion in _clCreateAndCpyMem() -> d_mem "));
|
||||
#endif
|
||||
//----------
|
||||
h_mem_pinned = (cl_float *)clEnqueueMapBuffer(
|
||||
oclHandles.queue, d_mem_pinned, CL_TRUE, CL_MAP_WRITE, 0, size, 0, NULL,
|
||||
NULL, &oclHandles.cl_status);
|
||||
#ifdef ERRMSG
|
||||
if (oclHandles.cl_status != CL_SUCCESS)
|
||||
throw(string("excpetion in _clCreateAndCpyMem() -> clEnqueueMapBuffer"));
|
||||
#endif
|
||||
int element_number = size / sizeof(float);
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < element_number; i++) {
|
||||
h_mem_pinned[i] = h_mem_source[i];
|
||||
}
|
||||
//----------
|
||||
oclHandles.cl_status = clEnqueueWriteBuffer(
|
||||
oclHandles.queue, d_mem, CL_TRUE, 0, size, h_mem_pinned, 0, NULL, NULL);
|
||||
#ifdef ERRMSG
|
||||
if (oclHandles.cl_status != CL_SUCCESS)
|
||||
throw(string("excpetion in _clCreateAndCpyMem() -> clEnqueueWriteBuffer"));
|
||||
#endif
|
||||
|
||||
return d_mem;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------
|
||||
//--cambine:create write only buffer on device
|
||||
cl_mem _clMallocWO(int size) throw(string) {
|
||||
cl_mem d_mem;
|
||||
d_mem = clCreateBuffer(oclHandles.context, CL_MEM_WRITE_ONLY, size, 0,
|
||||
&oclHandles.cl_status);
|
||||
#ifdef ERRMSG
|
||||
if (oclHandles.cl_status != CL_SUCCESS)
|
||||
throw(string("excpetion in _clCreateMem()"));
|
||||
#endif
|
||||
return d_mem;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------
|
||||
// transfer data from device to host
|
||||
void _clMemcpyD2H(cl_mem d_mem, int size, void *h_mem) throw(string) {
|
||||
oclHandles.cl_status = clEnqueueReadBuffer(oclHandles.queue, d_mem, CL_TRUE,
|
||||
0, size, h_mem, 0, 0, 0);
|
||||
#ifdef ERRMSG
|
||||
oclHandles.error_str = "excpetion in _clCpyMemD2H -> ";
|
||||
switch (oclHandles.cl_status) {
|
||||
case CL_INVALID_COMMAND_QUEUE:
|
||||
oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
|
||||
break;
|
||||
case CL_INVALID_CONTEXT:
|
||||
oclHandles.error_str += "CL_INVALID_CONTEXT";
|
||||
break;
|
||||
case CL_INVALID_MEM_OBJECT:
|
||||
oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
|
||||
break;
|
||||
case CL_INVALID_VALUE:
|
||||
oclHandles.error_str += "CL_INVALID_VALUE";
|
||||
break;
|
||||
case CL_INVALID_EVENT_WAIT_LIST:
|
||||
oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
|
||||
break;
|
||||
case CL_MEM_OBJECT_ALLOCATION_FAILURE:
|
||||
oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
|
||||
break;
|
||||
case CL_OUT_OF_HOST_MEMORY:
|
||||
oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
|
||||
break;
|
||||
default:
|
||||
oclHandles.error_str += "Unknown reason";
|
||||
break;
|
||||
}
|
||||
if (oclHandles.cl_status != CL_SUCCESS)
|
||||
throw(oclHandles.error_str);
|
||||
#endif
|
||||
}
|
||||
|
||||
//--------------------------------------------------------
|
||||
// set kernel arguments
|
||||
void _clSetArgs(int kernel_id, int arg_idx, void *d_mem,
|
||||
int size = 0) throw(string) {
|
||||
if (!size) {
|
||||
oclHandles.cl_status = clSetKernelArg(oclHandles.kernel[kernel_id], arg_idx,
|
||||
sizeof(d_mem), &d_mem);
|
||||
#ifdef ERRMSG
|
||||
oclHandles.error_str = "excpetion in _clSetKernelArg() ";
|
||||
switch (oclHandles.cl_status) {
|
||||
case CL_INVALID_KERNEL:
|
||||
oclHandles.error_str += "CL_INVALID_KERNEL";
|
||||
break;
|
||||
case CL_INVALID_ARG_INDEX:
|
||||
oclHandles.error_str += "CL_INVALID_ARG_INDEX";
|
||||
break;
|
||||
case CL_INVALID_ARG_VALUE:
|
||||
oclHandles.error_str += "CL_INVALID_ARG_VALUE";
|
||||
break;
|
||||
case CL_INVALID_MEM_OBJECT:
|
||||
oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
|
||||
break;
|
||||
case CL_INVALID_SAMPLER:
|
||||
oclHandles.error_str += "CL_INVALID_SAMPLER";
|
||||
break;
|
||||
case CL_INVALID_ARG_SIZE:
|
||||
oclHandles.error_str += "CL_INVALID_ARG_SIZE";
|
||||
break;
|
||||
case CL_OUT_OF_RESOURCES:
|
||||
oclHandles.error_str += "CL_OUT_OF_RESOURCES";
|
||||
break;
|
||||
case CL_OUT_OF_HOST_MEMORY:
|
||||
oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
|
||||
break;
|
||||
default:
|
||||
oclHandles.error_str += "Unknown reason";
|
||||
break;
|
||||
}
|
||||
if (oclHandles.cl_status != CL_SUCCESS)
|
||||
throw(oclHandles.error_str);
|
||||
#endif
|
||||
} else {
|
||||
oclHandles.cl_status =
|
||||
clSetKernelArg(oclHandles.kernel[kernel_id], arg_idx, size, d_mem);
|
||||
#ifdef ERRMSG
|
||||
oclHandles.error_str = "excpetion in _clSetKernelArg() ";
|
||||
switch (oclHandles.cl_status) {
|
||||
case CL_INVALID_KERNEL:
|
||||
oclHandles.error_str += "CL_INVALID_KERNEL";
|
||||
break;
|
||||
case CL_INVALID_ARG_INDEX:
|
||||
oclHandles.error_str += "CL_INVALID_ARG_INDEX";
|
||||
break;
|
||||
case CL_INVALID_ARG_VALUE:
|
||||
oclHandles.error_str += "CL_INVALID_ARG_VALUE";
|
||||
break;
|
||||
case CL_INVALID_MEM_OBJECT:
|
||||
oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
|
||||
break;
|
||||
case CL_INVALID_SAMPLER:
|
||||
oclHandles.error_str += "CL_INVALID_SAMPLER";
|
||||
break;
|
||||
case CL_INVALID_ARG_SIZE:
|
||||
oclHandles.error_str += "CL_INVALID_ARG_SIZE";
|
||||
break;
|
||||
case CL_OUT_OF_RESOURCES:
|
||||
oclHandles.error_str += "CL_OUT_OF_RESOURCES";
|
||||
break;
|
||||
case CL_OUT_OF_HOST_MEMORY:
|
||||
oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
|
||||
break;
|
||||
default:
|
||||
oclHandles.error_str += "Unknown reason";
|
||||
break;
|
||||
}
|
||||
if (oclHandles.cl_status != CL_SUCCESS)
|
||||
throw(oclHandles.error_str);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
// Blocks until every command queued on oclHandles.queue has completed.
// The call status is stored in oclHandles.cl_status; when ERRMSG is defined
// a failure stores a description in oclHandles.error_str and throws it
// (std::string).
void _clFinish() {
  oclHandles.cl_status = clFinish(oclHandles.queue);
#ifdef ERRMSG
  if (oclHandles.cl_status != CL_SUCCESS) {
    oclHandles.error_str = "excpetion in _clFinish";
    switch (oclHandles.cl_status) {
    case CL_INVALID_COMMAND_QUEUE:
      oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
      break;
    case CL_OUT_OF_RESOURCES:
      oclHandles.error_str += "CL_OUT_OF_RESOURCES";
      break;
    case CL_OUT_OF_HOST_MEMORY:
      oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
      break;
    default:
      oclHandles.error_str += "Unknown reasons";
      break;
    }
    throw(oclHandles.error_str);
  }
#endif
}
|
||||
//--------------------------------------------------------
|
||||
//--cambine:enqueue kernel
|
||||
void _clInvokeKernel(int kernel_id, int work_items,
|
||||
int work_group_size) throw(string) {
|
||||
cl_uint work_dim = WORK_DIM;
|
||||
cl_event e[1];
|
||||
if (work_items % work_group_size != 0) // process situations that work_items
|
||||
// cannot be divided by work_group_size
|
||||
work_items =
|
||||
work_items + (work_group_size - (work_items % work_group_size));
|
||||
size_t local_work_size[] = {work_group_size, 1};
|
||||
size_t global_work_size[] = {work_items, 1};
|
||||
oclHandles.cl_status = clEnqueueNDRangeKernel(
|
||||
oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0,
|
||||
global_work_size, local_work_size, 0, 0, &(e[0]));
|
||||
#ifdef ERRMSG
|
||||
oclHandles.error_str = "excpetion in _clInvokeKernel() -> ";
|
||||
switch (oclHandles.cl_status) {
|
||||
case CL_INVALID_PROGRAM_EXECUTABLE:
|
||||
oclHandles.error_str += "CL_INVALID_PROGRAM_EXECUTABLE";
|
||||
break;
|
||||
case CL_INVALID_COMMAND_QUEUE:
|
||||
oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
|
||||
break;
|
||||
case CL_INVALID_KERNEL:
|
||||
oclHandles.error_str += "CL_INVALID_KERNEL";
|
||||
break;
|
||||
case CL_INVALID_CONTEXT:
|
||||
oclHandles.error_str += "CL_INVALID_CONTEXT";
|
||||
break;
|
||||
case CL_INVALID_KERNEL_ARGS:
|
||||
oclHandles.error_str += "CL_INVALID_KERNEL_ARGS";
|
||||
break;
|
||||
case CL_INVALID_WORK_DIMENSION:
|
||||
oclHandles.error_str += "CL_INVALID_WORK_DIMENSION";
|
||||
break;
|
||||
case CL_INVALID_GLOBAL_WORK_SIZE:
|
||||
oclHandles.error_str += "CL_INVALID_GLOBAL_WORK_SIZE";
|
||||
break;
|
||||
case CL_INVALID_WORK_GROUP_SIZE:
|
||||
oclHandles.error_str += "CL_INVALID_WORK_GROUP_SIZE";
|
||||
break;
|
||||
case CL_INVALID_WORK_ITEM_SIZE:
|
||||
oclHandles.error_str += "CL_INVALID_WORK_ITEM_SIZE";
|
||||
break;
|
||||
case CL_INVALID_GLOBAL_OFFSET:
|
||||
oclHandles.error_str += "CL_INVALID_GLOBAL_OFFSET";
|
||||
break;
|
||||
case CL_OUT_OF_RESOURCES:
|
||||
oclHandles.error_str += "CL_OUT_OF_RESOURCES";
|
||||
break;
|
||||
case CL_MEM_OBJECT_ALLOCATION_FAILURE:
|
||||
oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
|
||||
break;
|
||||
case CL_INVALID_EVENT_WAIT_LIST:
|
||||
oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
|
||||
break;
|
||||
case CL_OUT_OF_HOST_MEMORY:
|
||||
oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
|
||||
break;
|
||||
default:
|
||||
oclHandles.error_str += "Unkown reseason";
|
||||
break;
|
||||
}
|
||||
if (oclHandles.cl_status != CL_SUCCESS)
|
||||
throw(oclHandles.error_str);
|
||||
#endif
|
||||
//_clFinish();
|
||||
// oclHandles.cl_status = clWaitForEvents(1, &e[0]);
|
||||
// #ifdef ERRMSG
|
||||
// if (oclHandles.cl_status!= CL_SUCCESS)
|
||||
// throw(string("excpetion in _clEnqueueNDRange() -> clWaitForEvents"));
|
||||
// #endif
|
||||
}
|
||||
void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x,
|
||||
int group_y) throw(string) {
|
||||
cl_uint work_dim = WORK_DIM;
|
||||
size_t local_work_size[] = {group_x, group_y};
|
||||
size_t global_work_size[] = {range_x, range_y};
|
||||
cl_event e[1];
|
||||
/*if(work_items%work_group_size != 0) //process situations that work_items
|
||||
cannot be divided by work_group_size
|
||||
work_items = work_items + (work_group_size-(work_items%work_group_size));*/
|
||||
oclHandles.cl_status = clEnqueueNDRangeKernel(
|
||||
oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0,
|
||||
global_work_size, local_work_size, 0, 0, &(e[0]));
|
||||
#ifdef ERRMSG
|
||||
oclHandles.error_str = "excpetion in _clInvokeKernel() -> ";
|
||||
switch (oclHandles.cl_status) {
|
||||
case CL_INVALID_PROGRAM_EXECUTABLE:
|
||||
oclHandles.error_str += "CL_INVALID_PROGRAM_EXECUTABLE";
|
||||
break;
|
||||
case CL_INVALID_COMMAND_QUEUE:
|
||||
oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
|
||||
break;
|
||||
case CL_INVALID_KERNEL:
|
||||
oclHandles.error_str += "CL_INVALID_KERNEL";
|
||||
break;
|
||||
case CL_INVALID_CONTEXT:
|
||||
oclHandles.error_str += "CL_INVALID_CONTEXT";
|
||||
break;
|
||||
case CL_INVALID_KERNEL_ARGS:
|
||||
oclHandles.error_str += "CL_INVALID_KERNEL_ARGS";
|
||||
break;
|
||||
case CL_INVALID_WORK_DIMENSION:
|
||||
oclHandles.error_str += "CL_INVALID_WORK_DIMENSION";
|
||||
break;
|
||||
case CL_INVALID_GLOBAL_WORK_SIZE:
|
||||
oclHandles.error_str += "CL_INVALID_GLOBAL_WORK_SIZE";
|
||||
break;
|
||||
case CL_INVALID_WORK_GROUP_SIZE:
|
||||
oclHandles.error_str += "CL_INVALID_WORK_GROUP_SIZE";
|
||||
break;
|
||||
case CL_INVALID_WORK_ITEM_SIZE:
|
||||
oclHandles.error_str += "CL_INVALID_WORK_ITEM_SIZE";
|
||||
break;
|
||||
case CL_INVALID_GLOBAL_OFFSET:
|
||||
oclHandles.error_str += "CL_INVALID_GLOBAL_OFFSET";
|
||||
break;
|
||||
case CL_OUT_OF_RESOURCES:
|
||||
oclHandles.error_str += "CL_OUT_OF_RESOURCES";
|
||||
break;
|
||||
case CL_MEM_OBJECT_ALLOCATION_FAILURE:
|
||||
oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
|
||||
break;
|
||||
case CL_INVALID_EVENT_WAIT_LIST:
|
||||
oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
|
||||
break;
|
||||
case CL_OUT_OF_HOST_MEMORY:
|
||||
oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
|
||||
break;
|
||||
default:
|
||||
oclHandles.error_str += "Unkown reseason";
|
||||
break;
|
||||
}
|
||||
if (oclHandles.cl_status != CL_SUCCESS)
|
||||
throw(oclHandles.error_str);
|
||||
#endif
|
||||
//_clFinish();
|
||||
/*oclHandles.cl_status = clWaitForEvents(1, &e[0]);
|
||||
|
||||
#ifdef ERRMSG
|
||||
|
||||
if (oclHandles.cl_status!= CL_SUCCESS)
|
||||
|
||||
throw(string("excpetion in _clEnqueueNDRange() -> clWaitForEvents"));
|
||||
|
||||
#endif*/
|
||||
}
|
||||
|
||||
//--------------------------------------------------------
|
||||
// release OpenCL objects
|
||||
void _clFree(cl_mem ob) throw(string) {
|
||||
if (ob != NULL)
|
||||
oclHandles.cl_status = clReleaseMemObject(ob);
|
||||
#ifdef ERRMSG
|
||||
oclHandles.error_str = "excpetion in _clFree() ->";
|
||||
switch (oclHandles.cl_status) {
|
||||
case CL_INVALID_MEM_OBJECT:
|
||||
oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
|
||||
break;
|
||||
case CL_OUT_OF_RESOURCES:
|
||||
oclHandles.error_str += "CL_OUT_OF_RESOURCES";
|
||||
break;
|
||||
case CL_OUT_OF_HOST_MEMORY:
|
||||
oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
|
||||
break;
|
||||
default:
|
||||
oclHandles.error_str += "Unkown reseason";
|
||||
break;
|
||||
}
|
||||
if (oclHandles.cl_status != CL_SUCCESS)
|
||||
throw(oclHandles.error_str);
|
||||
#endif
|
||||
}
|
||||
#endif //_CL_HELPER_
|
44
benchmarks/new_opencl/bfs/Makefile
Normal file
44
benchmarks/new_opencl/bfs/Makefile
Normal file
|
@ -0,0 +1,44 @@
|
|||
# Build and run rules for the bfs OpenCL benchmark.
# Every *_PATH variable may be overridden from the environment or command line.
RISCV_TOOL_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
POCLCC_PATH ?= $(wildcard ~/dev/pocl/drops_vortex_cc)
POCLRT_PATH ?= $(wildcard ..)
DRIVER_PATH ?= $(wildcard ../../../driver/sw)

CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors

CXXFLAGS += -I$(POCLRT_PATH)/include

LDFLAGS += -L$(POCLRT_PATH)/lib -L$(DRIVER_PATH)/simx -lOpenCL -lvortex

PROJECT = bfs

SRCS = main.cc

# These targets name actions, not files; declaring them phony keeps a stray
# file called "all", "clean" or "run-*" from silently disabling the rule.
.PHONY: all clean run-fpga run-ase run-simx run-rtlsim

all: $(PROJECT)

# Compile the OpenCL kernel with poclcc.
kernel.pocl: kernel.cl
	POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCLCC_PATH)/lib:$(DRIVER_PATH)/simx $(POCLCC_PATH)/bin/poclcc -o kernel.pocl kernel.cl

$(PROJECT): $(SRCS)
	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@

# The run-* targets differ only in which driver directory is put on
# LD_LIBRARY_PATH.
run-fpga: $(PROJECT) kernel.pocl
	LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-ase: $(PROJECT) kernel.pocl
	LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-simx: $(PROJECT) kernel.pocl
	LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-rtlsim: $(PROJECT) kernel.pocl
	LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)

# Header dependencies of the sources, regenerated whenever a source changes.
.depend: $(SRCS)
	$(CXX) $(CXXFLAGS) -MM $^ > .depend;

clean:
	rm -rf $(PROJECT) *.o *.dump .depend

# Skip dependency generation when the only goal is "clean".
ifneq ($(MAKECMDGOALS),clean)
    -include .depend
endif
|
0
benchmarks/new_opencl/bfs/README
Normal file
0
benchmarks/new_opencl/bfs/README
Normal file
28677
benchmarks/new_opencl/bfs/graph4096.txt
Executable file
28677
benchmarks/new_opencl/bfs/graph4096.txt
Executable file
File diff suppressed because it is too large
Load diff
53
benchmarks/new_opencl/bfs/kernel.cl
Executable file
53
benchmarks/new_opencl/bfs/kernel.cl
Executable file
|
@ -0,0 +1,53 @@
|
|||
/* ============================================================
|
||||
//--cambine: kernel function of Breadth-First-Search
|
||||
//--author: created by Jianbin Fang
|
||||
//--date: 06/12/2010
|
||||
============================================================ */
|
||||
|
||||
//#pragma OPENCL EXTENSION cl_khr_byte_addressable_store: enable
|
||||
|
||||
// Adjacency-list record for one graph node.
typedef struct{
    int starting;     // index of the node's first entry in the edge array
    int no_of_edges;  // number of consecutive edge entries owned by the node
} Node;

// BFS_1 (7 parameters): expand the current frontier by one level.
//
// The work-item with global id `tid` handles node `tid`. When that node is
// flagged in g_graph_mask, the flag is cleared and every neighbour not yet
// marked in g_graph_visited gets cost[tid] + 1 and is flagged in
// g_updating_graph_mask.
__kernel void BFS_1(const __global Node* g_graph_nodes,
                    const __global int* g_graph_edges,
                    __global char* g_graph_mask,
                    __global char* g_updating_graph_mask,
                    __global char* g_graph_visited,
                    __global int* g_cost,
                    const int no_of_nodes){
    const int tid = get_global_id(0);

    // Ids past the node count, and nodes outside the frontier, do nothing.
    if (tid >= no_of_nodes || !g_graph_mask[tid])
        return;

    g_graph_mask[tid] = false;

    const int first_edge = g_graph_nodes[tid].starting;
    const int edge_end = g_graph_nodes[tid].no_of_edges + first_edge;
    for (int e = first_edge; e < edge_end; e++) {
        const int id = g_graph_edges[e];
        if (g_graph_visited[id])
            continue;
        g_cost[id] = g_cost[tid] + 1;
        g_updating_graph_mask[id] = true;
    }
}
|
||||
|
||||
// BFS_2 (5 parameters): commit the frontier produced by BFS_1.
//
// Every node flagged in g_updating_graph_mask is moved into g_graph_mask,
// marked in g_graph_visited and un-flagged; *g_over is set so the host can
// tell that at least one node was committed in this pass.
__kernel void BFS_2(__global char* g_graph_mask,
                    __global char* g_updating_graph_mask,
                    __global char* g_graph_visited,
                    __global char* g_over,
                    const int no_of_nodes) {
    const int tid = get_global_id(0);

    // Nothing to commit for out-of-range ids or nodes BFS_1 did not reach.
    if (tid >= no_of_nodes || !g_updating_graph_mask[tid])
        return;

    g_graph_mask[tid] = true;
    g_graph_visited[tid] = true;
    *g_over = true;
    g_updating_graph_mask[tid] = false;
}
|
||||
|
||||
|
BIN
benchmarks/new_opencl/bfs/kernel.pocl
Normal file
BIN
benchmarks/new_opencl/bfs/kernel.pocl
Normal file
Binary file not shown.
297
benchmarks/new_opencl/bfs/main.cc
Executable file
297
benchmarks/new_opencl/bfs/main.cc
Executable file
|
@ -0,0 +1,297 @@
|
|||
//--by Jianbin Fang
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#ifdef PROFILING
|
||||
#include "timer.h"
|
||||
#endif
|
||||
|
||||
#include "CLHelper.h"
|
||||
#include "util.h"
|
||||
|
||||
#define MAX_THREADS_PER_BLOCK 256
|
||||
|
||||
// Adjacency-list record for one graph node.
struct Node {
  int starting;     // index of the node's first entry in the edge array
  int no_of_edges;  // number of consecutive edge entries owned by the node
};

//----------------------------------------------------------
//--bfs on cpu
//--programmer:	jianbin
//--date:	26/01/2011
//--note: width is changed to the new_width
//----------------------------------------------------------
// Host reference implementation of the level-synchronous BFS.
//
// no_of_nodes           number of entries in every per-node array
// h_graph_nodes         per-node slice of h_graph_edges
// edge_list_size        unused here (the edge array is only indexed through
//                       h_graph_nodes)
// h_graph_edges         destination node id of every edge
// h_graph_mask          in/out: nodes in the current frontier
// h_updating_graph_mask in/out: scratch flags for the next frontier
// h_graph_visited       in/out: nodes already reached
// h_cost_ref            in/out: each newly reached node gets the cost of the
//                       node that reached it plus one
void run_bfs_cpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size,
                 int *h_graph_edges, char *h_graph_mask,
                 char *h_updating_graph_mask, char *h_graph_visited,
                 int *h_cost_ref) {
  (void)edge_list_size;  // parameter kept so existing callers still compile

  char stop;
  do {
    // if no node is promoted in this pass then the loop stops
    stop = false;

    // Phase 1: expand every frontier node into the updating mask.
    for (int tid = 0; tid < no_of_nodes; tid++) {
      if (h_graph_mask[tid] == true) {
        h_graph_mask[tid] = false;
        const int first_edge = h_graph_nodes[tid].starting;
        const int edge_end = h_graph_nodes[tid].no_of_edges + first_edge;
        for (int i = first_edge; i < edge_end; i++) {
          const int id = h_graph_edges[i];  // node id is connected with tid
          if (!h_graph_visited[id]) {
            h_cost_ref[id] = h_cost_ref[tid] + 1;
            h_updating_graph_mask[id] = true;
          }
        }
      }
    }

    // Phase 2: promote the updating mask to the next frontier.
    for (int tid = 0; tid < no_of_nodes; tid++) {
      if (h_updating_graph_mask[tid] == true) {
        h_graph_mask[tid] = true;
        h_graph_visited[tid] = true;
        stop = true;
        h_updating_graph_mask[tid] = false;
      }
    }
  } while (stop);
}
|
||||
//----------------------------------------------------------
|
||||
//--breadth first search on GPUs
|
||||
//----------------------------------------------------------
|
||||
void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size,
|
||||
int *h_graph_edges, char *h_graph_mask,
|
||||
char *h_updating_graph_mask, char *h_graph_visited,
|
||||
int *h_cost) throw(std::string) {
|
||||
|
||||
// int number_elements = height*width;
|
||||
char h_over;
|
||||
cl_mem d_graph_nodes, d_graph_edges, d_graph_mask, d_updating_graph_mask,
|
||||
d_graph_visited, d_cost, d_over;
|
||||
try {
|
||||
//--1 transfer data from host to device
|
||||
_clInit();
|
||||
d_graph_nodes = _clMalloc(no_of_nodes * sizeof(Node), h_graph_nodes);
|
||||
d_graph_edges = _clMalloc(edge_list_size * sizeof(int), h_graph_edges);
|
||||
d_graph_mask = _clMallocRW(no_of_nodes * sizeof(char), h_graph_mask);
|
||||
d_updating_graph_mask =
|
||||
_clMallocRW(no_of_nodes * sizeof(char), h_updating_graph_mask);
|
||||
d_graph_visited = _clMallocRW(no_of_nodes * sizeof(char), h_graph_visited);
|
||||
|
||||
d_cost = _clMallocRW(no_of_nodes * sizeof(int), h_cost);
|
||||
d_over = _clMallocRW(sizeof(char), &h_over);
|
||||
|
||||
_clMemcpyH2D(d_graph_nodes, no_of_nodes * sizeof(Node), h_graph_nodes);
|
||||
_clMemcpyH2D(d_graph_edges, edge_list_size * sizeof(int), h_graph_edges);
|
||||
_clMemcpyH2D(d_graph_mask, no_of_nodes * sizeof(char), h_graph_mask);
|
||||
_clMemcpyH2D(d_updating_graph_mask, no_of_nodes * sizeof(char),
|
||||
h_updating_graph_mask);
|
||||
_clMemcpyH2D(d_graph_visited, no_of_nodes * sizeof(char), h_graph_visited);
|
||||
_clMemcpyH2D(d_cost, no_of_nodes * sizeof(int), h_cost);
|
||||
|
||||
//--2 invoke kernel
|
||||
#ifdef PROFILING
|
||||
timer kernel_timer;
|
||||
double kernel_time = 0.0;
|
||||
kernel_timer.reset();
|
||||
kernel_timer.start();
|
||||
#endif
|
||||
do {
|
||||
h_over = false;
|
||||
_clMemcpyH2D(d_over, sizeof(char), &h_over);
|
||||
//--kernel 0
|
||||
int kernel_id = 0;
|
||||
int kernel_idx = 0;
|
||||
_clSetArgs(kernel_id, kernel_idx++, d_graph_nodes);
|
||||
_clSetArgs(kernel_id, kernel_idx++, d_graph_edges);
|
||||
_clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
|
||||
_clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
|
||||
_clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
|
||||
_clSetArgs(kernel_id, kernel_idx++, d_cost);
|
||||
_clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
|
||||
|
||||
// int work_items = no_of_nodes;
|
||||
_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);
|
||||
|
||||
//--kernel 1
|
||||
kernel_id = 1;
|
||||
kernel_idx = 0;
|
||||
_clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
|
||||
_clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
|
||||
_clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
|
||||
_clSetArgs(kernel_id, kernel_idx++, d_over);
|
||||
_clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
|
||||
|
||||
// work_items = no_of_nodes;
|
||||
_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);
|
||||
|
||||
_clMemcpyD2H(d_over, sizeof(char), &h_over);
|
||||
} while (h_over);
|
||||
|
||||
_clFinish();
|
||||
#ifdef PROFILING
|
||||
kernel_timer.stop();
|
||||
kernel_time = kernel_timer.getTimeInSeconds();
|
||||
#endif
|
||||
//--3 transfer data from device to host
|
||||
_clMemcpyD2H(d_cost, no_of_nodes * sizeof(int), h_cost);
|
||||
//--statistics
|
||||
#ifdef PROFILING
|
||||
std::cout << "kernel time(s):" << kernel_time << std::endl;
|
||||
#endif
|
||||
//--4 release cl resources.
|
||||
_clFree(d_graph_nodes);
|
||||
_clFree(d_graph_edges);
|
||||
_clFree(d_graph_mask);
|
||||
_clFree(d_updating_graph_mask);
|
||||
_clFree(d_graph_visited);
|
||||
_clFree(d_cost);
|
||||
_clFree(d_over);
|
||||
_clRelease();
|
||||
} catch (std::string msg) {
|
||||
_clFree(d_graph_nodes);
|
||||
_clFree(d_graph_edges);
|
||||
_clFree(d_graph_mask);
|
||||
_clFree(d_updating_graph_mask);
|
||||
_clFree(d_graph_visited);
|
||||
_clFree(d_cost);
|
||||
_clFree(d_over);
|
||||
_clRelease();
|
||||
std::string e_str = "in run_transpose_gpu -> ";
|
||||
e_str += msg;
|
||||
throw(e_str);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------
|
||||
//--cambine: main function
|
||||
//--author: created by Jianbin Fang
|
||||
//--date: 25/01/2011
|
||||
//----------------------------------------------------------
|
||||
int main(int argc, char *argv[]) {
|
||||
printf("enter demo main\n");
|
||||
|
||||
int no_of_nodes;
|
||||
int edge_list_size;
|
||||
FILE *fp;
|
||||
Node *h_graph_nodes;
|
||||
char *h_graph_mask, *h_updating_graph_mask, *h_graph_visited;
|
||||
|
||||
try {
|
||||
char *input_f = "graph4096.txt";
|
||||
printf("Reading File\n");
|
||||
// Read in Graph from a file
|
||||
fp = fopen(input_f, "r");
|
||||
if (!fp) {
|
||||
printf("Error Reading graph file\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
printf("Reading File completed!\n");
|
||||
|
||||
int source = 0;
|
||||
|
||||
fscanf(fp, "%d", &no_of_nodes);
|
||||
|
||||
int num_of_blocks = 1;
|
||||
int num_of_threads_per_block = no_of_nodes;
|
||||
|
||||
// Make execution Parameters according to the number of nodes
|
||||
// Distribute threads across multiple Blocks if necessary
|
||||
if (no_of_nodes > MAX_THREADS_PER_BLOCK) {
|
||||
num_of_blocks = (int)ceil(no_of_nodes / (double)MAX_THREADS_PER_BLOCK);
|
||||
num_of_threads_per_block = MAX_THREADS_PER_BLOCK;
|
||||
}
|
||||
work_group_size = num_of_threads_per_block;
|
||||
// allocate host memory
|
||||
h_graph_nodes = (Node *)malloc(sizeof(Node) * no_of_nodes);
|
||||
h_graph_mask = (char *)malloc(sizeof(char) * no_of_nodes);
|
||||
h_updating_graph_mask = (char *)malloc(sizeof(char) * no_of_nodes);
|
||||
h_graph_visited = (char *)malloc(sizeof(char) * no_of_nodes);
|
||||
|
||||
int start, edgeno;
|
||||
// initalize the memory
|
||||
for (int i = 0; i < no_of_nodes; i++) {
|
||||
fscanf(fp, "%d %d", &start, &edgeno);
|
||||
h_graph_nodes[i].starting = start;
|
||||
h_graph_nodes[i].no_of_edges = edgeno;
|
||||
h_graph_mask[i] = false;
|
||||
h_updating_graph_mask[i] = false;
|
||||
h_graph_visited[i] = false;
|
||||
}
|
||||
// read the source node from the file
|
||||
fscanf(fp, "%d", &source);
|
||||
source = 0;
|
||||
// set the source node as true in the mask
|
||||
h_graph_mask[source] = true;
|
||||
h_graph_visited[source] = true;
|
||||
fscanf(fp, "%d", &edge_list_size);
|
||||
int id, cost;
|
||||
int *h_graph_edges = (int *)malloc(sizeof(int) * edge_list_size);
|
||||
for (int i = 0; i < edge_list_size; i++) {
|
||||
fscanf(fp, "%d", &id);
|
||||
fscanf(fp, "%d", &cost);
|
||||
h_graph_edges[i] = id;
|
||||
}
|
||||
|
||||
if (fp)
|
||||
fclose(fp);
|
||||
// allocate mem for the result on host side
|
||||
int *h_cost = (int *)malloc(sizeof(int) * no_of_nodes);
|
||||
int *h_cost_ref = (int *)malloc(sizeof(int) * no_of_nodes);
|
||||
for (int i = 0; i < no_of_nodes; i++) {
|
||||
h_cost[i] = -1;
|
||||
h_cost_ref[i] = -1;
|
||||
}
|
||||
h_cost[source] = 0;
|
||||
h_cost_ref[source] = 0;
|
||||
//---------------------------------------------------------
|
||||
//--gpu entry
|
||||
run_bfs_gpu(no_of_nodes, h_graph_nodes, edge_list_size, h_graph_edges,
|
||||
h_graph_mask, h_updating_graph_mask, h_graph_visited, h_cost);
|
||||
//---------------------------------------------------------
|
||||
//--cpu entry
|
||||
// initalize the memory again
|
||||
for (int i = 0; i < no_of_nodes; i++) {
|
||||
h_graph_mask[i] = false;
|
||||
h_updating_graph_mask[i] = false;
|
||||
h_graph_visited[i] = false;
|
||||
}
|
||||
// set the source node as true in the mask
|
||||
source = 0;
|
||||
h_graph_mask[source] = true;
|
||||
h_graph_visited[source] = true;
|
||||
run_bfs_cpu(no_of_nodes, h_graph_nodes, edge_list_size, h_graph_edges,
|
||||
h_graph_mask, h_updating_graph_mask, h_graph_visited,
|
||||
h_cost_ref);
|
||||
//---------------------------------------------------------
|
||||
//--result varification
|
||||
compare_results<int>(h_cost_ref, h_cost, no_of_nodes);
|
||||
// release host memory
|
||||
free(h_graph_nodes);
|
||||
free(h_graph_mask);
|
||||
free(h_updating_graph_mask);
|
||||
free(h_graph_visited);
|
||||
|
||||
} catch (std::string msg) {
|
||||
std::cout << "--cambine: exception in main ->" << msg << std::endl;
|
||||
// release host memory
|
||||
free(h_graph_nodes);
|
||||
free(h_graph_mask);
|
||||
free(h_updating_graph_mask);
|
||||
free(h_graph_visited);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
1
benchmarks/new_opencl/bfs/run
Executable file
1
benchmarks/new_opencl/bfs/run
Executable file
|
@ -0,0 +1 @@
|
|||
./bfs ../../data/bfs/graph1MW_6.txt
|
78
benchmarks/new_opencl/bfs/timer.cc
Executable file
78
benchmarks/new_opencl/bfs/timer.cc
Executable file
|
@ -0,0 +1,78 @@
|
|||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
|
||||
#include "timer.h"
|
||||
|
||||
|
||||
using namespace std;
|
||||
|
||||
double timer::CPU_speed_in_MHz = timer::get_CPU_speed_in_MHz();
|
||||
|
||||
|
||||
double timer::get_CPU_speed_in_MHz()
|
||||
{
|
||||
#if defined __linux__
|
||||
ifstream infile("/proc/cpuinfo");
|
||||
char buffer[256], *colon;
|
||||
|
||||
while (infile.good()) {
|
||||
infile.getline(buffer, 256);
|
||||
|
||||
if (strncmp("cpu MHz", buffer, 7) == 0 && (colon = strchr(buffer, ':')) != 0)
|
||||
return atof(colon + 2);
|
||||
}
|
||||
#endif
|
||||
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
|
||||
// Writes "<which> = <value><unit>" to str. `time` is a tick count; it is
// converted with CPU_speed_in_MHz and then divided by 1000 per unit step
// until it drops below 999.5 or the last unit is reached.
void timer::print_time(ostream &str, const char *which, double time) const
{
    static const char *units[] = { " ns", " us", " ms", " s", " ks", 0 };

    double scaled = 1000.0 * time / CPU_speed_in_MHz;
    int unit_index = 0;

    // The trailing 0 in units[] marks the end; never step onto it.
    for (; scaled >= 999.5 && units[unit_index + 1] != 0; ++unit_index)
	scaled /= 1000.0;

    str << which << " = " << setprecision(3) << setw(4) << scaled << units[unit_index];
}
|
||||
|
||||
|
||||
// Writes a one-line summary of this timer to str and returns str.
// The line is the name (or "timer") followed by either a notice that the
// CPU speed is unknown, a notice that the timer was never stopped, or the
// average time, total time and stop count.
ostream &timer::print(ostream &str)
{
    const char *label = (name != 0 ? name : "timer");
    str << left << setw(25) << label << ": " << right;

    if (CPU_speed_in_MHz == 0) {
	str << "could not determine CPU speed\n";
	return str;
    }

    if (count == 0) {
	str << "not used\n";
	return str;
    }

    const double total = static_cast<double>(total_time);

    print_time(str, "avg", total / static_cast<double>(count));
    print_time(str, ", total", total);
    str << ", count = " << setw(9) << count << '\n';

    return str;
}
|
||||
|
||||
|
||||
// Stream insertion for timers: forwards to timer::print().
ostream &operator << (ostream &str, class timer &timer)
{
    ostream &result = timer.print(str);
    return result;
}
|
||||
|
||||
double timer::getTimeInSeconds()
|
||||
{
|
||||
double total = static_cast<double>(total_time);
|
||||
double res = (total / 1000000.0) / CPU_speed_in_MHz;
|
||||
return res;
|
||||
}
|
101
benchmarks/new_opencl/bfs/timer.h
Executable file
101
benchmarks/new_opencl/bfs/timer.h
Executable file
|
@ -0,0 +1,101 @@
|
|||
#ifndef timer_h
|
||||
#define timer_h
|
||||
|
||||
#include <iostream>
|
||||
|
||||
// Accumulating interval timer.
// total_time holds the running sum; `low` and `high` overlay its two
// 32-bit halves (declared in the opposite order when __PPC__ is defined).
class timer {
public:
  timer(const char *name = 0);
  timer(const char *name, std::ostream &write_on_exit);

  ~timer();

  void start();
  void stop();
  void reset();

  // Writes a one-line summary to the given stream and returns that stream.
  std::ostream &print(std::ostream &);

  double getTimeInSeconds();

private:
  void print_time(std::ostream &, const char *which, double time) const;

  union {
    long long total_time;
    struct {
#if defined __PPC__
      int high, low;
#else
      int low, high;
#endif
    };
  };

  unsigned long long count;           // incremented by every stop()
  const char *const name;             // label used by print(); may be 0
  std::ostream *const write_on_exit;  // if non-null, printed to on destruction

  static double CPU_speed_in_MHz;
  static double get_CPU_speed_in_MHz();
};
|
||||
|
||||
std::ostream &operator<<(std::ostream &, class timer &);
|
||||
|
||||
inline void timer::reset() {
|
||||
total_time = 0;
|
||||
count = 0;
|
||||
}
|
||||
|
||||
inline timer::timer(const char *name) : name(name), write_on_exit(0) {
|
||||
reset();
|
||||
}
|
||||
|
||||
inline timer::timer(const char *name, std::ostream &write_on_exit)
|
||||
: name(name), write_on_exit(&write_on_exit) {
|
||||
reset();
|
||||
}
|
||||
|
||||
inline timer::~timer() {
|
||||
if (write_on_exit != 0)
|
||||
print(*write_on_exit);
|
||||
}
|
||||
|
||||
inline void timer::start() {
|
||||
#if (defined __PATHSCALE__) && (defined __i386 || defined __x86_64)
|
||||
unsigned eax, edx;
|
||||
|
||||
asm volatile("rdtsc" : "=a"(eax), "=d"(edx));
|
||||
|
||||
total_time -= ((unsigned long long)edx << 32) + eax;
|
||||
#elif (defined __GNUC__ || defined __INTEL_COMPILER) && \
|
||||
(defined __i386 || defined __x86_64)
|
||||
asm volatile("rdtsc\n\t"
|
||||
"subl %%eax, %0\n\t"
|
||||
"sbbl %%edx, %1"
|
||||
: "+m"(low), "+m"(high)
|
||||
:
|
||||
: "eax", "edx");
|
||||
#else
|
||||
#error Compiler/Architecture not recognized
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void timer::stop() {
|
||||
#if (defined __PATHSCALE__) && (defined __i386 || defined __x86_64)
|
||||
unsigned eax, edx;
|
||||
|
||||
asm volatile("rdtsc" : "=a"(eax), "=d"(edx));
|
||||
|
||||
total_time += ((unsigned long long)edx << 32) + eax;
|
||||
#elif (defined __GNUC__ || defined __INTEL_COMPILER) && \
|
||||
(defined __i386 || defined __x86_64)
|
||||
asm volatile("rdtsc\n\t"
|
||||
"addl %%eax, %0\n\t"
|
||||
"adcl %%edx, %1"
|
||||
: "+m"(low), "+m"(high)
|
||||
:
|
||||
: "eax", "edx");
|
||||
#endif
|
||||
|
||||
++count;
|
||||
}
|
||||
|
||||
#endif
|
72
benchmarks/new_opencl/bfs/util.h
Executable file
72
benchmarks/new_opencl/bfs/util.h
Executable file
|
@ -0,0 +1,72 @@
|
|||
#ifndef _C_UTIL_
|
||||
#define _C_UTIL_
|
||||
#include <math.h>
|
||||
#include <iostream>
|
||||
|
||||
//-------------------------------------------------------------------
//--initialize array with maximum limit
//-------------------------------------------------------------------
// Fills A[0..n) with pseudo-random values obtained by scaling rand() into
// the range from 0 up to maxi.
//
// The ratio rand() / (RAND_MAX + 1) is formed in double precision. In
// single precision a rand() result close to RAND_MAX can round up so that
// the ratio becomes exactly 1. In double precision the ratio stays
// strictly below 1, so integer datatypes never receive maxi itself.
template<typename datatype>
void fill(datatype *A, const int n, const datatype maxi){
    const double range = static_cast<double>(RAND_MAX) + 1.0;
    for (int j = 0; j < n; j++)
    {
        A[j] = static_cast<datatype>(maxi * (rand() / range));
    }
}
|
||||
|
||||
//--print matrix
// Prints a row-major height x width matrix to std::cout: every element is
// followed by one space and every row is ended with std::endl.
template<typename datatype>
void print_matrix(datatype *A, int height, int width){
    for (int row = 0; row < height; ++row) {
        for (int col = 0; col < width; ++col) {
            std::cout << A[row * width + col] << " ";
        }
        std::cout << std::endl;
    }
}
|
||||
//-------------------------------------------------------------------
//--verify results
//-------------------------------------------------------------------
#define MAX_RELATIVE_ERROR .002
// Compares gpuResults with cpuResults element by element and prints a
// pass/fail line to std::cout.
//
// An element passes when it matches exactly or when its difference,
// relative to the magnitude of the CPU value, is at most
// MAX_RELATIVE_ERROR. Dividing by the magnitude rather than the signed
// value keeps negative reference values from passing unconditionally.
template<typename datatype>
void verify_array(const datatype *cpuResults, const datatype *gpuResults, const int size){

    bool passed = true;
    // The reduction gives every thread its own flag, so an OpenMP build
    // does not write one shared variable from several threads.
#pragma omp parallel for reduction(&&:passed)
    for (int i=0; i<size; i++){
        const double expected = static_cast<double>(cpuResults[i]);
        const double difference = fabs(expected - static_cast<double>(gpuResults[i]));
        // An exact match is accepted before dividing so that a reference
        // value of zero does not produce 0/0.
        if (difference != 0.0 && difference / fabs(expected) > MAX_RELATIVE_ERROR){
            passed = false;
        }
    }
    if (passed){
        std::cout << "--cambine:passed:-)" << std::endl;
    }
    else{
        std::cout << "--cambine: failed:-(" << std::endl;
    }
    return ;
}
|
||||
// Checks that the first `size` elements of cpu_results and gpu_results are
// equal and prints a pass/fail line to std::cout.
//
// std::endl is qualified so this header compiles without relying on a
// using-declaration from whichever header was included before it.
template<typename datatype>
void compare_results(const datatype *cpu_results, const datatype *gpu_results, const int size){

    bool passed = true;
    for (int i=0; i<size; i++){
        if (cpu_results[i]!=gpu_results[i]){
            passed = false;
            break;  // one mismatch already decides the verdict
        }
    }
    if (passed){
        std::cout << "--cambine:passed:-)" << std::endl;
    }
    else{
        std::cout << "--cambine: failed:-(" << std::endl;
    }
    return ;
}
|
||||
|
||||
#endif
|
||||
|
44
benchmarks/new_opencl/guassian/Makefile
Normal file
44
benchmarks/new_opencl/guassian/Makefile
Normal file
|
@ -0,0 +1,44 @@
|
|||
# Build and run rules for the guassian OpenCL benchmark.
# Every *_PATH variable may be overridden from the environment or command line.
RISCV_TOOL_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
POCLCC_PATH ?= $(wildcard ~/dev/pocl/drops_vortex_cc)
POCLRT_PATH ?= $(wildcard ..)
DRIVER_PATH ?= $(wildcard ../../../driver/sw)

CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors

CXXFLAGS += -I$(POCLRT_PATH)/include

LDFLAGS += -L$(POCLRT_PATH)/lib -L$(DRIVER_PATH)/simx -lOpenCL -lvortex

PROJECT = guassian

SRCS = main.cc clutils.cpp utils.cpp

# These targets name actions, not files; declaring them phony keeps a stray
# file called "all", "clean" or "run-*" from silently disabling the rule.
.PHONY: all clean run-fpga run-ase run-simx run-rtlsim

all: $(PROJECT)

# Compile the OpenCL kernel with poclcc.
kernel.pocl: kernel.cl
	POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCLCC_PATH)/lib:$(DRIVER_PATH)/simx $(POCLCC_PATH)/bin/poclcc -o kernel.pocl kernel.cl

$(PROJECT): $(SRCS)
	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@

# The run-* targets differ only in which driver directory is put on
# LD_LIBRARY_PATH.
run-fpga: $(PROJECT) kernel.pocl
	LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-ase: $(PROJECT) kernel.pocl
	LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-simx: $(PROJECT) kernel.pocl
	LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-rtlsim: $(PROJECT) kernel.pocl
	LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)

# Header dependencies of the sources, regenerated whenever a source changes.
.depend: $(SRCS)
	$(CXX) $(CXXFLAGS) -MM $^ > .depend;

clean:
	rm -rf $(PROJECT) *.o *.dump .depend

# Skip dependency generation when the only goal is "clean".
ifneq ($(MAKECMDGOALS),clean)
    -include .depend
endif
|
241
benchmarks/new_opencl/guassian/OriginalParallel.c
Executable file
241
benchmarks/new_opencl/guassian/OriginalParallel.c
Executable file
|
@ -0,0 +1,241 @@
|
|||
/*-----------------------------------------------------------
|
||||
** ge_p.c -- The program is to solve a linear system Ax = b
|
||||
** by using Gaussian Elimination. The algorithm on page 101
|
||||
** ("Foundations of Parallel Programming") is used.
|
||||
** The sequential version is ge_s.c. This parallel
|
||||
** implementation converts three independent for() loops
|
||||
** into three Fans. Use the data file ge_3.dat to verify
|
||||
** the correction of the output.
|
||||
**
|
||||
** Written by Andreas Kura, 02/15/95
|
||||
** Modified by Chong-wei Xu, /04/20/95
|
||||
**-----------------------------------------------------------
|
||||
*/
|
||||
#include <us.h>
|
||||
#include <stdio.h>
|
||||
|
||||
int Size, t;
|
||||
float **a, *b;
|
||||
BEGIN_SHARED_DECL
|
||||
float **m;
|
||||
END_SHARED_DECL;
|
||||
FILE *fp;
|
||||
|
||||
void InitProblemOnce();
|
||||
void InitPerRun();
|
||||
void ForwardSub();
|
||||
void Fan1();
|
||||
void Fan2();
|
||||
void Fan3();
|
||||
void InitMat();
|
||||
void InitAry();
|
||||
void PrintMat();
|
||||
void PrintAry();
|
||||
|
||||
main ()
|
||||
{
|
||||
InitializeUs();
|
||||
MakeSharedVariables; /* to make SHARED m */
|
||||
|
||||
InitProblemOnce();
|
||||
InitPerRun();
|
||||
ForwardSub();
|
||||
|
||||
printf("The result of matrix m is: \n");
|
||||
PrintMat(SHARED m, Size, Size);
|
||||
printf("The result of matrix a is: \n");
|
||||
PrintMat(a, Size, Size);
|
||||
printf("The result of array b is: \n");
|
||||
PrintAry(b, Size);
|
||||
}
|
||||
|
||||
/*------------------------------------------------------
|
||||
** InitProblemOnce -- Initialize all of matrices and
|
||||
** vectors by opening a data file specified by the user.
|
||||
**
|
||||
** We used dynamic array **a, *b, and **m to allocate
|
||||
** the memory storages.
|
||||
**------------------------------------------------------
|
||||
*/
|
||||
void InitProblemOnce()
|
||||
{
|
||||
char filename[30];
|
||||
|
||||
printf("Enter the data file name: ");
|
||||
scanf("%s", filename);
|
||||
printf("The file name is: %s\n", filename);
|
||||
|
||||
fp = fopen(filename, "r");
|
||||
|
||||
fscanf(fp, "%d", &Size);
|
||||
a = (float **) UsAllocScatterMatrix(Size, Size, sizeof(float));
|
||||
/*
|
||||
a = (float **) malloc(Size * sizeof(float *));
|
||||
for (i=0; i<Size; i++) {
|
||||
a[i] = (float *) malloc(Size * sizeof(float));
|
||||
}
|
||||
*/
|
||||
InitMat(a, Size, Size);
|
||||
printf("The input matrix a is:\n");
|
||||
PrintMat(a, Size, Size);
|
||||
|
||||
b = (float *) UsAlloc(Size * sizeof(float));
|
||||
/*
|
||||
b = (float *) malloc(Size * sizeof(float));
|
||||
*/
|
||||
InitAry(b, Size);
|
||||
printf("The input array b is:\n");
|
||||
PrintAry(b, Size);
|
||||
|
||||
SHARED m = (float **) UsAllocScatterMatrix(Size, Size, sizeof(float));
|
||||
/*
|
||||
m = (float **) malloc(Size * sizeof(float *));
|
||||
for (i=0; i<Size; i++) {
|
||||
m[i] = (float *) malloc(Size * sizeof(float));
|
||||
}
|
||||
*/
|
||||
|
||||
Share(&Size);
|
||||
Share(&a);
|
||||
Share(&b);
|
||||
}
|
||||
|
||||
/*------------------------------------------------------
|
||||
** InitPerRun() -- Initialize the contents of the
|
||||
** multipier matrix **m
|
||||
**------------------------------------------------------
|
||||
*/
|
||||
void InitPerRun()
|
||||
{
|
||||
int i, j;
|
||||
|
||||
for (i=0; i<Size; i++)
|
||||
for (j=0; j<Size; j++)
|
||||
SHARED m[i][j] = 0.0;
|
||||
}
|
||||
|
||||
/*------------------------------------------------------
|
||||
** ForwardSub() -- Forward substitution of Gaussian
|
||||
** elimination.
|
||||
**------------------------------------------------------
|
||||
*/
|
||||
void ForwardSub()
|
||||
{
|
||||
for (t=0; t<(Size-1); t++) {
|
||||
Share(&t);
|
||||
GenOnI(Fan1, Size-1-t); /* t=0 to (Size-2), the range is
|
||||
** Size-2-t+1 = Size-1-t
|
||||
*/
|
||||
GenOnA(Fan2, Size-1-t, Size-t);
|
||||
GenOnI(Fan3, Size-1-t);
|
||||
}
|
||||
}
|
||||
|
||||
/*-------------------------------------------------------
|
||||
** Fan1() -- Calculate multiplier matrix
|
||||
** Pay attention to the index. Index i give the range
|
||||
** which starts from 0 to range-1. The real values of
|
||||
** the index should be adjust and related with the value
|
||||
** of t which is defined on the ForwardSub().
|
||||
**-------------------------------------------------------
|
||||
*/
|
||||
void Fan1(dummy, i)
|
||||
int dummy, i;
|
||||
{
|
||||
/* Use these printf() to display the nodes and index */
|
||||
printf("from node #%d\n", PhysProcToUsProc(Proc_Node));
|
||||
SHARED m[i+t+1][t] = a[i+t+1][t] / a[t][t];
|
||||
printf("i=%d, a[%d][%d]=%.2f, a[%d][%d]=%.2f, m[%d][%d]=%.2f\n",
|
||||
(i+t+1),t,t,a[t][t],(i+t+1),t,a[i+t+1][t],(i+t+1),t,
|
||||
SHARED m[i+t+1][t]);
|
||||
}
|
||||
|
||||
/*-------------------------------------------------------
|
||||
** Fan2() -- Modify the matrix A into LUD
|
||||
**-------------------------------------------------------
|
||||
*/
|
||||
void Fan2(dummy, i, j)
|
||||
int dummy, i, j;
|
||||
{
|
||||
a[i+1+t][j+t] -= SHARED m[i+1+t][t] * a[t][j+t];
|
||||
Share (&a);
|
||||
}
|
||||
|
||||
/*-------------------------------------------------------
|
||||
** Fan3() -- Modify the array b
|
||||
**-------------------------------------------------------
|
||||
*/
|
||||
void Fan3(dummy, i)
|
||||
int dummy, i;
|
||||
{
|
||||
b[i+1+t] -= SHARED m[i+1+t][t] * b[t];
|
||||
}
|
||||
|
||||
/*------------------------------------------------------
|
||||
** InitMat() -- Initialize the matrix by reading data
|
||||
** from the data file
|
||||
**------------------------------------------------------
|
||||
*/
|
||||
void InitMat(ary, nrow, ncol)
|
||||
float **ary;
|
||||
int nrow, ncol;
|
||||
{
|
||||
int i, j;
|
||||
|
||||
for (i=0; i<nrow; i++) {
|
||||
for (j=0; j<ncol; j++) {
|
||||
fscanf(fp, "%f", &ary[i][j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*------------------------------------------------------
 ** PrintMat() -- Print the contents of the matrix, one
 ** row per line, followed by a blank line.
 **------------------------------------------------------
 */
void PrintMat(ary, nrow, ncol)
float **ary;
int nrow, ncol;
{
    int r, c;
    float *row;

    for (r = 0; r < nrow; r++) {
        row = ary[r];
        for (c = 0; c < ncol; c++) {
            printf("%8.2f ", row[c]);
        }
        printf("\n");
    }
    printf("\n");
}
|
||||
|
||||
/*------------------------------------------------------
|
||||
** InitAry() -- Initialize the array (vector) by reading
|
||||
** data from the data file
|
||||
**------------------------------------------------------
|
||||
*/
|
||||
void InitAry(ary, ary_size)
|
||||
float *ary;
|
||||
int ary_size;
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i=0; i<ary_size; i++) {
|
||||
fscanf(fp, "%f", &ary[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/*------------------------------------------------------
 ** PrintAry() -- Print the contents of the array (vector)
 ** on one line.
 **------------------------------------------------------
 */
void PrintAry(ary, ary_size)
float *ary;
int ary_size;
{
    int k;

    k = 0;
    while (k < ary_size) {
        printf("%.2f ", ary[k]);
        k++;
    }
    printf("\n");
}
|
60
benchmarks/new_opencl/guassian/README.txt
Executable file
60
benchmarks/new_opencl/guassian/README.txt
Executable file
|
@ -0,0 +1,60 @@
|
|||
The Gaussian Elimination application solves systems of equations using the
|
||||
gaussian elimination method.
|
||||
|
||||
The application analyzes an n x n matrix and an associated 1 x n vector to solve a
|
||||
set of n equations in n unknowns. The matrix and vector describe equations
|
||||
of the form:
|
||||
|
||||
a0x + b0y + c0z + d0w = e0
|
||||
a1x + b1y + c1z + d1w = e1
|
||||
a2x + b2y + c2z + d2w = e2
|
||||
a3x + b3y + c3z + d3w = e3
|
||||
|
||||
where in this case n=4. The matrix for the above equations would be as follows:
|
||||
|
||||
[a0 b0 c0 d0]
|
||||
[a1 b1 c1 d1]
|
||||
[a2 b2 c2 d2]
|
||||
[a3 b3 c3 d3]
|
||||
|
||||
and the vector would be:
|
||||
|
||||
[e0]
|
||||
[e1]
|
||||
[e2]
|
||||
[e3]
|
||||
|
||||
The application creates a solution vector:
|
||||
|
||||
[x]
|
||||
[y]
|
||||
[z]
|
||||
[w]
|
||||
|
||||
|
||||
The Makefile may need to be adjusted for different machines, but it was written for Mac OS X and
|
||||
Linux with either NVIDIA or AMD OpenCL SDKs.
|
||||
|
||||
Additional input files can be created with the matrixGenerator.py file in the data folder.
|
||||
|
||||
Gaussian Elimination Usage
|
||||
|
||||
gaussianElimination [filename] [-hqt] [-p [int] -d [int]]
|
||||
|
||||
example:
|
||||
$ ./gaussianElimination matrix4.txt
|
||||
|
||||
filename the filename that holds the matrix data
|
||||
|
||||
-h, --help Display the help file
|
||||
-q Quiet mode. Suppress all text output.
|
||||
-t Print timing information.
|
||||
|
||||
-p [int] Choose the platform (must choose both platform and device)
|
||||
-d [int] Choose the device (must choose both platform and device)
|
||||
|
||||
|
||||
Notes: 1. The filename is required as the first parameter.
|
||||
2. If you declare either the device or the platform,
|
||||
you must declare both.
|
||||
|
1443
benchmarks/new_opencl/guassian/clutils.cpp
Executable file
1443
benchmarks/new_opencl/guassian/clutils.cpp
Executable file
File diff suppressed because it is too large
Load diff
281
benchmarks/new_opencl/guassian/clutils.h
Executable file
281
benchmarks/new_opencl/guassian/clutils.h
Executable file
|
@ -0,0 +1,281 @@
|
|||
/****************************************************************************\
|
||||
* Copyright (c) 2011, Advanced Micro Devices, Inc. *
|
||||
* All rights reserved. *
|
||||
* *
|
||||
* Redistribution and use in source and binary forms, with or without *
|
||||
* modification, are permitted provided that the following conditions *
|
||||
* are met: *
|
||||
* *
|
||||
* Redistributions of source code must retain the above copyright notice, *
|
||||
* this list of conditions and the following disclaimer. *
|
||||
* *
|
||||
* Redistributions in binary form must reproduce the above copyright notice, *
|
||||
* this list of conditions and the following disclaimer in the documentation *
|
||||
* and/or other materials provided with the distribution. *
|
||||
* *
|
||||
* Neither the name of the copyright holder nor the names of its contributors *
|
||||
* may be used to endorse or promote products derived from this software *
|
||||
* without specific prior written permission. *
|
||||
* *
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS *
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED *
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR *
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR *
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, *
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR *
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF *
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
|
||||
* *
|
||||
* If you use the software (in whole or in part), you shall adhere to all *
|
||||
* applicable U.S., European, and other export laws, including but not *
|
||||
* limited to the U.S. Export Administration Regulations (EAR), (15 C.F.R. *
|
||||
* Sections 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 *
|
||||
* of 22 June 2000. Further, pursuant to Section 740.6 of the EAR, you *
|
||||
* hereby certify that, except pursuant to a license granted by the United *
|
||||
* States Department of Commerce Bureau of Industry and Security or as *
|
||||
* otherwise permitted pursuant to a License Exception under the U.S. Export *
|
||||
* Administration Regulations ("EAR"), you will not (1) export, re-export or *
|
||||
* release to a national of a country in Country Groups D:1, E:1 or E:2 any *
|
||||
* restricted technology, software, or source code you receive hereunder, *
|
||||
* or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
|
||||
* technology or software, if such foreign produced direct product is subject *
|
||||
* to national security controls as identified on the Commerce Control List *
|
||||
*(currently found in Supplement 1 to Part 774 of EAR). For the most current *
|
||||
* Country Group listings, or for additional information about the EAR or *
|
||||
* your obligations under those regulations, please refer to the U.S. Bureau *
|
||||
* of Industry and Securitys website at http://www.bis.doc.gov/. *
|
||||
\****************************************************************************/
|
||||
|
||||
#ifndef __CL_UTILS_H__
|
||||
#define __CL_UTILS_H__
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
// The cl_time type is OS specific
|
||||
#ifdef _WIN32
|
||||
#include <tchar.h>
|
||||
#include <Windows.h>
|
||||
typedef __int64 cl_time;
|
||||
#else
|
||||
#include <sys/time.h>
|
||||
typedef double cl_time;
|
||||
#endif
|
||||
|
||||
//-------------------------------------------------------
|
||||
// Initialization and Cleanup
|
||||
//-------------------------------------------------------
|
||||
|
||||
// Detects platforms and devices, creates context and command queue
|
||||
cl_context cl_init(char devicePreference='\0');
|
||||
|
||||
// Creates a context given a platform and a device
|
||||
cl_context cl_init_context(int platform,int dev,int quiet=0);
|
||||
|
||||
// Releases resources used by clutils
|
||||
void cl_cleanup();
|
||||
|
||||
// Releases a kernel object
|
||||
void cl_freeKernel(cl_kernel kernel);
|
||||
|
||||
// Releases a memory object
|
||||
void cl_freeMem(cl_mem mem);
|
||||
|
||||
// Releases a program object
|
||||
void cl_freeProgram(cl_program program);
|
||||
|
||||
// Returns the global command queue
|
||||
cl_command_queue cl_getCommandQueue();
|
||||
|
||||
|
||||
//-------------------------------------------------------
|
||||
// Synchronization functions
|
||||
//-------------------------------------------------------
|
||||
|
||||
// Performs a clFinish on the command queue
|
||||
void cl_sync();
|
||||
|
||||
|
||||
//-------------------------------------------------------
|
||||
// Memory allocation
|
||||
//-------------------------------------------------------
|
||||
|
||||
// Allocates a regular buffer on the device
|
||||
cl_mem cl_allocBuffer(size_t mem_size,
|
||||
cl_mem_flags flags = CL_MEM_READ_WRITE);
|
||||
|
||||
// XXX I don't think this does exactly what we want it to do
|
||||
// Allocates a read-only buffer and transfers the data
|
||||
cl_mem cl_allocBufferConst(size_t mem_size, void* host_ptr);
|
||||
|
||||
// Allocates pinned memory on the host
|
||||
cl_mem cl_allocBufferPinned(size_t mem_size);
|
||||
|
||||
// Allocates an image on the device
|
||||
cl_mem cl_allocImage(size_t height, size_t width, char type,
|
||||
cl_mem_flags flags = CL_MEM_READ_WRITE);
|
||||
|
||||
|
||||
|
||||
//-------------------------------------------------------
|
||||
// Data transfers
|
||||
//-------------------------------------------------------
|
||||
|
||||
// Copies a buffer from the device to pinned memory on the host and
|
||||
// maps it so it can be read
|
||||
void* cl_copyAndMapBuffer(cl_mem dst, cl_mem src, size_t size);
|
||||
|
||||
// Copies from one buffer to another
|
||||
void cl_copyBufferToBuffer(cl_mem dst, cl_mem src, size_t size);
|
||||
|
||||
// Copies data to a buffer on the device
|
||||
void cl_copyBufferToDevice(cl_mem dst, void *src, size_t mem_size,
|
||||
cl_bool blocking = CL_TRUE);
|
||||
|
||||
// Copies data to an image on the device
|
||||
void cl_copyImageToDevice(cl_mem dst, void* src, size_t height, size_t width);
|
||||
|
||||
// Copies an image from the device to the host
|
||||
void cl_copyImageToHost(void* dst, cl_mem src, size_t height, size_t width);
|
||||
|
||||
// Copies data from a device buffer to the host
|
||||
void cl_copyBufferToHost(void *dst, cl_mem src, size_t mem_size,
|
||||
cl_bool blocking = CL_TRUE);
|
||||
|
||||
// Copies data from a buffer on the device to an image on the device
|
||||
void cl_copyBufferToImage(cl_mem src, cl_mem dst, int height, int width);
|
||||
|
||||
// Maps a buffer
|
||||
void* cl_mapBuffer(cl_mem mem, size_t mem_size, cl_mem_flags flags);
|
||||
|
||||
// Unmaps a buffer
|
||||
void cl_unmapBuffer(cl_mem mem, void *ptr);
|
||||
|
||||
// Writes data to a zero-copy buffer on the device
|
||||
void cl_writeToZCBuffer(cl_mem mem, void* data, size_t size);
|
||||
|
||||
//-------------------------------------------------------
|
||||
// Program and kernels
|
||||
//-------------------------------------------------------
|
||||
|
||||
// Compiles a program
|
||||
cl_program cl_compileProgram(char* kernelPath, char* compileoptions,
|
||||
bool verboseoptions = 0);
|
||||
|
||||
// Creates a kernel
|
||||
cl_kernel cl_createKernel(cl_program program, const char* kernelName);
|
||||
|
||||
|
||||
// Sets a kernel argument
|
||||
void cl_setKernelArg(cl_kernel kernel, unsigned int index, size_t size,
|
||||
void* data);
|
||||
|
||||
|
||||
//-------------------------------------------------------
|
||||
// Profiling/events
|
||||
//-------------------------------------------------------
|
||||
|
||||
// Computes the execution time (start to end) for an event
|
||||
double cl_computeExecTime(cl_event);
|
||||
|
||||
// Compute the elapsed time between two CPU timer values
|
||||
double cl_computeTime(cl_time start, cl_time end);
|
||||
|
||||
// Creates an event from CPU timers
|
||||
void cl_createUserEvent(cl_time start, cl_time end, char* desc);
|
||||
|
||||
// Disable logging of events
|
||||
void cl_disableEvents();
|
||||
|
||||
// Enable logging of events
|
||||
void cl_enableEvents();
|
||||
|
||||
// Query the current system time
|
||||
void cl_getTime(cl_time* time);
|
||||
|
||||
// Calls a function which prints events to the terminal
|
||||
void cl_printEvents();
|
||||
|
||||
// Calls a function which writes the events to a file
|
||||
void cl_writeEventsToFile(char* path);
|
||||
|
||||
|
||||
//-------------------------------------------------------
|
||||
// Error handling
|
||||
//-------------------------------------------------------
|
||||
|
||||
// Compare a status value to CL_SUCCESS and optionally exit on error
|
||||
int cl_errChk(const cl_int status, const char *msg, bool exitOnErr);
|
||||
|
||||
// Queries the supported image formats for the device and prints
|
||||
// them to the screen
|
||||
void printSupportedImageFormats();
|
||||
|
||||
//-------------------------------------------------------
|
||||
// Platform and device information
|
||||
//-------------------------------------------------------
|
||||
|
||||
bool cl_deviceIsAMD(cl_device_id dev=NULL);
|
||||
bool cl_deviceIsNVIDIA(cl_device_id dev=NULL);
|
||||
bool cl_platformIsNVIDIA(cl_platform_id plat=NULL);
|
||||
char* cl_getDeviceDriverVersion(cl_device_id dev=NULL);
|
||||
char* cl_getDeviceName(cl_device_id dev=NULL);
|
||||
char* cl_getDeviceVendor(cl_device_id dev=NULL);
|
||||
char* cl_getDeviceVersion(cl_device_id dev=NULL);
|
||||
char* cl_getPlatformName(cl_platform_id platform);
|
||||
char* cl_getPlatformVendor(cl_platform_id platform);
|
||||
|
||||
//-------------------------------------------------------
|
||||
// Utility functions
|
||||
//-------------------------------------------------------
|
||||
|
||||
char* catStringWithInt(const char* str, int integer);
|
||||
|
||||
char* itoa_portable(int value, char* result, int base);
|
||||
|
||||
//-------------------------------------------------------
|
||||
// Data types
|
||||
//-------------------------------------------------------
|
||||
// Pair of integer components.
typedef struct {
    int x, y;
} int2;

// Pair of single-precision components.
typedef struct {
    float x, y;
} float2;

// Four single-precision components, declared in x, y, z, w order.
typedef struct {
    float x, y, z, w;
} float4;
|
||||
|
||||
//-------------------------------------------------------
|
||||
// Defines
|
||||
//-------------------------------------------------------
|
||||
|
||||
#define MAX_ERR_VAL 64
|
||||
|
||||
#define NUM_PROGRAMS 7
|
||||
|
||||
#define NUM_KERNELS 13
|
||||
#define KERNEL_INIT_DET 0
|
||||
#define KERNEL_BUILD_DET 1
|
||||
#define KERNEL_SURF_DESC 2
|
||||
#define KERNEL_NORM_DESC 3
|
||||
#define KERNEL_NON_MAX_SUP 4
|
||||
#define KERNEL_GET_ORIENT1 5
|
||||
#define KERNEL_GET_ORIENT2 6
|
||||
#define KERNEL_NN 7
|
||||
#define KERNEL_SCAN 8
|
||||
#define KERNEL_SCAN4 9
|
||||
#define KERNEL_TRANSPOSE 10
|
||||
#define KERNEL_SCANIMAGE 11
|
||||
#define KERNEL_TRANSPOSEIMAGE 12
|
||||
|
||||
#endif
|
40
benchmarks/new_opencl/guassian/gaussianElim.h
Executable file
40
benchmarks/new_opencl/guassian/gaussianElim.h
Executable file
|
@ -0,0 +1,40 @@
|
|||
#ifndef _GAUSSIANELIM
|
||||
#define _GAUSSIANELIM
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <float.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <algorithm>
|
||||
|
||||
#include "clutils.h"
|
||||
|
||||
// All OpenCL headers
|
||||
#if defined (__APPLE__) || defined(MACOSX)
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/opencl.h>
|
||||
#endif
|
||||
|
||||
float *OpenClGaussianElimination(
|
||||
cl_context context,
|
||||
int timing);
|
||||
|
||||
void printUsage();
|
||||
int parseCommandline(int argc, char *argv[], char* filename,
|
||||
int *q, int *t, int *p, int *d);
|
||||
|
||||
void InitPerRun(int size,float *m);
|
||||
void ForwardSub(cl_context context, float *a, float *b, float *m, int size,int timing);
|
||||
void BackSub(float *a, float *b, float *finalVec, int size);
|
||||
void Fan1(float *m, float *a, int Size, int t);
|
||||
void Fan2(float *m, float *a, float *b,int Size, int j1, int t);
|
||||
//void Fan3(float *m, float *b, int Size, int t);
|
||||
void InitMat(FILE *fp, int size, float *ary, int nrow, int ncol);
|
||||
void InitAry(FILE *fp, float *ary, int ary_size);
|
||||
void PrintMat(float *ary, int size, int nrow, int ncolumn);
|
||||
void PrintAry(float *ary, int ary_size);
|
||||
float eventTime(cl_event event,cl_command_queue command_queue);
|
||||
#endif
|
74
benchmarks/new_opencl/guassian/gettimeofday.cpp
Executable file
74
benchmarks/new_opencl/guassian/gettimeofday.cpp
Executable file
|
@ -0,0 +1,74 @@
|
|||
#include "stdio.h"
|
||||
#include <time.h>
|
||||
#include <windows.h>
|
||||
#include <iostream>
|
||||
//using namespace System;
|
||||
using namespace std;
|
||||
|
||||
#if defined(_MSC_VER) || defined(_MSC_EXTENSIONS)
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000Ui64
|
||||
#else
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
struct timezone
|
||||
{
|
||||
int tz_minuteswest; /* minutes W of Greenwich */
|
||||
int tz_dsttime; /* type of dst correction */
|
||||
};
|
||||
|
||||
|
||||
// Definition of a gettimeofday function
|
||||
int gettimeofday(struct timeval *tv, struct timezone *tz)
|
||||
{
|
||||
// Define a structure to receive the current Windows filetime
|
||||
FILETIME ft;
|
||||
|
||||
// Initialize the present time to 0 and the timezone to UTC
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag = 0;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
// The GetSystemTimeAsFileTime returns the number of 100 nanosecond
|
||||
// intervals since Jan 1, 1601 in a structure. Copy the high bits to
|
||||
// the 64 bit tmpres, shift it left by 32 then or in the low 32 bits.
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
// Convert to microseconds by dividing by 10
|
||||
tmpres /= 10;
|
||||
|
||||
// The Unix epoch starts on Jan 1 1970. Need to subtract the difference
|
||||
// in seconds from Jan 1 1601.
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
|
||||
// Finally change microseconds to seconds and place in the seconds value.
|
||||
// The modulus picks up the microseconds.
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
if (NULL != tz)
|
||||
{
|
||||
if (!tzflag)
|
||||
{
|
||||
_tzset();
|
||||
tzflag++;
|
||||
}
|
||||
|
||||
// Adjust for the timezone west of Greenwich
|
||||
long seconds_diff;
|
||||
_get_timezone(&seconds_diff);
|
||||
tz->tz_minuteswest = seconds_diff / 60;
|
||||
int hours_offset;
|
||||
_get_daylight(&hours_offset);
|
||||
tz->tz_dsttime = hours_offset;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
17
benchmarks/new_opencl/guassian/gettimeofday.h
Executable file
17
benchmarks/new_opencl/guassian/gettimeofday.h
Executable file
|
@ -0,0 +1,17 @@
|
|||
|
||||
#ifdef _WIN32
|
||||
#include <WinSock.h>
|
||||
/**
|
||||
Based on code seen at.
|
||||
|
||||
http://www.winehq.org/pipermail/wine-devel/2003-June/018082.html
|
||||
|
||||
http://msdn.microsoft.com/en-us/library/ms740560
|
||||
|
||||
*/
|
||||
int gettimeofday(struct timeval *tv, struct timezone *tz);
|
||||
#else
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
|
||||
|
49
benchmarks/new_opencl/guassian/kernel.cl
Executable file
49
benchmarks/new_opencl/guassian/kernel.cl
Executable file
|
@ -0,0 +1,49 @@
|
|||
//#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
|
||||
|
||||
// Latitude/longitude pair; neither kernel in this file refers to it.
typedef struct latLong {
    float lat, lng;
} LatLong;
|
||||
|
||||
// Fan1: for elimination step t, writes the multiplier of every row below
// row t into column t of m_dev.
//
// m_dev  size x size row-major multiplier matrix (written)
// a_dev  size x size row-major coefficient matrix (read)
// b_dev  not used by this kernel
// size   matrix dimension
// t      current elimination step
__kernel void Fan1(__global float *m_dev,
                   __global float *a_dev,
                   __global float *b_dev,
                   const int size,
                   const int t) {
    const int gid = get_global_id(0);

    // Only the size-1-t rows below the pivot row have work to do.
    if (gid >= size - 1 - t)
        return;

    const int row = gid + t + 1;
    m_dev[size * row + t] = a_dev[size * row + t] / a_dev[size * t + t];
}
|
||||
|
||||
|
||||
// Fan2: for elimination step t, subtracts multiplier * (row t) from every
// row below row t of a_dev, and applies the same update to b_dev.
//
// m_dev  size x size row-major multiplier matrix (read)
// a_dev  size x size row-major coefficient matrix (updated in place)
// b_dev  right-hand-side vector (updated in place)
// size   matrix dimension
// t      current elimination step
//
// Dimension 0 of the NDRange selects the row offset, dimension 1 the
// column offset.
__kernel void Fan2(__global float *m_dev,
                   __global float *a_dev,
                   __global float *b_dev,
                   const int size,
                   const int t) {
    const int gx = get_global_id(0);
    const int gy = get_global_id(1);

    // Work-items outside the trailing sub-matrix do nothing.
    if (gx >= size - 1 - t || gy >= size - t)
        return;

    const int row = gx + 1 + t;
    const int col = gy + t;

    a_dev[size * row + col] -= m_dev[size * row + t] * a_dev[size * t + col];

    // Exactly one work-item per row (column offset 0) updates b.
    if (gy == 0) {
        b_dev[row] -= m_dev[size * row + col] * b_dev[t];
    }
}
|
BIN
benchmarks/new_opencl/guassian/kernel.pocl
Normal file
BIN
benchmarks/new_opencl/guassian/kernel.pocl
Normal file
Binary file not shown.
411
benchmarks/new_opencl/guassian/main.cc
Executable file
411
benchmarks/new_opencl/guassian/main.cc
Executable file
|
@ -0,0 +1,411 @@
|
|||
#ifndef __GAUSSIAN_ELIMINATION__
|
||||
#define __GAUSSIAN_ELIMINATION__
|
||||
|
||||
#include "gaussianElim.h"
|
||||
|
||||
cl_context context = NULL;
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
printf("enter demo main\n");
|
||||
float *a = NULL, *b = NULL, *finalVec = NULL;
|
||||
float *m = NULL;
|
||||
int size;
|
||||
|
||||
FILE *fp;
|
||||
|
||||
// args
|
||||
char filename[100];
|
||||
int quiet = 0, timing = 0, platform = -1, device = -1;
|
||||
|
||||
// parse command line
|
||||
if (parseCommandline(argc, argv, filename, &quiet, &timing, &platform,
|
||||
&device)) {
|
||||
printUsage();
|
||||
return 0;
|
||||
}
|
||||
|
||||
context = cl_init_context(platform, device, quiet);
|
||||
|
||||
fp = fopen(filename, "r");
|
||||
fscanf(fp, "%d", &size);
|
||||
|
||||
a = (float *)malloc(size * size * sizeof(float));
|
||||
|
||||
printf("OK\n");
|
||||
|
||||
InitMat(fp, size, a, size, size);
|
||||
// printf("The input matrix a is:\n");
|
||||
// PrintMat(a, size, size, size);
|
||||
b = (float *)malloc(size * sizeof(float));
|
||||
|
||||
InitAry(fp, b, size);
|
||||
// printf("The input array b is:\n");
|
||||
// PrintAry(b, size);
|
||||
|
||||
// create the solution matrix
|
||||
m = (float *)malloc(size * size * sizeof(float));
|
||||
|
||||
// create a new vector to hold the final answer
|
||||
finalVec = (float *)malloc(size * sizeof(float));
|
||||
|
||||
InitPerRun(size, m);
|
||||
|
||||
// begin timing
|
||||
|
||||
// run kernels
|
||||
ForwardSub(context, a, b, m, size, timing);
|
||||
|
||||
// end timing
|
||||
if (!quiet) {
|
||||
printf("The result of matrix m is: \n");
|
||||
|
||||
PrintMat(m, size, size, size);
|
||||
printf("The result of matrix a is: \n");
|
||||
PrintMat(a, size, size, size);
|
||||
printf("The result of array b is: \n");
|
||||
PrintAry(b, size);
|
||||
|
||||
BackSub(a, b, finalVec, size);
|
||||
printf("The final solution is: \n");
|
||||
PrintAry(finalVec, size);
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
free(m);
|
||||
free(a);
|
||||
free(b);
|
||||
free(finalVec);
|
||||
// OpenClGaussianElimination(context,timing);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*------------------------------------------------------
|
||||
** ForwardSub() -- Forward substitution of Gaussian
|
||||
** elimination.
|
||||
**------------------------------------------------------
|
||||
*/
|
||||
void ForwardSub(cl_context context, float *a, float *b, float *m, int size,
|
||||
int timing) {
|
||||
// 1. set up kernels
|
||||
cl_kernel fan1_kernel, fan2_kernel;
|
||||
cl_int status = 0;
|
||||
cl_program gaussianElim_program;
|
||||
cl_event writeEvent, kernelEvent, readEvent;
|
||||
float writeTime = 0, readTime = 0, kernelTime = 0;
|
||||
float writeMB = 0, readMB = 0;
|
||||
|
||||
gaussianElim_program = cl_compileProgram((char *)"gaussianElim_kernels.cl", NULL);
|
||||
|
||||
fan1_kernel = clCreateKernel(gaussianElim_program, "Fan1", &status);
|
||||
status = cl_errChk(status, (char *)"Error Creating Fan1 kernel", true);
|
||||
if (status)
|
||||
exit(1);
|
||||
|
||||
fan2_kernel = clCreateKernel(gaussianElim_program, "Fan2", &status);
|
||||
status = cl_errChk(status, (char *)"Error Creating Fan2 kernel", true);
|
||||
if (status)
|
||||
exit(1);
|
||||
|
||||
// 2. set up memory on device and send ipts data to device
|
||||
|
||||
cl_mem a_dev, b_dev, m_dev;
|
||||
|
||||
cl_int error = 0;
|
||||
|
||||
a_dev = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(float) * size * size, NULL, &error);
|
||||
|
||||
b_dev = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float) * size, NULL,
|
||||
&error);
|
||||
|
||||
m_dev = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(float) * size * size, NULL, &error);
|
||||
|
||||
cl_command_queue command_queue = cl_getCommandQueue();
|
||||
|
||||
error = clEnqueueWriteBuffer(command_queue, a_dev,
|
||||
1, // change to 0 for nonblocking write
|
||||
0, // offset
|
||||
sizeof(float) * size * size, a, 0, NULL,
|
||||
&writeEvent);
|
||||
|
||||
if (timing)
|
||||
writeTime += eventTime(writeEvent, command_queue);
|
||||
clReleaseEvent(writeEvent);
|
||||
|
||||
error = clEnqueueWriteBuffer(command_queue, b_dev,
|
||||
1, // change to 0 for nonblocking write
|
||||
0, // offset
|
||||
sizeof(float) * size, b, 0, NULL, &writeEvent);
|
||||
if (timing)
|
||||
writeTime += eventTime(writeEvent, command_queue);
|
||||
clReleaseEvent(writeEvent);
|
||||
|
||||
error = clEnqueueWriteBuffer(command_queue, m_dev,
|
||||
1, // change to 0 for nonblocking write
|
||||
0, // offset
|
||||
sizeof(float) * size * size, m, 0, NULL,
|
||||
&writeEvent);
|
||||
if (timing)
|
||||
writeTime += eventTime(writeEvent, command_queue);
|
||||
clReleaseEvent(writeEvent);
|
||||
writeMB = (float)(sizeof(float) * size * (size + size + 1) / 1e6);
|
||||
|
||||
// 3. Determine block sizes
|
||||
size_t globalWorksizeFan1[1];
|
||||
size_t globalWorksizeFan2[2];
|
||||
|
||||
globalWorksizeFan1[0] = size;
|
||||
globalWorksizeFan2[0] = size;
|
||||
globalWorksizeFan2[1] = size;
|
||||
|
||||
int t;
|
||||
// 4. Setup and Run kernels
|
||||
for (t = 0; t < (size - 1); t++) {
|
||||
// kernel args
|
||||
cl_int argchk;
|
||||
argchk = clSetKernelArg(fan1_kernel, 0, sizeof(cl_mem), (void *)&m_dev);
|
||||
argchk |= clSetKernelArg(fan1_kernel, 1, sizeof(cl_mem), (void *)&a_dev);
|
||||
argchk |= clSetKernelArg(fan1_kernel, 2, sizeof(cl_mem), (void *)&b_dev);
|
||||
argchk |= clSetKernelArg(fan1_kernel, 3, sizeof(int), (void *)&size);
|
||||
argchk |= clSetKernelArg(fan1_kernel, 4, sizeof(int), (void *)&t);
|
||||
|
||||
cl_errChk(argchk, "ERROR in Setting Fan1 kernel args", true);
|
||||
|
||||
// launch kernel
|
||||
error =
|
||||
clEnqueueNDRangeKernel(command_queue, fan1_kernel, 1, 0,
|
||||
globalWorksizeFan1, NULL, 0, NULL, &kernelEvent);
|
||||
|
||||
cl_errChk(error, "ERROR in Executing Fan1 Kernel", true);
|
||||
if (timing) {
|
||||
// printf("here1a\n");
|
||||
kernelTime += eventTime(kernelEvent, command_queue);
|
||||
// printf("here1b\n");
|
||||
}
|
||||
clReleaseEvent(kernelEvent);
|
||||
// Fan1<<<dimGrid,dimBlock>>>(m_cuda,a_cuda,Size,t);
|
||||
// cudaThreadSynchronize();
|
||||
|
||||
// kernel args
|
||||
argchk = clSetKernelArg(fan2_kernel, 0, sizeof(cl_mem), (void *)&m_dev);
|
||||
argchk |= clSetKernelArg(fan2_kernel, 1, sizeof(cl_mem), (void *)&a_dev);
|
||||
argchk |= clSetKernelArg(fan2_kernel, 2, sizeof(cl_mem), (void *)&b_dev);
|
||||
argchk |= clSetKernelArg(fan2_kernel, 3, sizeof(int), (void *)&size);
|
||||
argchk |= clSetKernelArg(fan2_kernel, 4, sizeof(int), (void *)&t);
|
||||
|
||||
cl_errChk(argchk, "ERROR in Setting Fan2 kernel args", true);
|
||||
|
||||
// launch kernel
|
||||
error =
|
||||
clEnqueueNDRangeKernel(command_queue, fan2_kernel, 2, 0,
|
||||
globalWorksizeFan2, NULL, 0, NULL, &kernelEvent);
|
||||
|
||||
cl_errChk(error, "ERROR in Executing Fan1 Kernel", true);
|
||||
if (timing) {
|
||||
// printf("here2a\n");
|
||||
kernelTime += eventTime(kernelEvent, command_queue);
|
||||
// printf("here2b\n");
|
||||
}
|
||||
clReleaseEvent(kernelEvent);
|
||||
// Fan2<<<dimGridXY,dimBlockXY>>>(m_cuda,a_cuda,b_cuda,Size,Size-t,t);
|
||||
// cudaThreadSynchronize();
|
||||
}
|
||||
// 5. transfer data off of device
|
||||
error =
|
||||
clEnqueueReadBuffer(command_queue, a_dev,
|
||||
1, // change to 0 for nonblocking write
|
||||
0, // offset
|
||||
sizeof(float) * size * size, a, 0, NULL, &readEvent);
|
||||
|
||||
cl_errChk(error, "ERROR with clEnqueueReadBuffer", true);
|
||||
if (timing)
|
||||
readTime += eventTime(readEvent, command_queue);
|
||||
clReleaseEvent(readEvent);
|
||||
|
||||
error = clEnqueueReadBuffer(command_queue, b_dev,
|
||||
1, // change to 0 for nonblocking write
|
||||
0, // offset
|
||||
sizeof(float) * size, b, 0, NULL, &readEvent);
|
||||
cl_errChk(error, "ERROR with clEnqueueReadBuffer", true);
|
||||
if (timing)
|
||||
readTime += eventTime(readEvent, command_queue);
|
||||
clReleaseEvent(readEvent);
|
||||
|
||||
error =
|
||||
clEnqueueReadBuffer(command_queue, m_dev,
|
||||
1, // change to 0 for nonblocking write
|
||||
0, // offset
|
||||
sizeof(float) * size * size, m, 0, NULL, &readEvent);
|
||||
|
||||
cl_errChk(error, "ERROR with clEnqueueReadBuffer", true);
|
||||
if (timing)
|
||||
readTime += eventTime(readEvent, command_queue);
|
||||
clReleaseEvent(readEvent);
|
||||
readMB = (float)(sizeof(float) * size * (size + size + 1) / 1e6);
|
||||
|
||||
if (timing) {
|
||||
printf("Matrix Size\tWrite(s) [size]\t\tKernel(s)\tRead(s) "
|
||||
"[size]\t\tTotal(s)\n");
|
||||
printf("%dx%d \t", size, size);
|
||||
|
||||
printf("%f [%.2fMB]\t", writeTime, writeMB);
|
||||
|
||||
printf("%f\t", kernelTime);
|
||||
|
||||
printf("%f [%.2fMB]\t", readTime, readMB);
|
||||
|
||||
printf("%f\n\n", writeTime + kernelTime + readTime);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the start-to-end duration of `event` in seconds.
//
// Waits for `command_queue` to finish, reads the event's
// CL_PROFILING_COMMAND_START and CL_PROFILING_COMMAND_END counters, and
// divides their difference by 1e9.
float eventTime(cl_event event, cl_command_queue command_queue) {
  cl_ulong startStamp, endStamp;

  clFinish(command_queue);

  cl_int rc = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START,
                                      sizeof(cl_ulong), &startStamp, NULL);
  cl_errChk(rc, "ERROR in Event Profiling.", true);

  rc = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
                               sizeof(cl_ulong), &endStamp, NULL);
  cl_errChk(rc, "ERROR in Event Profiling.", true);

  const cl_ulong elapsed = endStamp - startStamp;
  return (float)(elapsed / 1e9);
}
|
||||
|
||||
// Parses the command line.
//
// filename  output buffer of at least 100 bytes; defaults to "matrix4.txt"
//           and is replaced by the last argument that does not start
//           with '-'
// q         set to 1 by -q (quiet)
// t         set to 1 by -t (timing)
// p         set from the argument following -p (platform index)
// d         set from the argument following -d (device index)
//
// Returns 0 on success. Returns 1 when usage should be printed: -h was
// given, -p or -d has no value after it, or only one of platform/device
// ended up non-negative.
int parseCommandline(int argc, char *argv[], char *filename, int *q, int *t,
                     int *p, int *d) {
  strncpy(filename, "matrix4.txt", 100);

  for (int i = 1; i < argc; i++) {
    if (argv[i][0] != '-') {
      // Positional argument: the input file. Copy at most 99 characters
      // and terminate explicitly, since strncpy does not when truncating.
      strncpy(filename, argv[i], 99);
      filename[99] = '\0';
      continue;
    }

    switch (argv[i][1]) {
    case 'h': // help
      return 1;
    case 'q': // quiet
      *q = 1;
      break;
    case 't': // timing
      *t = 1;
      break;
    case 'p': // platform
      if (++i >= argc) // value missing: do not read past argv
        return 1;
      *p = atoi(argv[i]);
      break;
    case 'd': // device
      if (++i >= argc) // value missing: do not read past argv
        return 1;
      *d = atoi(argv[i]);
      break;
    }
  }

  // both p and d must be specified if either are specified
  if ((*d >= 0 && *p < 0) || (*p >= 0 && *d < 0))
    return 1;
  return 0;
}
|
||||
|
||||
// Writes the command-line help text to standard output.
void printUsage() {
  static const char *const usageLines[] = {
      "Gaussian Elimination Usage\n",
      "\n",
      "gaussianElimination [filename] [-hqt] [-p [int] -d [int]]\n",
      "\n",
      "example:\n",
      "$ ./gaussianElimination matrix4.txt\n",
      "\n",
      "filename the filename that holds the matrix data\n",
      "\n",
      "-h Display the help file\n",
      "-q Quiet mode. Suppress all text output.\n",
      "-t Print timing information.\n",
      "\n",
      "-p [int] Choose the platform (must choose both platform and device)\n",
      "-d [int] Choose the device (must choose both platform and device)\n",
      "\n",
      "\n",
      "Notes: 1. The filename is required as the first parameter.\n",
      " 2. If you declare either the device or the platform,\n",
      " you must declare both.\n\n",
  };
  const size_t lineCount = sizeof(usageLines) / sizeof(usageLines[0]);

  // None of the lines contains a conversion specifier, so they are
  // emitted verbatim.
  for (size_t k = 0; k < lineCount; ++k)
    fputs(usageLines[k], stdout);
}
|
||||
|
||||
/*------------------------------------------------------
 ** InitPerRun() -- Zero the multiplier matrix
 **
 ** size  matrix dimension
 ** m     buffer of size * size floats, every element of
 **       which is set to 0
 **------------------------------------------------------
 */
void InitPerRun(int size, float *m) {
  float *cursor = m;
  for (int k = 0; k < size * size; ++k, ++cursor)
    *cursor = 0.0;
}
|
||||
// Back-substitution: solves a * x = b for x, reading only the entries of a
// on and above the diagonal.
//
// a        : size x size coefficients, addressed as a[size * row + col]
// b        : right-hand side, size entries
// finalVec : receives the solution, size entries
// size     : number of unknowns
void BackSub(float *a, float *b, float *finalVec, int size) {
  int row, col;

  // Work from the last row up to the first; each row depends only on the
  // unknowns already solved below it.
  for (row = size - 1; row >= 0; row--) {
    float *rowCoeffs = a + size * row;
    float *unknown = finalVec + row;

    *unknown = b[row];

    // Subtract the contribution of every already-solved unknown, visiting
    // columns from the right edge towards the diagonal.
    for (col = size - 1; col > row; col--) {
      *unknown -= rowCoeffs[col] * finalVec[col];
    }

    // Divide by the diagonal coefficient.
    *unknown = *unknown / rowCoeffs[row];
  }
}
|
||||
// Read an nrow x ncol block of floats from fp into ary.
//
// fp   : open input stream positioned at the first value
// size : row stride of ary; element (i, j) is stored at ary[size * i + j]
// ary  : destination buffer
// nrow : number of rows to read
// ncol : number of columns to read per row
//
// Reading stops at the first value that cannot be parsed; a diagnostic is
// written to stderr and the remaining elements are left unmodified.
void InitMat(FILE *fp, int size, float *ary, int nrow, int ncol) {
  int i, j;

  for (i = 0; i < nrow; i++) {
    for (j = 0; j < ncol; j++) {
      // fscanf returns the number of converted items; anything other than 1
      // means end of input or malformed data, and every later read would
      // fail the same way.
      if (fscanf(fp, "%f", ary + size * i + j) != 1) {
        fprintf(stderr, "InitMat: failed to read element (%d, %d)\n", i, j);
        return;
      }
    }
  }
}
|
||||
/*------------------------------------------------------
 ** InitAry() -- Initialize the array (vector) by reading
 ** data from the data file
 **
 ** fp       : open input stream positioned at the first value
 ** ary      : destination buffer
 ** ary_size : number of floats to read
 **
 ** Reading stops at the first value that cannot be parsed;
 ** a diagnostic is written to stderr and the remaining
 ** elements are left unmodified.
 **------------------------------------------------------
 */
void InitAry(FILE *fp, float *ary, int ary_size) {
  int i;

  for (i = 0; i < ary_size; i++) {
    // fscanf returns the number of converted items; anything other than 1
    // means end of input or malformed data.
    if (fscanf(fp, "%f", &ary[i]) != 1) {
      fprintf(stderr, "InitAry: failed to read element %d\n", i);
      return;
    }
  }
}
|
||||
/*------------------------------------------------------
 ** PrintMat() -- Print the contents of the matrix
 **
 ** ary  : matrix storage; element (r, c) is ary[size * r + c]
 ** size : row stride of ary
 ** nrow : number of rows to print
 ** ncol : number of columns to print per row
 **
 ** Each value is written with the "%8.2f " format, one
 ** matrix row per output line, followed by a blank line.
 **------------------------------------------------------
 */
void PrintMat(float *ary, int size, int nrow, int ncol) {
  int r = 0;

  while (r < nrow) {
    int c = 0;
    while (c < ncol) {
      printf("%8.2f ", ary[size * r + c]);
      c++;
    }
    putchar('\n');
    r++;
  }
  putchar('\n');
}
|
||||
|
||||
/*------------------------------------------------------
 ** PrintAry() -- Print the contents of the array (vector)
 **
 ** ary      : values to print
 ** ary_size : number of values
 **
 ** Values are written on one line with the "%.2f " format,
 ** followed by a blank line.
 **------------------------------------------------------
 */
void PrintAry(float *ary, int ary_size) {
  int k = 0;

  while (k < ary_size) {
    printf("%.2f ", ary[k]);
    k++;
  }
  fputs("\n\n", stdout);
}
|
||||
#endif
|
11
benchmarks/new_opencl/guassian/matrix4.txt
Executable file
11
benchmarks/new_opencl/guassian/matrix4.txt
Executable file
|
@ -0,0 +1,11 @@
|
|||
4
|
||||
|
||||
-0.6 -0.5 0.7 0.3
|
||||
-0.3 -0.9 0.3 0.7
|
||||
-0.4 -0.5 -0.3 -0.8
|
||||
0.0 -0.1 0.2 0.9
|
||||
|
||||
-0.85 -0.68 0.24 -0.53
|
||||
|
||||
0.7 0.0 -0.4 -0.5
|
||||
|
1
benchmarks/new_opencl/guassian/run
Executable file
1
benchmarks/new_opencl/guassian/run
Executable file
|
@ -0,0 +1 @@
|
|||
./gaussian ../../data/gaussian/matrix4.txt
|
204
benchmarks/new_opencl/guassian/utils.cpp
Executable file
204
benchmarks/new_opencl/guassian/utils.cpp
Executable file
|
@ -0,0 +1,204 @@
|
|||
/****************************************************************************\
|
||||
* Copyright (c) 2011, Advanced Micro Devices, Inc. *
|
||||
* All rights reserved. *
|
||||
* *
|
||||
* Redistribution and use in source and binary forms, with or without *
|
||||
* modification, are permitted provided that the following conditions *
|
||||
* are met: *
|
||||
* *
|
||||
* Redistributions of source code must retain the above copyright notice, *
|
||||
* this list of conditions and the following disclaimer. *
|
||||
* *
|
||||
* Redistributions in binary form must reproduce the above copyright notice, *
|
||||
* this list of conditions and the following disclaimer in the documentation *
|
||||
* and/or other materials provided with the distribution. *
|
||||
* *
|
||||
* Neither the name of the copyright holder nor the names of its contributors *
|
||||
* may be used to endorse or promote products derived from this software *
|
||||
* without specific prior written permission. *
|
||||
* *
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS *
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED *
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR *
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR *
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, *
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR *
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF *
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
|
||||
* *
|
||||
* If you use the software (in whole or in part), you shall adhere to all *
|
||||
* applicable U.S., European, and other export laws, including but not *
|
||||
* limited to the U.S. Export Administration Regulations (EAR), (15 C.F.R. *
|
||||
* Sections 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 *
|
||||
* of 22 June 2000. Further, pursuant to Section 740.6 of the EAR, you *
|
||||
* hereby certify that, except pursuant to a license granted by the United *
|
||||
* States Department of Commerce Bureau of Industry and Security or as *
|
||||
* otherwise permitted pursuant to a License Exception under the U.S. Export *
|
||||
* Administration Regulations ("EAR"), you will not (1) export, re-export or *
|
||||
* release to a national of a country in Country Groups D:1, E:1 or E:2 any *
|
||||
* restricted technology, software, or source code you receive hereunder, *
|
||||
* or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
|
||||
* technology or software, if such foreign produced direct product is subject *
|
||||
* to national security controls as identified on the Commerce Control List *
|
||||
*(currently found in Supplement 1 to Part 774 of EAR). For the most current *
|
||||
* Country Group listings, or for additional information about the EAR or *
|
||||
* your obligations under those regulations, please refer to the U.S. Bureau *
|
||||
* of Industry and Securitys website at http://www.bis.doc.gov/. *
|
||||
\****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <sys/stat.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
static bool usingImages = true;
|
||||
|
||||
//! A wrapper for malloc that checks the return value.
//!
//! \param size number of bytes requested
//! \return a non-NULL pointer to the allocation; the caller releases it
//!         with free()
//!
//! On allocation failure the error is reported with perror() and the
//! process exits with status -1.
void* alloc(size_t size) {

    // malloc(0) is allowed to return NULL even when nothing went wrong.
    // Request at least one byte so that NULL always means a real failure
    // and a zero-size request does not terminate the program.
    void* ptr = malloc(size != 0 ? size : 1);
    if(ptr == NULL) {
        perror("malloc");
        exit(-1);
    }

    return ptr;
}
|
||||
|
||||
// Verify that 'filename' exists and is a regular file before it is opened.
//
// On success the function simply returns. If stat() fails, or the path is
// not a regular file, a message is printed to stdout and the process exits
// with status -1.
void checkFile(char* filename)
{

    struct stat fileStatus;
    if(stat(filename, &fileStatus) != 0) {
        printf("Error opening file: %s\n", filename);
        exit(-1);
    }

    // The file type is a multi-bit field inside st_mode, so it must be
    // compared with S_ISREG rather than bit-tested: "S_IFREG & st_mode" is
    // also non-zero for sockets, whose type value shares a bit with S_IFREG.
    if(!S_ISREG(fileStatus.st_mode)) {
        printf("File %s is not a regular file\n", filename);
        exit(-1);
    }
}
|
||||
|
||||
|
||||
// Verify that 'dirpath' exists and is a directory.
//
// On success the function simply returns. If stat() fails, or the path is
// not a directory, a message is printed to stdout and the process exits
// with status -1.
void checkDir(char* dirpath)
{

    struct stat fileStatus;
    if(stat(dirpath, &fileStatus) != 0) {
        printf("Directory does not exist: %s\n", dirpath);
        exit(-1);
    }

    // The file type is a multi-bit field inside st_mode, so it must be
    // compared with S_ISDIR rather than bit-tested: "S_IFDIR & st_mode" is
    // also non-zero for block devices, whose type value contains that bit.
    if(!S_ISDIR(fileStatus.st_mode)) {
        printf("Directory was not provided: %s\n", dirpath);
        exit(-1);
    }
}
|
||||
|
||||
// Parse the command line arguments
|
||||
void parseArguments(int argc, char** argv, char** input, char** events,
|
||||
char** ipts, char* devicePref, bool* verifyResults)
|
||||
{
|
||||
|
||||
for(int i = 2; i < argc; i++) {
|
||||
if(strcmp(argv[i], "-d") == 0) { // Event dump found
|
||||
if(i == argc-1) {
|
||||
printf("Usage: -e Needs directory path\n");
|
||||
exit(-1);
|
||||
}
|
||||
devicePref[0] = argv[i+1][0];
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
if(strcmp(argv[i], "-e") == 0) { // Event dump found
|
||||
if(i == argc-1) {
|
||||
printf("Usage: -e Needs directory path\n");
|
||||
exit(-1);
|
||||
}
|
||||
*events = argv[i+1];
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
if(strcmp(argv[i], "-i") == 0) { // Input found
|
||||
if(i == argc-1) {
|
||||
printf("Usage: -i Needs directory path\n");
|
||||
exit(-1);
|
||||
}
|
||||
*input = argv[i+1];
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
if(strcmp(argv[i], "-l") == 0) { // Ipts dump found
|
||||
if(i == argc-1) {
|
||||
printf("Usage: -l Needs directory path\n");
|
||||
exit(-1);
|
||||
}
|
||||
*ipts = argv[i+1];
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
if(strcmp(argv[i], "-n") == 0) { // Don't use OpenCL images
|
||||
setUsingImages(false);
|
||||
continue;
|
||||
}
|
||||
if(strcmp(argv[i], "-v") == 0) { // Verify results
|
||||
*verifyResults = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Takes an unsigned integer 'value' and returns the smallest multiple of
// 'multiple' that is greater than or equal to it (used for padding columns).
//
// A 'value' that is already a multiple is returned unchanged. A 'multiple'
// of zero has no meaningful answer, so 'value' is returned as-is instead of
// dividing by zero.
unsigned int roundUp(unsigned int value, unsigned int multiple) {

    if(multiple == 0) {
        return value;
    }

    unsigned int remainder = value % multiple;

    // Make the value a multiple of multiple
    if(remainder != 0) {
        value += (multiple-remainder);
    }

    return value;
}
|
||||
|
||||
|
||||
// Concatenate two strings and return a pointer to the new string
|
||||
char* smartStrcat(char* str1, char* str2)
|
||||
{
|
||||
char* newStr = NULL;
|
||||
|
||||
newStr = (char*)alloc((strlen(str1)+strlen(str2)+1)*sizeof(char));
|
||||
|
||||
strcpy(newStr, str1);
|
||||
strcat(newStr, str2);
|
||||
|
||||
return newStr;
|
||||
}
|
||||
|
||||
|
||||
// Set the value of using images to true if they are being
|
||||
// used, or false if they are not
|
||||
void setUsingImages(bool val)
|
||||
{
|
||||
usingImages = val;
|
||||
}
|
||||
|
||||
|
||||
// Return whether or not images are being used
|
||||
bool isUsingImages()
|
||||
{
|
||||
return usingImages;
|
||||
}
|
84
benchmarks/new_opencl/guassian/utils.h
Executable file
84
benchmarks/new_opencl/guassian/utils.h
Executable file
|
@ -0,0 +1,84 @@
|
|||
/****************************************************************************\
|
||||
* Copyright (c) 2011, Advanced Micro Devices, Inc. *
|
||||
* All rights reserved. *
|
||||
* *
|
||||
* Redistribution and use in source and binary forms, with or without *
|
||||
* modification, are permitted provided that the following conditions *
|
||||
* are met: *
|
||||
* *
|
||||
* Redistributions of source code must retain the above copyright notice, *
|
||||
* this list of conditions and the following disclaimer. *
|
||||
* *
|
||||
* Redistributions in binary form must reproduce the above copyright notice, *
|
||||
* this list of conditions and the following disclaimer in the documentation *
|
||||
* and/or other materials provided with the distribution. *
|
||||
* *
|
||||
* Neither the name of the copyright holder nor the names of its contributors *
|
||||
* may be used to endorse or promote products derived from this software *
|
||||
* without specific prior written permission. *
|
||||
* *
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS *
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED *
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR *
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR *
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, *
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR *
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF *
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
|
||||
* *
|
||||
* If you use the software (in whole or in part), you shall adhere to all *
|
||||
* applicable U.S., European, and other export laws, including but not *
|
||||
* limited to the U.S. Export Administration Regulations (EAR), (15 C.F.R. *
|
||||
* Sections 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 *
|
||||
* of 22 June 2000. Further, pursuant to Section 740.6 of the EAR, you *
|
||||
* hereby certify that, except pursuant to a license granted by the United *
|
||||
* States Department of Commerce Bureau of Industry and Security or as *
|
||||
* otherwise permitted pursuant to a License Exception under the U.S. Export *
|
||||
* Administration Regulations ("EAR"), you will not (1) export, re-export or *
|
||||
* release to a national of a country in Country Groups D:1, E:1 or E:2 any *
|
||||
* restricted technology, software, or source code you receive hereunder, *
|
||||
* or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
|
||||
* technology or software, if such foreign produced direct product is subject *
|
||||
* to national security controls as identified on the Commerce Control List *
|
||||
*(currently found in Supplement 1 to Part 774 of EAR). For the most current *
|
||||
* Country Group listings, or for additional information about the EAR or *
|
||||
* your obligations under those regulations, please refer to the U.S. Bureau *
|
||||
* of Industry and Securitys website at http://www.bis.doc.gov/. *
|
||||
\****************************************************************************/
|
||||
|
||||
#ifndef _UTILS_
#define _UTILS_

// size_t is used in the declarations below; include its definition so this
// header compiles regardless of what the including file pulled in first.
#include <stddef.h>

// Wrapper for malloc that checks the result
void* alloc(size_t size);

// Checks that a directory exists at dirpath
void checkDir(char* dirpath);

// Checks that a regular file exists at filename
void checkFile(char* filename);

// Parse the input command line options to the program
void parseArguments(int argc, char** argv, char** input, char** events,
                    char** ipts, char* devicePref, bool* verifyResults);

// Print the program usage information
void printUsage();

// Rounds value up to the next multiple of 'multiple'
unsigned int roundUp(unsigned int value, unsigned int multiple);

// Concatenate two strings, creating a new one
char* smartStrcat(char* str1, char* str2);

// Set the value of usingImages
void setUsingImages(bool val);

// Return whether or not images are being used
bool isUsingImages();

#endif
|
1804
benchmarks/new_opencl/include/CL/cl.h
Normal file
1804
benchmarks/new_opencl/include/CL/cl.h
Normal file
File diff suppressed because it is too large
Load diff
12459
benchmarks/new_opencl/include/CL/cl.hpp
Normal file
12459
benchmarks/new_opencl/include/CL/cl.hpp
Normal file
File diff suppressed because it is too large
Load diff
10119
benchmarks/new_opencl/include/CL/cl2.hpp
Normal file
10119
benchmarks/new_opencl/include/CL/cl2.hpp
Normal file
File diff suppressed because it is too large
Load diff
131
benchmarks/new_opencl/include/CL/cl_d3d10.h
Normal file
131
benchmarks/new_opencl/include/CL/cl_d3d10.h
Normal file
|
@ -0,0 +1,131 @@
|
|||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2015 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
|
||||
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
|
||||
|
||||
#ifndef __OPENCL_CL_D3D10_H
|
||||
#define __OPENCL_CL_D3D10_H
|
||||
|
||||
#include <d3d10.h>
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/******************************************************************************
|
||||
* cl_khr_d3d10_sharing */
|
||||
#define cl_khr_d3d10_sharing 1
|
||||
|
||||
typedef cl_uint cl_d3d10_device_source_khr;
|
||||
typedef cl_uint cl_d3d10_device_set_khr;
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
/* Error Codes */
|
||||
#define CL_INVALID_D3D10_DEVICE_KHR -1002
|
||||
#define CL_INVALID_D3D10_RESOURCE_KHR -1003
|
||||
#define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR -1004
|
||||
#define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR -1005
|
||||
|
||||
/* cl_d3d10_device_source_khr */
|
||||
#define CL_D3D10_DEVICE_KHR 0x4010
|
||||
#define CL_D3D10_DXGI_ADAPTER_KHR 0x4011
|
||||
|
||||
/* cl_d3d10_device_set_khr */
|
||||
#define CL_PREFERRED_DEVICES_FOR_D3D10_KHR 0x4012
|
||||
#define CL_ALL_DEVICES_FOR_D3D10_KHR 0x4013
|
||||
|
||||
/* cl_context_info */
|
||||
#define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014
|
||||
#define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_D3D10_RESOURCE_KHR 0x4015
|
||||
|
||||
/* cl_image_info */
|
||||
#define CL_IMAGE_D3D10_SUBRESOURCE_KHR 0x4016
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR 0x4017
|
||||
#define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR 0x4018
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)(
|
||||
cl_platform_id platform,
|
||||
cl_d3d10_device_source_khr d3d_device_source,
|
||||
void * d3d_object,
|
||||
cl_d3d10_device_set_khr d3d_device_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id * devices,
|
||||
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D10Buffer * resource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D10Texture2D * resource,
|
||||
UINT subresource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D10Texture3D * resource,
|
||||
UINT subresource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_D3D10_H */
|
||||
|
131
benchmarks/new_opencl/include/CL/cl_d3d11.h
Normal file
131
benchmarks/new_opencl/include/CL/cl_d3d11.h
Normal file
|
@ -0,0 +1,131 @@
|
|||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2015 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
|
||||
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
|
||||
|
||||
#ifndef __OPENCL_CL_D3D11_H
|
||||
#define __OPENCL_CL_D3D11_H
|
||||
|
||||
#include <d3d11.h>
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/******************************************************************************
|
||||
* cl_khr_d3d11_sharing */
|
||||
#define cl_khr_d3d11_sharing 1
|
||||
|
||||
typedef cl_uint cl_d3d11_device_source_khr;
|
||||
typedef cl_uint cl_d3d11_device_set_khr;
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
/* Error Codes */
|
||||
#define CL_INVALID_D3D11_DEVICE_KHR -1006
|
||||
#define CL_INVALID_D3D11_RESOURCE_KHR -1007
|
||||
#define CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR -1008
|
||||
#define CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR -1009
|
||||
|
||||
/* cl_d3d11_device_source */
|
||||
#define CL_D3D11_DEVICE_KHR 0x4019
|
||||
#define CL_D3D11_DXGI_ADAPTER_KHR 0x401A
|
||||
|
||||
/* cl_d3d11_device_set */
|
||||
#define CL_PREFERRED_DEVICES_FOR_D3D11_KHR 0x401B
|
||||
#define CL_ALL_DEVICES_FOR_D3D11_KHR 0x401C
|
||||
|
||||
/* cl_context_info */
|
||||
#define CL_CONTEXT_D3D11_DEVICE_KHR 0x401D
|
||||
#define CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR 0x402D
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_D3D11_RESOURCE_KHR 0x401E
|
||||
|
||||
/* cl_image_info */
|
||||
#define CL_IMAGE_D3D11_SUBRESOURCE_KHR 0x401F
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR 0x4020
|
||||
#define CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR 0x4021
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)(
|
||||
cl_platform_id platform,
|
||||
cl_d3d11_device_source_khr d3d_device_source,
|
||||
void * d3d_object,
|
||||
cl_d3d11_device_set_khr d3d_device_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id * devices,
|
||||
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D11Buffer * resource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D11Texture2D * resource,
|
||||
UINT subresource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D11Texture3D * resource,
|
||||
UINT subresource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_D3D11_H */
|
||||
|
132
benchmarks/new_opencl/include/CL/cl_dx9_media_sharing.h
Normal file
132
benchmarks/new_opencl/include/CL/cl_dx9_media_sharing.h
Normal file
|
@ -0,0 +1,132 @@
|
|||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2015 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
|
||||
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
|
||||
|
||||
#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_H
|
||||
#define __OPENCL_CL_DX9_MEDIA_SHARING_H
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/******************************************************************************/
|
||||
/* cl_khr_dx9_media_sharing */
|
||||
#define cl_khr_dx9_media_sharing 1
|
||||
|
||||
typedef cl_uint cl_dx9_media_adapter_type_khr;
|
||||
typedef cl_uint cl_dx9_media_adapter_set_khr;
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include <d3d9.h>
|
||||
typedef struct _cl_dx9_surface_info_khr
|
||||
{
|
||||
IDirect3DSurface9 *resource;
|
||||
HANDLE shared_handle;
|
||||
} cl_dx9_surface_info_khr;
|
||||
#endif
|
||||
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
/* Error Codes */
|
||||
#define CL_INVALID_DX9_MEDIA_ADAPTER_KHR -1010
|
||||
#define CL_INVALID_DX9_MEDIA_SURFACE_KHR -1011
|
||||
#define CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR -1012
|
||||
#define CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR -1013
|
||||
|
||||
/* cl_dx9_media_adapter_type_khr */
|
||||
#define CL_ADAPTER_D3D9_KHR 0x2020
|
||||
#define CL_ADAPTER_D3D9EX_KHR 0x2021
|
||||
#define CL_ADAPTER_DXVA_KHR 0x2022
|
||||
|
||||
/* cl_dx9_media_adapter_set_khr */
|
||||
#define CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2023
|
||||
#define CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2024
|
||||
|
||||
/* cl_context_info */
|
||||
#define CL_CONTEXT_ADAPTER_D3D9_KHR 0x2025
|
||||
#define CL_CONTEXT_ADAPTER_D3D9EX_KHR 0x2026
|
||||
#define CL_CONTEXT_ADAPTER_DXVA_KHR 0x2027
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR 0x2028
|
||||
#define CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR 0x2029
|
||||
|
||||
/* cl_image_info */
|
||||
#define CL_IMAGE_DX9_MEDIA_PLANE_KHR 0x202A
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR 0x202B
|
||||
#define CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR 0x202C
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_fn)(
|
||||
cl_platform_id platform,
|
||||
cl_uint num_media_adapters,
|
||||
cl_dx9_media_adapter_type_khr * media_adapter_type,
|
||||
void * media_adapters,
|
||||
cl_dx9_media_adapter_set_khr media_adapter_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id * devices,
|
||||
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_dx9_media_adapter_type_khr adapter_type,
|
||||
void * surface_info,
|
||||
cl_uint plane,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_H */
|
||||
|
182
benchmarks/new_opencl/include/CL/cl_dx9_media_sharing_intel.h
Normal file
182
benchmarks/new_opencl/include/CL/cl_dx9_media_sharing_intel.h
Normal file
|
@ -0,0 +1,182 @@
|
|||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2019 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
/*****************************************************************************\
|
||||
|
||||
Copyright (c) 2013-2019 Intel Corporation All Rights Reserved.
|
||||
|
||||
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
|
||||
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
File Name: cl_dx9_media_sharing_intel.h
|
||||
|
||||
Abstract:
|
||||
|
||||
Notes:
|
||||
|
||||
\*****************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H
|
||||
#define __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
#include <d3d9.h>
|
||||
#include <dxvahd.h>
|
||||
#include <wtypes.h>
|
||||
#include <d3d9types.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/***************************************
|
||||
* cl_intel_dx9_media_sharing extension *
|
||||
****************************************/
|
||||
|
||||
#define cl_intel_dx9_media_sharing 1
|
||||
|
||||
typedef cl_uint cl_dx9_device_source_intel;
|
||||
typedef cl_uint cl_dx9_device_set_intel;
|
||||
|
||||
/* error codes */
|
||||
#define CL_INVALID_DX9_DEVICE_INTEL -1010
|
||||
#define CL_INVALID_DX9_RESOURCE_INTEL -1011
|
||||
#define CL_DX9_RESOURCE_ALREADY_ACQUIRED_INTEL -1012
|
||||
#define CL_DX9_RESOURCE_NOT_ACQUIRED_INTEL -1013
|
||||
|
||||
/* cl_dx9_device_source_intel */
|
||||
#define CL_D3D9_DEVICE_INTEL 0x4022
|
||||
#define CL_D3D9EX_DEVICE_INTEL 0x4070
|
||||
#define CL_DXVA_DEVICE_INTEL 0x4071
|
||||
|
||||
/* cl_dx9_device_set_intel */
|
||||
#define CL_PREFERRED_DEVICES_FOR_DX9_INTEL 0x4024
|
||||
#define CL_ALL_DEVICES_FOR_DX9_INTEL 0x4025
|
||||
|
||||
/* cl_context_info */
|
||||
#define CL_CONTEXT_D3D9_DEVICE_INTEL 0x4026
|
||||
#define CL_CONTEXT_D3D9EX_DEVICE_INTEL 0x4072
|
||||
#define CL_CONTEXT_DXVA_DEVICE_INTEL 0x4073
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_DX9_RESOURCE_INTEL 0x4027
|
||||
#define CL_MEM_DX9_SHARED_HANDLE_INTEL 0x4074
|
||||
|
||||
/* cl_image_info */
|
||||
#define CL_IMAGE_DX9_PLANE_INTEL 0x4075
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_ACQUIRE_DX9_OBJECTS_INTEL 0x402A
|
||||
#define CL_COMMAND_RELEASE_DX9_OBJECTS_INTEL 0x402B
|
||||
/******************************************************************************/
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetDeviceIDsFromDX9INTEL(
|
||||
cl_platform_id platform,
|
||||
cl_dx9_device_source_intel dx9_device_source,
|
||||
void* dx9_object,
|
||||
cl_dx9_device_set_intel dx9_device_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id* devices,
|
||||
cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL* clGetDeviceIDsFromDX9INTEL_fn)(
|
||||
cl_platform_id platform,
|
||||
cl_dx9_device_source_intel dx9_device_source,
|
||||
void* dx9_object,
|
||||
cl_dx9_device_set_intel dx9_device_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id* devices,
|
||||
cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromDX9MediaSurfaceINTEL(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
IDirect3DSurface9* resource,
|
||||
HANDLE sharedHandle,
|
||||
UINT plane,
|
||||
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceINTEL_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
IDirect3DSurface9* resource,
|
||||
HANDLE sharedHandle,
|
||||
UINT plane,
|
||||
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireDX9ObjectsINTEL(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9ObjectsINTEL_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
/* Prototype of clEnqueueReleaseDX9ObjectsINTEL, part of the
 * cl_intel_dx9_media_sharing extension declared in this header.
 * Takes a command queue, an array of num_objects memory objects, an event
 * wait list of num_events_in_wait_list entries, and an output event pointer;
 * returns a cl_int.
 * NOTE(review): mem_objects is a non-const cl_mem* here, while
 * clEnqueueAcquireDX9ObjectsINTEL takes const cl_mem* - confirm the
 * asymmetry is intended before relying on it. */
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseDX9ObjectsINTEL(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
/* Function-pointer type with the same parameter list and return type as
 * clEnqueueReleaseDX9ObjectsINTEL.
 * NOTE(review): like the prototype, mem_objects is a non-const cl_mem*,
 * unlike the Acquire counterpart's const cl_mem*. */
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9ObjectsINTEL_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H */
|
||||
|
132
benchmarks/new_opencl/include/CL/cl_egl.h
Normal file
132
benchmarks/new_opencl/include/CL/cl_egl.h
Normal file
|
@ -0,0 +1,132 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2019 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_EGL_H
|
||||
#define __OPENCL_CL_EGL_H
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/* Command type for events created with clEnqueueAcquireEGLObjectsKHR */
|
||||
#define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR 0x202F
|
||||
#define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR 0x202D
|
||||
#define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR 0x202E
|
||||
|
||||
/* Error type for clCreateFromEGLImageKHR */
|
||||
#define CL_INVALID_EGL_OBJECT_KHR -1093
|
||||
#define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR -1092
|
||||
|
||||
/* CLeglImageKHR is an opaque handle to an EGLImage */
|
||||
typedef void* CLeglImageKHR;
|
||||
|
||||
/* CLeglDisplayKHR is an opaque handle to an EGLDisplay */
|
||||
typedef void* CLeglDisplayKHR;
|
||||
|
||||
/* CLeglSyncKHR is an opaque handle to an EGLSync object */
|
||||
typedef void* CLeglSyncKHR;
|
||||
|
||||
/* properties passed to clCreateFromEGLImageKHR */
|
||||
typedef intptr_t cl_egl_image_properties_khr;
|
||||
|
||||
|
||||
#define cl_khr_egl_image 1
|
||||
|
||||
/* Prototype of clCreateFromEGLImageKHR (cl_khr_egl_image).
 * Takes a context, the opaque EGL display and image handles
 * (CLeglDisplayKHR / CLeglImageKHR, both typedef'd to void* in this header),
 * memory flags, a pointer to cl_egl_image_properties_khr values, and an
 * output error-code pointer; returns a cl_mem. */
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromEGLImageKHR(cl_context context,
|
||||
CLeglDisplayKHR egldisplay,
|
||||
CLeglImageKHR eglimage,
|
||||
cl_mem_flags flags,
|
||||
const cl_egl_image_properties_khr * properties,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
/* Function-pointer type with the same parameter list and return type as
 * clCreateFromEGLImageKHR.
 * NOTE(review): the prototype carries CL_API_SUFFIX__VERSION_1_0 but this
 * typedef has no version suffix - confirm that is intended. */
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)(
|
||||
cl_context context,
|
||||
CLeglDisplayKHR egldisplay,
|
||||
CLeglImageKHR eglimage,
|
||||
cl_mem_flags flags,
|
||||
const cl_egl_image_properties_khr * properties,
|
||||
cl_int * errcode_ret);
|
||||
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event);
|
||||
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event);
|
||||
|
||||
|
||||
#define cl_khr_egl_event 1
|
||||
|
||||
extern CL_API_ENTRY cl_event CL_API_CALL
|
||||
clCreateEventFromEGLSyncKHR(cl_context context,
|
||||
CLeglSyncKHR sync,
|
||||
CLeglDisplayKHR display,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)(
|
||||
cl_context context,
|
||||
CLeglSyncKHR sync,
|
||||
CLeglDisplayKHR display,
|
||||
cl_int * errcode_ret);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_EGL_H */
|
762
benchmarks/new_opencl/include/CL/cl_ext.h
Normal file
762
benchmarks/new_opencl/include/CL/cl_ext.h
Normal file
|
@ -0,0 +1,762 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2019 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
******************************************************************************/
|
||||
|
||||
/* cl_ext.h contains OpenCL extensions which don't have external */
|
||||
/* (OpenGL, D3D) dependencies. */
|
||||
|
||||
#ifndef __CL_EXT_H
|
||||
#define __CL_EXT_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
/* cl_khr_fp64 extension - no extension #define since it has no functions */
|
||||
/* CL_DEVICE_DOUBLE_FP_CONFIG is defined in cl.h when CL_TARGET_OPENCL_VERSION >= 120 */
|
||||
|
||||
#if CL_TARGET_OPENCL_VERSION <= 110
|
||||
#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032
|
||||
#endif
|
||||
|
||||
/* cl_khr_fp16 extension - no extension #define since it has no functions */
|
||||
#define CL_DEVICE_HALF_FP_CONFIG 0x1033
|
||||
|
||||
/* Memory object destruction
|
||||
*
|
||||
* Apple extension used to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR
|
||||
*
|
||||
* Registers a user callback function that will be called when the memory object is deleted and its resources
|
||||
* freed. Each call to clSetMemObjectDestructorAPPLE registers the specified user callback function on a callback
|
||||
* stack associated with memobj. The registered user callback functions are called in the reverse order in
|
||||
* which they were registered. The user callback functions are called and then the memory object is deleted
|
||||
* and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be
|
||||
* notified when the memory referenced by host_ptr, specified when the memory object is created and used as
|
||||
* the storage bits for the memory object, can be reused or freed.
|
||||
*
|
||||
* The application may not call CL APIs with the cl_mem object passed to pfn_notify.
|
||||
*
|
||||
* Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
|
||||
* before using.
|
||||
*/
|
||||
#define cl_APPLE_SetMemObjectDestructor 1
|
||||
/* Prototype of clSetMemObjectDestructorAPPLE.
 * Parameters: the memory object, a callback invoked as
 * pfn_notify(memobj, user_data), and the user_data pointer handed to that
 * callback. Returns a cl_int.
 * NOTE(review): unlike the other prototypes in this header, this one is
 * written without `extern` and without CL_API_CALL, and places CL_API_ENTRY
 * after the return type. */
cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem memobj,
|
||||
void (* pfn_notify)(cl_mem memobj, void * user_data),
|
||||
void * user_data) CL_EXT_SUFFIX__VERSION_1_0;
|
||||
|
||||
|
||||
/* Context Logging Functions
|
||||
*
|
||||
* The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext().
|
||||
* Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
|
||||
* before using.
|
||||
*
|
||||
* clLogMessagesToSystemLogAPPLE forwards all log messages to the Apple System Logger
|
||||
*/
|
||||
#define cl_APPLE_ContextLoggingFunctions 1
|
||||
extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * errstr,
|
||||
const void * private_info,
|
||||
size_t cb,
|
||||
void * user_data) CL_EXT_SUFFIX__VERSION_1_0;
|
||||
|
||||
/* clLogMessagesToStdoutAPPLE sends all log messages to the file descriptor stdout */
|
||||
extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * errstr,
|
||||
const void * private_info,
|
||||
size_t cb,
|
||||
void * user_data) CL_EXT_SUFFIX__VERSION_1_0;
|
||||
|
||||
/* clLogMessagesToStderrAPPLE sends all log messages to the file descriptor stderr */
|
||||
extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * errstr,
|
||||
const void * private_info,
|
||||
size_t cb,
|
||||
void * user_data) CL_EXT_SUFFIX__VERSION_1_0;
|
||||
|
||||
|
||||
/************************
|
||||
* cl_khr_icd extension *
|
||||
************************/
|
||||
#define cl_khr_icd 1
|
||||
|
||||
/* cl_platform_info */
|
||||
#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920
|
||||
|
||||
/* Additional Error Codes */
|
||||
#define CL_PLATFORM_NOT_FOUND_KHR -1001
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clIcdGetPlatformIDsKHR(cl_uint num_entries,
|
||||
cl_platform_id * platforms,
|
||||
cl_uint * num_platforms);
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(cl_uint num_entries,
|
||||
cl_platform_id * platforms,
|
||||
cl_uint * num_platforms);
|
||||
|
||||
|
||||
/*******************************
|
||||
* cl_khr_il_program extension *
|
||||
*******************************/
|
||||
#define cl_khr_il_program 1
|
||||
|
||||
/* New property to clGetDeviceInfo for retrieving supported intermediate
|
||||
* languages
|
||||
*/
|
||||
#define CL_DEVICE_IL_VERSION_KHR 0x105B
|
||||
|
||||
/* New property to clGetProgramInfo for retrieving the IL of a
|
||||
* program
|
||||
*/
|
||||
#define CL_PROGRAM_IL_KHR 0x1169
|
||||
|
||||
/* Prototype of clCreateProgramWithILKHR (cl_khr_il_program).
 * Takes a context, a pointer to the intermediate-language data and its
 * length, and an output error-code pointer; returns a cl_program.
 * NOTE(review): this prototype has no version suffix, while the matching
 * clCreateProgramWithILKHR_fn typedef carries CL_EXT_SUFFIX__VERSION_1_2. */
extern CL_API_ENTRY cl_program CL_API_CALL
|
||||
clCreateProgramWithILKHR(cl_context context,
|
||||
const void * il,
|
||||
size_t length,
|
||||
cl_int * errcode_ret);
|
||||
|
||||
/* Function-pointer type with the same parameter list and return type as
 * clCreateProgramWithILKHR.
 * NOTE(review): carries CL_EXT_SUFFIX__VERSION_1_2, which the prototype
 * itself does not. */
typedef CL_API_ENTRY cl_program
|
||||
(CL_API_CALL *clCreateProgramWithILKHR_fn)(cl_context context,
|
||||
const void * il,
|
||||
size_t length,
|
||||
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
/* Extension: cl_khr_image2d_from_buffer
|
||||
*
|
||||
* This extension allows a 2D image to be created from a cl_mem buffer without
|
||||
* a copy. The type associated with a 2D image created from a buffer in an
|
||||
* OpenCL program is image2d_t. Both the sampler and sampler-less read_image
|
||||
* built-in functions are supported for 2D images and 2D images created from
|
||||
* a buffer. Similarly, the write_image built-ins are also supported for 2D
|
||||
* images created from a buffer.
|
||||
*
|
||||
* When the 2D image from buffer is created, the client must specify the
|
||||
* width, height, image format (i.e. channel order and channel data type)
|
||||
* and optionally the row pitch.
|
||||
*
|
||||
* The pitch specified must be a multiple of
|
||||
* CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR pixels.
|
||||
* The base address of the buffer must be aligned to
|
||||
* CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR pixels.
|
||||
*/
|
||||
|
||||
#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR 0x104A
|
||||
#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR 0x104B
|
||||
|
||||
|
||||
/**************************************
|
||||
* cl_khr_initialize_memory extension *
|
||||
**************************************/
|
||||
|
||||
#define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x2030
|
||||
|
||||
|
||||
/**************************************
|
||||
* cl_khr_terminate_context extension *
|
||||
**************************************/
|
||||
|
||||
#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x2031
|
||||
#define CL_CONTEXT_TERMINATE_KHR 0x2032
|
||||
|
||||
#define cl_khr_terminate_context 1
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clTerminateContextKHR(cl_context context) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL *clTerminateContextKHR_fn)(cl_context context) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
|
||||
/*
|
||||
* Extension: cl_khr_spir
|
||||
*
|
||||
* This extension adds support to create an OpenCL program object from a
|
||||
* Standard Portable Intermediate Representation (SPIR) instance
|
||||
*/
|
||||
|
||||
#define CL_DEVICE_SPIR_VERSIONS 0x40E0
|
||||
#define CL_PROGRAM_BINARY_TYPE_INTERMEDIATE 0x40E1
|
||||
|
||||
|
||||
/*****************************************
|
||||
* cl_khr_create_command_queue extension *
|
||||
*****************************************/
|
||||
#define cl_khr_create_command_queue 1
|
||||
|
||||
typedef cl_bitfield cl_queue_properties_khr;
|
||||
|
||||
extern CL_API_ENTRY cl_command_queue CL_API_CALL
|
||||
clCreateCommandQueueWithPropertiesKHR(cl_context context,
|
||||
cl_device_id device,
|
||||
const cl_queue_properties_khr* properties,
|
||||
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_command_queue
|
||||
(CL_API_CALL *clCreateCommandQueueWithPropertiesKHR_fn)(cl_context context,
|
||||
cl_device_id device,
|
||||
const cl_queue_properties_khr* properties,
|
||||
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
|
||||
/******************************************
|
||||
* cl_nv_device_attribute_query extension *
|
||||
******************************************/
|
||||
|
||||
/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
|
||||
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
|
||||
#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001
|
||||
#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002
|
||||
#define CL_DEVICE_WARP_SIZE_NV 0x4003
|
||||
#define CL_DEVICE_GPU_OVERLAP_NV 0x4004
|
||||
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005
|
||||
#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006
|
||||
|
||||
|
||||
/*********************************
|
||||
* cl_amd_device_attribute_query *
|
||||
*********************************/
|
||||
|
||||
#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036
|
||||
|
||||
|
||||
/*********************************
|
||||
* cl_arm_printf extension
|
||||
*********************************/
|
||||
|
||||
#define CL_PRINTF_CALLBACK_ARM 0x40B0
|
||||
#define CL_PRINTF_BUFFERSIZE_ARM 0x40B1
|
||||
|
||||
|
||||
/***********************************
|
||||
* cl_ext_device_fission extension
|
||||
***********************************/
|
||||
#define cl_ext_device_fission 1
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clReleaseDeviceEXT(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL *clReleaseDeviceEXT_fn)(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clRetainDeviceEXT(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL *clRetainDeviceEXT_fn)(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef cl_ulong cl_device_partition_property_ext;
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clCreateSubDevicesEXT(cl_device_id in_device,
|
||||
const cl_device_partition_property_ext * properties,
|
||||
cl_uint num_entries,
|
||||
cl_device_id * out_devices,
|
||||
cl_uint * num_devices) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL * clCreateSubDevicesEXT_fn)(cl_device_id in_device,
|
||||
const cl_device_partition_property_ext * properties,
|
||||
cl_uint num_entries,
|
||||
cl_device_id * out_devices,
|
||||
cl_uint * num_devices) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
/* cl_device_partition_property_ext */
|
||||
#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050
|
||||
#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051
|
||||
#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052
|
||||
#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053
|
||||
|
||||
/* clGetDeviceInfo selectors */
|
||||
#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054
|
||||
#define CL_DEVICE_PARTITION_TYPES_EXT 0x4055
|
||||
#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056
|
||||
#define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057
|
||||
#define CL_DEVICE_PARTITION_STYLE_EXT 0x4058
|
||||
|
||||
/* error codes */
|
||||
#define CL_DEVICE_PARTITION_FAILED_EXT -1057
|
||||
#define CL_INVALID_PARTITION_COUNT_EXT -1058
|
||||
#define CL_INVALID_PARTITION_NAME_EXT -1059
|
||||
|
||||
/* CL_AFFINITY_DOMAINs */
|
||||
#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1
|
||||
#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2
|
||||
#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3
|
||||
#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4
|
||||
#define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10
|
||||
#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100
|
||||
|
||||
/* cl_device_partition_property_ext list terminators */
|
||||
#define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0)
|
||||
#define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0)
|
||||
#define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1)
|
||||
|
||||
|
||||
/***********************************
|
||||
* cl_ext_migrate_memobject extension definitions
|
||||
***********************************/
|
||||
#define cl_ext_migrate_memobject 1
|
||||
|
||||
typedef cl_bitfield cl_mem_migration_flags_ext;
|
||||
|
||||
#define CL_MIGRATE_MEM_OBJECT_HOST_EXT 0x1
|
||||
|
||||
#define CL_COMMAND_MIGRATE_MEM_OBJECT_EXT 0x4040
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueMigrateMemObjectEXT(cl_command_queue command_queue,
|
||||
cl_uint num_mem_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_mem_migration_flags_ext flags,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event);
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL *clEnqueueMigrateMemObjectEXT_fn)(cl_command_queue command_queue,
|
||||
cl_uint num_mem_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_mem_migration_flags_ext flags,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event);
|
||||
|
||||
|
||||
/*********************************
|
||||
* cl_qcom_ext_host_ptr extension
|
||||
*********************************/
|
||||
#define cl_qcom_ext_host_ptr 1
|
||||
|
||||
#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29)
|
||||
|
||||
#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0
|
||||
#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1
|
||||
#define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2
|
||||
#define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3
|
||||
#define CL_MEM_HOST_UNCACHED_QCOM 0x40A4
|
||||
#define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5
|
||||
#define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6
|
||||
#define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7
|
||||
|
||||
typedef cl_uint cl_image_pitch_info_qcom;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetDeviceImageInfoQCOM(cl_device_id device,
|
||||
size_t image_width,
|
||||
size_t image_height,
|
||||
const cl_image_format *image_format,
|
||||
cl_image_pitch_info_qcom param_name,
|
||||
size_t param_value_size,
|
||||
void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
/* Common descriptor for an external host-memory allocation
 * (cl_qcom_ext_host_ptr section of this header).
 * It is embedded as the first member, ext_host_ptr, of the more specific
 * descriptors cl_mem_ion_host_ptr and cl_mem_android_native_buffer_host_ptr
 * declared later in this file. */
typedef struct _cl_mem_ext_host_ptr
|
||||
{
|
||||
/* Type of external memory allocation. */
|
||||
/* Legal values will be defined in layered extensions. */
|
||||
cl_uint allocation_type;
|
||||
|
||||
/* Host cache policy for this external memory allocation. */
|
||||
cl_uint host_cache_policy;
|
||||
|
||||
} cl_mem_ext_host_ptr;
|
||||
|
||||
|
||||
/*******************************************
|
||||
* cl_qcom_ext_host_ptr_iocoherent extension
|
||||
********************************************/
|
||||
|
||||
/* Cache policy specifying io-coherence */
|
||||
#define CL_MEM_HOST_IOCOHERENT_QCOM 0x40A9
|
||||
|
||||
|
||||
/*********************************
|
||||
* cl_qcom_ion_host_ptr extension
|
||||
*********************************/
|
||||
|
||||
#define CL_MEM_ION_HOST_PTR_QCOM 0x40A8
|
||||
|
||||
/* Descriptor for an ION-backed external allocation
 * (cl_qcom_ion_host_ptr section of this header).
 * The first member is the common cl_mem_ext_host_ptr header; the
 * "type of external memory allocation" mentioned below is that header's
 * allocation_type field, i.e. ext_host_ptr.allocation_type. */
typedef struct _cl_mem_ion_host_ptr
|
||||
{
|
||||
/* Type of external memory allocation. */
|
||||
/* Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. */
|
||||
cl_mem_ext_host_ptr ext_host_ptr;
|
||||
|
||||
/* ION file descriptor */
|
||||
int ion_filedesc;
|
||||
|
||||
/* Host pointer to the ION allocated memory */
|
||||
void* ion_hostptr;
|
||||
|
||||
} cl_mem_ion_host_ptr;
|
||||
|
||||
|
||||
/*********************************
|
||||
* cl_qcom_android_native_buffer_host_ptr extension
|
||||
*********************************/
|
||||
|
||||
#define CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM 0x40C6
|
||||
|
||||
/* Descriptor for an Android-native-buffer external allocation
 * (cl_qcom_android_native_buffer_host_ptr section of this header).
 * The first member is the common cl_mem_ext_host_ptr header; the
 * "type of external memory allocation" mentioned below is that header's
 * allocation_type field, i.e. ext_host_ptr.allocation_type. */
typedef struct _cl_mem_android_native_buffer_host_ptr
|
||||
{
|
||||
/* Type of external memory allocation. */
|
||||
/* Must be CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM for Android native buffers. */
|
||||
cl_mem_ext_host_ptr ext_host_ptr;
|
||||
|
||||
/* Virtual pointer to the android native buffer */
|
||||
void* anb_ptr;
|
||||
|
||||
} cl_mem_android_native_buffer_host_ptr;
|
||||
|
||||
|
||||
/******************************************
|
||||
* cl_img_yuv_image extension *
|
||||
******************************************/
|
||||
|
||||
/* Image formats used in clCreateImage */
|
||||
#define CL_NV21_IMG 0x40D0
|
||||
#define CL_YV12_IMG 0x40D1
|
||||
|
||||
|
||||
/******************************************
|
||||
* cl_img_cached_allocations extension *
|
||||
******************************************/
|
||||
|
||||
/* Flag values used by clCreateBuffer */
|
||||
#define CL_MEM_USE_UNCACHED_CPU_MEMORY_IMG (1 << 26)
|
||||
#define CL_MEM_USE_CACHED_CPU_MEMORY_IMG (1 << 27)
|
||||
|
||||
|
||||
/******************************************
|
||||
* cl_img_use_gralloc_ptr extension *
|
||||
******************************************/
|
||||
#define cl_img_use_gralloc_ptr 1
|
||||
|
||||
/* Flag values used by clCreateBuffer */
|
||||
#define CL_MEM_USE_GRALLOC_PTR_IMG (1 << 28)
|
||||
|
||||
/* To be used by clGetEventInfo: */
|
||||
#define CL_COMMAND_ACQUIRE_GRALLOC_OBJECTS_IMG 0x40D2
|
||||
#define CL_COMMAND_RELEASE_GRALLOC_OBJECTS_IMG 0x40D3
|
||||
|
||||
/* Error code from clEnqueueReleaseGrallocObjectsIMG */
|
||||
#define CL_GRALLOC_RESOURCE_NOT_ACQUIRED_IMG 0x40D4
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireGrallocObjectsIMG(cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseGrallocObjectsIMG(cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
|
||||
/*********************************
|
||||
* cl_khr_subgroups extension
|
||||
*********************************/
|
||||
#define cl_khr_subgroups 1
|
||||
|
||||
#if !defined(CL_VERSION_2_1)
|
||||
/* For OpenCL 2.1 and newer, cl_kernel_sub_group_info is declared in CL.h.
|
||||
In hindsight, there should have been a khr suffix on this type for
|
||||
the extension, but keeping it un-suffixed to maintain backwards
|
||||
compatibility. */
|
||||
typedef cl_uint cl_kernel_sub_group_info;
|
||||
#endif
|
||||
|
||||
/* cl_kernel_sub_group_info */
|
||||
#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR 0x2033
|
||||
#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR 0x2034
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetKernelSubGroupInfoKHR(cl_kernel in_kernel,
|
||||
cl_device_id in_device,
|
||||
cl_kernel_sub_group_info param_name,
|
||||
size_t input_value_size,
|
||||
const void * input_value,
|
||||
size_t param_value_size,
|
||||
void * param_value,
|
||||
size_t * param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL * clGetKernelSubGroupInfoKHR_fn)(cl_kernel in_kernel,
|
||||
cl_device_id in_device,
|
||||
cl_kernel_sub_group_info param_name,
|
||||
size_t input_value_size,
|
||||
const void * input_value,
|
||||
size_t param_value_size,
|
||||
void * param_value,
|
||||
size_t * param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED;
|
||||
|
||||
|
||||
/*********************************
|
||||
* cl_khr_mipmap_image extension
|
||||
*********************************/
|
||||
|
||||
/* cl_sampler_properties */
|
||||
#define CL_SAMPLER_MIP_FILTER_MODE_KHR 0x1155
|
||||
#define CL_SAMPLER_LOD_MIN_KHR 0x1156
|
||||
#define CL_SAMPLER_LOD_MAX_KHR 0x1157
|
||||
|
||||
|
||||
/*********************************
|
||||
* cl_khr_priority_hints extension
|
||||
*********************************/
|
||||
/* This extension define is for backwards compatibility.
|
||||
It shouldn't be required since this extension has no new functions. */
|
||||
#define cl_khr_priority_hints 1
|
||||
|
||||
typedef cl_uint cl_queue_priority_khr;
|
||||
|
||||
/* cl_command_queue_properties */
|
||||
#define CL_QUEUE_PRIORITY_KHR 0x1096
|
||||
|
||||
/* cl_queue_priority_khr */
|
||||
#define CL_QUEUE_PRIORITY_HIGH_KHR (1<<0)
|
||||
#define CL_QUEUE_PRIORITY_MED_KHR (1<<1)
|
||||
#define CL_QUEUE_PRIORITY_LOW_KHR (1<<2)
|
||||
|
||||
|
||||
/*********************************
|
||||
* cl_khr_throttle_hints extension
|
||||
*********************************/
|
||||
/* This extension define is for backwards compatibility.
|
||||
It shouldn't be required since this extension has no new functions. */
|
||||
#define cl_khr_throttle_hints 1
|
||||
|
||||
typedef cl_uint cl_queue_throttle_khr;
|
||||
|
||||
/* cl_command_queue_properties */
|
||||
#define CL_QUEUE_THROTTLE_KHR 0x1097
|
||||
|
||||
/* cl_queue_throttle_khr */
|
||||
#define CL_QUEUE_THROTTLE_HIGH_KHR (1<<0)
|
||||
#define CL_QUEUE_THROTTLE_MED_KHR (1<<1)
|
||||
#define CL_QUEUE_THROTTLE_LOW_KHR (1<<2)
|
||||
|
||||
|
||||
/*********************************
|
||||
* cl_khr_subgroup_named_barrier
|
||||
*********************************/
|
||||
/* This extension define is for backwards compatibility.
|
||||
It shouldn't be required since this extension has no new functions. */
|
||||
#define cl_khr_subgroup_named_barrier 1
|
||||
|
||||
/* cl_device_info */
|
||||
#define CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR 0x2035
|
||||
|
||||
|
||||
/**********************************
|
||||
* cl_arm_import_memory extension *
|
||||
**********************************/
|
||||
#define cl_arm_import_memory 1
|
||||
|
||||
typedef intptr_t cl_import_properties_arm;
|
||||
|
||||
/* Default and valid properties name for cl_arm_import_memory */
|
||||
#define CL_IMPORT_TYPE_ARM 0x40B2
|
||||
|
||||
/* Host process memory type default value for CL_IMPORT_TYPE_ARM property */
|
||||
#define CL_IMPORT_TYPE_HOST_ARM 0x40B3
|
||||
|
||||
/* DMA BUF memory type value for CL_IMPORT_TYPE_ARM property */
|
||||
#define CL_IMPORT_TYPE_DMA_BUF_ARM 0x40B4
|
||||
|
||||
/* Protected DMA BUF memory type value for CL_IMPORT_TYPE_ARM property */
|
||||
#define CL_IMPORT_TYPE_PROTECTED_ARM 0x40B5
|
||||
|
||||
/* This extension adds a new function that allows for direct memory import into
|
||||
* OpenCL via the clImportMemoryARM function.
|
||||
*
|
||||
* Memory imported through this interface will be mapped into the device's page
|
||||
* tables directly, providing zero copy access. It will never fall back to copy
|
||||
* operations and aliased buffers.
|
||||
*
|
||||
* Types of memory supported for import are specified as additional extension
|
||||
* strings.
|
||||
*
|
||||
* This extension produces cl_mem allocations which are compatible with all other
|
||||
* users of cl_mem in the standard API.
|
||||
*
|
||||
* This extension maps pages with the same properties as the normal buffer creation
|
||||
* function clCreateBuffer.
|
||||
*/
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clImportMemoryARM( cl_context context,
|
||||
cl_mem_flags flags,
|
||||
const cl_import_properties_arm *properties,
|
||||
void *memory,
|
||||
size_t size,
|
||||
cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_0;
|
||||
|
||||
|
||||
/******************************************
|
||||
* cl_arm_shared_virtual_memory extension *
|
||||
******************************************/
|
||||
#define cl_arm_shared_virtual_memory 1
|
||||
|
||||
/* Used by clGetDeviceInfo */
|
||||
#define CL_DEVICE_SVM_CAPABILITIES_ARM 0x40B6
|
||||
|
||||
/* Used by clGetMemObjectInfo */
|
||||
#define CL_MEM_USES_SVM_POINTER_ARM 0x40B7
|
||||
|
||||
/* Used by clSetKernelExecInfoARM: */
|
||||
#define CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM 0x40B8
|
||||
#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_ARM 0x40B9
|
||||
|
||||
/* To be used by clGetEventInfo: */
|
||||
#define CL_COMMAND_SVM_FREE_ARM 0x40BA
|
||||
#define CL_COMMAND_SVM_MEMCPY_ARM 0x40BB
|
||||
#define CL_COMMAND_SVM_MEMFILL_ARM 0x40BC
|
||||
#define CL_COMMAND_SVM_MAP_ARM 0x40BD
|
||||
#define CL_COMMAND_SVM_UNMAP_ARM 0x40BE
|
||||
|
||||
/* Flag values returned by clGetDeviceInfo with CL_DEVICE_SVM_CAPABILITIES_ARM as the param_name. */
|
||||
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_ARM (1 << 0)
|
||||
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_ARM (1 << 1)
|
||||
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_ARM (1 << 2)
|
||||
#define CL_DEVICE_SVM_ATOMICS_ARM (1 << 3)
|
||||
|
||||
/* Flag values used by clSVMAllocARM: */
|
||||
#define CL_MEM_SVM_FINE_GRAIN_BUFFER_ARM (1 << 10)
|
||||
#define CL_MEM_SVM_ATOMICS_ARM (1 << 11)
|
||||
|
||||
typedef cl_bitfield cl_svm_mem_flags_arm;
|
||||
typedef cl_uint cl_kernel_exec_info_arm;
|
||||
typedef cl_bitfield cl_device_svm_capabilities_arm;
|
||||
|
||||
extern CL_API_ENTRY void * CL_API_CALL
|
||||
clSVMAllocARM(cl_context context,
|
||||
cl_svm_mem_flags_arm flags,
|
||||
size_t size,
|
||||
cl_uint alignment) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY void CL_API_CALL
|
||||
clSVMFreeARM(cl_context context,
|
||||
void * svm_pointer) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueSVMFreeARM(cl_command_queue command_queue,
|
||||
cl_uint num_svm_pointers,
|
||||
void * svm_pointers[],
|
||||
void (CL_CALLBACK * pfn_free_func)(cl_command_queue queue,
|
||||
cl_uint num_svm_pointers,
|
||||
void * svm_pointers[],
|
||||
void * user_data),
|
||||
void * user_data,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueSVMMemcpyARM(cl_command_queue command_queue,
|
||||
cl_bool blocking_copy,
|
||||
void * dst_ptr,
|
||||
const void * src_ptr,
|
||||
size_t size,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueSVMMemFillARM(cl_command_queue command_queue,
|
||||
void * svm_ptr,
|
||||
const void * pattern,
|
||||
size_t pattern_size,
|
||||
size_t size,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueSVMMapARM(cl_command_queue command_queue,
|
||||
cl_bool blocking_map,
|
||||
cl_map_flags flags,
|
||||
void * svm_ptr,
|
||||
size_t size,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueSVMUnmapARM(cl_command_queue command_queue,
|
||||
void * svm_ptr,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clSetKernelArgSVMPointerARM(cl_kernel kernel,
|
||||
cl_uint arg_index,
|
||||
const void * arg_value) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clSetKernelExecInfoARM(cl_kernel kernel,
|
||||
cl_kernel_exec_info_arm param_name,
|
||||
size_t param_value_size,
|
||||
const void * param_value) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
/********************************
|
||||
* cl_arm_get_core_id extension *
|
||||
********************************/
|
||||
|
||||
#ifdef CL_VERSION_1_2
|
||||
|
||||
#define cl_arm_get_core_id 1
|
||||
|
||||
/* Device info property for bitfield of cores present */
|
||||
#define CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM 0x40BF
|
||||
|
||||
#endif /* CL_VERSION_1_2 */
|
||||
|
||||
/*********************************
|
||||
* cl_arm_job_slot_selection
|
||||
*********************************/
|
||||
|
||||
#define cl_arm_job_slot_selection 1
|
||||
|
||||
/* cl_device_info */
|
||||
#define CL_DEVICE_JOB_SLOTS_ARM 0x41E0
|
||||
|
||||
/* cl_command_queue_properties */
|
||||
#define CL_QUEUE_JOB_SLOT_ARM 0x41E1
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* __CL_EXT_H */
|
423
benchmarks/new_opencl/include/CL/cl_ext_intel.h
Normal file
423
benchmarks/new_opencl/include/CL/cl_ext_intel.h
Normal file
|
@ -0,0 +1,423 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2019 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
******************************************************************************/
|
||||
/*****************************************************************************\
|
||||
|
||||
Copyright (c) 2013-2019 Intel Corporation All Rights Reserved.
|
||||
|
||||
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
|
||||
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
File Name: cl_ext_intel.h
|
||||
|
||||
Abstract:
|
||||
|
||||
Notes:
|
||||
|
||||
\*****************************************************************************/
|
||||
|
||||
#ifndef __CL_EXT_INTEL_H
|
||||
#define __CL_EXT_INTEL_H
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/***************************************
|
||||
* cl_intel_thread_local_exec extension *
|
||||
****************************************/
|
||||
|
||||
#define cl_intel_thread_local_exec 1
|
||||
|
||||
#define CL_QUEUE_THREAD_LOCAL_EXEC_ENABLE_INTEL (((cl_bitfield)1) << 31)
|
||||
|
||||
/***********************************************
|
||||
* cl_intel_device_partition_by_names extension *
|
||||
************************************************/
|
||||
|
||||
#define cl_intel_device_partition_by_names 1
|
||||
|
||||
#define CL_DEVICE_PARTITION_BY_NAMES_INTEL 0x4052
|
||||
#define CL_PARTITION_BY_NAMES_LIST_END_INTEL -1
|
||||
|
||||
/************************************************
|
||||
* cl_intel_accelerator extension *
|
||||
* cl_intel_motion_estimation extension *
|
||||
* cl_intel_advanced_motion_estimation extension *
|
||||
*************************************************/
|
||||
|
||||
#define cl_intel_accelerator 1
|
||||
#define cl_intel_motion_estimation 1
|
||||
#define cl_intel_advanced_motion_estimation 1
|
||||
|
||||
typedef struct _cl_accelerator_intel* cl_accelerator_intel;
|
||||
typedef cl_uint cl_accelerator_type_intel;
|
||||
typedef cl_uint cl_accelerator_info_intel;
|
||||
|
||||
/* Descriptor for a motion-estimation accelerator (cl_intel_motion_estimation).
 * NOTE(review): presumably supplied as the `descriptor` argument of
 * clCreateAcceleratorINTEL together with
 * CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL -- confirm against the
 * extension specification. The per-field value sets noted here are inferred
 * from the matching CL_ME_* macro names in this header. */
typedef struct _cl_motion_estimation_desc_intel {
|
||||
cl_uint mb_block_type; /* presumably one of CL_ME_MB_TYPE_*_INTEL */
|
||||
cl_uint subpixel_mode; /* presumably one of CL_ME_SUBPIXEL_MODE_*_INTEL */
|
||||
cl_uint sad_adjust_mode; /* presumably one of CL_ME_SAD_ADJUST_MODE_*_INTEL */
|
||||
cl_uint search_path_type; /* presumably one of CL_ME_SEARCH_PATH_RADIUS_*_INTEL */
|
||||
} cl_motion_estimation_desc_intel;
|
||||
|
||||
/* error codes */
|
||||
#define CL_INVALID_ACCELERATOR_INTEL -1094
|
||||
#define CL_INVALID_ACCELERATOR_TYPE_INTEL -1095
|
||||
#define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL -1096
|
||||
#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL -1097
|
||||
|
||||
/* cl_accelerator_type_intel */
|
||||
#define CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL 0x0
|
||||
|
||||
/* cl_accelerator_info_intel */
|
||||
#define CL_ACCELERATOR_DESCRIPTOR_INTEL 0x4090
|
||||
#define CL_ACCELERATOR_REFERENCE_COUNT_INTEL 0x4091
|
||||
#define CL_ACCELERATOR_CONTEXT_INTEL 0x4092
|
||||
#define CL_ACCELERATOR_TYPE_INTEL 0x4093
|
||||
|
||||
/* cl_motion_estimation_desc_intel flags */
|
||||
#define CL_ME_MB_TYPE_16x16_INTEL 0x0
|
||||
#define CL_ME_MB_TYPE_8x8_INTEL 0x1
|
||||
#define CL_ME_MB_TYPE_4x4_INTEL 0x2
|
||||
|
||||
#define CL_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0
|
||||
#define CL_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1
|
||||
#define CL_ME_SUBPIXEL_MODE_QPEL_INTEL 0x2
|
||||
|
||||
#define CL_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0
|
||||
#define CL_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x1
|
||||
|
||||
#define CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL 0x0
|
||||
#define CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL 0x1
|
||||
#define CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL 0x5
|
||||
|
||||
#define CL_ME_SKIP_BLOCK_TYPE_16x16_INTEL 0x0
|
||||
#define CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL 0x1
|
||||
#define CL_ME_LUMA_INTRA_PREDICT_ENABLED_INTEL 0x2
|
||||
#define CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL 0x4
|
||||
|
||||
#define CL_ME_FORWARD_INPUT_MODE_INTEL 0x1
|
||||
#define CL_ME_BACKWARD_INPUT_MODE_INTEL 0x2
|
||||
#define CL_ME_BIDIRECTION_INPUT_MODE_INTEL 0x3
|
||||
|
||||
#define CL_ME_BIDIR_WEIGHT_QUARTER_INTEL 16
|
||||
#define CL_ME_BIDIR_WEIGHT_THIRD_INTEL 21
|
||||
#define CL_ME_BIDIR_WEIGHT_HALF_INTEL 32
|
||||
#define CL_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 43
|
||||
#define CL_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 48
|
||||
|
||||
#define CL_ME_COST_PENALTY_NONE_INTEL 0x0
|
||||
#define CL_ME_COST_PENALTY_LOW_INTEL 0x1
|
||||
#define CL_ME_COST_PENALTY_NORMAL_INTEL 0x2
|
||||
#define CL_ME_COST_PENALTY_HIGH_INTEL 0x3
|
||||
|
||||
#define CL_ME_COST_PRECISION_QPEL_INTEL 0x0
|
||||
#define CL_ME_COST_PRECISION_HPEL_INTEL 0x1
|
||||
#define CL_ME_COST_PRECISION_PEL_INTEL 0x2
|
||||
#define CL_ME_COST_PRECISION_DPEL_INTEL 0x3
|
||||
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3
|
||||
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8
|
||||
|
||||
#define CL_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0
|
||||
#define CL_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
|
||||
#define CL_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2
|
||||
#define CL_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3
|
||||
|
||||
/* cl_device_info */
|
||||
#define CL_DEVICE_ME_VERSION_INTEL 0x407E
|
||||
|
||||
#define CL_ME_VERSION_LEGACY_INTEL 0x0
|
||||
#define CL_ME_VERSION_ADVANCED_VER_1_INTEL 0x1
|
||||
#define CL_ME_VERSION_ADVANCED_VER_2_INTEL 0x2
|
||||
|
||||
extern CL_API_ENTRY cl_accelerator_intel CL_API_CALL
|
||||
clCreateAcceleratorINTEL(
|
||||
cl_context context,
|
||||
cl_accelerator_type_intel accelerator_type,
|
||||
size_t descriptor_size,
|
||||
const void* descriptor,
|
||||
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_accelerator_intel (CL_API_CALL *clCreateAcceleratorINTEL_fn)(
|
||||
cl_context context,
|
||||
cl_accelerator_type_intel accelerator_type,
|
||||
size_t descriptor_size,
|
||||
const void* descriptor,
|
||||
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetAcceleratorInfoINTEL(
|
||||
cl_accelerator_intel accelerator,
|
||||
cl_accelerator_info_intel param_name,
|
||||
size_t param_value_size,
|
||||
void* param_value,
|
||||
size_t* param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetAcceleratorInfoINTEL_fn)(
|
||||
cl_accelerator_intel accelerator,
|
||||
cl_accelerator_info_intel param_name,
|
||||
size_t param_value_size,
|
||||
void* param_value,
|
||||
size_t* param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clRetainAcceleratorINTEL(
|
||||
cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clRetainAcceleratorINTEL_fn)(
|
||||
cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clReleaseAcceleratorINTEL(
|
||||
cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clReleaseAcceleratorINTEL_fn)(
|
||||
cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
/******************************************
|
||||
* cl_intel_simultaneous_sharing extension *
|
||||
*******************************************/
|
||||
|
||||
#define cl_intel_simultaneous_sharing 1
|
||||
|
||||
#define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL 0x4104
|
||||
#define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL 0x4105
|
||||
|
||||
/***********************************
|
||||
* cl_intel_egl_image_yuv extension *
|
||||
************************************/
|
||||
|
||||
#define cl_intel_egl_image_yuv 1
|
||||
|
||||
#define CL_EGL_YUV_PLANE_INTEL 0x4107
|
||||
|
||||
/********************************
|
||||
* cl_intel_packed_yuv extension *
|
||||
*********************************/
|
||||
|
||||
#define cl_intel_packed_yuv 1
|
||||
|
||||
#define CL_YUYV_INTEL 0x4076
|
||||
#define CL_UYVY_INTEL 0x4077
|
||||
#define CL_YVYU_INTEL 0x4078
|
||||
#define CL_VYUY_INTEL 0x4079
|
||||
|
||||
/********************************************
|
||||
* cl_intel_required_subgroup_size extension *
|
||||
*********************************************/
|
||||
|
||||
#define cl_intel_required_subgroup_size 1
|
||||
|
||||
#define CL_DEVICE_SUB_GROUP_SIZES_INTEL 0x4108
|
||||
#define CL_KERNEL_SPILL_MEM_SIZE_INTEL 0x4109
|
||||
#define CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL 0x410A
|
||||
|
||||
/****************************************
|
||||
* cl_intel_driver_diagnostics extension *
|
||||
*****************************************/
|
||||
|
||||
#define cl_intel_driver_diagnostics 1
|
||||
|
||||
typedef cl_uint cl_diagnostics_verbose_level;
|
||||
|
||||
#define CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL 0x4106
|
||||
|
||||
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL ( 0xff )
|
||||
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL ( 1 )
|
||||
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL ( 1 << 1 )
|
||||
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL ( 1 << 2 )
|
||||
|
||||
/********************************
|
||||
* cl_intel_planar_yuv extension *
|
||||
*********************************/
|
||||
|
||||
#define CL_NV12_INTEL 0x410E
|
||||
|
||||
#define CL_MEM_NO_ACCESS_INTEL ( 1 << 24 )
|
||||
#define CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL ( 1 << 25 )
|
||||
|
||||
#define CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL 0x417E
|
||||
#define CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL 0x417F
|
||||
|
||||
/*******************************************************
|
||||
* cl_intel_device_side_avc_motion_estimation extension *
|
||||
********************************************************/
|
||||
|
||||
#define CL_DEVICE_AVC_ME_VERSION_INTEL 0x410B
|
||||
#define CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL 0x410C
|
||||
#define CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL 0x410D
|
||||
|
||||
/* Value reported for CL_DEVICE_AVC_ME_VERSION_INTEL when the device has no
 * device-side AVC motion-estimation support. Deliberately has no trailing
 * ';' so the macro can be used inside expressions. */
#define CL_AVC_ME_VERSION_0_INTEL 0x0 /* No support. */
|
||||
/* Value reported for CL_DEVICE_AVC_ME_VERSION_INTEL by the first supported
 * version of device-side AVC motion estimation. Deliberately has no trailing
 * ';' so the macro can be used inside expressions. */
#define CL_AVC_ME_VERSION_1_INTEL 0x1 /* First supported version. */
|
||||
|
||||
#define CL_AVC_ME_MAJOR_16x16_INTEL 0x0
|
||||
#define CL_AVC_ME_MAJOR_16x8_INTEL 0x1
|
||||
#define CL_AVC_ME_MAJOR_8x16_INTEL 0x2
|
||||
#define CL_AVC_ME_MAJOR_8x8_INTEL 0x3
|
||||
|
||||
#define CL_AVC_ME_MINOR_8x8_INTEL 0x0
|
||||
#define CL_AVC_ME_MINOR_8x4_INTEL 0x1
|
||||
#define CL_AVC_ME_MINOR_4x8_INTEL 0x2
|
||||
#define CL_AVC_ME_MINOR_4x4_INTEL 0x3
|
||||
|
||||
#define CL_AVC_ME_MAJOR_FORWARD_INTEL 0x0
|
||||
#define CL_AVC_ME_MAJOR_BACKWARD_INTEL 0x1
|
||||
#define CL_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2
|
||||
|
||||
#define CL_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0
|
||||
#define CL_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E
|
||||
#define CL_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D
|
||||
#define CL_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B
|
||||
#define CL_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77
|
||||
#define CL_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F
|
||||
#define CL_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F
|
||||
#define CL_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F
|
||||
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_16x12_RADIUS_INTEL 0x9
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_4x4_RADIUS_INTEL 0x2
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_2x2_RADIUS_INTEL 0xa
|
||||
|
||||
#define CL_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0
|
||||
#define CL_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2
|
||||
|
||||
#define CL_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0
|
||||
#define CL_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1
|
||||
#define CL_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3
|
||||
|
||||
#define CL_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0
|
||||
#define CL_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1
|
||||
#define CL_AVC_ME_COST_PRECISION_PEL_INTEL 0x2
|
||||
#define CL_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3
|
||||
|
||||
#define CL_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10
|
||||
#define CL_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15
|
||||
#define CL_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20
|
||||
#define CL_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B
|
||||
#define CL_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30
|
||||
|
||||
#define CL_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0
|
||||
#define CL_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2
|
||||
#define CL_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4
|
||||
#define CL_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8
|
||||
|
||||
#define CL_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0
|
||||
#define CL_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000
|
||||
|
||||
#define CL_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL ( 0x1 << 24 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL ( 0x2 << 24 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL ( 0x3 << 24 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL ( 0x55 << 24 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL ( 0xAA << 24 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL ( 0xFF << 24 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL ( 0x1 << 24 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL ( 0x2 << 24 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL ( 0x1 << 26 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL ( 0x2 << 26 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL ( 0x1 << 28 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL ( 0x2 << 28 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL ( 0x1 << 30 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL ( 0x2 << 30 )
|
||||
|
||||
#define CL_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00
|
||||
#define CL_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80
|
||||
|
||||
#define CL_AVC_ME_INTRA_16x16_INTEL 0x0
|
||||
#define CL_AVC_ME_INTRA_8x8_INTEL 0x1
|
||||
#define CL_AVC_ME_INTRA_4x4_INTEL 0x2
|
||||
|
||||
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6
|
||||
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5
|
||||
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3
|
||||
|
||||
#define CL_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60
|
||||
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10
|
||||
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8
|
||||
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4
|
||||
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8
|
||||
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0
|
||||
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
|
||||
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2
|
||||
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3
|
||||
|
||||
#define CL_AVC_ME_FRAME_FORWARD_INTEL 0x1
|
||||
#define CL_AVC_ME_FRAME_BACKWARD_INTEL 0x2
|
||||
#define CL_AVC_ME_FRAME_DUAL_INTEL 0x3
|
||||
|
||||
#define CL_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0
|
||||
#define CL_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1
|
||||
#define CL_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2
|
||||
|
||||
#define CL_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0
|
||||
#define CL_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __CL_EXT_INTEL_H */
|
171
benchmarks/new_opencl/include/CL/cl_gl.h
Normal file
171
benchmarks/new_opencl/include/CL/cl_gl.h
Normal file
|
@ -0,0 +1,171 @@
|
|||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2019 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_GL_H
|
||||
#define __OPENCL_CL_GL_H
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef cl_uint cl_gl_object_type;
|
||||
typedef cl_uint cl_gl_texture_info;
|
||||
typedef cl_uint cl_gl_platform_info;
|
||||
typedef struct __GLsync *cl_GLsync;
|
||||
|
||||
/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */
|
||||
#define CL_GL_OBJECT_BUFFER 0x2000
|
||||
#define CL_GL_OBJECT_TEXTURE2D 0x2001
|
||||
#define CL_GL_OBJECT_TEXTURE3D 0x2002
|
||||
#define CL_GL_OBJECT_RENDERBUFFER 0x2003
|
||||
#ifdef CL_VERSION_1_2
|
||||
#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E
|
||||
#define CL_GL_OBJECT_TEXTURE1D 0x200F
|
||||
#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010
|
||||
#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011
|
||||
#endif
|
||||
|
||||
/* cl_gl_texture_info */
|
||||
#define CL_GL_TEXTURE_TARGET 0x2004
|
||||
#define CL_GL_MIPMAP_LEVEL 0x2005
|
||||
#ifdef CL_VERSION_1_2
|
||||
#define CL_GL_NUM_SAMPLES 0x2012
|
||||
#endif
|
||||
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLBuffer(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLuint bufobj,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
#ifdef CL_VERSION_1_2
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLenum target,
|
||||
cl_GLint miplevel,
|
||||
cl_GLuint texture,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
#endif
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLRenderbuffer(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLuint renderbuffer,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetGLObjectInfo(cl_mem memobj,
|
||||
cl_gl_object_type * gl_object_type,
|
||||
cl_GLuint * gl_object_name) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetGLTextureInfo(cl_mem memobj,
|
||||
cl_gl_texture_info param_name,
|
||||
size_t param_value_size,
|
||||
void * param_value,
|
||||
size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireGLObjects(cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseGLObjects(cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
|
||||
/* Deprecated OpenCL 1.1 APIs */
|
||||
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture2D(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLenum target,
|
||||
cl_GLint miplevel,
|
||||
cl_GLuint texture,
|
||||
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||
|
||||
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture3D(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLenum target,
|
||||
cl_GLint miplevel,
|
||||
cl_GLuint texture,
|
||||
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||
|
||||
/* cl_khr_gl_sharing extension */
|
||||
|
||||
#define cl_khr_gl_sharing 1
|
||||
|
||||
typedef cl_uint cl_gl_context_info;
|
||||
|
||||
/* Additional Error Codes */
|
||||
#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000
|
||||
|
||||
/* cl_gl_context_info */
|
||||
#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006
|
||||
#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007
|
||||
|
||||
/* Additional cl_context_properties */
|
||||
#define CL_GL_CONTEXT_KHR 0x2008
|
||||
#define CL_EGL_DISPLAY_KHR 0x2009
|
||||
#define CL_GLX_DISPLAY_KHR 0x200A
|
||||
#define CL_WGL_HDC_KHR 0x200B
|
||||
#define CL_CGL_SHAREGROUP_KHR 0x200C
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetGLContextInfoKHR(const cl_context_properties * properties,
|
||||
cl_gl_context_info param_name,
|
||||
size_t param_value_size,
|
||||
void * param_value,
|
||||
size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
|
||||
const cl_context_properties * properties,
|
||||
cl_gl_context_info param_name,
|
||||
size_t param_value_size,
|
||||
void * param_value,
|
||||
size_t * param_value_size_ret);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_GL_H */
|
52
benchmarks/new_opencl/include/CL/cl_gl_ext.h
Normal file
52
benchmarks/new_opencl/include/CL/cl_gl_ext.h
Normal file
|
@ -0,0 +1,52 @@
|
|||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2019 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_GL_EXT_H
|
||||
#define __OPENCL_CL_GL_EXT_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <CL/cl_gl.h>
|
||||
|
||||
/*
|
||||
* cl_khr_gl_event extension
|
||||
*/
|
||||
#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D
|
||||
|
||||
extern CL_API_ENTRY cl_event CL_API_CALL
|
||||
clCreateEventFromGLsyncKHR(cl_context context,
|
||||
cl_GLsync cl_GLsync,
|
||||
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_GL_EXT_H */
|
1384
benchmarks/new_opencl/include/CL/cl_platform.h
Normal file
1384
benchmarks/new_opencl/include/CL/cl_platform.h
Normal file
File diff suppressed because it is too large
Load diff
172
benchmarks/new_opencl/include/CL/cl_va_api_media_sharing_intel.h
Normal file
172
benchmarks/new_opencl/include/CL/cl_va_api_media_sharing_intel.h
Normal file
|
@ -0,0 +1,172 @@
|
|||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2019 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
/*****************************************************************************\
|
||||
|
||||
Copyright (c) 2013-2019 Intel Corporation All Rights Reserved.
|
||||
|
||||
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
|
||||
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
File Name: cl_va_api_media_sharing_intel.h
|
||||
|
||||
Abstract:
|
||||
|
||||
Notes:
|
||||
|
||||
\*****************************************************************************/
|
||||
|
||||
|
||||
#ifndef __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
|
||||
#define __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
#include <va/va.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/******************************************
|
||||
* cl_intel_va_api_media_sharing extension *
|
||||
*******************************************/
|
||||
|
||||
#define cl_intel_va_api_media_sharing 1
|
||||
|
||||
/* error codes */
|
||||
#define CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL -1098
|
||||
#define CL_INVALID_VA_API_MEDIA_SURFACE_INTEL -1099
|
||||
#define CL_VA_API_MEDIA_SURFACE_ALREADY_ACQUIRED_INTEL -1100
|
||||
#define CL_VA_API_MEDIA_SURFACE_NOT_ACQUIRED_INTEL -1101
|
||||
|
||||
/* cl_va_api_device_source_intel */
|
||||
#define CL_VA_API_DISPLAY_INTEL 0x4094
|
||||
|
||||
/* cl_va_api_device_set_intel */
|
||||
#define CL_PREFERRED_DEVICES_FOR_VA_API_INTEL 0x4095
|
||||
#define CL_ALL_DEVICES_FOR_VA_API_INTEL 0x4096
|
||||
|
||||
/* cl_context_info */
|
||||
#define CL_CONTEXT_VA_API_DISPLAY_INTEL 0x4097
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_VA_API_MEDIA_SURFACE_INTEL 0x4098
|
||||
|
||||
/* cl_image_info */
|
||||
#define CL_IMAGE_VA_API_PLANE_INTEL 0x4099
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_ACQUIRE_VA_API_MEDIA_SURFACES_INTEL 0x409A
|
||||
#define CL_COMMAND_RELEASE_VA_API_MEDIA_SURFACES_INTEL 0x409B
|
||||
|
||||
typedef cl_uint cl_va_api_device_source_intel;
|
||||
typedef cl_uint cl_va_api_device_set_intel;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetDeviceIDsFromVA_APIMediaAdapterINTEL(
|
||||
cl_platform_id platform,
|
||||
cl_va_api_device_source_intel media_adapter_type,
|
||||
void* media_adapter,
|
||||
cl_va_api_device_set_intel media_adapter_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id* devices,
|
||||
cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL * clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)(
|
||||
cl_platform_id platform,
|
||||
cl_va_api_device_source_intel media_adapter_type,
|
||||
void* media_adapter,
|
||||
cl_va_api_device_set_intel media_adapter_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id* devices,
|
||||
cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromVA_APIMediaSurfaceINTEL(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
VASurfaceID* surface,
|
||||
cl_uint plane,
|
||||
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL * clCreateFromVA_APIMediaSurfaceINTEL_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
VASurfaceID* surface,
|
||||
cl_uint plane,
|
||||
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireVA_APIMediaSurfacesINTEL(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseVA_APIMediaSurfacesINTEL(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H */
|
||||
|
86
benchmarks/new_opencl/include/CL/cl_version.h
Normal file
86
benchmarks/new_opencl/include/CL/cl_version.h
Normal file
|
@ -0,0 +1,86 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2018 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __CL_VERSION_H
|
||||
#define __CL_VERSION_H
|
||||
|
||||
/* Detect which version to target */
|
||||
#if !defined(CL_TARGET_OPENCL_VERSION)
|
||||
#pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. Defaulting to 220 (OpenCL 2.2)")
|
||||
#define CL_TARGET_OPENCL_VERSION 220
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION != 100 && \
|
||||
CL_TARGET_OPENCL_VERSION != 110 && \
|
||||
CL_TARGET_OPENCL_VERSION != 120 && \
|
||||
CL_TARGET_OPENCL_VERSION != 200 && \
|
||||
CL_TARGET_OPENCL_VERSION != 210 && \
|
||||
CL_TARGET_OPENCL_VERSION != 220
|
||||
#pragma message("cl_version: CL_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220). Defaulting to 220 (OpenCL 2.2)")
|
||||
#undef CL_TARGET_OPENCL_VERSION
|
||||
#define CL_TARGET_OPENCL_VERSION 220
|
||||
#endif
|
||||
|
||||
|
||||
/* OpenCL Version */
|
||||
#if CL_TARGET_OPENCL_VERSION >= 220 && !defined(CL_VERSION_2_2)
|
||||
#define CL_VERSION_2_2 1
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION >= 210 && !defined(CL_VERSION_2_1)
|
||||
#define CL_VERSION_2_1 1
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION >= 200 && !defined(CL_VERSION_2_0)
|
||||
#define CL_VERSION_2_0 1
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION >= 120 && !defined(CL_VERSION_1_2)
|
||||
#define CL_VERSION_1_2 1
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION >= 110 && !defined(CL_VERSION_1_1)
|
||||
#define CL_VERSION_1_1 1
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION >= 100 && !defined(CL_VERSION_1_0)
|
||||
#define CL_VERSION_1_0 1
|
||||
#endif
|
||||
|
||||
/* Allow deprecated APIs for older OpenCL versions. */
|
||||
#if CL_TARGET_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS)
|
||||
#define CL_USE_DEPRECATED_OPENCL_2_1_APIS
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS)
|
||||
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS)
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS)
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_0_APIS
|
||||
#endif
|
||||
|
||||
#endif /* __CL_VERSION_H */
|
47
benchmarks/new_opencl/include/CL/opencl.h
Normal file
47
benchmarks/new_opencl/include/CL/opencl.h
Normal file
|
@ -0,0 +1,47 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2015 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
******************************************************************************/
|
||||
|
||||
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
|
||||
|
||||
#ifndef __OPENCL_H
|
||||
#define __OPENCL_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_gl.h>
|
||||
#include <CL/cl_gl_ext.h>
|
||||
#include <CL/cl_ext.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_H */
|
BIN
benchmarks/new_opencl/lib/libOpenCL.so
Normal file
BIN
benchmarks/new_opencl/lib/libOpenCL.so
Normal file
Binary file not shown.
BIN
benchmarks/new_opencl/lib/libOpenCL.so.2
Normal file
BIN
benchmarks/new_opencl/lib/libOpenCL.so.2
Normal file
Binary file not shown.
BIN
benchmarks/new_opencl/lib/libOpenCL.so.2.5.0
Normal file
BIN
benchmarks/new_opencl/lib/libOpenCL.so.2.5.0
Normal file
Binary file not shown.
44
benchmarks/new_opencl/nearn/Makefile
Normal file
44
benchmarks/new_opencl/nearn/Makefile
Normal file
|
@ -0,0 +1,44 @@
|
|||
# Build and run rules for the `nearn` OpenCL benchmark.
#
# Tool and library locations; each can be overridden from the environment
# or the make command line.
RISCV_TOOL_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
POCLCC_PATH ?= $(wildcard ~/dev/pocl/drops_vortex_cc)
POCLRT_PATH ?= $(wildcard ..)
DRIVER_PATH ?= $(wildcard ../../../driver/sw)

CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors

CXXFLAGS += -I$(POCLRT_PATH)/include

LDFLAGS += -L$(POCLRT_PATH)/lib -L$(DRIVER_PATH)/simx -lOpenCL -lvortex

PROJECT = nearn

SRCS = main.cc clutils.cpp utils.cpp

# These targets do not produce a file of the same name; declaring them phony
# keeps them working even if a file called `all`, `clean`, ... exists.
.PHONY: all clean run-fpga run-ase run-simx run-rtlsim

all: $(PROJECT)

# Compile the OpenCL kernel with poclcc.
kernel.pocl: kernel.cl
	POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCLCC_PATH)/lib:$(DRIVER_PATH)/simx $(POCLCC_PATH)/bin/poclcc -o kernel.pocl kernel.cl

# Host executable: all sources are compiled and linked in a single step.
$(PROJECT): $(SRCS)
	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@

# The run-* targets differ only in which driver directory is placed on
# LD_LIBRARY_PATH when the executable is launched.
run-fpga: $(PROJECT) kernel.pocl
	LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-ase: $(PROJECT) kernel.pocl
	LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-simx: $(PROJECT) kernel.pocl
	LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-rtlsim: $(PROJECT) kernel.pocl
	LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)

# Header dependency list generated by the compiler (-MM).
.depend: $(SRCS)
	$(CXX) $(CXXFLAGS) -MM $^ > .depend;

clean:
	rm -rf $(PROJECT) *.o *.dump .depend

# Skip the dependency file when the only goal is `clean`, so that cleaning
# does not first regenerate .depend.
ifneq ($(MAKECMDGOALS),clean)
-include .depend
endif
|
33
benchmarks/new_opencl/nearn/README.txt
Executable file
33
benchmarks/new_opencl/nearn/README.txt
Executable file
|
@ -0,0 +1,33 @@
|
|||
The Nearest Neighbor application computes the nearest location to a specific
|
||||
latitude and longitude for a number of hurricanes (data from: http://weather.unisys.com/hurricane/).
|
||||
|
||||
The Makefile may need to be adjusted for different machines, but it was written for Mac OS X and
|
||||
Linux with either NVIDIA or AMD OpenCL SDKs.
|
||||
|
||||
The hurricane data is located in a number of data files that are copied into the working
|
||||
directory by the Makefile. A separate text file lists the names of the data files that
|
||||
will be used, and it is this text file that should be passed to the application (see usage, below).
|
||||
|
||||
Nearest Neighbor Usage
|
||||
|
||||
nearn [filename] -r [int] -lat [float] -lng [float] [-hqt] [-p [int] -d [int]]
|
||||
|
||||
example:
|
||||
$ ./nearn filelist.txt -r 5 -lat 30 -lng 90
|
||||
|
||||
filename the filename that lists the data input files
|
||||
-r [int] the number of records to return (default: 10)
|
||||
-lat [float] the latitude for nearest neighbors (default: 0)
|
||||
-lng [float] the longitude for nearest neighbors (default: 0)
|
||||
|
||||
-h, --help Display the help file
|
||||
-q Quiet mode. Suppress all text output.
|
||||
-t Print timing information.
|
||||
|
||||
-p [int] Choose the platform (must choose both platform and device)
|
||||
-d [int] Choose the device (must choose both platform and device)
|
||||
|
||||
|
||||
Notes: 1. The filename is required as the first parameter.
|
||||
2. If you declare either the device or the platform,
|
||||
you must declare both.
|
10691
benchmarks/new_opencl/nearn/cane4_0.db
Executable file
10691
benchmarks/new_opencl/nearn/cane4_0.db
Executable file
File diff suppressed because it is too large
Load diff
10691
benchmarks/new_opencl/nearn/cane4_1.db
Executable file
10691
benchmarks/new_opencl/nearn/cane4_1.db
Executable file
File diff suppressed because it is too large
Load diff
10691
benchmarks/new_opencl/nearn/cane4_2.db
Executable file
10691
benchmarks/new_opencl/nearn/cane4_2.db
Executable file
File diff suppressed because it is too large
Load diff
10691
benchmarks/new_opencl/nearn/cane4_3.db
Executable file
10691
benchmarks/new_opencl/nearn/cane4_3.db
Executable file
File diff suppressed because it is too large
Load diff
1443
benchmarks/new_opencl/nearn/clutils.cpp
Executable file
1443
benchmarks/new_opencl/nearn/clutils.cpp
Executable file
File diff suppressed because it is too large
Load diff
281
benchmarks/new_opencl/nearn/clutils.h
Executable file
281
benchmarks/new_opencl/nearn/clutils.h
Executable file
|
@ -0,0 +1,281 @@
|
|||
/****************************************************************************\
|
||||
* Copyright (c) 2011, Advanced Micro Devices, Inc. *
|
||||
* All rights reserved. *
|
||||
* *
|
||||
* Redistribution and use in source and binary forms, with or without *
|
||||
* modification, are permitted provided that the following conditions *
|
||||
* are met: *
|
||||
* *
|
||||
* Redistributions of source code must retain the above copyright notice, *
|
||||
* this list of conditions and the following disclaimer. *
|
||||
* *
|
||||
* Redistributions in binary form must reproduce the above copyright notice, *
|
||||
* this list of conditions and the following disclaimer in the documentation *
|
||||
* and/or other materials provided with the distribution. *
|
||||
* *
|
||||
* Neither the name of the copyright holder nor the names of its contributors *
|
||||
* may be used to endorse or promote products derived from this software *
|
||||
* without specific prior written permission. *
|
||||
* *
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS *
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED *
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR *
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR *
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, *
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR *
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF *
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
|
||||
* *
|
||||
* If you use the software (in whole or in part), you shall adhere to all *
|
||||
* applicable U.S., European, and other export laws, including but not *
|
||||
* limited to the U.S. Export Administration Regulations (EAR), (15 C.F.R. *
|
||||
* Sections 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 *
|
||||
* of 22 June 2000. Further, pursuant to Section 740.6 of the EAR, you *
|
||||
* hereby certify that, except pursuant to a license granted by the United *
|
||||
* States Department of Commerce Bureau of Industry and Security or as *
|
||||
* otherwise permitted pursuant to a License Exception under the U.S. Export *
|
||||
* Administration Regulations ("EAR"), you will not (1) export, re-export or *
|
||||
* release to a national of a country in Country Groups D:1, E:1 or E:2 any *
|
||||
* restricted technology, software, or source code you receive hereunder, *
|
||||
* or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
|
||||
* technology or software, if such foreign produced direct product is subject *
|
||||
* to national security controls as identified on the Commerce Control List *
|
||||
*(currently found in Supplement 1 to Part 774 of EAR). For the most current *
|
||||
* Country Group listings, or for additional information about the EAR or *
|
||||
* your obligations under those regulations, please refer to the U.S. Bureau *
|
||||
* of Industry and Security's website at http://www.bis.doc.gov/. *
|
||||
\****************************************************************************/
|
||||
|
||||
#ifndef __CL_UTILS_H__
|
||||
#define __CL_UTILS_H__
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
// The cl_time type is OS specific
|
||||
#ifdef _WIN32
|
||||
#include <tchar.h>
|
||||
#include <Windows.h>
|
||||
typedef __int64 cl_time;
|
||||
#else
|
||||
#include <sys/time.h>
|
||||
typedef double cl_time;
|
||||
#endif
|
||||
|
||||
//-------------------------------------------------------
|
||||
// Initialization and Cleanup
|
||||
//-------------------------------------------------------
|
||||
|
||||
// Detects platforms and devices, creates context and command queue
|
||||
cl_context cl_init(char devicePreference='\0');
|
||||
|
||||
// Creates a context given a platform and a device
|
||||
cl_context cl_init_context(int platform,int dev,int quiet=0);
|
||||
|
||||
// Releases resources used by clutils
|
||||
void cl_cleanup();
|
||||
|
||||
// Releases a kernel object
|
||||
void cl_freeKernel(cl_kernel kernel);
|
||||
|
||||
// Releases a memory object
|
||||
void cl_freeMem(cl_mem mem);
|
||||
|
||||
// Releases a program object
|
||||
void cl_freeProgram(cl_program program);
|
||||
|
||||
// Returns the global command queue
|
||||
cl_command_queue cl_getCommandQueue();
|
||||
|
||||
|
||||
//-------------------------------------------------------
|
||||
// Synchronization functions
|
||||
//-------------------------------------------------------
|
||||
|
||||
// Performs a clFinish on the command queue
|
||||
void cl_sync();
|
||||
|
||||
|
||||
//-------------------------------------------------------
|
||||
// Memory allocation
|
||||
//-------------------------------------------------------
|
||||
|
||||
// Allocates a regular buffer on the device
|
||||
cl_mem cl_allocBuffer(size_t mem_size,
|
||||
cl_mem_flags flags = CL_MEM_READ_WRITE);
|
||||
|
||||
// XXX I don't think this does exactly what we want it to do
|
||||
// Allocates a read-only buffer and transfers the data
|
||||
cl_mem cl_allocBufferConst(size_t mem_size, void* host_ptr);
|
||||
|
||||
// Allocates pinned memory on the host
|
||||
cl_mem cl_allocBufferPinned(size_t mem_size);
|
||||
|
||||
// Allocates an image on the device
|
||||
cl_mem cl_allocImage(size_t height, size_t width, char type,
|
||||
cl_mem_flags flags = CL_MEM_READ_WRITE);
|
||||
|
||||
|
||||
|
||||
//-------------------------------------------------------
|
||||
// Data transfers
|
||||
//-------------------------------------------------------
|
||||
|
||||
// Copies a buffer from the device to pinned memory on the host and
|
||||
// maps it so it can be read
|
||||
void* cl_copyAndMapBuffer(cl_mem dst, cl_mem src, size_t size);
|
||||
|
||||
// Copies from one buffer to another
|
||||
void cl_copyBufferToBuffer(cl_mem dst, cl_mem src, size_t size);
|
||||
|
||||
// Copies data to a buffer on the device
|
||||
void cl_copyBufferToDevice(cl_mem dst, void *src, size_t mem_size,
|
||||
cl_bool blocking = CL_TRUE);
|
||||
|
||||
// Copies data to an image on the device
|
||||
void cl_copyImageToDevice(cl_mem dst, void* src, size_t height, size_t width);
|
||||
|
||||
// Copies an image from the device to the host
|
||||
void cl_copyImageToHost(void* dst, cl_mem src, size_t height, size_t width);
|
||||
|
||||
// Copies data from a device buffer to the host
|
||||
void cl_copyBufferToHost(void *dst, cl_mem src, size_t mem_size,
|
||||
cl_bool blocking = CL_TRUE);
|
||||
|
||||
// Copies data from a buffer on the device to an image on the device
|
||||
void cl_copyBufferToImage(cl_mem src, cl_mem dst, int height, int width);
|
||||
|
||||
// Maps a buffer
|
||||
void* cl_mapBuffer(cl_mem mem, size_t mem_size, cl_mem_flags flags);
|
||||
|
||||
// Unmaps a buffer
|
||||
void cl_unmapBuffer(cl_mem mem, void *ptr);
|
||||
|
||||
// Writes data to a zero-copy buffer on the device
|
||||
void cl_writeToZCBuffer(cl_mem mem, void* data, size_t size);
|
||||
|
||||
//-------------------------------------------------------
|
||||
// Program and kernels
|
||||
//-------------------------------------------------------
|
||||
|
||||
// Compiles a program
|
||||
cl_program cl_compileProgram(char* kernelPath, char* compileoptions,
|
||||
bool verboseoptions = 0);
|
||||
|
||||
// Creates a kernel
|
||||
cl_kernel cl_createKernel(cl_program program, const char* kernelName);
|
||||
|
||||
|
||||
// Sets a kernel argument
|
||||
void cl_setKernelArg(cl_kernel kernel, unsigned int index, size_t size,
|
||||
void* data);
|
||||
|
||||
|
||||
//-------------------------------------------------------
|
||||
// Profiling/events
|
||||
//-------------------------------------------------------
|
||||
|
||||
// Computes the execution time (start to end) for an event
|
||||
double cl_computeExecTime(cl_event);
|
||||
|
||||
// Compute the elapsed time between two CPU timer values
|
||||
double cl_computeTime(cl_time start, cl_time end);
|
||||
|
||||
// Creates an event from CPU timers
|
||||
void cl_createUserEvent(cl_time start, cl_time end, char* desc);
|
||||
|
||||
// Disable logging of events
|
||||
void cl_disableEvents();
|
||||
|
||||
// Enable logging of events
|
||||
void cl_enableEvents();
|
||||
|
||||
// Query the current system time
|
||||
void cl_getTime(cl_time* time);
|
||||
|
||||
// Calls a function which prints events to the terminal
|
||||
void cl_printEvents();
|
||||
|
||||
// Calls a function which writes the events to a file
|
||||
void cl_writeEventsToFile(char* path);
|
||||
|
||||
|
||||
//-------------------------------------------------------
|
||||
// Error handling
|
||||
//-------------------------------------------------------
|
||||
|
||||
// Compare a status value to CL_SUCCESS and optionally exit on error
|
||||
int cl_errChk(const cl_int status, const char *msg, bool exitOnErr);
|
||||
|
||||
// Queries the supported image formats for the device and prints
|
||||
// them to the screen
|
||||
void printSupportedImageFormats();
|
||||
|
||||
//-------------------------------------------------------
|
||||
// Platform and device information
|
||||
//-------------------------------------------------------
|
||||
|
||||
bool cl_deviceIsAMD(cl_device_id dev=NULL);
|
||||
bool cl_deviceIsNVIDIA(cl_device_id dev=NULL);
|
||||
bool cl_platformIsNVIDIA(cl_platform_id plat=NULL);
|
||||
char* cl_getDeviceDriverVersion(cl_device_id dev=NULL);
|
||||
char* cl_getDeviceName(cl_device_id dev=NULL);
|
||||
char* cl_getDeviceVendor(cl_device_id dev=NULL);
|
||||
char* cl_getDeviceVersion(cl_device_id dev=NULL);
|
||||
char* cl_getPlatformName(cl_platform_id platform);
|
||||
char* cl_getPlatformVendor(cl_platform_id platform);
|
||||
|
||||
//-------------------------------------------------------
|
||||
// Utility functions
|
||||
//-------------------------------------------------------
|
||||
|
||||
char* catStringWithInt(const char* str, int integer);
|
||||
|
||||
char* itoa_portable(int value, char* result, int base);
|
||||
|
||||
//-------------------------------------------------------
|
||||
// Data types
|
||||
//-------------------------------------------------------
|
||||
// Plain pair of ints addressed as .x / .y.
typedef struct {
    int x, y;
} int2;
|
||||
|
||||
// Plain pair of floats addressed as .x / .y.
typedef struct {
    float x, y;
} float2;
|
||||
|
||||
// Plain quadruple of floats addressed as .x / .y / .z / .w.
typedef struct {
    float x, y, z, w;
} float4;
|
||||
|
||||
//-------------------------------------------------------
|
||||
// Defines
|
||||
//-------------------------------------------------------
|
||||
|
||||
#define MAX_ERR_VAL 64
|
||||
|
||||
#define NUM_PROGRAMS 7
|
||||
|
||||
#define NUM_KERNELS 13
|
||||
#define KERNEL_INIT_DET 0
|
||||
#define KERNEL_BUILD_DET 1
|
||||
#define KERNEL_SURF_DESC 2
|
||||
#define KERNEL_NORM_DESC 3
|
||||
#define KERNEL_NON_MAX_SUP 4
|
||||
#define KERNEL_GET_ORIENT1 5
|
||||
#define KERNEL_GET_ORIENT2 6
|
||||
#define KERNEL_NN 7
|
||||
#define KERNEL_SCAN 8
|
||||
#define KERNEL_SCAN4 9
|
||||
#define KERNEL_TRANSPOSE 10
|
||||
#define KERNEL_SCANIMAGE 11
|
||||
#define KERNEL_TRANSPOSEIMAGE 12
|
||||
|
||||
#endif
|
4
benchmarks/new_opencl/nearn/filelist.txt
Executable file
4
benchmarks/new_opencl/nearn/filelist.txt
Executable file
|
@ -0,0 +1,4 @@
|
|||
cane4_0.db
|
||||
cane4_1.db
|
||||
cane4_2.db
|
||||
cane4_3.db
|
29
benchmarks/new_opencl/nearn/ipoint.h
Executable file
29
benchmarks/new_opencl/nearn/ipoint.h
Executable file
|
@ -0,0 +1,29 @@
|
|||
/***********************************************************
|
||||
* --- OpenSURF --- *
|
||||
* This library is distributed under the GNU GPL. Please *
|
||||
* contact chris.evans@irisys.co.uk for more information. *
|
||||
* *
|
||||
* C. Evans, Research Into Robust Visual Features, *
|
||||
* MSc University of Bristol, 2008. *
|
||||
* *
|
||||
************************************************************/
|
||||
|
||||
#ifndef IPOINT_H
|
||||
#define IPOINT_H
|
||||
|
||||
#include <vector>
|
||||
#include <math.h>
|
||||
|
||||
|
||||
|
||||
//-------------------------------------------------------
|
||||
// Interest point: an integer (x, y) position plus a 64-float descriptor.
// NOTE(review): presumably the OpenSURF feature descriptor -- confirm.
typedef struct {
    int x, y;
    float descriptor[64];
} Ipoint;
|
||||
|
||||
//-------------------------------------------------------
|
||||
|
||||
typedef std::vector<Ipoint> IpVec;
|
||||
#endif
|
22
benchmarks/new_opencl/nearn/kernel.cl
Executable file
22
benchmarks/new_opencl/nearn/kernel.cl
Executable file
|
@ -0,0 +1,22 @@
|
|||
//#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
|
||||
|
||||
// One location record as seen by the kernel: latitude followed by longitude.
// The host fills the d_locations buffer straight from its own LatLong array,
// so the field order and types here have to stay in step with that struct.
typedef struct latLong {
    float lat, lng;
} LatLong;
|
||||
|
||||
// Computes, for every record, the Euclidean distance between that record's
// (lat, lng) and the query point (lat, lng) passed as kernel arguments.
//
//   d_locations  input:  numRecords LatLong entries
//   d_distances  output: numRecords floats, d_distances[i] = distance of record i
//   numRecords   number of valid entries in both buffers
//   lat, lng     query coordinates
//
// One work-item handles one record (dimension 0 of the NDRange).
__kernel void NearestNeighbor(__global LatLong *d_locations,
                              __global float *d_distances,
                              const int numRecords,
                              const float lat,
                              const float lng) {
    const int gid = get_global_id(0);

    // The launch may contain more work-items than records; the extras do nothing.
    if (gid >= numRecords) {
        return;
    }

    const float dLat = lat - d_locations[gid].lat;
    const float dLng = lng - d_locations[gid].lng;

    d_distances[gid] = (float)sqrt(dLat * dLat + dLng * dLng);
}
|
BIN
benchmarks/new_opencl/nearn/kernel.pocl
Normal file
BIN
benchmarks/new_opencl/nearn/kernel.pocl
Normal file
Binary file not shown.
346
benchmarks/new_opencl/nearn/main.cc
Executable file
346
benchmarks/new_opencl/nearn/main.cc
Executable file
|
@ -0,0 +1,346 @@
|
|||
#ifndef __NEAREST_NEIGHBOR__
|
||||
#define __NEAREST_NEIGHBOR__
|
||||
|
||||
#include "nearestNeighbor.h"
|
||||
|
||||
cl_context context = NULL;
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
std::vector<Record> records;
|
||||
float *recordDistances;
|
||||
// LatLong locations[REC_WINDOW];
|
||||
std::vector<LatLong> locations;
|
||||
int i;
|
||||
// args
|
||||
char filename[100];
|
||||
int resultsCount = 5, quiet = 0, timing = 0, platform = -1, device = -1;
|
||||
float lat = 30, lng = 90;
|
||||
|
||||
// parse command line
|
||||
if (parseCommandline(argc, argv, filename, &resultsCount, &lat, &lng, &quiet,
|
||||
&timing, &platform, &device)) {
|
||||
printUsage();
|
||||
return 0;
|
||||
}
|
||||
|
||||
int numRecords = loadData(filename, records, locations);
|
||||
|
||||
// for(i=0;i<numRecords;i++)
|
||||
// printf("%s, %f,
|
||||
// %f\n",(records[i].recString),locations[i].lat,locations[i].lng);
|
||||
|
||||
printf("Number of records: %d\n", numRecords);
|
||||
printf("Finding the %d closest neighbors.\n", resultsCount);
|
||||
|
||||
if (resultsCount > numRecords)
|
||||
resultsCount = numRecords;
|
||||
|
||||
context = cl_init_context(platform, device, quiet);
|
||||
|
||||
recordDistances = OpenClFindNearestNeighbors(context, numRecords, locations,
|
||||
lat, lng, timing);
|
||||
|
||||
// find the resultsCount least distances
|
||||
findLowest(records, recordDistances, numRecords, resultsCount);
|
||||
|
||||
// print out results
|
||||
if (!quiet)
|
||||
for (i = 0; i < resultsCount; i++) {
|
||||
printf("%s --> Distance=%f\n", records[i].recString, records[i].distance);
|
||||
}
|
||||
free(recordDistances);
|
||||
return 0;
|
||||
}
|
||||
|
||||
float *OpenClFindNearestNeighbors(cl_context context, int numRecords,
|
||||
std::vector<LatLong> &locations, float lat,
|
||||
float lng, int timing) {
|
||||
|
||||
// 1. set up kernel
|
||||
cl_kernel NN_kernel;
|
||||
cl_int status;
|
||||
cl_program cl_NN_program;
|
||||
cl_NN_program = cl_compileProgram((char *)"nearestNeighbor_kernel.cl", NULL);
|
||||
|
||||
NN_kernel = clCreateKernel(cl_NN_program, "NearestNeighbor", &status);
|
||||
status =
|
||||
cl_errChk(status, (char *)"Error Creating Nearest Neighbor kernel", true);
|
||||
if (status)
|
||||
exit(1);
|
||||
// 2. set up memory on device and send ipts data to device
|
||||
// copy ipts(1,2) to device
|
||||
// also need to alloate memory for the distancePoints
|
||||
cl_mem d_locations;
|
||||
cl_mem d_distances;
|
||||
|
||||
cl_int error = 0;
|
||||
|
||||
d_locations = clCreateBuffer(context, CL_MEM_READ_ONLY,
|
||||
sizeof(LatLong) * numRecords, NULL, &error);
|
||||
|
||||
d_distances = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(float) * numRecords, NULL, &error);
|
||||
|
||||
cl_command_queue command_queue = cl_getCommandQueue();
|
||||
cl_event writeEvent, kernelEvent, readEvent;
|
||||
error = clEnqueueWriteBuffer(command_queue, d_locations,
|
||||
1, // change to 0 for nonblocking write
|
||||
0, // offset
|
||||
sizeof(LatLong) * numRecords, &locations[0], 0,
|
||||
NULL, &writeEvent);
|
||||
|
||||
// 3. send arguments to device
|
||||
cl_int argchk;
|
||||
argchk = clSetKernelArg(NN_kernel, 0, sizeof(cl_mem), (void *)&d_locations);
|
||||
argchk |= clSetKernelArg(NN_kernel, 1, sizeof(cl_mem), (void *)&d_distances);
|
||||
argchk |= clSetKernelArg(NN_kernel, 2, sizeof(int), (void *)&numRecords);
|
||||
argchk |= clSetKernelArg(NN_kernel, 3, sizeof(float), (void *)&lat);
|
||||
argchk |= clSetKernelArg(NN_kernel, 4, sizeof(float), (void *)&lng);
|
||||
|
||||
cl_errChk(argchk, "ERROR in Setting Nearest Neighbor kernel args", true);
|
||||
|
||||
// 4. enqueue kernel
|
||||
size_t globalWorkSize[1];
|
||||
globalWorkSize[0] = numRecords;
|
||||
if (numRecords % 64)
|
||||
globalWorkSize[0] += 64 - (numRecords % 64);
|
||||
// printf("Global Work Size: %zu\n",globalWorkSize[0]);
|
||||
|
||||
error = clEnqueueNDRangeKernel(command_queue, NN_kernel, 1, 0, globalWorkSize,
|
||||
NULL, 0, NULL, &kernelEvent);
|
||||
|
||||
cl_errChk(error, "ERROR in Executing Kernel NearestNeighbor", true);
|
||||
|
||||
// 5. transfer data off of device
|
||||
|
||||
// create distances std::vector
|
||||
float *distances = (float *)malloc(sizeof(float) * numRecords);
|
||||
|
||||
error = clEnqueueReadBuffer(command_queue, d_distances,
|
||||
1, // change to 0 for nonblocking write
|
||||
0, // offset
|
||||
sizeof(float) * numRecords, distances, 0, NULL,
|
||||
&readEvent);
|
||||
|
||||
cl_errChk(error, "ERROR with clEnqueueReadBuffer", true);
|
||||
if (timing) {
|
||||
clFinish(command_queue);
|
||||
cl_ulong eventStart, eventEnd, totalTime = 0;
|
||||
printf("# Records\tWrite(s) [size]\t\tKernel(s)\tRead(s) "
|
||||
"[size]\t\tTotal(s)\n");
|
||||
printf("%d \t", numRecords);
|
||||
// Write Buffer
|
||||
error = clGetEventProfilingInfo(writeEvent, CL_PROFILING_COMMAND_START,
|
||||
sizeof(cl_ulong), &eventStart, NULL);
|
||||
cl_errChk(error, "ERROR in Event Profiling (Write Start)", true);
|
||||
error = clGetEventProfilingInfo(writeEvent, CL_PROFILING_COMMAND_END,
|
||||
sizeof(cl_ulong), &eventEnd, NULL);
|
||||
cl_errChk(error, "ERROR in Event Profiling (Write End)", true);
|
||||
|
||||
printf("%f [%.2fMB]\t", (float)((eventEnd - eventStart) / 1e9),
|
||||
(float)((sizeof(LatLong) * numRecords) / 1e6));
|
||||
totalTime += eventEnd - eventStart;
|
||||
// Kernel
|
||||
error = clGetEventProfilingInfo(kernelEvent, CL_PROFILING_COMMAND_START,
|
||||
sizeof(cl_ulong), &eventStart, NULL);
|
||||
cl_errChk(error, "ERROR in Event Profiling (Kernel Start)", true);
|
||||
error = clGetEventProfilingInfo(kernelEvent, CL_PROFILING_COMMAND_END,
|
||||
sizeof(cl_ulong), &eventEnd, NULL);
|
||||
cl_errChk(error, "ERROR in Event Profiling (Kernel End)", true);
|
||||
|
||||
printf("%f\t", (float)((eventEnd - eventStart) / 1e9));
|
||||
totalTime += eventEnd - eventStart;
|
||||
// Read Buffer
|
||||
error = clGetEventProfilingInfo(readEvent, CL_PROFILING_COMMAND_START,
|
||||
sizeof(cl_ulong), &eventStart, NULL);
|
||||
cl_errChk(error, "ERROR in Event Profiling (Read Start)", true);
|
||||
error = clGetEventProfilingInfo(readEvent, CL_PROFILING_COMMAND_END,
|
||||
sizeof(cl_ulong), &eventEnd, NULL);
|
||||
cl_errChk(error, "ERROR in Event Profiling (Read End)", true);
|
||||
|
||||
printf("%f [%.2fMB]\t", (float)((eventEnd - eventStart) / 1e9),
|
||||
(float)((sizeof(float) * numRecords) / 1e6));
|
||||
totalTime += eventEnd - eventStart;
|
||||
|
||||
printf("%f\n\n", (float)(totalTime / 1e9));
|
||||
}
|
||||
// 6. return finalized data and release buffers
|
||||
clReleaseMemObject(d_locations);
|
||||
clReleaseMemObject(d_distances);
|
||||
return distances;
|
||||
}
|
||||
|
||||
int loadData(char *filename, std::vector<Record> &records,
|
||||
std::vector<LatLong> &locations) {
|
||||
FILE *flist, *fp;
|
||||
int i = 0;
|
||||
char dbname[64];
|
||||
int recNum = 0;
|
||||
|
||||
/**Main processing **/
|
||||
|
||||
int q = 0;
|
||||
|
||||
flist = fopen(filename, "r");
|
||||
while (!feof(flist)) {
|
||||
/**
|
||||
* Read in REC_WINDOW records of length REC_LENGTH
|
||||
* If this is the last file in the filelist, then done
|
||||
* else open next file to be read next iteration
|
||||
*/
|
||||
if (fscanf(flist, "%s\n", dbname) != 1) {
|
||||
printf("error reading filelist\n");
|
||||
exit(0);
|
||||
}
|
||||
printf("loading db: %s\n", dbname);
|
||||
fp = fopen(dbname, "r");
|
||||
if (!fp) {
|
||||
printf("error opening a db\n");
|
||||
exit(1);
|
||||
}
|
||||
// read each record
|
||||
while (!feof(fp)) {
|
||||
Record record;
|
||||
LatLong latLong;
|
||||
fgets(record.recString, 49, fp);
|
||||
fgetc(fp); // newline
|
||||
if (feof(fp))
|
||||
break;
|
||||
|
||||
// parse for lat and long
|
||||
char substr[6];
|
||||
|
||||
for (i = 0; i < 5; i++)
|
||||
substr[i] = *(record.recString + i + 28);
|
||||
substr[5] = '\0';
|
||||
latLong.lat = atof(substr);
|
||||
|
||||
for (i = 0; i < 5; i++)
|
||||
substr[i] = *(record.recString + i + 33);
|
||||
substr[5] = '\0';
|
||||
latLong.lng = atof(substr);
|
||||
|
||||
locations.push_back(latLong);
|
||||
records.push_back(record);
|
||||
recNum++;
|
||||
if (0 == (recNum % 500))
|
||||
break;
|
||||
}
|
||||
|
||||
if (++q == 3)
|
||||
break;
|
||||
fclose(fp);
|
||||
}
|
||||
fclose(flist);
|
||||
return recNum;
|
||||
}
|
||||
|
||||
void findLowest(std::vector<Record> &records, float *distances, int numRecords,
|
||||
int topN) {
|
||||
int i, j;
|
||||
float val;
|
||||
int minLoc;
|
||||
Record *tempRec;
|
||||
float tempDist;
|
||||
|
||||
for (i = 0; i < topN; i++) {
|
||||
minLoc = i;
|
||||
for (j = i; j < numRecords; j++) {
|
||||
val = distances[j];
|
||||
if (val < distances[minLoc])
|
||||
minLoc = j;
|
||||
}
|
||||
// swap locations and distances
|
||||
tempRec = &records[i];
|
||||
records[i] = records[minLoc];
|
||||
records[minLoc] = *tempRec;
|
||||
|
||||
tempDist = distances[i];
|
||||
distances[i] = distances[minLoc];
|
||||
distances[minLoc] = tempDist;
|
||||
|
||||
// add distance to the min we just found
|
||||
records[i].distance = distances[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Parses the command line into the caller-provided option slots.
//
//   filename  out: always set to "filelist.txt" (buffer must hold 100 bytes)
//   r         -r <int>      number of results
//   lat       -lat <float>  query latitude  (any -l? whose 3rd char is 'a')
//   lng       -lng <float>  query longitude (any other -l?)
//   q         -q            quiet
//   t         -t            timing
//   p         -p <int>      platform index
//   d         -d <int>      device index
//
// Slots for options that do not appear are left untouched, so the caller's
// defaults survive.  Arguments that do not start with '-' are ignored.
//
// Returns 0 on success.  Returns 1 (caller should print usage) for -h, for a
// value-taking flag that is the last argument, or when only one of -p / -d
// ended up non-negative.
//
// NOTE(review): the usage text describes a positional filename, but no
// positional argument is read here -- confirm which one is intended.
int parseCommandline(int argc, char *argv[], char *filename, int *r, float *lat,
                     float *lng, int *q, int *t, int *p, int *d) {
  strncpy(filename, "filelist.txt", 100);

  for (int i = 1; i < argc; i++) {
    if (argv[i][0] != '-')
      continue;

    const char flag = argv[i][1];

    // These flags consume the following argument; make sure it exists so we
    // never hand argv[argc] (a null pointer) to atoi/atof.
    const bool needsValue =
        (flag == 'r' || flag == 'l' || flag == 'p' || flag == 'd');
    if (needsValue && i + 1 >= argc)
      return 1;

    switch (flag) {
    case 'r': // number of results
      *r = atoi(argv[++i]);
      break;
    case 'l': // lat or lng
      if (argv[i][2] == 'a') {
        *lat = (float)atof(argv[i + 1]);
      } else {
        *lng = (float)atof(argv[i + 1]);
      }
      i++;
      break;
    case 'h': // help
      return 1;
    case 'q': // quiet
      *q = 1;
      break;
    case 't': // timing
      *t = 1;
      break;
    case 'p': // platform
      *p = atoi(argv[++i]);
      break;
    case 'd': // device
      *d = atoi(argv[++i]);
      break;
    default: // unknown flags are ignored
      break;
    }
  }

  // both p and d must be specified if either are specified
  if ((*d >= 0) != (*p >= 0))
    return 1;
  return 0;
}
|
||||
|
||||
// Prints the command-line help text to stdout.
// The defaults quoted here mirror the initial values set in main()
// (5 results, latitude 30, longitude 90).
// NOTE(review): the text still describes a positional filename argument;
// confirm against parseCommandline, which always uses "filelist.txt".
void printUsage() {
  printf("Nearest Neighbor Usage\n");
  printf("\n");
  printf("nearestNeighbor [filename] -r [int] -lat [float] -lng [float] [-hqt] "
         "[-p [int] -d [int]]\n");
  printf("\n");
  printf("example:\n");
  printf("$ ./nearestNeighbor filelist.txt -r 5 -lat 30 -lng 90\n");
  printf("\n");
  printf("filename the filename that lists the data input files\n");
  printf("-r [int] the number of records to return (default: 5)\n");
  printf("-lat [float] the latitude for nearest neighbors (default: 30)\n");
  printf("-lng [float] the longitude for nearest neighbors (default: 90)\n");
  printf("\n");
  printf("-h, --help Display the help file\n");
  printf("-q Quiet mode. Suppress all text output.\n");
  printf("-t Print timing information.\n");
  printf("\n");
  printf("-p [int] Choose the platform (must choose both platform and "
         "device)\n");
  printf("-d [int] Choose the device (must choose both platform and "
         "device)\n");
  printf("\n");
  printf("\n");
  printf("Notes: 1. The filename is required as the first parameter.\n");
  printf(" 2. If you declare either the device or the platform,\n");
  printf(" you must declare both.\n\n");
}
|
||||
|
||||
#endif
|
50
benchmarks/new_opencl/nearn/nearestNeighbor.h
Executable file
50
benchmarks/new_opencl/nearn/nearestNeighbor.h
Executable file
|
@ -0,0 +1,50 @@
|
|||
#ifndef _NEARESTNEIGHBOR
|
||||
#define _NEARESTNEIGHBOR
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <float.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
// All OpenCL headers
|
||||
#if defined (__APPLE__) || defined(MACOSX)
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/opencl.h>
|
||||
#endif
|
||||
|
||||
#include "clutils.h"
|
||||
//#include "utils.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
|
||||
|
||||
#define REC_LENGTH 49 // size of a record in db
|
||||
|
||||
// Host-side coordinate pair: latitude followed by longitude.
// Arrays of this struct are copied byte-for-byte into the device buffer read
// by the NearestNeighbor kernel, so keep the field order and types unchanged.
typedef struct latLong {
    float lat, lng;
} LatLong;
|
||||
|
||||
typedef struct record
|
||||
{
|
||||
char recString[REC_LENGTH];
|
||||
float distance;
|
||||
} Record;
|
||||
|
||||
float *OpenClFindNearestNeighbors(
|
||||
cl_context context,
|
||||
int numRecords,
|
||||
std::vector<LatLong> &locations,float lat,float lng,
|
||||
int timing);
|
||||
|
||||
int loadData(char *filename,std::vector<Record> &records,std::vector<LatLong> &locations);
|
||||
void findLowest(std::vector<Record> &records,float *distances,int numRecords,int topN);
|
||||
void printUsage();
|
||||
int parseCommandline(int argc, char *argv[], char* filename,int *r,float *lat,float *lng,
|
||||
int *q, int *t, int *p, int *d);
|
||||
#endif
|
1
benchmarks/new_opencl/nearn/run
Executable file
1
benchmarks/new_opencl/nearn/run
Executable file
|
@ -0,0 +1 @@
|
|||
./nn filelist.txt -r 5 -lat 30 -lng 90
|
204
benchmarks/new_opencl/nearn/utils.cpp
Executable file
204
benchmarks/new_opencl/nearn/utils.cpp
Executable file
|
@ -0,0 +1,204 @@
|
|||
/****************************************************************************\
|
||||
* Copyright (c) 2011, Advanced Micro Devices, Inc. *
|
||||
* All rights reserved. *
|
||||
* *
|
||||
* Redistribution and use in source and binary forms, with or without *
|
||||
* modification, are permitted provided that the following conditions *
|
||||
* are met: *
|
||||
* *
|
||||
* Redistributions of source code must retain the above copyright notice, *
|
||||
* this list of conditions and the following disclaimer. *
|
||||
* *
|
||||
* Redistributions in binary form must reproduce the above copyright notice, *
|
||||
* this list of conditions and the following disclaimer in the documentation *
|
||||
* and/or other materials provided with the distribution. *
|
||||
* *
|
||||
* Neither the name of the copyright holder nor the names of its contributors *
|
||||
* may be used to endorse or promote products derived from this software *
|
||||
* without specific prior written permission. *
|
||||
* *
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS *
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED *
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR *
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR *
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, *
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR *
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF *
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
|
||||
* *
|
||||
* If you use the software (in whole or in part), you shall adhere to all *
|
||||
* applicable U.S., European, and other export laws, including but not *
|
||||
* limited to the U.S. Export Administration Regulations (EAR), (15 C.F.R. *
|
||||
* Sections 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 *
|
||||
* of 22 June 2000. Further, pursuant to Section 740.6 of the EAR, you *
|
||||
* hereby certify that, except pursuant to a license granted by the United *
|
||||
* States Department of Commerce Bureau of Industry and Security or as *
|
||||
* otherwise permitted pursuant to a License Exception under the U.S. Export *
|
||||
* Administration Regulations ("EAR"), you will not (1) export, re-export or *
|
||||
* release to a national of a country in Country Groups D:1, E:1 or E:2 any *
|
||||
* restricted technology, software, or source code you receive hereunder, *
|
||||
* or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
|
||||
* technology or software, if such foreign produced direct product is subject *
|
||||
* to national security controls as identified on the Commerce Control List *
|
||||
*(currently found in Supplement 1 to Part 774 of EAR). For the most current *
|
||||
* Country Group listings, or for additional information about the EAR or *
|
||||
* your obligations under those regulations, please refer to the U.S. Bureau *
|
||||
* of Industry and Securitys website at http://www.bis.doc.gov/. *
|
||||
\****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <sys/stat.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
static bool usingImages = true;
|
||||
|
||||
//! malloc wrapper that never returns NULL: if the allocation fails it reports
//! the error with perror("malloc") and terminates the process with exit(-1).
//! The returned block is released with free().
void* alloc(size_t size) {
    void* const block = malloc(size);

    if(block == NULL) {
        perror("malloc");
        exit(-1);
    }

    return block;
}
|
||||
|
||||
// Verifies that `filename` exists and is a regular file.
// On failure prints a diagnostic and terminates the process with exit(-1);
// returns normally only when the check passes.
void checkFile(char* filename)
{
    struct stat fileStatus;

    if(stat(filename, &fileStatus) != 0) {
        printf("Error opening file: %s\n", filename);
        exit(-1);
    }

    // S_ISREG compares the whole file-type field. Testing the S_IFREG bit
    // alone also matches other types that share that bit (e.g. sockets).
    if(!S_ISREG(fileStatus.st_mode)) {
        printf("File %s is not a regular file\n", filename);
        exit(-1);
    }
}
|
||||
|
||||
|
||||
// Verifies that `dirpath` exists and is a directory.
// On failure prints a diagnostic and terminates the process with exit(-1);
// returns normally only when the check passes.
void checkDir(char* dirpath)
{
    struct stat fileStatus;

    if(stat(dirpath, &fileStatus) != 0) {
        printf("Directory does not exist: %s\n", dirpath);
        exit(-1);
    }

    // S_ISDIR compares the whole file-type field. Testing the S_IFDIR bit
    // alone also matches other types that share that bit (e.g. block devices).
    if(!S_ISDIR(fileStatus.st_mode)) {
        printf("Directory was not provided: %s\n", dirpath);
        exit(-1);
    }
}
|
||||
|
||||
// Parse the command line arguments
|
||||
void parseArguments(int argc, char** argv, char** input, char** events,
|
||||
char** ipts, char* devicePref, bool* verifyResults)
|
||||
{
|
||||
|
||||
for(int i = 2; i < argc; i++) {
|
||||
if(strcmp(argv[i], "-d") == 0) { // Event dump found
|
||||
if(i == argc-1) {
|
||||
printf("Usage: -e Needs directory path\n");
|
||||
exit(-1);
|
||||
}
|
||||
devicePref[0] = argv[i+1][0];
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
if(strcmp(argv[i], "-e") == 0) { // Event dump found
|
||||
if(i == argc-1) {
|
||||
printf("Usage: -e Needs directory path\n");
|
||||
exit(-1);
|
||||
}
|
||||
*events = argv[i+1];
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
if(strcmp(argv[i], "-i") == 0) { // Input found
|
||||
if(i == argc-1) {
|
||||
printf("Usage: -i Needs directory path\n");
|
||||
exit(-1);
|
||||
}
|
||||
*input = argv[i+1];
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
if(strcmp(argv[i], "-l") == 0) { // Ipts dump found
|
||||
if(i == argc-1) {
|
||||
printf("Usage: -l Needs directory path\n");
|
||||
exit(-1);
|
||||
}
|
||||
*ipts = argv[i+1];
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
if(strcmp(argv[i], "-n") == 0) { // Don't use OpenCL images
|
||||
setUsingImages(false);
|
||||
continue;
|
||||
}
|
||||
if(strcmp(argv[i], "-v") == 0) { // Verify results
|
||||
*verifyResults = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Rounds `value` up to the next multiple of `multiple` (used for padding
// columns).  A value that is already a multiple is returned unchanged.
// `multiple == 0` has no meaningful answer; `value` is returned as-is rather
// than dividing by zero.
// The result wraps around if the next multiple does not fit in unsigned int.
unsigned int roundUp(unsigned int value, unsigned int multiple) {

    if(multiple == 0) {
        return value;
    }

    const unsigned int remainder = value % multiple;

    // Make the value a multiple of multiple
    if(remainder != 0) {
        value += (multiple-remainder);
    }

    return value;
}
|
||||
|
||||
|
||||
// Concatenate two strings and return a pointer to the new string
|
||||
char* smartStrcat(char* str1, char* str2)
|
||||
{
|
||||
char* newStr = NULL;
|
||||
|
||||
newStr = (char*)alloc((strlen(str1)+strlen(str2)+1)*sizeof(char));
|
||||
|
||||
strcpy(newStr, str1);
|
||||
strcat(newStr, str2);
|
||||
|
||||
return newStr;
|
||||
}
|
||||
|
||||
|
||||
// Set the value of using images to true if they are being
|
||||
// used, or false if they are not
|
||||
void setUsingImages(bool val)
|
||||
{
|
||||
usingImages = val;
|
||||
}
|
||||
|
||||
|
||||
// Return whether or not images are being used
|
||||
bool isUsingImages()
|
||||
{
|
||||
return usingImages;
|
||||
}
|
84
benchmarks/new_opencl/nearn/utils.h
Executable file
84
benchmarks/new_opencl/nearn/utils.h
Executable file
|
@ -0,0 +1,84 @@
|
|||
/****************************************************************************\
|
||||
* Copyright (c) 2011, Advanced Micro Devices, Inc. *
|
||||
* All rights reserved. *
|
||||
* *
|
||||
* Redistribution and use in source and binary forms, with or without *
|
||||
* modification, are permitted provided that the following conditions *
|
||||
* are met: *
|
||||
* *
|
||||
* Redistributions of source code must retain the above copyright notice, *
|
||||
* this list of conditions and the following disclaimer. *
|
||||
* *
|
||||
* Redistributions in binary form must reproduce the above copyright notice, *
|
||||
* this list of conditions and the following disclaimer in the documentation *
|
||||
* and/or other materials provided with the distribution. *
|
||||
* *
|
||||
* Neither the name of the copyright holder nor the names of its contributors *
|
||||
* may be used to endorse or promote products derived from this software *
|
||||
* without specific prior written permission. *
|
||||
* *
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS *
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED *
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR *
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR *
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, *
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR *
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF *
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
|
||||
* *
|
||||
* If you use the software (in whole or in part), you shall adhere to all *
|
||||
* applicable U.S., European, and other export laws, including but not *
|
||||
* limited to the U.S. Export Administration Regulations (EAR), (15 C.F.R. *
|
||||
* Sections 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 *
|
||||
* of 22 June 2000. Further, pursuant to Section 740.6 of the EAR, you *
|
||||
* hereby certify that, except pursuant to a license granted by the United *
|
||||
* States Department of Commerce Bureau of Industry and Security or as *
|
||||
* otherwise permitted pursuant to a License Exception under the U.S. Export *
|
||||
* Administration Regulations ("EAR"), you will not (1) export, re-export or *
|
||||
* release to a national of a country in Country Groups D:1, E:1 or E:2 any *
|
||||
* restricted technology, software, or source code you receive hereunder, *
|
||||
* or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
|
||||
* technology or software, if such foreign produced direct product is subject *
|
||||
* to national security controls as identified on the Commerce Control List *
|
||||
*(currently found in Supplement 1 to Part 774 of EAR). For the most current *
|
||||
* Country Group listings, or for additional information about the EAR or *
|
||||
* your obligations under those regulations, please refer to the U.S. Bureau *
|
||||
* of Industry and Securitys website at http://www.bis.doc.gov/. *
|
||||
\****************************************************************************/
|
||||
|
||||
#ifndef _UTILS_
#define _UTILS_

// size_t is used by alloc() below; include it here so this header can be
// included first in a translation unit.
#include <stddef.h>

// Wrapper for malloc
void* alloc(size_t size);

// Checks for existence of directory
void checkDir(char* dirpath);

// Check for existence of file
void checkFile(char* filename);

// Parse the input command line options to the program
void parseArguments(int argc, char** argv, char** input, char** events,
                    char** ipts, char* devicePref, bool* verifyResults);

// Print the program usage information
void printUsage();

// Rounds up value to the nearest multiple of multiple
unsigned int roundUp(unsigned int value, unsigned int multiple);

// Concatenate two strings, creating a new one
char* smartStrcat(char* str1, char* str2);

// Set the value of usingImages
void setUsingImages(bool val);

// Return whether or not images are being used
bool isUsingImages();

#endif
|
7
benchmarks/new_opencl/results.txt
Normal file
7
benchmarks/new_opencl/results.txt
Normal file
|
@ -0,0 +1,7 @@
|
|||
# Dynamic Instructions: -1
|
||||
# of total cycles: 2519
|
||||
# of forwarding stalls: 0
|
||||
# of branch stalls: 0
|
||||
# CPI: -2519
|
||||
# time to simulate: 4.94066e-323 milliseconds
|
||||
# GRADE: Failed on test: 0
|
44
benchmarks/new_opencl/saxpy/Makefile
Normal file
44
benchmarks/new_opencl/saxpy/Makefile
Normal file
|
@ -0,0 +1,44 @@
|
|||
# Tool and runtime locations. Every one of these uses ?= and can therefore be
# overridden from the environment or the make command line.
RISCV_TOOL_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
POCLCC_PATH ?= $(wildcard ~/dev/pocl/drops_vortex_cc)
POCLRT_PATH ?= $(wildcard ..)
DRIVER_PATH ?= $(wildcard ../../../driver/sw)

CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors

CXXFLAGS += -I$(POCLRT_PATH)/include

LDFLAGS += -L$(POCLRT_PATH)/lib -L$(DRIVER_PATH)/simx -lOpenCL -lvortex

PROJECT = saxpy

SRCS = main.cc

# These targets are commands, not files; without .PHONY a stray file named
# "clean" or "all" in this directory would silently disable them.
.PHONY: all clean run-fpga run-ase run-simx run-rtlsim

all: $(PROJECT)

# Compile the OpenCL kernel offline with poclcc.
kernel.pocl: kernel.cl
	POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCLCC_PATH)/lib:$(DRIVER_PATH)/simx $(POCLCC_PATH)/bin/poclcc -o kernel.pocl kernel.cl

$(PROJECT): $(SRCS)
	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@

# The run-* targets differ only in which driver directory is put on
# LD_LIBRARY_PATH.
run-fpga: $(PROJECT) kernel.pocl
	LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-ase: $(PROJECT) kernel.pocl
	LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-simx: $(PROJECT) kernel.pocl
	LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-rtlsim: $(PROJECT) kernel.pocl
	LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)

.depend: $(SRCS)
	$(CXX) $(CXXFLAGS) -MM $^ > .depend;

clean:
	rm -rf $(PROJECT) *.o *.dump .depend

# Skip dependency generation when the only goal is "clean".
ifneq ($(MAKECMDGOALS),clean)
    -include .depend
endif
|
0
benchmarks/new_opencl/saxpy/README
Normal file
0
benchmarks/new_opencl/saxpy/README
Normal file
5
benchmarks/new_opencl/saxpy/kernel.cl
Normal file
5
benchmarks/new_opencl/saxpy/kernel.cl
Normal file
|
@ -0,0 +1,5 @@
|
|||
// Each work-item updates one element, selected by its global id in
// dimension 0: dst[id] = dst[id] + src[id] * factor.
__kernel void saxpy(__global float *src, __global float *dst, float factor)
{
  const long gid = get_global_id(0);
  const float scaled = src[gid] * factor;
  dst[gid] = dst[gid] + scaled;
}
|
BIN
benchmarks/new_opencl/saxpy/kernel.pocl
Normal file
BIN
benchmarks/new_opencl/saxpy/kernel.pocl
Normal file
Binary file not shown.
221
benchmarks/new_opencl/saxpy/main.cc
Normal file
221
benchmarks/new_opencl/saxpy/main.cc
Normal file
|
@ -0,0 +1,221 @@
|
|||
/*
|
||||
* Simple OpenCL demo program
|
||||
*
|
||||
* Copyright (C) 2009 Clifford Wolf <clifford@clifford.at>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
* gcc -o cldemo -std=gnu99 -Wall -I/usr/include/nvidia-current cldemo.c
|
||||
* -lOpenCL
|
||||
*
|
||||
*/
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
//#define NUM_DATA 65536
|
||||
#define NUM_DATA 4096
|
||||
|
||||
// CL_CHECK(call): evaluates `call` once, treating its value as an OpenCL
// status code. Anything other than CL_SUCCESS is reported on stderr together
// with the stringified call, and the process is aborted.
#define CL_CHECK(_expr)                                                        \
  do {                                                                         \
    cl_int _err = _expr;                                                       \
    if (_err != CL_SUCCESS) {                                                  \
      fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
      abort();                                                                 \
    }                                                                          \
  } while (0)

// CL_CHECK_ERR(call): for calls that return a value and report their status
// through an out-parameter. The call must pass `&_err`; `_err` is declared by
// the macro and starts as CL_INVALID_VALUE. Yields the call's return value,
// or aborts with a message if `_err` is not CL_SUCCESS afterwards.
// Relies on the GNU statement-expression extension `({ ... })`.
#define CL_CHECK_ERR(_expr)                                                    \
  ({                                                                           \
    cl_int _err = CL_INVALID_VALUE;                                            \
    decltype(_expr) _ret = _expr;                                              \
    if (CL_SUCCESS != _err) {                                                  \
      fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
      abort();                                                                 \
    }                                                                          \
    _ret;                                                                      \
  })
|
||||
|
||||
// Error callback handed to clCreateContext() in main(). Only the message text
// is used; the remaining callback arguments are ignored.
void pfn_notify(const char *errinfo, const void * /*private_info*/,
                size_t /*cb*/, void * /*user_data*/) {
  fprintf(stderr, "OpenCL Error (via pfn_notify): %s\n", errinfo);
}
|
||||
|
||||
/// Reads the whole of `filename` into a freshly malloc()'d buffer.
///
/// @param filename  Path of the file to load.
/// @param data      Receives the buffer; the caller releases it with free().
/// @param size      Receives the number of bytes stored in `*data`.
/// @return 0 on success; -1 on a null argument, if the file cannot be opened
///         or measured, on allocation failure, or on a short read. On failure
///         `*data` and `*size` are left untouched.
static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
  if (nullptr == filename || nullptr == data || nullptr == size)
    return -1;

  // "rb": the kernel image is binary; text mode may translate line endings
  // on some platforms and corrupt it.
  FILE* fp = fopen(filename, "rb");
  if (NULL == fp) {
    fprintf(stderr, "Failed to load kernel.");
    return -1;
  }

  // Determine the file length, checking every step (ftell returns -1 on error).
  long fsize = -1;
  if (0 == fseek(fp, 0, SEEK_END))
    fsize = ftell(fp);
  if (fsize < 0 || 0 != fseek(fp, 0, SEEK_SET)) {
    fprintf(stderr, "Failed to load kernel.");
    fclose(fp);
    return -1;
  }

  const size_t wanted = static_cast<size_t>(fsize);

  // Request at least one byte so that an empty file is not mistaken for an
  // allocation failure (malloc(0) may legitimately return NULL).
  uint8_t* buffer = static_cast<uint8_t*>(malloc(wanted > 0 ? wanted : 1));
  if (NULL == buffer) {
    fprintf(stderr, "Failed to load kernel.");
    fclose(fp);
    return -1;
  }

  const size_t nread = fread(buffer, 1, wanted, fp);
  fclose(fp);

  // A truncated image must not be handed to the OpenCL runtime as if complete.
  if (nread != wanted) {
    fprintf(stderr, "Failed to load kernel.");
    free(buffer);
    return -1;
  }

  *data = buffer;
  *size = nread;
  return 0;
}

// Kernel image loaded by main() via read_kernel_file(); released with free().
uint8_t *kernel_bin = NULL;
|
||||
|
||||
///
|
||||
// Cleanup any created OpenCL resources
|
||||
//
|
||||
void Cleanup(cl_context context, cl_command_queue commandQueue,
|
||||
cl_program program, cl_kernel kernel, cl_mem memObjects[3]) {
|
||||
for (int i = 0; i < 3; i++) {
|
||||
if (memObjects[i] != 0)
|
||||
clReleaseMemObject(memObjects[i]);
|
||||
}
|
||||
if (commandQueue != 0)
|
||||
clReleaseCommandQueue(commandQueue);
|
||||
|
||||
if (kernel != 0)
|
||||
clReleaseKernel(kernel);
|
||||
|
||||
if (program != 0)
|
||||
clReleaseProgram(program);
|
||||
|
||||
if (context != 0)
|
||||
clReleaseContext(context);
|
||||
|
||||
if (kernel_bin) free(kernel_bin);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
printf("enter demo main\n");
|
||||
|
||||
cl_platform_id platform_id;
|
||||
cl_device_id device_id;
|
||||
size_t kernel_size;
|
||||
cl_int binary_status = 0;
|
||||
int i;
|
||||
|
||||
// read kernel binary from file
|
||||
if (0 != read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size))
|
||||
return -1;
|
||||
|
||||
// Getting platform and device information
|
||||
CL_CHECK(clGetPlatformIDs(1, &platform_id, NULL));
|
||||
CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL));
|
||||
|
||||
cl_context context;
|
||||
context = CL_CHECK_ERR(clCreateContext(NULL, 1, &device_id, &pfn_notify, NULL, &_err));
|
||||
|
||||
cl_command_queue queue;
|
||||
queue = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &_err));
|
||||
|
||||
cl_kernel kernel = 0;
|
||||
cl_mem memObjects[2] = {0, 0};
|
||||
|
||||
// Create OpenCL program - first attempt to load cached binary.
|
||||
// If that is not available, then create the program from source
|
||||
// and store the binary for future use.
|
||||
std::cout << "Attempting to create program from binary..." << std::endl;
|
||||
cl_program program = CL_CHECK_ERR(clCreateProgramWithBinary(
|
||||
context, 1, &device_id, &kernel_size, &kernel_bin, &binary_status, &_err));
|
||||
if (program == NULL) {
|
||||
std::cerr << "Failed to write program binary" << std::endl;
|
||||
Cleanup(context, queue, program, kernel, memObjects);
|
||||
return 1;
|
||||
} else {
|
||||
std::cout << "Read program from binary." << std::endl;
|
||||
}
|
||||
|
||||
// Build program
|
||||
CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));
|
||||
|
||||
printf("attempting to create input buffer\n");
|
||||
fflush(stdout);
|
||||
cl_mem input_buffer;
|
||||
input_buffer = CL_CHECK_ERR(clCreateBuffer(
|
||||
context, CL_MEM_READ_ONLY, sizeof(float) * NUM_DATA, NULL, &_err));
|
||||
|
||||
printf("attempting to create output buffer\n");
|
||||
fflush(stdout);
|
||||
cl_mem output_buffer;
|
||||
output_buffer = CL_CHECK_ERR(clCreateBuffer(
|
||||
context, CL_MEM_WRITE_ONLY, sizeof(float) * NUM_DATA, NULL, &_err));
|
||||
|
||||
memObjects[0] = input_buffer;
|
||||
memObjects[1] = output_buffer;
|
||||
|
||||
float factor = ((float)rand() / (float)(RAND_MAX)) * 100.0;
|
||||
|
||||
printf("attempting to create kernel\n");
|
||||
fflush(stdout);
|
||||
kernel = CL_CHECK_ERR(clCreateKernel(program, "saxpy", &_err));
|
||||
printf("setting up kernel args cl_mem:%lx \n", input_buffer);
|
||||
fflush(stdout);
|
||||
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(input_buffer), &input_buffer));
|
||||
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(output_buffer), &output_buffer));
|
||||
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(factor), &factor));
|
||||
|
||||
printf("attempting to enqueue write buffer\n");
|
||||
fflush(stdout);
|
||||
for (int i = 0; i < NUM_DATA; i++) {
|
||||
float in = ((float)rand() / (float)(RAND_MAX)) * 100.0;
|
||||
CL_CHECK(clEnqueueWriteBuffer(queue, input_buffer, CL_TRUE,
|
||||
i * sizeof(float), 4, &in, 0, NULL, NULL));
|
||||
}
|
||||
|
||||
cl_event kernel_completion;
|
||||
size_t global_work_size[] = {NUM_DATA/2,NUM_DATA/2};
|
||||
printf("attempting to enqueue kernel\n");
|
||||
fflush(stdout);
|
||||
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
|
||||
NULL, 0, NULL, &kernel_completion));
|
||||
printf("Enqueue'd kerenel\n");
|
||||
fflush(stdout);
|
||||
cl_ulong time_start, time_end;
|
||||
CL_CHECK(clWaitForEvents(1, &kernel_completion));
|
||||
CL_CHECK(clGetEventProfilingInfo(kernel_completion,
|
||||
CL_PROFILING_COMMAND_START,
|
||||
sizeof(time_start), &time_start, NULL));
|
||||
CL_CHECK(clGetEventProfilingInfo(kernel_completion, CL_PROFILING_COMMAND_END,
|
||||
sizeof(time_end), &time_end, NULL));
|
||||
double elapsed = time_end - time_start;
|
||||
printf("time(ns):%lg\n", elapsed);
|
||||
CL_CHECK(clReleaseEvent(kernel_completion));
|
||||
|
||||
printf("Result:");
|
||||
for (int i = 0; i < NUM_DATA; i++) {
|
||||
float data;
|
||||
CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE,
|
||||
i * sizeof(float), 4, &data, 0, NULL, NULL));
|
||||
// printf(" %f", data);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
CL_CHECK(clReleaseMemObject(memObjects[0]));
|
||||
CL_CHECK(clReleaseMemObject(memObjects[1]));
|
||||
|
||||
CL_CHECK(clReleaseKernel(kernel));
|
||||
CL_CHECK(clReleaseProgram(program));
|
||||
CL_CHECK(clReleaseContext(context));
|
||||
|
||||
return 0;
|
||||
}
|
44
benchmarks/new_opencl/sfilter/Makefile
Normal file
44
benchmarks/new_opencl/sfilter/Makefile
Normal file
|
@ -0,0 +1,44 @@
|
|||
# Tool and runtime locations. Every one of these uses ?= and can therefore be
# overridden from the environment or the make command line.
RISCV_TOOL_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
POCLCC_PATH ?= $(wildcard ~/dev/pocl/drops_vortex_cc)
POCLRT_PATH ?= $(wildcard ..)
DRIVER_PATH ?= $(wildcard ../../../driver/sw)

CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors

CXXFLAGS += -I$(POCLRT_PATH)/include

LDFLAGS += -L$(POCLRT_PATH)/lib -L$(DRIVER_PATH)/simx -lOpenCL -lvortex

PROJECT = sfilter

SRCS = main.cc

# These targets are commands, not files; without .PHONY a stray file named
# "clean" or "all" in this directory would silently disable them.
.PHONY: all clean run-fpga run-ase run-simx run-rtlsim

all: $(PROJECT)

# Compile the OpenCL kernel offline with poclcc.
kernel.pocl: kernel.cl
	POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCLCC_PATH)/lib:$(DRIVER_PATH)/simx $(POCLCC_PATH)/bin/poclcc -o kernel.pocl kernel.cl

$(PROJECT): $(SRCS)
	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@

# The run-* targets differ only in which driver directory is put on
# LD_LIBRARY_PATH.
run-fpga: $(PROJECT) kernel.pocl
	LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-ase: $(PROJECT) kernel.pocl
	LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-simx: $(PROJECT) kernel.pocl
	LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-rtlsim: $(PROJECT) kernel.pocl
	LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)

.depend: $(SRCS)
	$(CXX) $(CXXFLAGS) -MM $^ > .depend;

clean:
	rm -rf $(PROJECT) *.o *.dump .depend

# Skip dependency generation when the only goal is "clean".
ifneq ($(MAKECMDGOALS),clean)
    -include .depend
endif
|
0
benchmarks/new_opencl/sfilter/README
Normal file
0
benchmarks/new_opencl/sfilter/README
Normal file
21
benchmarks/new_opencl/sfilter/kernel.cl
Normal file
21
benchmarks/new_opencl/sfilter/kernel.cl
Normal file
|
@ -0,0 +1,21 @@
|
|||
// 3x3 weighted sum around element (x, y) of a buffer with row stride ldc.
// Weight layout relative to the centre element:
//   m0 m1 m2
//   m3 m4 m5
//   m6 m7 m8
// The kernel reads the eight neighbours of (x, y) without any bounds check,
// so it must only be launched over positions that have all eight neighbours.
__kernel void sfilter(__global float *src, __global float *dst, long ldc,
                      float m0, float m1, float m2, float m3, float m4, float m5, float m6, float m7, float m8)
{
  const long col = get_global_id(0);
  const long row = get_global_id(1);

  // Start offsets of the three rows that take part in the sum.
  const long above = (row - 1) * ldc;
  const long here  = row * ldc;
  const long below = (row + 1) * ldc;

  const float p0 = src[(col - 1) + above] * m0;
  const float p1 = src[col + above] * m1;
  const float p2 = src[(col + 1) + above] * m2;
  const float p3 = src[(col - 1) + here] * m3;
  const float p4 = src[col + here] * m4;
  const float p5 = src[(col + 1) + here] * m5;
  const float p6 = src[(col - 1) + below] * m6;
  const float p7 = src[col + below] * m7;
  const float p8 = src[(col + 1) + below] * m8;

  // Same left-to-right summation order as before.
  dst[col + here] = p0 + p1 + p2 + p3 + p4 + p5 + p6 + p7 + p8;
}
|
BIN
benchmarks/new_opencl/sfilter/kernel.pocl
Normal file
BIN
benchmarks/new_opencl/sfilter/kernel.pocl
Normal file
Binary file not shown.
319
benchmarks/new_opencl/sfilter/main.cc
Normal file
319
benchmarks/new_opencl/sfilter/main.cc
Normal file
|
@ -0,0 +1,319 @@
|
|||
/*
|
||||
* Simple OpenCL demo program
|
||||
*
|
||||
* Copyright (C) 2009 Clifford Wolf <clifford@clifford.at>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
* gcc -o cldemo -std=gnu99 -Wall -I/usr/include/nvidia-current cldemo.c
|
||||
* -lOpenCL
|
||||
*
|
||||
*/
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
#include <errno.h>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
#include <sstream>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#define NUM_DATA 66
|
||||
|
||||
// CL_CHECK(call): evaluates `call` once, treating its value as an OpenCL
// status code. Anything other than CL_SUCCESS is reported on stderr together
// with the stringified call, and the process is aborted.
#define CL_CHECK(_expr)                                                        \
  do {                                                                         \
    cl_int _err = _expr;                                                       \
    if (_err != CL_SUCCESS) {                                                  \
      fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
      abort();                                                                 \
    }                                                                          \
  } while (0)

// CL_CHECK_ERR(call): for calls that return a value and report their status
// through an out-parameter. The call must pass `&_err`; `_err` is declared by
// the macro and starts as CL_INVALID_VALUE. Yields the call's return value,
// or aborts with a message if `_err` is not CL_SUCCESS afterwards.
// Relies on the GNU statement-expression extension `({ ... })`.
#define CL_CHECK_ERR(_expr)                                                    \
  ({                                                                           \
    cl_int _err = CL_INVALID_VALUE;                                            \
    decltype(_expr) _ret = _expr;                                              \
    if (CL_SUCCESS != _err) {                                                  \
      fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
      abort();                                                                 \
    }                                                                          \
    _ret;                                                                      \
  })
|
||||
|
||||
// Error callback handed to clCreateContext() in main(). Only the message text
// is used; the remaining callback arguments are ignored.
void pfn_notify(const char *errinfo, const void * /*private_info*/,
                size_t /*cb*/, void * /*user_data*/) {
  fprintf(stderr, "OpenCL Error (via pfn_notify): %s\n", errinfo);
}
|
||||
|
||||
/// Reads the whole of `filename` into a freshly malloc()'d buffer.
///
/// @param filename  Path of the file to load.
/// @param data      Receives the buffer; the caller releases it with free().
/// @param size      Receives the number of bytes stored in `*data`.
/// @return 0 on success; -1 on a null argument, if the file cannot be opened
///         or measured, on allocation failure, or on a short read. On failure
///         `*data` and `*size` are left untouched.
static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
  if (nullptr == filename || nullptr == data || nullptr == size)
    return -1;

  // "rb": the kernel image is binary; text mode may translate line endings
  // on some platforms and corrupt it.
  FILE* fp = fopen(filename, "rb");
  if (NULL == fp) {
    fprintf(stderr, "Failed to load kernel.");
    return -1;
  }

  // Determine the file length, checking every step (ftell returns -1 on error).
  long fsize = -1;
  if (0 == fseek(fp, 0, SEEK_END))
    fsize = ftell(fp);
  if (fsize < 0 || 0 != fseek(fp, 0, SEEK_SET)) {
    fprintf(stderr, "Failed to load kernel.");
    fclose(fp);
    return -1;
  }

  const size_t wanted = static_cast<size_t>(fsize);

  // Request at least one byte so that an empty file is not mistaken for an
  // allocation failure (malloc(0) may legitimately return NULL).
  uint8_t* buffer = static_cast<uint8_t*>(malloc(wanted > 0 ? wanted : 1));
  if (NULL == buffer) {
    fprintf(stderr, "Failed to load kernel.");
    fclose(fp);
    return -1;
  }

  const size_t nread = fread(buffer, 1, wanted, fp);
  fclose(fp);

  // A truncated image must not be handed to the OpenCL runtime as if complete.
  if (nread != wanted) {
    fprintf(stderr, "Failed to load kernel.");
    free(buffer);
    return -1;
  }

  *data = buffer;
  *size = nread;
  return 0;
}

// Kernel image loaded by main() via read_kernel_file(); released with free().
uint8_t *kernel_bin = NULL;
|
||||
|
||||
// inlcude pocl float to half conversions
|
||||
typedef union {
|
||||
int32_t i;
|
||||
float f;
|
||||
} FloatConvUnion;
|
||||
cl_half poclu_float_to_cl_half(float value) {
|
||||
FloatConvUnion u;
|
||||
u.f = value;
|
||||
cl_half half = (u.i >> 16) & 0x8000; // sign
|
||||
cl_half fraction =
|
||||
(u.i >> 12) & 0x007ff; // fraction with extra bit for rounding
|
||||
cl_half exponent = (u.i >> 23) & 0xff; // exponent
|
||||
|
||||
if (exponent < 0x0067) // Return signed zero if zero or value is too small for
|
||||
// denormal half
|
||||
return half;
|
||||
|
||||
if (exponent > 0x008e) { // value was NaN or Inf
|
||||
half |= 0x7c00u; // Make into inf
|
||||
half |= exponent == 255 &&
|
||||
(u.i & 0x007fffffu); // If value was NaN make this into NaN
|
||||
return half;
|
||||
}
|
||||
|
||||
if (exponent < 0x0071) { // Denormal
|
||||
fraction |= 0x0800u;
|
||||
|
||||
// rounding
|
||||
half |= (fraction >> (0x0072 - exponent)) +
|
||||
((fraction >> (0x0071 - exponent)) & 1);
|
||||
return half;
|
||||
}
|
||||
|
||||
half |= ((exponent - 0x0070) << 10) | (fraction >> 1);
|
||||
half += fraction & 1; // rounding
|
||||
return half;
|
||||
}
|
||||
#ifndef INFINITY
|
||||
#define INFINITY 1.0 / 0.0
|
||||
#endif
|
||||
|
||||
#ifndef NAN
|
||||
#define NAN 0.0 / 0.0
|
||||
#endif
|
||||
|
||||
float poclu_cl_half_to_float(cl_half value) {
|
||||
if (value == 0xFC00) {
|
||||
return -INFINITY;
|
||||
}
|
||||
if (value == 0x7C00) {
|
||||
return INFINITY;
|
||||
}
|
||||
|
||||
int sgn = ((value & 0x8000) >> 15);
|
||||
int exp = (value & 0x7C00) >> 10;
|
||||
int mant = value & 0x03FF;
|
||||
|
||||
if (exp == 0x1F && mant != 0) {
|
||||
return NAN;
|
||||
}
|
||||
|
||||
float v = (exp == 0) ? mant : mant | 0x0400; // 1.x if not denormal
|
||||
v /= 0x400;
|
||||
float mul = exp2((float)exp - 15);
|
||||
v *= mul;
|
||||
if (sgn) {
|
||||
v *= -1;
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
///
|
||||
// Cleanup any created OpenCL resources
|
||||
//
|
||||
void Cleanup(cl_context context, cl_command_queue commandQueue,
|
||||
cl_program program, cl_kernel kernel, cl_mem memObjects[3]) {
|
||||
for (int i = 0; i < 3; i++) {
|
||||
if (memObjects[i] != 0)
|
||||
clReleaseMemObject(memObjects[i]);
|
||||
}
|
||||
if (commandQueue != 0)
|
||||
clReleaseCommandQueue(commandQueue);
|
||||
|
||||
if (kernel != 0)
|
||||
clReleaseKernel(kernel);
|
||||
|
||||
if (program != 0)
|
||||
clReleaseProgram(program);
|
||||
|
||||
if (context != 0)
|
||||
clReleaseContext(context);
|
||||
|
||||
if (kernel_bin) free(kernel_bin);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
printf("enter demo main\n");
|
||||
|
||||
cl_platform_id platform_id;
|
||||
cl_device_id device_id;
|
||||
size_t kernel_size;
|
||||
cl_int binary_status = 0;
|
||||
int i;
|
||||
|
||||
// read kernel binary from file
|
||||
if (0 != read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size))
|
||||
return -1;
|
||||
|
||||
// Getting platform and device information
|
||||
CL_CHECK(clGetPlatformIDs(1, &platform_id, NULL));
|
||||
CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL));
|
||||
|
||||
cl_context context;
|
||||
context = CL_CHECK_ERR(clCreateContext(NULL, 1, &device_id, &pfn_notify, NULL, &_err));
|
||||
|
||||
cl_command_queue queue;
|
||||
queue = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &_err));
|
||||
|
||||
cl_kernel kernel = 0;
|
||||
cl_mem memObjects[2] = {0, 0};
|
||||
|
||||
// Create OpenCL program - first attempt to load cached binary.
|
||||
// If that is not available, then create the program from source
|
||||
// and store the binary for future use.
|
||||
std::cout << "Attempting to create program from binary..." << std::endl;
|
||||
cl_program program = CL_CHECK_ERR(clCreateProgramWithBinary(
|
||||
context, 1, &device_id, &kernel_size, &kernel_bin, &binary_status, &_err));
|
||||
if (program == NULL) {
|
||||
std::cerr << "Failed to write program binary" << std::endl;
|
||||
Cleanup(context, queue, program, kernel, memObjects);
|
||||
return 1;
|
||||
} else {
|
||||
std::cout << "Read program from binary." << std::endl;
|
||||
}
|
||||
|
||||
// Build program
|
||||
CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));
|
||||
|
||||
printf("attempting to create input buffer\n");
|
||||
fflush(stdout);
|
||||
cl_mem input_buffer;
|
||||
input_buffer = CL_CHECK_ERR(
|
||||
clCreateBuffer(context, CL_MEM_READ_ONLY,
|
||||
sizeof(float) * NUM_DATA * NUM_DATA, NULL, &_err));
|
||||
|
||||
printf("attempting to create output buffer\n");
|
||||
fflush(stdout);
|
||||
cl_mem output_buffer;
|
||||
output_buffer = CL_CHECK_ERR(
|
||||
clCreateBuffer(context, CL_MEM_WRITE_ONLY,
|
||||
sizeof(float) * NUM_DATA * NUM_DATA, NULL, &_err));
|
||||
|
||||
memObjects[0] = input_buffer;
|
||||
memObjects[1] = output_buffer;
|
||||
|
||||
long long ldc = NUM_DATA;
|
||||
|
||||
float m0 = 1.0;
|
||||
float m1 = 1.0;
|
||||
float m2 = 1.0;
|
||||
float m3 = 1.0;
|
||||
float m4 = 1.0;
|
||||
float m5 = 1.0;
|
||||
float m6 = 1.0;
|
||||
float m7 = 1.0;
|
||||
float m8 = 1.0;
|
||||
|
||||
printf("attempting to create kernel\n");
|
||||
fflush(stdout);
|
||||
kernel = CL_CHECK_ERR(clCreateKernel(program, "sfilter", &_err));
|
||||
printf("setting up kernel args cl_mem:%lx \n", input_buffer);
|
||||
fflush(stdout);
|
||||
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(input_buffer), &input_buffer));
|
||||
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(output_buffer), &output_buffer));
|
||||
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(ldc), (&ldc)));
|
||||
CL_CHECK(clSetKernelArg(kernel, 3, sizeof(m0), (&m0)));
|
||||
CL_CHECK(clSetKernelArg(kernel, 4, sizeof(m1), (&m1)));
|
||||
CL_CHECK(clSetKernelArg(kernel, 5, sizeof(m2), (&m2)));
|
||||
CL_CHECK(clSetKernelArg(kernel, 6, sizeof(m3), (&m3)));
|
||||
CL_CHECK(clSetKernelArg(kernel, 7, sizeof(m4), (&m4)));
|
||||
CL_CHECK(clSetKernelArg(kernel, 8, sizeof(m5), (&m5)));
|
||||
CL_CHECK(clSetKernelArg(kernel, 9, sizeof(m6), (&m6)));
|
||||
CL_CHECK(clSetKernelArg(kernel, 10, sizeof(m7), (&m7)));
|
||||
CL_CHECK(clSetKernelArg(kernel, 11, sizeof(m8), (&m8)));
|
||||
|
||||
printf("attempting to enqueue write buffer\n");
|
||||
fflush(stdout);
|
||||
for (int i = 0; i < NUM_DATA * NUM_DATA; i++) {
|
||||
float in = ((float)rand() / (float)(RAND_MAX)) * 100.0;
|
||||
CL_CHECK(clEnqueueWriteBuffer(queue, input_buffer, CL_TRUE,
|
||||
i * sizeof(float), 4, &in, 0, NULL, NULL));
|
||||
}
|
||||
|
||||
cl_event kernel_completion;
|
||||
size_t global_offset[2] = {1, 1};
|
||||
size_t global_work_size[2] = {NUM_DATA - 2, NUM_DATA - 2}; // avoid the edges
|
||||
const size_t local_work_size[2] = {64, 1};
|
||||
printf("attempting to enqueue kernel\n");
|
||||
fflush(stdout);
|
||||
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 2, global_offset,
|
||||
global_work_size, local_work_size, 0, NULL,
|
||||
&kernel_completion));
|
||||
printf("Enqueue'd kerenel\n");
|
||||
fflush(stdout);
|
||||
cl_ulong time_start, time_end;
|
||||
CL_CHECK(clWaitForEvents(1, &kernel_completion));
|
||||
CL_CHECK(clGetEventProfilingInfo(kernel_completion,
|
||||
CL_PROFILING_COMMAND_START,
|
||||
sizeof(time_start), &time_start, NULL));
|
||||
CL_CHECK(clGetEventProfilingInfo(kernel_completion, CL_PROFILING_COMMAND_END,
|
||||
sizeof(time_end), &time_end, NULL));
|
||||
double elapsed = time_end - time_start;
|
||||
printf("time(ns):%lg\n", elapsed);
|
||||
CL_CHECK(clReleaseEvent(kernel_completion));
|
||||
|
||||
printf("Result:");
|
||||
for (int i = 0; i < NUM_DATA * NUM_DATA; i++) {
|
||||
float data;
|
||||
CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE,
|
||||
i * sizeof(float), 4, &data, 0, NULL, NULL));
|
||||
// printf(" %f", data);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
CL_CHECK(clReleaseMemObject(memObjects[0]));
|
||||
CL_CHECK(clReleaseMemObject(memObjects[1]));
|
||||
|
||||
CL_CHECK(clReleaseKernel(kernel));
|
||||
CL_CHECK(clReleaseProgram(program));
|
||||
CL_CHECK(clReleaseContext(context));
|
||||
|
||||
return 0;
|
||||
}
|
44
benchmarks/new_opencl/sgemm/Makefile
Normal file
44
benchmarks/new_opencl/sgemm/Makefile
Normal file
|
@ -0,0 +1,44 @@
|
|||
# Tool and runtime locations. Every one of these uses ?= and can therefore be
# overridden from the environment or the make command line.
RISCV_TOOL_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
POCLCC_PATH ?= $(wildcard ~/dev/pocl/drops_vortex_cc)
POCLRT_PATH ?= $(wildcard ..)
DRIVER_PATH ?= $(wildcard ../../../driver/sw)

CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors

CXXFLAGS += -I$(POCLRT_PATH)/include

LDFLAGS += -L$(POCLRT_PATH)/lib -L$(DRIVER_PATH)/simx -lOpenCL -lvortex

PROJECT = sgemm

SRCS = main.cc

# These targets are commands, not files; without .PHONY a stray file named
# "clean" or "all" in this directory would silently disable them.
.PHONY: all clean run-fpga run-ase run-simx run-rtlsim

all: $(PROJECT)

# Compile the OpenCL kernel offline with poclcc.
kernel.pocl: kernel.cl
	POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCLCC_PATH)/lib:$(DRIVER_PATH)/simx $(POCLCC_PATH)/bin/poclcc -o kernel.pocl kernel.cl

$(PROJECT): $(SRCS)
	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@

# The run-* targets differ only in which driver directory is put on
# LD_LIBRARY_PATH.
run-fpga: $(PROJECT) kernel.pocl
	LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-ase: $(PROJECT) kernel.pocl
	LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-simx: $(PROJECT) kernel.pocl
	LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-rtlsim: $(PROJECT) kernel.pocl
	LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)

.depend: $(SRCS)
	$(CXX) $(CXXFLAGS) -MM $^ > .depend;

clean:
	rm -rf $(PROJECT) *.o *.dump .depend

# Skip dependency generation when the only goal is "clean".
ifneq ($(MAKECMDGOALS),clean)
    -include .depend
endif
|
0
benchmarks/new_opencl/sgemm/README
Normal file
0
benchmarks/new_opencl/sgemm/README
Normal file
9
benchmarks/new_opencl/sgemm/kernel.cl
Normal file
9
benchmarks/new_opencl/sgemm/kernel.cl
Normal file
|
@ -0,0 +1,9 @@
|
|||
// Each work-item adds one product A[g1 + g2*ldc] * B[g1*ldc + g0] onto
// C[g0 + g2*ldc], where g0, g1, g2 are its global ids in dimensions 0, 1, 2.
// NOTE(review): work-items that differ only in dimension 1 update the same C
// element with a plain read-modify-write - confirm how the launch avoids
// conflicting updates.
__kernel void sgemm(__global float *A, __global float *B, __global float *C, int ldc)
{
  const long g0 = get_global_id(0);
  const long g1 = get_global_id(1);
  const long g2 = get_global_id(2);

  const float lhs = A[g1 + g2 * ldc];
  const float rhs = B[g1 * ldc + g0];

  const long out = g0 + g2 * ldc;
  C[out] = C[out] + lhs * rhs;
}
|
BIN
benchmarks/new_opencl/sgemm/kernel.pocl
Normal file
BIN
benchmarks/new_opencl/sgemm/kernel.pocl
Normal file
Binary file not shown.
243
benchmarks/new_opencl/sgemm/main.cc
Normal file
243
benchmarks/new_opencl/sgemm/main.cc
Normal file
|
@ -0,0 +1,243 @@
|
|||
/*
|
||||
* Simple OpenCL demo program
|
||||
*
|
||||
* Copyright (C) 2009 Clifford Wolf <clifford@clifford.at>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
* gcc -o cldemo -std=gnu99 -Wall -I/usr/include/nvidia-current cldemo.c
|
||||
* -lOpenCL
|
||||
*
|
||||
*/
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <errno.h>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#define NUM_DATA 64
|
||||
|
||||
// CL_CHECK(call): evaluates `call` once, treating its value as an OpenCL
// status code. Anything other than CL_SUCCESS is reported on stderr together
// with the stringified call, and the process is aborted.
#define CL_CHECK(_expr)                                                        \
  do {                                                                         \
    cl_int _err = _expr;                                                       \
    if (_err != CL_SUCCESS) {                                                  \
      fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
      abort();                                                                 \
    }                                                                          \
  } while (0)

// CL_CHECK_ERR(call): for calls that return a value and report their status
// through an out-parameter. The call must pass `&_err`; `_err` is declared by
// the macro and starts as CL_INVALID_VALUE. Yields the call's return value,
// or aborts with a message if `_err` is not CL_SUCCESS afterwards.
// Relies on the GNU statement-expression extension `({ ... })`.
#define CL_CHECK_ERR(_expr)                                                    \
  ({                                                                           \
    cl_int _err = CL_INVALID_VALUE;                                            \
    decltype(_expr) _ret = _expr;                                              \
    if (CL_SUCCESS != _err) {                                                  \
      fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
      abort();                                                                 \
    }                                                                          \
    _ret;                                                                      \
  })
|
||||
|
||||
// Error callback handed to clCreateContext() in main(). Only the message text
// is used; the remaining callback arguments are ignored.
void pfn_notify(const char *errinfo, const void * /*private_info*/,
                size_t /*cb*/, void * /*user_data*/) {
  fprintf(stderr, "OpenCL Error (via pfn_notify): %s\n", errinfo);
}
|
||||
|
||||
/// Reads the whole of `filename` into a freshly malloc()'d buffer.
///
/// @param filename  Path of the file to load.
/// @param data      Receives the buffer; the caller releases it with free().
/// @param size      Receives the number of bytes stored in `*data`.
/// @return 0 on success; -1 on a null argument, if the file cannot be opened
///         or measured, on allocation failure, or on a short read. On failure
///         `*data` and `*size` are left untouched.
static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
  if (nullptr == filename || nullptr == data || nullptr == size)
    return -1;

  // "rb": the kernel image is binary; text mode may translate line endings
  // on some platforms and corrupt it.
  FILE* fp = fopen(filename, "rb");
  if (NULL == fp) {
    fprintf(stderr, "Failed to load kernel.");
    return -1;
  }

  // Determine the file length, checking every step (ftell returns -1 on error).
  long fsize = -1;
  if (0 == fseek(fp, 0, SEEK_END))
    fsize = ftell(fp);
  if (fsize < 0 || 0 != fseek(fp, 0, SEEK_SET)) {
    fprintf(stderr, "Failed to load kernel.");
    fclose(fp);
    return -1;
  }

  const size_t wanted = static_cast<size_t>(fsize);

  // Request at least one byte so that an empty file is not mistaken for an
  // allocation failure (malloc(0) may legitimately return NULL).
  uint8_t* buffer = static_cast<uint8_t*>(malloc(wanted > 0 ? wanted : 1));
  if (NULL == buffer) {
    fprintf(stderr, "Failed to load kernel.");
    fclose(fp);
    return -1;
  }

  const size_t nread = fread(buffer, 1, wanted, fp);
  fclose(fp);

  // A truncated image must not be handed to the OpenCL runtime as if complete.
  if (nread != wanted) {
    fprintf(stderr, "Failed to load kernel.");
    free(buffer);
    return -1;
  }

  *data = buffer;
  *size = nread;
  return 0;
}

// Kernel image loaded by main() via read_kernel_file(); released with free().
uint8_t *kernel_bin = NULL;
|
||||
|
||||
///
|
||||
// Cleanup any created OpenCL resources
|
||||
//
|
||||
void Cleanup(cl_context context, cl_command_queue commandQueue,
|
||||
cl_program program, cl_kernel kernel, cl_mem memObjects[3]) {
|
||||
for (int i = 0; i < 3; i++) {
|
||||
if (memObjects[i] != 0)
|
||||
clReleaseMemObject(memObjects[i]);
|
||||
}
|
||||
if (commandQueue != 0)
|
||||
clReleaseCommandQueue(commandQueue);
|
||||
|
||||
if (kernel != 0)
|
||||
clReleaseKernel(kernel);
|
||||
|
||||
if (program != 0)
|
||||
clReleaseProgram(program);
|
||||
|
||||
if (context != 0)
|
||||
clReleaseContext(context);
|
||||
|
||||
if (kernel_bin) free(kernel_bin);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
printf("enter demo main\n");
|
||||
|
||||
cl_platform_id platform_id;
|
||||
cl_device_id device_id;
|
||||
size_t kernel_size;
|
||||
cl_int binary_status = 0;
|
||||
int i;
|
||||
|
||||
// read kernel binary from file
|
||||
if (0 != read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size))
|
||||
return -1;
|
||||
|
||||
// Getting platform and device information
|
||||
CL_CHECK(clGetPlatformIDs(1, &platform_id, NULL));
|
||||
CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL));
|
||||
|
||||
cl_context context;
|
||||
context = CL_CHECK_ERR(
|
||||
clCreateContext(NULL, 1, &device_id, &pfn_notify, NULL, &_err));
|
||||
|
||||
cl_command_queue queue;
|
||||
queue = CL_CHECK_ERR(clCreateCommandQueue(context, device_id,
|
||||
CL_QUEUE_PROFILING_ENABLE, &_err));
|
||||
|
||||
cl_kernel kernel = 0;
|
||||
cl_mem memObjects[3] = {0, 0, 0};
|
||||
|
||||
// Create OpenCL program - first attempt to load cached binary.
|
||||
// If that is not available, then create the program from source
|
||||
// and store the binary for future use.
|
||||
std::cout << "Attempting to create program from binary..." << std::endl;
|
||||
// cl_program program = CreateProgramFromBinary(context, device_id,
|
||||
// "kernel.cl.bin");
|
||||
cl_program program = CL_CHECK_ERR(clCreateProgramWithBinary(
|
||||
context, 1, &device_id, &kernel_size, &kernel_bin, &binary_status, &_err));
|
||||
if (program == NULL) {
|
||||
std::cerr << "Failed to write program binary" << std::endl;
|
||||
Cleanup(context, queue, program, kernel, memObjects);
|
||||
return 1;
|
||||
} else {
|
||||
std::cout << "Read program from binary." << std::endl;
|
||||
}
|
||||
|
||||
// Build program
|
||||
CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));
|
||||
|
||||
printf("attempting to create input buffer\n");
|
||||
fflush(stdout);
|
||||
cl_mem input_bufferA;
|
||||
input_bufferA = CL_CHECK_ERR(
|
||||
clCreateBuffer(context, CL_MEM_READ_ONLY,
|
||||
sizeof(float) * NUM_DATA * NUM_DATA, NULL, &_err));
|
||||
|
||||
cl_mem input_bufferB;
|
||||
input_bufferB = CL_CHECK_ERR(
|
||||
clCreateBuffer(context, CL_MEM_READ_ONLY,
|
||||
sizeof(float) * NUM_DATA * NUM_DATA, NULL, &_err));
|
||||
|
||||
printf("attempting to create output buffer\n");
|
||||
fflush(stdout);
|
||||
cl_mem output_buffer;
|
||||
output_buffer = CL_CHECK_ERR(
|
||||
clCreateBuffer(context, CL_MEM_WRITE_ONLY,
|
||||
sizeof(float) * NUM_DATA * NUM_DATA, NULL, &_err));
|
||||
|
||||
memObjects[0] = input_bufferA;
|
||||
memObjects[1] = input_bufferB;
|
||||
memObjects[2] = output_buffer;
|
||||
|
||||
int width = NUM_DATA;
|
||||
|
||||
printf("attempting to create kernel\n");
|
||||
fflush(stdout);
|
||||
kernel = CL_CHECK_ERR(clCreateKernel(program, "sgemm", &_err));
|
||||
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(input_bufferA), &input_bufferA));
|
||||
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(input_bufferB), &input_bufferB));
|
||||
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(output_buffer), &output_buffer));
|
||||
CL_CHECK(clSetKernelArg(kernel, 3, sizeof(width), &width));
|
||||
|
||||
printf("attempting to enqueue write buffer\n");
|
||||
fflush(stdout);
|
||||
for (int i = 0; i < NUM_DATA * NUM_DATA; i++) {
|
||||
|
||||
float in = ((float)rand() / (float)(RAND_MAX)) * 100.0;
|
||||
CL_CHECK(clEnqueueWriteBuffer(queue, input_bufferA, CL_TRUE,
|
||||
i * sizeof(float), 4, &in, 0, NULL, NULL));
|
||||
in = ((float)rand() / (float)(RAND_MAX)) * 100.0;
|
||||
CL_CHECK(clEnqueueWriteBuffer(queue, input_bufferB, CL_TRUE,
|
||||
i * sizeof(float), 4, &in, 0, NULL, NULL));
|
||||
}
|
||||
|
||||
printf("Done enqueueing\n");
|
||||
|
||||
cl_event kernel_completion;
|
||||
const size_t local_work_size[3] = {1, 1, 1};
|
||||
// a_offset
|
||||
size_t global_work_size[3] = {NUM_DATA, NUM_DATA, NUM_DATA};
|
||||
printf("attempting to enqueue kernel\n");
|
||||
fflush(stdout);
|
||||
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size,
|
||||
local_work_size, 0, NULL,
|
||||
&kernel_completion));
|
||||
printf("Enqueue'd kerenel\n");
|
||||
fflush(stdout);
|
||||
cl_ulong time_start, time_end;
|
||||
CL_CHECK(clWaitForEvents(1, &kernel_completion));
|
||||
CL_CHECK(clGetEventProfilingInfo(kernel_completion,
|
||||
CL_PROFILING_COMMAND_START,
|
||||
sizeof(time_start), &time_start, NULL));
|
||||
CL_CHECK(clGetEventProfilingInfo(kernel_completion, CL_PROFILING_COMMAND_END,
|
||||
sizeof(time_end), &time_end, NULL));
|
||||
double elapsed = time_end - time_start;
|
||||
printf("time(ns):%lg\n", elapsed);
|
||||
CL_CHECK(clReleaseEvent(kernel_completion));
|
||||
|
||||
printf("Result:");
|
||||
for (int i = 0; i < NUM_DATA * NUM_DATA; i++) {
|
||||
float data;
|
||||
CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE,
|
||||
i * sizeof(float), 4, &data, 0, NULL, NULL));
|
||||
// printf(" %f", data);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
CL_CHECK(clReleaseMemObject(memObjects[0]));
|
||||
CL_CHECK(clReleaseMemObject(memObjects[1]));
|
||||
CL_CHECK(clReleaseMemObject(memObjects[2]));
|
||||
|
||||
CL_CHECK(clReleaseKernel(kernel));
|
||||
CL_CHECK(clReleaseProgram(program));
|
||||
CL_CHECK(clReleaseContext(context));
|
||||
|
||||
return 0;
|
||||
}
|
BIN
benchmarks/new_opencl/sgemm/sgemm
Executable file
BIN
benchmarks/new_opencl/sgemm/sgemm
Executable file
Binary file not shown.
44
benchmarks/new_opencl/vecadd/Makefile
Normal file
44
benchmarks/new_opencl/vecadd/Makefile
Normal file
|
@ -0,0 +1,44 @@
|
|||
# Builds the vecadd OpenCL host program and its precompiled kernel binary
# (kernel.pocl), and runs it against one of the Vortex driver back ends.

# Tool and library locations; each may be overridden from the command line
# or the environment.
RISCV_TOOL_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
POCLCC_PATH ?= $(wildcard ~/dev/pocl/drops_vortex_cc)
POCLRT_PATH ?= $(wildcard ..)
DRIVER_PATH ?= $(wildcard ../../../driver/sw)

CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors

CXXFLAGS += -I$(POCLRT_PATH)/include

LDFLAGS += -L$(POCLRT_PATH)/lib -L$(DRIVER_PATH)/simx -lOpenCL -lvortex

PROJECT = vecadd

SRCS = main.cc

# These targets are commands, not files; declaring them phony keeps them
# working even if a file with the same name exists.
.PHONY: all run-fpga run-ase run-simx run-rtlsim clean

all: $(PROJECT)

# Compile the OpenCL kernel ahead of time with poclcc.
kernel.pocl: kernel.cl
	POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCLCC_PATH)/lib:$(DRIVER_PATH)/simx $(POCLCC_PATH)/bin/poclcc -o kernel.pocl kernel.cl

$(PROJECT): $(SRCS)
	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@

# Each run-* target selects a driver back end by putting its directory on
# LD_LIBRARY_PATH.
run-fpga: $(PROJECT) kernel.pocl
	LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-ase: $(PROJECT) kernel.pocl
	LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-simx: $(PROJECT) kernel.pocl
	LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-rtlsim: $(PROJECT) kernel.pocl
	LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)

# Header dependencies generated by the compiler.
.depend: $(SRCS)
	$(CXX) $(CXXFLAGS) -MM $^ > .depend;

clean:
	rm -rf $(PROJECT) *.o *.dump .depend

# Skip dependency generation when the only goal is to clean.
ifneq ($(MAKECMDGOALS),clean)
-include .depend
endif
|
0
benchmarks/new_opencl/vecadd/README
Normal file
0
benchmarks/new_opencl/vecadd/README
Normal file
8
benchmarks/new_opencl/vecadd/kernel.cl
Normal file
8
benchmarks/new_opencl/vecadd/kernel.cl
Normal file
|
@ -0,0 +1,8 @@
|
|||
// Element-wise integer vector addition.
// Each work-item handles the element selected by its global id in
// dimension 0 and stores a[i] + b[i] into c[i].
__kernel void vecadd(__global const int *a,
                     __global const int *b,
                     __global int *c)
{
  const int idx = get_global_id(0);
  const int lhs = a[idx];
  const int rhs = b[idx];
  c[idx] = lhs + rhs;
}
|
BIN
benchmarks/new_opencl/vecadd/kernel.pocl
Normal file
BIN
benchmarks/new_opencl/vecadd/kernel.pocl
Normal file
Binary file not shown.
187
benchmarks/new_opencl/vecadd/main.cc
Normal file
187
benchmarks/new_opencl/vecadd/main.cc
Normal file
|
@ -0,0 +1,187 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <CL/opencl.h>
|
||||
#include <string.h>
|
||||
|
||||
#define SIZE 4
|
||||
#define NUM_WORK_GROUPS 2
|
||||
#define KERNEL_NAME "vecadd"
|
||||
|
||||
// Evaluates an OpenCL call that returns a cl_int status. When the status is
// not CL_SUCCESS, prints the failing expression and its code, runs cleanup()
// and terminates the process with exit(-1).
#define CL_CHECK(_expr)                                                \
   do {                                                                \
     cl_int _err = _expr;                                              \
     if (_err != CL_SUCCESS) {                                         \
       printf("OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
       cleanup();                                                      \
       exit(-1);                                                       \
     }                                                                 \
   } while (0)
|
||||
|
||||
// Variant of CL_CHECK for OpenCL calls that return an object and report
// their status through an out-parameter. The wrapped expression must pass
// `&_err` as that out-parameter; the macro yields the call's return value.
// When `_err` is not CL_SUCCESS afterwards, prints the failing expression
// and its code, runs cleanup() and terminates the process with exit(-1).
// Uses a statement expression `({ ... })`, which is a compiler extension.
#define CL_CHECK2(_expr)                                               \
   ({                                                                  \
     cl_int _err = CL_INVALID_VALUE;                                   \
     decltype(_expr) _ret = _expr;                                     \
     if (CL_SUCCESS != _err) {                                         \
       printf("OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
       cleanup();                                                      \
       exit(-1);                                                       \
     }                                                                 \
     _ret;                                                             \
   })
|
||||
|
||||
int exitcode = 0;
|
||||
cl_context context = NULL;
|
||||
cl_command_queue commandQueue = NULL;
|
||||
cl_program program = NULL;
|
||||
cl_kernel kernel = NULL;
|
||||
cl_mem a_memobj = NULL;
|
||||
cl_mem b_memobj = NULL;
|
||||
cl_mem c_memobj = NULL;
|
||||
cl_int *A = NULL;
|
||||
cl_int *B = NULL;
|
||||
cl_int *C = NULL;
|
||||
uint8_t *kernel_bin = NULL;
|
||||
|
||||
// Reads the entire file `filename` into a freshly malloc'ed buffer.
//
//   filename: path of the file to load
//   data:     receives the buffer on success (caller releases it with free())
//   size:     receives the number of bytes read on success
//
// Returns 0 on success and -1 on any failure (bad argument, file cannot be
// opened, length cannot be determined, out of memory, short read). On
// failure *data and *size are left untouched.
static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
  if (nullptr == filename || nullptr == data || nullptr == size)
    return -1;

  // Binary mode: the kernel image must not go through newline translation.
  FILE* fp = fopen(filename, "rb");
  if (nullptr == fp) {
    fprintf(stderr, "Failed to load kernel.");
    return -1;
  }

  // Determine the file length by seeking to its end.
  long fsize = -1;
  if (0 == fseek(fp, 0, SEEK_END))
    fsize = ftell(fp);
  if (fsize < 0) {
    fclose(fp);
    return -1;
  }
  rewind(fp);

  // Allocate at least one byte so an empty file still yields a valid pointer.
  const size_t length = (size_t)fsize;
  uint8_t* buffer = (uint8_t*)malloc(length > 0 ? length : 1);
  if (nullptr == buffer) {
    fclose(fp);
    return -1;
  }

  const size_t nread = fread(buffer, 1, length, fp);
  fclose(fp);

  // A short read would hand a truncated binary to the OpenCL runtime.
  if (nread != length) {
    free(buffer);
    return -1;
  }

  *data = buffer;
  *size = nread;
  return 0;
}
|
||||
|
||||
static void cleanup() {
|
||||
if (commandQueue) clReleaseCommandQueue(commandQueue);
|
||||
if (kernel) clReleaseKernel(kernel);
|
||||
if (program) clReleaseProgram(program);
|
||||
if (a_memobj) clReleaseMemObject(a_memobj);
|
||||
if (b_memobj) clReleaseMemObject(b_memobj);
|
||||
if (c_memobj) clReleaseMemObject(c_memobj);
|
||||
if (context) clReleaseContext(context);
|
||||
if (kernel_bin) free(kernel_bin);
|
||||
if (A) free(A);
|
||||
if (B) free(B);
|
||||
if (C) free(C);
|
||||
}
|
||||
|
||||
static int find_device(char* name, cl_platform_id platform_id, cl_device_id *device_id) {
|
||||
cl_device_id device_ids[64];
|
||||
cl_uint num_devices = 0;
|
||||
|
||||
CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, 64, device_ids, &num_devices));
|
||||
|
||||
for (int i=0; i<num_devices; i++) {
|
||||
char buffer[1024];
|
||||
cl_uint buf_uint;
|
||||
cl_ulong buf_ulong;
|
||||
|
||||
CL_CHECK(clGetDeviceInfo(device_ids[i], CL_DEVICE_NAME, sizeof(buffer), buffer, NULL));
|
||||
|
||||
if (0 == strncmp(buffer, name, strlen(name))) {
|
||||
*device_id = device_ids[i];
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int main (int argc, char **argv) {
|
||||
printf("enter demo main\n");
|
||||
|
||||
cl_platform_id platform_id;
|
||||
cl_device_id device_id;
|
||||
size_t kernel_size;
|
||||
cl_int binary_status = 0;
|
||||
int i;
|
||||
|
||||
// read kernel binary from file
|
||||
if (0 != read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size))
|
||||
return -1;
|
||||
|
||||
// Getting platform and device information
|
||||
CL_CHECK(clGetPlatformIDs(1, &platform_id, NULL));
|
||||
CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL));
|
||||
|
||||
// Creating context.
|
||||
context = CL_CHECK2(clCreateContext(NULL, 1, &device_id, NULL, NULL, &_err));
|
||||
|
||||
// Memory buffers for each array
|
||||
a_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_READ_ONLY, SIZE * sizeof(cl_int), NULL, &_err));
|
||||
b_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_READ_ONLY, SIZE * sizeof(cl_int), NULL, &_err));
|
||||
c_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_WRITE_ONLY, SIZE * sizeof(cl_int), NULL, &_err));
|
||||
|
||||
// Allocate memories for input arrays and output arrays.
|
||||
A = (cl_int*)malloc(sizeof(cl_int)*SIZE);
|
||||
B = (cl_int*)malloc(sizeof(cl_int)*SIZE);
|
||||
C = (cl_int*)malloc(sizeof(cl_int)*SIZE);
|
||||
|
||||
// Initialize values for array members.
|
||||
for (i=0; i<SIZE; ++i) {
|
||||
A[i] = i*2+0;
|
||||
B[i] = i*2+1;
|
||||
}
|
||||
|
||||
// Create program from kernel source
|
||||
program = CL_CHECK2(clCreateProgramWithBinary(
|
||||
context, 1, &device_id, &kernel_size, &kernel_bin, &binary_status, &_err));
|
||||
|
||||
// Build program
|
||||
CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));
|
||||
|
||||
// Create kernel
|
||||
kernel = CL_CHECK2(clCreateKernel(program, KERNEL_NAME, &_err));
|
||||
|
||||
// Set arguments for kernel
|
||||
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_memobj));
|
||||
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_memobj));
|
||||
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_memobj));
|
||||
|
||||
// Creating command queue
|
||||
commandQueue = CL_CHECK2(clCreateCommandQueue(context, device_id, 0, &_err));
|
||||
|
||||
// Copy lists to memory buffers
|
||||
CL_CHECK(clEnqueueWriteBuffer(commandQueue, a_memobj, CL_TRUE, 0, SIZE * sizeof(float), A, 0, NULL, NULL));
|
||||
CL_CHECK(clEnqueueWriteBuffer(commandQueue, b_memobj, CL_TRUE, 0, SIZE * sizeof(float), B, 0, NULL, NULL));
|
||||
|
||||
// Execute the kernel
|
||||
size_t globalItemSize = SIZE;
|
||||
size_t localItemSize = SIZE/NUM_WORK_GROUPS;
|
||||
CL_CHECK(clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, &globalItemSize, &localItemSize, 0, NULL, NULL));
|
||||
CL_CHECK(clFinish(commandQueue));
|
||||
|
||||
// Read from device back to host.
|
||||
CL_CHECK(clEnqueueReadBuffer(commandQueue, c_memobj, CL_TRUE, 0, SIZE * sizeof(float), C, 0, NULL, NULL));
|
||||
|
||||
// Test if correct answer
|
||||
int exitcode = 0;
|
||||
for (i=0; i<SIZE; ++i) {
|
||||
if (C[i] != (A[i] + B[i])) {
|
||||
printf("Failed!\n");
|
||||
exitcode = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i == SIZE) {
|
||||
printf("Ok!\n");
|
||||
}
|
||||
|
||||
// Clean up
|
||||
cleanup();
|
||||
|
||||
return exitcode;
|
||||
}
|
|
@ -29,7 +29,7 @@ CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
|||
CXXFLAGS += -I$(POCL_INC_PATH) -I.
|
||||
|
||||
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
||||
QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||
|
||||
PROJECT=BlackScholes
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@ CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
|||
CXXFLAGS += -I$(POCL_INC_PATH) -I.
|
||||
|
||||
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
||||
QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||
|
||||
PROJECT=DotProduct
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@ CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
|||
CXXFLAGS += -I$(POCL_INC_PATH) -I.
|
||||
|
||||
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
||||
QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||
|
||||
PROJECT=VectorHypot
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@ CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
|||
CXXFLAGS += -I$(POCL_INC_PATH)
|
||||
|
||||
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
||||
QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||
|
||||
PROJECT = bfs
|
||||
|
||||
|
|
68
benchmarks/opencl/convolution/Makefile
Normal file
68
benchmarks/opencl/convolution/Makefile
Normal file
|
@ -0,0 +1,68 @@
|
|||
# Cross-builds the convolution OpenCL benchmark for RISC-V (rv32im) and
# provides targets to run it on the simX emulator or under QEMU.

# Tool and library locations; each may be overridden from the command line
# or the environment.
RISCV_TOOL_PATH ?= $(wildcard ../../../../riscv-gnu-toolchain/drops)
POCL_CC_PATH ?= $(wildcard ../../../../pocl/drops_riscv_cc)
POCL_INC_PATH ?= $(wildcard ../include)
POCL_LIB_PATH ?= $(wildcard ../lib)
VX_RT_PATH ?= $(wildcard ../../../runtime)
VX_SIMX_PATH ?= $(wildcard ../../../simX/obj_dir)

CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb

# Vortex runtime sources compiled into the ELF image.
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)

VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld

CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
CXXFLAGS += -ffreestanding # program may not begin at main()
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
CXXFLAGS += -I$(POCL_INC_PATH)

VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a

PROJECT = convolution

SRCS = main.cpp utils.cpp

# These targets are commands, not files; declaring them phony keeps them
# working even if a file with the same name exists.
.PHONY: all run qemu gdb-s gdb-c clean

all: $(PROJECT).dump $(PROJECT).hex

# Compile the OpenCL kernel ahead of time into a static library with poclcc.
lib$(PROJECT).a: kernel.cl
	POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl

$(PROJECT).elf: $(SRCS) lib$(PROJECT).a
	$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf

$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a
	$(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu

$(PROJECT).hex: $(PROJECT).elf
	$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex

$(PROJECT).dump: $(PROJECT).elf
	$(DMP) -D $(PROJECT).elf > $(PROJECT).dump

run: $(PROJECT).hex
	POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug

qemu: $(PROJECT).qemu
	POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu

# gdb-s starts QEMU waiting for a debugger on port 1234; gdb-c starts gdb.
gdb-s: $(PROJECT).qemu
	POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu

gdb-c: $(PROJECT).qemu
	$(GDB) $(PROJECT).qemu

clean:
	rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug
|
BIN
benchmarks/opencl/convolution/input.bmp
Normal file
BIN
benchmarks/opencl/convolution/input.bmp
Normal file
Binary file not shown.
After Width: | Height: | Size: 44 KiB |
54
benchmarks/opencl/convolution/kernel.cl
Executable file
54
benchmarks/opencl/convolution/kernel.cl
Executable file
|
@ -0,0 +1,54 @@
|
|||
// 2D convolution of a single-channel image with a square filter.
//
// Each work-item computes one output pixel at (column, row) =
// (global id 0, global id 1). It walks a filterWidth x filterWidth window
// centred on that pixel, reading the source through `sampler`, and
// accumulates pixel.x * filter[k] with k advancing row by row through the
// filter. Only the 'x' channel carries data; the other three channels of
// the written pixel are zero. Work-items outside rows x cols write nothing.
__kernel
void convolution(
    __read_only  image2d_t sourceImage,
    __write_only image2d_t outputImage,
    int rows,
    int cols,
    __constant float* filter,
    int filterWidth,
    sampler_t sampler)
{
  // Position of this work-item's output pixel
  const int column = get_global_id(0);
  const int row    = get_global_id(1);

  // Reach of the filter on either side of the centre pixel
  const int halfWidth = filterWidth / 2;

  float acc = 0.0f;  // running weighted sum for the 'x' channel
  int tap = 0;       // linear index into the filter coefficients

  for (int dy = -halfWidth; dy <= halfWidth; dy++) {
    for (int dx = -halfWidth; dx <= halfWidth; dx++) {
      const int2 at = (int2)(column + dx, row + dy);
      // Image reads always return a four-element vector; 'x' holds the value.
      const float4 texel = read_imagef(sourceImage, sampler, at);
      acc += texel.x * filter[tap++];
    }
  }

  // Store the result only for work-items that map onto the image
  if (row < rows && column < cols) {
    write_imagef(outputImage, (int2)(column, row),
                 (float4)(acc, 0.0f, 0.0f, 0.0f));
  }
}
|
261
benchmarks/opencl/convolution/main.cpp
Executable file
261
benchmarks/opencl/convolution/main.cpp
Executable file
|
@ -0,0 +1,261 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <CL/cl.h>
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
// Rounds `value` up to the nearest multiple of `multiple`.
// A value that is already a multiple is returned unchanged. When `multiple`
// is 0 there is nothing to round to, so `value` is returned as is (this
// also avoids a division by zero).
unsigned int roundUp(unsigned int value, unsigned int multiple) {

  if(multiple == 0) {
    return value;
  }

  // Determine how far past the nearest multiple the value is
  unsigned int remainder = value % multiple;

  // Add the difference to make the value a multiple
  if(remainder != 0) {
    value += (multiple-remainder);
  }

  return value;
}
|
||||
|
||||
// Reads the text file `kernelPath` into a newly allocated, NUL-terminated
// string and returns it; the caller releases it with free().
// Prints a message and terminates the process with exit(-1) when the file
// cannot be opened, measured, allocated for, or read completely.
char* readSource(const char* kernelPath) {

  printf("Program file is: %s\n", kernelPath);

  FILE *fp = fopen(kernelPath, "rb");
  if(!fp) {
    printf("Could not open kernel file\n");
    exit(-1);
  }

  // Measure the file by seeking to its end.
  if(fseek(fp, 0, SEEK_END) != 0) {
    printf("Error seeking to end of file\n");
    exit(-1);
  }
  long int size = ftell(fp);
  if(size < 0) {
    printf("Error getting file position\n");
    exit(-1);
  }

  rewind(fp);

  // calloc zero-fills the buffer, so the string is terminated no matter how
  // many bytes are read. The result is checked before the buffer is touched.
  char *source = (char *)calloc((size_t)size + 1, 1);
  if(source == NULL) {
    printf("Error allocating space for the kernel source\n");
    exit(-1);
  }

  size_t bytesRead = fread(source, 1, (size_t)size, fp);
  fclose(fp);

  if(bytesRead != (size_t)size) {
    printf("Error reading the kernel source\n");
    exit(-1);
  }
  source[size] = '\0';

  return source;
}
|
||||
|
||||
// Verifies the status of an OpenCL call.
//   status: code returned (or written) by the call
//   cmd:    label used in the failure message
// Returns normally on CL_SUCCESS; otherwise prints "<cmd> failed (<status>)"
// and terminates the process with exit(-1).
void chk(cl_int status, const char* cmd) {

  if(status == CL_SUCCESS) {
    return;
  }

  printf("%s failed (%d)\n", cmd, status);
  exit(-1);
}
|
||||
|
||||
int main() {
|
||||
|
||||
int i, j, k, l;
|
||||
|
||||
// Rows and columns in the input image
|
||||
int imageHeight;
|
||||
int imageWidth;
|
||||
|
||||
const char* inputFile = "input.bmp";
|
||||
const char* outputFile = "output.bmp";
|
||||
|
||||
// Homegrown function to read a BMP from file
|
||||
float* inputImage = readImage(inputFile, &imageWidth,
|
||||
&imageHeight);
|
||||
|
||||
// Size of the input and output images on the host
|
||||
int dataSize = imageHeight*imageWidth*sizeof(float);
|
||||
|
||||
// Output image on the host
|
||||
float* outputImage = NULL;
|
||||
outputImage = (float*)malloc(dataSize);
|
||||
float* refImage = NULL;
|
||||
refImage = (float*)malloc(dataSize);
|
||||
|
||||
// 45 degree motion blur
|
||||
float filter[49] =
|
||||
{0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, -1, 0, 1, 0, 0,
|
||||
0, 0, -2, 0, 2, 0, 0,
|
||||
0, 0, -1, 0, 1, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0};
|
||||
|
||||
// The convolution filter is 7x7
|
||||
int filterWidth = 7;
|
||||
int filterSize = filterWidth*filterWidth; // Assume a square kernel
|
||||
|
||||
// Set up the OpenCL environment
|
||||
cl_int status;
|
||||
|
||||
// Discovery platform
|
||||
cl_platform_id platform;
|
||||
status = clGetPlatformIDs(1, &platform, NULL);
|
||||
chk(status, "clGetPlatformIDs");
|
||||
|
||||
// Discover device
|
||||
cl_device_id device;
|
||||
clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL);
|
||||
chk(status, "clGetDeviceIDs");
|
||||
|
||||
// Create context
|
||||
cl_context_properties props[3] = {CL_CONTEXT_PLATFORM,
|
||||
(cl_context_properties)(platform), 0};
|
||||
cl_context context;
|
||||
context = clCreateContext(props, 1, &device, NULL, NULL, &status);
|
||||
chk(status, "clCreateContext");
|
||||
|
||||
// Create command queue
|
||||
cl_command_queue queue;
|
||||
queue = clCreateCommandQueue(context, device, 0, &status);
|
||||
chk(status, "clCreateCommandQueue");
|
||||
|
||||
// The image format describes how the data will be stored in memory
|
||||
cl_image_format format;
|
||||
format.image_channel_order = CL_R; // single channel
|
||||
format.image_channel_data_type = CL_FLOAT; // float data type
|
||||
|
||||
// Create space for the source image on the device
|
||||
cl_mem d_inputImage = clCreateImage2D(context, 0, &format, imageWidth,
|
||||
imageHeight, 0, NULL, &status);
|
||||
chk(status, "clCreateImage2D");
|
||||
|
||||
// Create space for the output image on the device
|
||||
cl_mem d_outputImage = clCreateImage2D(context, 0, &format, imageWidth,
|
||||
imageHeight, 0, NULL, &status);
|
||||
chk(status, "clCreateImage2D");
|
||||
|
||||
// Create space for the 7x7 filter on the device
|
||||
cl_mem d_filter = clCreateBuffer(context, 0, filterSize*sizeof(float),
|
||||
NULL, &status);
|
||||
chk(status, "clCreateBuffer");
|
||||
|
||||
// Copy the source image to the device
|
||||
size_t origin[3] = {0, 0, 0}; // Offset within the image to copy from
|
||||
size_t region[3] = {imageWidth, imageHeight, 1}; // Elements to per dimension
|
||||
status = clEnqueueWriteImage(queue, d_inputImage, CL_FALSE, origin, region,
|
||||
0, 0, inputImage, 0, NULL, NULL);
|
||||
chk(status, "clEnqueueWriteImage");
|
||||
|
||||
// Copy the 7x7 filter to the device
|
||||
status = clEnqueueWriteBuffer(queue, d_filter, CL_FALSE, 0,
|
||||
filterSize*sizeof(float), filter, 0, NULL, NULL);
|
||||
chk(status, "clEnqueueWriteBuffer");
|
||||
|
||||
// Create the image sampler
|
||||
cl_sampler sampler = clCreateSampler(context, CL_FALSE,
|
||||
CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &status);
|
||||
chk(status, "clCreateSampler");
|
||||
|
||||
const char* source = readSource("kernel.cl");
|
||||
|
||||
// Create a program object with source and build it
|
||||
cl_program program;
|
||||
program = clCreateProgramWithSource(context, 1, &source, NULL, NULL);
|
||||
chk(status, "clCreateProgramWithSource");
|
||||
status = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
|
||||
chk(status, "clBuildProgram");
|
||||
|
||||
// Create the kernel object
|
||||
cl_kernel kernel;
|
||||
kernel = clCreateKernel(program, "convolution", &status);
|
||||
chk(status, "clCreateKernel");
|
||||
|
||||
// Set the kernel arguments
|
||||
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_inputImage);
|
||||
status |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_outputImage);
|
||||
status |= clSetKernelArg(kernel, 2, sizeof(int), &imageHeight);
|
||||
status |= clSetKernelArg(kernel, 3, sizeof(int), &imageWidth);
|
||||
status |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &d_filter);
|
||||
status |= clSetKernelArg(kernel, 5, sizeof(int), &filterWidth);
|
||||
status |= clSetKernelArg(kernel, 6, sizeof(cl_sampler), &sampler);
|
||||
chk(status, "clSetKernelArg");
|
||||
|
||||
// Set the work item dimensions
|
||||
size_t globalSize[2] = {imageWidth, imageHeight};
|
||||
status = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalSize, NULL, 0,
|
||||
NULL, NULL);
|
||||
chk(status, "clEnqueueNDRange");
|
||||
|
||||
// Read the image back to the host
|
||||
status = clEnqueueReadImage(queue, d_outputImage, CL_TRUE, origin,
|
||||
region, 0, 0, outputImage, 0, NULL, NULL);
|
||||
chk(status, "clEnqueueReadImage");
|
||||
|
||||
// Write the output image to file
|
||||
storeImage(outputImage, outputFile, imageHeight, imageWidth, inputFile);
|
||||
|
||||
// Compute the reference image
|
||||
for(i = 0; i < imageHeight; i++) {
|
||||
for(j = 0; j < imageWidth; j++) {
|
||||
refImage[i*imageWidth+j] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Iterate over the rows of the source image
|
||||
int halfFilterWidth = filterWidth/2;
|
||||
float sum;
|
||||
for(i = 0; i < imageHeight; i++) {
|
||||
// Iterate over the columns of the source image
|
||||
for(j = 0; j < imageWidth; j++) {
|
||||
sum = 0; // Reset sum for new source pixel
|
||||
// Apply the filter to the neighborhood
|
||||
for(k = - halfFilterWidth; k <= halfFilterWidth; k++) {
|
||||
for(l = - halfFilterWidth; l <= halfFilterWidth; l++) {
|
||||
if(i+k >= 0 && i+k < imageHeight &&
|
||||
j+l >= 0 && j+l < imageWidth) {
|
||||
sum += inputImage[(i+k)*imageWidth + j+l] *
|
||||
filter[(k+halfFilterWidth)*filterWidth +
|
||||
l+halfFilterWidth];
|
||||
}
|
||||
}
|
||||
}
|
||||
refImage[i*imageWidth+j] = sum;
|
||||
}
|
||||
}
|
||||
|
||||
int failed = 0;
|
||||
for(i = 0; i < imageHeight; i++) {
|
||||
for(j = 0; j < imageWidth; j++) {
|
||||
if(abs(outputImage[i*imageWidth+j]-refImage[i*imageWidth+j]) > 0.01) {
|
||||
printf("Results are INCORRECT\n");
|
||||
printf("Pixel mismatch at <%d,%d> (%f vs. %f)\n", i, j,
|
||||
outputImage[i*imageWidth+j], refImage[i*imageWidth+j]);
|
||||
failed = 1;
|
||||
}
|
||||
if(failed) break;
|
||||
}
|
||||
if(failed) break;
|
||||
}
|
||||
if(!failed) {
|
||||
printf("Results are correct\n");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
180
benchmarks/opencl/convolution/utils.cpp
Normal file
180
benchmarks/opencl/convolution/utils.cpp
Normal file
|
@ -0,0 +1,180 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
// Writes `imageOut` as an 8-bit-per-pixel BMP file named `filename`.
//
// The header (every byte before the pixel data) and the image width and
// height are taken from the existing BMP `refFilename`.
//
//   imageOut:    pixel values, indexed as imageOut[row*cols + column]
//   filename:    output file to create
//   rows:        not used; the height comes from the reference header
//   cols:        row stride of `imageOut`, in elements
//   refFilename: BMP whose header is copied
//
// Prints a message and terminates the process with exit(-1) on any I/O or
// allocation failure.
void storeImage(float *imageOut,
                const char *filename,
                int rows,
                int cols,
                const char* refFilename) {

   (void)rows;

   FILE *ifp, *ofp;
   unsigned char tmp;
   int offset;
   unsigned char *buffer;
   int i, j;

   int bytes;

   int height, width;

   ifp = fopen(refFilename, "rb");
   if(ifp == NULL) {
      // Report the file that actually failed to open.
      perror(refFilename);
      exit(-1);
   }

   // Header fields are read as raw 4-byte values straight into ints: the
   // pixel-data offset at byte 10, then width and height at bytes 18 and 22.
   fseek(ifp, 10, SEEK_SET);
   if(fread(&offset, 4, 1, ifp) != 1) {
      printf("error reading header!\n");
      exit(-1);
   }

   fseek(ifp, 18, SEEK_SET);
   if(fread(&width, 4, 1, ifp) != 1 || fread(&height, 4, 1, ifp) != 1) {
      printf("error reading header!\n");
      exit(-1);
   }

   // The pixel data can never start at or before the beginning of the file.
   if(offset <= 0) {
      printf("error reading header!\n");
      exit(-1);
   }

   fseek(ifp, 0, SEEK_SET);

   buffer = (unsigned char *)malloc(offset);
   if(buffer == NULL) {
      perror("malloc");
      exit(-1);
   }

   if(fread(buffer, 1, offset, ifp) != (size_t)offset) {
      printf("error reading header!\n");
      exit(-1);
   }

   printf("Writing output image to %s\n", filename);
   ofp = fopen(filename, "wb");
   if(ofp == NULL) {
      perror("opening output file");
      exit(-1);
   }
   bytes = fwrite(buffer, 1, offset, ofp);
   if(bytes != offset) {
      printf("error writing header!\n");
      exit(-1);
   }

   // NOTE bmp formats store data in reverse raster order (see comment in
   // readImage function), so we need to flip it upside down here.
   int mod = width % 4;
   if(mod != 0) {
      mod = 4 - mod;
   }
   const unsigned char pad = 0;
   for(i = height-1; i >= 0; i--) {
      for(j = 0; j < width; j++) {
         // NOTE(review): values outside [0, 255] are not clamped before the
         // conversion to unsigned char - confirm the intended mapping.
         tmp = (unsigned char)imageOut[i*cols+j];
         fwrite(&tmp, sizeof(char), 1, ofp);
      }
      // In bmp format, rows must be a multiple of 4-bytes.
      // So if we're not at a multiple of 4, add zero padding.
      for(j = 0; j < mod; j++) {
         fwrite(&pad, sizeof(char), 1, ofp);
      }
   }

   fclose(ofp);
   fclose(ifp);

   free(buffer);
}
|
||||
|
||||
/*
 * Read a BMP image and convert it to a float array.
 *
 * The pixel-data offset is taken from the 4-byte value at file position 10
 * and the width/height from the 4-byte values at positions 18 and 22. These
 * are read straight into `int`, so this assumes a host with 4-byte ints whose
 * byte order matches the file. Pixel data is consumed as one byte per pixel;
 * the bit-depth field of the header is not inspected.
 *
 * filename  - path of the BMP to read.
 * widthOut  - receives the image width in pixels.
 * heightOut - receives the image height in pixels.
 *
 * Returns a malloc'd array of width*height floats in row-major order with
 * the top image row first; the caller releases it with free().
 *
 * Any failure prints a diagnostic and terminates the process with exit(-1).
 */
float* readImage(const char *filename, int* widthOut, int* heightOut) {

    printf("Reading input image from %s\n", filename);
    FILE *fp = fopen(filename, "rb");
    if(fp == NULL) {
        perror(filename);
        exit(-1);
    }

    int offset = 0;
    int width = 0;
    int height = 0;

    const bool headerOk =
        fseek(fp, 10, SEEK_SET) == 0 && fread(&offset, 4, 1, fp) == 1 &&
        fseek(fp, 18, SEEK_SET) == 0 && fread(&width, 4, 1, fp) == 1 &&
        fread(&height, 4, 1, fp) == 1;
    if(!headerOk) {
        fprintf(stderr, "%s: could not read BMP header fields\n", filename);
        exit(-1);
    }

    printf("width = %d\n", width);
    printf("height = %d\n", height);

    // Reject values that would turn into a bogus allocation size or a
    // negative seek target below.
    if(offset < 0 || width <= 0 || height <= 0) {
        fprintf(stderr, "%s: unsupported BMP header (offset=%d, width=%d, height=%d)\n",
                filename, offset, width, height);
        exit(-1);
    }

    const size_t rowBytes = (size_t)width;
    const size_t rowCount = (size_t)height;
    // Guard the sizeof(float)*width*height product against wrap-around.
    if(rowBytes > static_cast<size_t>(-1) / sizeof(float) / rowCount) {
        fprintf(stderr, "%s: image dimensions too large\n", filename);
        exit(-1);
    }

    *widthOut = width;
    *heightOut = height;

    // Input image on the host
    float *floatImage = (float *)malloc(sizeof(float) * rowBytes * rowCount);
    unsigned char *row = (unsigned char *)malloc(rowBytes);
    if(floatImage == NULL || row == NULL) {
        perror("malloc");
        exit(-1);
    }

    if(fseek(fp, offset, SEEK_SET) != 0) {
        perror(filename);
        exit(-1);
    }
    // Kept from the original implementation: flushes all open output streams
    // (presumably so the progress lines above appear promptly).
    fflush(NULL);

    // For the bmp format, each row occupies a multiple of 4 bytes on disk.
    int mod = width % 4;
    if(mod != 0) {
        mod = 4 - mod;
    }

    // NOTE bitmaps are stored in upside-down raster order: the first row in
    // the file is the bottom row of the image. Each file row is therefore
    // stored directly into its mirrored position so the result is
    // right-side up, and converted to float on the way.
    for(size_t fileRow = 0; fileRow < rowCount; fileRow++) {
        if(fread(row, 1, rowBytes, fp) != rowBytes) {
            fprintf(stderr, "%s: unexpected end of pixel data\n", filename);
            exit(-1);
        }
        // Skip the padding bytes that follow the pixel data of this row.
        if(mod != 0 && fseek(fp, mod, SEEK_CUR) != 0) {
            perror(filename);
            exit(-1);
        }

        float *dst = floatImage + (rowCount - 1 - fileRow) * rowBytes;
        for(size_t j = 0; j < rowBytes; j++) {
            dst[j] = (float)row[j];
        }
    }

    free(row);
    fclose(fp);

    return floatImage;
}
|
11
benchmarks/opencl/convolution/utils.h
Normal file
11
benchmarks/opencl/convolution/utils.h
Normal file
|
@ -0,0 +1,11 @@
|
|||
// Include guard. The name avoids a double underscore, since identifiers
// containing one are reserved for the implementation.
#ifndef CONVOLUTION_UTILS_H
#define CONVOLUTION_UTILS_H

// One byte of image data.
typedef unsigned char uchar;

// Reads the BMP file `filename`, stores its dimensions in *widthOut and
// *heightOut, and returns a malloc'd row-major array of width*height floats
// with the top image row first. Exits the process if the file cannot be
// opened or memory cannot be allocated.
float* readImage(const char *filename, int* widthOut, int* heightOut);

// Writes `imageOut` (row-major, `cols` floats per row) to `filename` as a
// BMP. The file header and the image dimensions are taken from the existing
// BMP `refFilename`; `rows` and `cols` describe the `imageOut` buffer.
// Exits the process on I/O or allocation failure.
void storeImage(float *imageOut, const char *filename, int rows, int cols,
                const char* refFilename);

#endif  // CONVOLUTION_UTILS_H
|
|
@ -29,7 +29,7 @@ CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
|||
CXXFLAGS += -I$(POCL_INC_PATH) -I.
|
||||
|
||||
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
||||
QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||
|
||||
PROJECT = cutcp
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@ CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
|||
CXXFLAGS += -I$(POCL_INC_PATH)
|
||||
|
||||
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
||||
QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||
|
||||
PROJECT = gaussian
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@ CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
|||
CXXFLAGS += -I$(POCL_INC_PATH)
|
||||
|
||||
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
||||
QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||
|
||||
PROJECT = kmeans
|
||||
SRCS = main.cc read_input.c rmse.c cluster.c kmeans_clustering.c
|
||||
|
|
|
@ -29,7 +29,7 @@ CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
|||
CXXFLAGS += -I$(POCL_INC_PATH) -I.
|
||||
|
||||
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
||||
QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||
|
||||
PROJECT = lbm
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@ CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
|||
CXXFLAGS += -I$(POCL_INC_PATH) -I.
|
||||
|
||||
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
||||
QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||
|
||||
PROJECT = mri-q
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@ CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
|||
CXXFLAGS += -I$(POCL_INC_PATH)
|
||||
|
||||
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
||||
QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||
|
||||
PROJECT = nearn
|
||||
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue