mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 22:07:41 -04:00
change d2d to d2h & h2d
This commit is contained in:
parent
ea26d69751
commit
80c81fc77d
2 changed files with 1437 additions and 1438 deletions
|
@ -6,22 +6,19 @@
|
|||
#ifndef _CL_HELPER_
|
||||
#define _CL_HELPER_
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include "util.h"
|
||||
|
||||
#ifdef TIMING
|
||||
#include "timing.h"
|
||||
#endif
|
||||
#include "util.h"
|
||||
#include <CL/cl.h>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using std::string;
|
||||
using std::ifstream;
|
||||
using std::cerr;
|
||||
using std::cout;
|
||||
using std::endl;
|
||||
//using std::cout;
|
||||
using std::ifstream;
|
||||
using std::string;
|
||||
|
||||
//#define PROFILE_
|
||||
#ifdef PROFILE_
|
||||
|
@ -132,29 +129,6 @@ int work_group_size = BLOCK_SIZE_0;
|
|||
int device_id_inused = 0; //deviced id used (default : 0)
|
||||
int number_devices = 0;
|
||||
|
||||
int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
|
||||
if (nullptr == filename || nullptr == data || 0 == size)
|
||||
return -1;
|
||||
|
||||
FILE* fp = fopen(filename, "r");
|
||||
if (NULL == fp) {
|
||||
fprintf(stderr, "Failed to load kernel.");
|
||||
return -1;
|
||||
}
|
||||
fseek(fp , 0 , SEEK_END);
|
||||
long fsize = ftell(fp);
|
||||
rewind(fp);
|
||||
|
||||
*data = (uint8_t*)malloc(fsize);
|
||||
*size = fread(*data, 1, fsize, fp);
|
||||
|
||||
fclose(fp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*------------------------------------------------------------
|
||||
@function: select device to use
|
||||
@params:
|
||||
|
@ -162,7 +136,8 @@ int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
|
|||
@return: NULL
|
||||
@date: 24/03/2011
|
||||
------------------------------------------------------------*/
|
||||
void _clSetDevice(int idx) throw(string){
|
||||
void _clSetDevice(int idx) throw(string)
|
||||
{
|
||||
|
||||
cl_int resultCL;
|
||||
oclHandles.context = NULL;
|
||||
|
@ -200,7 +175,6 @@ void _clSetDevice(int idx) throw(string){
|
|||
if (idx > (deviceListSize - 1))
|
||||
throw(string(":invalid device ID:"));
|
||||
device_id_inused = idx;
|
||||
|
||||
}
|
||||
|
||||
/*------------------------------------------------------------
|
||||
|
@ -211,7 +185,8 @@ void _clSetDevice(int idx) throw(string){
|
|||
@return: prop
|
||||
@date: 24/03/2011
|
||||
------------------------------------------------------------*/
|
||||
void _clGetDeviceProperties(int idx, _clDeviceProp *prop) throw(string){
|
||||
void _clGetDeviceProperties(int idx, _clDeviceProp* prop) throw(string)
|
||||
{
|
||||
|
||||
oclHandles.cl_status = clGetDeviceInfo(oclHandles.devices[idx], CL_DEVICE_NAME, 100, prop->device_name, NULL);
|
||||
|
||||
|
@ -234,10 +209,33 @@ void _clGetDeviceProperties(int idx, _clDeviceProp *prop) throw(string){
|
|||
#endif
|
||||
}
|
||||
|
||||
int read_kernel_file(const char* filename, uint8_t** data, size_t* size)
|
||||
{
|
||||
if (nullptr == filename || nullptr == data || 0 == size)
|
||||
return -1;
|
||||
|
||||
FILE* fp = fopen(filename, "r");
|
||||
if (NULL == fp) {
|
||||
fprintf(stderr, "Failed to load kernel.");
|
||||
return -1;
|
||||
}
|
||||
fseek(fp, 0, SEEK_END);
|
||||
long fsize = ftell(fp);
|
||||
rewind(fp);
|
||||
|
||||
*data = (uint8_t*)malloc(fsize);
|
||||
*size = fread(*data, 1, fsize, fp);
|
||||
|
||||
fclose(fp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Converts the contents of a file into a string
|
||||
*/
|
||||
string FileToString(const string fileName){
|
||||
string FileToString(const string fileName)
|
||||
{
|
||||
ifstream f(fileName.c_str(), ifstream::in | ifstream::binary);
|
||||
|
||||
try {
|
||||
|
@ -252,7 +250,8 @@ string FileToString(const string fileName){
|
|||
f.seekg(0, ifstream::beg);
|
||||
|
||||
str = new char[size + 1];
|
||||
if (!str) throw(string("Could not allocate memory"));
|
||||
if (!str)
|
||||
throw(string("Could not allocate memory"));
|
||||
|
||||
f.read(str, fileSize);
|
||||
f.close();
|
||||
|
@ -262,13 +261,11 @@ string FileToString(const string fileName){
|
|||
delete[] str;
|
||||
return s;
|
||||
}
|
||||
}
|
||||
catch(std::string msg){
|
||||
} catch (std::string msg) {
|
||||
cerr << "Exception caught in FileToString(): " << msg << endl;
|
||||
if (f.is_open())
|
||||
f.close();
|
||||
}
|
||||
catch(...){
|
||||
} catch (...) {
|
||||
cerr << "Exception caught in FileToString()" << endl;
|
||||
if (f.is_open())
|
||||
f.close();
|
||||
|
@ -287,14 +284,14 @@ string FileToString(const string fileName){
|
|||
char device_type[3];
|
||||
int device_id = 0;
|
||||
int platform_id = 0;
|
||||
void _clCmdParams(int argc, char* argv[]){
|
||||
void _clCmdParams(int argc, char* argv[])
|
||||
{
|
||||
for (int i = 0; i < argc; ++i) {
|
||||
switch (argv[i][1]) {
|
||||
case 't': //--t stands for device type
|
||||
if (++i < argc) {
|
||||
sscanf(argv[i], "%s", device_type);
|
||||
}
|
||||
else{
|
||||
} else {
|
||||
std::cerr << "Could not read argument after option " << argv[i - 1] << std::endl;
|
||||
throw;
|
||||
}
|
||||
|
@ -302,8 +299,7 @@ void _clCmdParams(int argc, char* argv[]){
|
|||
case 'd': //--d stands for device id
|
||||
if (++i < argc) {
|
||||
sscanf(argv[i], "%d", &device_id);
|
||||
}
|
||||
else{
|
||||
} else {
|
||||
std::cerr << "Could not read argument after option " << argv[i - 1] << std::endl;
|
||||
throw;
|
||||
}
|
||||
|
@ -311,14 +307,12 @@ void _clCmdParams(int argc, char* argv[]){
|
|||
case 'p': //--p stands for platform id
|
||||
if (++i < argc) {
|
||||
sscanf(argv[i], "%d", &platform_id);
|
||||
}
|
||||
else{
|
||||
} else {
|
||||
std::cerr << "Could not read argument after option " << argv[i - 1] << std::endl;
|
||||
throw;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
;
|
||||
default:;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -340,7 +334,8 @@ void _clCmdParams(int argc, char* argv[]){
|
|||
get the number of devices and devices have no relationship with context
|
||||
@date: 24/03/2011
|
||||
------------------------------------------------------------*/
|
||||
void _clInit(string device_type, int device_id)throw(string){
|
||||
void _clInit(string device_type, int device_id) throw(string)
|
||||
{
|
||||
#ifdef TIMING
|
||||
gettimeofday(&tv_total_start, NULL);
|
||||
#endif
|
||||
|
@ -380,7 +375,9 @@ void _clInit(string device_type, int device_id)throw(string){
|
|||
if (resultCL != CL_SUCCESS)
|
||||
throw(string("InitCL()::Error: Getting number of platforms (clGetPlatformIDs)"));
|
||||
//printf("number of platforms:%d\n",numPlatforms); //by cambine
|
||||
printf("--cambine: number of platforms: %d\n",numPlatforms);
|
||||
#ifdef DEV_INFO
|
||||
//std::cout << "--cambine: number of platforms: " << numPlatforms << std::endl;
|
||||
#endif
|
||||
|
||||
if (!(numPlatforms > 0))
|
||||
throw(string("InitCL()::Error: No platforms found (clGetPlatformIDs)"));
|
||||
|
@ -393,8 +390,7 @@ void _clInit(string device_type, int device_id)throw(string){
|
|||
|
||||
// Select the target platform. Default: first platform
|
||||
targetPlatform = allPlatforms[platform_id];
|
||||
for (int i = 0; i < numPlatforms; i++)
|
||||
{
|
||||
for (int i = 0; i < numPlatforms; i++) {
|
||||
char pbuff[128];
|
||||
resultCL = clGetPlatformInfo(allPlatforms[i],
|
||||
CL_PLATFORM_VENDOR,
|
||||
|
@ -408,7 +404,6 @@ void _clInit(string device_type, int device_id)throw(string){
|
|||
#ifdef DEV_INFO
|
||||
//std::cout << "--cambine: vedor is: " << pbuff << std::endl;
|
||||
#endif
|
||||
|
||||
}
|
||||
free(allPlatforms);
|
||||
//-----------------------------------------------
|
||||
|
@ -433,8 +428,7 @@ void _clInit(string device_type, int device_id)throw(string){
|
|||
throw(string("exception in _clInit -> clGetDeviceIDs -> ACCELERATOR"));
|
||||
}
|
||||
}
|
||||
}
|
||||
else{
|
||||
} else {
|
||||
oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ALL, 0, NULL, &deviceListSize);
|
||||
if (oclHandles.cl_status != CL_SUCCESS) {
|
||||
throw(string("exception in _clInit -> clGetDeviceIDs -> ALL"));
|
||||
|
@ -444,9 +438,9 @@ void _clInit(string device_type, int device_id)throw(string){
|
|||
if (deviceListSize == 0)
|
||||
throw(string("InitCL()::Error: No devices found."));
|
||||
|
||||
//#ifdef DEV_INFO
|
||||
printf("--cambine: number of device\n"); //<<deviceListSize<<std::endl;
|
||||
//#endif
|
||||
#ifdef DEV_INFO
|
||||
//std::cout << "--cambine: number of device=" << deviceListSize << std::endl;
|
||||
#endif
|
||||
number_devices = deviceListSize;
|
||||
// Now, allocate the device list
|
||||
// oclHandles.devices = (cl_device_id *)malloc(deviceListSize);
|
||||
|
@ -475,8 +469,7 @@ void _clInit(string device_type, int device_id)throw(string){
|
|||
throw(string("exception in _clInit -> clGetDeviceIDs -> ACCELERATOR -> 2"));
|
||||
}
|
||||
}
|
||||
}
|
||||
else{
|
||||
} else {
|
||||
oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ALL, deviceListSize, oclHandles.devices, NULL);
|
||||
if (oclHandles.cl_status != CL_SUCCESS) {
|
||||
throw(string("exception in _clInit -> clGetDeviceIDs -> ALL -> 2"));
|
||||
|
@ -491,9 +484,9 @@ void _clInit(string device_type, int device_id)throw(string){
|
|||
_clGetDeviceProperties(DEVICE_ID_INUSED, &prop);
|
||||
//std::cout << "--cambine: device name=" << prop.device_name << std::endl;
|
||||
|
||||
//#ifdef DEV_INFO
|
||||
printf("--cambine: return device list successfully!\n");
|
||||
//#endif
|
||||
#ifdef DEV_INFO
|
||||
//std::cout << "--cambine: return device list successfully!" << std::endl;
|
||||
#endif
|
||||
|
||||
//-----------------------------------------------
|
||||
//--cambine-3: create an OpenCL context
|
||||
|
@ -510,11 +503,12 @@ void _clInit(string device_type, int device_id)throw(string){
|
|||
|
||||
if ((resultCL != CL_SUCCESS) || (oclHandles.context == NULL))
|
||||
throw(string("InitCL()::Error: Creating Context (clCreateContextFromType)"));
|
||||
printf("--cambine: create OCL context successfully!\n");
|
||||
#ifdef DEV_INFO
|
||||
//std::cout << "--cambine: create OCL context successfully!" << std::endl;
|
||||
#endif
|
||||
|
||||
//-----------------------------------------------
|
||||
//--cambine-4: Create an OpenCL command queue
|
||||
/*
|
||||
#ifdef TIMING
|
||||
oclHandles.queue = clCreateCommandQueue(oclHandles.context,
|
||||
oclHandles.devices[DEVICE_ID_INUSED],
|
||||
|
@ -523,26 +517,16 @@ void _clInit(string device_type, int device_id)throw(string){
|
|||
#else
|
||||
oclHandles.queue = clCreateCommandQueue(oclHandles.context,
|
||||
oclHandles.devices[DEVICE_ID_INUSED],
|
||||
0,
|
||||
CL_QUEUE_PROFILING_ENABLE,
|
||||
&resultCL);
|
||||
#endif
|
||||
printf("???\n");*/
|
||||
#ifdef TIMING
|
||||
oclHandles.queue = clCreateCommandQueueWithProperties(oclHandles.context,
|
||||
oclHandles.devices[DEVICE_ID_INUSED], CL_QUEUE_PROFILING_ENABLE,
|
||||
&resultCL);
|
||||
#else
|
||||
oclHandles.queue = clCreateCommandQueueWithProperties(oclHandles.context,
|
||||
oclHandles.devices[DEVICE_ID_INUSED], 0, &resultCL);
|
||||
|
||||
#endif
|
||||
|
||||
if (oclHandles.queue == NULL)
|
||||
if ((resultCL != CL_SUCCESS) || (oclHandles.queue == NULL))
|
||||
throw(string("InitCL()::Creating Command Queue. (clCreateCommandQueue)"));
|
||||
//#ifdef PROFILE_
|
||||
// double t2 = gettime();
|
||||
// CC += t2 - t1;
|
||||
//#endif
|
||||
#ifdef PROFILE_
|
||||
double t2 = gettime();
|
||||
CC += t2 - t1;
|
||||
#endif
|
||||
//-----------------------------------------------
|
||||
//--cambine-5: Load CL file, build CL program object, create CL kernel object
|
||||
/*
|
||||
|
@ -566,10 +550,9 @@ void _clInit(string device_type, int device_id)throw(string){
|
|||
std::abort();
|
||||
|
||||
oclHandles.program = clCreateProgramWithBinary(
|
||||
oclHandles.context, 1, &oclHandles.devices[device_id_inused], &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &resultCL);
|
||||
oclHandles.context, 1, &oclHandles.devices[DEVICE_ID_INUSED], &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &resultCL);
|
||||
free(kernel_bin);
|
||||
|
||||
|
||||
//insert debug information
|
||||
std::string options = "";
|
||||
//options += " -cl-nv-opt-level=3";
|
||||
|
@ -606,8 +589,6 @@ void _clInit(string device_type, int device_id)throw(string){
|
|||
|
||||
throw(string("InitCL()::Error: Building Program (clBuildProgram)"));
|
||||
}
|
||||
printf("build program");
|
||||
|
||||
#ifdef PROFILE_
|
||||
double t3 = gettime();
|
||||
KC += t3 - t2;
|
||||
|
@ -644,15 +625,13 @@ void _clInit(string device_type, int device_id)throw(string){
|
|||
free(binaries[i]);
|
||||
#endif
|
||||
|
||||
for (int nKernel = 0; nKernel < total_kernels; nKernel++)
|
||||
{
|
||||
for (int nKernel = 0; nKernel < total_kernels; nKernel++) {
|
||||
// get a kernel object handle for a kernel with the given name
|
||||
cl_kernel kernel = clCreateKernel(oclHandles.program,
|
||||
(kernel_names[nKernel]).c_str(),
|
||||
&resultCL);
|
||||
|
||||
if ((resultCL != CL_SUCCESS) || (kernel == NULL))
|
||||
{
|
||||
if ((resultCL != CL_SUCCESS) || (kernel == NULL)) {
|
||||
string errorMsg = "InitCL()::Error: Creating Kernel (clCreateKernel) \"" + kernel_names[nKernel] + "\"";
|
||||
throw(errorMsg);
|
||||
}
|
||||
|
@ -729,8 +708,7 @@ void _clRelease()
|
|||
|
||||
if (oclHandles.queue != NULL) {
|
||||
cl_int resultCL = clReleaseCommandQueue(oclHandles.queue);
|
||||
if (resultCL != CL_SUCCESS)
|
||||
{
|
||||
if (resultCL != CL_SUCCESS) {
|
||||
cerr << "ReleaseCL()::Error: In clReleaseCommandQueue" << endl;
|
||||
errorFlag = true;
|
||||
}
|
||||
|
@ -748,7 +726,8 @@ void _clRelease()
|
|||
oclHandles.context = NULL;
|
||||
}
|
||||
|
||||
if (errorFlag) throw(string("ReleaseCL()::Error encountered."));
|
||||
if (errorFlag)
|
||||
throw(string("ReleaseCL()::Error encountered."));
|
||||
#ifdef PROFILE_
|
||||
double t2 = gettime();
|
||||
CR += t2 - t1;
|
||||
|
@ -767,7 +746,8 @@ void _clRelease()
|
|||
@return: mem_d
|
||||
@date: 24/03/2011
|
||||
------------------------------------------------------------*/
|
||||
cl_mem _clMalloc(int size) throw(string){
|
||||
cl_mem _clMalloc(int size) throw(string)
|
||||
{
|
||||
#ifdef TIMING
|
||||
gettimeofday(&tv_mem_alloc_start, NULL);
|
||||
#endif
|
||||
|
@ -827,7 +807,8 @@ cl_mem _clMalloc(int size) throw(string){
|
|||
@date: 06/04/2011
|
||||
------------------------------------------------------------*/
|
||||
|
||||
void* _clMallocHost(int size)throw(string){
|
||||
void* _clMallocHost(int size) throw(string)
|
||||
{
|
||||
void* mem_h;
|
||||
oclHandles.pinned_mem_out = clCreateBuffer(oclHandles.context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, size, NULL, &oclHandles.cl_status);
|
||||
#ifdef ERRMSG
|
||||
|
@ -907,7 +888,8 @@ void* _clMallocHost(int size)throw(string){
|
|||
@return: NULL
|
||||
@date: 06/04/2011
|
||||
------------------------------------------------------------*/
|
||||
void _clFreeHost(int io, void * mem_h){
|
||||
void _clFreeHost(int io, void* mem_h)
|
||||
{
|
||||
if (io == 0) { //in
|
||||
if (mem_h) {
|
||||
oclHandles.cl_status = clEnqueueUnmapMemObject(oclHandles.queue, oclHandles.pinned_mem_in, (void*)mem_h, 0, NULL, NULL);
|
||||
|
@ -941,8 +923,7 @@ void _clFreeHost(int io, void * mem_h){
|
|||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
else if(io==1){ //out
|
||||
} else if (io == 1) { //out
|
||||
if (mem_h) {
|
||||
oclHandles.cl_status = clEnqueueUnmapMemObject(oclHandles.queue, oclHandles.pinned_mem_out, (void*)mem_h, 0, NULL, NULL);
|
||||
#ifdef ERRMSG
|
||||
|
@ -975,8 +956,7 @@ void _clFreeHost(int io, void * mem_h){
|
|||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
else
|
||||
} else
|
||||
throw(string("encounter invalid choice when freeing pinned memmory"));
|
||||
}
|
||||
/*------------------------------------------------------------
|
||||
|
@ -988,7 +968,8 @@ void _clFreeHost(int io, void * mem_h){
|
|||
@return: NULL
|
||||
@date: 17/01/2011
|
||||
------------------------------------------------------------*/
|
||||
void _clMemcpyH2D(cl_mem dst, const void *src, int size) throw(string){
|
||||
void _clMemcpyH2D(cl_mem dst, const void* src, int size) throw(string)
|
||||
{
|
||||
#ifdef PROFILE_
|
||||
double t1 = gettime();
|
||||
#endif
|
||||
|
@ -1044,7 +1025,8 @@ void _clMemcpyH2D(cl_mem dst, const void *src, int size) throw(string){
|
|||
@return: NULL
|
||||
@date: 17/01/2011
|
||||
------------------------------------------------------------*/
|
||||
void _clMemcpyD2H(void * dst, cl_mem src, int size) throw(string){
|
||||
void _clMemcpyD2H(void* dst, cl_mem src, int size) throw(string)
|
||||
{
|
||||
#ifdef PROFILE_
|
||||
double t1 = gettime();
|
||||
#endif
|
||||
|
@ -1099,7 +1081,8 @@ void _clMemcpyD2H(void * dst, cl_mem src, int size) throw(string){
|
|||
@return: NULL
|
||||
@date: 27/03/2011
|
||||
------------------------------------------------------------*/
|
||||
void _clMemcpyD2D(cl_mem dst, cl_mem src, int size) throw(string){
|
||||
void _clMemcpyD2D(cl_mem dst, cl_mem src, int size) throw(string)
|
||||
{
|
||||
#ifdef PROFILE_
|
||||
double t1 = gettime();
|
||||
#endif
|
||||
|
@ -1165,7 +1148,8 @@ void _clMemcpyD2D(cl_mem dst, cl_mem src, int size) throw(string){
|
|||
@return: NULL
|
||||
@date: 03/04/2011
|
||||
------------------------------------------------------------*/
|
||||
void _clSetArgs(int kernel_id, int arg_idx, void * d_mem, int size = 0) throw(string){
|
||||
void _clSetArgs(int kernel_id, int arg_idx, void* d_mem, int size = 0) throw(string)
|
||||
{
|
||||
if (!size) {
|
||||
oclHandles.cl_status = clSetKernelArg(oclHandles.kernel[kernel_id], arg_idx, sizeof(d_mem), &d_mem);
|
||||
#ifdef ERRMSG
|
||||
|
@ -1202,8 +1186,7 @@ void _clSetArgs(int kernel_id, int arg_idx, void * d_mem, int size = 0) throw(st
|
|||
if (oclHandles.cl_status != CL_SUCCESS)
|
||||
throw(oclHandles.error_str);
|
||||
#endif
|
||||
}
|
||||
else{
|
||||
} else {
|
||||
oclHandles.cl_status = clSetKernelArg(oclHandles.kernel[kernel_id], arg_idx, size, d_mem);
|
||||
#ifdef ERRMSG
|
||||
oclHandles.error_str = "excpetion in _clSetKernelArg()-2 ";
|
||||
|
@ -1241,7 +1224,8 @@ void _clSetArgs(int kernel_id, int arg_idx, void * d_mem, int size = 0) throw(st
|
|||
#endif
|
||||
}
|
||||
}
|
||||
void _clFinish() throw(string){
|
||||
void _clFinish() throw(string)
|
||||
{
|
||||
oclHandles.cl_status = clFinish(oclHandles.queue);
|
||||
#ifdef ERRMSG
|
||||
if (oclHandles.cl_status != CL_SUCCESS) {
|
||||
|
@ -1273,7 +1257,8 @@ void _clFinish() throw(string){
|
|||
@return: NULL
|
||||
@date: 03/04/2011
|
||||
------------------------------------------------------------*/
|
||||
void _clInvokeKernel(int kernel_id, int work_items, int work_group_size) throw(string){
|
||||
void _clInvokeKernel(int kernel_id, int work_items, int work_group_size) throw(string)
|
||||
{
|
||||
#ifdef PROFILE_
|
||||
double t1 = gettime();
|
||||
#endif
|
||||
|
@ -1283,7 +1268,7 @@ void _clInvokeKernel(int kernel_id, int work_items, int work_group_size) throw(s
|
|||
work_items = work_items + (work_group_size - (work_items % work_group_size));
|
||||
size_t local_work_size[] = { work_group_size, 1 };
|
||||
size_t global_work_size[] = { work_items, 1 };
|
||||
oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0, \
|
||||
oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0,
|
||||
global_work_size, local_work_size, 0, 0, &(e[0]));
|
||||
#ifdef ERRMSG
|
||||
if (oclHandles.cl_status != CL_SUCCESS) {
|
||||
|
@ -1389,7 +1374,8 @@ void _clInvokeKernel(int kernel_id, int work_items, int work_group_size) throw(s
|
|||
@date: 03/04/2011
|
||||
------------------------------------------------------------*/
|
||||
|
||||
void _clMemset(cl_mem mem_d, short val, int number_bytes)throw(string){
|
||||
void _clMemset(cl_mem mem_d, short val, int number_bytes) throw(string)
|
||||
{
|
||||
int kernel_id = 0;
|
||||
int arg_idx = 0;
|
||||
_clSetArgs(kernel_id, arg_idx++, mem_d);
|
||||
|
@ -1408,7 +1394,8 @@ void _clMemset(cl_mem mem_d, short val, int number_bytes)throw(string){
|
|||
@return: NULL
|
||||
@date: 03/04/2011
|
||||
------------------------------------------------------------*/
|
||||
void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, int group_y) throw(string){
|
||||
void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, int group_y) throw(string)
|
||||
{
|
||||
#ifdef PROFILE_
|
||||
double t1 = gettime();
|
||||
#endif
|
||||
|
@ -1418,7 +1405,7 @@ void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, int
|
|||
cl_event e[1];
|
||||
/*if(work_items%work_group_size != 0) //process situations that work_items cannot be divided by work_group_size
|
||||
work_items = work_items + (work_group_size-(work_items%work_group_size));*/
|
||||
oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0, \
|
||||
oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0,
|
||||
global_work_size, local_work_size, 0, 0, &(e[0]));
|
||||
#ifdef ERRMSG
|
||||
if (oclHandles.cl_status != CL_SUCCESS) {
|
||||
|
@ -1496,7 +1483,8 @@ void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, int
|
|||
@date: 03/04/2011
|
||||
------------------------------------------------------------*/
|
||||
|
||||
void _clFree(cl_mem ob) throw(string){
|
||||
void _clFree(cl_mem ob) throw(string)
|
||||
{
|
||||
#ifdef TIMING
|
||||
gettimeofday(&tv_close_start, NULL);
|
||||
#endif
|
||||
|
@ -1543,7 +1531,8 @@ void _clFree(cl_mem ob) throw(string){
|
|||
@return: NULL
|
||||
@date: 03/04/2011
|
||||
------------------------------------------------------------*/
|
||||
void _clStatistics(){
|
||||
void _clStatistics()
|
||||
{
|
||||
#ifdef PROFILE_
|
||||
FILE* fp_pd = fopen("PD_OCL.txt", "a");
|
||||
fprintf(fp_pd, "%lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf\n", CC, CR, MA, MF, H2D, D2H, D2D, KE, KC);
|
||||
|
@ -1552,7 +1541,8 @@ void _clStatistics(){
|
|||
return;
|
||||
}
|
||||
|
||||
void _clPrintTiming(){
|
||||
void _clPrintTiming()
|
||||
{
|
||||
#ifdef TIMING
|
||||
gettimeofday(&tv_total_end, NULL);
|
||||
tvsub(&tv_total_end, &tv_total_start, &tv);
|
||||
|
@ -1567,6 +1557,5 @@ void _clPrintTiming(){
|
|||
printf("Close: %f\n", close_time);
|
||||
printf("Total: %f\n", total_time);
|
||||
#endif
|
||||
|
||||
}
|
||||
#endif //_CL_HELPER_
|
||||
|
|
|
@ -12,10 +12,10 @@
|
|||
on 24/03/2011
|
||||
********************************************************************/
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <math.h>
|
||||
#include "CLHelper.h"
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
|
||||
/*
|
||||
* Options
|
||||
|
@ -38,12 +38,10 @@
|
|||
* not options
|
||||
*/
|
||||
|
||||
|
||||
#if block_length > 128
|
||||
#warning "the kernels may fail too launch on some systems if the block length is too large"
|
||||
#endif
|
||||
|
||||
|
||||
#define VAR_DENSITY 0
|
||||
#define VAR_MOMENTUM 1
|
||||
#define VAR_DENSITY_ENERGY (VAR_MOMENTUM + NDIM)
|
||||
|
@ -60,47 +58,52 @@ typedef struct{
|
|||
* Generic functions
|
||||
*/
|
||||
template <typename T>
|
||||
cl_mem alloc(int N){
|
||||
cl_mem alloc(int N)
|
||||
{
|
||||
cl_mem mem_d = _clMalloc(sizeof(T) * N);
|
||||
return mem_d;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void dealloc(cl_mem array){
|
||||
void dealloc(cl_mem array)
|
||||
{
|
||||
_clFree(array);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void copy(cl_mem dst, cl_mem src, int N){
|
||||
void copy(cl_mem dst, cl_mem src, int N)
|
||||
{
|
||||
_clMemcpyD2D(dst, src, N * sizeof(T));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void upload(cl_mem dst, T* src, int N){
|
||||
void upload(cl_mem dst, T* src, int N)
|
||||
{
|
||||
_clMemcpyH2D(dst, src, N * sizeof(T));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void download(T* dst, cl_mem src, int N){
|
||||
void download(T* dst, cl_mem src, int N)
|
||||
{
|
||||
_clMemcpyD2H(dst, src, N * sizeof(T));
|
||||
}
|
||||
|
||||
void dump(cl_mem variables, int nel, int nelr){
|
||||
void dump(cl_mem variables, int nel, int nelr)
|
||||
{
|
||||
float* h_variables = new float[nelr * NVAR];
|
||||
download(h_variables, variables, nelr * NVAR);
|
||||
|
||||
{
|
||||
std::ofstream file("density.txt");
|
||||
file << nel << " " << nelr << std::endl;
|
||||
for(int i = 0; i < nel; i++) file << h_variables[i + VAR_DENSITY*nelr] << std::endl;
|
||||
for (int i = 0; i < nel; i++)
|
||||
file << h_variables[i + VAR_DENSITY * nelr] << std::endl;
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
std::ofstream file("momentum.txt");
|
||||
file << nel << " " << nelr << std::endl;
|
||||
for(int i = 0; i < nel; i++)
|
||||
{
|
||||
for (int i = 0; i < nel; i++) {
|
||||
for (int j = 0; j != NDIM; j++)
|
||||
file << h_variables[i + (VAR_MOMENTUM + j) * nelr] << " ";
|
||||
file << std::endl;
|
||||
|
@ -110,12 +113,14 @@ void dump(cl_mem variables, int nel, int nelr){
|
|||
{
|
||||
std::ofstream file("density_energy.txt");
|
||||
file << nel << " " << nelr << std::endl;
|
||||
for(int i = 0; i < nel; i++) file << h_variables[i + VAR_DENSITY_ENERGY*nelr] << std::endl;
|
||||
for (int i = 0; i < nel; i++)
|
||||
file << h_variables[i + VAR_DENSITY_ENERGY * nelr] << std::endl;
|
||||
}
|
||||
delete[] h_variables;
|
||||
}
|
||||
|
||||
void initialize_variables(int nelr, cl_mem variables, cl_mem ff_variable) throw(string){
|
||||
void initialize_variables(int nelr, cl_mem variables, cl_mem ff_variable) throw(string)
|
||||
{
|
||||
|
||||
int work_items = nelr;
|
||||
int work_group_size = BLOCK_SIZE_1;
|
||||
|
@ -127,7 +132,8 @@ void initialize_variables(int nelr, cl_mem variables, cl_mem ff_variable) throw(
|
|||
_clInvokeKernel(kernel_id, work_items, work_group_size);
|
||||
}
|
||||
|
||||
void compute_step_factor(int nelr, cl_mem variables, cl_mem areas, cl_mem step_factors){
|
||||
void compute_step_factor(int nelr, cl_mem variables, cl_mem areas, cl_mem step_factors)
|
||||
{
|
||||
|
||||
int work_items = nelr;
|
||||
int work_group_size = BLOCK_SIZE_2;
|
||||
|
@ -140,11 +146,12 @@ void compute_step_factor(int nelr, cl_mem variables, cl_mem areas, cl_mem step_f
|
|||
_clInvokeKernel(kernel_id, work_items, work_group_size);
|
||||
}
|
||||
|
||||
void compute_flux(int nelr, cl_mem elements_surrounding_elements, cl_mem normals, cl_mem variables, cl_mem ff_variable, \
|
||||
void compute_flux(int nelr, cl_mem elements_surrounding_elements, cl_mem normals, cl_mem variables, cl_mem ff_variable,
|
||||
cl_mem fluxes, cl_mem ff_flux_contribution_density_energy,
|
||||
cl_mem ff_flux_contribution_momentum_x,
|
||||
cl_mem ff_flux_contribution_momentum_y,
|
||||
cl_mem ff_flux_contribution_momentum_z){
|
||||
cl_mem ff_flux_contribution_momentum_z)
|
||||
{
|
||||
|
||||
int work_items = nelr;
|
||||
int work_group_size = BLOCK_SIZE_3;
|
||||
|
@ -163,7 +170,8 @@ void compute_flux(int nelr, cl_mem elements_surrounding_elements, cl_mem normals
|
|||
_clInvokeKernel(kernel_id, work_items, work_group_size);
|
||||
}
|
||||
|
||||
void time_step(int j, int nelr, cl_mem old_variables, cl_mem variables, cl_mem step_factors, cl_mem fluxes){
|
||||
void time_step(int j, int nelr, cl_mem old_variables, cl_mem variables, cl_mem step_factors, cl_mem fluxes)
|
||||
{
|
||||
|
||||
int work_items = nelr;
|
||||
int work_group_size = BLOCK_SIZE_4;
|
||||
|
@ -184,7 +192,6 @@ inline void compute_flux_contribution(float& density, float3& momentum, float& d
|
|||
fc_momentum_x.y = velocity.x * momentum.y;
|
||||
fc_momentum_x.z = velocity.x * momentum.z;
|
||||
|
||||
|
||||
fc_momentum_y.x = fc_momentum_x.y;
|
||||
fc_momentum_y.y = velocity.y * momentum.y + pressure;
|
||||
fc_momentum_y.z = velocity.y * momentum.z;
|
||||
|
@ -202,11 +209,13 @@ inline void compute_flux_contribution(float& density, float3& momentum, float& d
|
|||
/*
|
||||
* Main function
|
||||
*/
|
||||
int main(int argc, char** argv){
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
printf("WG size of kernel:initialize = %d, WG size of kernel:compute_step_factor = %d, WG size of kernel:compute_flux = %d, WG size of kernel:time_step = %d\n", BLOCK_SIZE_1, BLOCK_SIZE_2, BLOCK_SIZE_3, BLOCK_SIZE_4);
|
||||
|
||||
if (argc < 2) {
|
||||
//std::cout << "specify data file name and [device type] [device id]" << std::endl;
|
||||
printf("specify data file name and [device type] [device id]\n");
|
||||
return 0;
|
||||
}
|
||||
const char* data_file_name = argv[1];
|
||||
|
@ -263,7 +272,6 @@ int main(int argc, char** argv){
|
|||
_clMemcpyH2D(ff_flux_contribution_momentum_z, &h_ff_flux_contribution_momentum_z, sizeof(float3));
|
||||
_clMemcpyH2D(ff_flux_contribution_density_energy, &h_ff_flux_contribution_density_energy, sizeof(float3));
|
||||
_clFinish();
|
||||
|
||||
}
|
||||
int nel;
|
||||
int nelr;
|
||||
|
@ -273,29 +281,26 @@ int main(int argc, char** argv){
|
|||
//float* normals;
|
||||
{
|
||||
std::ifstream file(data_file_name);
|
||||
if(!file.good()){
|
||||
if ((file.rdstate() & std::ifstream::failbit) != 0) {
|
||||
throw(string("can not find/open file!"));
|
||||
}
|
||||
file >> nel;
|
||||
nelr = block_length * ((nel / block_length) + std::min(1, nel % block_length));
|
||||
////std::cout<<"--cambine: nel="<<nel<<", nelr="<<nelr<<std::endl;
|
||||
//std::cout << "--cambine: nel=" << nel << ", nelr=" << nelr << std::endl;
|
||||
float* h_areas = new float[nelr];
|
||||
int* h_elements_surrounding_elements = new int[nelr * NNB];
|
||||
float* h_normals = new float[nelr * NDIM * NNB];
|
||||
|
||||
|
||||
// read in data
|
||||
for(int i = 0; i < nel; i++)
|
||||
{
|
||||
for (int i = 0; i < nel; i++) {
|
||||
file >> h_areas[i];
|
||||
for(int j = 0; j < NNB; j++)
|
||||
{
|
||||
for (int j = 0; j < NNB; j++) {
|
||||
file >> h_elements_surrounding_elements[i + j * nelr];
|
||||
if(h_elements_surrounding_elements[i+j*nelr] < 0) h_elements_surrounding_elements[i+j*nelr] = -1;
|
||||
if (h_elements_surrounding_elements[i + j * nelr] < 0)
|
||||
h_elements_surrounding_elements[i + j * nelr] = -1;
|
||||
h_elements_surrounding_elements[i + j * nelr]--; //it's coming in with Fortran numbering
|
||||
|
||||
for(int k = 0; k < NDIM; k++)
|
||||
{
|
||||
for (int k = 0; k < NDIM; k++) {
|
||||
file >> h_normals[i + (j + k * NNB) * nelr];
|
||||
h_normals[i + (j + k * NNB) * nelr] = -h_normals[i + (j + k * NNB) * nelr];
|
||||
}
|
||||
|
@ -304,14 +309,13 @@ int main(int argc, char** argv){
|
|||
|
||||
// fill in remaining data
|
||||
int last = nel - 1;
|
||||
for(int i = nel; i < nelr; i++)
|
||||
{
|
||||
for (int i = nel; i < nelr; i++) {
|
||||
h_areas[i] = h_areas[last];
|
||||
for(int j = 0; j < NNB; j++)
|
||||
{
|
||||
for (int j = 0; j < NNB; j++) {
|
||||
// duplicate the last element
|
||||
h_elements_surrounding_elements[i + j * nelr] = h_elements_surrounding_elements[last + j * nelr];
|
||||
for(int k = 0; k < NDIM; k++) h_normals[last + (j + k*NNB)*nelr] = h_normals[last + (j + k*NNB)*nelr];
|
||||
for (int k = 0; k < NDIM; k++)
|
||||
h_normals[last + (j + k * NNB) * nelr] = h_normals[last + (j + k * NNB) * nelr];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -343,15 +347,20 @@ int main(int argc, char** argv){
|
|||
// make sure CUDA isn't still doing something before we start timing
|
||||
_clFinish();
|
||||
// these need to be computed the first time in order to compute time step
|
||||
printf( "Starting...\n");
|
||||
//std::cout << "Starting..." << std::endl;
|
||||
printf("starting .. \n");
|
||||
|
||||
// Begin iterations
|
||||
for (int i = 0; i < iterations; i++) {
|
||||
copy<float>(old_variables, variables, nelr*NVAR);
|
||||
//copy<float>(old_variables, variables, nelr * NVAR);
|
||||
//download(ff_variable, old_variables, nelr * NVAR);
|
||||
// upload(ff_variable, old_variables, nelr * NVAR);
|
||||
_clMemcpyH2D(ff_variable, h_ff_variable, NVAR * sizeof(float));
|
||||
_clMemcpyD2H(h_ff_variable, variables, NVAR * sizeof(float));
|
||||
// for the first iteration we compute the time step
|
||||
compute_step_factor(nelr, variables, areas, step_factors);
|
||||
for (int j = 0; j < RK; j++) {
|
||||
compute_flux(nelr, elements_surrounding_elements, normals, variables, ff_variable, fluxes, ff_flux_contribution_density_energy, \
|
||||
compute_flux(nelr, elements_surrounding_elements, normals, variables, ff_variable, fluxes, ff_flux_contribution_density_energy,
|
||||
ff_flux_contribution_momentum_x, ff_flux_contribution_momentum_y, ff_flux_contribution_momentum_z);
|
||||
|
||||
time_step(j, nelr, old_variables, variables, step_factors, fluxes);
|
||||
|
@ -363,6 +372,7 @@ int main(int argc, char** argv){
|
|||
//std::cout << "Saved solution..." << std::endl;
|
||||
_clStatistics();
|
||||
//std::cout << "Cleaning up..." << std::endl;
|
||||
printf("Cleaning up ...\n");
|
||||
|
||||
//--release resources
|
||||
_clFree(ff_variable);
|
||||
|
@ -378,11 +388,11 @@ int main(int argc, char** argv){
|
|||
_clFree(fluxes);
|
||||
_clFree(step_factors);
|
||||
_clRelease();
|
||||
printf("Done...\n");
|
||||
//std::cout << "Done..." << std::endl;
|
||||
_clPrintTiming();
|
||||
}
|
||||
catch(string msg){
|
||||
printf("--cambine:( an exception catched in main body ->%s\n", msg.c_str());
|
||||
} catch (string msg) {
|
||||
//std::cout << "--cambine:( an exception catched in main body ->" << msg << std::endl;
|
||||
printf("--cambine:( an exception catched in main body\n");
|
||||
_clFree(ff_variable);
|
||||
_clFree(ff_flux_contribution_momentum_x);
|
||||
_clFree(ff_flux_contribution_momentum_y);
|
||||
|
@ -396,8 +406,8 @@ int main(int argc, char** argv){
|
|||
_clFree(fluxes);
|
||||
_clFree(step_factors);
|
||||
_clRelease();
|
||||
}
|
||||
catch(...){
|
||||
} catch (...) {
|
||||
printf("--cambine:( unknow exceptions in main body...\n");
|
||||
//std::cout << "--cambine:( unknow exceptions in main body..." << std::endl;
|
||||
_clFree(ff_variable);
|
||||
_clFree(ff_flux_contribution_momentum_x);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue